14 #include "ruby/internal/config.h"
24 #include "debug_counter.h"
29 #include "internal/array.h"
30 #include "internal/compar.h"
31 #include "internal/compilers.h"
32 #include "internal/encoding.h"
33 #include "internal/error.h"
34 #include "internal/gc.h"
35 #include "internal/numeric.h"
36 #include "internal/object.h"
37 #include "internal/proc.h"
38 #include "internal/re.h"
39 #include "internal/sanitizers.h"
40 #include "internal/string.h"
41 #include "internal/transcode.h"
46 #include "ruby_assert.h"
49 #if defined HAVE_CRYPT_R
50 # if defined HAVE_CRYPT_H
53 #elif !defined HAVE_CRYPT
54 # include "missing/crypt.h"
55 # define HAVE_CRYPT_R 1
/*
 * Shorthands for entry `no` of the beg[] / end[] arrays reached through a
 * pointer named `regs`.  The macros do not take `regs` as an argument, so a
 * variable with exactly that name must be in scope wherever they are used.
 * NOTE(review): presumably `regs` is the regexp match-register struct --
 * confirm against the callers.
 */
58 #define BEG(no) (regs->beg[(no)])
59 #define END(no) (regs->end[(no)])
62 #undef rb_usascii_str_new
63 #undef rb_utf8_str_new
65 #undef rb_str_new_cstr
66 #undef rb_tainted_str_new_cstr
67 #undef rb_usascii_str_new_cstr
68 #undef rb_utf8_str_new_cstr
69 #undef rb_enc_str_new_cstr
70 #undef rb_external_str_new_cstr
71 #undef rb_locale_str_new_cstr
72 #undef rb_str_dup_frozen
73 #undef rb_str_buf_new_cstr
75 #undef rb_str_buf_cat2
77 #undef rb_str_cat_cstr
78 #undef rb_fstring_cstr
/* NOTE(review): presumably an upper bound, in bytes, on one encoded
 * character; confirm against its users. */
104 #define RUBY_MAX_CHAR_LEN 16
/*
 * File-local string flag bits, each mapped onto one of the FL_USERn slots.
 */
/* Set by STR_SET_SHARED on the string whose buffer is being shared. */
105 #define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED on the shared root when RBASIC_CLASS(root) == 0. */
106 #define STR_BORROWED FL_USER6
/* Tested by rb_check_lockedtmp(), which str_modifiable() calls first. */
107 #define STR_TMPLOCK FL_USER7
/* Set by str_new_static(); str_discard() only hands a heap buffer to
 * ruby_sized_xfree() when neither STR_SHARED nor this flag is set. */
108 #define STR_NOFREE FL_USER18
/* STR_SET_SHARED guards its bookkeeping with !FL_TEST(str, STR_FAKESTR). */
109 #define STR_FAKESTR FL_USER19
111 #define STR_SET_NOEMBED(str) do {\
112 FL_SET((str), STR_NOEMBED);\
114 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
117 STR_SET_EMBED_LEN((str), 0);\
/*
 * Clear both STR_NOEMBED and STR_NOFREE on `str`.  Only the flag bits are
 * touched here; callers in this file follow it with STR_SET_EMBED_LEN to set
 * the embedded length.
 */
120 #define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
122 # define STR_SET_EMBED_LEN(str, n) do { \
123 assert(str_embed_capa(str) > (n));\
124 RSTRING(str)->as.embed.len = (n);\
127 # define STR_SET_EMBED_LEN(str, n) do { \
129 RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
130 RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
134 #define STR_SET_LEN(str, n) do { \
135 if (STR_EMBED_P(str)) {\
136 STR_SET_EMBED_LEN((str), (n));\
139 RSTRING(str)->as.heap.len = (n);\
143 #define STR_DEC_LEN(str) do {\
144 if (STR_EMBED_P(str)) {\
145 long n = RSTRING_LEN(str);\
147 STR_SET_EMBED_LEN((str), n);\
150 RSTRING(str)->as.heap.len--;\
/*
 * Terminator length used for `str`: rb_enc_mbminlen() of the encoding that
 * rb_enc_get(str) returns.  The encoding lookup happens on every expansion,
 * which is why callers in this file cache the result in a local `termlen`.
 */
154 #define TERM_LEN(str) rb_enc_mbminlen(rb_enc_get(str))
155 #define TERM_FILL(ptr, termlen) do {\
156 char *const term_fill_ptr = (ptr);\
157 const int term_fill_len = (termlen);\
158 *term_fill_ptr = '\0';\
159 if (UNLIKELY(term_fill_len > 1))\
160 memset(term_fill_ptr, 0, term_fill_len);\
163 #define RESIZE_CAPA(str,capacity) do {\
164 const int termlen = TERM_LEN(str);\
165 RESIZE_CAPA_TERM(str,capacity,termlen);\
167 #define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
168 if (STR_EMBED_P(str)) {\
169 if (str_embed_capa(str) < capacity + termlen) {\
170 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
171 const long tlen = RSTRING_LEN(str);\
172 memcpy(tmp, RSTRING_PTR(str), tlen);\
173 RSTRING(str)->as.heap.ptr = tmp;\
174 RSTRING(str)->as.heap.len = tlen;\
175 STR_SET_NOEMBED(str);\
176 RSTRING(str)->as.heap.aux.capa = (capacity);\
180 assert(!FL_TEST((str), STR_SHARED)); \
181 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
182 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
183 RSTRING(str)->as.heap.aux.capa = (capacity);\
187 #define STR_SET_SHARED(str, shared_str) do { \
188 if (!FL_TEST(str, STR_FAKESTR)) { \
189 assert(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
190 assert(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
191 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
192 FL_SET((str), STR_SHARED); \
193 FL_SET((shared_str), STR_SHARED_ROOT); \
194 if (RBASIC_CLASS((shared_str)) == 0) \
195 FL_SET_RAW((shared_str), STR_BORROWED); \
/* Buffer pointer stored in the heap representation (as.heap.ptr) of `str`.
 * NOTE(review): presumably only meaningful while the string is not embedded,
 * since `as` looks like a union of embed/heap -- confirm. */
199 #define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* Size of the heap buffer in bytes: the recorded capacity (as.heap.aux.capa)
 * plus the terminator length.  This is the value passed as the "old size" to
 * SIZED_REALLOC_N and ruby_sized_xfree elsewhere in this file. */
200 #define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Encoding of `str`, obtained through the file-local get_encoding() helper. */
203 #define STR_ENC_GET(str) get_encoding(str)
205 #if !defined SHARABLE_MIDDLE_SUBSTRING
206 # define SHARABLE_MIDDLE_SUBSTRING 0
208 #if !SHARABLE_MIDDLE_SUBSTRING
209 #define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
211 #define SHARABLE_SUBSTRING_P(beg, len, end) 1
216 str_embed_capa(
VALUE str)
219 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
226 str_embed_size(
long capa)
232 STR_EMBEDDABLE_P(
long len,
long termlen)
235 return rb_gc_size_allocatable_p(str_embed_size(len + termlen));
/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions appear further down in this file (for example,
 * str_make_independent() calls str_make_independent_expand() ahead of its
 * definition).
 */
243 static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
244 static VALUE str_new_static(
VALUE klass,
const char *ptr,
long len,
int encindex);
245 static VALUE str_new(
VALUE klass,
const char *ptr,
long len);
246 static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
247 static inline void str_modifiable(
VALUE str);
251 str_make_independent(
VALUE str)
254 int termlen = TERM_LEN(str);
255 str_make_independent_expand((str), len, 0L, termlen);
258 static inline int str_dependent_p(
VALUE str);
261 rb_str_make_independent(
VALUE str)
263 if (str_dependent_p(str)) {
264 str_make_independent(str);
269 rb_debug_rstring_null_ptr(
const char *func)
271 fprintf(stderr,
"%s is returning NULL!! "
272 "SIGSEGV is highly expected to follow immediately. "
273 "If you could reproduce, attach your debugger here, "
274 "and look at the passed string.",
279 static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
282 get_actual_encoding(
const int encidx,
VALUE str)
284 const unsigned char *q;
287 case ENCINDEX_UTF_16:
290 if (q[0] == 0xFE && q[1] == 0xFF) {
291 return rb_enc_get_from_index(ENCINDEX_UTF_16BE);
293 if (q[0] == 0xFF && q[1] == 0xFE) {
294 return rb_enc_get_from_index(ENCINDEX_UTF_16LE);
297 case ENCINDEX_UTF_32:
300 if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF) {
301 return rb_enc_get_from_index(ENCINDEX_UTF_32BE);
303 if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF) {
304 return rb_enc_get_from_index(ENCINDEX_UTF_32LE);
312 get_encoding(
VALUE str)
318 mustnot_broken(
VALUE str)
320 if (is_broken_string(str)) {
326 mustnot_wchar(
VALUE str)
336 static VALUE register_fstring(
VALUE str,
bool copy);
/*
 * True when `str` does not have the FL_EXIVAR flag set and its class is
 * exactly rb_cString.  `str` is expanded twice.
 */
343 #define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
351 fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data,
int existing)
361 if (rb_objspace_garbage_object_p(str)) {
386 if (STR_SHARED_P(str)) {
388 str_make_independent(str);
391 if (!BARE_STRING_P(str)) {
395 RBASIC(str)->flags |= RSTRING_FSTR;
397 *key = *value = arg->fstr = str;
404 rb_fstring(
VALUE str)
411 if (
FL_TEST(str, RSTRING_FSTR))
414 bare = BARE_STRING_P(str);
416 if (STR_EMBED_P(str)) {
420 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_SHARED_ROOT|STR_SHARED) == (STR_NOEMBED|STR_SHARED_ROOT)) {
429 fstr = register_fstring(str, FALSE);
432 str_replace_shared_without_enc(str, fstr);
440 register_fstring(
VALUE str,
bool copy)
447 st_table *frozen_strings = rb_vm_fstring_table();
450 st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&args);
451 }
while (args.fstr ==
Qundef);
463 setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
476 fake_str->
as.
heap.len = len;
477 fake_str->
as.
heap.ptr = (
char *)name;
478 fake_str->
as.
heap.aux.capa = len;
479 return (
VALUE)fake_str;
486 rb_setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
rb_encoding *enc)
496 MJIT_FUNC_EXPORTED
VALUE
497 rb_fstring_new(
const char *ptr,
long len)
500 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), FALSE);
507 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), FALSE);
511 rb_fstring_cstr(
const char *
ptr)
513 return rb_fstring_new(
ptr, strlen(
ptr));
517 fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
527 const char *aptr, *bptr;
530 return (alen != blen ||
532 memcmp(aptr, bptr, alen) != 0);
536 single_byte_optimizable(
VALUE str)
544 enc = STR_ENC_GET(str);
555 static inline const char *
556 search_nonascii(
const char *p,
const char *e)
558 const uintptr_t *s, *t;
560 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
561 # if SIZEOF_UINTPTR_T == 8
562 # define NONASCII_MASK UINT64_C(0x8080808080808080)
563 # elif SIZEOF_UINTPTR_T == 4
564 # define NONASCII_MASK UINT32_C(0x80808080)
566 # error "don't know what to do."
569 # if SIZEOF_UINTPTR_T == 8
570 # define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
571 # elif SIZEOF_UINTPTR_T == 4
572 # define NONASCII_MASK 0x80808080UL
574 # error "don't know what to do."
578 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
579 #if !UNALIGNED_WORD_ACCESS
580 if ((uintptr_t)p % SIZEOF_VOIDP) {
581 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
586 case 7:
if (p[-7]&0x80)
return p-7;
587 case 6:
if (p[-6]&0x80)
return p-6;
588 case 5:
if (p[-5]&0x80)
return p-5;
589 case 4:
if (p[-4]&0x80)
return p-4;
591 case 3:
if (p[-3]&0x80)
return p-3;
592 case 2:
if (p[-2]&0x80)
return p-2;
593 case 1:
if (p[-1]&0x80)
return p-1;
598 #if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
599 #define aligned_ptr(value) \
600 __builtin_assume_aligned((value), sizeof(uintptr_t))
602 #define aligned_ptr(value) (uintptr_t *)(value)
605 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
608 if (*s & NONASCII_MASK) {
609 #ifdef WORDS_BIGENDIAN
610 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
612 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
622 case 7:
if (e[-7]&0x80)
return e-7;
623 case 6:
if (e[-6]&0x80)
return e-6;
624 case 5:
if (e[-5]&0x80)
return e-5;
625 case 4:
if (e[-4]&0x80)
return e-4;
627 case 3:
if (e[-3]&0x80)
return e-3;
628 case 2:
if (e[-2]&0x80)
return e-2;
629 case 1:
if (e[-1]&0x80)
return e-1;
637 const char *e = p +
len;
641 p = search_nonascii(p, e);
646 p = search_nonascii(p, e);
653 p = search_nonascii(p, e);
678 p = search_nonascii(p, e);
683 p = search_nonascii(p, e);
696 p = search_nonascii(p, e);
721 rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
726 str_enc_copy(dest, src);
751 rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
753 str_enc_copy(dest, src);
783 cr = enc_coderange_scan(str, enc, encidx);
802 str_mod_check(
VALUE s,
const char *p,
long len)
810 str_capacity(
VALUE str,
const int termlen)
812 if (STR_EMBED_P(str)) {
814 return str_embed_capa(str) - termlen;
819 else if (
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
820 return RSTRING(str)->as.heap.len;
823 return RSTRING(str)->as.heap.aux.capa;
830 return str_capacity(str, TERM_LEN(str));
834 must_not_null(
const char *
ptr)
842 str_alloc(
VALUE klass,
size_t size)
845 RVARGC_NEWOBJ_OF(str,
struct RString, klass,
851 str_alloc_embed(
VALUE klass,
size_t capa)
853 size_t size = str_embed_size(
capa);
854 assert(rb_gc_size_allocatable_p(size));
856 assert(size <=
sizeof(
struct RString));
858 return str_alloc(klass, size);
862 str_alloc_heap(
VALUE klass)
864 return str_alloc(klass,
sizeof(
struct RString));
868 empty_str_alloc(
VALUE klass)
870 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
871 VALUE str = str_alloc_embed(klass, 0);
872 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
877 str_new0(
VALUE klass,
const char *
ptr,
long len,
int termlen)
885 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
887 if (STR_EMBEDDABLE_P(
len, termlen)) {
888 str = str_alloc_embed(klass,
len + termlen);
894 str = str_alloc_heap(klass);
900 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
901 STR_SET_NOEMBED(str);
906 STR_SET_LEN(str,
len);
914 return str_new0(klass,
ptr,
len, 1);
959 __msan_unpoison_string(
ptr);
990 str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
999 rb_encoding *enc = rb_enc_get_from_index(encindex);
1003 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1004 str = str_alloc_heap(klass);
1008 STR_SET_NOEMBED(str);
1009 RBASIC(str)->flags |= STR_NOFREE;
1042 rb_warn_deprecated_to_remove_at(3.2,
"rb_tainted_str_new", NULL);
1049 rb_warn_deprecated_to_remove_at(3.2,
"rb_tainted_str_new_cstr", NULL);
1053 static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1055 int ecflags,
VALUE ecopts);
1062 return is_ascii_string(str);
1073 if (!to)
return str;
1075 if (from == to)
return str;
1078 if (STR_ENC_GET(str) != to) {
1087 from, to, ecflags, ecopts);
1088 if (
NIL_P(newstr)) {
1096 rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1102 if (ofs < -olen || olen < ofs)
1104 if (ofs < 0) ofs += olen;
1106 STR_SET_LEN(newstr, ofs);
1111 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1119 STR_SET_LEN(str, 0);
1126 str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1128 int ecflags,
VALUE ecopts)
1133 VALUE econv_wrapper;
1134 const unsigned char *start, *sp;
1135 unsigned char *dest, *dp;
1136 size_t converted_output = (size_t)ofs;
1141 RBASIC_CLEAR_CLASS(econv_wrapper);
1143 if (!ec)
return Qnil;
1146 sp = (
unsigned char*)
ptr;
1148 while ((dest = (
unsigned char*)
RSTRING_PTR(newstr)),
1149 (dp = dest + converted_output),
1153 size_t converted_input = sp - start;
1154 size_t rest =
len - converted_input;
1155 converted_output = dp - dest;
1157 if (converted_input && converted_output &&
1158 rest < (LONG_MAX / converted_output)) {
1159 rest = (rest * converted_output) / converted_input;
1164 olen += rest < 2 ? 2 : rest;
1205 if (!ienc || eenc == ienc) {
1219 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1220 rb_str_initialize(str,
ptr,
len, eenc);
1293 str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1295 const int termlen = TERM_LEN(str);
1300 if (str_embed_capa(str2) >=
len + termlen) {
1301 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1302 STR_SET_EMBED(str2);
1304 STR_SET_EMBED_LEN(str2,
len);
1305 TERM_FILL(ptr2+
len, termlen);
1309 if (STR_SHARED_P(str)) {
1310 root =
RSTRING(str)->as.heap.aux.shared;
1318 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1320 rb_fatal(
"about to free a possible shared root");
1322 char *ptr2 = STR_HEAP_PTR(str2);
1324 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1327 FL_SET(str2, STR_NOEMBED);
1330 STR_SET_SHARED(str2, root);
1338 str_replace_shared_without_enc(str2, str);
1339 rb_enc_cr_str_exact_copy(str2, str);
1346 return str_replace_shared(str_alloc_heap(klass), str);
1363 rb_str_new_frozen_String(
VALUE orig)
1370 rb_str_tmp_frozen_acquire(
VALUE orig)
1373 return str_new_frozen_buffer(0, orig, FALSE);
1377 rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1382 if (STR_EMBED_P(tmp)) {
1395 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1396 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1401 STR_SET_EMBED_LEN(tmp, 0);
1409 return str_new_frozen_buffer(klass, orig, TRUE);
1413 heap_str_make_shared(
VALUE klass,
VALUE orig)
1415 assert(!STR_EMBED_P(orig));
1416 assert(!STR_SHARED_P(orig));
1418 VALUE str = str_alloc_heap(klass);
1419 STR_SET_NOEMBED(str);
1422 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1423 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1424 RBASIC(orig)->flags &= ~STR_NOFREE;
1425 STR_SET_SHARED(orig, str);
1432 str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1438 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, 1)) {
1440 assert(STR_EMBED_P(str));
1451 assert(!STR_EMBED_P(
shared));
1455 if ((ofs > 0) || (rest > 0) ||
1458 str = str_new_shared(klass,
shared);
1459 assert(!STR_EMBED_P(str));
1460 RSTRING(str)->as.heap.ptr += ofs;
1461 RSTRING(str)->as.heap.len -= ofs + rest;
1469 else if (STR_EMBEDDABLE_P(
RSTRING_LEN(orig), TERM_LEN(orig))) {
1470 str = str_alloc_embed(klass,
RSTRING_LEN(orig) + TERM_LEN(orig));
1477 str = heap_str_make_shared(klass, orig);
1481 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1493 str_new_empty_String(
VALUE str)
/*
 * Lower bound applied to a requested buffer capacity: the
 * `capa < STR_BUF_MIN_SIZE` checks in this file raise smaller values to it.
 */
1500 #define STR_BUF_MIN_SIZE 63
1508 if (STR_EMBEDDABLE_P(
capa, 1)) {
1515 if (
capa < STR_BUF_MIN_SIZE) {
1516 capa = STR_BUF_MIN_SIZE;
1519 FL_SET(str, STR_NOEMBED);
1522 RSTRING(str)->as.heap.ptr[0] =
'\0';
1542 return str_new(0, 0,
len);
1548 if (
FL_TEST(str, RSTRING_FSTR)) {
1549 st_data_t fstr = (st_data_t)str;
1553 st_delete(rb_vm_fstring_table(), &fstr, NULL);
1554 RB_DEBUG_COUNTER_INC(obj_str_fstr);
1559 if (STR_EMBED_P(str)) {
1560 RB_DEBUG_COUNTER_INC(obj_str_embed);
1562 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1563 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1564 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1567 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1568 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1572 RUBY_FUNC_EXPORTED
size_t
1573 rb_str_memsize(
VALUE str)
1575 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1576 return STR_HEAP_SIZE(str);
1586 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1589 static inline void str_discard(
VALUE str);
1590 static void str_shared_replace(
VALUE str,
VALUE str2);
1595 if (str != str2) str_shared_replace(str, str2);
1606 enc = STR_ENC_GET(str2);
1611 if (str_embed_capa(str) >=
RSTRING_LEN(str2) + termlen) {
1620 if (STR_EMBED_P(str2)) {
1621 assert(!
FL_TEST(str2, STR_SHARED));
1623 assert(
len + termlen <= str_embed_capa(str2));
1625 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1626 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1627 RSTRING(str2)->as.heap.ptr = new_ptr;
1630 STR_SET_NOEMBED(str2);
1634 STR_SET_NOEMBED(str);
1639 if (
FL_TEST(str2, STR_SHARED)) {
1641 STR_SET_SHARED(str,
shared);
1644 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1648 STR_SET_EMBED(str2);
1650 STR_SET_EMBED_LEN(str2, 0);
1665 return rb_obj_as_string_result(str, obj);
1668 MJIT_FUNC_EXPORTED
VALUE
1669 rb_obj_as_string_result(
VALUE str,
VALUE obj)
1682 if (STR_SHARED_P(str2)) {
1685 STR_SET_NOEMBED(str);
1688 STR_SET_SHARED(str,
shared);
1689 rb_enc_cr_str_exact_copy(str, str2);
1692 str_replace_shared(str, str2);
1702 RB_RVARGC_EC_NEWOBJ_OF(ec, str,
struct RString, klass,
1710 size_t size = str_embed_size(
capa);
1711 assert(rb_gc_size_allocatable_p(size));
1713 assert(size <=
sizeof(
struct RString));
1715 return ec_str_alloc(ec, klass, size);
1721 return ec_str_alloc(ec, klass,
sizeof(
struct RString));
1727 const VALUE flag_mask =
1729 RSTRING_NOEMBED | RSTRING_EMBED_LEN_MASK |
1736 if (STR_EMBED_P(str)) {
1739 assert(str_embed_capa(dup) >=
len + 1);
1740 STR_SET_EMBED_LEN(dup,
len);
1746 root =
RSTRING(str)->as.heap.aux.shared;
1748 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
1749 root = str = str_new_frozen(klass, str);
1752 assert(!STR_SHARED_P(root));
1757 if (STR_EMBED_P(root)) {
1766 flags |= RSTRING_NOEMBED | STR_SHARED;
1783 if (!USE_RVARGC ||
FL_TEST(str, STR_NOEMBED)) {
1784 dup = ec_str_alloc_heap(ec, klass);
1790 return str_duplicate_setup(klass, str, dup);
1797 if (!USE_RVARGC ||
FL_TEST(str, STR_NOEMBED)) {
1798 dup = str_alloc_heap(klass);
1804 return str_duplicate_setup(klass, str, dup);
1816 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1823 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1824 return ec_str_duplicate(ec,
rb_cString, str);
1874 rb_str_init(
int argc,
VALUE *argv,
VALUE str)
1876 static ID keyword_ids[2];
1877 VALUE orig, opt, venc, vcapa;
1882 if (!keyword_ids[0]) {
1883 keyword_ids[0] = rb_id_encoding();
1884 CONST_ID(keyword_ids[1],
"capacity");
1900 if (
capa < STR_BUF_MIN_SIZE) {
1901 capa = STR_BUF_MIN_SIZE;
1909 if (orig == str) n = 0;
1911 str_modifiable(str);
1912 if (STR_EMBED_P(str)) {
1913 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1915 assert(
RSTRING(str)->
as.embed.len + 1 <= str_embed_capa(str));
1920 RSTRING(str)->as.heap.ptr = new_ptr;
1922 else if (
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
1923 const size_t size = (size_t)
capa + termlen;
1925 const size_t osize =
RSTRING(str)->as.heap.len + TERM_LEN(str);
1926 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1927 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
1929 RSTRING(str)->as.heap.ptr = new_ptr;
1931 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
1932 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
1933 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
1939 rb_enc_cr_str_exact_copy(str, orig);
1941 FL_SET(str, STR_NOEMBED);
1958 #ifdef NONASCII_MASK
/*
 * Non-zero unless the two high bits of `c` are binary 10, i.e. unless `c`
 * has the form of a UTF-8 continuation byte.  Plain ASCII bytes therefore
 * count as lead bytes as well.
 */
1959 #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
1974 static inline uintptr_t
1975 count_utf8_lead_bytes_with_word(
const uintptr_t *s)
1980 d = (d>>6) | (~d>>7);
1981 d &= NONASCII_MASK >> 7;
1984 #if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
1986 return rb_popcount_intptr(d);
1990 # if SIZEOF_VOIDP == 8
1999 enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2005 long diff = (long)(e - p);
2008 #ifdef NONASCII_MASK
2011 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2012 const uintptr_t *s, *t;
2013 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2014 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2015 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2016 while (p < (
const char *)s) {
2017 if (is_utf8_lead_byte(*p))
len++;
2021 len += count_utf8_lead_bytes_with_word(s);
2024 p = (
const char *)s;
2027 if (is_utf8_lead_byte(*p))
len++;
2038 q = search_nonascii(p, e);
2051 q = search_nonascii(p, e);
2064 for (c=0; p<e; c++) {
2080 rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2088 long diff = (long)(e - p);
2095 q = search_nonascii(p, e);
2118 for (c=0; p<e; c++) {
2143 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2144 if (!enc) enc = STR_ENC_GET(str);
2150 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2155 return enc_strlen(p, e, enc, cr);
2162 return str_strlen(str, NULL);
2182 return LONG2NUM(str_strlen(str, NULL));
2198 rb_str_bytesize(
VALUE str)
2216 rb_str_empty(
VALUE str)
2236 char *ptr1, *ptr2, *ptr3;
2241 enc = rb_enc_check_str(str1, str2);
2245 if (len1 > LONG_MAX - len2) {
2248 str3 = str_new0(
rb_cString, 0, len1+len2, termlen);
2250 memcpy(ptr3, ptr1, len1);
2251 memcpy(ptr3+len1, ptr2, len2);
2252 TERM_FILL(&ptr3[len1+len2], termlen);
2262 MJIT_FUNC_EXPORTED
VALUE
2268 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2277 else if (enc2 < 0) {
2280 else if (enc1 != enc2) {
2283 else if (len1 > LONG_MAX - len2) {
2324 if (STR_EMBEDDABLE_P(
len, 1)) {
2332 STR_SET_NOEMBED(str2);
2334 STR_SET_LEN(str2,
len);
2343 termlen = TERM_LEN(str);
2349 while (n <=
len/2) {
2350 memcpy(ptr2 + n, ptr2, n);
2353 memcpy(ptr2 + n, ptr2,
len-n);
2355 STR_SET_LEN(str2,
len);
2356 TERM_FILL(&ptr2[
len], termlen);
2357 rb_enc_cr_str_copy_for_substr(str2, str);
2392 rb_check_lockedtmp(
VALUE str)
2394 if (
FL_TEST(str, STR_TMPLOCK)) {
2400 str_modifiable(
VALUE str)
2402 rb_check_lockedtmp(str);
2407 str_dependent_p(
VALUE str)
2409 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2418 str_independent(
VALUE str)
2420 str_modifiable(str);
2421 return !str_dependent_p(str);
2425 str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2433 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2438 STR_SET_EMBED_LEN(str,
len);
2445 memcpy(
ptr, oldptr,
len);
2447 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2450 STR_SET_NOEMBED(str);
2451 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2452 TERM_FILL(
ptr +
len, termlen);
2461 if (!str_independent(str))
2462 str_make_independent(str);
2469 int termlen = TERM_LEN(str);
2475 if (expand >= LONG_MAX -
len) {
2479 if (!str_independent(str)) {
2480 str_make_independent_expand(str,
len, expand, termlen);
2482 else if (expand > 0) {
2483 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2490 str_modify_keep_cr(
VALUE str)
2492 if (!str_independent(str))
2493 str_make_independent(str);
2500 str_discard(
VALUE str)
2502 str_modifiable(str);
2503 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2504 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2505 RSTRING(str)->as.heap.ptr = 0;
2506 RSTRING(str)->as.heap.len = 0;
2538 zero_filled(
const char *s,
int n)
2540 for (; n > 0; --n) {
2547 str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2549 const char *e = s +
len;
2552 if (zero_filled(s, minlen))
return s;
2558 str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2563 if (str_dependent_p(str)) {
2564 if (!zero_filled(s +
len, termlen))
2565 str_make_independent_expand(str,
len, 0L, termlen);
2568 TERM_FILL(s +
len, termlen);
2575 rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2577 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2582 rb_check_lockedtmp(str);
2583 str_make_independent_expand(str,
len, 0L, termlen);
2585 else if (str_dependent_p(str)) {
2586 if (termlen > oldtermlen)
2587 str_make_independent_expand(str,
len, 0L, termlen);
2590 if (!STR_EMBED_P(str)) {
2592 assert(!
FL_TEST((str), STR_SHARED));
2595 if (termlen > oldtermlen) {
2604 str_null_check(
VALUE str,
int *w)
2613 if (str_null_char(s,
len, minlen, enc)) {
2616 return str_fill_term(str, s,
len, minlen);
2619 if (!s || memchr(s, 0,
len)) {
2623 s = str_fill_term(str, s,
len, minlen);
2629 rb_str_to_cstr(
VALUE str)
2632 return str_null_check(str, &w);
2640 char *s = str_null_check(str, &w);
2651 rb_str_fill_terminator(
VALUE str,
const int newminlen)
2655 return str_fill_term(str, s,
len, newminlen);
2661 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2685 str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2695 const char *p2, *e2;
2698 while (p < e && 0 < nth) {
2705 p2 = search_nonascii(p, e2);
2725 while (p < e && nth--) {
2737 return str_nth_len(p, e, &nth, enc);
2741 str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2746 p = str_nth_len(p, e, &nth, enc);
2755 str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2757 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2758 if (!pp)
return e - p;
2766 STR_ENC_GET(str), single_byte_optimizable(str));
2769 #ifdef NONASCII_MASK
2771 str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2774 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
2775 const uintptr_t *s, *t;
2776 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
2777 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2778 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2779 while (p < (
const char *)s) {
2780 if (is_utf8_lead_byte(*p)) nth--;
2784 nth -= count_utf8_lead_bytes_with_word(s);
2786 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
2790 if (is_utf8_lead_byte(*p)) {
2791 if (nth == 0)
break;
2801 str_utf8_offset(
const char *p,
const char *e,
long nth)
2803 const char *pp = str_utf8_nth(p, e, &nth);
2812 if (single_byte_optimizable(str) || pos < 0)
2816 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
2825 if (!STR_EMBEDDABLE_P(
len, TERM_LEN(str)) &&
2829 RSTRING(str2)->as.heap.ptr += beg;
2830 olen =
RSTRING(str2)->as.heap.len;
2838 rb_enc_cr_str_copy_for_substr(str2, str);
2852 if (
len < 0)
return 0;
2856 if (single_byte_optimizable(str)) {
2857 if (beg > blen)
return 0;
2860 if (beg < 0)
return 0;
2862 if (
len > blen - beg)
2864 if (
len < 0)
return 0;
2869 if (
len > -beg)
len = -beg;
2881 slen = str_strlen(str, enc);
2883 if (beg < 0)
return 0;
2885 if (
len == 0)
goto end;
2892 if (beg > str_strlen(str, enc))
return 0;
2895 #ifdef NONASCII_MASK
2898 p = str_utf8_nth(s, e, &beg);
2899 if (beg > 0)
return 0;
2900 len = str_utf8_offset(p, e,
len);
2906 p = s + beg * char_sz;
2910 else if (
len * char_sz > e - p)
2915 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
2916 if (beg > 0)
return 0;
2920 len = str_offset(p, e,
len, enc, 0);
2928 static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
2933 return str_substr(str, beg,
len, TRUE);
2937 str_substr(
VALUE str,
long beg,
long len,
int empty)
2942 if (!p)
return Qnil;
2943 if (!STR_EMBEDDABLE_P(
len, TERM_LEN(str)) &&
2948 RSTRING(str2)->as.heap.ptr += ofs;
2953 if (!
len && !empty)
return Qnil;
2957 rb_enc_cr_str_copy_for_substr(str2, str);
2980 str_uplus(
VALUE str)
3000 str_uminus(
VALUE str)
3005 return rb_fstring(str);
/* From this point on, rb_str_dup_frozen expands to rb_str_new_frozen. */
3009 #define rb_str_dup_frozen rb_str_new_frozen
3014 if (
FL_TEST(str, STR_TMPLOCK)) {
3017 FL_SET(str, STR_TMPLOCK);
3024 if (!
FL_TEST(str, STR_TMPLOCK)) {
3031 RUBY_FUNC_EXPORTED
VALUE
3042 const int termlen = TERM_LEN(str);
3044 str_modifiable(str);
3045 if (STR_SHARED_P(str)) {
3048 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3051 STR_SET_LEN(str,
len);
3065 independent = str_independent(str);
3071 const int termlen = TERM_LEN(str);
3072 if (STR_EMBED_P(str)) {
3073 if (
len == slen)
return str;
3074 if (str_embed_capa(str) >=
len + termlen) {
3075 STR_SET_EMBED_LEN(str,
len);
3079 str_make_independent_expand(str, slen,
len - slen, termlen);
3081 else if (str_embed_capa(str) >=
len + termlen) {
3082 char *
ptr = STR_HEAP_PTR(str);
3084 if (slen >
len) slen =
len;
3087 STR_SET_EMBED_LEN(str,
len);
3091 else if (!independent) {
3092 if (
len == slen)
return str;
3093 str_make_independent_expand(str, slen,
len - slen, termlen);
3097 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3098 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3101 else if (
len == slen)
return str;
3111 long capa, total, olen, off = -1;
3113 const int termlen = TERM_LEN(str);
3119 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3123 if (
len == 0)
return 0;
3124 if (STR_EMBED_P(str)) {
3125 capa = str_embed_capa(str) - termlen;
3126 sptr =
RSTRING(str)->as.embed.ary;
3131 sptr =
RSTRING(str)->as.heap.ptr;
3132 olen =
RSTRING(str)->as.heap.len;
3134 if (olen > LONG_MAX -
len) {
3139 if (total >= LONG_MAX / 2) {
3142 while (total >
capa) {
3145 RESIZE_CAPA_TERM(str,
capa, termlen);
3151 memcpy(sptr + olen,
ptr,
len);
3152 STR_SET_LEN(str, total);
3153 TERM_FILL(sptr + total, termlen);
/*
 * Append the NUL-terminated C string `ptr` to `str` by calling str_buf_cat()
 * with strlen(ptr) as the length.  `ptr` is expanded twice, so it must not
 * have side effects.
 */
3158 #define str_buf_cat2(str, ptr) str_buf_cat((str), (ptr), strlen(ptr))
3163 if (
len == 0)
return str;
3167 return str_buf_cat(str,
ptr,
len);
3182 rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3183 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3192 if (str_encindex == ptr_encindex) {
3211 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3220 *ptr_cr_ret = ptr_cr;
3222 if (str_encindex != ptr_encindex &&
3231 res_encindex = str_encindex;
3236 res_encindex = str_encindex;
3240 res_encindex = ptr_encindex;
3245 res_encindex = str_encindex;
3252 res_encindex = str_encindex;
3260 str_buf_cat(str,
ptr,
len);
3273 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3284 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3290 unsigned int c = (
unsigned char)*
ptr;
3293 rb_enc_cr_str_buf_cat(str, buf,
len,
/*
 * Threshold compared against the accumulated `len` in
 * rb_str_concat_literals(); `len < MIN_PRE_ALLOC_SIZE` is marked as the
 * LIKELY case there.
 * NOTE(review): presumably the minimum capacity to pre-allocate for the
 * result -- confirm against the full function body.
 */
3323 #define MIN_PRE_ALLOC_SIZE 48
3325 MJIT_FUNC_EXPORTED
VALUE
3326 rb_str_concat_literals(
size_t num,
const VALUE *strary)
3336 if (LIKELY(
len < MIN_PRE_ALLOC_SIZE)) {
3346 for (i = s; i < num; ++i) {
3347 const VALUE v = strary[i];
3352 if (encidx != ENCINDEX_US_ASCII) {
3379 rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3381 str_modifiable(str);
3386 else if (argc > 1) {
3390 for (i = 0; i < argc; i++) {
3425 if (rb_num_to_uint(str2, &code) == 0) {
3439 if (encidx == ENCINDEX_ASCII || encidx == ENCINDEX_US_ASCII) {
3442 buf[0] = (char)code;
3447 if (encidx == ENCINDEX_US_ASCII && code > 127) {
3459 case ONIGERR_INVALID_CODE_POINT_VALUE:
3462 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
3495 rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
3497 str_modifiable(str);
3502 else if (argc > 1) {
3506 for (i = 0; i < argc; i++) {
3529 const char *ptr1, *ptr2;
3532 return (len1 != len2 ||
3534 memcmp(ptr1, ptr2, len1) != 0);
3548 rb_str_hash_m(
VALUE str)
/* Smaller of `a` and `b`; yields `a` when the two compare equal.  Arguments
 * can be evaluated more than once, so avoid side effects. */
3554 #define lesser(a,b) (((a)>(b))?(b):(a))
3566 if (idx1 == idx2)
return TRUE;
3585 const char *ptr1, *ptr2;
3588 if (str1 == str2)
return 0;
3591 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
3600 if (len1 > len2)
return 1;
3603 if (retval > 0)
return 1;
3630 if (str1 == str2)
return Qtrue;
3637 return rb_str_eql_internal(str1, str2);
3658 MJIT_FUNC_EXPORTED
VALUE
3661 if (str1 == str2)
return Qtrue;
3663 return rb_str_eql_internal(str1, str2);
3694 return rb_invcmp(str1, str2);
3736 return str_casecmp(str1, s);
3744 const char *p1, *p1end, *p2, *p2end;
3753 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
3754 while (p1 < p1end && p2 < p2end) {
3756 unsigned int c1 =
TOLOWER(*p1 & 0xff);
3757 unsigned int c2 =
TOLOWER(*p2 & 0xff);
3759 return INT2FIX(c1 < c2 ? -1 : 1);
3766 while (p1 < p1end && p2 < p2end) {
3770 if (0 <= c1 && 0 <= c2) {
3774 return INT2FIX(c1 < c2 ? -1 : 1);
3780 len = l1 < l2 ? l1 : l2;
3781 r = memcmp(p1, p2,
len);
3783 return INT2FIX(r < 0 ? -1 : 1);
3785 return INT2FIX(l1 < l2 ? -1 : 1);
3826 return str_casecmp_p(str1, s);
3833 VALUE folded_str1, folded_str2;
3834 VALUE fold_opt = sym_fold;
3841 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
3842 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
3844 return rb_str_eql(folded_str1, folded_str2);
/*
 * Core of the substring search: looks for the `sub_len` bytes at `sub_ptr`
 * with rb_memsearch(), starting from a window of `str_len - offset` bytes
 * at `str_ptr`.
 *
 * Returns the negative rb_memsearch() result when nothing is found, -1 when
 * the remaining window shrinks to zero or less, and `pos + offset` once a
 * hit is accepted.
 *
 * NOTE(review): the loop header and the statements that compute `t` and
 * advance `search_start` are not visible in this excerpt.  Presumably `t`
 * is the character head at or after the raw hit, so that hits landing in
 * the middle of a multibyte character are retried -- confirm.
 */
3848 strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
3849 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
3851 const char *search_start = str_ptr;
3852 long pos, search_len = str_len - offset;
3856 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
/* no hit in the remaining window: hand the negative result back as-is */
3857 if (pos < 0)
return pos;
/* the hit is accepted only when `t` coincides with the hit position */
3859 if (t == search_start + pos)
break;
/* otherwise drop the bytes before `t` from the window ... */
3860 search_len -= t - search_start;
3861 if (search_len <= 0)
return -1;
/* ... and account for them in the offset that is added to the result */
3862 offset += t - search_start;
3865 return pos + offset;
/* Shorthand for rb_strseq_index() with in_byte = 0, i.e. the variant in
 * which lengths are measured with str_strlen() and the offset is converted
 * with str_offset() before searching. */
3868 #define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
/*
 * Find `sub` inside `str` starting at `offset`.
 *
 * in_byte != 0: lengths are taken as the byte lengths and `offset` is not
 *               converted.
 * in_byte == 0: lengths come from str_strlen() (the length of `str` only
 *               when it is not single-byte optimizable) and `offset` is
 *               converted through str_offset() before the search.
 *
 * Returns -1 when `sub` is a broken string, when `sub` is longer than
 * `str`, or when the part of `str` after `offset` is shorter than `sub`;
 * returns `offset` for an empty `sub`; otherwise returns the result of
 * strseq_core().
 *
 * NOTE(review): the assignments of str_ptr/str_len/sub_ptr/sub_len/enc and
 * the conditions guarding the offset handling are not visible in this
 * excerpt.
 */
3871 rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
3873 const char *str_ptr, *str_ptr_end, *sub_ptr;
3874 long str_len, sub_len;
3878 if (is_broken_string(sub))
return -1;
/* a needle longer than the haystack can never match */
3886 if (str_len < sub_len)
return -1;
3889 long str_len_char, sub_len_char;
3890 int single_byte = single_byte_optimizable(str);
3891 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
3892 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
/* NOTE(review): presumably only executed for a negative offset, turning it
 * into an offset counted from the end -- the guard is not shown */
3894 offset += str_len_char;
3895 if (offset < 0)
return -1;
/* not enough room left after `offset` to hold `sub` */
3897 if (str_len_char - offset < sub_len_char)
return -1;
3898 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
/* an empty needle matches right at the (converted) offset */
3901 if (sub_len == 0)
return offset;
3904 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
3947 rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
3953 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
3960 pos += str_strlen(str, NULL);
3970 if (pos > str_strlen(str, NULL))
3987 pos = rb_str_index(str, sub, pos);
3991 if (pos == -1)
return Qnil;
3999 char *hit, *adjusted;
4001 long slen, searchlen;
4005 if (slen == 0)
return pos;
4010 searchlen = s - sbeg + 1;
4013 hit = memrchr(sbeg, c, searchlen);
4016 if (hit != adjusted) {
4017 searchlen = adjusted - sbeg;
4020 if (memcmp(hit, t, slen) == 0)
4022 searchlen = adjusted - sbeg;
4023 }
while (searchlen > 0);
4040 if (memcmp(s, t, slen) == 0) {
4043 if (pos == 0)
break;
4053 rb_str_rindex(
VALUE str,
VALUE sub,
long pos)
4061 if (is_broken_string(sub))
return -1;
4062 singlebyte = single_byte_optimizable(str);
4063 len = singlebyte ?
RSTRING_LEN(str) : str_strlen(str, enc);
4064 slen = str_strlen(sub, enc);
4067 if (len < slen)
return -1;
4068 if (len - pos < slen) pos = len - slen;
4069 if (len == 0)
return pos;
4080 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4081 return str_rindex(str, sub, s, pos, enc);
4142 rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4147 long pos, len = str_strlen(str, enc);
4149 if (
rb_scan_args(argc, argv,
"11", &sub, &vpos) == 2) {
4160 if (pos > len) pos = len;
4169 enc, single_byte_optimizable(str));
4180 pos = rb_str_rindex(str, sub, pos);
4181 if (pos >= 0)
return LONG2NUM(pos);
4218 switch (OBJ_BUILTIN_TYPE(y)) {
4271 rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
/*
 * Convert argv[0] to a pattern with get_pat() and return the result of
 * rb_reg_match_p() on `str`.  The start position is NUM2LONG(argv[1]) when a
 * second argument was given and 0 otherwise.
 * NOTE(review): argument-count validation is not visible in this excerpt.
 */
4311 rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
4315 re = get_pat(argv[0]);
4316 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
4319 enum neighbor_char {
4325 static enum neighbor_char
4326 enc_succ_char(
char *p,
long len,
rb_encoding *enc)
4335 return NEIGHBOR_NOT_CHAR;
4339 if (!l)
return NEIGHBOR_NOT_CHAR;
4340 if (l != len)
return NEIGHBOR_WRAPPED;
4344 return NEIGHBOR_NOT_CHAR;
4346 return NEIGHBOR_FOUND;
4349 for (i = len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
4352 return NEIGHBOR_WRAPPED;
4353 ++((
unsigned char*)p)[i];
4358 return NEIGHBOR_FOUND;
4361 memset(p+l, 0xff, len-l);
4367 for (len2 = len-1; 0 < len2; len2--) {
4372 memset(p+len2+1, 0xff, len-(len2+1));
4377 static enum neighbor_char
4378 enc_pred_char(
char *p,
long len,
rb_encoding *enc)
4386 return NEIGHBOR_NOT_CHAR;
4389 if (!c)
return NEIGHBOR_NOT_CHAR;
4392 if (!l)
return NEIGHBOR_NOT_CHAR;
4393 if (l != len)
return NEIGHBOR_WRAPPED;
4397 return NEIGHBOR_NOT_CHAR;
4399 return NEIGHBOR_FOUND;
4402 for (i = len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
4405 return NEIGHBOR_WRAPPED;
4406 --((
unsigned char*)p)[i];
4411 return NEIGHBOR_FOUND;
4414 memset(p+l, 0, len-l);
4420 for (len2 = len-1; 0 < len2; len2--) {
4425 memset(p+len2+1, 0, len-(len2+1));
4439 static enum neighbor_char
4440 enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
4442 enum neighbor_char ret;
4446 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
4450 const int max_gaps = 1;
4454 ctype = ONIGENC_CTYPE_DIGIT;
4456 ctype = ONIGENC_CTYPE_ALPHA;
4458 return NEIGHBOR_NOT_CHAR;
4460 MEMCPY(save, p,
char, len);
4461 for (
try = 0;
try <= max_gaps; ++
try) {
4462 ret = enc_succ_char(p, len, enc);
4463 if (ret == NEIGHBOR_FOUND) {
4466 return NEIGHBOR_FOUND;
4469 MEMCPY(p, save,
char, len);
4472 MEMCPY(save, p,
char, len);
4473 ret = enc_pred_char(p, len, enc);
4474 if (ret == NEIGHBOR_FOUND) {
4477 MEMCPY(p, save,
char, len);
4482 MEMCPY(p, save,
char, len);
4488 return NEIGHBOR_NOT_CHAR;
4491 if (ctype != ONIGENC_CTYPE_DIGIT) {
4492 MEMCPY(carry, p,
char, len);
4493 return NEIGHBOR_WRAPPED;
4496 MEMCPY(carry, p,
char, len);
4497 enc_succ_char(carry, len, enc);
4498 return NEIGHBOR_WRAPPED;
4568 rb_enc_cr_str_copy_for_substr(str, orig);
4569 return str_succ(str);
4576 char *sbeg, *s, *e, *last_alnum = 0;
4577 int found_alnum = 0;
4579 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
4580 long carry_pos = 0, carry_len = 1;
4581 enum neighbor_char neighbor = NEIGHBOR_FOUND;
4584 if (slen == 0)
return str;
4586 enc = STR_ENC_GET(str);
4588 s = e = sbeg + slen;
4591 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
4598 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4599 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4600 neighbor = enc_succ_alnum_char(s, l, enc, carry);
4602 case NEIGHBOR_NOT_CHAR:
4604 case NEIGHBOR_FOUND:
4606 case NEIGHBOR_WRAPPED:
4611 carry_pos = s - sbeg;
4617 enum neighbor_char neighbor;
4618 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
4620 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4621 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4623 neighbor = enc_succ_char(tmp, l, enc);
4625 case NEIGHBOR_FOUND:
4629 case NEIGHBOR_WRAPPED:
4632 case NEIGHBOR_NOT_CHAR:
4637 enc_succ_char(s, l, enc);
4640 MEMCPY(carry, s,
char, l);
4643 carry_pos = s - sbeg;
4647 RESIZE_CAPA(str, slen + carry_len);
4649 s = sbeg + carry_pos;
4650 memmove(s + carry_len, s, slen - carry_pos);
4651 memmove(s, carry, carry_len);
4653 STR_SET_LEN(str, slen);
4670 rb_str_succ_bang(
VALUE str)
4678 all_digits_p(
const char *s,
long len)
4730 rb_str_upto(
int argc,
VALUE *argv,
VALUE beg)
4732 VALUE end, exclusive;
4736 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
4742 VALUE current, after_end;
4750 ascii = (is_ascii_string(beg) && is_ascii_string(end));
4756 if (c > e || (excl && c == e))
return beg;
4759 if (!excl && c == e)
break;
4761 if (excl && c == e)
break;
4781 if (excl && bi == ei)
break;
4782 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
4787 ID op = excl ?
'<' : idLE;
4788 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
4793 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
4801 if (n > 0 || (excl && n == 0))
return beg;
4809 if ((*each)(current, arg))
break;
4810 if (
NIL_P(next))
break;
4831 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
4839 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
4847 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
4855 if ((*each)(current, arg))
break;
4869 if (!
rb_equal(str, *argp))
return 0;
4898 if (b <= v && v < e)
return Qtrue;
4899 return RBOOL(!
RTEST(exclusive) && v == e);
4912 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
4914 return RBOOL(
NIL_P(val));
4937 return rb_str_subpat(str, indx,
INT2FIX(0));
4940 if (rb_str_index(str, indx, 0) != -1)
4946 long beg, len = str_strlen(str, NULL);
4958 return str_substr(str, idx, 1, FALSE);
5054 rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5058 return rb_str_subpat(str, argv[0], argv[1]);
5067 return rb_str_aref(str, argv[0]);
5076 str_modifiable(str);
5077 if (len > olen) len = olen;
5079 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5081 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5083 STR_SET_EMBED_LEN(str, nlen);
5084 ptr =
RSTRING(str)->as.embed.ary;
5085 memmove(ptr, oldptr + len, nlen);
5086 if (fl == STR_NOEMBED)
xfree(oldptr);
5089 if (!STR_SHARED_P(str)) {
5091 rb_enc_cr_str_exact_copy(shared, str);
5094 ptr =
RSTRING(str)->as.heap.ptr += len;
5095 RSTRING(str)->as.heap.len = nlen;
5103 rb_str_splice_0(
VALUE str,
long beg,
long len,
VALUE val)
5109 if (beg == 0 && vlen == 0) {
5114 str_modify_keep_cr(str);
5118 RESIZE_CAPA(str, slen + vlen - len);
5128 memmove(sptr + beg + vlen,
5130 slen - (beg + len));
5132 if (vlen < beg && len < 0) {
5133 MEMZERO(sptr + slen,
char, -len);
5139 STR_SET_LEN(str, slen);
5140 TERM_FILL(&sptr[slen], TERM_LEN(str));
5150 int singlebyte = single_byte_optimizable(str);
5157 slen = str_strlen(str, enc);
5159 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5166 assert(beg <= slen);
5167 if (len > slen - beg) {
5170 str_modify_keep_cr(str);
5173 e = str_nth(p,
RSTRING_END(str), len, enc, singlebyte);
5178 rb_str_splice_0(str, beg, len, val);
/* Local alias: rb_str_splice(str, beg, len, val) expands to
 * rb_str_update() with the same arguments. */
5185 #define rb_str_splice(str, beg, len, val) rb_str_update(str, beg, len, val)
5192 long start, end, len;
5202 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5206 nth += regs->num_regs;
5216 enc = rb_enc_check_str(str, val);
5217 rb_str_splice_0(str, start, len, val);
5226 switch (
TYPE(indx)) {
5228 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5232 beg = rb_str_index(str, indx, 0);
5237 rb_str_splice(str, beg, str_strlen(indx, NULL), val);
5245 rb_str_splice(str, beg, len, val);
5253 rb_str_splice(str, idx, 1, val);
5283 rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5287 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5295 return rb_str_aset(str, argv[0], argv[1]);
5327 rb_str_splice(str, pos, 0, str2);
5357 rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
5365 str_modify_keep_cr(str);
5373 if ((nth += regs->num_regs) <= 0)
return Qnil;
5375 else if (nth >= regs->num_regs)
return Qnil;
5377 len = END(nth) - beg;
5380 else if (argc == 2) {
5388 if (!len)
return Qnil;
5393 beg = rb_str_index(str, indx, 0);
5394 if (beg == -1)
return Qnil;
5406 if (!len)
return Qnil;
5420 rb_enc_cr_str_copy_for_substr(result, str);
5430 if (beg + len > slen)
5434 slen - (beg + len));
5436 STR_SET_LEN(str, slen);
5437 TERM_FILL(&sptr[slen], TERM_LEN(str));
5448 switch (OBJ_BUILTIN_TYPE(pat)) {
5467 get_pat_quoted(
VALUE pat,
int check)
5471 switch (OBJ_BUILTIN_TYPE(pat)) {
5485 if (check && is_broken_string(pat)) {
/*
 * Search `str` for `pat` starting at `pos`.
 *
 * Two paths are visible: one calls rb_strseq_index() in byte mode
 * (in_byte = 1) and, when `set_backref_str` is set, records the match via
 * rb_backref_set_string() on a frozen copy of `str`; the other delegates to
 * rb_reg_search0().
 * NOTE(review): the test selecting between the two paths (presumably
 * "pat is a String" vs "pat is a Regexp") and the return of the first path
 * are not shown in this excerpt.
 */
5492 rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
5495 pos = rb_strseq_index(str, pat, pos, 1);
5496 if (set_backref_str) {
/* the back-reference is set on a frozen String obtained from `str` */
5498 str = rb_str_new_frozen_String(str);
5499 rb_backref_set_string(str, pos,
RSTRING_LEN(pat));
5508 return rb_reg_search0(pat, str, pos, 0, set_backref_str);
5528 rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
5548 pat = get_pat_quoted(argv[0], 1);
5550 str_modifiable(str);
5551 beg = rb_pat_search(pat, str, 0, 1);
5574 if (iter || !
NIL_P(hash)) {
5584 str_mod_check(str, p, len);
5601 enc = STR_ENC_GET(repl);
5617 RESIZE_CAPA(str, len + rlen - plen);
5621 memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
5624 memmove(p + beg0, rp, rlen);
5626 STR_SET_LEN(str, len);
5654 rb_str_sub_bang(argc, argv, str);
5659 str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
5663 long beg, beg0, end0;
5664 long offset, blen, slen, len, last;
5665 enum {STR, ITER, MAP} mode = STR;
5667 int need_backref = -1;
5686 rb_error_arity(argc, 1, 2);
5689 pat = get_pat_quoted(argv[0], 1);
5690 beg = rb_pat_search(pat, str, 0, need_backref);
5692 if (bang)
return Qnil;
5702 str_enc = STR_ENC_GET(str);
5728 str_mod_check(str, sp, slen);
5733 else if (need_backref) {
5735 if (need_backref < 0) {
5736 need_backref = val != repl;
5743 len = beg0 - offset;
5760 offset = end0 + len;
5764 beg = rb_pat_search(pat, str, offset, need_backref);
5769 rb_pat_search(pat, str, last, 1);
5771 str_shared_replace(str, dest);
/*
 * In-place global substitution entry point: calls str_modify_keep_cr() on
 * the receiver and then runs str_gsub() with bang = 1.
 */
5799 rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
5801 str_modify_keep_cr(str);
5802 return str_gsub(argc, argv, str, 1);
/*
 * Global substitution entry point without the "bang" behaviour: forwards
 * its arguments to str_gsub() with bang = 0.
 */
5823 rb_str_gsub(
int argc,
VALUE *argv,
VALUE str)
5825 return str_gsub(argc, argv, str, 0);
5843 str_modifiable(str);
5844 if (str == str2)
return str;
5848 return str_replace(str, str2);
5863 rb_str_clear(
VALUE str)
5867 STR_SET_EMBED_LEN(str, 0);
5888 rb_str_chr(
VALUE str)
5935 char *ptr, *head, *left = 0;
5939 if (pos < -len || len <= pos)
5946 char byte = (char)(
NUM2INT(w) & 0xFF);
5948 if (!str_independent(str))
5949 str_make_independent(str);
5950 enc = STR_ENC_GET(str);
5953 if (!STR_EMBED_P(str)) {
5986 str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
5992 if (beg > n || len < 0)
return Qnil;
5995 if (beg < 0)
return Qnil;
6000 if (!empty)
return Qnil;
6007 if (!STR_EMBEDDABLE_P(len, TERM_LEN(str)) && SHARABLE_SUBSTRING_P(beg, len, n)) {
6010 RSTRING(str2)->as.heap.ptr += beg;
6011 RSTRING(str2)->as.heap.len = len;
6017 str_enc_copy(str2, str);
6056 return str_byte_substr(str, beg, len, TRUE);
6061 return str_byte_substr(str, idx, 1, FALSE);
6108 rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6113 return str_byte_substr(str, beg, end, TRUE);
6116 return str_byte_aref(str, argv[0]);
6130 rb_str_reverse(
VALUE str)
6138 enc = STR_ENC_GET(str);
6145 if (single_byte_optimizable(str)) {
6173 str_enc_copy(rev, str);
6193 rb_str_reverse_bang(
VALUE str)
6196 if (single_byte_optimizable(str)) {
6199 str_modify_keep_cr(str);
6209 str_shared_replace(str, rb_str_reverse(str));
6213 str_modify_keep_cr(str);
6238 i = rb_str_index(str, arg, 0);
6240 return RBOOL(i != -1);
6267 rb_str_to_i(
int argc,
VALUE *argv,
VALUE str)
6298 rb_str_to_f(
VALUE str)
6316 rb_str_to_s(
VALUE str)
6328 char s[RUBY_MAX_CHAR_LEN];
/* Size limit handed to snprintf() when a single escape sequence is
 * formatted; the receiving buffers in this file are declared with
 * CHAR_ESC_LEN + 1 bytes. */
6336 #define CHAR_ESC_LEN 13
/*
 * Format code point `c` into a local buffer in one of the visible forms --
 * the plain character ("%c"), "\uXXXX" (c < 0x10000), "\u{X}", "\xXX" or
 * "\x{X}" -- and take its length with strlen().
 *
 * NOTE(review): the conditions selecting most of the branches, the append
 * to `result` and the return value are not visible in this excerpt;
 * presumably `unicode_p` chooses between the \u and \x families -- confirm.
 * NOTE(review): `buf` holds CHAR_ESC_LEN + 1 bytes while snprintf() is
 * given CHAR_ESC_LEN; the longest visible output, "\x{FFFFFFFF}", is 12
 * characters plus the terminator, so it still fits.
 */
6339 rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
6341 char buf[CHAR_ESC_LEN + 1];
6349 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
6351 else if (c < 0x10000) {
6352 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
6355 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
6360 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
6363 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
/* length of whatever escape form was written above */
6366 l = (int)strlen(buf);
/*
 * Map a control character to its backslash escape spelled as a C string
 * literal: NUL, newline, carriage return, tab, form feed, vertical tab
 * (013), backspace (010), bell (007), escape (033) and DEL (0x7f, rendered
 * as "\c?").
 * NOTE(review): the switch header and the result for any other value of
 * `c` are not visible in this excerpt.
 */
6372 ruby_escaped_char(
int c)
6375 case '\0':
return "\\0";
6376 case '\n':
return "\\n";
6377 case '\r':
return "\\r";
6378 case '\t':
return "\\t";
6379 case '\f':
return "\\f";
6380 case '\013':
return "\\v";
6381 case '\010':
return "\\b";
6382 case '\007':
return "\\a";
6383 case '\033':
return "\\e";
6384 case '\x7f':
return "\\c?";
6390 rb_str_escape(
VALUE str)
6396 const char *prev = p;
6397 char buf[CHAR_ESC_LEN + 1];
6407 if (p > prev) str_buf_cat(result, prev, p - prev);
6410 n = (int)(pend - p);
6412 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6413 str_buf_cat(result, buf, strlen(buf));
6421 cc = ruby_escaped_char(c);
6423 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6424 str_buf_cat(result, cc, strlen(cc));
6430 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6431 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6435 if (p > prev) str_buf_cat(result, prev, p - prev);
6460 const char *p, *pend, *prev;
6461 char buf[CHAR_ESC_LEN + 1];
6470 str_buf_cat2(result,
"\"");
6474 actenc = get_actual_encoding(encidx, str);
6475 if (actenc != enc) {
6485 if (p > prev) str_buf_cat(result, prev, p - prev);
6488 n = (int)(pend - p);
6490 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6491 str_buf_cat(result, buf, strlen(buf));
6499 if ((asciicompat || unicode_p) &&
6500 (c ==
'"'|| c ==
'\\' ||
6505 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
6506 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6507 str_buf_cat2(result,
"\\");
6508 if (asciicompat || enc == resenc) {
6514 case '\n': cc =
'n';
break;
6515 case '\r': cc =
'r';
break;
6516 case '\t': cc =
't';
break;
6517 case '\f': cc =
'f';
break;
6518 case '\013': cc =
'v';
break;
6519 case '\010': cc =
'b';
break;
6520 case '\007': cc =
'a';
break;
6521 case 033: cc =
'e';
break;
6522 default: cc = 0;
break;
6525 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6528 str_buf_cat(result, buf, 2);
6537 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6538 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6543 if (p > prev) str_buf_cat(result, prev, p - prev);
6544 str_buf_cat2(result,
"\"");
/* True when `p` lies before `e` and *p is '$', '@' or '{'.  The dump code
 * in this file tests it right after a '#' to decide whether an extra
 * backslash is needed.  `p` is expanded several times, so pass a
 * side-effect-free expression. */
6549 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
6572 const char *p, *pend;
6576 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
6581 len += strlen(enc->name);
6587 unsigned char c = *p++;
6590 case '"':
case '\\':
6591 case '\n':
case '\r':
6592 case '\t':
case '\f':
6593 case '\013':
case '\010':
case '\007':
case '\033':
6598 clen = IS_EVSTR(p, pend) ? 2 : 1;
6606 if (u8 && c > 0x7F) {
6612 else if (cc <= 0xFFFFF)
6625 if (clen > LONG_MAX - len) {
6637 unsigned char c = *p++;
6639 if (c ==
'"' || c ==
'\\') {
6643 else if (c ==
'#') {
6644 if (IS_EVSTR(p, pend)) *q++ =
'\\';
6647 else if (c ==
'\n') {
6651 else if (c ==
'\r') {
6655 else if (c ==
'\t') {
6659 else if (c ==
'\f') {
6663 else if (c ==
'\013') {
6667 else if (c ==
'\010') {
6671 else if (c ==
'\007') {
6675 else if (c ==
'\033') {
6690 snprintf(q, qend-q,
"u%04X", cc);
6692 snprintf(q, qend-q,
"u{%X}", cc);
6697 snprintf(q, qend-q,
"x%02X", c);
6704 snprintf(q, qend-q, nonascii_suffix, enc->name);
6714 unescape_ascii(
unsigned int c)
6738 undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
6740 const char *s = *ss;
6744 unsigned char buf[6];
6762 *buf = unescape_ascii(*s);
6775 if (*penc != enc_utf8) {
6794 if (hexlen == 0 || hexlen > 6) {
6800 if (0xd800 <= c && c <= 0xdfff) {
6813 if (0xd800 <= c && c <= 0xdfff) {
6844 static VALUE rb_str_is_ascii_only_p(
VALUE str);
6862 str_undump(
VALUE str)
6869 bool binary =
false;
6873 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
6876 if (!str_null_check(str, &w)) {
6880 if (*s !=
'"')
goto invalid_format;
6898 static const char force_encoding_suffix[] =
".force_encoding(\"";
6899 static const char dup_suffix[] =
".dup";
6900 const char *encname;
6905 size =
sizeof(dup_suffix) - 1;
6906 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
6908 size =
sizeof(force_encoding_suffix) - 1;
6909 if (s_end - s <= size)
goto invalid_format;
6910 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
6918 s = memchr(s,
'"', s_end-s);
6920 if (!s)
goto invalid_format;
6921 if (s_end - s != 2)
goto invalid_format;
6922 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
6924 encidx = rb_enc_find_index2(encname, (
long)size);
6938 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
6947 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
6960 str_true_enc(
VALUE str)
6963 rb_str_check_dummy_enc(enc);
/*
 * Fold the option symbols in argv into `flags`:
 *   sym_turkic      sets ONIGENC_CASE_FOLD_TURKISH_AZERI
 *   sym_lithuanian  sets ONIGENC_CASE_FOLD_LITHUANIAN
 *   sym_ascii       sets ONIGENC_CASE_ASCII_ONLY
 *   sym_fold        toggles ONIGENC_CASE_FOLD and ONIGENC_CASE_DOWNCASE,
 *                   only when DOWNCASE is set and UPCASE is not
 * sym_turkic and sym_lithuanian can be combined in either order.
 * NOTE(review): the argc checks, the error paths and the final return are
 * not visible in this excerpt.
 */
6967 static OnigCaseFoldType
6968 check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
6974 if (argv[0]==sym_turkic) {
6975 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
/* optional second option after :turkic */
6977 if (argv[1]==sym_lithuanian)
6978 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
6983 else if (argv[0]==sym_lithuanian) {
6984 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
/* optional second option after :lithuanian */
6986 if (argv[1]==sym_turkic)
6987 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
6994 else if (argv[0]==sym_ascii)
6995 flags |= ONIGENC_CASE_ASCII_ONLY;
6996 else if (argv[0]==sym_fold) {
/* XOR flips both bits at once, applied only to a pure DOWNCASE request */
6997 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
6998 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
/* Extra bytes added to the capacity of every case-mapping buffer. */
7016 #define CASE_MAPPING_ADDITIONAL_LENGTH 20
/* Defaults to 0 unless defined elsewhere; when non-zero the case-mapping
 * code prints diagnostics to stderr. */
7017 #ifndef CASEMAP_DEBUG
7018 # define CASEMAP_DEBUG 0
7026 OnigUChar space[FLEX_ARY_LEN];
/*
 * Release a chain of mapping buffers: follow the `next` links and free
 * each node with ruby_sized_xfree(), using the node's recorded `capa` as
 * the size argument.
 * NOTE(review): how `current_buffer` is initialised from `p` is not
 * visible in this excerpt.
 */
7030 mapping_buffer_free(
void *p)
7034 while (current_buffer) {
/* step to the successor first so the node can be freed safely */
7035 previous_buffer = current_buffer;
7036 current_buffer = current_buffer->next;
7037 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7043 {0, mapping_buffer_free,}
/*
 * Body fragment of the general case-mapping routine.
 *
 * The source is mapped chunk by chunk through enc->case_map() into a
 * linked chain of buffers, then the chunks are gathered into the result
 * string `target`, which receives the encoding of `source`.
 *
 * NOTE(review): the function header, the buffer allocation and several
 * closing braces are not visible in this excerpt.
 */
7051 const OnigUChar *source_current, *source_end;
7052 int target_length = 0;
7053 VALUE buffer_anchor;
7056 size_t buffer_count = 0;
7057 int buffer_length_or_invalid;
/* one iteration per buffer; loops while source bytes remain unconsumed */
7066 while (source_current < source_end) {
/* capacity scales with the remaining input times the buffer ordinal */
7068 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7069 if (CASEMAP_DEBUG) {
7070 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n", capa);
/* link the new buffer into the chain, then point pre_buffer at its `next`
 * slot so the following buffer is appended behind it (the address-of
 * expression had been mangled into a stray currency sign) */
7073 *pre_buffer = current_buffer;
7074 pre_buffer = &current_buffer->next;
7075 current_buffer->next = NULL;
7076 current_buffer->capa = capa;
7077 buffer_length_or_invalid = enc->case_map(flags,
7078 &source_current, source_end,
7079 current_buffer->space,
7080 current_buffer->space+current_buffer->capa,
/* a negative result means failure: release the whole chain */
7082 if (buffer_length_or_invalid < 0) {
7083 current_buffer =
DATA_PTR(buffer_anchor);
7085 mapping_buffer_free(current_buffer);
/* record the bytes produced for this chunk and add them to the total */
7088 target_length += current_buffer->used = buffer_length_or_invalid;
7090 if (CASEMAP_DEBUG) {
7091 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
/* single buffer: build the result directly from it */
7094 if (buffer_count==1) {
7095 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7098 char *target_current;
/* several buffers: copy each chunk into the result in chain order */
7102 current_buffer =
DATA_PTR(buffer_anchor);
7103 while (current_buffer) {
7104 memcpy(target_current, current_buffer->space, current_buffer->used);
7105 target_current += current_buffer->used;
7106 current_buffer = current_buffer->next;
/* the chunks are no longer needed once the result has been assembled */
7109 current_buffer =
DATA_PTR(buffer_anchor);
7111 mapping_buffer_free(current_buffer);
7116 str_enc_copy(target, source);
7125 const OnigUChar *source_current, *source_end;
7126 OnigUChar *target_current, *target_end;
7128 int length_or_invalid;
7130 if (old_length == 0)
return Qnil;
7134 if (source == target) {
7135 target_current = (OnigUChar*)source_current;
7136 target_end = (OnigUChar*)source_end;
7143 length_or_invalid = onigenc_ascii_only_case_map(flags,
7144 &source_current, source_end,
7145 target_current, target_end, enc);
7146 if (length_or_invalid < 0)
7148 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7149 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7150 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7152 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7155 str_enc_copy(target, source);
/*
 * Upper-case `str` one byte at a time: a byte in 'a'..'z' is overwritten
 * with its 'A'..'Z' counterpart.
 * NOTE(review): the loop, the update of `modified` and the return are not
 * visible in this excerpt; callers test the result to set
 * ONIGENC_CASE_MODIFIED, so it presumably reports whether any byte changed.
 */
7161 upcase_single(
VALUE str)
7164 bool modified =
false;
/* read the byte as unsigned so the range test below is well defined */
7167 unsigned int c = *(
unsigned char*)s;
7169 if (
'a' <= c && c <=
'z') {
7170 *s =
'A' + (c -
'a');
/*
 * In-place upcase.  Starting from ONIGENC_CASE_UPCASE, the options in argv
 * are merged by check_case_options(); str_modify_keep_cr() is then called
 * on the receiver, and one of three strategies runs:
 *   - case_option_single_p(): byte-wise upcase_single()
 *   - ONIGENC_CASE_ASCII_ONLY: rb_str_ascii_casemap() with `str` as both
 *     source and target
 *   - otherwise: replace the contents with the rb_str_casemap() result
 * Returns `str` when ONIGENC_CASE_MODIFIED ended up set in `flags`.
 * NOTE(review): the return value for the unmodified case is not visible in
 * this excerpt.
 */
7198 rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7201 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7203 flags = check_case_options(argc, argv, flags);
7204 str_modify_keep_cr(str);
7205 enc = str_true_enc(str);
7206 if (case_option_single_p(flags, enc, str)) {
7207 if (upcase_single(str))
7208 flags |= ONIGENC_CASE_MODIFIED;
7210 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7211 rb_str_ascii_casemap(str, str, &flags, enc);
7213 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7215 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7237 rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7240 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7243 flags = check_case_options(argc, argv, flags);
7244 enc = str_true_enc(str);
7245 if (case_option_single_p(flags, enc, str)) {
7247 str_enc_copy(ret, str);
7250 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7252 rb_str_ascii_casemap(str, ret, &flags, enc);
7255 ret = rb_str_casemap(str, &flags, enc);
/*
 * Lower-case `str` one byte at a time: a byte in 'A'..'Z' is overwritten
 * with its 'a'..'z' counterpart.
 * NOTE(review): the loop, the update of `modified` and the return are not
 * visible in this excerpt; callers test the result to set
 * ONIGENC_CASE_MODIFIED, so it presumably reports whether any byte changed.
 */
7262 downcase_single(
VALUE str)
7265 bool modified =
false;
/* read the byte as unsigned so the range test below is well defined */
7268 unsigned int c = *(
unsigned char*)s;
7270 if (
'A' <= c && c <=
'Z') {
7271 *s =
'a' + (c -
'A');
/*
 * In-place downcase.  Starting from ONIGENC_CASE_DOWNCASE, the options in
 * argv are merged by check_case_options(); str_modify_keep_cr() is then
 * called on the receiver, and one of three strategies runs:
 *   - case_option_single_p(): byte-wise downcase_single()
 *   - ONIGENC_CASE_ASCII_ONLY: rb_str_ascii_casemap() with `str` as both
 *     source and target
 *   - otherwise: replace the contents with the rb_str_casemap() result
 * Returns `str` when ONIGENC_CASE_MODIFIED ended up set in `flags`.
 * NOTE(review): the return value for the unmodified case is not visible in
 * this excerpt.
 */
7300 rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
7303 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7305 flags = check_case_options(argc, argv, flags);
7306 str_modify_keep_cr(str);
7307 enc = str_true_enc(str);
7308 if (case_option_single_p(flags, enc, str)) {
7309 if (downcase_single(str))
7310 flags |= ONIGENC_CASE_MODIFIED;
7312 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7313 rb_str_ascii_casemap(str, str, &flags, enc);
7315 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7317 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7339 rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
7342 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7345 flags = check_case_options(argc, argv, flags);
7346 enc = str_true_enc(str);
7347 if (case_option_single_p(flags, enc, str)) {
7349 str_enc_copy(ret, str);
7350 downcase_single(ret);
7352 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7354 rb_str_ascii_casemap(str, ret, &flags, enc);
7357 ret = rb_str_casemap(str, &flags, enc);
7385 rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
7388 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7390 flags = check_case_options(argc, argv, flags);
7391 str_modify_keep_cr(str);
7392 enc = str_true_enc(str);
7394 if (flags&ONIGENC_CASE_ASCII_ONLY)
7395 rb_str_ascii_casemap(str, str, &flags, enc);
7397 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7399 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7423 rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
7426 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7429 flags = check_case_options(argc, argv, flags);
7430 enc = str_true_enc(str);
7432 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7434 rb_str_ascii_casemap(str, ret, &flags, enc);
7437 ret = rb_str_casemap(str, &flags, enc);
7464 rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
7467 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7469 flags = check_case_options(argc, argv, flags);
7470 str_modify_keep_cr(str);
7471 enc = str_true_enc(str);
7472 if (flags&ONIGENC_CASE_ASCII_ONLY)
7473 rb_str_ascii_casemap(str, str, &flags, enc);
7475 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7477 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7501 rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
7504 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7507 flags = check_case_options(argc, argv, flags);
7508 enc = str_true_enc(str);
7510 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7512 rb_str_ascii_casemap(str, ret, &flags, enc);
7515 ret = rb_str_casemap(str, &flags, enc);
7520 typedef unsigned char *USTR;
7524 unsigned int now, max;
7536 if (t->p == t->pend)
return -1;
7537 if (
rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
7542 if (
rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
7544 if (t->p < t->pend) {
7548 if (t->now < 0x80 && c < 0x80) {
7550 "invalid range \"%c-%c\" in string transliteration",
7565 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
7566 if (t->now == t->max) {
7571 if (t->now < t->max) {
7587 const unsigned int errc = -1;
7588 unsigned int trans[256];
7590 struct tr trsrc, trrepl;
7592 unsigned int c, c0, last = 0;
7593 int modify = 0, i, l;
7594 unsigned char *s, *send;
7596 int singlebyte = single_byte_optimizable(str);
7600 #define CHECK_IF_ASCII(c) \
7601 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
7602 (cr = ENC_CODERANGE_VALID) : 0)
7608 return rb_str_delete_bang(1, &src, str);
7623 trsrc.p + l < trsrc.pend) {
7629 trsrc.gen = trrepl.gen = 0;
7630 trsrc.now = trrepl.now = 0;
7631 trsrc.max = trrepl.max = 0;
7634 for (i=0; i<256; i++) {
7637 while ((c = trnext(&trsrc, enc)) != errc) {
7646 while ((c = trnext(&trrepl, enc)) != errc)
7649 for (i=0; i<256; i++) {
7650 if (trans[i] != errc) {
7658 for (i=0; i<256; i++) {
7661 while ((c = trnext(&trsrc, enc)) != errc) {
7662 r = trnext(&trrepl, enc);
7663 if (r == errc) r = trrepl.now;
7677 str_modify_keep_cr(str);
7683 unsigned int save = -1;
7684 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
7699 if (cflag) c = last;
7702 else if (cflag) c = errc;
7708 if (c != (
unsigned int)-1) {
7720 if (enc != e1) may_modify = 1;
7722 if ((offset = t - buf) + tlen > max) {
7723 size_t MAYBE_UNUSED(old) = max + termlen;
7724 max = offset + tlen + (send - s);
7725 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
7729 if (may_modify && memcmp(s, t, tlen) != 0) {
7735 if (!STR_EMBED_P(str)) {
7736 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
7738 TERM_FILL((
char *)t, termlen);
7739 RSTRING(str)->as.heap.ptr = (
char *)buf;
7740 RSTRING(str)->as.heap.len = t - buf;
7741 STR_SET_NOEMBED(str);
7742 RSTRING(str)->as.heap.aux.capa = max;
7746 c = (
unsigned char)*s;
7747 if (trans[c] != errc) {
7764 long offset, max = (long)((send - s) * 1.2);
7765 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
7778 if (cflag) c = last;
7781 else if (cflag) c = errc;
7785 c = cflag ? last : errc;
7793 if (enc != e1) may_modify = 1;
7795 if ((offset = t - buf) + tlen > max) {
7796 size_t MAYBE_UNUSED(old) = max + termlen;
7797 max = offset + tlen + (long)((send - s) * 1.2);
7798 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
7803 if (may_modify && memcmp(s, t, tlen) != 0) {
7811 if (!STR_EMBED_P(str)) {
7812 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
7814 TERM_FILL((
char *)t, termlen);
7815 RSTRING(str)->as.heap.ptr = (
char *)buf;
7816 RSTRING(str)->as.heap.len = t - buf;
7817 STR_SET_NOEMBED(str);
7818 RSTRING(str)->as.heap.aux.capa = max;
7843 return tr_trans(str, src, repl, 0);
7886 tr_trans(str, src, repl, 0);
/* Number of distinct unsigned char values; indices below this address the
 * per-byte entries of a tr table. */
7890 #define TR_TABLE_MAX (UCHAR_MAX+1)
/* Table size including one extra slot at index TR_TABLE_MAX, which
 * tr_setup_table() fills from its `cflag` and tr_find() reads as a
 * fallback answer. */
7891 #define TR_TABLE_SIZE (TR_TABLE_MAX+1)
7893 tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
7896 const unsigned int errc = -1;
7897 char buf[TR_TABLE_MAX];
7900 VALUE table = 0, ptable = 0;
7901 int i, l, cflag = 0;
7904 tr.gen =
tr.now =
tr.max = 0;
7911 for (i=0; i<TR_TABLE_MAX; i++) {
7914 stable[TR_TABLE_MAX] = cflag;
7916 else if (stable[TR_TABLE_MAX] && !cflag) {
7917 stable[TR_TABLE_MAX] = 0;
7919 for (i=0; i<TR_TABLE_MAX; i++) {
7923 while ((c = trnext(&
tr, enc)) != errc) {
7924 if (c < TR_TABLE_MAX) {
7925 buf[(
unsigned char)c] = !cflag;
7930 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
7947 for (i=0; i<TR_TABLE_MAX; i++) {
7948 stable[i] = stable[i] && buf[i];
7950 if (!table && !cflag) {
/*
 * Test whether code point `c` is selected by a tr table.  Values below
 * TR_TABLE_MAX are answered straight from table[c]; the other visible path
 * falls back to the extra slot table[TR_TABLE_MAX].
 * NOTE(review): the handling of `del` and `nodel` for larger code points is
 * not visible in this excerpt.
 */
7957 tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
7959 if (c < TR_TABLE_MAX) {
7960 return table[c] != 0;
7974 return table[TR_TABLE_MAX] ? TRUE : FALSE;
7987 rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
7989 char squeez[TR_TABLE_SIZE];
7992 VALUE del = 0, nodel = 0;
7994 int i, ascompat, cr;
7998 for (i=0; i<argc; i++) {
8003 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8006 str_modify_keep_cr(str);
8015 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8028 if (tr_find(c, squeez, del, nodel)) {
8039 TERM_FILL(t, TERM_LEN(str));
8043 if (modify)
return str;
8063 rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8066 rb_str_delete_bang(argc, argv, str);
8080 rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8082 char squeez[TR_TABLE_SIZE];
8084 VALUE del = 0, nodel = 0;
8085 unsigned char *s, *send, *t;
8087 int ascompat, singlebyte = single_byte_optimizable(str);
8091 enc = STR_ENC_GET(str);
8094 for (i=0; i<argc; i++) {
8099 if (singlebyte && !single_byte_optimizable(s))
8101 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8105 str_modify_keep_cr(str);
8114 unsigned int c = *s++;
8115 if (c != save || (argc > 0 && !squeez[c])) {
8125 if (ascompat && (c = *s) < 0x80) {
8126 if (c != save || (argc > 0 && !squeez[c])) {
8134 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8144 TERM_FILL((
char *)t, TERM_LEN(str));
8150 if (modify)
return str;
8171 rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8174 rb_str_squeeze_bang(argc, argv, str);
8190 return tr_trans(str, src, repl, 1);
8211 tr_trans(str, src, repl, 1);
8244 rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8246 char table[TR_TABLE_SIZE];
8248 VALUE del = 0, nodel = 0, tstr;
8263 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8264 !is_broken_string(str)) {
8272 if (*(
unsigned char*)s++ == c) n++;
8278 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8279 for (i=1; i<argc; i++) {
8283 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8293 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8302 if (tr_find(c, table, del, nodel)) {
8313 rb_fs_check(
VALUE val)
8317 if (
NIL_P(val))
return 0;
8322 static const char isspacetable[256] = {
8323 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
8324 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8325 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8326 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8327 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8328 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8333 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8334 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8335 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8337 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8338 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
8341 #define ascii_isspace(c) isspacetable[(unsigned char)(c)]
8344 split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
8346 if (empty_count >= 0 && len == 0) {
8347 return empty_count + 1;
8349 if (empty_count > 0) {
8354 }
while (--empty_count > 0);
8358 rb_yield(str_new_empty_String(str));
8359 }
while (--empty_count > 0);
8373 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
8377 literal_split_pattern(
VALUE spat, split_type_t default_type)
8385 return SPLIT_TYPE_CHARS;
8388 if (len == 1 && ptr[0] ==
' ') {
8389 return SPLIT_TYPE_AWK;
8394 if (
rb_enc_ascget(ptr, ptr + len, &l, enc) ==
' ' && len == l) {
8395 return SPLIT_TYPE_AWK;
8398 return default_type;
8457 rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
8462 split_type_t split_type;
8463 long beg, end, i = 0, empty_count = -1;
8468 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
8470 if (lim <= 0) limit =
Qnil;
8471 else if (lim == 1) {
8483 if (
NIL_P(limit) && !lim) empty_count = 0;
8485 enc = STR_ENC_GET(str);
8486 split_type = SPLIT_TYPE_REGEXP;
8488 spat = get_pat_quoted(spat, 0);
8491 split_type = SPLIT_TYPE_AWK;
8493 else if (!(spat = rb_fs_check(spat))) {
8499 if (split_type != SPLIT_TYPE_AWK) {
8504 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
8505 if (split_type == SPLIT_TYPE_AWK) {
8507 split_type = SPLIT_TYPE_STRING;
8512 mustnot_broken(spat);
8513 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
8521 #define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
8527 if (split_type == SPLIT_TYPE_AWK) {
8533 if (is_ascii_string(str)) {
8534 while (ptr < eptr) {
8535 c = (
unsigned char)*ptr++;
8537 if (ascii_isspace(c)) {
8543 if (!
NIL_P(limit) && lim <= i)
break;
8546 else if (ascii_isspace(c)) {
8547 SPLIT_STR(beg, end-beg);
8550 if (!
NIL_P(limit)) ++i;
8558 while (ptr < eptr) {
8570 if (!
NIL_P(limit) && lim <= i)
break;
8574 SPLIT_STR(beg, end-beg);
8577 if (!
NIL_P(limit)) ++i;
8585 else if (split_type == SPLIT_TYPE_STRING) {
8586 char *str_start = ptr;
8587 char *substr_start = ptr;
8591 mustnot_broken(str);
8593 while (ptr < eptr &&
8594 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
8597 if (t != ptr + end) {
8601 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
8604 if (!
NIL_P(limit) && lim <= ++i)
break;
8606 beg = ptr - str_start;
8608 else if (split_type == SPLIT_TYPE_CHARS) {
8609 char *str_start = ptr;
8612 mustnot_broken(str);
8614 while (ptr < eptr &&
8616 SPLIT_STR(ptr - str_start, n);
8618 if (!
NIL_P(limit) && lim <= ++i)
break;
8620 beg = ptr - str_start;
8631 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (
void)0)) {
8636 if (start == end && BEG(0) == END(0)) {
8641 else if (last_null == 1) {
8655 SPLIT_STR(beg, end-beg);
8656 beg = start = END(0);
8660 for (idx=1; idx < regs->num_regs; idx++) {
8661 if (BEG(idx) == -1)
continue;
8662 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
8664 if (!
NIL_P(limit) && lim <= ++i)
break;
8666 if (match) rb_match_unbusy(match);
8672 return result ? result : str;
8682 return rb_str_split_m(1, &sep, str);
8685 #define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
8700 #define ENUM_ELEM(ary, e) enumerator_element(ary, e)
8703 chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
8728 #define rb_rs get_rs()
8735 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
8736 long pos, len, rslen;
8742 static ID keywords[1];
8751 if (!ENUM_ELEM(ary, str)) {
8775 const char *eol = NULL;
8777 while (subend < pend) {
8783 if (eol == subend)
break;
8785 if (subptr) eol = subend;
8788 if (!subptr) subptr = subend;
8792 }
while (subend < pend);
8795 subend - subptr + (chomp ? 0 : rslen));
8796 if (ENUM_ELEM(ary, line)) {
8797 str_mod_check(str, ptr, len);
8799 subptr = eol = NULL;
8818 while (subptr < pend) {
8819 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
8823 if (hit != adjusted) {
8827 subend = hit += rslen;
8830 subend = chomp_newline(subptr, subend, enc);
8837 if (ENUM_ELEM(ary, line)) {
8838 str_mod_check(str, ptr, len);
8843 if (subptr != pend) {
8846 pend = chomp_newline(subptr, pend, enc);
8848 else if (pend - subptr >= rslen &&
8849 memcmp(pend - rslen, rsptr, rslen) == 0) {
8854 ENUM_ELEM(ary, line);
8913 rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
8916 return rb_str_enumerate_lines(argc, argv, str, 0);
8939 rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
8941 VALUE ary = WANTARRAY(
"lines", 0);
8942 return rb_str_enumerate_lines(argc, argv, str, ary);
8981 rb_str_each_byte(
VALUE str)
8984 return rb_str_enumerate_bytes(str, 0);
8999 rb_str_bytes(
VALUE str)
9002 return rb_str_enumerate_bytes(str, ary);
9025 for (i = 0; i < len; i += n) {
9031 for (i = 0; i < len; i += n) {
9059 rb_str_each_char(
VALUE str)
9062 return rb_str_enumerate_chars(str, 0);
9077 rb_str_chars(
VALUE str)
9080 return rb_str_enumerate_chars(str, ary);
9084 rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9089 const char *ptr, *end;
9092 if (single_byte_optimizable(str))
9093 return rb_str_enumerate_bytes(str, ary);
9098 enc = STR_ENC_GET(str);
9133 rb_str_each_codepoint(
VALUE str)
9136 return rb_str_enumerate_codepoints(str, 0);
9152 rb_str_codepoints(
VALUE str)
9155 return rb_str_enumerate_codepoints(str, ary);
9162 regex_t *reg_grapheme_cluster = NULL;
9163 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9167 reg_grapheme_cluster = reg_grapheme_cluster_utf8;
9169 if (!reg_grapheme_cluster) {
9170 const OnigUChar source_ascii[] =
"\\X";
9172 const OnigUChar *source = source_ascii;
9173 size_t source_len =
sizeof(source_ascii) - 1;
9175 #define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9176 #define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9177 #define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9178 #define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9179 #define CASE_UTF(e) \
9180 case ENCINDEX_UTF_##e: { \
9181 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9182 source = source_UTF_##e; \
9183 source_len = sizeof(source_UTF_##e); \
9186 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9193 int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
9194 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9196 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9197 onig_error_code_to_str(message, r, &einfo);
9198 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9201 reg_grapheme_cluster_utf8 = reg_grapheme_cluster;
9204 return reg_grapheme_cluster;
9210 size_t grapheme_cluster_count = 0;
9211 regex_t *reg_grapheme_cluster = NULL;
9213 const char *ptr, *end;
9219 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9224 OnigPosition len = onig_match(reg_grapheme_cluster,
9225 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9226 (
const OnigUChar *)ptr, NULL, 0);
9227 if (len <= 0)
break;
9228 grapheme_cluster_count++;
9232 return SIZET2NUM(grapheme_cluster_count);
9236 rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9239 regex_t *reg_grapheme_cluster = NULL;
9241 const char *ptr0, *ptr, *end;
9244 return rb_str_enumerate_chars(str, ary);
9248 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9253 OnigPosition len = onig_match(reg_grapheme_cluster,
9254 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9255 (
const OnigUChar *)ptr, NULL, 0);
9256 if (len <= 0)
break;
9283 rb_str_each_grapheme_cluster(
VALUE str)
9286 return rb_str_enumerate_grapheme_clusters(str, 0);
9301 rb_str_grapheme_clusters(
VALUE str)
9304 return rb_str_enumerate_grapheme_clusters(str, ary);
9308 chopped_length(
VALUE str)
9311 const char *p, *p2, *beg, *end;
9315 if (beg >= end)
return 0;
9335 rb_str_chop_bang(
VALUE str)
9337 str_modify_keep_cr(str);
9340 len = chopped_length(str);
9341 STR_SET_LEN(str, len);
9371 rb_str_chop(
VALUE str)
9377 smart_chomp(
VALUE str,
const char *e,
const char *p)
9396 if (--e > p && *(e-1) ==
'\r') {
9413 char *pp, *e, *rsptr;
9418 if (len == 0)
return 0;
9421 return smart_chomp(str, e, p);
9442 while (e > p && *(e-1) ==
'\n') {
9444 if (e > p && *(e-1) ==
'\r')
9450 if (rslen > len)
return len;
9453 newline = rsptr[rslen-1];
9456 if (newline ==
'\n')
9457 return smart_chomp(str, e, p);
9461 return smart_chomp(str, e, p);
9466 if (is_broken_string(rs)) {
9470 if (p[len-1] == newline &&
9472 memcmp(rsptr, pp, rslen) == 0)) {
9486 chomp_rs(
int argc,
const VALUE *argv)
9503 long len = chompped_length(str, rs);
9504 if (len >= olen)
return Qnil;
9505 str_modify_keep_cr(str);
9506 STR_SET_LEN(str, len);
9524 rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
9527 str_modifiable(str);
9529 rs = chomp_rs(argc, argv);
9531 return rb_str_chomp_string(str, rs);
9558 rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
9560 VALUE rs = chomp_rs(argc, argv);
9568 const char *
const start = s;
9570 if (!s || s >= e)
return 0;
9573 if (single_byte_optimizable(str)) {
9574 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
9604 rb_str_lstrip_bang(
VALUE str)
9610 str_modify_keep_cr(str);
9611 enc = STR_ENC_GET(str);
9613 loffset = lstrip_offset(str, start, start+olen, enc);
9615 long len = olen-loffset;
9616 s = start + loffset;
9617 memmove(start, s, len);
9618 STR_SET_LEN(str, len);
9640 rb_str_lstrip(
VALUE str)
9645 loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str));
9646 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
9655 rb_str_check_dummy_enc(enc);
9656 if (!s || s >= e)
return 0;
9660 if (single_byte_optimizable(str)) {
9662 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
9692 rb_str_rstrip_bang(
VALUE str)
9698 str_modify_keep_cr(str);
9699 enc = STR_ENC_GET(str);
9701 roffset = rstrip_offset(str, start, start+olen, enc);
9703 long len = olen - roffset;
9705 STR_SET_LEN(str, len);
9727 rb_str_rstrip(
VALUE str)
9733 enc = STR_ENC_GET(str);
9735 roffset = rstrip_offset(str, start, start+olen, enc);
9737 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
9756 rb_str_strip_bang(
VALUE str)
9759 long olen, loffset, roffset;
9762 str_modify_keep_cr(str);
9763 enc = STR_ENC_GET(str);
9765 loffset = lstrip_offset(str, start, start+olen, enc);
9766 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
9768 if (loffset > 0 || roffset > 0) {
9769 long len = olen-roffset;
9772 memmove(start, start + loffset, len);
9774 STR_SET_LEN(str, len);
9798 rb_str_strip(
VALUE str)
9801 long olen, loffset, roffset;
9805 loffset = lstrip_offset(str, start, start+olen, enc);
9806 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
9808 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
9813 scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
9815 VALUE result, match;
9818 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
9844 if (!regs || regs->num_regs == 1) {
9849 for (i=1; i < regs->num_regs; i++) {
9899 long last = -1, prev = 0;
9902 pat = get_pat_quoted(pat, 1);
9903 mustnot_broken(str);
9907 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
9912 if (last >= 0) rb_pat_search(pat, str, last, 1);
9917 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
9921 str_mod_check(str, p, len);
9923 if (last >= 0) rb_pat_search(pat, str, last, 1);
9943 rb_str_hex(
VALUE str)
9967 rb_str_oct(
VALUE str)
9972 #ifndef HAVE_CRYPT_R
9978 rb_nativethread_lock_t lock;
9982 crypt_mutex_destroy(
void)
9986 crypt_mutex.initialized = 0;
9990 crypt_mutex_initialize(
void)
9997 atexit(crypt_mutex_destroy);
10004 rb_bug(
"crypt_mutex.initialized: %d->%d", i, crypt_mutex.initialized);
10072 #ifdef HAVE_CRYPT_R
10075 # define CRYPT_END() ALLOCV_END(databuf)
10077 extern char *crypt(
const char *,
const char *);
10078 # define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10081 const char *s, *saltp;
10083 #ifdef BROKEN_CRYPT
10084 char salt_8bit_clean[3];
10088 mustnot_wchar(str);
10089 mustnot_wchar(salt);
10092 if (
RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10096 #ifdef BROKEN_CRYPT
10097 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10098 salt_8bit_clean[0] = saltp[0] & 0x7f;
10099 salt_8bit_clean[1] = saltp[1] & 0x7f;
10100 salt_8bit_clean[2] =
'\0';
10101 saltp = salt_8bit_clean;
10104 #ifdef HAVE_CRYPT_R
10106 # ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10107 data->initialized = 0;
10109 res = crypt_r(s, saltp, data);
10111 crypt_mutex_initialize();
10113 res = crypt(s, saltp);
10136 rb_str_ord(
VALUE s)
10155 rb_str_sum(
int argc,
VALUE *argv,
VALUE str)
10158 char *ptr, *p, *pend;
10161 unsigned long sum0 = 0;
10173 str_mod_check(str, ptr, len);
10176 sum0 += (
unsigned char)*p;
10187 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10188 sum0 &= (((
unsigned long)1)<<bits)-1;
10208 rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10212 long width, len, flen = 1, fclen = 1;
10215 const char *f =
" ";
10216 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10218 int singlebyte = 1, cr;
10222 enc = STR_ENC_GET(str);
10230 fclen = str_strlen(pad, enc);
10231 singlebyte = single_byte_optimizable(pad);
10232 if (flen == 0 || fclen == 0) {
10236 len = str_strlen(str, enc);
10237 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10239 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10243 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10244 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10247 if ((len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10248 (len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10249 (len += llen2 + rlen2) >= LONG_MAX - size) {
10253 res = str_new0(
rb_cString, 0, len, termlen);
10256 memset(p, *f, llen);
10260 while (llen >= fclen) {
10266 memcpy(p, f, llen2);
10273 memset(p, *f, rlen);
10277 while (rlen >= fclen) {
10283 memcpy(p, f, rlen2);
10287 TERM_FILL(p, termlen);
10314 rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
10316 return rb_str_justify(argc, argv, str,
'l');
10334 rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
10336 return rb_str_justify(argc, argv, str,
'r');
10354 rb_str_center(
int argc,
VALUE *argv,
VALUE str)
10356 return rb_str_justify(argc, argv, str,
'c');
10379 sep = get_pat_quoted(sep, 0);
10391 pos = rb_str_index(str, sep, 0);
10392 if (pos < 0)
goto failed;
10400 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
10437 sep = get_pat_quoted(sep, 0);
10450 pos = rb_str_rindex(str, sep, pos);
10462 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
10481 rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
10485 for (i=0; i<argc; i++) {
10486 VALUE tmp = argv[i];
10488 if (rb_reg_start_with_p(tmp, str))
10516 rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
10522 for (i=0; i<argc; i++) {
10523 VALUE tmp = argv[i];
10550 deleted_prefix_length(
VALUE str,
VALUE prefix)
10552 char *strptr, *prefixptr;
10553 long olen, prefixlen;
10556 if (is_broken_string(prefix))
return 0;
10561 if (prefixlen <= 0)
return 0;
10563 if (olen < prefixlen)
return 0;
10566 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
10583 rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
10586 str_modify_keep_cr(str);
10588 prefixlen = deleted_prefix_length(str, prefix);
10589 if (prefixlen <= 0)
return Qnil;
10605 rb_str_delete_prefix(
VALUE str,
VALUE prefix)
10609 prefixlen = deleted_prefix_length(str, prefix);
10610 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
10625 deleted_suffix_length(
VALUE str,
VALUE suffix)
10627 char *strptr, *suffixptr, *s;
10628 long olen, suffixlen;
10632 if (is_broken_string(suffix))
return 0;
10637 if (suffixlen <= 0)
return 0;
10639 if (olen < suffixlen)
return 0;
10642 s = strptr + olen - suffixlen;
10643 if (memcmp(s, suffixptr, suffixlen) != 0)
return 0;
10661 rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
10663 long olen, suffixlen, len;
10664 str_modifiable(str);
10666 suffixlen = deleted_suffix_length(str, suffix);
10667 if (suffixlen <= 0)
return Qnil;
10670 str_modify_keep_cr(str);
10671 len = olen - suffixlen;
10672 STR_SET_LEN(str, len);
10673 TERM_FILL(&
RSTRING_PTR(str)[len], TERM_LEN(str));
10691 rb_str_delete_suffix(
VALUE str,
VALUE suffix)
10695 suffixlen = deleted_suffix_length(str, suffix);
10696 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
10713 val = rb_fs_check(val);
10716 "value of %"PRIsVALUE
" must be String or Regexp",
10720 rb_warn_deprecated(
"`$;'", NULL);
10736 str_modifiable(str);
10750 rb_str_b(
VALUE str)
10753 if (
FL_TEST(str, STR_NOEMBED)) {
10759 str_replace_shared_without_enc(str2, str);
10776 rb_str_valid_encoding_p(
VALUE str)
10794 rb_str_is_ascii_only_p(
VALUE str)
10804 static const char ellipsis[] =
"...";
10805 const long ellipsislen =
sizeof(ellipsis) - 1;
10808 const char *
const p =
RSTRING_PTR(str), *e = p + blen;
10809 VALUE estr, ret = 0;
10813 (e =
rb_enc_nth(p, e, len, enc)) - p == blen) {
10816 else if (len <= ellipsislen ||
10870 if (enc == STR_ENC_GET(str)) {
10875 return enc_str_scrub(enc, str, repl, cr);
10883 const char *rep, *p, *e, *p1, *sp;
10896 if (!
NIL_P(repl)) {
10897 repl = str_compat_and_valid(repl, enc);
10905 #define DEFAULT_REPLACE_CHAR(str) do { \
10906 static const char replace[sizeof(str)-1] = str; \
10907 rep = replace; replen = (int)sizeof(replace); \
10922 else if (!
NIL_P(repl)) {
10928 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
10932 DEFAULT_REPLACE_CHAR(
"?");
10937 p = search_nonascii(p, e);
10961 if (e - p < clen) clen = e - p;
10968 for (; clen > 1; clen--) {
10981 str_mod_check(str, sp, slen);
10982 repl = str_compat_and_valid(repl, enc);
10989 p = search_nonascii(p, e);
11016 str_mod_check(str, sp, slen);
11017 repl = str_compat_and_valid(repl, enc);
11030 else if (!
NIL_P(repl)) {
11034 else if (encidx == ENCINDEX_UTF_16BE) {
11035 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11037 else if (encidx == ENCINDEX_UTF_16LE) {
11038 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11040 else if (encidx == ENCINDEX_UTF_32BE) {
11041 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11043 else if (encidx == ENCINDEX_UTF_32LE) {
11044 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11047 DEFAULT_REPLACE_CHAR(
"?");
11064 if (e - p < clen) clen = e - p;
11065 if (clen <= mbminlen * 2) {
11070 for (; clen > mbminlen; clen-=mbminlen) {
11082 str_mod_check(str, sp, slen);
11083 repl = str_compat_and_valid(repl, enc);
11109 str_mod_check(str, sp, slen);
11110 repl = str_compat_and_valid(repl, enc);
11157 str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11165 static ID id_normalize;
11166 static ID id_normalized_p;
11167 static VALUE mUnicodeNormalize;
11170 unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11172 static int UnicodeNormalizeRequired = 0;
11175 if (!UnicodeNormalizeRequired) {
11176 rb_require(
"unicode_normalize/normalize.rb");
11177 UnicodeNormalizeRequired = 1;
11181 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11207 rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11209 return unicode_normalize_common(argc, argv, str, id_normalize);
11220 rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11222 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11243 rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11245 return unicode_normalize_common(argc, argv, str, id_normalized_p);
11378 #define sym_equal rb_obj_equal
11381 sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
11397 rb_str_symname_p(
VALUE sym)
11405 enc = STR_ENC_GET(sym);
11408 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) || len != (
long)strlen(ptr) ||
11416 rb_str_quote_unprintable(
VALUE str)
11426 enc = STR_ENC_GET(str);
11429 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
11430 !sym_printable(ptr, ptr + len, enc)) {
11431 return rb_str_escape(str);
11436 MJIT_FUNC_EXPORTED
VALUE
11437 rb_id_quote_unprintable(
ID id)
11440 if (!rb_str_symname_p(str)) {
11441 return rb_str_escape(str);
11456 sym_inspect(
VALUE sym)
11463 if (!rb_str_symname_p(str)) {
11468 memmove(dest + 1, dest, len);
11475 memcpy(dest + 1, ptr, len);
11535 sym_to_sym(
VALUE sym)
11540 MJIT_FUNC_EXPORTED
VALUE
11541 rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
11563 rb_sym_to_proc(
VALUE sym)
11577 sym_succ(
VALUE sym)
11686 return rb_str_match(
rb_sym2str(sym), other);
11698 sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
11700 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
11712 sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
11714 return rb_str_match_m_p(argc, argv, sym);
11730 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
11742 sym_length(
VALUE sym)
11755 sym_empty(
VALUE sym)
11771 sym_upcase(
int argc,
VALUE *argv,
VALUE sym)
11789 sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
11805 sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
11821 sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
11842 sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
11844 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
11861 sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
11863 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
11874 sym_encoding(
VALUE sym)
11880 string_for_symbol(
VALUE name)
11899 name = string_for_symbol(name);
11909 name = string_for_symbol(name);
11930 sym_all_symbols(
VALUE _)
11938 return rb_fstring(str);
11945 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), TRUE);
11957 if (UNLIKELY(rb_enc_autoload_p(enc))) {
11958 rb_enc_autoload(enc);
11962 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), TRUE);
12368 assert(rb_vm_fstring_table());
#define RUBY_ASSERT(expr)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
#define RUBY_ASSERT_ALWAYS(expr)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ATOMIC_CAS(var, oldval, newval)
Atomic compare-and-swap.
std::atomic< unsigned > rb_atomic_t
Type that is eligible for atomic operations.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_isascii(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isascii(), except it additionally takes an encoding.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
@ RUBY_FL_FREEZE
This flag has something to do with data immutability.
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves arguments from argc and argv to the given VALUE references according to the format string.
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a method.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define OBJ_FREEZE_RAW
Old name of RB_OBJ_FREEZE_RAW.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it always reports, regardless of the runtime -W flag.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
void rb_bug(const char *fmt,...)
Interpreter panic switch.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
void rb_fatal(const char *fmt,...)
Raises the unsung "fatal" exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eArgError
ArgumentError exception.
VALUE rb_eIndexError
IndexError exception.
VALUE rb_ensure(VALUE(*b_proc)(VALUE), VALUE data1, VALUE(*e_proc)(VALUE), VALUE data2)
An equivalent to ensure clause.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of C's.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
rb_encoding * rb_locale_encoding(void)
Queries the encoding that represents the current locale.
rb_encoding * rb_default_external_encoding(void)
Queries the "default external" encoding.
int rb_enc_dummy_p(rb_encoding *enc)
Queries if the passed encoding is dummy.
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
int rb_enc_get_index(VALUE obj)
Queries the index of the encoding of the passed object, if any.
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Identical to rb_enc_associate_index(), except it takes an encoding itself instead of its index.
rb_encoding * rb_usascii_encoding(void)
Queries the encoding that represents US-ASCII.
int rb_enc_codelen(int code, rb_encoding *enc)
Queries the number of bytes required to represent the passed code point using the passed encoding.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
void rb_enc_copy(VALUE dst, VALUE src)
Destructively copies the encoding of the latter object to that of former one.
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itsel...
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_mbclen() unless the character at p overruns e.
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding ...
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Look for the "common" encoding between the two.
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
int rb_enc_unicode_p(rb_encoding *enc)
Queries if the passed encoding is either one of UTF-8/16/32.
rb_encoding * rb_default_internal_encoding(void)
Queries the "default internal" encoding.
int rb_enc_to_index(rb_encoding *enc)
Queries the index of the encoding.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
rb_encoding * rb_to_encoding(VALUE obj)
Identical to rb_find_encoding(), except it raises an exception instead of returning NULL.
void rb_enc_set_index(VALUE obj, int encindex)
Destructively assigns an encoding (via its index) to an object.
rb_encoding * rb_ascii8bit_encoding(void)
Queries the encoding that represents ASCII-8BIT a.k.a. binary.
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Identical to rb_enc_compatible(), except it raises an exception instead of returning NULL.
VALUE rb_enc_from_encoding(rb_encoding *enc)
Queries the Ruby-level counterpart instance of rb_cEncoding that corresponds to the passed encoding.
static bool rb_enc_asciicompat(rb_encoding *enc)
Queries if the passed encoding is in some sense compatible with ASCII.
rb_encoding * rb_utf8_encoding(void)
Queries the encoding that represents UTF-8.
static char * rb_enc_prev_char(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the previous (left) character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbcput(unsigned int c, void *buf, rb_encoding *enc)
Identical to rb_enc_uint_chr(), except it writes back to the passed buffer instead of allocating one.
rb_encoding * rb_enc_from_index(int idx)
Identical to rb_find_encoding(), except it takes an encoding index instead of a Ruby object.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
VALUE rb_enc_associate_index(VALUE obj, int encindex)
Identical to rb_enc_set_index(), except it additionally does contents fix-up depending on the passed ...
rb_encoding * rb_enc_get(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
static const char * rb_enc_name(rb_encoding *enc)
Queries the (canonical) name of the passed encoding.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
int rb_usascii_encindex(void)
Identical to rb_usascii_encoding(), except it returns the encoding's index instead of the encoding it...
rb_encoding * rb_filesystem_encoding(void)
Queries the "filesystem" encoding.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_new(), except it additionally takes an encoding.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_obj_encoding(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcallv(VALUE recv, ID mid, int argc, const VALUE *argv)
Identical to rb_funcall(), except it takes the method arguments as a C array.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
void rb_gc_register_address(VALUE *valptr)
Inform the garbage collector that valptr points to a live Ruby object that should not be moved.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that it adds only one element.
VALUE rb_str_to_inum(VALUE str, int base, int badcheck)
Identical to rb_cstr2inum(), except it takes Ruby's strings instead of C's.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
#define rb_check_frozen
Just another name of rb_check_frozen_inline.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_check_hash_type(VALUE obj)
Try converting an object to its hash representation using its to_hash method, if any.
VALUE rb_hash_aref(VALUE hash, VALUE key)
Queries the given key in the given hash table.
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Inserts or replaces ("upsert"s) the objects into the given hash table.
VALUE rb_hash_lookup(VALUE hash, VALUE key)
Identical to rb_hash_aref(), except it always returns RUBY_Qnil for misshits.
VALUE rb_hash_new(void)
Creates a new, empty hash object.
VALUE rb_rs
The record separator character for inputs, or the $/.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. "\n".
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_str_to_interned_str(VALUE str)
Identical to rb_interned_str(), except it takes a Ruby's string instead of C's.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
VALUE rb_utf8_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "UTF-8" encoding.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_utf8_str_new_cstr(const char *ptr)
Identical to rb_str_new_cstr(), except it generates a string of "UTF-8" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
VALUE rb_str_buf_cat(VALUE, const char *, long)
Just another name of rb_str_cat.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_cat2(VALUE, const char *)
Just another name of rb_str_cat_cstr.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
void rb_str_modify(VALUE str)
Declares that the string is about to be modified.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
VALUE rb_usascii_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
VALUE rb_usascii_str_new_cstr(const char *ptr)
Identical to rb_str_new_cstr(), except it generates a string of "US ASCII" encoding.
VALUE rb_str_buf_cat2(VALUE, const char *)
Just another name of rb_str_cat_cstr.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_str_buf_new_cstr(const char *ptr)
This is a rb_str_buf_new() + rb_str_buf_cat() combo.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
VALUE rb_tainted_str_new(const char *ptr, long len)
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_new(const char *ptr, long len)
Allocates an instance of rb_cString.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
VALUE rb_str_dup_frozen(VALUE)
Just another name of rb_str_new_frozen.
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
VALUE rb_locale_str_new_cstr(const char *ptr)
Identical to rb_locale_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_str_new_cstr(const char *ptr)
Identical to rb_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_resize(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_tainted_str_new_cstr(const char *ptr)
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_external_str_new_cstr(const char *ptr)
Identical to rb_external_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
VALUE rb_str_cat_cstr(VALUE dst, const char *src)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
ID rb_intern(const char *name)
Finds or creates a symbol of the given name.
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
ID rb_to_id(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
ID rb_intern_str(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
VALUE rb_id2str(ID id)
Identical to rb_id2name(), except it returns a Ruby's String instead of C's.
void rb_define_hooked_variable(const char *name, VALUE *var, rb_gvar_getter_t *getter, rb_gvar_setter_t *setter)
Identical to rb_define_virtual_variable(), but can also specify a storage.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
int st_foreach(st_table *q, int_type *w, st_data_t e)
Iteration over the given table.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
@ RSTRING_EMBED_LEN_MAX
Max possible number of characters that can be embedded.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
static long RSTRING_EMBED_LEN(VALUE str)
Queries the length of the string.
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RString::@47::@48 heap
Strings that use separated memory region for contents use this pattern.
union RString::@47 as
String's specific fields.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
struct RString::@47::@49 embed
Embedded contents.
long len
Length of the string, not including terminating NUL character.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
void rb_nativethread_lock_initialize(rb_nativethread_lock_t *lock)
Fills the passed lock with an initial value.
void rb_nativethread_lock_destroy(rb_nativethread_lock_t *lock)
Destroys the passed mutex.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
void ruby_xfree(void *ptr)
Deallocates a storage instance.