Ruby  3.1.4p223 (2023-03-30 revision HEAD)
regcomp.c
1 /**********************************************************************
2  regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6  * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include "regparse.h"
32 
33 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
34 
35 extern OnigCaseFoldType
36 onig_get_default_case_fold_flag(void)
37 {
38  return OnigDefaultCaseFoldFlag;
39 }
40 
41 extern int
42 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
43 {
44  OnigDefaultCaseFoldFlag = case_fold_flag;
45  return 0;
46 }
47 
48 
49 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51 #endif
52 
53 #if 0
54 static UChar*
55 str_dup(UChar* s, UChar* end)
56 {
57  ptrdiff_t len = end - s;
58 
59  if (len > 0) {
60  UChar* r = (UChar* )xmalloc(len + 1);
61  CHECK_NULL_RETURN(r);
62  xmemcpy(r, s, len);
63  r[len] = (UChar )0;
64  return r;
65  }
66  else return NULL;
67 }
68 #endif
69 
70 static void
71 swap_node(Node* a, Node* b)
72 {
73  Node c;
74  c = *a; *a = *b; *b = c;
75 
76  if (NTYPE(a) == NT_STR) {
77  StrNode* sn = NSTR(a);
78  if (sn->capa == 0) {
79  size_t len = sn->end - sn->s;
80  sn->s = sn->buf;
81  sn->end = sn->s + len;
82  }
83  }
84 
85  if (NTYPE(b) == NT_STR) {
86  StrNode* sn = NSTR(b);
87  if (sn->capa == 0) {
88  size_t len = sn->end - sn->s;
89  sn->s = sn->buf;
90  sn->end = sn->s + len;
91  }
92  }
93 }
94 
95 static OnigDistance
96 distance_add(OnigDistance d1, OnigDistance d2)
97 {
98  if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99  return ONIG_INFINITE_DISTANCE;
100  else {
101  if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102  else return ONIG_INFINITE_DISTANCE;
103  }
104 }
105 
106 static OnigDistance
107 distance_multiply(OnigDistance d, int m)
108 {
109  if (m == 0) return 0;
110 
111  if (d < ONIG_INFINITE_DISTANCE / m)
112  return d * m;
113  else
114  return ONIG_INFINITE_DISTANCE;
115 }
116 
117 static int
118 bitset_is_empty(BitSetRef bs)
119 {
120  int i;
121  for (i = 0; i < BITSET_SIZE; i++) {
122  if (bs[i] != 0) return 0;
123  }
124  return 1;
125 }
126 
127 #ifdef ONIG_DEBUG
128 static int
129 bitset_on_num(BitSetRef bs)
130 {
131  int i, n;
132 
133  n = 0;
134  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135  if (BITSET_AT(bs, i)) n++;
136  }
137  return n;
138 }
139 #endif
140 
141 // Attempt to right size allocated buffers for a regex post compile
142 static void
143 onig_reg_resize(regex_t *reg)
144 {
145  do {
146  if (!reg->used) {
147  xfree(reg->p);
148  reg->alloc = 0;
149  reg->p = 0;
150  }
151  else if (reg->alloc > reg->used) {
152  unsigned char *new_ptr = xrealloc(reg->p, reg->used);
153  // Skip the right size optimization if memory allocation fails
154  if (new_ptr) {
155  reg->alloc = reg->used;
156  reg->p = new_ptr;
157  }
158  }
159  } while ((reg = reg->chain) != 0);
160 }
161 
162 extern int
163 onig_bbuf_init(BBuf* buf, OnigDistance size)
164 {
165  if (size <= 0) {
166  size = 0;
167  buf->p = NULL;
168  }
169  else {
170  buf->p = (UChar* )xmalloc(size);
171  if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
172  }
173 
174  buf->alloc = (unsigned int )size;
175  buf->used = 0;
176  return 0;
177 }
178 
179 
180 #ifdef USE_SUBEXP_CALL
181 
182 static int
183 unset_addr_list_init(UnsetAddrList* uslist, int size)
184 {
185  UnsetAddr* p;
186 
187  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
188  CHECK_NULL_RETURN_MEMERR(p);
189  uslist->num = 0;
190  uslist->alloc = size;
191  uslist->us = p;
192  return 0;
193 }
194 
195 static void
196 unset_addr_list_end(UnsetAddrList* uslist)
197 {
198  if (IS_NOT_NULL(uslist->us))
199  xfree(uslist->us);
200 }
201 
202 static int
203 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
204 {
205  UnsetAddr* p;
206  int size;
207 
208  if (uslist->num >= uslist->alloc) {
209  size = uslist->alloc * 2;
210  p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
211  CHECK_NULL_RETURN_MEMERR(p);
212  uslist->alloc = size;
213  uslist->us = p;
214  }
215 
216  uslist->us[uslist->num].offset = offset;
217  uslist->us[uslist->num].target = node;
218  uslist->num++;
219  return 0;
220 }
221 #endif /* USE_SUBEXP_CALL */
222 
223 
224 static int
225 add_opcode(regex_t* reg, int opcode)
226 {
227  BBUF_ADD1(reg, opcode);
228  return 0;
229 }
230 
231 #ifdef USE_COMBINATION_EXPLOSION_CHECK
232 static int
233 add_state_check_num(regex_t* reg, int num)
234 {
235  StateCheckNumType n = (StateCheckNumType )num;
236 
237  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
238  return 0;
239 }
240 #endif
241 
242 static int
243 add_rel_addr(regex_t* reg, int addr)
244 {
245  RelAddrType ra = (RelAddrType )addr;
246 
247  BBUF_ADD(reg, &ra, SIZE_RELADDR);
248  return 0;
249 }
250 
251 static int
252 add_abs_addr(regex_t* reg, int addr)
253 {
254  AbsAddrType ra = (AbsAddrType )addr;
255 
256  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
257  return 0;
258 }
259 
260 static int
261 add_length(regex_t* reg, OnigDistance len)
262 {
263  LengthType l = (LengthType )len;
264 
265  BBUF_ADD(reg, &l, SIZE_LENGTH);
266  return 0;
267 }
268 
269 static int
270 add_mem_num(regex_t* reg, int num)
271 {
272  MemNumType n = (MemNumType )num;
273 
274  BBUF_ADD(reg, &n, SIZE_MEMNUM);
275  return 0;
276 }
277 
278 #if 0
279 static int
280 add_pointer(regex_t* reg, void* addr)
281 {
282  PointerType ptr = (PointerType )addr;
283 
284  BBUF_ADD(reg, &ptr, SIZE_POINTER);
285  return 0;
286 }
287 #endif
288 
289 static int
290 add_option(regex_t* reg, OnigOptionType option)
291 {
292  BBUF_ADD(reg, &option, SIZE_OPTION);
293  return 0;
294 }
295 
296 static int
297 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
298 {
299  int r;
300 
301  r = add_opcode(reg, opcode);
302  if (r) return r;
303  r = add_rel_addr(reg, addr);
304  return r;
305 }
306 
307 static int
308 add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
309 {
310  BBUF_ADD(reg, bytes, len);
311  return 0;
312 }
313 
314 static int
315 add_bitset(regex_t* reg, BitSetRef bs)
316 {
317  BBUF_ADD(reg, bs, SIZE_BITSET);
318  return 0;
319 }
320 
321 static int
322 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
323 {
324  int r;
325 
326  r = add_opcode(reg, opcode);
327  if (r) return r;
328  r = add_option(reg, option);
329  return r;
330 }
331 
332 static int compile_length_tree(Node* node, regex_t* reg);
333 static int compile_tree(Node* node, regex_t* reg);
334 
335 
336 #define IS_NEED_STR_LEN_OP_EXACT(op) \
337  ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
338  (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
339 
340 static int
341 select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
342 {
343  int op;
344  OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
345 
346  if (ignore_case) {
347  switch (str_len) {
348  case 1: op = OP_EXACT1_IC; break;
349  default: op = OP_EXACTN_IC; break;
350  }
351  }
352  else {
353  switch (mb_len) {
354  case 1:
355  switch (str_len) {
356  case 1: op = OP_EXACT1; break;
357  case 2: op = OP_EXACT2; break;
358  case 3: op = OP_EXACT3; break;
359  case 4: op = OP_EXACT4; break;
360  case 5: op = OP_EXACT5; break;
361  default: op = OP_EXACTN; break;
362  }
363  break;
364 
365  case 2:
366  switch (str_len) {
367  case 1: op = OP_EXACTMB2N1; break;
368  case 2: op = OP_EXACTMB2N2; break;
369  case 3: op = OP_EXACTMB2N3; break;
370  default: op = OP_EXACTMB2N; break;
371  }
372  break;
373 
374  case 3:
375  op = OP_EXACTMB3N;
376  break;
377 
378  default:
379  op = OP_EXACTMBN;
380  break;
381  }
382  }
383  return op;
384 }
385 
386 static int
387 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
388 {
389  int r;
390  int saved_num_null_check = reg->num_null_check;
391 
392  if (empty_info != 0) {
393  r = add_opcode(reg, OP_NULL_CHECK_START);
394  if (r) return r;
395  r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
396  if (r) return r;
397  reg->num_null_check++;
398  }
399 
400  r = compile_tree(node, reg);
401  if (r) return r;
402 
403  if (empty_info != 0) {
404  if (empty_info == NQ_TARGET_IS_EMPTY)
405  r = add_opcode(reg, OP_NULL_CHECK_END);
406  else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
407  r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
408  else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
409  r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
410 
411  if (r) return r;
412  r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
413  }
414  return r;
415 }
416 
417 #ifdef USE_SUBEXP_CALL
418 static int
419 compile_call(CallNode* node, regex_t* reg)
420 {
421  int r;
422 
423  r = add_opcode(reg, OP_CALL);
424  if (r) return r;
425  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
426  node->target);
427  if (r) return r;
428  r = add_abs_addr(reg, 0 /*dummy addr.*/);
429  return r;
430 }
431 #endif
432 
433 static int
434 compile_tree_n_times(Node* node, int n, regex_t* reg)
435 {
436  int i, r;
437 
438  for (i = 0; i < n; i++) {
439  r = compile_tree(node, reg);
440  if (r) return r;
441  }
442  return 0;
443 }
444 
445 static int
446 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
447  regex_t* reg ARG_UNUSED, int ignore_case)
448 {
449  int len;
450  int op = select_str_opcode(mb_len, byte_len, ignore_case);
451 
452  len = SIZE_OPCODE;
453 
454  if (op == OP_EXACTMBN) len += SIZE_LENGTH;
455  if (IS_NEED_STR_LEN_OP_EXACT(op))
456  len += SIZE_LENGTH;
457 
458  len += (int )byte_len;
459  return len;
460 }
461 
462 static int
463 add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
464  regex_t* reg, int ignore_case)
465 {
466  int op = select_str_opcode(mb_len, byte_len, ignore_case);
467  add_opcode(reg, op);
468 
469  if (op == OP_EXACTMBN)
470  add_length(reg, mb_len);
471 
472  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
473  if (op == OP_EXACTN_IC)
474  add_length(reg, byte_len);
475  else
476  add_length(reg, byte_len / mb_len);
477  }
478 
479  add_bytes(reg, s, byte_len);
480  return 0;
481 }
482 
483 
484 static int
485 compile_length_string_node(Node* node, regex_t* reg)
486 {
487  int rlen, r, len, prev_len, blen, ambig;
488  OnigEncoding enc = reg->enc;
489  UChar *p, *prev;
490  StrNode* sn;
491 
492  sn = NSTR(node);
493  if (sn->end <= sn->s)
494  return 0;
495 
496  ambig = NSTRING_IS_AMBIG(node);
497 
498  p = prev = sn->s;
499  prev_len = enclen(enc, p, sn->end);
500  p += prev_len;
501  blen = prev_len;
502  rlen = 0;
503 
504  for (; p < sn->end; ) {
505  len = enclen(enc, p, sn->end);
506  if (len == prev_len || ambig) {
507  blen += len;
508  }
509  else {
510  r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
511  rlen += r;
512  prev = p;
513  blen = len;
514  prev_len = len;
515  }
516  p += len;
517  }
518  r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
519  rlen += r;
520  return rlen;
521 }
522 
523 static int
524 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
525 {
526  if (sn->end <= sn->s)
527  return 0;
528 
529  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
530 }
531 
532 static int
533 compile_string_node(Node* node, regex_t* reg)
534 {
535  int r, len, prev_len, blen, ambig;
536  OnigEncoding enc = reg->enc;
537  UChar *p, *prev, *end;
538  StrNode* sn;
539 
540  sn = NSTR(node);
541  if (sn->end <= sn->s)
542  return 0;
543 
544  end = sn->end;
545  ambig = NSTRING_IS_AMBIG(node);
546 
547  p = prev = sn->s;
548  prev_len = enclen(enc, p, end);
549  p += prev_len;
550  blen = prev_len;
551 
552  for (; p < end; ) {
553  len = enclen(enc, p, end);
554  if (len == prev_len || ambig) {
555  blen += len;
556  }
557  else {
558  r = add_compile_string(prev, prev_len, blen, reg, ambig);
559  if (r) return r;
560 
561  prev = p;
562  blen = len;
563  prev_len = len;
564  }
565 
566  p += len;
567  }
568  return add_compile_string(prev, prev_len, blen, reg, ambig);
569 }
570 
571 static int
572 compile_string_raw_node(StrNode* sn, regex_t* reg)
573 {
574  if (sn->end <= sn->s)
575  return 0;
576 
577  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
578 }
579 
580 static int
581 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
582 {
583 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
584  add_length(reg, mbuf->used);
585  return add_bytes(reg, mbuf->p, mbuf->used);
586 #else
587  int r, pad_size;
588  UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
589 
590  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
591  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
592  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
593 
594  r = add_bytes(reg, mbuf->p, mbuf->used);
595 
596  /* padding for return value from compile_length_cclass_node() to be fix. */
597  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
598  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
599  return r;
600 #endif
601 }
602 
603 static int
604 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
605 {
606  int len;
607 
608  if (IS_NULL(cc->mbuf)) {
609  len = SIZE_OPCODE + SIZE_BITSET;
610  }
611  else {
612  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
613  len = SIZE_OPCODE;
614  }
615  else {
616  len = SIZE_OPCODE + SIZE_BITSET;
617  }
618 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
619  len += SIZE_LENGTH + cc->mbuf->used;
620 #else
621  len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
622 #endif
623  }
624 
625  return len;
626 }
627 
628 static int
629 compile_cclass_node(CClassNode* cc, regex_t* reg)
630 {
631  int r;
632 
633  if (IS_NULL(cc->mbuf)) {
634  if (IS_NCCLASS_NOT(cc))
635  add_opcode(reg, OP_CCLASS_NOT);
636  else
637  add_opcode(reg, OP_CCLASS);
638 
639  r = add_bitset(reg, cc->bs);
640  }
641  else {
642  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
643  if (IS_NCCLASS_NOT(cc))
644  add_opcode(reg, OP_CCLASS_MB_NOT);
645  else
646  add_opcode(reg, OP_CCLASS_MB);
647 
648  r = add_multi_byte_cclass(cc->mbuf, reg);
649  }
650  else {
651  if (IS_NCCLASS_NOT(cc))
652  add_opcode(reg, OP_CCLASS_MIX_NOT);
653  else
654  add_opcode(reg, OP_CCLASS_MIX);
655 
656  r = add_bitset(reg, cc->bs);
657  if (r) return r;
658  r = add_multi_byte_cclass(cc->mbuf, reg);
659  }
660  }
661 
662  return r;
663 }
664 
665 static int
666 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
667 {
668 #define REPEAT_RANGE_ALLOC 4
669 
670  OnigRepeatRange* p;
671 
672  if (reg->repeat_range_alloc == 0) {
673  p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
674  CHECK_NULL_RETURN_MEMERR(p);
675  reg->repeat_range = p;
676  reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
677  }
678  else if (reg->repeat_range_alloc <= id) {
679  int n;
680  n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
681  p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
682  sizeof(OnigRepeatRange) * n);
683  CHECK_NULL_RETURN_MEMERR(p);
684  reg->repeat_range = p;
685  reg->repeat_range_alloc = n;
686  }
687  else {
688  p = reg->repeat_range;
689  }
690 
691  p[id].lower = lower;
692  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
693  return 0;
694 }
695 
696 static int
697 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
698  regex_t* reg)
699 {
700  int r;
701  int num_repeat = reg->num_repeat;
702 
703  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
704  if (r) return r;
705  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
706  reg->num_repeat++;
707  if (r) return r;
708  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
709  if (r) return r;
710 
711  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
712  if (r) return r;
713 
714  r = compile_tree_empty_check(qn->target, reg, empty_info);
715  if (r) return r;
716 
717  if (
718 #ifdef USE_SUBEXP_CALL
719  reg->num_call > 0 ||
720 #endif
721  IS_QUANTIFIER_IN_REPEAT(qn)) {
722  r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
723  }
724  else {
725  r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
726  }
727  if (r) return r;
728  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
729  return r;
730 }
731 
732 static int
733 is_anychar_star_quantifier(QtfrNode* qn)
734 {
735  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
736  NTYPE(qn->target) == NT_CANY)
737  return 1;
738  else
739  return 0;
740 }
741 
742 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
743 #define CKN_ON (ckn > 0)
744 
745 #ifdef USE_COMBINATION_EXPLOSION_CHECK
746 
747 static int
748 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
749 {
750  int len, mod_tlen, cklen;
751  int ckn;
752  int infinite = IS_REPEAT_INFINITE(qn->upper);
753  int empty_info = qn->target_empty_info;
754  int tlen = compile_length_tree(qn->target, reg);
755 
756  if (tlen < 0) return tlen;
757 
758  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
759 
760  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
761 
762  /* anychar repeat */
763  if (NTYPE(qn->target) == NT_CANY) {
764  if (qn->greedy && infinite) {
765  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
766  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
767  else
768  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
769  }
770  }
771 
772  if (empty_info != 0)
773  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
774  else
775  mod_tlen = tlen;
776 
777  if (infinite && qn->lower <= 1) {
778  if (qn->greedy) {
779  if (qn->lower == 1)
780  len = SIZE_OP_JUMP;
781  else
782  len = 0;
783 
784  len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
785  }
786  else {
787  if (qn->lower == 0)
788  len = SIZE_OP_JUMP;
789  else
790  len = 0;
791 
792  len += mod_tlen + SIZE_OP_PUSH + cklen;
793  }
794  }
795  else if (qn->upper == 0) {
796  if (qn->is_referred != 0) /* /(?<n>..){0}/ */
797  len = SIZE_OP_JUMP + tlen;
798  else
799  len = 0;
800  }
801  else if (qn->upper == 1 && qn->greedy) {
802  if (qn->lower == 0) {
803  if (CKN_ON) {
804  len = SIZE_OP_STATE_CHECK_PUSH + tlen;
805  }
806  else {
807  len = SIZE_OP_PUSH + tlen;
808  }
809  }
810  else {
811  len = tlen;
812  }
813  }
814  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
815  len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
816  }
817  else {
818  len = SIZE_OP_REPEAT_INC
819  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
820  if (CKN_ON)
821  len += SIZE_OP_STATE_CHECK;
822  }
823 
824  return len;
825 }
826 
827 static int
828 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
829 {
830  int r, mod_tlen;
831  int ckn;
832  int infinite = IS_REPEAT_INFINITE(qn->upper);
833  int empty_info = qn->target_empty_info;
834  int tlen = compile_length_tree(qn->target, reg);
835 
836  if (tlen < 0) return tlen;
837 
838  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
839 
840  if (is_anychar_star_quantifier(qn)) {
841  r = compile_tree_n_times(qn->target, qn->lower, reg);
842  if (r) return r;
843  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
844  if (IS_MULTILINE(reg->options))
845  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
846  else
847  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
848  if (r) return r;
849  if (CKN_ON) {
850  r = add_state_check_num(reg, ckn);
851  if (r) return r;
852  }
853 
854  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
855  }
856  else {
857  if (IS_MULTILINE(reg->options)) {
858  r = add_opcode(reg, (CKN_ON ?
859  OP_STATE_CHECK_ANYCHAR_ML_STAR
860  : OP_ANYCHAR_ML_STAR));
861  }
862  else {
863  r = add_opcode(reg, (CKN_ON ?
864  OP_STATE_CHECK_ANYCHAR_STAR
865  : OP_ANYCHAR_STAR));
866  }
867  if (r) return r;
868  if (CKN_ON)
869  r = add_state_check_num(reg, ckn);
870 
871  return r;
872  }
873  }
874 
875  if (empty_info != 0)
876  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
877  else
878  mod_tlen = tlen;
879 
880  if (infinite && qn->lower <= 1) {
881  if (qn->greedy) {
882  if (qn->lower == 1) {
883  r = add_opcode_rel_addr(reg, OP_JUMP,
884  (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
885  if (r) return r;
886  }
887 
888  if (CKN_ON) {
889  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
890  if (r) return r;
891  r = add_state_check_num(reg, ckn);
892  if (r) return r;
893  r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
894  }
895  else {
896  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
897  }
898  if (r) return r;
899  r = compile_tree_empty_check(qn->target, reg, empty_info);
900  if (r) return r;
901  r = add_opcode_rel_addr(reg, OP_JUMP,
902  -(mod_tlen + (int )SIZE_OP_JUMP
903  + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
904  }
905  else {
906  if (qn->lower == 0) {
907  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
908  if (r) return r;
909  }
910  r = compile_tree_empty_check(qn->target, reg, empty_info);
911  if (r) return r;
912  if (CKN_ON) {
913  r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
914  if (r) return r;
915  r = add_state_check_num(reg, ckn);
916  if (r) return r;
917  r = add_rel_addr(reg,
918  -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
919  }
920  else
921  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
922  }
923  }
924  else if (qn->upper == 0) {
925  if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
926  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
927  if (r) return r;
928  r = compile_tree(qn->target, reg);
929  }
930  else
931  r = 0;
932  }
933  else if (qn->upper == 1 && qn->greedy) {
934  if (qn->lower == 0) {
935  if (CKN_ON) {
936  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
937  if (r) return r;
938  r = add_state_check_num(reg, ckn);
939  if (r) return r;
940  r = add_rel_addr(reg, tlen);
941  }
942  else {
943  r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
944  }
945  if (r) return r;
946  }
947 
948  r = compile_tree(qn->target, reg);
949  }
950  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
951  if (CKN_ON) {
952  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
953  if (r) return r;
954  r = add_state_check_num(reg, ckn);
955  if (r) return r;
956  r = add_rel_addr(reg, SIZE_OP_JUMP);
957  }
958  else {
959  r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
960  }
961 
962  if (r) return r;
963  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
964  if (r) return r;
965  r = compile_tree(qn->target, reg);
966  }
967  else {
968  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
969  if (CKN_ON) {
970  if (r) return r;
971  r = add_opcode(reg, OP_STATE_CHECK);
972  if (r) return r;
973  r = add_state_check_num(reg, ckn);
974  }
975  }
976  return r;
977 }
978 
979 #else /* USE_COMBINATION_EXPLOSION_CHECK */
980 
981 static int
982 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
983 {
984  int len, mod_tlen;
985  int infinite = IS_REPEAT_INFINITE(qn->upper);
986  int empty_info = qn->target_empty_info;
987  int tlen = compile_length_tree(qn->target, reg);
988 
989  if (tlen < 0) return tlen;
990 
991  /* anychar repeat */
992  if (NTYPE(qn->target) == NT_CANY) {
993  if (qn->greedy && infinite) {
994  if (IS_NOT_NULL(qn->next_head_exact))
995  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
996  else
997  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
998  }
999  }
1000 
1001  if (empty_info != 0)
1002  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1003  else
1004  mod_tlen = tlen;
1005 
1006  if (infinite &&
1007  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1008  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1009  len = SIZE_OP_JUMP;
1010  }
1011  else {
1012  len = tlen * qn->lower;
1013  }
1014 
1015  if (qn->greedy) {
1016 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1017  if (IS_NOT_NULL(qn->head_exact))
1018  len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1019  else
1020 #endif
1021  if (IS_NOT_NULL(qn->next_head_exact))
1022  len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1023  else
1024  len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1025  }
1026  else
1027  len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1028  }
1029  else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1030  len = SIZE_OP_JUMP + tlen;
1031  }
1032  else if (!infinite && qn->greedy &&
1033  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1034  <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1035  len = tlen * qn->lower;
1036  len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1037  }
1038  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1039  len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1040  }
1041  else {
1042  len = SIZE_OP_REPEAT_INC
1043  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1044  }
1045 
1046  return len;
1047 }
1048 
1049 static int
1050 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1051 {
1052  int i, r, mod_tlen;
1053  int infinite = IS_REPEAT_INFINITE(qn->upper);
1054  int empty_info = qn->target_empty_info;
1055  int tlen = compile_length_tree(qn->target, reg);
1056 
1057  if (tlen < 0) return tlen;
1058 
1059  if (is_anychar_star_quantifier(qn)) {
1060  r = compile_tree_n_times(qn->target, qn->lower, reg);
1061  if (r) return r;
1062  if (IS_NOT_NULL(qn->next_head_exact)) {
1063  if (IS_MULTILINE(reg->options))
1064  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1065  else
1066  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1067  if (r) return r;
1068  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1069  }
1070  else {
1071  if (IS_MULTILINE(reg->options))
1072  return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1073  else
1074  return add_opcode(reg, OP_ANYCHAR_STAR);
1075  }
1076  }
1077 
1078  if (empty_info != 0)
1079  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1080  else
1081  mod_tlen = tlen;
1082 
1083  if (infinite &&
1084  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1085  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1086  if (qn->greedy) {
1087 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1088  if (IS_NOT_NULL(qn->head_exact))
1089  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1090  else
1091 #endif
1092  if (IS_NOT_NULL(qn->next_head_exact))
1093  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1094  else
1095  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1096  }
1097  else {
1098  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1099  }
1100  if (r) return r;
1101  }
1102  else {
1103  r = compile_tree_n_times(qn->target, qn->lower, reg);
1104  if (r) return r;
1105  }
1106 
1107  if (qn->greedy) {
1108 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
1109  if (IS_NOT_NULL(qn->head_exact)) {
1110  r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1111  mod_tlen + SIZE_OP_JUMP);
1112  if (r) return r;
1113  add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1114  r = compile_tree_empty_check(qn->target, reg, empty_info);
1115  if (r) return r;
1116  r = add_opcode_rel_addr(reg, OP_JUMP,
1117  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1118  }
1119  else
1120 #endif
1121  if (IS_NOT_NULL(qn->next_head_exact)) {
1122  r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1123  mod_tlen + SIZE_OP_JUMP);
1124  if (r) return r;
1125  add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1126  r = compile_tree_empty_check(qn->target, reg, empty_info);
1127  if (r) return r;
1128  r = add_opcode_rel_addr(reg, OP_JUMP,
1129  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1130  }
1131  else {
1132  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1133  if (r) return r;
1134  r = compile_tree_empty_check(qn->target, reg, empty_info);
1135  if (r) return r;
1136  r = add_opcode_rel_addr(reg, OP_JUMP,
1137  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1138  }
1139  }
1140  else {
1141  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1142  if (r) return r;
1143  r = compile_tree_empty_check(qn->target, reg, empty_info);
1144  if (r) return r;
1145  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1146  }
1147  }
1148  else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1149  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1150  if (r) return r;
1151  r = compile_tree(qn->target, reg);
1152  }
1153  else if (!infinite && qn->greedy &&
1154  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1155  <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1156  int n = qn->upper - qn->lower;
1157 
1158  r = compile_tree_n_times(qn->target, qn->lower, reg);
1159  if (r) return r;
1160 
1161  for (i = 0; i < n; i++) {
1162  r = add_opcode_rel_addr(reg, OP_PUSH,
1163  (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1164  if (r) return r;
1165  r = compile_tree(qn->target, reg);
1166  if (r) return r;
1167  }
1168  }
1169  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1170  r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1171  if (r) return r;
1172  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1173  if (r) return r;
1174  r = compile_tree(qn->target, reg);
1175  }
1176  else {
1177  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1178  }
1179  return r;
1180 }
1181 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1182 
1183 static int
1184 compile_length_option_node(EncloseNode* node, regex_t* reg)
1185 {
1186  int tlen;
1187  OnigOptionType prev = reg->options;
1188 
1189  reg->options = node->option;
1190  tlen = compile_length_tree(node->target, reg);
1191  reg->options = prev;
1192 
1193  if (tlen < 0) return tlen;
1194 
1195  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1196  return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1197  + tlen + SIZE_OP_SET_OPTION;
1198  }
1199  else
1200  return tlen;
1201 }
1202 
1203 static int
1204 compile_option_node(EncloseNode* node, regex_t* reg)
1205 {
1206  int r;
1207  OnigOptionType prev = reg->options;
1208 
1209  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1210  r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1211  if (r) return r;
1212  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1213  if (r) return r;
1214  r = add_opcode(reg, OP_FAIL);
1215  if (r) return r;
1216  }
1217 
1218  reg->options = node->option;
1219  r = compile_tree(node->target, reg);
1220  reg->options = prev;
1221 
1222  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1223  if (r) return r;
1224  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1225  }
1226  return r;
1227 }
1228 
1229 static int
1230 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1231 {
1232  int len;
1233  int tlen;
1234 
1235  if (node->type == ENCLOSE_OPTION)
1236  return compile_length_option_node(node, reg);
1237 
1238  if (node->target) {
1239  tlen = compile_length_tree(node->target, reg);
1240  if (tlen < 0) return tlen;
1241  }
1242  else
1243  tlen = 0;
1244 
1245  switch (node->type) {
1246  case ENCLOSE_MEMORY:
1247 #ifdef USE_SUBEXP_CALL
1248  if (IS_ENCLOSE_CALLED(node)) {
1249  len = SIZE_OP_MEMORY_START_PUSH + tlen
1250  + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1251  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1252  len += (IS_ENCLOSE_RECURSION(node)
1253  ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1254  else
1255  len += (IS_ENCLOSE_RECURSION(node)
1256  ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1257  }
1258  else if (IS_ENCLOSE_RECURSION(node)) {
1259  len = SIZE_OP_MEMORY_START_PUSH;
1260  len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1261  ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1262  }
1263  else
1264 #endif
1265  {
1266  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1267  len = SIZE_OP_MEMORY_START_PUSH;
1268  else
1269  len = SIZE_OP_MEMORY_START;
1270 
1271  len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1272  ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1273  }
1274  break;
1275 
1276  case ENCLOSE_STOP_BACKTRACK:
1277  if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1278  QtfrNode* qn = NQTFR(node->target);
1279  tlen = compile_length_tree(qn->target, reg);
1280  if (tlen < 0) return tlen;
1281 
1282  len = tlen * qn->lower
1283  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1284  }
1285  else {
1286  len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1287  }
1288  break;
1289 
1290  case ENCLOSE_CONDITION:
1291  len = SIZE_OP_CONDITION;
1292  if (NTYPE(node->target) == NT_ALT) {
1293  Node* x = node->target;
1294 
1295  tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1296  if (tlen < 0) return tlen;
1297  len += tlen + SIZE_OP_JUMP;
1298  if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1299  x = NCDR(x);
1300  tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1301  if (tlen < 0) return tlen;
1302  len += tlen;
1303  if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1304  }
1305  else {
1306  return ONIGERR_PARSER_BUG;
1307  }
1308  break;
1309 
1310  case ENCLOSE_ABSENT:
1311  len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1312  break;
1313 
1314  default:
1315  return ONIGERR_TYPE_BUG;
1316  break;
1317  }
1318 
1319  return len;
1320 }
1321 
1322 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1323 
1324 static int
1325 compile_enclose_node(EncloseNode* node, regex_t* reg)
1326 {
1327  int r, len;
1328 
1329  if (node->type == ENCLOSE_OPTION)
1330  return compile_option_node(node, reg);
1331 
1332  switch (node->type) {
1333  case ENCLOSE_MEMORY:
1334 #ifdef USE_SUBEXP_CALL
1335  if (IS_ENCLOSE_CALLED(node)) {
1336  r = add_opcode(reg, OP_CALL);
1337  if (r) return r;
1338  node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1339  node->state |= NST_ADDR_FIXED;
1340  r = add_abs_addr(reg, (int )node->call_addr);
1341  if (r) return r;
1342  len = compile_length_tree(node->target, reg);
1343  len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1344  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1345  len += (IS_ENCLOSE_RECURSION(node)
1346  ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1347  else
1348  len += (IS_ENCLOSE_RECURSION(node)
1349  ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1350 
1351  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1352  if (r) return r;
1353  }
1354 #endif
1355  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1356  r = add_opcode(reg, OP_MEMORY_START_PUSH);
1357  else
1358  r = add_opcode(reg, OP_MEMORY_START);
1359  if (r) return r;
1360  r = add_mem_num(reg, node->regnum);
1361  if (r) return r;
1362  r = compile_tree(node->target, reg);
1363  if (r) return r;
1364 #ifdef USE_SUBEXP_CALL
1365  if (IS_ENCLOSE_CALLED(node)) {
1366  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1367  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1368  ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1369  else
1370  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1371  ? OP_MEMORY_END_REC : OP_MEMORY_END));
1372 
1373  if (r) return r;
1374  r = add_mem_num(reg, node->regnum);
1375  if (r) return r;
1376  r = add_opcode(reg, OP_RETURN);
1377  }
1378  else if (IS_ENCLOSE_RECURSION(node)) {
1379  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1380  r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1381  else
1382  r = add_opcode(reg, OP_MEMORY_END_REC);
1383  if (r) return r;
1384  r = add_mem_num(reg, node->regnum);
1385  }
1386  else
1387 #endif
1388  {
1389  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1390  r = add_opcode(reg, OP_MEMORY_END_PUSH);
1391  else
1392  r = add_opcode(reg, OP_MEMORY_END);
1393  if (r) return r;
1394  r = add_mem_num(reg, node->regnum);
1395  }
1396  break;
1397 
1398  case ENCLOSE_STOP_BACKTRACK:
1399  if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1400  QtfrNode* qn = NQTFR(node->target);
1401  r = compile_tree_n_times(qn->target, qn->lower, reg);
1402  if (r) return r;
1403 
1404  len = compile_length_tree(qn->target, reg);
1405  if (len < 0) return len;
1406 
1407  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1408  if (r) return r;
1409  r = compile_tree(qn->target, reg);
1410  if (r) return r;
1411  r = add_opcode(reg, OP_POP);
1412  if (r) return r;
1413  r = add_opcode_rel_addr(reg, OP_JUMP,
1414  -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1415  }
1416  else {
1417  r = add_opcode(reg, OP_PUSH_STOP_BT);
1418  if (r) return r;
1419  r = compile_tree(node->target, reg);
1420  if (r) return r;
1421  r = add_opcode(reg, OP_POP_STOP_BT);
1422  }
1423  break;
1424 
1425  case ENCLOSE_CONDITION:
1426  r = add_opcode(reg, OP_CONDITION);
1427  if (r) return r;
1428  r = add_mem_num(reg, node->regnum);
1429  if (r) return r;
1430 
1431  if (NTYPE(node->target) == NT_ALT) {
1432  Node* x = node->target;
1433  int len2;
1434 
1435  len = compile_length_tree(NCAR(x), reg); /* yes-node */
1436  if (len < 0) return len;
1437  if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1438  x = NCDR(x);
1439  len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1440  if (len2 < 0) return len2;
1441  if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1442 
1443  x = node->target;
1444  r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1445  if (r) return r;
1446  r = compile_tree(NCAR(x), reg); /* yes-node */
1447  if (r) return r;
1448  r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1449  if (r) return r;
1450  x = NCDR(x);
1451  r = compile_tree(NCAR(x), reg); /* no-node */
1452  }
1453  else {
1454  return ONIGERR_PARSER_BUG;
1455  }
1456  break;
1457 
1458  case ENCLOSE_ABSENT:
1459  len = compile_length_tree(node->target, reg);
1460  if (len < 0) return len;
1461 
1462  r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1463  if (r) return r;
1464  r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1465  if (r) return r;
1466  r = compile_tree(node->target, reg);
1467  if (r) return r;
1468  r = add_opcode(reg, OP_ABSENT_END);
1469  break;
1470 
1471  default:
1472  return ONIGERR_TYPE_BUG;
1473  break;
1474  }
1475 
1476  return r;
1477 }
1478 
1479 static int
1480 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1481 {
1482  int len;
1483  int tlen = 0;
1484 
1485  if (node->target) {
1486  tlen = compile_length_tree(node->target, reg);
1487  if (tlen < 0) return tlen;
1488  }
1489 
1490  switch (node->type) {
1491  case ANCHOR_PREC_READ:
1492  len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1493  break;
1494  case ANCHOR_PREC_READ_NOT:
1495  len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1496  break;
1497  case ANCHOR_LOOK_BEHIND:
1498  len = SIZE_OP_LOOK_BEHIND + tlen;
1499  break;
1500  case ANCHOR_LOOK_BEHIND_NOT:
1501  len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1502  break;
1503 
1504  default:
1505  len = SIZE_OPCODE;
1506  break;
1507  }
1508 
1509  return len;
1510 }
1511 
1512 static int
1513 compile_anchor_node(AnchorNode* node, regex_t* reg)
1514 {
1515  int r, len;
1516 
1517  switch (node->type) {
1518  case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1519  case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1520  case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1521  case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1522  case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1523  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1524 
1525  case ANCHOR_WORD_BOUND:
1526  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1527  else r = add_opcode(reg, OP_WORD_BOUND);
1528  break;
1529  case ANCHOR_NOT_WORD_BOUND:
1530  if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1531  else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1532  break;
1533 #ifdef USE_WORD_BEGIN_END
1534  case ANCHOR_WORD_BEGIN:
1535  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1536  else r = add_opcode(reg, OP_WORD_BEGIN);
1537  break;
1538  case ANCHOR_WORD_END:
1539  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1540  else r = add_opcode(reg, OP_WORD_END);
1541  break;
1542 #endif
1543  case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1544 
1545  case ANCHOR_PREC_READ:
1546  r = add_opcode(reg, OP_PUSH_POS);
1547  if (r) return r;
1548  r = compile_tree(node->target, reg);
1549  if (r) return r;
1550  r = add_opcode(reg, OP_POP_POS);
1551  break;
1552 
1553  case ANCHOR_PREC_READ_NOT:
1554  len = compile_length_tree(node->target, reg);
1555  if (len < 0) return len;
1556  r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1557  if (r) return r;
1558  r = compile_tree(node->target, reg);
1559  if (r) return r;
1560  r = add_opcode(reg, OP_FAIL_POS);
1561  break;
1562 
1563  case ANCHOR_LOOK_BEHIND:
1564  {
1565  int n;
1566  r = add_opcode(reg, OP_LOOK_BEHIND);
1567  if (r) return r;
1568  if (node->char_len < 0) {
1569  r = get_char_length_tree(node->target, reg, &n);
1570  if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1571  }
1572  else
1573  n = node->char_len;
1574  r = add_length(reg, n);
1575  if (r) return r;
1576  r = compile_tree(node->target, reg);
1577  }
1578  break;
1579 
1580  case ANCHOR_LOOK_BEHIND_NOT:
1581  {
1582  int n;
1583  len = compile_length_tree(node->target, reg);
1584  r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1585  len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1586  if (r) return r;
1587  if (node->char_len < 0) {
1588  r = get_char_length_tree(node->target, reg, &n);
1589  if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1590  }
1591  else
1592  n = node->char_len;
1593  r = add_length(reg, n);
1594  if (r) return r;
1595  r = compile_tree(node->target, reg);
1596  if (r) return r;
1597  r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1598  }
1599  break;
1600 
1601  default:
1602  return ONIGERR_TYPE_BUG;
1603  break;
1604  }
1605 
1606  return r;
1607 }
1608 
1609 static int
1610 compile_length_tree(Node* node, regex_t* reg)
1611 {
1612  int len, type, r;
1613 
1614  type = NTYPE(node);
1615  switch (type) {
1616  case NT_LIST:
1617  len = 0;
1618  do {
1619  r = compile_length_tree(NCAR(node), reg);
1620  if (r < 0) return r;
1621  len += r;
1622  } while (IS_NOT_NULL(node = NCDR(node)));
1623  r = len;
1624  break;
1625 
1626  case NT_ALT:
1627  {
1628  int n = 0;
1629  len = 0;
1630  do {
1631  r = compile_length_tree(NCAR(node), reg);
1632  if (r < 0) return r;
1633  len += r;
1634  n++;
1635  } while (IS_NOT_NULL(node = NCDR(node)));
1636  r = len;
1637  r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1638  }
1639  break;
1640 
1641  case NT_STR:
1642  if (NSTRING_IS_RAW(node))
1643  r = compile_length_string_raw_node(NSTR(node), reg);
1644  else
1645  r = compile_length_string_node(node, reg);
1646  break;
1647 
1648  case NT_CCLASS:
1649  r = compile_length_cclass_node(NCCLASS(node), reg);
1650  break;
1651 
1652  case NT_CTYPE:
1653  case NT_CANY:
1654  r = SIZE_OPCODE;
1655  break;
1656 
1657  case NT_BREF:
1658  {
1659  BRefNode* br = NBREF(node);
1660 
1661 #ifdef USE_BACKREF_WITH_LEVEL
1662  if (IS_BACKREF_NEST_LEVEL(br)) {
1663  r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1664  SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1665  }
1666  else
1667 #endif
1668  if (br->back_num == 1) {
1669  r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1670  ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1671  }
1672  else {
1673  r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1674  }
1675  }
1676  break;
1677 
1678 #ifdef USE_SUBEXP_CALL
1679  case NT_CALL:
1680  r = SIZE_OP_CALL;
1681  break;
1682 #endif
1683 
1684  case NT_QTFR:
1685  r = compile_length_quantifier_node(NQTFR(node), reg);
1686  break;
1687 
1688  case NT_ENCLOSE:
1689  r = compile_length_enclose_node(NENCLOSE(node), reg);
1690  break;
1691 
1692  case NT_ANCHOR:
1693  r = compile_length_anchor_node(NANCHOR(node), reg);
1694  break;
1695 
1696  default:
1697  return ONIGERR_TYPE_BUG;
1698  break;
1699  }
1700 
1701  return r;
1702 }
1703 
1704 static int
1705 compile_tree(Node* node, regex_t* reg)
1706 {
1707  int n, type, len, pos, r = 0;
1708 
1709  type = NTYPE(node);
1710  switch (type) {
1711  case NT_LIST:
1712  do {
1713  r = compile_tree(NCAR(node), reg);
1714  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1715  break;
1716 
1717  case NT_ALT:
1718  {
1719  Node* x = node;
1720  len = 0;
1721  do {
1722  len += compile_length_tree(NCAR(x), reg);
1723  if (NCDR(x) != NULL) {
1724  len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1725  }
1726  } while (IS_NOT_NULL(x = NCDR(x)));
1727  pos = reg->used + len; /* goal position */
1728 
1729  do {
1730  len = compile_length_tree(NCAR(node), reg);
1731  if (IS_NOT_NULL(NCDR(node))) {
1732  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1733  if (r) break;
1734  }
1735  r = compile_tree(NCAR(node), reg);
1736  if (r) break;
1737  if (IS_NOT_NULL(NCDR(node))) {
1738  len = pos - (reg->used + SIZE_OP_JUMP);
1739  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1740  if (r) break;
1741  }
1742  } while (IS_NOT_NULL(node = NCDR(node)));
1743  }
1744  break;
1745 
1746  case NT_STR:
1747  if (NSTRING_IS_RAW(node))
1748  r = compile_string_raw_node(NSTR(node), reg);
1749  else
1750  r = compile_string_node(node, reg);
1751  break;
1752 
1753  case NT_CCLASS:
1754  r = compile_cclass_node(NCCLASS(node), reg);
1755  break;
1756 
1757  case NT_CTYPE:
1758  {
1759  int op;
1760 
1761  switch (NCTYPE(node)->ctype) {
1762  case ONIGENC_CTYPE_WORD:
1763  if (NCTYPE(node)->ascii_range != 0) {
1764  if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1765  else op = OP_ASCII_WORD;
1766  }
1767  else {
1768  if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1769  else op = OP_WORD;
1770  }
1771  break;
1772  default:
1773  return ONIGERR_TYPE_BUG;
1774  break;
1775  }
1776  r = add_opcode(reg, op);
1777  }
1778  break;
1779 
1780  case NT_CANY:
1781  if (IS_MULTILINE(reg->options))
1782  r = add_opcode(reg, OP_ANYCHAR_ML);
1783  else
1784  r = add_opcode(reg, OP_ANYCHAR);
1785  break;
1786 
1787  case NT_BREF:
1788  {
1789  BRefNode* br = NBREF(node);
1790 
1791 #ifdef USE_BACKREF_WITH_LEVEL
1792  if (IS_BACKREF_NEST_LEVEL(br)) {
1793  r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1794  if (r) return r;
1795  r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1796  if (r) return r;
1797  r = add_length(reg, br->nest_level);
1798  if (r) return r;
1799 
1800  goto add_bacref_mems;
1801  }
1802  else
1803 #endif
1804  if (br->back_num == 1) {
1805  n = br->back_static[0];
1806  if (IS_IGNORECASE(reg->options)) {
1807  r = add_opcode(reg, OP_BACKREFN_IC);
1808  if (r) return r;
1809  r = add_mem_num(reg, n);
1810  }
1811  else {
1812  switch (n) {
1813  case 1: r = add_opcode(reg, OP_BACKREF1); break;
1814  case 2: r = add_opcode(reg, OP_BACKREF2); break;
1815  default:
1816  r = add_opcode(reg, OP_BACKREFN);
1817  if (r) return r;
1818  r = add_mem_num(reg, n);
1819  break;
1820  }
1821  }
1822  }
1823  else {
1824  int i;
1825  int* p;
1826 
1827  if (IS_IGNORECASE(reg->options)) {
1828  r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1829  }
1830  else {
1831  r = add_opcode(reg, OP_BACKREF_MULTI);
1832  }
1833  if (r) return r;
1834 
1835 #ifdef USE_BACKREF_WITH_LEVEL
1836  add_bacref_mems:
1837 #endif
1838  r = add_length(reg, br->back_num);
1839  if (r) return r;
1840  p = BACKREFS_P(br);
1841  for (i = br->back_num - 1; i >= 0; i--) {
1842  r = add_mem_num(reg, p[i]);
1843  if (r) return r;
1844  }
1845  }
1846  }
1847  break;
1848 
1849 #ifdef USE_SUBEXP_CALL
1850  case NT_CALL:
1851  r = compile_call(NCALL(node), reg);
1852  break;
1853 #endif
1854 
1855  case NT_QTFR:
1856  r = compile_quantifier_node(NQTFR(node), reg);
1857  break;
1858 
1859  case NT_ENCLOSE:
1860  r = compile_enclose_node(NENCLOSE(node), reg);
1861  break;
1862 
1863  case NT_ANCHOR:
1864  r = compile_anchor_node(NANCHOR(node), reg);
1865  break;
1866 
1867  default:
1868 #ifdef ONIG_DEBUG
1869  fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1870 #endif
1871  break;
1872  }
1873 
1874  return r;
1875 }
1876 
1877 #ifdef USE_NAMED_GROUP
1878 
1879 static int
1880 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1881 {
1882  int r = 0;
1883  Node* node = *plink;
1884 
1885  switch (NTYPE(node)) {
1886  case NT_LIST:
1887  case NT_ALT:
1888  do {
1889  r = noname_disable_map(&(NCAR(node)), map, counter);
1890  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1891  break;
1892 
1893  case NT_QTFR:
1894  {
1895  Node** ptarget = &(NQTFR(node)->target);
1896  Node* old = *ptarget;
1897  r = noname_disable_map(ptarget, map, counter);
1898  if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1899  onig_reduce_nested_quantifier(node, *ptarget);
1900  }
1901  }
1902  break;
1903 
1904  case NT_ENCLOSE:
1905  {
1906  EncloseNode* en = NENCLOSE(node);
1907  if (en->type == ENCLOSE_MEMORY) {
1908  if (IS_ENCLOSE_NAMED_GROUP(en)) {
1909  (*counter)++;
1910  map[en->regnum].new_val = *counter;
1911  en->regnum = *counter;
1912  }
1913  else if (en->regnum != 0) {
1914  *plink = en->target;
1915  en->target = NULL_NODE;
1916  onig_node_free(node);
1917  r = noname_disable_map(plink, map, counter);
1918  break;
1919  }
1920  }
1921  r = noname_disable_map(&(en->target), map, counter);
1922  }
1923  break;
1924 
1925  case NT_ANCHOR:
1926  if (NANCHOR(node)->target)
1927  r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1928  break;
1929 
1930  default:
1931  break;
1932  }
1933 
1934  return r;
1935 }
1936 
1937 static int
1938 renumber_node_backref(Node* node, GroupNumRemap* map, const int num_mem)
1939 {
1940  int i, pos, n, old_num;
1941  int *backs;
1942  BRefNode* bn = NBREF(node);
1943 
1944  if (! IS_BACKREF_NAME_REF(bn))
1945  return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1946 
1947  old_num = bn->back_num;
1948  if (IS_NULL(bn->back_dynamic))
1949  backs = bn->back_static;
1950  else
1951  backs = bn->back_dynamic;
1952 
1953  for (i = 0, pos = 0; i < old_num; i++) {
1954  if (backs[i] > num_mem) return ONIGERR_INVALID_BACKREF;
1955  n = map[backs[i]].new_val;
1956  if (n > 0) {
1957  backs[pos] = n;
1958  pos++;
1959  }
1960  }
1961 
1962  bn->back_num = pos;
1963  return 0;
1964 }
1965 
1966 static int
1967 renumber_by_map(Node* node, GroupNumRemap* map, const int num_mem)
1968 {
1969  int r = 0;
1970 
1971  switch (NTYPE(node)) {
1972  case NT_LIST:
1973  case NT_ALT:
1974  do {
1975  r = renumber_by_map(NCAR(node), map, num_mem);
1976  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1977  break;
1978  case NT_QTFR:
1979  r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1980  break;
1981  case NT_ENCLOSE:
1982  {
1983  EncloseNode* en = NENCLOSE(node);
1984  if (en->type == ENCLOSE_CONDITION) {
1985  if (en->regnum > num_mem) return ONIGERR_INVALID_BACKREF;
1986  en->regnum = map[en->regnum].new_val;
1987  }
1988  r = renumber_by_map(en->target, map, num_mem);
1989  }
1990  break;
1991 
1992  case NT_BREF:
1993  r = renumber_node_backref(node, map, num_mem);
1994  break;
1995 
1996  case NT_ANCHOR:
1997  if (NANCHOR(node)->target)
1998  r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
1999  break;
2000 
2001  default:
2002  break;
2003  }
2004 
2005  return r;
2006 }
2007 
2008 static int
2009 numbered_ref_check(Node* node)
2010 {
2011  int r = 0;
2012 
2013  switch (NTYPE(node)) {
2014  case NT_LIST:
2015  case NT_ALT:
2016  do {
2017  r = numbered_ref_check(NCAR(node));
2018  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2019  break;
2020  case NT_QTFR:
2021  r = numbered_ref_check(NQTFR(node)->target);
2022  break;
2023  case NT_ENCLOSE:
2024  r = numbered_ref_check(NENCLOSE(node)->target);
2025  break;
2026 
2027  case NT_BREF:
2028  if (! IS_BACKREF_NAME_REF(NBREF(node)))
2029  return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2030  break;
2031 
2032  case NT_ANCHOR:
2033  if (NANCHOR(node)->target)
2034  r = numbered_ref_check(NANCHOR(node)->target);
2035  break;
2036 
2037  default:
2038  break;
2039  }
2040 
2041  return r;
2042 }
2043 
2044 static int
2045 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2046 {
2047  int r, i, pos, counter;
2048  BitStatusType loc;
2049  GroupNumRemap* map;
2050 
2051  map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2052  CHECK_NULL_RETURN_MEMERR(map);
2053  for (i = 1; i <= env->num_mem; i++) {
2054  map[i].new_val = 0;
2055  }
2056  counter = 0;
2057  r = noname_disable_map(root, map, &counter);
2058  if (r != 0) return r;
2059 
2060  r = renumber_by_map(*root, map, env->num_mem);
2061  if (r != 0) return r;
2062 
2063  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2064  if (map[i].new_val > 0) {
2065  SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2066  pos++;
2067  }
2068  }
2069 
2070  loc = env->capture_history;
2071  BIT_STATUS_CLEAR(env->capture_history);
2072  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2073  if (BIT_STATUS_AT(loc, i)) {
2074  BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2075  }
2076  }
2077 
2078  env->num_mem = env->num_named;
2079  reg->num_mem = env->num_named;
2080 
2081  return onig_renumber_name_table(reg, map);
2082 }
2083 #endif /* USE_NAMED_GROUP */
2084 
2085 #ifdef USE_SUBEXP_CALL
2086 static int
2087 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2088 {
2089  int i, offset;
2090  EncloseNode* en;
2091  AbsAddrType addr;
2092 
2093  for (i = 0; i < uslist->num; i++) {
2094  en = NENCLOSE(uslist->us[i].target);
2095  if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2096  addr = en->call_addr;
2097  offset = uslist->us[i].offset;
2098 
2099  BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2100  }
2101  return 0;
2102 }
2103 #endif
2104 
2105 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2106 static int
2107 quantifiers_memory_node_info(Node* node)
2108 {
2109  int r = 0;
2110 
2111  switch (NTYPE(node)) {
2112  case NT_LIST:
2113  case NT_ALT:
2114  {
2115  int v;
2116  do {
2117  v = quantifiers_memory_node_info(NCAR(node));
2118  if (v > r) r = v;
2119  } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2120  }
2121  break;
2122 
2123 # ifdef USE_SUBEXP_CALL
2124  case NT_CALL:
2125  if (IS_CALL_RECURSION(NCALL(node))) {
2126  return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2127  }
2128  else
2129  r = quantifiers_memory_node_info(NCALL(node)->target);
2130  break;
2131 # endif
2132 
2133  case NT_QTFR:
2134  {
2135  QtfrNode* qn = NQTFR(node);
2136  if (qn->upper != 0) {
2137  r = quantifiers_memory_node_info(qn->target);
2138  }
2139  }
2140  break;
2141 
2142  case NT_ENCLOSE:
2143  {
2144  EncloseNode* en = NENCLOSE(node);
2145  switch (en->type) {
2146  case ENCLOSE_MEMORY:
2147  return NQ_TARGET_IS_EMPTY_MEM;
2148  break;
2149 
2150  case ENCLOSE_OPTION:
2151  case ENCLOSE_STOP_BACKTRACK:
2152  case ENCLOSE_CONDITION:
2153  case ENCLOSE_ABSENT:
2154  r = quantifiers_memory_node_info(en->target);
2155  break;
2156  default:
2157  break;
2158  }
2159  }
2160  break;
2161 
2162  case NT_BREF:
2163  case NT_STR:
2164  case NT_CTYPE:
2165  case NT_CCLASS:
2166  case NT_CANY:
2167  case NT_ANCHOR:
2168  default:
2169  break;
2170  }
2171 
2172  return r;
2173 }
2174 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2175 
2176 static int
2177 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2178 {
2179  OnigDistance tmin;
2180  int r = 0;
2181 
2182  *min = 0;
2183  switch (NTYPE(node)) {
2184  case NT_BREF:
2185  {
2186  int i;
2187  int* backs;
2188  Node** nodes = SCANENV_MEM_NODES(env);
2189  BRefNode* br = NBREF(node);
2190  if (br->state & NST_RECURSION) break;
2191 
2192  backs = BACKREFS_P(br);
2193  if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2194  r = get_min_match_length(nodes[backs[0]], min, env);
2195  if (r != 0) break;
2196  for (i = 1; i < br->back_num; i++) {
2197  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2198  r = get_min_match_length(nodes[backs[i]], &tmin, env);
2199  if (r != 0) break;
2200  if (*min > tmin) *min = tmin;
2201  }
2202  }
2203  break;
2204 
2205 #ifdef USE_SUBEXP_CALL
2206  case NT_CALL:
2207  if (IS_CALL_RECURSION(NCALL(node))) {
2208  EncloseNode* en = NENCLOSE(NCALL(node)->target);
2209  if (IS_ENCLOSE_MIN_FIXED(en))
2210  *min = en->min_len;
2211  }
2212  else
2213  r = get_min_match_length(NCALL(node)->target, min, env);
2214  break;
2215 #endif
2216 
2217  case NT_LIST:
2218  do {
2219  r = get_min_match_length(NCAR(node), &tmin, env);
2220  if (r == 0) *min += tmin;
2221  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2222  break;
2223 
2224  case NT_ALT:
2225  {
2226  Node *x, *y;
2227  y = node;
2228  do {
2229  x = NCAR(y);
2230  r = get_min_match_length(x, &tmin, env);
2231  if (r != 0) break;
2232  if (y == node) *min = tmin;
2233  else if (*min > tmin) *min = tmin;
2234  } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2235  }
2236  break;
2237 
2238  case NT_STR:
2239  {
2240  StrNode* sn = NSTR(node);
2241  *min = sn->end - sn->s;
2242  }
2243  break;
2244 
2245  case NT_CTYPE:
2246  *min = 1;
2247  break;
2248 
2249  case NT_CCLASS:
2250  case NT_CANY:
2251  *min = 1;
2252  break;
2253 
2254  case NT_QTFR:
2255  {
2256  QtfrNode* qn = NQTFR(node);
2257 
2258  if (qn->lower > 0) {
2259  r = get_min_match_length(qn->target, min, env);
2260  if (r == 0)
2261  *min = distance_multiply(*min, qn->lower);
2262  }
2263  }
2264  break;
2265 
2266  case NT_ENCLOSE:
2267  {
2268  EncloseNode* en = NENCLOSE(node);
2269  switch (en->type) {
2270  case ENCLOSE_MEMORY:
2271  if (IS_ENCLOSE_MIN_FIXED(en))
2272  *min = en->min_len;
2273  else {
2274  if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2275  *min = 0; /* recursive */
2276  else {
2277  SET_ENCLOSE_STATUS(node, NST_MARK1);
2278  r = get_min_match_length(en->target, min, env);
2279  CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2280  if (r == 0) {
2281  en->min_len = *min;
2282  SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2283  }
2284  }
2285  }
2286  break;
2287 
2288  case ENCLOSE_OPTION:
2289  case ENCLOSE_STOP_BACKTRACK:
2290  case ENCLOSE_CONDITION:
2291  r = get_min_match_length(en->target, min, env);
2292  break;
2293 
2294  case ENCLOSE_ABSENT:
2295  break;
2296  }
2297  }
2298  break;
2299 
2300  case NT_ANCHOR:
2301  default:
2302  break;
2303  }
2304 
2305  return r;
2306 }
2307 
2308 static int
2309 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2310 {
2311  OnigDistance tmax;
2312  int r = 0;
2313 
2314  *max = 0;
2315  switch (NTYPE(node)) {
2316  case NT_LIST:
2317  do {
2318  r = get_max_match_length(NCAR(node), &tmax, env);
2319  if (r == 0)
2320  *max = distance_add(*max, tmax);
2321  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2322  break;
2323 
2324  case NT_ALT:
2325  do {
2326  r = get_max_match_length(NCAR(node), &tmax, env);
2327  if (r == 0 && *max < tmax) *max = tmax;
2328  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2329  break;
2330 
2331  case NT_STR:
2332  {
2333  StrNode* sn = NSTR(node);
2334  *max = sn->end - sn->s;
2335  }
2336  break;
2337 
2338  case NT_CTYPE:
2339  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2340  break;
2341 
2342  case NT_CCLASS:
2343  case NT_CANY:
2344  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2345  break;
2346 
2347  case NT_BREF:
2348  {
2349  int i;
2350  int* backs;
2351  Node** nodes = SCANENV_MEM_NODES(env);
2352  BRefNode* br = NBREF(node);
2353  if (br->state & NST_RECURSION) {
2354  *max = ONIG_INFINITE_DISTANCE;
2355  break;
2356  }
2357  backs = BACKREFS_P(br);
2358  for (i = 0; i < br->back_num; i++) {
2359  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2360  r = get_max_match_length(nodes[backs[i]], &tmax, env);
2361  if (r != 0) break;
2362  if (*max < tmax) *max = tmax;
2363  }
2364  }
2365  break;
2366 
2367 #ifdef USE_SUBEXP_CALL
2368  case NT_CALL:
2369  if (! IS_CALL_RECURSION(NCALL(node)))
2370  r = get_max_match_length(NCALL(node)->target, max, env);
2371  else
2372  *max = ONIG_INFINITE_DISTANCE;
2373  break;
2374 #endif
2375 
2376  case NT_QTFR:
2377  {
2378  QtfrNode* qn = NQTFR(node);
2379 
2380  if (qn->upper != 0) {
2381  r = get_max_match_length(qn->target, max, env);
2382  if (r == 0 && *max != 0) {
2383  if (! IS_REPEAT_INFINITE(qn->upper))
2384  *max = distance_multiply(*max, qn->upper);
2385  else
2386  *max = ONIG_INFINITE_DISTANCE;
2387  }
2388  }
2389  }
2390  break;
2391 
2392  case NT_ENCLOSE:
2393  {
2394  EncloseNode* en = NENCLOSE(node);
2395  switch (en->type) {
2396  case ENCLOSE_MEMORY:
2397  if (IS_ENCLOSE_MAX_FIXED(en))
2398  *max = en->max_len;
2399  else {
2400  if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2401  *max = ONIG_INFINITE_DISTANCE;
2402  else {
2403  SET_ENCLOSE_STATUS(node, NST_MARK1);
2404  r = get_max_match_length(en->target, max, env);
2405  CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2406  if (r == 0) {
2407  en->max_len = *max;
2408  SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2409  }
2410  }
2411  }
2412  break;
2413 
2414  case ENCLOSE_OPTION:
2415  case ENCLOSE_STOP_BACKTRACK:
2416  case ENCLOSE_CONDITION:
2417  r = get_max_match_length(en->target, max, env);
2418  break;
2419 
2420  case ENCLOSE_ABSENT:
2421  break;
2422  }
2423  }
2424  break;
2425 
2426  case NT_ANCHOR:
2427  default:
2428  break;
2429  }
2430 
2431  return r;
2432 }
2433 
2434 #define GET_CHAR_LEN_VARLEN -1
2435 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2436 
2437 /* fixed size pattern node only */
2438 static int
2439 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2440 {
2441  int tlen;
2442  int r = 0;
2443 
2444  level++;
2445  *len = 0;
2446  switch (NTYPE(node)) {
2447  case NT_LIST:
2448  do {
2449  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2450  if (r == 0)
2451  *len = (int )distance_add(*len, tlen);
2452  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2453  break;
2454 
2455  case NT_ALT:
2456  {
2457  int tlen2;
2458  int varlen = 0;
2459 
2460  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2461  while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2462  r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2463  if (r == 0) {
2464  if (tlen != tlen2)
2465  varlen = 1;
2466  }
2467  }
2468  if (r == 0) {
2469  if (varlen != 0) {
2470  if (level == 1)
2471  r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2472  else
2473  r = GET_CHAR_LEN_VARLEN;
2474  }
2475  else
2476  *len = tlen;
2477  }
2478  }
2479  break;
2480 
2481  case NT_STR:
2482  {
2483  StrNode* sn = NSTR(node);
2484  UChar *s = sn->s;
2485  while (s < sn->end) {
2486  s += enclen(reg->enc, s, sn->end);
2487  (*len)++;
2488  }
2489  }
2490  break;
2491 
2492  case NT_QTFR:
2493  {
2494  QtfrNode* qn = NQTFR(node);
2495  if (qn->lower == qn->upper) {
2496  r = get_char_length_tree1(qn->target, reg, &tlen, level);
2497  if (r == 0)
2498  *len = (int )distance_multiply(tlen, qn->lower);
2499  }
2500  else
2501  r = GET_CHAR_LEN_VARLEN;
2502  }
2503  break;
2504 
2505 #ifdef USE_SUBEXP_CALL
2506  case NT_CALL:
2507  if (! IS_CALL_RECURSION(NCALL(node)))
2508  r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2509  else
2510  r = GET_CHAR_LEN_VARLEN;
2511  break;
2512 #endif
2513 
2514  case NT_CTYPE:
2515  *len = 1;
2516  break;
2517 
2518  case NT_CCLASS:
2519  case NT_CANY:
2520  *len = 1;
2521  break;
2522 
2523  case NT_ENCLOSE:
2524  {
2525  EncloseNode* en = NENCLOSE(node);
2526  switch (en->type) {
2527  case ENCLOSE_MEMORY:
2528 #ifdef USE_SUBEXP_CALL
2529  if (IS_ENCLOSE_CLEN_FIXED(en))
2530  *len = en->char_len;
2531  else {
2532  r = get_char_length_tree1(en->target, reg, len, level);
2533  if (r == 0) {
2534  en->char_len = *len;
2535  SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2536  }
2537  }
2538  break;
2539 #endif
2540  case ENCLOSE_OPTION:
2541  case ENCLOSE_STOP_BACKTRACK:
2542  case ENCLOSE_CONDITION:
2543  r = get_char_length_tree1(en->target, reg, len, level);
2544  break;
2545  case ENCLOSE_ABSENT:
2546  default:
2547  break;
2548  }
2549  }
2550  break;
2551 
2552  case NT_ANCHOR:
2553  break;
2554 
2555  default:
2556  r = GET_CHAR_LEN_VARLEN;
2557  break;
2558  }
2559 
2560  return r;
2561 }
2562 
2563 static int
2564 get_char_length_tree(Node* node, regex_t* reg, int* len)
2565 {
2566  return get_char_length_tree1(node, reg, len, 0);
2567 }
2568 
2569 /* x is not included y ==> 1 : 0 */
2570 static int
2571 is_not_included(Node* x, Node* y, regex_t* reg)
2572 {
2573  int i;
2574  OnigDistance len;
2575  OnigCodePoint code;
2576  UChar *p;
2577  int ytype;
2578 
2579  retry:
2580  ytype = NTYPE(y);
2581  switch (NTYPE(x)) {
2582  case NT_CTYPE:
2583  {
2584  switch (ytype) {
2585  case NT_CTYPE:
2586  if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2587  NCTYPE(y)->not != NCTYPE(x)->not &&
2588  NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2589  return 1;
2590  else
2591  return 0;
2592  break;
2593 
2594  case NT_CCLASS:
2595  swap:
2596  {
2597  Node* tmp;
2598  tmp = x; x = y; y = tmp;
2599  goto retry;
2600  }
2601  break;
2602 
2603  case NT_STR:
2604  goto swap;
2605  break;
2606 
2607  default:
2608  break;
2609  }
2610  }
2611  break;
2612 
2613  case NT_CCLASS:
2614  {
2615  CClassNode* xc = NCCLASS(x);
2616  switch (ytype) {
2617  case NT_CTYPE:
2618  switch (NCTYPE(y)->ctype) {
2619  case ONIGENC_CTYPE_WORD:
2620  if (NCTYPE(y)->not == 0) {
2621  if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2622  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2623  if (BITSET_AT(xc->bs, i)) {
2624  if (NCTYPE(y)->ascii_range) {
2625  if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2626  }
2627  else {
2628  if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2629  }
2630  }
2631  }
2632  return 1;
2633  }
2634  return 0;
2635  }
2636  else {
2637  if (IS_NOT_NULL(xc->mbuf)) return 0;
2638  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2639  int is_word;
2640  if (NCTYPE(y)->ascii_range)
2641  is_word = IS_CODE_SB_WORD(reg->enc, i);
2642  else
2643  is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2644  if (! is_word) {
2645  if (!IS_NCCLASS_NOT(xc)) {
2646  if (BITSET_AT(xc->bs, i))
2647  return 0;
2648  }
2649  else {
2650  if (! BITSET_AT(xc->bs, i))
2651  return 0;
2652  }
2653  }
2654  }
2655  return 1;
2656  }
2657  break;
2658 
2659  default:
2660  break;
2661  }
2662  break;
2663 
2664  case NT_CCLASS:
2665  {
2666  int v;
2667  CClassNode* yc = NCCLASS(y);
2668 
2669  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2670  v = BITSET_AT(xc->bs, i);
2671  if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2672  (v == 0 && IS_NCCLASS_NOT(xc))) {
2673  v = BITSET_AT(yc->bs, i);
2674  if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2675  (v == 0 && IS_NCCLASS_NOT(yc)))
2676  return 0;
2677  }
2678  }
2679  if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2680  (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2681  return 1;
2682  return 0;
2683  }
2684  break;
2685 
2686  case NT_STR:
2687  goto swap;
2688  break;
2689 
2690  default:
2691  break;
2692  }
2693  }
2694  break;
2695 
2696  case NT_STR:
2697  {
2698  StrNode* xs = NSTR(x);
2699  if (NSTRING_LEN(x) == 0)
2700  break;
2701 
2702  switch (ytype) {
2703  case NT_CTYPE:
2704  switch (NCTYPE(y)->ctype) {
2705  case ONIGENC_CTYPE_WORD:
2706  if (NCTYPE(y)->ascii_range) {
2707  if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2708  return NCTYPE(y)->not;
2709  else
2710  return !(NCTYPE(y)->not);
2711  }
2712  else {
2713  if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2714  return NCTYPE(y)->not;
2715  else
2716  return !(NCTYPE(y)->not);
2717  }
2718  break;
2719  default:
2720  break;
2721  }
2722  break;
2723 
2724  case NT_CCLASS:
2725  {
2726  CClassNode* cc = NCCLASS(y);
2727 
2728  code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2729  xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2730  return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2731  }
2732  break;
2733 
2734  case NT_STR:
2735  {
2736  UChar *q;
2737  StrNode* ys = NSTR(y);
2738  len = NSTRING_LEN(x);
2739  if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2740  if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2741  /* tiny version */
2742  return 0;
2743  }
2744  else {
2745  for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2746  if (*p != *q) return 1;
2747  }
2748  }
2749  }
2750  break;
2751 
2752  default:
2753  break;
2754  }
2755  }
2756  break;
2757 
2758  default:
2759  break;
2760  }
2761 
2762  return 0;
2763 }
2764 
2765 static Node*
2766 get_head_value_node(Node* node, int exact, regex_t* reg)
2767 {
2768  Node* n = NULL_NODE;
2769 
2770  switch (NTYPE(node)) {
2771  case NT_BREF:
2772  case NT_ALT:
2773  case NT_CANY:
2774 #ifdef USE_SUBEXP_CALL
2775  case NT_CALL:
2776 #endif
2777  break;
2778 
2779  case NT_CTYPE:
2780  case NT_CCLASS:
2781  if (exact == 0) {
2782  n = node;
2783  }
2784  break;
2785 
2786  case NT_LIST:
2787  n = get_head_value_node(NCAR(node), exact, reg);
2788  break;
2789 
2790  case NT_STR:
2791  {
2792  StrNode* sn = NSTR(node);
2793 
2794  if (sn->end <= sn->s)
2795  break;
2796 
2797  if (exact != 0 &&
2798  !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2799  }
2800  else {
2801  n = node;
2802  }
2803  }
2804  break;
2805 
2806  case NT_QTFR:
2807  {
2808  QtfrNode* qn = NQTFR(node);
2809  if (qn->lower > 0) {
2810 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2811  if (IS_NOT_NULL(qn->head_exact))
2812  n = qn->head_exact;
2813  else
2814 #endif
2815  n = get_head_value_node(qn->target, exact, reg);
2816  }
2817  }
2818  break;
2819 
2820  case NT_ENCLOSE:
2821  {
2822  EncloseNode* en = NENCLOSE(node);
2823  switch (en->type) {
2824  case ENCLOSE_OPTION:
2825  {
2826  OnigOptionType options = reg->options;
2827 
2828  reg->options = NENCLOSE(node)->option;
2829  n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2830  reg->options = options;
2831  }
2832  break;
2833 
2834  case ENCLOSE_MEMORY:
2835  case ENCLOSE_STOP_BACKTRACK:
2836  case ENCLOSE_CONDITION:
2837  n = get_head_value_node(en->target, exact, reg);
2838  break;
2839 
2840  case ENCLOSE_ABSENT:
2841  break;
2842  }
2843  }
2844  break;
2845 
2846  case NT_ANCHOR:
2847  if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2848  n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2849  break;
2850 
2851  default:
2852  break;
2853  }
2854 
2855  return n;
2856 }
2857 
2858 static int
2859 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2860 {
2861  int type, r = 0;
2862 
2863  type = NTYPE(node);
2864  if ((NTYPE2BIT(type) & type_mask) == 0)
2865  return 1;
2866 
2867  switch (type) {
2868  case NT_LIST:
2869  case NT_ALT:
2870  do {
2871  r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2872  anchor_mask);
2873  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2874  break;
2875 
2876  case NT_QTFR:
2877  r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2878  anchor_mask);
2879  break;
2880 
2881  case NT_ENCLOSE:
2882  {
2883  EncloseNode* en = NENCLOSE(node);
2884  if ((en->type & enclose_mask) == 0)
2885  return 1;
2886 
2887  r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2888  }
2889  break;
2890 
2891  case NT_ANCHOR:
2892  type = NANCHOR(node)->type;
2893  if ((type & anchor_mask) == 0)
2894  return 1;
2895 
2896  if (NANCHOR(node)->target)
2897  r = check_type_tree(NANCHOR(node)->target,
2898  type_mask, enclose_mask, anchor_mask);
2899  break;
2900 
2901  default:
2902  break;
2903  }
2904  return r;
2905 }
2906 
2907 #ifdef USE_SUBEXP_CALL
2908 
2909 # define RECURSION_EXIST 1
2910 # define RECURSION_INFINITE 2
2911 
2912 static int
2913 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2914 {
2915  int type;
2916  int r = 0;
2917 
2918  type = NTYPE(node);
2919  switch (type) {
2920  case NT_LIST:
2921  {
2922  Node *x;
2923  OnigDistance min;
2924  int ret;
2925 
2926  x = node;
2927  do {
2928  ret = subexp_inf_recursive_check(NCAR(x), env, head);
2929  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2930  r |= ret;
2931  if (head) {
2932  ret = get_min_match_length(NCAR(x), &min, env);
2933  if (ret != 0) return ret;
2934  if (min != 0) head = 0;
2935  }
2936  } while (IS_NOT_NULL(x = NCDR(x)));
2937  }
2938  break;
2939 
2940  case NT_ALT:
2941  {
2942  int ret;
2943  r = RECURSION_EXIST;
2944  do {
2945  ret = subexp_inf_recursive_check(NCAR(node), env, head);
2946  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2947  r &= ret;
2948  } while (IS_NOT_NULL(node = NCDR(node)));
2949  }
2950  break;
2951 
2952  case NT_QTFR:
2953  r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2954  if (r == RECURSION_EXIST) {
2955  if (NQTFR(node)->lower == 0) r = 0;
2956  }
2957  break;
2958 
2959  case NT_ANCHOR:
2960  {
2961  AnchorNode* an = NANCHOR(node);
2962  switch (an->type) {
2963  case ANCHOR_PREC_READ:
2964  case ANCHOR_PREC_READ_NOT:
2965  case ANCHOR_LOOK_BEHIND:
2966  case ANCHOR_LOOK_BEHIND_NOT:
2967  r = subexp_inf_recursive_check(an->target, env, head);
2968  break;
2969  }
2970  }
2971  break;
2972 
2973  case NT_CALL:
2974  r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2975  break;
2976 
2977  case NT_ENCLOSE:
2978  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2979  return 0;
2980  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2981  return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2982  else {
2983  SET_ENCLOSE_STATUS(node, NST_MARK2);
2984  r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2985  CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2986  }
2987  break;
2988 
2989  default:
2990  break;
2991  }
2992 
2993  return r;
2994 }
2995 
2996 static int
2997 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2998 {
2999  int type;
3000  int r = 0;
3001 
3002  type = NTYPE(node);
3003  switch (type) {
3004  case NT_LIST:
3005  case NT_ALT:
3006  do {
3007  r = subexp_inf_recursive_check_trav(NCAR(node), env);
3008  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3009  break;
3010 
3011  case NT_QTFR:
3012  r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
3013  break;
3014 
3015  case NT_ANCHOR:
3016  {
3017  AnchorNode* an = NANCHOR(node);
3018  switch (an->type) {
3019  case ANCHOR_PREC_READ:
3020  case ANCHOR_PREC_READ_NOT:
3021  case ANCHOR_LOOK_BEHIND:
3022  case ANCHOR_LOOK_BEHIND_NOT:
3023  r = subexp_inf_recursive_check_trav(an->target, env);
3024  break;
3025  }
3026  }
3027  break;
3028 
3029  case NT_ENCLOSE:
3030  {
3031  EncloseNode* en = NENCLOSE(node);
3032 
3033  if (IS_ENCLOSE_RECURSION(en)) {
3034  SET_ENCLOSE_STATUS(node, NST_MARK1);
3035  r = subexp_inf_recursive_check(en->target, env, 1);
3036  if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3037  CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3038  }
3039  r = subexp_inf_recursive_check_trav(en->target, env);
3040  }
3041 
3042  break;
3043 
3044  default:
3045  break;
3046  }
3047 
3048  return r;
3049 }
3050 
3051 static int
3052 subexp_recursive_check(Node* node)
3053 {
3054  int r = 0;
3055 
3056  switch (NTYPE(node)) {
3057  case NT_LIST:
3058  case NT_ALT:
3059  do {
3060  r |= subexp_recursive_check(NCAR(node));
3061  } while (IS_NOT_NULL(node = NCDR(node)));
3062  break;
3063 
3064  case NT_QTFR:
3065  r = subexp_recursive_check(NQTFR(node)->target);
3066  break;
3067 
3068  case NT_ANCHOR:
3069  {
3070  AnchorNode* an = NANCHOR(node);
3071  switch (an->type) {
3072  case ANCHOR_PREC_READ:
3073  case ANCHOR_PREC_READ_NOT:
3074  case ANCHOR_LOOK_BEHIND:
3075  case ANCHOR_LOOK_BEHIND_NOT:
3076  r = subexp_recursive_check(an->target);
3077  break;
3078  }
3079  }
3080  break;
3081 
3082  case NT_CALL:
3083  r = subexp_recursive_check(NCALL(node)->target);
3084  if (r != 0) SET_CALL_RECURSION(node);
3085  break;
3086 
3087  case NT_ENCLOSE:
3088  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3089  return 0;
3090  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3091  return 1; /* recursion */
3092  else {
3093  SET_ENCLOSE_STATUS(node, NST_MARK2);
3094  r = subexp_recursive_check(NENCLOSE(node)->target);
3095  CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3096  }
3097  break;
3098 
3099  default:
3100  break;
3101  }
3102 
3103  return r;
3104 }
3105 
3106 
3107 static int
3108 subexp_recursive_check_trav(Node* node, ScanEnv* env)
3109 {
3110 # define FOUND_CALLED_NODE 1
3111 
3112  int type;
3113  int r = 0;
3114 
3115  type = NTYPE(node);
3116  switch (type) {
3117  case NT_LIST:
3118  case NT_ALT:
3119  {
3120  int ret;
3121  do {
3122  ret = subexp_recursive_check_trav(NCAR(node), env);
3123  if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3124  else if (ret < 0) return ret;
3125  } while (IS_NOT_NULL(node = NCDR(node)));
3126  }
3127  break;
3128 
3129  case NT_QTFR:
3130  r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3131  if (NQTFR(node)->upper == 0) {
3132  if (r == FOUND_CALLED_NODE)
3133  NQTFR(node)->is_referred = 1;
3134  }
3135  break;
3136 
3137  case NT_ANCHOR:
3138  {
3139  AnchorNode* an = NANCHOR(node);
3140  switch (an->type) {
3141  case ANCHOR_PREC_READ:
3142  case ANCHOR_PREC_READ_NOT:
3143  case ANCHOR_LOOK_BEHIND:
3144  case ANCHOR_LOOK_BEHIND_NOT:
3145  r = subexp_recursive_check_trav(an->target, env);
3146  break;
3147  }
3148  }
3149  break;
3150 
3151  case NT_ENCLOSE:
3152  {
3153  EncloseNode* en = NENCLOSE(node);
3154 
3155  if (! IS_ENCLOSE_RECURSION(en)) {
3156  if (IS_ENCLOSE_CALLED(en)) {
3157  SET_ENCLOSE_STATUS(node, NST_MARK1);
3158  r = subexp_recursive_check(en->target);
3159  if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3160  CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3161  }
3162  }
3163  r = subexp_recursive_check_trav(en->target, env);
3164  if (IS_ENCLOSE_CALLED(en))
3165  r |= FOUND_CALLED_NODE;
3166  }
3167  break;
3168 
3169  default:
3170  break;
3171  }
3172 
3173  return r;
3174 }
3175 
3176 static int
3177 setup_subexp_call(Node* node, ScanEnv* env)
3178 {
3179  int type;
3180  int r = 0;
3181 
3182  type = NTYPE(node);
3183  switch (type) {
3184  case NT_LIST:
3185  do {
3186  r = setup_subexp_call(NCAR(node), env);
3187  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3188  break;
3189 
3190  case NT_ALT:
3191  do {
3192  r = setup_subexp_call(NCAR(node), env);
3193  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3194  break;
3195 
3196  case NT_QTFR:
3197  r = setup_subexp_call(NQTFR(node)->target, env);
3198  break;
3199  case NT_ENCLOSE:
3200  r = setup_subexp_call(NENCLOSE(node)->target, env);
3201  break;
3202 
3203  case NT_CALL:
3204  {
3205  CallNode* cn = NCALL(node);
3206  Node** nodes = SCANENV_MEM_NODES(env);
3207 
3208  if (cn->group_num != 0) {
3209  int gnum = cn->group_num;
3210 
3211 # ifdef USE_NAMED_GROUP
3212  if (env->num_named > 0 &&
3213  IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3214  !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3215  return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3216  }
3217 # endif
3218  if (gnum > env->num_mem) {
3219  onig_scan_env_set_error_string(env,
3220  ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3221  return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3222  }
3223 
3224 # ifdef USE_NAMED_GROUP
3225  set_call_attr:
3226 # endif
3227  cn->target = nodes[cn->group_num];
3228  if (IS_NULL(cn->target)) {
3229  onig_scan_env_set_error_string(env,
3230  ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3231  return ONIGERR_UNDEFINED_NAME_REFERENCE;
3232  }
3233  SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3234  BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3235  cn->unset_addr_list = env->unset_addr_list;
3236  }
3237 # ifdef USE_NAMED_GROUP
3238 # ifdef USE_PERL_SUBEXP_CALL
3239  else if (cn->name == cn->name_end) {
3240  goto set_call_attr;
3241  }
3242 # endif
3243  else {
3244  int *refs;
3245 
3246  int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3247  &refs);
3248  if (n <= 0) {
3249  onig_scan_env_set_error_string(env,
3250  ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3251  return ONIGERR_UNDEFINED_NAME_REFERENCE;
3252  }
3253  else if (n > 1 &&
3254  ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3255  onig_scan_env_set_error_string(env,
3256  ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3257  return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3258  }
3259  else {
3260  cn->group_num = refs[0];
3261  goto set_call_attr;
3262  }
3263  }
3264 # endif
3265  }
3266  break;
3267 
3268  case NT_ANCHOR:
3269  {
3270  AnchorNode* an = NANCHOR(node);
3271 
3272  switch (an->type) {
3273  case ANCHOR_PREC_READ:
3274  case ANCHOR_PREC_READ_NOT:
3275  case ANCHOR_LOOK_BEHIND:
3276  case ANCHOR_LOOK_BEHIND_NOT:
3277  r = setup_subexp_call(an->target, env);
3278  break;
3279  }
3280  }
3281  break;
3282 
3283  default:
3284  break;
3285  }
3286 
3287  return r;
3288 }
3289 #endif
3290 
3291 /* divide different length alternatives in look-behind.
3292  (?<=A|B) ==> (?<=A)|(?<=B)
3293  (?<!A|B) ==> (?<!A)(?<!B)
3294 */
3295 static int
3296 divide_look_behind_alternatives(Node* node)
3297 {
3298  Node *head, *np, *insert_node;
3299  AnchorNode* an = NANCHOR(node);
3300  int anc_type = an->type;
3301 
3302  head = an->target;
3303  np = NCAR(head);
3304  swap_node(node, head);
3305  NCAR(node) = head;
3306  NANCHOR(head)->target = np;
3307 
3308  np = node;
3309  while ((np = NCDR(np)) != NULL_NODE) {
3310  insert_node = onig_node_new_anchor(anc_type);
3311  CHECK_NULL_RETURN_MEMERR(insert_node);
3312  NANCHOR(insert_node)->target = NCAR(np);
3313  NCAR(np) = insert_node;
3314  }
3315 
3316  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3317  np = node;
3318  do {
3319  SET_NTYPE(np, NT_LIST); /* alt -> list */
3320  } while ((np = NCDR(np)) != NULL_NODE);
3321  }
3322  return 0;
3323 }
3324 
3325 static int
3326 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3327 {
3328  int r, len;
3329  AnchorNode* an = NANCHOR(node);
3330 
3331  r = get_char_length_tree(an->target, reg, &len);
3332  if (r == 0)
3333  an->char_len = len;
3334  else if (r == GET_CHAR_LEN_VARLEN)
3335  r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3336  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3337  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3338  r = divide_look_behind_alternatives(node);
3339  else
3340  r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3341  }
3342 
3343  return r;
3344 }
3345 
3346 static int
3347 next_setup(Node* node, Node* next_node, regex_t* reg)
3348 {
3349  int type;
3350 
3351  retry:
3352  type = NTYPE(node);
3353  if (type == NT_QTFR) {
3354  QtfrNode* qn = NQTFR(node);
3355  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3356 #ifdef USE_QTFR_PEEK_NEXT
3357  Node* n = get_head_value_node(next_node, 1, reg);
3358  /* '\0': for UTF-16BE etc... */
3359  if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3360  qn->next_head_exact = n;
3361  }
3362 #endif
3363  /* automatic possessification a*b ==> (?>a*)b */
3364  if (qn->lower <= 1) {
3365  int ttype = NTYPE(qn->target);
3366  if (IS_NODE_TYPE_SIMPLE(ttype)) {
3367  Node *x, *y;
3368  x = get_head_value_node(qn->target, 0, reg);
3369  if (IS_NOT_NULL(x)) {
3370  y = get_head_value_node(next_node, 0, reg);
3371  if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3372  Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3373  CHECK_NULL_RETURN_MEMERR(en);
3374  SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3375  swap_node(node, en);
3376  NENCLOSE(node)->target = en;
3377  }
3378  }
3379  }
3380  }
3381  }
3382  }
3383  else if (type == NT_ENCLOSE) {
3384  EncloseNode* en = NENCLOSE(node);
3385  if (en->type == ENCLOSE_MEMORY) {
3386  node = en->target;
3387  goto retry;
3388  }
3389  }
3390  return 0;
3391 }
3392 
3393 
3394 static int
3395 update_string_node_case_fold(regex_t* reg, Node *node)
3396 {
3397  UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3398  UChar *sbuf, *ebuf, *sp;
3399  int r, i, len;
3400  OnigDistance sbuf_size;
3401  StrNode* sn = NSTR(node);
3402 
3403  end = sn->end;
3404  sbuf_size = (end - sn->s) * 2;
3405  sbuf = (UChar* )xmalloc(sbuf_size);
3406  CHECK_NULL_RETURN_MEMERR(sbuf);
3407  ebuf = sbuf + sbuf_size;
3408 
3409  sp = sbuf;
3410  p = sn->s;
3411  while (p < end) {
3412  len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3413  for (i = 0; i < len; i++) {
3414  if (sp >= ebuf) {
3415  UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3416  if (IS_NULL(p)) {
3417  xfree(sbuf);
3418  return ONIGERR_MEMORY;
3419  }
3420  sbuf = p;
3421  sp = sbuf + sbuf_size;
3422  sbuf_size *= 2;
3423  ebuf = sbuf + sbuf_size;
3424  }
3425 
3426  *sp++ = buf[i];
3427  }
3428  }
3429 
3430  r = onig_node_str_set(node, sbuf, sp);
3431 
3432  xfree(sbuf);
3433  return r;
3434 }
3435 
3436 static int
3437 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3438  regex_t* reg)
3439 {
3440  int r;
3441  Node *node;
3442 
3443  node = onig_node_new_str(s, end);
3444  if (IS_NULL(node)) return ONIGERR_MEMORY;
3445 
3446  r = update_string_node_case_fold(reg, node);
3447  if (r != 0) {
3448  onig_node_free(node);
3449  return r;
3450  }
3451 
3452  NSTRING_SET_AMBIG(node);
3453  NSTRING_SET_DONT_GET_OPT_INFO(node);
3454  *rnode = node;
3455  return 0;
3456 }
3457 
3458 static int
3459 is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3460  int slen)
3461 {
3462  int i;
3463 
3464  for (i = 0; i < item_num; i++) {
3465  if (items[i].byte_len != slen) {
3466  return 1;
3467  }
3468  if (items[i].code_len != 1) {
3469  return 1;
3470  }
3471  }
3472  return 0;
3473 }
3474 
3475 static int
3476 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3477  UChar *p, int slen, UChar *end,
3478  regex_t* reg, Node **rnode)
3479 {
3480  int r, i, j, len, varlen;
3481  Node *anode, *var_anode, *snode, *xnode, *an;
3482  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3483 
3484  *rnode = var_anode = NULL_NODE;
3485 
3486  varlen = 0;
3487  for (i = 0; i < item_num; i++) {
3488  if (items[i].byte_len != slen) {
3489  varlen = 1;
3490  break;
3491  }
3492  }
3493 
3494  if (varlen != 0) {
3495  *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3496  if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3497 
3498  xnode = onig_node_new_list(NULL, NULL);
3499  if (IS_NULL(xnode)) goto mem_err;
3500  NCAR(var_anode) = xnode;
3501 
3502  anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3503  if (IS_NULL(anode)) goto mem_err;
3504  NCAR(xnode) = anode;
3505  }
3506  else {
3507  *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3508  if (IS_NULL(anode)) return ONIGERR_MEMORY;
3509  }
3510 
3511  snode = onig_node_new_str(p, p + slen);
3512  if (IS_NULL(snode)) goto mem_err;
3513 
3514  NCAR(anode) = snode;
3515 
3516  for (i = 0; i < item_num; i++) {
3517  snode = onig_node_new_str(NULL, NULL);
3518  if (IS_NULL(snode)) goto mem_err;
3519 
3520  for (j = 0; j < items[i].code_len; j++) {
3521  len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3522  if (len < 0) {
3523  r = len;
3524  goto mem_err2;
3525  }
3526 
3527  r = onig_node_str_cat(snode, buf, buf + len);
3528  if (r != 0) goto mem_err2;
3529  }
3530 
3531  an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3532  if (IS_NULL(an)) {
3533  goto mem_err2;
3534  }
3535 
3536  if (items[i].byte_len != slen) {
3537  Node *rem;
3538  UChar *q = p + items[i].byte_len;
3539 
3540  if (q < end) {
3541  r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3542  if (r != 0) {
3543  onig_node_free(an);
3544  goto mem_err2;
3545  }
3546 
3547  xnode = onig_node_list_add(NULL_NODE, snode);
3548  if (IS_NULL(xnode)) {
3549  onig_node_free(an);
3550  onig_node_free(rem);
3551  goto mem_err2;
3552  }
3553  if (IS_NULL(onig_node_list_add(xnode, rem))) {
3554  onig_node_free(an);
3555  onig_node_free(xnode);
3556  onig_node_free(rem);
3557  goto mem_err;
3558  }
3559 
3560  NCAR(an) = xnode;
3561  }
3562  else {
3563  NCAR(an) = snode;
3564  }
3565 
3566  NCDR(var_anode) = an;
3567  var_anode = an;
3568  }
3569  else {
3570  NCAR(an) = snode;
3571  NCDR(anode) = an;
3572  anode = an;
3573  }
3574  }
3575 
3576  return varlen;
3577 
3578  mem_err2:
3579  onig_node_free(snode);
3580 
3581  mem_err:
3582  onig_node_free(*rnode);
3583 
3584  return ONIGERR_MEMORY;
3585 }
3586 
3587 static int
3588 expand_case_fold_string(Node* node, regex_t* reg)
3589 {
3590 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3591 
3592  int r, n, len, alt_num;
3593  int varlen = 0;
3594  UChar *start, *end, *p;
3595  Node *top_root, *root, *snode, *prev_node;
3596  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3597  StrNode* sn = NSTR(node);
3598 
3599  if (NSTRING_IS_AMBIG(node)) return 0;
3600 
3601  start = sn->s;
3602  end = sn->end;
3603  if (start >= end) return 0;
3604 
3605  r = 0;
3606  top_root = root = prev_node = snode = NULL_NODE;
3607  alt_num = 1;
3608  p = start;
3609  while (p < end) {
3610  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3611  p, end, items);
3612  if (n < 0) {
3613  r = n;
3614  goto err;
3615  }
3616 
3617  len = enclen(reg->enc, p, end);
3618 
3619  varlen = is_case_fold_variable_len(n, items, len);
3620  if (n == 0 || varlen == 0) {
3621  if (IS_NULL(snode)) {
3622  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3623  onig_node_free(top_root);
3624  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3625  if (IS_NULL(root)) {
3626  onig_node_free(prev_node);
3627  goto mem_err;
3628  }
3629  }
3630 
3631  prev_node = snode = onig_node_new_str(NULL, NULL);
3632  if (IS_NULL(snode)) goto mem_err;
3633  if (IS_NOT_NULL(root)) {
3634  if (IS_NULL(onig_node_list_add(root, snode))) {
3635  onig_node_free(snode);
3636  goto mem_err;
3637  }
3638  }
3639  }
3640 
3641  r = onig_node_str_cat(snode, p, p + len);
3642  if (r != 0) goto err;
3643  }
3644  else {
3645  alt_num *= (n + 1);
3646  if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3647 
3648  if (IS_NOT_NULL(snode)) {
3649  r = update_string_node_case_fold(reg, snode);
3650  if (r == 0) {
3651  NSTRING_SET_AMBIG(snode);
3652  }
3653  }
3654  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3655  onig_node_free(top_root);
3656  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3657  if (IS_NULL(root)) {
3658  onig_node_free(prev_node);
3659  goto mem_err;
3660  }
3661  }
3662 
3663  r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3664  if (r < 0) goto mem_err;
3665  if (r == 1) {
3666  if (IS_NULL(root)) {
3667  top_root = prev_node;
3668  }
3669  else {
3670  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3671  onig_node_free(prev_node);
3672  goto mem_err;
3673  }
3674  }
3675 
3676  root = NCAR(prev_node);
3677  }
3678  else { /* r == 0 */
3679  if (IS_NOT_NULL(root)) {
3680  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3681  onig_node_free(prev_node);
3682  goto mem_err;
3683  }
3684  }
3685  }
3686 
3687  snode = NULL_NODE;
3688  }
3689 
3690  p += len;
3691  }
3692  if (IS_NOT_NULL(snode)) {
3693  r = update_string_node_case_fold(reg, snode);
3694  if (r == 0) {
3695  NSTRING_SET_AMBIG(snode);
3696  }
3697  }
3698 
3699  if (p < end) {
3700  Node *srem;
3701 
3702  r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3703  if (r != 0) goto mem_err;
3704 
3705  if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3706  onig_node_free(top_root);
3707  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3708  if (IS_NULL(root)) {
3709  onig_node_free(srem);
3710  onig_node_free(prev_node);
3711  goto mem_err;
3712  }
3713  }
3714 
3715  if (IS_NULL(root)) {
3716  prev_node = srem;
3717  }
3718  else {
3719  if (IS_NULL(onig_node_list_add(root, srem))) {
3720  onig_node_free(srem);
3721  goto mem_err;
3722  }
3723  }
3724  }
3725 
3726  /* ending */
3727  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3728  swap_node(node, top_root);
3729  onig_node_free(top_root);
3730  return 0;
3731 
3732  mem_err:
3733  r = ONIGERR_MEMORY;
3734 
3735  err:
3736  onig_node_free(top_root);
3737  return r;
3738 }
3739 
3740 
3741 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3742 
3743 # define CEC_THRES_NUM_BIG_REPEAT 512
3744 # define CEC_INFINITE_NUM 0x7fffffff
3745 
3746 # define CEC_IN_INFINITE_REPEAT (1<<0)
3747 # define CEC_IN_FINITE_REPEAT (1<<1)
3748 # define CEC_CONT_BIG_REPEAT (1<<2)
3749 
3750 static int
3751 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3752 {
3753  int type;
3754  int r = state;
3755 
3756  type = NTYPE(node);
3757  switch (type) {
3758  case NT_LIST:
3759  {
3760  do {
3761  r = setup_comb_exp_check(NCAR(node), r, env);
3762  } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3763  }
3764  break;
3765 
3766  case NT_ALT:
3767  {
3768  int ret;
3769  do {
3770  ret = setup_comb_exp_check(NCAR(node), state, env);
3771  r |= ret;
3772  } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3773  }
3774  break;
3775 
3776  case NT_QTFR:
3777  {
3778  int child_state = state;
3779  int add_state = 0;
3780  QtfrNode* qn = NQTFR(node);
3781  Node* target = qn->target;
3782  int var_num;
3783 
3784  if (! IS_REPEAT_INFINITE(qn->upper)) {
3785  if (qn->upper > 1) {
3786  /* {0,1}, {1,1} are allowed */
3787  child_state |= CEC_IN_FINITE_REPEAT;
3788 
3789  /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3790  if (env->backrefed_mem == 0) {
3791  if (NTYPE(qn->target) == NT_ENCLOSE) {
3792  EncloseNode* en = NENCLOSE(qn->target);
3793  if (en->type == ENCLOSE_MEMORY) {
3794  if (NTYPE(en->target) == NT_QTFR) {
3795  QtfrNode* q = NQTFR(en->target);
3796  if (IS_REPEAT_INFINITE(q->upper)
3797  && q->greedy == qn->greedy) {
3798  qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3799  if (qn->upper == 1)
3800  child_state = state;
3801  }
3802  }
3803  }
3804  }
3805  }
3806  }
3807  }
3808 
3809  if (state & CEC_IN_FINITE_REPEAT) {
3810  qn->comb_exp_check_num = -1;
3811  }
3812  else {
3813  if (IS_REPEAT_INFINITE(qn->upper)) {
3814  var_num = CEC_INFINITE_NUM;
3815  child_state |= CEC_IN_INFINITE_REPEAT;
3816  }
3817  else {
3818  var_num = qn->upper - qn->lower;
3819  }
3820 
3821  if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3822  add_state |= CEC_CONT_BIG_REPEAT;
3823 
3824  if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3825  ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3826  var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3827  if (qn->comb_exp_check_num == 0) {
3828  env->num_comb_exp_check++;
3829  qn->comb_exp_check_num = env->num_comb_exp_check;
3830  if (env->curr_max_regnum > env->comb_exp_max_regnum)
3831  env->comb_exp_max_regnum = env->curr_max_regnum;
3832  }
3833  }
3834  }
3835 
3836  r = setup_comb_exp_check(target, child_state, env);
3837  r |= add_state;
3838  }
3839  break;
3840 
3841  case NT_ENCLOSE:
3842  {
3843  EncloseNode* en = NENCLOSE(node);
3844 
3845  switch (en->type) {
3846  case ENCLOSE_MEMORY:
3847  {
3848  if (env->curr_max_regnum < en->regnum)
3849  env->curr_max_regnum = en->regnum;
3850 
3851  r = setup_comb_exp_check(en->target, state, env);
3852  }
3853  break;
3854 
3855  default:
3856  r = setup_comb_exp_check(en->target, state, env);
3857  break;
3858  }
3859  }
3860  break;
3861 
3862 # ifdef USE_SUBEXP_CALL
3863  case NT_CALL:
3864  if (IS_CALL_RECURSION(NCALL(node)))
3865  env->has_recursion = 1;
3866  else
3867  r = setup_comb_exp_check(NCALL(node)->target, state, env);
3868  break;
3869 # endif
3870 
3871  default:
3872  break;
3873  }
3874 
3875  return r;
3876 }
3877 #endif
3878 
3879 #define IN_ALT (1<<0)
3880 #define IN_NOT (1<<1)
3881 #define IN_REPEAT (1<<2)
3882 #define IN_VAR_REPEAT (1<<3)
3883 #define IN_CALL (1<<4)
3884 #define IN_RECCALL (1<<5)
3885 
3886 /* setup_tree does the following work.
3887  1. check empty loop. (set qn->target_empty_info)
3888  2. expand ignore-case in char class.
3889  3. set memory status bit flags. (reg->mem_stats)
3890  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3891  5. find invalid patterns in look-behind.
3892  6. expand repeated string.
3893  */
3894 static int
3895 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3896 {
3897  int type;
3898  int r = 0;
3899 
3900 restart:
3901  type = NTYPE(node);
3902  switch (type) {
3903  case NT_LIST:
3904  {
3905  Node* prev = NULL_NODE;
3906  do {
3907  r = setup_tree(NCAR(node), reg, state, env);
3908  if (IS_NOT_NULL(prev) && r == 0) {
3909  r = next_setup(prev, NCAR(node), reg);
3910  }
3911  prev = NCAR(node);
3912  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3913  }
3914  break;
3915 
3916  case NT_ALT:
3917  do {
3918  r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3919  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3920  break;
3921 
3922  case NT_CCLASS:
3923  break;
3924 
3925  case NT_STR:
3926  if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3927  r = expand_case_fold_string(node, reg);
3928  }
3929  break;
3930 
3931  case NT_CTYPE:
3932  case NT_CANY:
3933  break;
3934 
3935 #ifdef USE_SUBEXP_CALL
3936  case NT_CALL:
3937  break;
3938 #endif
3939 
3940  case NT_BREF:
3941  {
3942  int i;
3943  int* p;
3944  Node** nodes = SCANENV_MEM_NODES(env);
3945  BRefNode* br = NBREF(node);
3946  p = BACKREFS_P(br);
3947  for (i = 0; i < br->back_num; i++) {
3948  if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3949  BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3950  BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3951 #ifdef USE_BACKREF_WITH_LEVEL
3952  if (IS_BACKREF_NEST_LEVEL(br)) {
3953  BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3954  }
3955 #endif
3956  SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3957  }
3958  }
3959  break;
3960 
3961  case NT_QTFR:
3962  {
3963  OnigDistance d;
3964  QtfrNode* qn = NQTFR(node);
3965  Node* target = qn->target;
3966 
3967  if ((state & IN_REPEAT) != 0) {
3968  qn->state |= NST_IN_REPEAT;
3969  }
3970 
3971  if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3972  r = get_min_match_length(target, &d, env);
3973  if (r) break;
3974  if (d == 0) {
3975  qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3976 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3977  r = quantifiers_memory_node_info(target);
3978  if (r < 0) break;
3979  if (r > 0) {
3980  qn->target_empty_info = r;
3981  }
3982 #endif
3983 #if 0
3984  r = get_max_match_length(target, &d, env);
3985  if (r == 0 && d == 0) {
3986  /* ()* ==> ()?, ()+ ==> () */
3987  qn->upper = 1;
3988  if (qn->lower > 1) qn->lower = 1;
3989  if (NTYPE(target) == NT_STR) {
3990  qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3991  }
3992  }
3993 #endif
3994  }
3995  }
3996 
3997  state |= IN_REPEAT;
3998  if (qn->lower != qn->upper)
3999  state |= IN_VAR_REPEAT;
4000  r = setup_tree(target, reg, state, env);
4001  if (r) break;
4002 
4003  /* expand string */
4004 #define EXPAND_STRING_MAX_LENGTH 100
4005  if (NTYPE(target) == NT_STR) {
4006  if (qn->lower > 1) {
4007  int i, n = qn->lower;
4008  OnigDistance len = NSTRING_LEN(target);
4009  StrNode* sn = NSTR(target);
4010  Node* np;
4011 
4012  np = onig_node_new_str(sn->s, sn->end);
4013  if (IS_NULL(np)) return ONIGERR_MEMORY;
4014  NSTR(np)->flag = sn->flag;
4015 
4016  for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
4017  r = onig_node_str_cat(np, sn->s, sn->end);
4018  if (r) {
4019  onig_node_free(np);
4020  return r;
4021  }
4022  }
4023  if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4024  Node *np1, *np2;
4025 
4026  qn->lower -= i;
4027  if (! IS_REPEAT_INFINITE(qn->upper))
4028  qn->upper -= i;
4029 
4030  np1 = onig_node_new_list(np, NULL);
4031  if (IS_NULL(np1)) {
4032  onig_node_free(np);
4033  return ONIGERR_MEMORY;
4034  }
4035  swap_node(np1, node);
4036  np2 = onig_node_list_add(node, np1);
4037  if (IS_NULL(np2)) {
4038  onig_node_free(np1);
4039  return ONIGERR_MEMORY;
4040  }
4041  }
4042  else {
4043  swap_node(np, node);
4044  onig_node_free(np);
4045  }
4046  break; /* break case NT_QTFR: */
4047  }
4048  }
4049 
4050 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4051  if (qn->greedy && (qn->target_empty_info != 0)) {
4052  if (NTYPE(target) == NT_QTFR) {
4053  QtfrNode* tqn = NQTFR(target);
4054  if (IS_NOT_NULL(tqn->head_exact)) {
4055  qn->head_exact = tqn->head_exact;
4056  tqn->head_exact = NULL;
4057  }
4058  }
4059  else {
4060  qn->head_exact = get_head_value_node(qn->target, 1, reg);
4061  }
4062  }
4063 #endif
4064  }
4065  break;
4066 
4067  case NT_ENCLOSE:
4068  {
4069  EncloseNode* en = NENCLOSE(node);
4070 
4071  switch (en->type) {
4072  case ENCLOSE_OPTION:
4073  {
4074  OnigOptionType options = reg->options;
4075  reg->options = NENCLOSE(node)->option;
4076  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4077  reg->options = options;
4078  }
4079  break;
4080 
4081  case ENCLOSE_MEMORY:
4082  if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4083  BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4084  /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4085  }
4086  if (IS_ENCLOSE_CALLED(en))
4087  state |= IN_CALL;
4088  if (IS_ENCLOSE_RECURSION(en))
4089  state |= IN_RECCALL;
4090  else if ((state & IN_RECCALL) != 0)
4091  SET_CALL_RECURSION(node);
4092  r = setup_tree(en->target, reg, state, env);
4093  break;
4094 
4095  case ENCLOSE_STOP_BACKTRACK:
4096  {
4097  Node* target = en->target;
4098  r = setup_tree(target, reg, state, env);
4099  if (NTYPE(target) == NT_QTFR) {
4100  QtfrNode* tqn = NQTFR(target);
4101  if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4102  tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4103  int qtype = NTYPE(tqn->target);
4104  if (IS_NODE_TYPE_SIMPLE(qtype))
4105  SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4106  }
4107  }
4108  }
4109  break;
4110 
4111  case ENCLOSE_CONDITION:
4112 #ifdef USE_NAMED_GROUP
4113  if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4114  env->num_named > 0 &&
4115  IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4116  !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4117  return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4118  }
4119 #endif
4120  if (NENCLOSE(node)->regnum > env->num_mem)
4121  return ONIGERR_INVALID_BACKREF;
4122  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4123  break;
4124 
4125  case ENCLOSE_ABSENT:
4126  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4127  break;
4128  }
4129  }
4130  break;
4131 
4132  case NT_ANCHOR:
4133  {
4134  AnchorNode* an = NANCHOR(node);
4135 
4136  switch (an->type) {
4137  case ANCHOR_PREC_READ:
4138  r = setup_tree(an->target, reg, state, env);
4139  break;
4140  case ANCHOR_PREC_READ_NOT:
4141  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4142  break;
4143 
4144 /* allowed node types in look-behind */
4145 #define ALLOWED_TYPE_IN_LB \
4146  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4147  BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4148 
4149 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4150 #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4151 
4152 #define ALLOWED_ANCHOR_IN_LB \
4153 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4154  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4155  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4156  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4157 #define ALLOWED_ANCHOR_IN_LB_NOT \
4158 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4159  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4160  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4161  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4162 
4163  case ANCHOR_LOOK_BEHIND:
4164  {
4165  r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4166  ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4167  if (r < 0) return r;
4168  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4169  if (NTYPE(node) != NT_ANCHOR) goto restart;
4170  r = setup_tree(an->target, reg, state, env);
4171  if (r != 0) return r;
4172  r = setup_look_behind(node, reg, env);
4173  }
4174  break;
4175 
4176  case ANCHOR_LOOK_BEHIND_NOT:
4177  {
4178  r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4179  ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4180  if (r < 0) return r;
4181  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4182  if (NTYPE(node) != NT_ANCHOR) goto restart;
4183  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4184  if (r != 0) return r;
4185  r = setup_look_behind(node, reg, env);
4186  }
4187  break;
4188  }
4189  }
4190  break;
4191 
4192  default:
4193  break;
4194  }
4195 
4196  return r;
4197 }
4198 
4199 #ifndef USE_SUNDAY_QUICK_SEARCH
4200 /* set skip map for Boyer-Moore search */
4201 static int
4202 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4203  UChar skip[], int** int_skip, int ignore_case)
4204 {
4205  OnigDistance i, len;
4206  int clen, flen, n, j, k;
4207  UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4208  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4209  OnigEncoding enc = reg->enc;
4210 
4211  len = end - s;
4212  if (len < ONIG_CHAR_TABLE_SIZE) {
4213  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
4214 
4215  n = 0;
4216  for (i = 0; i < len - 1; i += clen) {
4217  p = s + i;
4218  if (ignore_case)
4219  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4220  p, end, items);
4221  clen = enclen(enc, p, end);
4222  if (p + clen > end)
4223  clen = (int )(end - p);
4224 
4225  for (j = 0; j < n; j++) {
4226  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4227  return 1; /* different length isn't supported. */
4228  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4229  if (flen != clen)
4230  return 1; /* different length isn't supported. */
4231  }
4232  for (j = 0; j < clen; j++) {
4233  skip[s[i + j]] = (UChar )(len - 1 - i - j);
4234  for (k = 0; k < n; k++) {
4235  skip[buf[k][j]] = (UChar )(len - 1 - i - j);
4236  }
4237  }
4238  }
4239  }
4240  else {
4241 # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4242  /* This should not happen. */
4243  return ONIGERR_TYPE_BUG;
4244 # else
4245  if (IS_NULL(*int_skip)) {
4246  *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4247  if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4248  }
4249  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
4250 
4251  n = 0;
4252  for (i = 0; i < len - 1; i += clen) {
4253  p = s + i;
4254  if (ignore_case)
4255  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4256  p, end, items);
4257  clen = enclen(enc, p, end);
4258  if (p + clen > end)
4259  clen = (int )(end - p);
4260 
4261  for (j = 0; j < n; j++) {
4262  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4263  return 1; /* different length isn't supported. */
4264  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4265  if (flen != clen)
4266  return 1; /* different length isn't supported. */
4267  }
4268  for (j = 0; j < clen; j++) {
4269  (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
4270  for (k = 0; k < n; k++) {
4271  (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
4272  }
4273  }
4274  }
4275 # endif
4276  }
4277  return 0;
4278 }
4279 
4280 #else /* USE_SUNDAY_QUICK_SEARCH */
4281 
4282 /* set skip map for Sunday's quick search */
4283 static int
4284 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4285  UChar skip[], int** int_skip, int ignore_case)
4286 {
4287  OnigDistance i, len;
4288  int clen, flen, n, j, k;
4289  UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4290  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4291  OnigEncoding enc = reg->enc;
4292 
4293  len = end - s;
4294  if (len < ONIG_CHAR_TABLE_SIZE) {
4295  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
4296 
4297  n = 0;
4298  for (i = 0; i < len; i += clen) {
4299  p = s + i;
4300  if (ignore_case)
4301  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4302  p, end, items);
4303  clen = enclen(enc, p, end);
4304  if (p + clen > end)
4305  clen = (int )(end - p);
4306 
4307  for (j = 0; j < n; j++) {
4308  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4309  return 1; /* different length isn't supported. */
4310  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4311  if (flen != clen)
4312  return 1; /* different length isn't supported. */
4313  }
4314  for (j = 0; j < clen; j++) {
4315  skip[s[i + j]] = (UChar )(len - i - j);
4316  for (k = 0; k < n; k++) {
4317  skip[buf[k][j]] = (UChar )(len - i - j);
4318  }
4319  }
4320  }
4321  }
4322  else {
4323 # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4324  /* This should not happen. */
4325  return ONIGERR_TYPE_BUG;
4326 # else
4327  if (IS_NULL(*int_skip)) {
4328  *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4329  if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4330  }
4331  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4332 
4333  n = 0;
4334  for (i = 0; i < len; i += clen) {
4335  p = s + i;
4336  if (ignore_case)
4337  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4338  p, end, items);
4339  clen = enclen(enc, p, end);
4340  if (p + clen > end)
4341  clen = (int )(end - p);
4342 
4343  for (j = 0; j < n; j++) {
4344  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4345  return 1; /* different length isn't supported. */
4346  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4347  if (flen != clen)
4348  return 1; /* different length isn't supported. */
4349  }
4350  for (j = 0; j < clen; j++) {
4351  (*int_skip)[s[i + j]] = (int )(len - i - j);
4352  for (k = 0; k < n; k++) {
4353  (*int_skip)[buf[k][j]] = (int )(len - i - j);
4354  }
4355  }
4356  }
4357 # endif
4358  }
4359  return 0;
4360 }
4361 #endif /* USE_SUNDAY_QUICK_SEARCH */
4362 
4363 typedef struct {
4364  OnigDistance min; /* min byte length */
4365  OnigDistance max; /* max byte length */
4366 } MinMaxLen;
4367 
4368 typedef struct {
4369  MinMaxLen mmd;
4370  OnigEncoding enc;
4371  OnigOptionType options;
4372  OnigCaseFoldType case_fold_flag;
4373  ScanEnv* scan_env;
4374 } OptEnv;
4375 
4376 typedef struct {
4377  int left_anchor;
4378  int right_anchor;
4379 } OptAncInfo;
4380 
4381 typedef struct {
4382  MinMaxLen mmd; /* info position */
4383  OptAncInfo anc;
4384 
4385  int reach_end;
4386  int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
4387  int len;
4388  UChar s[OPT_EXACT_MAXLEN];
4389 } OptExactInfo;
4390 
4391 typedef struct {
4392  MinMaxLen mmd; /* info position */
4393  OptAncInfo anc;
4394 
4395  int value; /* weighted value */
4396  UChar map[ONIG_CHAR_TABLE_SIZE];
4397 } OptMapInfo;
4398 
4399 typedef struct {
4400  MinMaxLen len;
4401 
4402  OptAncInfo anc;
4403  OptExactInfo exb; /* boundary */
4404  OptExactInfo exm; /* middle */
4405  OptExactInfo expr; /* prec read (?=...) */
4406 
4407  OptMapInfo map; /* boundary */
4408 } NodeOptInfo;
4409 
4410 
4411 static int
4412 map_position_value(OnigEncoding enc, int i)
4413 {
4414  static const short int ByteValTable[] = {
4415  5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4416  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4417  12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4418  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4419  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4420  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4421  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4422  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4423  };
4424 
4425  if (i < numberof(ByteValTable)) {
4426  if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4427  return 20;
4428  else
4429  return (int )ByteValTable[i];
4430  }
4431  else
4432  return 4; /* Take it easy. */
4433 }
4434 
4435 static int
4436 distance_value(MinMaxLen* mm)
4437 {
4438  /* 1000 / (min-max-dist + 1) */
4439  static const short int dist_vals[] = {
4440  1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4441  91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4442  48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4443  32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4444  24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4445  20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4446  16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4447  14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4448  12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4449  11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4450  };
4451 
4452  OnigDistance d;
4453 
4454  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4455 
4456  d = mm->max - mm->min;
4457  if (d < numberof(dist_vals))
4458  /* return dist_vals[d] * 16 / (mm->min + 12); */
4459  return (int )dist_vals[d];
4460  else
4461  return 1;
4462 }
4463 
4464 static int
4465 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4466 {
4467  if (v2 <= 0) return -1;
4468  if (v1 <= 0) return 1;
4469 
4470  v1 *= distance_value(d1);
4471  v2 *= distance_value(d2);
4472 
4473  if (v2 > v1) return 1;
4474  if (v2 < v1) return -1;
4475 
4476  if (d2->min < d1->min) return 1;
4477  if (d2->min > d1->min) return -1;
4478  return 0;
4479 }
4480 
4481 static int
4482 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4483 {
4484  return (a->min == b->min && a->max == b->max) ? 1 : 0;
4485 }
4486 
4487 
4488 static void
4489 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4490 {
4491  mml->min = min;
4492  mml->max = max;
4493 }
4494 
4495 static void
4496 clear_mml(MinMaxLen* mml)
4497 {
4498  mml->min = mml->max = 0;
4499 }
4500 
4501 static void
4502 copy_mml(MinMaxLen* to, MinMaxLen* from)
4503 {
4504  to->min = from->min;
4505  to->max = from->max;
4506 }
4507 
4508 static void
4509 add_mml(MinMaxLen* to, MinMaxLen* from)
4510 {
4511  to->min = distance_add(to->min, from->min);
4512  to->max = distance_add(to->max, from->max);
4513 }
4514 
4515 #if 0
4516 static void
4517 add_len_mml(MinMaxLen* to, OnigDistance len)
4518 {
4519  to->min = distance_add(to->min, len);
4520  to->max = distance_add(to->max, len);
4521 }
4522 #endif
4523 
4524 static void
4525 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4526 {
4527  if (to->min > from->min) to->min = from->min;
4528  if (to->max < from->max) to->max = from->max;
4529 }
4530 
4531 static void
4532 copy_opt_env(OptEnv* to, OptEnv* from)
4533 {
4534  *to = *from;
4535 }
4536 
4537 static void
4538 clear_opt_anc_info(OptAncInfo* anc)
4539 {
4540  anc->left_anchor = 0;
4541  anc->right_anchor = 0;
4542 }
4543 
4544 static void
4545 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4546 {
4547  *to = *from;
4548 }
4549 
4550 static void
4551 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4552  OnigDistance left_len, OnigDistance right_len)
4553 {
4554  clear_opt_anc_info(to);
4555 
4556  to->left_anchor = left->left_anchor;
4557  if (left_len == 0) {
4558  to->left_anchor |= right->left_anchor;
4559  }
4560 
4561  to->right_anchor = right->right_anchor;
4562  if (right_len == 0) {
4563  to->right_anchor |= left->right_anchor;
4564  }
4565  else {
4566  to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4567  }
4568 }
4569 
4570 static int
4571 is_left_anchor(int anc)
4572 {
4573  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4574  anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4575  anc == ANCHOR_PREC_READ_NOT)
4576  return 0;
4577 
4578  return 1;
4579 }
4580 
4581 static int
4582 is_set_opt_anc_info(OptAncInfo* to, int anc)
4583 {
4584  if ((to->left_anchor & anc) != 0) return 1;
4585 
4586  return ((to->right_anchor & anc) != 0 ? 1 : 0);
4587 }
4588 
4589 static void
4590 add_opt_anc_info(OptAncInfo* to, int anc)
4591 {
4592  if (is_left_anchor(anc))
4593  to->left_anchor |= anc;
4594  else
4595  to->right_anchor |= anc;
4596 }
4597 
4598 static void
4599 remove_opt_anc_info(OptAncInfo* to, int anc)
4600 {
4601  if (is_left_anchor(anc))
4602  to->left_anchor &= ~anc;
4603  else
4604  to->right_anchor &= ~anc;
4605 }
4606 
4607 static void
4608 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4609 {
4610  to->left_anchor &= add->left_anchor;
4611  to->right_anchor &= add->right_anchor;
4612 }
4613 
4614 static int
4615 is_full_opt_exact_info(OptExactInfo* ex)
4616 {
4617  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4618 }
4619 
4620 static void
4621 clear_opt_exact_info(OptExactInfo* ex)
4622 {
4623  clear_mml(&ex->mmd);
4624  clear_opt_anc_info(&ex->anc);
4625  ex->reach_end = 0;
4626  ex->ignore_case = -1; /* unset */
4627  ex->len = 0;
4628  ex->s[0] = '\0';
4629 }
4630 
4631 static void
4632 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4633 {
4634  *to = *from;
4635 }
4636 
4637 static void
4638 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4639 {
4640  int i, j, len;
4641  UChar *p, *end;
4642  OptAncInfo tanc;
4643 
4644  if (to->ignore_case < 0)
4645  to->ignore_case = add->ignore_case;
4646  else if (to->ignore_case != add->ignore_case)
4647  return ; /* avoid */
4648 
4649  p = add->s;
4650  end = p + add->len;
4651  for (i = to->len; p < end; ) {
4652  len = enclen(enc, p, end);
4653  if (i + len > OPT_EXACT_MAXLEN) break;
4654  for (j = 0; j < len && p < end; j++)
4655  to->s[i++] = *p++;
4656  }
4657 
4658  to->len = i;
4659  to->reach_end = (p == end ? add->reach_end : 0);
4660 
4661  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4662  if (! to->reach_end) tanc.right_anchor = 0;
4663  copy_opt_anc_info(&to->anc, &tanc);
4664 }
4665 
4666 static void
4667 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4668  int raw ARG_UNUSED, OnigEncoding enc)
4669 {
4670  int i, j, len;
4671  UChar *p;
4672 
4673  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4674  len = enclen(enc, p, end);
4675  if (i + len > OPT_EXACT_MAXLEN) break;
4676  for (j = 0; j < len && p < end; j++)
4677  to->s[i++] = *p++;
4678  }
4679 
4680  to->len = i;
4681 }
4682 
4683 static void
4684 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4685 {
4686  int i, j, len;
4687 
4688  if (add->len == 0 || to->len == 0) {
4689  clear_opt_exact_info(to);
4690  return ;
4691  }
4692 
4693  if (! is_equal_mml(&to->mmd, &add->mmd)) {
4694  clear_opt_exact_info(to);
4695  return ;
4696  }
4697 
4698  for (i = 0; i < to->len && i < add->len; ) {
4699  if (to->s[i] != add->s[i]) break;
4700  len = enclen(env->enc, to->s + i, to->s + to->len);
4701 
4702  for (j = 1; j < len; j++) {
4703  if (to->s[i+j] != add->s[i+j]) break;
4704  }
4705  if (j < len) break;
4706  i += len;
4707  }
4708 
4709  if (! add->reach_end || i < add->len || i < to->len) {
4710  to->reach_end = 0;
4711  }
4712  to->len = i;
4713  if (to->ignore_case < 0)
4714  to->ignore_case = add->ignore_case;
4715  else if (add->ignore_case >= 0)
4716  to->ignore_case |= add->ignore_case;
4717 
4718  alt_merge_opt_anc_info(&to->anc, &add->anc);
4719  if (! to->reach_end) to->anc.right_anchor = 0;
4720 }
4721 
4722 static void
4723 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4724 {
4725  int v1, v2;
4726 
4727  v1 = now->len;
4728  v2 = alt->len;
4729 
4730  if (v2 == 0) {
4731  return ;
4732  }
4733  else if (v1 == 0) {
4734  copy_opt_exact_info(now, alt);
4735  return ;
4736  }
4737  else if (v1 <= 2 && v2 <= 2) {
4738  /* ByteValTable[x] is big value --> low price */
4739  v2 = map_position_value(enc, now->s[0]);
4740  v1 = map_position_value(enc, alt->s[0]);
4741 
4742  if (now->len > 1) v1 += 5;
4743  if (alt->len > 1) v2 += 5;
4744  }
4745 
4746  if (now->ignore_case <= 0) v1 *= 2;
4747  if (alt->ignore_case <= 0) v2 *= 2;
4748 
4749  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4750  copy_opt_exact_info(now, alt);
4751 }
4752 
4753 static void
4754 clear_opt_map_info(OptMapInfo* map)
4755 {
4756  static const OptMapInfo clean_info = {
4757  {0, 0}, {0, 0}, 0,
4758  {
4759  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4760  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4761  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4762  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4763  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4764  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4765  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4766  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4767  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4768  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4769  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4770  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4771  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4772  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4773  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4774  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4775  }
4776  };
4777 
4778  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4779 }
4780 
4781 static void
4782 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4783 {
4784  *to = *from;
4785 }
4786 
4787 static void
4788 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4789 {
4790  if (map->map[c] == 0) {
4791  map->map[c] = 1;
4792  map->value += map_position_value(enc, c);
4793  }
4794 }
4795 
4796 static int
4797 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4798  OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4799 {
4800  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4801  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4802  int i, n;
4803 
4804  add_char_opt_map_info(map, p[0], enc);
4805 
4806  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4807  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4808  if (n < 0) return n;
4809 
4810  for (i = 0; i < n; i++) {
4811  ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4812  add_char_opt_map_info(map, buf[0], enc);
4813  }
4814 
4815  return 0;
4816 }
4817 
4818 static void
4819 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4820 {
4821  const int z = 1<<15; /* 32768: something big value */
4822 
4823  int v1, v2;
4824 
4825  if (alt->value == 0) return ;
4826  if (now->value == 0) {
4827  copy_opt_map_info(now, alt);
4828  return ;
4829  }
4830 
4831  v1 = z / now->value;
4832  v2 = z / alt->value;
4833  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4834  copy_opt_map_info(now, alt);
4835 }
4836 
4837 static int
4838 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4839 {
4840 #define COMP_EM_BASE 20
4841  int ve, vm;
4842 
4843  if (m->value <= 0) return -1;
4844 
4845  ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4846  vm = COMP_EM_BASE * 5 * 2 / m->value;
4847  return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4848 }
4849 
4850 static void
4851 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4852 {
4853  int i, val;
4854 
4855  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4856  if (to->value == 0) return ;
4857  if (add->value == 0 || to->mmd.max < add->mmd.min) {
4858  clear_opt_map_info(to);
4859  return ;
4860  }
4861 
4862  alt_merge_mml(&to->mmd, &add->mmd);
4863 
4864  val = 0;
4865  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4866  if (add->map[i])
4867  to->map[i] = 1;
4868 
4869  if (to->map[i])
4870  val += map_position_value(enc, i);
4871  }
4872  to->value = val;
4873 
4874  alt_merge_opt_anc_info(&to->anc, &add->anc);
4875 }
4876 
4877 static void
4878 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4879 {
4880  copy_mml(&(opt->exb.mmd), mmd);
4881  copy_mml(&(opt->expr.mmd), mmd);
4882  copy_mml(&(opt->map.mmd), mmd);
4883 }
4884 
4885 static void
4886 clear_node_opt_info(NodeOptInfo* opt)
4887 {
4888  clear_mml(&opt->len);
4889  clear_opt_anc_info(&opt->anc);
4890  clear_opt_exact_info(&opt->exb);
4891  clear_opt_exact_info(&opt->exm);
4892  clear_opt_exact_info(&opt->expr);
4893  clear_opt_map_info(&opt->map);
4894 }
4895 
4896 static void
4897 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4898 {
4899  *to = *from;
4900 }
4901 
4902 static void
4903 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4904 {
4905  int exb_reach, exm_reach;
4906  OptAncInfo tanc;
4907 
4908  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4909  copy_opt_anc_info(&to->anc, &tanc);
4910 
4911  if (add->exb.len > 0 && to->len.max == 0) {
4912  concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4913  to->len.max, add->len.max);
4914  copy_opt_anc_info(&add->exb.anc, &tanc);
4915  }
4916 
4917  if (add->map.value > 0 && to->len.max == 0) {
4918  if (add->map.mmd.max == 0)
4919  add->map.anc.left_anchor |= to->anc.left_anchor;
4920  }
4921 
4922  exb_reach = to->exb.reach_end;
4923  exm_reach = to->exm.reach_end;
4924 
4925  if (add->len.max != 0)
4926  to->exb.reach_end = to->exm.reach_end = 0;
4927 
4928  if (add->exb.len > 0) {
4929  if (exb_reach) {
4930  concat_opt_exact_info(&to->exb, &add->exb, enc);
4931  clear_opt_exact_info(&add->exb);
4932  }
4933  else if (exm_reach) {
4934  concat_opt_exact_info(&to->exm, &add->exb, enc);
4935  clear_opt_exact_info(&add->exb);
4936  }
4937  }
4938  select_opt_exact_info(enc, &to->exm, &add->exb);
4939  select_opt_exact_info(enc, &to->exm, &add->exm);
4940 
4941  if (to->expr.len > 0) {
4942  if (add->len.max > 0) {
4943  if (to->expr.len > (int )add->len.max)
4944  to->expr.len = (int )add->len.max;
4945 
4946  if (to->expr.mmd.max == 0)
4947  select_opt_exact_info(enc, &to->exb, &to->expr);
4948  else
4949  select_opt_exact_info(enc, &to->exm, &to->expr);
4950  }
4951  }
4952  else if (add->expr.len > 0) {
4953  copy_opt_exact_info(&to->expr, &add->expr);
4954  }
4955 
4956  select_opt_map_info(&to->map, &add->map);
4957 
4958  add_mml(&to->len, &add->len);
4959 }
4960 
4961 static void
4962 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4963 {
4964  alt_merge_opt_anc_info (&to->anc, &add->anc);
4965  alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4966  alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4967  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4968  alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4969 
4970  alt_merge_mml(&to->len, &add->len);
4971 }
4972 
4973 
4974 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4975 
4976 static int
4977 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4978 {
4979  int type;
4980  int r = 0;
4981 
4982  clear_node_opt_info(opt);
4983  set_bound_node_opt_info(opt, &env->mmd);
4984 
4985  type = NTYPE(node);
4986  switch (type) {
4987  case NT_LIST:
4988  {
4989  OptEnv nenv;
4990  NodeOptInfo nopt;
4991  Node* nd = node;
4992 
4993  copy_opt_env(&nenv, env);
4994  do {
4995  r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4996  if (r == 0) {
4997  add_mml(&nenv.mmd, &nopt.len);
4998  concat_left_node_opt_info(env->enc, opt, &nopt);
4999  }
5000  } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
5001  }
5002  break;
5003 
5004  case NT_ALT:
5005  {
5006  NodeOptInfo nopt;
5007  Node* nd = node;
5008 
5009  do {
5010  r = optimize_node_left(NCAR(nd), &nopt, env);
5011  if (r == 0) {
5012  if (nd == node) copy_node_opt_info(opt, &nopt);
5013  else alt_merge_node_opt_info(opt, &nopt, env);
5014  }
5015  } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
5016  }
5017  break;
5018 
5019  case NT_STR:
5020  {
5021  StrNode* sn = NSTR(node);
5022  OnigDistance slen = sn->end - sn->s;
5023  int is_raw = NSTRING_IS_RAW(node);
5024 
5025  if (! NSTRING_IS_AMBIG(node)) {
5026  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5027  is_raw, env->enc);
5028  opt->exb.ignore_case = 0;
5029  if (slen > 0) {
5030  add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
5031  }
5032  set_mml(&opt->len, slen, slen);
5033  }
5034  else {
5035  OnigDistance max;
5036 
5037  if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
5038  int n = onigenc_strlen(env->enc, sn->s, sn->end);
5039  max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
5040  }
5041  else {
5042  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5043  is_raw, env->enc);
5044  opt->exb.ignore_case = 1;
5045 
5046  if (slen > 0) {
5047  r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5048  env->enc, env->case_fold_flag);
5049  if (r != 0) break;
5050  }
5051 
5052  max = slen;
5053  }
5054 
5055  set_mml(&opt->len, slen, max);
5056  }
5057 
5058  if ((OnigDistance )opt->exb.len == slen)
5059  opt->exb.reach_end = 1;
5060  }
5061  break;
5062 
5063  case NT_CCLASS:
5064  {
5065  int i, z;
5066  CClassNode* cc = NCCLASS(node);
5067 
5068  /* no need to check ignore case. (set in setup_tree()) */
5069 
5070  if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5071  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5072  OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5073 
5074  set_mml(&opt->len, min, max);
5075  }
5076  else {
5077  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5078  z = BITSET_AT(cc->bs, i);
5079  if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5080  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5081  }
5082  }
5083  set_mml(&opt->len, 1, 1);
5084  }
5085  }
5086  break;
5087 
5088  case NT_CTYPE:
5089  {
5090  int i, min, max;
5091  int maxcode;
5092 
5093  max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5094 
5095  if (max == 1) {
5096  min = 1;
5097 
5098  maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5099  switch (NCTYPE(node)->ctype) {
5100  case ONIGENC_CTYPE_WORD:
5101  if (NCTYPE(node)->not != 0) {
5102  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5103  if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5104  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5105  }
5106  }
5107  }
5108  else {
5109  for (i = 0; i < maxcode; i++) {
5110  if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5111  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5112  }
5113  }
5114  }
5115  break;
5116  }
5117  }
5118  else {
5119  min = ONIGENC_MBC_MINLEN(env->enc);
5120  }
5121  set_mml(&opt->len, min, max);
5122  }
5123  break;
5124 
5125  case NT_CANY:
5126  {
5127  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5128  OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5129  set_mml(&opt->len, min, max);
5130  }
5131  break;
5132 
5133  case NT_ANCHOR:
5134  switch (NANCHOR(node)->type) {
5135  case ANCHOR_BEGIN_BUF:
5136  case ANCHOR_BEGIN_POSITION:
5137  case ANCHOR_BEGIN_LINE:
5138  case ANCHOR_END_BUF:
5139  case ANCHOR_SEMI_END_BUF:
5140  case ANCHOR_END_LINE:
5141  case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5142  case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5143  add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5144  break;
5145 
5146  case ANCHOR_PREC_READ:
5147  {
5148  NodeOptInfo nopt;
5149 
5150  r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5151  if (r == 0) {
5152  if (nopt.exb.len > 0)
5153  copy_opt_exact_info(&opt->expr, &nopt.exb);
5154  else if (nopt.exm.len > 0)
5155  copy_opt_exact_info(&opt->expr, &nopt.exm);
5156 
5157  opt->expr.reach_end = 0;
5158 
5159  if (nopt.map.value > 0)
5160  copy_opt_map_info(&opt->map, &nopt.map);
5161  }
5162  }
5163  break;
5164 
5165  case ANCHOR_LOOK_BEHIND_NOT:
5166  break;
5167  }
5168  break;
5169 
5170  case NT_BREF:
5171  {
5172  int i;
5173  int* backs;
5174  OnigDistance min, max, tmin, tmax;
5175  Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5176  BRefNode* br = NBREF(node);
5177 
5178  if (br->state & NST_RECURSION) {
5179  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5180  break;
5181  }
5182  backs = BACKREFS_P(br);
5183  r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5184  if (r != 0) break;
5185  r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5186  if (r != 0) break;
5187  for (i = 1; i < br->back_num; i++) {
5188  r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5189  if (r != 0) break;
5190  r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5191  if (r != 0) break;
5192  if (min > tmin) min = tmin;
5193  if (max < tmax) max = tmax;
5194  }
5195  if (r == 0) set_mml(&opt->len, min, max);
5196  }
5197  break;
5198 
5199 #ifdef USE_SUBEXP_CALL
5200  case NT_CALL:
5201  if (IS_CALL_RECURSION(NCALL(node)))
5202  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5203  else {
5204  OnigOptionType save = env->options;
5205  env->options = NENCLOSE(NCALL(node)->target)->option;
5206  r = optimize_node_left(NCALL(node)->target, opt, env);
5207  env->options = save;
5208  }
5209  break;
5210 #endif
5211 
5212  case NT_QTFR:
5213  {
5214  int i;
5215  OnigDistance min, max;
5216  NodeOptInfo nopt;
5217  QtfrNode* qn = NQTFR(node);
5218 
5219  r = optimize_node_left(qn->target, &nopt, env);
5220  if (r) break;
5221 
5222  if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
5223  if (env->mmd.max == 0 &&
5224  NTYPE(qn->target) == NT_CANY && qn->greedy) {
5225  if (IS_MULTILINE(env->options))
5226  /* implicit anchor: /.*a/ ==> /\A.*a/ */
5227  add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5228  else
5229  add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5230  }
5231  }
5232  else {
5233  if (qn->lower > 0) {
5234  copy_node_opt_info(opt, &nopt);
5235  if (nopt.exb.len > 0) {
5236  if (nopt.exb.reach_end) {
5237  for (i = 2; i <= qn->lower &&
5238  ! is_full_opt_exact_info(&opt->exb); i++) {
5239  concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5240  }
5241  if (i < qn->lower) {
5242  opt->exb.reach_end = 0;
5243  }
5244  }
5245  }
5246 
5247  if (qn->lower != qn->upper) {
5248  opt->exb.reach_end = 0;
5249  opt->exm.reach_end = 0;
5250  }
5251  if (qn->lower > 1)
5252  opt->exm.reach_end = 0;
5253  }
5254  }
5255 
5256  min = distance_multiply(nopt.len.min, qn->lower);
5257  if (IS_REPEAT_INFINITE(qn->upper))
5258  max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5259  else
5260  max = distance_multiply(nopt.len.max, qn->upper);
5261 
5262  set_mml(&opt->len, min, max);
5263  }
5264  break;
5265 
5266  case NT_ENCLOSE:
5267  {
5268  EncloseNode* en = NENCLOSE(node);
5269 
5270  switch (en->type) {
5271  case ENCLOSE_OPTION:
5272  {
5273  OnigOptionType save = env->options;
5274 
5275  env->options = en->option;
5276  r = optimize_node_left(en->target, opt, env);
5277  env->options = save;
5278  }
5279  break;
5280 
5281  case ENCLOSE_MEMORY:
5282 #ifdef USE_SUBEXP_CALL
5283  en->opt_count++;
5284  if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5285  OnigDistance min, max;
5286 
5287  min = 0;
5288  max = ONIG_INFINITE_DISTANCE;
5289  if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5290  if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5291  set_mml(&opt->len, min, max);
5292  }
5293  else
5294 #endif
5295  {
5296  r = optimize_node_left(en->target, opt, env);
5297 
5298  if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5299  if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5300  remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5301  }
5302  }
5303  break;
5304 
5305  case ENCLOSE_STOP_BACKTRACK:
5306  case ENCLOSE_CONDITION:
5307  r = optimize_node_left(en->target, opt, env);
5308  break;
5309 
5310  case ENCLOSE_ABSENT:
5311  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5312  break;
5313  }
5314  }
5315  break;
5316 
5317  default:
5318 #ifdef ONIG_DEBUG
5319  fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5320  NTYPE(node));
5321 #endif
5322  r = ONIGERR_TYPE_BUG;
5323  break;
5324  }
5325 
5326  return r;
5327 }
5328 
5329 static int
5330 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5331 {
5332  int r;
5333  int allow_reverse;
5334 
5335  if (e->len == 0) return 0;
5336 
5337  reg->exact = (UChar* )xmalloc(e->len);
5338  CHECK_NULL_RETURN_MEMERR(reg->exact);
5339  xmemcpy(reg->exact, e->s, e->len);
5340  reg->exact_end = reg->exact + e->len;
5341 
5342  allow_reverse =
5343  ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5344 
5345  if (e->ignore_case > 0) {
5346  if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5347  r = set_bm_skip(reg->exact, reg->exact_end, reg,
5348  reg->map, &(reg->int_map), 1);
5349  if (r == 0) {
5350  reg->optimize = (allow_reverse != 0
5351  ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5352  }
5353  else {
5354  reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5355  }
5356  }
5357  else {
5358  reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5359  }
5360  }
5361  else {
5362  if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5363  r = set_bm_skip(reg->exact, reg->exact_end, reg,
5364  reg->map, &(reg->int_map), 0);
5365  if (r == 0) {
5366  reg->optimize = (allow_reverse != 0
5367  ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5368  }
5369  else {
5370  reg->optimize = ONIG_OPTIMIZE_EXACT;
5371  }
5372  }
5373  else {
5374  reg->optimize = ONIG_OPTIMIZE_EXACT;
5375  }
5376  }
5377 
5378  reg->dmin = e->mmd.min;
5379  reg->dmax = e->mmd.max;
5380 
5381  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5382  reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5383  }
5384 
5385  return 0;
5386 }
5387 
5388 static void
5389 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5390 {
5391  int i;
5392 
5393  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5394  reg->map[i] = m->map[i];
5395 
5396  reg->optimize = ONIG_OPTIMIZE_MAP;
5397  reg->dmin = m->mmd.min;
5398  reg->dmax = m->mmd.max;
5399 
5400  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5401  reg->threshold_len = (int )(reg->dmin + 1);
5402  }
5403 }
5404 
5405 static void
5406 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5407 {
5408  reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5409  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5410 }
5411 
5412 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5413 static void print_optimize_info(FILE* f, regex_t* reg);
5414 #endif
5415 
5416 static int
5417 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5418 {
5419 
5420  int r;
5421  NodeOptInfo opt;
5422  OptEnv env;
5423 
5424  env.enc = reg->enc;
5425  env.options = reg->options;
5426  env.case_fold_flag = reg->case_fold_flag;
5427  env.scan_env = scan_env;
5428  clear_mml(&env.mmd);
5429 
5430  r = optimize_node_left(node, &opt, &env);
5431  if (r) return r;
5432 
5433  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5434  ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5435  ANCHOR_LOOK_BEHIND);
5436 
5437  if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5438  reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5439 
5440  reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5441  ANCHOR_PREC_READ_NOT);
5442 
5443  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5444  reg->anchor_dmin = opt.len.min;
5445  reg->anchor_dmax = opt.len.max;
5446  }
5447 
5448  if (opt.exb.len > 0 || opt.exm.len > 0) {
5449  select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5450  if (opt.map.value > 0 &&
5451  comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5452  goto set_map;
5453  }
5454  else {
5455  r = set_optimize_exact_info(reg, &opt.exb);
5456  set_sub_anchor(reg, &opt.exb.anc);
5457  }
5458  }
5459  else if (opt.map.value > 0) {
5460  set_map:
5461  set_optimize_map_info(reg, &opt.map);
5462  set_sub_anchor(reg, &opt.map.anc);
5463  }
5464  else {
5465  reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5466  if (opt.len.max == 0)
5467  reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5468  }
5469 
5470 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5471  print_optimize_info(stderr, reg);
5472 #endif
5473  return r;
5474 }
5475 
5476 static void
5477 clear_optimize_info(regex_t* reg)
5478 {
5479  reg->optimize = ONIG_OPTIMIZE_NONE;
5480  reg->anchor = 0;
5481  reg->anchor_dmin = 0;
5482  reg->anchor_dmax = 0;
5483  reg->sub_anchor = 0;
5484  reg->exact_end = (UChar* )NULL;
5485  reg->threshold_len = 0;
5486  if (IS_NOT_NULL(reg->exact)) {
5487  xfree(reg->exact);
5488  reg->exact = (UChar* )NULL;
5489  }
5490 }
5491 
5492 #ifdef ONIG_DEBUG
5493 
5494 static void print_enc_string(FILE* fp, OnigEncoding enc,
5495  const UChar *s, const UChar *end)
5496 {
5497  fprintf(fp, "\nPATTERN: /");
5498 
5499  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5500  const UChar *p;
5501  OnigCodePoint code;
5502 
5503  p = s;
5504  while (p < end) {
5505  code = ONIGENC_MBC_TO_CODE(enc, p, end);
5506  if (code >= 0x80) {
5507  fprintf(fp, " 0x%04x ", (int )code);
5508  }
5509  else {
5510  fputc((int )code, fp);
5511  }
5512 
5513  p += enclen(enc, p, end);
5514  }
5515  }
5516  else {
5517  while (s < end) {
5518  fputc((int )*s, fp);
5519  s++;
5520  }
5521  }
5522 
5523  fprintf(fp, "/ (%s)\n", enc->name);
5524 }
5525 #endif /* ONIG_DEBUG */
5526 
5527 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5528 static void
5529 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5530 {
5531  if (a == ONIG_INFINITE_DISTANCE)
5532  fputs("inf", f);
5533  else
5534  fprintf(f, "(%"PRIuPTR")", a);
5535 
5536  fputs("-", f);
5537 
5538  if (b == ONIG_INFINITE_DISTANCE)
5539  fputs("inf", f);
5540  else
5541  fprintf(f, "(%"PRIuPTR")", b);
5542 }
5543 
5544 static void
5545 print_anchor(FILE* f, int anchor)
5546 {
5547  int q = 0;
5548 
5549  fprintf(f, "[");
5550 
5551  if (anchor & ANCHOR_BEGIN_BUF) {
5552  fprintf(f, "begin-buf");
5553  q = 1;
5554  }
5555  if (anchor & ANCHOR_BEGIN_LINE) {
5556  if (q) fprintf(f, ", ");
5557  q = 1;
5558  fprintf(f, "begin-line");
5559  }
5560  if (anchor & ANCHOR_BEGIN_POSITION) {
5561  if (q) fprintf(f, ", ");
5562  q = 1;
5563  fprintf(f, "begin-pos");
5564  }
5565  if (anchor & ANCHOR_END_BUF) {
5566  if (q) fprintf(f, ", ");
5567  q = 1;
5568  fprintf(f, "end-buf");
5569  }
5570  if (anchor & ANCHOR_SEMI_END_BUF) {
5571  if (q) fprintf(f, ", ");
5572  q = 1;
5573  fprintf(f, "semi-end-buf");
5574  }
5575  if (anchor & ANCHOR_END_LINE) {
5576  if (q) fprintf(f, ", ");
5577  q = 1;
5578  fprintf(f, "end-line");
5579  }
5580  if (anchor & ANCHOR_ANYCHAR_STAR) {
5581  if (q) fprintf(f, ", ");
5582  q = 1;
5583  fprintf(f, "anychar-star");
5584  }
5585  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5586  if (q) fprintf(f, ", ");
5587  fprintf(f, "anychar-star-ml");
5588  }
5589 
5590  fprintf(f, "]");
5591 }
5592 
5593 static void
5594 print_optimize_info(FILE* f, regex_t* reg)
5595 {
5596  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5597  "EXACT_IC", "MAP",
5598  "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5599 
5600  fprintf(f, "optimize: %s\n", on[reg->optimize]);
5601  fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5602  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5603  print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5604  fprintf(f, "\n");
5605 
5606  if (reg->optimize) {
5607  fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5608  fprintf(f, "\n");
5609  }
5610  fprintf(f, "\n");
5611 
5612  if (reg->exact) {
5613  UChar *p;
5614  fprintf(f, "exact: [");
5615  for (p = reg->exact; p < reg->exact_end; p++) {
5616  fputc(*p, f);
5617  }
5618  fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5619  }
5620  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5621  int c, i, n = 0;
5622 
5623  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5624  if (reg->map[i]) n++;
5625 
5626  fprintf(f, "map: n=%d\n", n);
5627  if (n > 0) {
5628  c = 0;
5629  fputc('[', f);
5630  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5631  if (reg->map[i] != 0) {
5632  if (c > 0) fputs(", ", f);
5633  c++;
5634  if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5635  ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5636  fputc(i, f);
5637  else
5638  fprintf(f, "%d", i);
5639  }
5640  }
5641  fprintf(f, "]\n");
5642  }
5643  }
5644 }
5645 #endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5646 
5647 
5648 extern void
5649 onig_free_body(regex_t* reg)
5650 {
5651  if (IS_NOT_NULL(reg)) {
5652  if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5653  if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5654  if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5655  if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
5656  if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5657  if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5658 
5659 #ifdef USE_NAMED_GROUP
5660  onig_names_free(reg);
5661 #endif
5662  }
5663 }
5664 
5665 extern void
5666 onig_free(regex_t* reg)
5667 {
5668  if (IS_NOT_NULL(reg)) {
5669  onig_free_body(reg);
5670  xfree(reg);
5671  }
5672 }
5673 
5674 #ifdef RUBY
5675 size_t
5676 onig_memsize(const regex_t *reg)
5677 {
5678  size_t size = sizeof(regex_t);
5679  if (IS_NULL(reg)) return 0;
5680  if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5681  if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5682  if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5683  if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5684  if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5685  if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5686 
5687  return size;
5688 }
5689 
5690 size_t
5691 onig_region_memsize(const OnigRegion *regs)
5692 {
5693  size_t size = sizeof(*regs);
5694  if (IS_NULL(regs)) return 0;
5695  size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5696  return size;
5697 }
5698 #endif
5699 
5700 #define REGEX_TRANSFER(to,from) do {\
5701  onig_free_body(to);\
5702  xmemcpy(to, from, sizeof(regex_t));\
5703  xfree(from);\
5704 } while (0)
5705 
5706 #if 0
5707 extern void
5708 onig_transfer(regex_t* to, regex_t* from)
5709 {
5710  REGEX_TRANSFER(to, from);
5711 }
5712 #endif
5713 
5714 #ifdef ONIG_DEBUG_COMPILE
5715 static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5716 #endif
5717 #ifdef ONIG_DEBUG_PARSE_TREE
5718 static void print_tree(FILE* f, Node* node);
5719 #endif
5720 
5721 #ifdef RUBY
5722 extern int
5723 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5724  OnigErrorInfo* einfo)
5725 {
5726  return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5727 }
5728 #endif
5729 
5730 #ifdef RUBY
5731 extern int
5732 onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5733  OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5734 #else
5735 extern int
5736 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5737  OnigErrorInfo* einfo)
5738 #endif
5739 {
5740 #define COMPILE_INIT_SIZE 20
5741 
5742  int r;
5743  OnigDistance init_size;
5744  Node* root;
5745  ScanEnv scan_env = {0};
5746 #ifdef USE_SUBEXP_CALL
5747  UnsetAddrList uslist;
5748 #endif
5749 
5750  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5751 
5752 #ifdef RUBY
5753  scan_env.sourcefile = sourcefile;
5754  scan_env.sourceline = sourceline;
5755 #endif
5756 
5757 #ifdef ONIG_DEBUG
5758  print_enc_string(stderr, reg->enc, pattern, pattern_end);
5759 #endif
5760 
5761  if (reg->alloc == 0) {
5762  init_size = (pattern_end - pattern) * 2;
5763  if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5764  r = BBUF_INIT(reg, init_size);
5765  if (r != 0) goto end;
5766  }
5767  else
5768  reg->used = 0;
5769 
5770  reg->num_mem = 0;
5771  reg->num_repeat = 0;
5772  reg->num_null_check = 0;
5773  reg->repeat_range_alloc = 0;
5774  reg->repeat_range = (OnigRepeatRange* )NULL;
5775 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5776  reg->num_comb_exp_check = 0;
5777 #endif
5778 
5779  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5780  if (r != 0) goto err;
5781 
5782 #ifdef ONIG_DEBUG_PARSE_TREE
5783 # if 0
5784  fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5785  print_tree(stderr, root);
5786 # endif
5787 #endif
5788 
5789 #ifdef USE_NAMED_GROUP
5790  /* mixed use named group and no-named group */
5791  if (scan_env.num_named > 0 &&
5792  IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5793  !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5794  if (scan_env.num_named != scan_env.num_mem)
5795  r = disable_noname_group_capture(&root, reg, &scan_env);
5796  else
5797  r = numbered_ref_check(root);
5798 
5799  if (r != 0) goto err;
5800  }
5801 #endif
5802 
5803 #ifdef USE_SUBEXP_CALL
5804  if (scan_env.num_call > 0) {
5805  r = unset_addr_list_init(&uslist, scan_env.num_call);
5806  if (r != 0) goto err;
5807  scan_env.unset_addr_list = &uslist;
5808  r = setup_subexp_call(root, &scan_env);
5809  if (r != 0) goto err_unset;
5810  r = subexp_recursive_check_trav(root, &scan_env);
5811  if (r < 0) goto err_unset;
5812  r = subexp_inf_recursive_check_trav(root, &scan_env);
5813  if (r != 0) goto err_unset;
5814 
5815  reg->num_call = scan_env.num_call;
5816  }
5817  else
5818  reg->num_call = 0;
5819 #endif
5820 
5821  r = setup_tree(root, reg, 0, &scan_env);
5822  if (r != 0) goto err_unset;
5823 
5824 #ifdef ONIG_DEBUG_PARSE_TREE
5825  print_tree(stderr, root);
5826 #endif
5827 
5828  reg->capture_history = scan_env.capture_history;
5829  reg->bt_mem_start = scan_env.bt_mem_start;
5830  reg->bt_mem_start |= reg->capture_history;
5831  if (IS_FIND_CONDITION(reg->options))
5832  BIT_STATUS_ON_ALL(reg->bt_mem_end);
5833  else {
5834  reg->bt_mem_end = scan_env.bt_mem_end;
5835  reg->bt_mem_end |= reg->capture_history;
5836  }
5837 
5838 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5839  if (scan_env.backrefed_mem == 0
5840 # ifdef USE_SUBEXP_CALL
5841  || scan_env.num_call == 0
5842 # endif
5843  ) {
5844  setup_comb_exp_check(root, 0, &scan_env);
5845 # ifdef USE_SUBEXP_CALL
5846  if (scan_env.has_recursion != 0) {
5847  scan_env.num_comb_exp_check = 0;
5848  }
5849  else
5850 # endif
5851  if (scan_env.comb_exp_max_regnum > 0) {
5852  int i;
5853  for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5854  if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5855  scan_env.num_comb_exp_check = 0;
5856  break;
5857  }
5858  }
5859  }
5860  }
5861 
5862  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5863 #endif
5864 
5865  clear_optimize_info(reg);
5866 #ifndef ONIG_DONT_OPTIMIZE
5867  r = set_optimize_info_from_tree(root, reg, &scan_env);
5868  if (r != 0) goto err_unset;
5869 #endif
5870 
5871  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5872  xfree(scan_env.mem_nodes_dynamic);
5873  scan_env.mem_nodes_dynamic = (Node** )NULL;
5874  }
5875 
5876  r = compile_tree(root, reg);
5877  if (r == 0) {
5878  r = add_opcode(reg, OP_END);
5879 #ifdef USE_SUBEXP_CALL
5880  if (scan_env.num_call > 0) {
5881  r = unset_addr_list_fix(&uslist, reg);
5882  unset_addr_list_end(&uslist);
5883  if (r) goto err;
5884  }
5885 #endif
5886 
5887  if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5888  reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5889  else {
5890  if (reg->bt_mem_start != 0)
5891  reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5892  else
5893  reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5894  }
5895  }
5896 #ifdef USE_SUBEXP_CALL
5897  else if (scan_env.num_call > 0) {
5898  unset_addr_list_end(&uslist);
5899  }
5900 #endif
5901  onig_node_free(root);
5902 
5903 #ifdef ONIG_DEBUG_COMPILE
5904 # ifdef USE_NAMED_GROUP
5905  onig_print_names(stderr, reg);
5906 # endif
5907  print_compiled_byte_code_list(stderr, reg);
5908 #endif
5909 
5910  end:
5911  onig_reg_resize(reg);
5912  return r;
5913 
5914  err_unset:
5915 #ifdef USE_SUBEXP_CALL
5916  if (scan_env.num_call > 0) {
5917  unset_addr_list_end(&uslist);
5918  }
5919 #endif
5920  err:
5921  if (IS_NOT_NULL(scan_env.error)) {
5922  if (IS_NOT_NULL(einfo)) {
5923  einfo->enc = scan_env.enc;
5924  einfo->par = scan_env.error;
5925  einfo->par_end = scan_env.error_end;
5926  }
5927  }
5928 
5929  onig_node_free(root);
5930  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5931  xfree(scan_env.mem_nodes_dynamic);
5932  return r;
5933 }
5934 
5935 static int onig_inited = 0;
5936 
5937 extern int
5938 onig_reg_init(regex_t* reg, OnigOptionType option,
5939  OnigCaseFoldType case_fold_flag,
5940  OnigEncoding enc, const OnigSyntaxType* syntax)
5941 {
5942  if (! onig_inited)
5943  onig_init();
5944 
5945  if (IS_NULL(reg))
5946  return ONIGERR_INVALID_ARGUMENT;
5947 
5948  if (ONIGENC_IS_UNDEF(enc))
5949  return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5950 
5951  if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5952  == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5953  return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5954  }
5955 
5956  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5957  option |= syntax->options;
5958  option &= ~ONIG_OPTION_SINGLELINE;
5959  }
5960  else
5961  option |= syntax->options;
5962 
5963  (reg)->enc = enc;
5964  (reg)->options = option;
5965  (reg)->syntax = syntax;
5966  (reg)->optimize = 0;
5967  (reg)->exact = (UChar* )NULL;
5968  (reg)->int_map = (int* )NULL;
5969  (reg)->int_map_backward = (int* )NULL;
5970  (reg)->chain = (regex_t* )NULL;
5971 
5972  (reg)->p = (UChar* )NULL;
5973  (reg)->alloc = 0;
5974  (reg)->used = 0;
5975  (reg)->name_table = (void* )NULL;
5976 
5977  (reg)->case_fold_flag = case_fold_flag;
5978  return 0;
5979 }
5980 
5981 extern int
5982 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5983  const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5984  const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5985 {
5986  int r;
5987 
5988  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5989  if (r) return r;
5990 
5991  r = onig_compile(reg, pattern, pattern_end, einfo);
5992  return r;
5993 }
5994 
5995 extern int
5996 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5997  OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
5998  OnigErrorInfo* einfo)
5999 {
6000  int r;
6001 
6002  *reg = (regex_t* )xmalloc(sizeof(regex_t));
6003  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
6004 
6005  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6006  if (r) goto err;
6007 
6008  r = onig_compile(*reg, pattern, pattern_end, einfo);
6009  if (r) {
6010  err:
6011  onig_free(*reg);
6012  *reg = NULL;
6013  }
6014  return r;
6015 }
6016 
6017 extern int
6018 onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
6019 {
6020  return onig_init();
6021 }
6022 
6023 extern int
6024 onig_init(void)
6025 {
6026  if (onig_inited != 0)
6027  return 0;
6028 
6029  onig_inited = 1;
6030 
6031 #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6032  _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6033 #endif
6034 
6035  onigenc_init();
6036  /* onigenc_set_default_caseconv_table((UChar* )0); */
6037 
6038 #ifdef ONIG_DEBUG_STATISTICS
6039  onig_statistics_init();
6040 #endif
6041 
6042  return 0;
6043 }
6044 
6045 
6046 static OnigEndCallListItemType* EndCallTop;
6047 
6048 extern void onig_add_end_call(void (*func)(void))
6049 {
6051 
6052  item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
6053  if (item == 0) return ;
6054 
6055  item->next = EndCallTop;
6056  item->func = func;
6057 
6058  EndCallTop = item;
6059 }
6060 
6061 static void
6062 exec_end_call_list(void)
6063 {
6065  void (*func)(void);
6066 
6067  while (EndCallTop != 0) {
6068  func = EndCallTop->func;
6069  (*func)();
6070 
6071  prev = EndCallTop;
6072  EndCallTop = EndCallTop->next;
6073  xfree(prev);
6074  }
6075 }
6076 
6077 extern int
6078 onig_end(void)
6079 {
6080  exec_end_call_list();
6081 
6082 #ifdef ONIG_DEBUG_STATISTICS
6083  onig_print_statistics(stderr);
6084 #endif
6085 
6086 #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6087  _CrtDumpMemoryLeaks();
6088 #endif
6089 
6090  onig_inited = 0;
6091 
6092  return 0;
6093 }
6094 
6095 extern int
6096 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
6097 {
6098  OnigCodePoint n, *data;
6099  OnigCodePoint low, high, x;
6100 
6101  GET_CODE_POINT(n, p);
6102  data = (OnigCodePoint* )p;
6103  data++;
6104 
6105  for (low = 0, high = n; low < high; ) {
6106  x = (low + high) >> 1;
6107  if (code > data[x * 2 + 1])
6108  low = x + 1;
6109  else
6110  high = x;
6111  }
6112 
6113  return ((low < n && code >= data[low * 2]) ? 1 : 0);
6114 }
6115 
6116 extern int
6117 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
6118 {
6119  int found;
6120 
6121  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6122  if (IS_NULL(cc->mbuf)) {
6123  found = 0;
6124  }
6125  else {
6126  found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6127  }
6128  }
6129  else {
6130  found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6131  }
6132 
6133  if (IS_NCCLASS_NOT(cc))
6134  return !found;
6135  else
6136  return found;
6137 }
6138 
6139 extern int
6140 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6141 {
6142  int len;
6143 
6144  if (ONIGENC_MBC_MINLEN(enc) > 1) {
6145  len = 2;
6146  }
6147  else {
6148  len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6149  }
6150  return onig_is_code_in_cc_len(len, code, cc);
6151 }
6152 
6153 
6154 #ifdef ONIG_DEBUG
6155 
6156 /* arguments type */
6157 # define ARG_SPECIAL -1
6158 # define ARG_NON 0
6159 # define ARG_RELADDR 1
6160 # define ARG_ABSADDR 2
6161 # define ARG_LENGTH 3
6162 # define ARG_MEMNUM 4
6163 # define ARG_OPTION 5
6164 # define ARG_STATE_CHECK 6
6165 
6166 OnigOpInfoType OnigOpInfo[] = {
6167  { OP_FINISH, "finish", ARG_NON },
6168  { OP_END, "end", ARG_NON },
6169  { OP_EXACT1, "exact1", ARG_SPECIAL },
6170  { OP_EXACT2, "exact2", ARG_SPECIAL },
6171  { OP_EXACT3, "exact3", ARG_SPECIAL },
6172  { OP_EXACT4, "exact4", ARG_SPECIAL },
6173  { OP_EXACT5, "exact5", ARG_SPECIAL },
6174  { OP_EXACTN, "exactn", ARG_SPECIAL },
6175  { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6176  { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6177  { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6178  { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6179  { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6180  { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6181  { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6182  { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6183  { OP_CCLASS, "cclass", ARG_SPECIAL },
6184  { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6185  { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6186  { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6187  { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6188  { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6189  { OP_ANYCHAR, "anychar", ARG_NON },
6190  { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6191  { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6192  { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6193  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6194  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6195  { OP_WORD, "word", ARG_NON },
6196  { OP_NOT_WORD, "not-word", ARG_NON },
6197  { OP_WORD_BOUND, "word-bound", ARG_NON },
6198  { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6199  { OP_WORD_BEGIN, "word-begin", ARG_NON },
6200  { OP_WORD_END, "word-end", ARG_NON },
6201  { OP_ASCII_WORD, "ascii-word", ARG_NON },
6202  { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6203  { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6204  { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6205  { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6206  { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6207  { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6208  { OP_END_BUF, "end-buf", ARG_NON },
6209  { OP_BEGIN_LINE, "begin-line", ARG_NON },
6210  { OP_END_LINE, "end-line", ARG_NON },
6211  { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6212  { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6213  { OP_BACKREF1, "backref1", ARG_NON },
6214  { OP_BACKREF2, "backref2", ARG_NON },
6215  { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6216  { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6217  { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6218  { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6219  { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6220  { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6221  { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6222  { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6223  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6224  { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6225  { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6226  { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6227  { OP_SET_OPTION, "set-option", ARG_OPTION },
6228  { OP_KEEP, "keep", ARG_NON },
6229  { OP_FAIL, "fail", ARG_NON },
6230  { OP_JUMP, "jump", ARG_RELADDR },
6231  { OP_PUSH, "push", ARG_RELADDR },
6232  { OP_POP, "pop", ARG_NON },
6233  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6234  { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6235  { OP_REPEAT, "repeat", ARG_SPECIAL },
6236  { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6237  { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6238  { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6239  { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6240  { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6241  { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6242  { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6243  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6244  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6245  { OP_PUSH_POS, "push-pos", ARG_NON },
6246  { OP_POP_POS, "pop-pos", ARG_NON },
6247  { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6248  { OP_FAIL_POS, "fail-pos", ARG_NON },
6249  { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6250  { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6251  { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6252  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6253  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6254  { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
6255  { OP_ABSENT, "absent", ARG_RELADDR },
6256  { OP_ABSENT_END, "absent-end", ARG_NON },
6257  { OP_CALL, "call", ARG_ABSADDR },
6258  { OP_RETURN, "return", ARG_NON },
6259  { OP_CONDITION, "condition", ARG_SPECIAL },
6260  { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6261  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6262  { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6263  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
6264  { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6265  "state-check-anychar-ml*", ARG_STATE_CHECK },
6266  { -1, "", ARG_NON }
6267 };
6268 
6269 static const char*
6270 op2name(int opcode)
6271 {
6272  int i;
6273 
6274  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6275  if (opcode == OnigOpInfo[i].opcode)
6276  return OnigOpInfo[i].name;
6277  }
6278  return "";
6279 }
6280 
6281 static int
6282 op2arg_type(int opcode)
6283 {
6284  int i;
6285 
6286  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6287  if (opcode == OnigOpInfo[i].opcode)
6288  return OnigOpInfo[i].arg_type;
6289  }
6290  return ARG_SPECIAL;
6291 }
6292 
6293 # ifdef ONIG_DEBUG_PARSE_TREE
6294 static void
6295 Indent(FILE* f, int indent)
6296 {
6297  int i;
6298  for (i = 0; i < indent; i++) putc(' ', f);
6299 }
6300 # endif /* ONIG_DEBUG_PARSE_TREE */
6301 
6302 static void
6303 p_string(FILE* f, ptrdiff_t len, UChar* s)
6304 {
6305  fputs(":", f);
6306  while (len-- > 0) { fputc(*s++, f); }
6307 }
6308 
6309 static void
6310 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6311 {
6312  int x = len * mb_len;
6313 
6314  fprintf(f, ":%d:", len);
6315  while (x-- > 0) { fputc(*s++, f); }
6316 }
6317 
6318 extern void
6319 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6320  OnigEncoding enc)
6321 {
6322  int i, n, arg_type;
6323  RelAddrType addr;
6324  LengthType len;
6325  MemNumType mem;
6326  StateCheckNumType scn;
6327  OnigCodePoint code;
6328  UChar *q;
6329 
6330  fprintf(f, "[%s", op2name(*bp));
6331  arg_type = op2arg_type(*bp);
6332  if (arg_type != ARG_SPECIAL) {
6333  bp++;
6334  switch (arg_type) {
6335  case ARG_NON:
6336  break;
6337  case ARG_RELADDR:
6338  GET_RELADDR_INC(addr, bp);
6339  fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6340  break;
6341  case ARG_ABSADDR:
6342  GET_ABSADDR_INC(addr, bp);
6343  fprintf(f, ":(%d)", addr);
6344  break;
6345  case ARG_LENGTH:
6346  GET_LENGTH_INC(len, bp);
6347  fprintf(f, ":%d", len);
6348  break;
6349  case ARG_MEMNUM:
6350  mem = *((MemNumType* )bp);
6351  bp += SIZE_MEMNUM;
6352  fprintf(f, ":%d", mem);
6353  break;
6354  case ARG_OPTION:
6355  {
6356  OnigOptionType option = *((OnigOptionType* )bp);
6357  bp += SIZE_OPTION;
6358  fprintf(f, ":%d", option);
6359  }
6360  break;
6361 
6362  case ARG_STATE_CHECK:
6363  scn = *((StateCheckNumType* )bp);
6364  bp += SIZE_STATE_CHECK_NUM;
6365  fprintf(f, ":%d", scn);
6366  break;
6367  }
6368  }
6369  else {
6370  switch (*bp++) {
6371  case OP_EXACT1:
6372  case OP_ANYCHAR_STAR_PEEK_NEXT:
6373  case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6374  p_string(f, 1, bp++); break;
6375  case OP_EXACT2:
6376  p_string(f, 2, bp); bp += 2; break;
6377  case OP_EXACT3:
6378  p_string(f, 3, bp); bp += 3; break;
6379  case OP_EXACT4:
6380  p_string(f, 4, bp); bp += 4; break;
6381  case OP_EXACT5:
6382  p_string(f, 5, bp); bp += 5; break;
6383  case OP_EXACTN:
6384  GET_LENGTH_INC(len, bp);
6385  p_len_string(f, len, 1, bp);
6386  bp += len;
6387  break;
6388 
6389  case OP_EXACTMB2N1:
6390  p_string(f, 2, bp); bp += 2; break;
6391  case OP_EXACTMB2N2:
6392  p_string(f, 4, bp); bp += 4; break;
6393  case OP_EXACTMB2N3:
6394  p_string(f, 6, bp); bp += 6; break;
6395  case OP_EXACTMB2N:
6396  GET_LENGTH_INC(len, bp);
6397  p_len_string(f, len, 2, bp);
6398  bp += len * 2;
6399  break;
6400  case OP_EXACTMB3N:
6401  GET_LENGTH_INC(len, bp);
6402  p_len_string(f, len, 3, bp);
6403  bp += len * 3;
6404  break;
6405  case OP_EXACTMBN:
6406  {
6407  int mb_len;
6408 
6409  GET_LENGTH_INC(mb_len, bp);
6410  GET_LENGTH_INC(len, bp);
6411  fprintf(f, ":%d:%d:", mb_len, len);
6412  n = len * mb_len;
6413  while (n-- > 0) { fputc(*bp++, f); }
6414  }
6415  break;
6416 
6417  case OP_EXACT1_IC:
6418  len = enclen(enc, bp, bpend);
6419  p_string(f, len, bp);
6420  bp += len;
6421  break;
6422  case OP_EXACTN_IC:
6423  GET_LENGTH_INC(len, bp);
6424  p_len_string(f, len, 1, bp);
6425  bp += len;
6426  break;
6427 
6428  case OP_CCLASS:
6429  n = bitset_on_num((BitSetRef )bp);
6430  bp += SIZE_BITSET;
6431  fprintf(f, ":%d", n);
6432  break;
6433 
6434  case OP_CCLASS_NOT:
6435  n = bitset_on_num((BitSetRef )bp);
6436  bp += SIZE_BITSET;
6437  fprintf(f, ":%d", n);
6438  break;
6439 
6440  case OP_CCLASS_MB:
6441  case OP_CCLASS_MB_NOT:
6442  GET_LENGTH_INC(len, bp);
6443  q = bp;
6444 # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6445  ALIGNMENT_RIGHT(q);
6446 # endif
6447  GET_CODE_POINT(code, q);
6448  bp += len;
6449  fprintf(f, ":%d:%d", (int )code, len);
6450  break;
6451 
6452  case OP_CCLASS_MIX:
6453  case OP_CCLASS_MIX_NOT:
6454  n = bitset_on_num((BitSetRef )bp);
6455  bp += SIZE_BITSET;
6456  GET_LENGTH_INC(len, bp);
6457  q = bp;
6458 # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6459  ALIGNMENT_RIGHT(q);
6460 # endif
6461  GET_CODE_POINT(code, q);
6462  bp += len;
6463  fprintf(f, ":%d:%d:%d", n, (int )code, len);
6464  break;
6465 
6466  case OP_BACKREFN_IC:
6467  mem = *((MemNumType* )bp);
6468  bp += SIZE_MEMNUM;
6469  fprintf(f, ":%d", mem);
6470  break;
6471 
6472  case OP_BACKREF_MULTI_IC:
6473  case OP_BACKREF_MULTI:
6474  fputs(" ", f);
6475  GET_LENGTH_INC(len, bp);
6476  for (i = 0; i < len; i++) {
6477  GET_MEMNUM_INC(mem, bp);
6478  if (i > 0) fputs(", ", f);
6479  fprintf(f, "%d", mem);
6480  }
6481  break;
6482 
6483  case OP_BACKREF_WITH_LEVEL:
6484  {
6485  OnigOptionType option;
6486  LengthType level;
6487 
6488  GET_OPTION_INC(option, bp);
6489  fprintf(f, ":%d", option);
6490  GET_LENGTH_INC(level, bp);
6491  fprintf(f, ":%d", level);
6492 
6493  fputs(" ", f);
6494  GET_LENGTH_INC(len, bp);
6495  for (i = 0; i < len; i++) {
6496  GET_MEMNUM_INC(mem, bp);
6497  if (i > 0) fputs(", ", f);
6498  fprintf(f, "%d", mem);
6499  }
6500  }
6501  break;
6502 
6503  case OP_REPEAT:
6504  case OP_REPEAT_NG:
6505  {
6506  mem = *((MemNumType* )bp);
6507  bp += SIZE_MEMNUM;
6508  addr = *((RelAddrType* )bp);
6509  bp += SIZE_RELADDR;
6510  fprintf(f, ":%d:%d", mem, addr);
6511  }
6512  break;
6513 
6514  case OP_PUSH_OR_JUMP_EXACT1:
6515  case OP_PUSH_IF_PEEK_NEXT:
6516  addr = *((RelAddrType* )bp);
6517  bp += SIZE_RELADDR;
6518  fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6519  p_string(f, 1, bp);
6520  bp += 1;
6521  break;
6522 
6523  case OP_LOOK_BEHIND:
6524  GET_LENGTH_INC(len, bp);
6525  fprintf(f, ":%d", len);
6526  break;
6527 
6528  case OP_PUSH_LOOK_BEHIND_NOT:
6529  GET_RELADDR_INC(addr, bp);
6530  GET_LENGTH_INC(len, bp);
6531  fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6532  break;
6533 
6534  case OP_STATE_CHECK_PUSH:
6535  case OP_STATE_CHECK_PUSH_OR_JUMP:
6536  scn = *((StateCheckNumType* )bp);
6537  bp += SIZE_STATE_CHECK_NUM;
6538  addr = *((RelAddrType* )bp);
6539  bp += SIZE_RELADDR;
6540  fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6541  break;
6542 
6543  case OP_CONDITION:
6544  GET_MEMNUM_INC(mem, bp);
6545  GET_RELADDR_INC(addr, bp);
6546  fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6547  break;
6548 
6549  default:
6550  fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6551  bp[-1]);
6552  }
6553  }
6554  fputs("]", f);
6555  if (nextp) *nextp = bp;
6556 }
6557 
6558 # ifdef ONIG_DEBUG_COMPILE
6559 static void
6560 print_compiled_byte_code_list(FILE* f, regex_t* reg)
6561 {
6562  int ncode;
6563  UChar* bp = reg->p;
6564  UChar* end = reg->p + reg->used;
6565 
6566  fprintf(f, "code length: %d", reg->used);
6567 
6568  ncode = -1;
6569  while (bp < end) {
6570  ncode++;
6571  if (ncode % 5 == 0)
6572  fprintf(f, "\n%ld:", bp - reg->p);
6573  else
6574  fprintf(f, " %ld:", bp - reg->p);
6575  onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6576  }
6577 
6578  fprintf(f, "\n");
6579 }
6580 # endif /* ONIG_DEBUG_COMPILE */
6581 
6582 # ifdef ONIG_DEBUG_PARSE_TREE
6583 static void
6584 print_indent_tree(FILE* f, Node* node, int indent)
6585 {
6586  int i, type, container_p = 0;
6587  int add = 3;
6588  UChar* p;
6589 
6590  Indent(f, indent);
6591  if (IS_NULL(node)) {
6592  fprintf(f, "ERROR: null node!!!\n");
6593  exit (0);
6594  }
6595 
6596  type = NTYPE(node);
6597  switch (type) {
6598  case NT_LIST:
6599  case NT_ALT:
6600  if (NTYPE(node) == NT_LIST)
6601  fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6602  else
6603  fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6604 
6605  print_indent_tree(f, NCAR(node), indent + add);
6606  while (IS_NOT_NULL(node = NCDR(node))) {
6607  if (NTYPE(node) != type) {
6608  fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6609  exit(0);
6610  }
6611  print_indent_tree(f, NCAR(node), indent + add);
6612  }
6613  break;
6614 
6615  case NT_STR:
6616  fprintf(f, "<string%s:%"PRIxPTR">",
6617  (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6618  for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6619  if (*p >= 0x20 && *p < 0x7f)
6620  fputc(*p, f);
6621  else {
6622  fprintf(f, " 0x%02x", *p);
6623  }
6624  }
6625  break;
6626 
6627  case NT_CCLASS:
6628  fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6629  if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6630  if (NCCLASS(node)->mbuf) {
6631  BBuf* bbuf = NCCLASS(node)->mbuf;
6632  OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6633  OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6634  fprintf(f, "%d", *data++);
6635  for (; data < end; data+=2) {
6636  fprintf(f, ",");
6637  fprintf(f, "%04x-%04x", data[0], data[1]);
6638  }
6639  }
6640  break;
6641 
6642  case NT_CTYPE:
6643  fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6644  switch (NCTYPE(node)->ctype) {
6645  case ONIGENC_CTYPE_WORD:
6646  if (NCTYPE(node)->not != 0)
6647  fputs("not word", f);
6648  else
6649  fputs("word", f);
6650  break;
6651 
6652  default:
6653  fprintf(f, "ERROR: undefined ctype.\n");
6654  exit(0);
6655  }
6656  break;
6657 
6658  case NT_CANY:
6659  fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6660  break;
6661 
6662  case NT_ANCHOR:
6663  fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6664  switch (NANCHOR(node)->type) {
6665  case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6666  case ANCHOR_END_BUF: fputs("end buf", f); break;
6667  case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6668  case ANCHOR_END_LINE: fputs("end line", f); break;
6669  case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6670  case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6671 
6672  case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6673  case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6674 # ifdef USE_WORD_BEGIN_END
6675  case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6676  case ANCHOR_WORD_END: fputs("word end", f); break;
6677 # endif
6678  case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6679  case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6680  case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6681  case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6682  case ANCHOR_KEEP: fputs("keep",f); break;
6683 
6684  default:
6685  fprintf(f, "ERROR: undefined anchor type.\n");
6686  break;
6687  }
6688  break;
6689 
6690  case NT_BREF:
6691  {
6692  int* p;
6693  BRefNode* br = NBREF(node);
6694  p = BACKREFS_P(br);
6695  fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6696  for (i = 0; i < br->back_num; i++) {
6697  if (i > 0) fputs(", ", f);
6698  fprintf(f, "%d", p[i]);
6699  }
6700  }
6701  break;
6702 
6703 # ifdef USE_SUBEXP_CALL
6704  case NT_CALL:
6705  {
6706  CallNode* cn = NCALL(node);
6707  fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6708  p_string(f, cn->name_end - cn->name, cn->name);
6709  }
6710  break;
6711 # endif
6712 
6713  case NT_QTFR:
6714  fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6715  NQTFR(node)->lower, NQTFR(node)->upper,
6716  (NQTFR(node)->greedy ? "" : "?"));
6717  print_indent_tree(f, NQTFR(node)->target, indent + add);
6718  break;
6719 
6720  case NT_ENCLOSE:
6721  fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6722  switch (NENCLOSE(node)->type) {
6723  case ENCLOSE_OPTION:
6724  fprintf(f, "option:%d", NENCLOSE(node)->option);
6725  break;
6726  case ENCLOSE_MEMORY:
6727  fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6728  break;
6729  case ENCLOSE_STOP_BACKTRACK:
6730  fprintf(f, "stop-bt");
6731  break;
6732  case ENCLOSE_CONDITION:
6733  fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6734  break;
6735  case ENCLOSE_ABSENT:
6736  fprintf(f, "absent");
6737  break;
6738 
6739  default:
6740  break;
6741  }
6742  fprintf(f, "\n");
6743  print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6744  break;
6745 
6746  default:
6747  fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6748  break;
6749  }
6750 
6751  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6752  type != NT_ENCLOSE)
6753  fprintf(f, "\n");
6754 
6755  if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6756 
6757  fflush(f);
6758 }
6759 
6760 static void
6761 print_tree(FILE* f, Node* node)
6762 {
6763  print_indent_tree(f, node, 0);
6764 }
6765 # endif /* ONIG_DEBUG_PARSE_TREE */
6766 #endif /* ONIG_DEBUG */
#define xfree
Old name of ruby_xfree.
Definition: xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition: xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition: xmalloc.h:53
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
Definition: regint.h:441