Ruby  3.1.4p223 (2023-03-30 revision HEAD)
regexec.c
1 /**********************************************************************
2  regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6  * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include "regint.h"
32 
/*
 * Build-time switches.
 *
 * USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is forced off for RUBY
 * builds and on for every other build.  Further down in this file the
 * DATA_ENSURE* macros test against right_range when it is defined and
 * against end when it is not.
 */
33 #ifdef RUBY
34 # undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
35 #else
36 # define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
37 #endif
38 
/*
 * USE_TOKEN_THREADED_VM may be predefined by the build; otherwise it
 * defaults to 1 when __GNUC__ is defined and to 0 when it is not.
 */
39 #ifndef USE_TOKEN_THREADED_VM
40 # ifdef __GNUC__
41 # define USE_TOKEN_THREADED_VM 1
42 # else
43 # define USE_TOKEN_THREADED_VM 0
44 # endif
45 #endif
46 
#ifdef RUBY
# define ENC_DUMMY_FLAG (1<<24)
/*
 * Return non-zero when `enc` has a minimum character length of one byte and
 * its ruby_encoding_index does not carry ENC_DUMMY_FLAG; zero otherwise.
 *
 * The declarator line (name and parameter list) was missing from this
 * definition; it is restored from the call in ONIGENC_IS_MBC_ASCII_WORD
 * below and from the body's use of `enc`.
 */
static inline int
rb_enc_asciicompat(OnigEncoding enc)
{
  return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
}
/*
 * Word-character test restricted to ASCII.  For encodings accepted by
 * rb_enc_asciicompat() the first byte is tested directly (alphanumeric or
 * '_'); otherwise the character is decoded and checked against
 * ONIGENC_CTYPE_WORD.
 */
# undef ONIGENC_IS_MBC_ASCII_WORD
# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
  (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
   onigenc_ascii_is_code_ctype( \
     ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
#endif /* RUBY */
60 
#ifdef USE_CRNL_AS_LINE_TERMINATOR
/* True when the character at p decodes to 13 (CR) and the next one to 10 (LF). */
# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
   ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
  is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))

/*
 * Newline test that honours the CRLF newline option.
 *
 * Without the option the result is simply ONIGENC_IS_MBC_NEWLINE(enc, p, end).
 * With it:
 *   - an LF (0x0a) at p is a newline, except that when check_prev is non-zero
 *     and the preceding character is CR (0x0d) the result is 0;
 *   - a CR at p immediately followed by an LF that starts before `end` is a
 *     newline;
 *   - anything else falls back to ONIGENC_IS_MBC_NEWLINE, normalised to 0/1.
 */
static int
is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
                  const UChar *end, OnigOptionType option, int check_prev)
{
  const UChar *neighbor;

  if (!(IS_NEWLINE_CRLF(option)))
    return ONIGENC_IS_MBC_NEWLINE(enc, p, end);

  if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
    if (!check_prev)
      return 1;

    /* LF preceded by CR: not reported as a newline at this position. */
    neighbor = onigenc_get_prev_char_head(enc, start, p, end);
    if ((neighbor != NULL) && ONIGENC_MBC_TO_CODE(enc, neighbor, end) == 0x0d)
      return 0;
    return 1;
  }

  neighbor = p + enclen(enc, p, end);
  if (neighbor < end &&
      ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
      ONIGENC_MBC_TO_CODE(enc, neighbor, end) == 0x0a)
    return 1;

  return ONIGENC_IS_MBC_NEWLINE(enc, p, end) ? 1 : 0;
}
#else /* USE_CRNL_AS_LINE_TERMINATOR */
# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
  ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
#endif /* USE_CRNL_AS_LINE_TERMINATOR */
102 
103 #ifdef USE_CAPTURE_HISTORY
104 static void history_tree_free(OnigCaptureTreeNode* node);
105 
106 static void
107 history_tree_clear(OnigCaptureTreeNode* node)
108 {
109  int i;
110 
111  if (IS_NOT_NULL(node)) {
112  for (i = 0; i < node->num_childs; i++) {
113  if (IS_NOT_NULL(node->childs[i])) {
114  history_tree_free(node->childs[i]);
115  }
116  }
117  for (i = 0; i < node->allocated; i++) {
118  node->childs[i] = (OnigCaptureTreeNode* )0;
119  }
120  node->num_childs = 0;
121  node->beg = ONIG_REGION_NOTPOS;
122  node->end = ONIG_REGION_NOTPOS;
123  node->group = -1;
124  xfree(node->childs);
125  node->childs = (OnigCaptureTreeNode** )0;
126  }
127 }
128 
129 static void
130 history_tree_free(OnigCaptureTreeNode* node)
131 {
132  history_tree_clear(node);
133  xfree(node);
134 }
135 
136 static void
137 history_root_free(OnigRegion* r)
138 {
139  if (IS_NOT_NULL(r->history_root)) {
140  history_tree_free(r->history_root);
141  r->history_root = (OnigCaptureTreeNode* )0;
142  }
143 }
144 
145 static OnigCaptureTreeNode*
146 history_node_new(void)
147 {
148  OnigCaptureTreeNode* node;
149 
150  node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
151  CHECK_NULL_RETURN(node);
152  node->childs = (OnigCaptureTreeNode** )0;
153  node->allocated = 0;
154  node->num_childs = 0;
155  node->group = -1;
156  node->beg = ONIG_REGION_NOTPOS;
157  node->end = ONIG_REGION_NOTPOS;
158 
159  return node;
160 }
161 
162 static int
163 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
164 {
165 # define HISTORY_TREE_INIT_ALLOC_SIZE 8
166 
167  if (parent->num_childs >= parent->allocated) {
168  int n, i;
169 
170  if (IS_NULL(parent->childs)) {
171  n = HISTORY_TREE_INIT_ALLOC_SIZE;
172  parent->childs =
173  (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
174  CHECK_NULL_RETURN_MEMERR(parent->childs);
175  }
176  else {
177  OnigCaptureTreeNode** tmp;
178  n = parent->allocated * 2;
179  tmp =
180  (OnigCaptureTreeNode** )xrealloc(parent->childs,
181  sizeof(OnigCaptureTreeNode*) * n);
182  if (tmp == 0) {
183  history_tree_clear(parent);
184  return ONIGERR_MEMORY;
185  }
186  parent->childs = tmp;
187  }
188  for (i = parent->allocated; i < n; i++) {
189  parent->childs[i] = (OnigCaptureTreeNode* )0;
190  }
191  parent->allocated = n;
192  }
193 
194  parent->childs[parent->num_childs] = child;
195  parent->num_childs++;
196  return 0;
197 }
198 
199 static OnigCaptureTreeNode*
200 history_tree_clone(OnigCaptureTreeNode* node)
201 {
202  int i, r;
203  OnigCaptureTreeNode *clone, *child;
204 
205  clone = history_node_new();
206  CHECK_NULL_RETURN(clone);
207 
208  clone->beg = node->beg;
209  clone->end = node->end;
210  for (i = 0; i < node->num_childs; i++) {
211  child = history_tree_clone(node->childs[i]);
212  if (IS_NULL(child)) {
213  history_tree_free(clone);
214  return (OnigCaptureTreeNode* )0;
215  }
216  r = history_tree_add_child(clone, child);
217  if (r != 0) {
218  history_tree_free(child);
219  history_tree_free(clone);
220  return (OnigCaptureTreeNode* )0;
221  }
222  }
223 
224  return clone;
225 }
226 
227 extern OnigCaptureTreeNode*
228 onig_get_capture_tree(OnigRegion* region)
229 {
230  return region->history_root;
231 }
232 #endif /* USE_CAPTURE_HISTORY */
233 
234 extern void
235 onig_region_clear(OnigRegion* region)
236 {
237  int i;
238 
239  for (i = 0; i < region->num_regs; i++) {
240  region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
241  }
242 #ifdef USE_CAPTURE_HISTORY
243  history_root_free(region);
244 #endif
245 }
246 
247 extern int
248 onig_region_resize(OnigRegion* region, int n)
249 {
250  region->num_regs = n;
251 
252  if (n < ONIG_NREGION)
253  n = ONIG_NREGION;
254 
255  if (region->allocated == 0) {
256  region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
257  if (region->beg == 0)
258  return ONIGERR_MEMORY;
259 
260  region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
261  if (region->end == 0) {
262  xfree(region->beg);
263  return ONIGERR_MEMORY;
264  }
265 
266  region->allocated = n;
267  }
268  else if (region->allocated < n) {
269  OnigPosition *tmp;
270 
271  region->allocated = 0;
272  tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
273  if (tmp == 0) {
274  xfree(region->beg);
275  xfree(region->end);
276  return ONIGERR_MEMORY;
277  }
278  region->beg = tmp;
279  tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
280  if (tmp == 0) {
281  xfree(region->beg);
282  xfree(region->end);
283  return ONIGERR_MEMORY;
284  }
285  region->end = tmp;
286 
287  region->allocated = n;
288  }
289 
290  return 0;
291 }
292 
293 static int
294 onig_region_resize_clear(OnigRegion* region, int n)
295 {
296  int r;
297 
298  r = onig_region_resize(region, n);
299  if (r != 0) return r;
300  onig_region_clear(region);
301  return 0;
302 }
303 
304 extern int
305 onig_region_set(OnigRegion* region, int at, int beg, int end)
306 {
307  if (at < 0) return ONIGERR_INVALID_ARGUMENT;
308 
309  if (at >= region->allocated) {
310  int r = onig_region_resize(region, at + 1);
311  if (r < 0) return r;
312  }
313 
314  region->beg[at] = beg;
315  region->end[at] = end;
316  return 0;
317 }
318 
319 extern void
320 onig_region_init(OnigRegion* region)
321 {
322  region->num_regs = 0;
323  region->allocated = 0;
324  region->beg = (OnigPosition* )0;
325  region->end = (OnigPosition* )0;
326 #ifdef USE_CAPTURE_HISTORY
327  region->history_root = (OnigCaptureTreeNode* )0;
328 #endif
329 }
330 
331 extern OnigRegion*
332 onig_region_new(void)
333 {
334  OnigRegion* r;
335 
336  r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
337  if (r)
338  onig_region_init(r);
339  return r;
340 }
341 
342 extern void
343 onig_region_free(OnigRegion* r, int free_self)
344 {
345  if (r) {
346  if (r->allocated > 0) {
347  if (r->beg) xfree(r->beg);
348  if (r->end) xfree(r->end);
349  r->allocated = 0;
350  }
351 #ifdef USE_CAPTURE_HISTORY
352  history_root_free(r);
353 #endif
354  if (free_self) xfree(r);
355  }
356 }
357 
358 extern void
359 onig_region_copy(OnigRegion* to, const OnigRegion* from)
360 {
361 #define RREGC_SIZE (sizeof(int) * from->num_regs)
362  int i, r;
363 
364  if (to == from) return;
365 
366  r = onig_region_resize(to, from->num_regs);
367  if (r) return;
368 
369  for (i = 0; i < from->num_regs; i++) {
370  to->beg[i] = from->beg[i];
371  to->end[i] = from->end[i];
372  }
373  to->num_regs = from->num_regs;
374 
375 #ifdef USE_CAPTURE_HISTORY
376  history_root_free(to);
377 
378  if (IS_NOT_NULL(from->history_root)) {
379  to->history_root = history_tree_clone(from->history_root);
380  }
381 #endif
382 }
383 
384 
/*
 * INVALID_STACK_INDEX marks "no stack entry" in stack-index slots; for
 * example STACK_PUSH_MEM_START stores it into mem_end_stk[mnum].
 */
386 #define INVALID_STACK_INDEX -1
387 
/*
 * Tags written to the `type` field of a match-stack entry by the STACK_PUSH*
 * macros.  The grouping comments describe how STACK_POP treats each tag.
 */
388 /* stack type */
389 /* used by normal-POP */
390 #define STK_ALT 0x0001
391 #define STK_LOOK_BEHIND_NOT 0x0002
392 #define STK_POS_NOT 0x0003
393 /* handled by normal-POP */
394 #define STK_MEM_START 0x0100
395 #define STK_MEM_END 0x8200
396 #define STK_REPEAT_INC 0x0300
397 #define STK_STATE_CHECK_MARK 0x1000
398 /* avoided by normal-POP */
399 #define STK_NULL_CHECK_START 0x3000
400 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */
401 #define STK_MEM_END_MARK 0x8400
402 #define STK_POS 0x0500 /* used when POP-POS */
403 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
404 #define STK_REPEAT 0x0700
405 #define STK_CALL_FRAME 0x0800
406 #define STK_RETURN 0x0900
407 #define STK_VOID 0x0a00 /* for fill a blank */
408 #define STK_ABSENT_POS 0x0b00 /* for absent */
409 #define STK_ABSENT 0x0c00 /* absent inner loop marker */
410 
/*
 * STK_MASK_POP_USED: non-zero only for tags with bits in the low byte
 *   (STK_ALT, STK_LOOK_BEHIND_NOT, STK_POS_NOT); STACK_POP stops there.
 * STK_MASK_TO_VOID_TARGET: the low byte plus bit 0x1000, so it also selects
 *   STK_STATE_CHECK_MARK, STK_NULL_CHECK_START (0x3000) and
 *   STK_NULL_CHECK_END (0x5000); tested by IS_TO_VOID_TARGET.
 * STK_MASK_MEM_END_OR_MARK: bit 0x8000, shared by STK_MEM_END and
 *   STK_MEM_END_MARK.
 */
411 /* stack type check mask */
412 #define STK_MASK_POP_USED 0x00ff
413 #define STK_MASK_TO_VOID_TARGET 0x10ff
414 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
415 
/*
 * MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos)
 *
 * Fills in a match-argument struct (used as an lvalue, not a pointer):
 * stack_p is cleared to null and options, region, start and gpos are copied
 * from the arguments.  When USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE is defined,
 * best_len is additionally reset to ONIG_MISMATCH.
 */
416 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
417 # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
418  (msa).stack_p = (void* )0;\
419  (msa).options = (arg_option);\
420  (msa).region = (arg_region);\
421  (msa).start = (arg_start);\
422  (msa).gpos = (arg_gpos);\
423  (msa).best_len = ONIG_MISMATCH;\
424 } while(0)
425 #else
426 # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
427  (msa).stack_p = (void* )0;\
428  (msa).options = (arg_option);\
429  (msa).region = (arg_region);\
430  (msa).start = (arg_start);\
431  (msa).gpos = (arg_gpos);\
432 } while(0)
433 #endif
434 
435 #ifdef USE_COMBINATION_EXPLOSION_CHECK
436 
/*
 * Buffers of at least this many bytes are obtained with xmalloc (and later
 * released by MATCH_ARG_FREE); smaller ones come from xalloca.
 */
437 # define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
438 
/*
 * STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
 *
 * Sets up (msa).state_check_buff, a bit array with state_num bits for each
 * of str_len + 1 positions, rounded up to whole bytes.
 *
 * - `offset` must be an lvalue: it is overwritten with the byte offset
 *   ((offset * state_num) >> 3) and only the bytes from that offset to the
 *   end of the buffer are zeroed.
 * - The buffer is created only when state_num > 0, str_len reaches
 *   STATE_CHECK_STRING_THRESHOLD_LEN, and the computed size is non-zero,
 *   greater than the byte offset and below STATE_CHECK_BUFF_MAX_SIZE.
 *   Otherwise state_check_buff is null and state_check_buff_size is 0.
 * - On xmalloc failure CHECK_NULL_RETURN_MEMERR leaves the *enclosing*
 *   function.
 * NOTE(review): `str_len` is used unparenthesised in the first comparison;
 * pass a simple expression.
 */
439 # define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
440  if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
441  unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
442  offset = ((offset) * (state_num)) >> 3;\
443  if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
444  if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
445  (msa).state_check_buff = (void* )xmalloc(size);\
446  CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
447  }\
448  else \
449  (msa).state_check_buff = (void* )xalloca(size);\
450  xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
451  (size_t )(size - (offset))); \
452  (msa).state_check_buff_size = size;\
453  }\
454  else {\
455  (msa).state_check_buff = (void* )0;\
456  (msa).state_check_buff_size = 0;\
457  }\
458  }\
459  else {\
460  (msa).state_check_buff = (void* )0;\
461  (msa).state_check_buff_size = 0;\
462  }\
463  } while(0)
464 
/*
 * MATCH_ARG_FREE(msa): frees (msa).stack_p when set, and frees
 * state_check_buff only when its recorded size shows it was obtained with
 * xmalloc (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE).
 */
465 # define MATCH_ARG_FREE(msa) do {\
466  if ((msa).stack_p) xfree((msa).stack_p);\
467  if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
468  if ((msa).state_check_buff) xfree((msa).state_check_buff);\
469  }\
470 } while(0)
471 #else /* USE_COMBINATION_EXPLOSION_CHECK */
/*
 * MATCH_ARG_FREE(msa): frees (msa).stack_p when it is set.
 * Wrapped in do { } while(0), like the USE_COMBINATION_EXPLOSION_CHECK
 * variant, so it behaves as one statement and cannot capture a following
 * `else`.
 */
# define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
} while(0)
473 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
474 
475 
476 
/* Index arrays with more than this many entries go to the heap. */
477 #define MAX_PTR_NUM 100
478 
/*
 * STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num)
 *
 * Sets up the stack-index array (alloc_addr) and the match stack
 * (stk_alloc / stk_base / stk / stk_end) in the enclosing function, which
 * must also provide `msa`.
 *
 * - ptr_num > MAX_PTR_NUM: the index array is xmalloc'ed and heap_addr is
 *   set to it; the stack is msa->stack_p when present, else a fresh xalloca
 *   block of stack_num entries.
 * - otherwise, msa->stack_p present: the index array comes from xalloca,
 *   heap_addr is NULL and the saved stack of msa->stack_n entries is reused.
 * - otherwise: one xalloca block holds the index array followed by a stack
 *   of stack_num entries; heap_addr is NULL.
 *
 * NOTE(review): the xmalloc result in the first branch is not checked for
 * null here -- confirm that the caller or the allocator handles failure.
 */
479 #define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
480  if (ptr_num > MAX_PTR_NUM) {\
481  alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
482  heap_addr = alloc_addr;\
483  if (msa->stack_p) {\
484  stk_alloc = (OnigStackType* )(msa->stack_p);\
485  stk_base = stk_alloc;\
486  stk = stk_base;\
487  stk_end = stk_base + msa->stack_n;\
488  } else {\
489  stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
490  stk_base = stk_alloc;\
491  stk = stk_base;\
492  stk_end = stk_base + (stack_num);\
493  }\
494  } else if (msa->stack_p) {\
495  alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
496  heap_addr = NULL;\
497  stk_alloc = (OnigStackType* )(msa->stack_p);\
498  stk_base = stk_alloc;\
499  stk = stk_base;\
500  stk_end = stk_base + msa->stack_n;\
501  }\
502  else {\
503  alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
504  + sizeof(OnigStackType) * (stack_num));\
505  heap_addr = NULL;\
506  stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
507  stk_base = stk_alloc;\
508  stk = stk_base;\
509  stk_end = stk_base + (stack_num);\
510  }\
511 } while(0)
512 
/*
 * STACK_SAVE: when the current stack base differs from stk_alloc (the block
 * STACK_INIT started from), record the base and its capacity in msa->stack_p
 * and msa->stack_n.  Uses stk_base, stk_end, stk_alloc and msa from the
 * enclosing scope.
 */
513 #define STACK_SAVE do{\
514  if (stk_base != stk_alloc) {\
515  msa->stack_p = stk_base;\
516  msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
517  };\
518 } while(0)
519 
520 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
521 
522 extern unsigned int
523 onig_get_match_stack_limit_size(void)
524 {
525  return MatchStackLimitSize;
526 }
527 
528 extern int
529 onig_set_match_stack_limit_size(unsigned int size)
530 {
531  MatchStackLimitSize = size;
532  return 0;
533 }
534 
535 static int
536 stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
537  OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
538 {
539  size_t n;
540  OnigStackType *x, *stk_base, *stk_end, *stk;
541 
542  stk_base = *arg_stk_base;
543  stk_end = *arg_stk_end;
544  stk = *arg_stk;
545 
546  n = stk_end - stk_base;
547  if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
548  x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
549  if (IS_NULL(x)) {
550  STACK_SAVE;
551  return ONIGERR_MEMORY;
552  }
553  xmemcpy(x, stk_base, n * sizeof(OnigStackType));
554  n *= 2;
555  }
556  else {
557  unsigned int limit_size = MatchStackLimitSize;
558  n *= 2;
559  if (limit_size != 0 && n > limit_size) {
560  if ((unsigned int )(stk_end - stk_base) == limit_size)
561  return ONIGERR_MATCH_STACK_LIMIT_OVER;
562  else
563  n = limit_size;
564  }
565  x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
566  if (IS_NULL(x)) {
567  STACK_SAVE;
568  return ONIGERR_MEMORY;
569  }
570  }
571  *arg_stk = x + (stk - stk_base);
572  *arg_stk_base = x;
573  *arg_stk_end = x + n;
574  return 0;
575 }
576 
/*
 * STACK_ENSURE(n): make sure at least n free entries remain between stk and
 * stk_end, calling stack_double() once when they do not.  If growing fails,
 * the stack is saved with STACK_SAVE, xmalloc_base (a local of the enclosing
 * function) is freed when set, and the *enclosing function* returns the
 * error code.
 */
577 #define STACK_ENSURE(n) do {\
578  if (stk_end - stk < (n)) {\
579  int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
580  if (r != 0) {\
581  STACK_SAVE;\
582  if (xmalloc_base) xfree(xmalloc_base);\
583  return r;\
584  }\
585  }\
586 } while(0)
587 
/*
 * Convert between a stack entry pointer and its index relative to stk_base.
 * Indices stay meaningful when the stack is reallocated; raw pointers do not.
 */
588 #define STACK_AT(index) (stk_base + (index))
589 #define GET_STACK_INDEX(stk) ((stk) - stk_base)
590 
/*
 * STACK_PUSH_TYPE(stack_type): push an entry that carries only a type tag.
 * STACK_INC is defined outside this section (presumably it advances stk).
 */
591 #define STACK_PUSH_TYPE(stack_type) do {\
592  STACK_ENSURE(1);\
593  stk->type = (stack_type);\
594  STACK_INC;\
595 } while(0)
596 
/* True when the entry's tag has any bit of STK_MASK_TO_VOID_TARGET set. */
597 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
598 
/*
 * State pushes, in two variants.  With USE_COMBINATION_EXPLOSION_CHECK the
 * entries also carry a state_check number, and a bit array
 * (state_check_buff) keeps one bit per (string position, check number).
 * All macros rely on locals of the enclosing function (stk, str,
 * state_check_buff, num_comb_exp_check, ...).
 */
599 #ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Bit index for check number snum (1-based) at string position s. */
600 # define STATE_CHECK_POS(s,snum) \
601  (((s) - str) * num_comb_exp_check + ((snum) - 1))
/*
 * STATE_CHECK_VAL(v,snum): v becomes non-zero when the bit for snum at the
 * current position is set, and 0 when it is clear or there is no buffer.
 * The position is the enclosing scope's `s`, not a macro argument; the
 * result is the masked bit, not normalised to 1.
 */
602 # define STATE_CHECK_VAL(v,snum) do {\
603  if (state_check_buff != NULL) {\
604  ptrdiff_t x = STATE_CHECK_POS(s,snum);\
605  (v) = state_check_buff[x/8] & (1<<(x%8));\
606  }\
607  else (v) = 0;\
608 } while(0)
609 
610 
/*
 * Extra `else if` arm spliced into the pop loops: when a
 * STK_STATE_CHECK_MARK entry is popped, its bit is set in state_check_buff.
 * The buffer is not tested for null here; STACK_PUSH_STATE_CHECK only pushes
 * such marks when the buffer exists.
 */
611 # define ELSE_IF_STATE_CHECK_MARK(stk) \
612  else if ((stk)->type == STK_STATE_CHECK_MARK) { \
613  ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
614  state_check_buff[x/8] |= (1<<(x%8)); \
615  }
616 
/*
 * STACK_PUSH(stack_type,pat,s,sprev,keep): push a full state entry (pattern
 * position, string position, previous string position, keep position) with
 * state_check = 0.
 */
617 # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
618  STACK_ENSURE(1);\
619  stk->type = (stack_type);\
620  stk->u.state.pcode = (pat);\
621  stk->u.state.pstr = (s);\
622  stk->u.state.pstr_prev = (sprev);\
623  stk->u.state.state_check = 0;\
624  stk->u.state.pkeep = (keep);\
625  STACK_INC;\
626 } while(0)
627 
/*
 * STACK_PUSH_ENSURED(stack_type,pat): push without calling STACK_ENSURE; the
 * caller must already have reserved room.  Only type, pcode and state_check
 * are written.
 */
628 # define STACK_PUSH_ENSURED(stack_type,pat) do {\
629  stk->type = (stack_type);\
630  stk->u.state.pcode = (pat);\
631  stk->u.state.state_check = 0;\
632  STACK_INC;\
633 } while(0)
634 
/*
 * Like STACK_PUSH(STK_ALT, ...) but records check number snum in the entry
 * (0 when there is no state-check buffer).
 */
635 # define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
636  STACK_ENSURE(1);\
637  stk->type = STK_ALT;\
638  stk->u.state.pcode = (pat);\
639  stk->u.state.pstr = (s);\
640  stk->u.state.pstr_prev = (sprev);\
641  stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
642  stk->u.state.pkeep = (keep);\
643  STACK_INC;\
644 } while(0)
645 
/*
 * Push a STK_STATE_CHECK_MARK for (s, snum); does nothing when there is no
 * state-check buffer.
 */
646 # define STACK_PUSH_STATE_CHECK(s,snum) do {\
647  if (state_check_buff != NULL) {\
648  STACK_ENSURE(1);\
649  stk->type = STK_STATE_CHECK_MARK;\
650  stk->u.state.pstr = (s);\
651  stk->u.state.state_check = (snum);\
652  STACK_INC;\
653  }\
654 } while(0)
655 
656 #else /* USE_COMBINATION_EXPLOSION_CHECK */
657 
/* Without the explosion check the extra pop arm expands to nothing. */
658 # define ELSE_IF_STATE_CHECK_MARK(stk)
659 
/* Same as the variant above, minus the state_check field. */
660 # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
661  STACK_ENSURE(1);\
662  stk->type = (stack_type);\
663  stk->u.state.pcode = (pat);\
664  stk->u.state.pstr = (s);\
665  stk->u.state.pstr_prev = (sprev);\
666  stk->u.state.pkeep = (keep);\
667  STACK_INC;\
668 } while(0)
669 
/* Push without STACK_ENSURE; only type and pcode are written. */
670 # define STACK_PUSH_ENSURED(stack_type,pat) do {\
671  stk->type = (stack_type);\
672  stk->u.state.pcode = (pat);\
673  STACK_INC;\
674 } while(0)
675 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
676 
/*
 * Convenience wrappers: each pushes one entry with the tag named in the
 * macro.  STACK_PUSH_POS stores NULL_UCHARP as the pattern position;
 * STACK_PUSH_ABSENT and STACK_PUSH_STOP_BT push a bare tag.
 */
677 #define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
678 #define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
679 #define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
680 #define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT)
681 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
682 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
683  STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
684 
/*
 * STACK_PUSH_REPEAT(id, pat): push a STK_REPEAT entry for repeat `id` with
 * its pattern position and a count starting at 0.
 */
685 #define STACK_PUSH_REPEAT(id, pat) do {\
686  STACK_ENSURE(1);\
687  stk->type = STK_REPEAT;\
688  stk->u.repeat.num = (id);\
689  stk->u.repeat.pcode = (pat);\
690  stk->u.repeat.count = 0;\
691  STACK_INC;\
692 } while(0)
693 
/*
 * STACK_PUSH_REPEAT_INC(sindex): record the stack index of a STK_REPEAT
 * entry; the pop macros decrement that entry's count when this entry is
 * popped.
 */
694 #define STACK_PUSH_REPEAT_INC(sindex) do {\
695  STACK_ENSURE(1);\
696  stk->type = STK_REPEAT_INC;\
697  stk->u.repeat_inc.si = (sindex);\
698  STACK_INC;\
699 } while(0)
700 
/*
 * STACK_PUSH_MEM_START(mnum, s): push the start of group mnum at position s.
 * The previous mem_start_stk / mem_end_stk values are saved in the entry so
 * the pop macros can restore them; mem_start_stk[mnum] then points at this
 * entry and mem_end_stk[mnum] becomes INVALID_STACK_INDEX.
 */
701 #define STACK_PUSH_MEM_START(mnum, s) do {\
702  STACK_ENSURE(1);\
703  stk->type = STK_MEM_START;\
704  stk->u.mem.num = (mnum);\
705  stk->u.mem.pstr = (s);\
706  stk->u.mem.start = mem_start_stk[mnum];\
707  stk->u.mem.end = mem_end_stk[mnum];\
708  mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
709  mem_end_stk[mnum] = INVALID_STACK_INDEX;\
710  STACK_INC;\
711 } while(0)
712 
/*
 * STACK_PUSH_MEM_END(mnum, s): push the end of group mnum at position s,
 * saving the previous start/end indices and pointing mem_end_stk[mnum] at
 * this entry.  mem_start_stk[mnum] is left unchanged.
 */
713 #define STACK_PUSH_MEM_END(mnum, s) do {\
714  STACK_ENSURE(1);\
715  stk->type = STK_MEM_END;\
716  stk->u.mem.num = (mnum);\
717  stk->u.mem.pstr = (s);\
718  stk->u.mem.start = mem_start_stk[mnum];\
719  stk->u.mem.end = mem_end_stk[mnum];\
720  mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
721  STACK_INC;\
722 } while(0)
723 
/*
 * STACK_PUSH_MEM_END_MARK(mnum): push a marker for group mnum that carries
 * only the group number (no position, no saved indices).
 */
724 #define STACK_PUSH_MEM_END_MARK(mnum) do {\
725  STACK_ENSURE(1);\
726  stk->type = STK_MEM_END_MARK;\
727  stk->u.mem.num = (mnum);\
728  STACK_INC;\
729 } while(0)
730 
/*
 * STACK_GET_MEM_START(mnum, k): walk down from the top of the stack to the
 * STK_MEM_START entry of group mnum that belongs to the current nesting
 * level.  Each MEM_END / MEM_END_MARK entry for mnum met on the way raises
 * the level, each MEM_START for mnum lowers it; the search stops at a
 * MEM_START seen at level 0.  If none is found, k ends at stk_base.
 */
731 #define STACK_GET_MEM_START(mnum, k) do {\
732  int level = 0;\
733  k = stk;\
734  while (k > stk_base) {\
735  k--;\
736  if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
737  && k->u.mem.num == (mnum)) {\
738  level++;\
739  }\
740  else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
741  if (level == 0) break;\
742  level--;\
743  }\
744  }\
745 } while(0)
746 
/*
 * STACK_GET_MEM_RANGE(k, mnum, start, end): walk up from k towards the top
 * of the stack.  `start` receives the position of the first MEM_START for
 * mnum seen at level 0, `end` the position of the STK_MEM_END that brings
 * the level back to 0.  k is advanced by the macro; `start` / `end` are
 * left untouched when no matching entry is found.
 */
747 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
748  int level = 0;\
749  while (k < stk) {\
750  if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
751  if (level == 0) (start) = k->u.mem.pstr;\
752  level++;\
753  }\
754  else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
755  level--;\
756  if (level == 0) {\
757  (end) = k->u.mem.pstr;\
758  break;\
759  }\
760  }\
761  k++;\
762  }\
763 } while(0)
764 
/*
 * STACK_PUSH_NULL_CHECK_START(cnum, s): remember string position s for
 * null-check number cnum; the STACK_NULL_CHECK* macros compare against it
 * later.
 */
765 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
766  STACK_ENSURE(1);\
767  stk->type = STK_NULL_CHECK_START;\
768  stk->u.null_check.num = (cnum);\
769  stk->u.null_check.pstr = (s);\
770  STACK_INC;\
771 } while(0)
772 
/*
 * STACK_PUSH_NULL_CHECK_END(cnum): push the end marker for null check cnum;
 * the *_REC lookups use it to count nesting levels.
 */
773 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\
774  STACK_ENSURE(1);\
775  stk->type = STK_NULL_CHECK_END;\
776  stk->u.null_check.num = (cnum);\
777  STACK_INC;\
778 } while(0)
779 
/* Push a call frame holding the pattern address that STACK_RETURN reads back. */
780 #define STACK_PUSH_CALL_FRAME(pat) do {\
781  STACK_ENSURE(1);\
782  stk->type = STK_CALL_FRAME;\
783  stk->u.call_frame.ret_addr = (pat);\
784  STACK_INC;\
785 } while(0)
786 
/* Push a bare STK_RETURN marker (counted by STACK_RETURN / STACK_GET_REPEAT). */
787 #define STACK_PUSH_RETURN do {\
788  STACK_ENSURE(1);\
789  stk->type = STK_RETURN;\
790  STACK_INC;\
791 } while(0)
792 
/*
 * STACK_PUSH_ABSENT_POS(start, end): save a pair of string positions; they
 * are read back by STACK_POP_ABSENT_POS.
 */
793 #define STACK_PUSH_ABSENT_POS(start, end) do {\
794  STACK_ENSURE(1);\
795  stk->type = STK_ABSENT_POS;\
796  stk->u.absent_pos.abs_pstr = (start);\
797  stk->u.absent_pos.end_pstr = (end);\
798  STACK_INC;\
799 } while(0)
800 
801 
/*
 * STACK_BASE_CHECK(p, at): debug-only underflow guard.  With ONIG_DEBUG it
 * reports `at` on stderr and jumps to the enclosing function's stack_error
 * label when p has dropped below stk_base; otherwise it expands to nothing,
 * so the downward scans below are not bounds-checked in normal builds.
 */
802 #ifdef ONIG_DEBUG
803 # define STACK_BASE_CHECK(p, at) \
804  if ((p) < stk_base) {\
805  fprintf(stderr, "at %s\n", at);\
806  goto stack_error;\
807  }
808 #else
809 # define STACK_BASE_CHECK(p, at)
810 #endif
811 
/* Drop exactly one entry from the top of the stack, whatever its tag. */
812 #define STACK_POP_ONE do {\
813  stk--;\
814  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
815 } while(0)
816 
/*
 * STACK_POP: unwind to the nearest entry whose tag has bits in
 * STK_MASK_POP_USED (STK_ALT, STK_LOOK_BEHIND_NOT, STK_POS_NOT) and leave
 * stk pointing at it.  How much bookkeeping is undone on the way depends on
 * pop_level (a local of the enclosing function):
 *
 *   STACK_POP_LEVEL_FREE       nothing is restored;
 *   STACK_POP_LEVEL_MEM_START  STK_MEM_START entries restore
 *                              mem_start_stk / mem_end_stk;
 *   anything else              additionally, STK_REPEAT_INC entries
 *                              decrement the count of the repeat they refer
 *                              to and STK_MEM_END entries restore the group
 *                              indices.
 *
 * In every mode, state-check marks are handled through
 * ELSE_IF_STATE_CHECK_MARK.
 */
817 #define STACK_POP do {\
818  switch (pop_level) {\
819  case STACK_POP_LEVEL_FREE:\
820  while (1) {\
821  stk--;\
822  STACK_BASE_CHECK(stk, "STACK_POP"); \
823  if ((stk->type & STK_MASK_POP_USED) != 0) break;\
824  ELSE_IF_STATE_CHECK_MARK(stk);\
825  }\
826  break;\
827  case STACK_POP_LEVEL_MEM_START:\
828  while (1) {\
829  stk--;\
830  STACK_BASE_CHECK(stk, "STACK_POP 2"); \
831  if ((stk->type & STK_MASK_POP_USED) != 0) break;\
832  else if (stk->type == STK_MEM_START) {\
833  mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
834  mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
835  }\
836  ELSE_IF_STATE_CHECK_MARK(stk);\
837  }\
838  break;\
839  default:\
840  while (1) {\
841  stk--;\
842  STACK_BASE_CHECK(stk, "STACK_POP 3"); \
843  if ((stk->type & STK_MASK_POP_USED) != 0) break;\
844  else if (stk->type == STK_MEM_START) {\
845  mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
846  mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
847  }\
848  else if (stk->type == STK_REPEAT_INC) {\
849  STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
850  }\
851  else if (stk->type == STK_MEM_END) {\
852  mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
853  mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
854  }\
855  ELSE_IF_STATE_CHECK_MARK(stk);\
856  }\
857  break;\
858  }\
859 } while(0)
860 
/*
 * STACK_POP_TIL_POS_NOT / STACK_POP_TIL_LOOK_BEHIND_NOT /
 * STACK_POP_TIL_ABSENT
 *
 * Unwind the stack until an entry with the named tag (STK_POS_NOT,
 * STK_LOOK_BEHIND_NOT or STK_ABSENT) is on top, leaving stk pointing at it.
 * The three macros differ only in that stop tag.  Every entry popped on the
 * way gets the full undo treatment: STK_MEM_START and STK_MEM_END restore
 * mem_start_stk / mem_end_stk, STK_REPEAT_INC decrements the referenced
 * repeat count, and state-check marks go through ELSE_IF_STATE_CHECK_MARK.
 */
861 #define STACK_POP_TIL_POS_NOT do {\
862  while (1) {\
863  stk--;\
864  STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
865  if (stk->type == STK_POS_NOT) break;\
866  else if (stk->type == STK_MEM_START) {\
867  mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
868  mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
869  }\
870  else if (stk->type == STK_REPEAT_INC) {\
871  STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
872  }\
873  else if (stk->type == STK_MEM_END) {\
874  mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
875  mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
876  }\
877  ELSE_IF_STATE_CHECK_MARK(stk);\
878  }\
879 } while(0)
880 
881 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
882  while (1) {\
883  stk--;\
884  STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
885  if (stk->type == STK_LOOK_BEHIND_NOT) break;\
886  else if (stk->type == STK_MEM_START) {\
887  mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
888  mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
889  }\
890  else if (stk->type == STK_REPEAT_INC) {\
891  STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
892  }\
893  else if (stk->type == STK_MEM_END) {\
894  mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
895  mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
896  }\
897  ELSE_IF_STATE_CHECK_MARK(stk);\
898  }\
899 } while(0)
900 
901 #define STACK_POP_TIL_ABSENT do {\
902  while (1) {\
903  stk--;\
904  STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
905  if (stk->type == STK_ABSENT) break;\
906  else if (stk->type == STK_MEM_START) {\
907  mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
908  mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
909  }\
910  else if (stk->type == STK_REPEAT_INC) {\
911  STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
912  }\
913  else if (stk->type == STK_MEM_END) {\
914  mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
915  mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
916  }\
917  ELSE_IF_STATE_CHECK_MARK(stk);\
918  }\
919 } while(0)
920 
/*
 * STACK_POP_ABSENT_POS(start, end): pop the top entry and read back the two
 * positions stored by STACK_PUSH_ABSENT_POS.  The tag of the popped entry is
 * not checked.
 */
921 #define STACK_POP_ABSENT_POS(start, end) do {\
922  stk--;\
923  STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
924  (start) = stk->u.absent_pos.abs_pstr;\
925  (end) = stk->u.absent_pos.end_pstr;\
926 } while(0)
927 
/*
 * STACK_POS_END(k): scan down from the top to the nearest STK_POS entry,
 * turning every entry that matches IS_TO_VOID_TARGET on the way, and the
 * STK_POS entry itself, into STK_VOID.  stk is not moved; k (a caller
 * variable) is left at the former STK_POS entry.
 */
928 #define STACK_POS_END(k) do {\
929  k = stk;\
930  while (1) {\
931  k--;\
932  STACK_BASE_CHECK(k, "STACK_POS_END"); \
933  if (IS_TO_VOID_TARGET(k)) {\
934  k->type = STK_VOID;\
935  }\
936  else if (k->type == STK_POS) {\
937  k->type = STK_VOID;\
938  break;\
939  }\
940  }\
941 } while(0)
942 
/*
 * STACK_STOP_BT_END: same voiding scan as STACK_POS_END, but it stops at the
 * nearest STK_STOP_BT entry and uses its own local cursor.
 */
943 #define STACK_STOP_BT_END do {\
944  OnigStackType *k = stk;\
945  while (1) {\
946  k--;\
947  STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
948  if (IS_TO_VOID_TARGET(k)) {\
949  k->type = STK_VOID;\
950  }\
951  else if (k->type == STK_STOP_BT) {\
952  k->type = STK_VOID;\
953  break;\
954  }\
955  }\
956 } while(0)
957 
/*
 * STACK_NULL_CHECK(isnull,id,s): find the nearest STK_NULL_CHECK_START entry
 * for check number `id` below the top of the stack and set isnull to 1 when
 * the position recorded there equals s, 0 otherwise.  The stack is not
 * modified.  The scan has no lower bound outside ONIG_DEBUG builds, so a
 * matching start entry must exist.
 */
958 #define STACK_NULL_CHECK(isnull,id,s) do {\
959  OnigStackType* k = stk;\
960  while (1) {\
961  k--;\
962  STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
963  if (k->type == STK_NULL_CHECK_START) {\
964  if (k->u.null_check.num == (id)) {\
965  (isnull) = (k->u.null_check.pstr == (s));\
966  break;\
967  }\
968  }\
969  }\
970 } while(0)
971 
/*
 * STACK_NULL_CHECK_REC(isnull,id,s): nesting-aware variant.  Each
 * STK_NULL_CHECK_END met on the way down raises the level and each start
 * entry for `id` met at a non-zero level lowers it; the comparison is made
 * against the start entry for `id` found at level 0.
 * NOTE(review): the level is raised for *any* STK_NULL_CHECK_END, whereas
 * STACK_NULL_CHECK_MEMST_REC only counts end entries whose number equals
 * `id` -- confirm whether the difference is intended.
 */
972 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
973  int level = 0;\
974  OnigStackType* k = stk;\
975  while (1) {\
976  k--;\
977  STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
978  if (k->type == STK_NULL_CHECK_START) {\
979  if (k->u.null_check.num == (id)) {\
980  if (level == 0) {\
981  (isnull) = (k->u.null_check.pstr == (s));\
982  break;\
983  }\
984  else level--;\
985  }\
986  }\
987  else if (k->type == STK_NULL_CHECK_END) {\
988  level++;\
989  }\
990  }\
991 } while(0)
992 
/*
 * STACK_NULL_CHECK_MEMST(isnull,id,s,reg): null check that also looks at the
 * capture groups opened since the check started.
 *
 * The nearest STK_NULL_CHECK_START entry for `id` is located first.  If its
 * position differs from s, isnull = 0.  Otherwise isnull starts at 1 and
 * every STK_MEM_START entry between that point and the top of the stack is
 * inspected:
 *   - a saved end index of INVALID_STACK_INDEX gives isnull = 0 and stops;
 *   - the group's end position is read through the stack when the group's
 *     bit is set in reg->bt_mem_end, and taken directly from the saved end
 *     field otherwise;
 *   - a start position different from the end position gives isnull = 0 and
 *     stops;
 *   - an empty group whose end position is not s gives isnull = -1
 *     ("empty, but position changed") and the scan continues.
 */
993 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
994  OnigStackType* k = stk;\
995  while (1) {\
996  k--;\
997  STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
998  if (k->type == STK_NULL_CHECK_START) {\
999  if (k->u.null_check.num == (id)) {\
1000  if (k->u.null_check.pstr != (s)) {\
1001  (isnull) = 0;\
1002  break;\
1003  }\
1004  else {\
1005  UChar* endp;\
1006  (isnull) = 1;\
1007  while (k < stk) {\
1008  if (k->type == STK_MEM_START) {\
1009  if (k->u.mem.end == INVALID_STACK_INDEX) {\
1010  (isnull) = 0; break;\
1011  }\
1012  if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1013  endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1014  else\
1015  endp = (UChar* )k->u.mem.end;\
1016  if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1017  (isnull) = 0; break;\
1018  }\
1019  else if (endp != s) {\
1020  (isnull) = -1; /* empty, but position changed */ \
1021  }\
1022  }\
1023  k++;\
1024  }\
1025  break;\
1026  }\
1027  }\
1028  }\
1029  }\
1030 } while(0)
1031 
/*
 * STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg): nesting-aware form of
 * STACK_NULL_CHECK_MEMST.  STK_NULL_CHECK_END entries for `id` raise the
 * level and start entries for `id` met at a non-zero level lower it; the
 * group inspection described above runs from the start entry found at
 * level 0.
 */
1032 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
1033  int level = 0;\
1034  OnigStackType* k = stk;\
1035  while (1) {\
1036  k--;\
1037  STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
1038  if (k->type == STK_NULL_CHECK_START) {\
1039  if (k->u.null_check.num == (id)) {\
1040  if (level == 0) {\
1041  if (k->u.null_check.pstr != (s)) {\
1042  (isnull) = 0;\
1043  break;\
1044  }\
1045  else {\
1046  UChar* endp;\
1047  (isnull) = 1;\
1048  while (k < stk) {\
1049  if (k->type == STK_MEM_START) {\
1050  if (k->u.mem.end == INVALID_STACK_INDEX) {\
1051  (isnull) = 0; break;\
1052  }\
1053  if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1054  endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1055  else\
1056  endp = (UChar* )k->u.mem.end;\
1057  if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1058  (isnull) = 0; break;\
1059  }\
1060  else if (endp != s) {\
1061  (isnull) = -1; /* empty, but position changed */ \
1062  }\
1063  }\
1064  k++;\
1065  }\
1066  break;\
1067  }\
1068  }\
1069  else {\
1070  level--;\
1071  }\
1072  }\
1073  }\
1074  else if (k->type == STK_NULL_CHECK_END) {\
1075  if (k->u.null_check.num == (id)) level++;\
1076  }\
1077  }\
1078 } while(0)
1079 
/*
 * STACK_GET_REPEAT(id, k): scan down from the top of the stack and leave k
 * (a caller variable) on the STK_REPEAT entry for repeat `id`.  A STK_REPEAT
 * entry is only accepted while level is 0; STK_RETURN entries raise the
 * level and STK_CALL_FRAME entries lower it.  The scan has no lower bound
 * outside ONIG_DEBUG builds, so the entry must exist.
 */
1080 #define STACK_GET_REPEAT(id, k) do {\
1081  int level = 0;\
1082  k = stk;\
1083  while (1) {\
1084  k--;\
1085  STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
1086  if (k->type == STK_REPEAT) {\
1087  if (level == 0) {\
1088  if (k->u.repeat.num == (id)) {\
1089  break;\
1090  }\
1091  }\
1092  }\
1093  else if (k->type == STK_CALL_FRAME) level--;\
1094  else if (k->type == STK_RETURN) level++;\
1095  }\
1096 } while(0)
1097 
/*
 * STACK_RETURN(addr): find the call frame matching the current return and
 * store its saved pattern address in addr.  STK_RETURN entries met on the
 * way down raise the level, and a STK_CALL_FRAME met at a non-zero level
 * lowers it; the frame found at level 0 is the one used.  The stack is not
 * popped.
 */
1098 #define STACK_RETURN(addr) do {\
1099  int level = 0;\
1100  OnigStackType* k = stk;\
1101  while (1) {\
1102  k--;\
1103  STACK_BASE_CHECK(k, "STACK_RETURN"); \
1104  if (k->type == STK_CALL_FRAME) {\
1105  if (level == 0) {\
1106  (addr) = k->u.call_frame.ret_addr;\
1107  break;\
1108  }\
1109  else level--;\
1110  }\
1111  else if (k->type == STK_RETURN)\
1112  level++;\
1113  }\
1114 } while(0)
1115 
1116 
/*
 * STRING_CMP(s1,s2,len): compare len bytes and jump to the enclosing
 * function's `fail` label at the first difference.  All three arguments must
 * be plain modifiable variables: s1 and s2 are advanced and len is counted
 * down by the macro.
 */
1117 #define STRING_CMP(s1,s2,len) do {\
1118  while (len-- > 0) {\
1119  if (*s1++ != *s2++) goto fail;\
1120  }\
1121 } while(0)
1122 
/*
 * STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end): case-insensitive
 * counterpart built on string_cmp_ic(); jumps to `fail` on a mismatch.
 * `encode` is taken from the enclosing scope, and ps2 is a pointer to the
 * second cursor, which string_cmp_ic() advances on success.
 */
1123 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
1124  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1125  goto fail; \
1126 } while(0)
1127 
1128 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
1129  UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
1130 {
1131  UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1132  UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1133  UChar *p1, *p2, *end1, *s2;
1134  int len1, len2;
1135 
1136  s2 = *ps2;
1137  end1 = s1 + mblen;
1138  while (s1 < end1) {
1139  len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1140  len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
1141  if (len1 != len2) return 0;
1142  p1 = buf1;
1143  p2 = buf2;
1144  while (len1-- > 0) {
1145  if (*p1 != *p2) return 0;
1146  p1++;
1147  p2++;
1148  }
1149  }
1150 
1151  *ps2 = s2;
1152  return 1;
1153 }
1154 
/*
 * STRING_CMP_VALUE(s1, s2, len, is_fail): like STRING_CMP, but instead of
 * jumping it records the outcome: is_fail becomes 1 at the first differing
 * byte (the scan stops there) and 0 when all len bytes are equal.
 * s1, s2 and len are modified in place.
 */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  for (is_fail = 0; len-- > 0; ) {\
    if (*s1++ == *s2++) continue;\
    is_fail = 1;\
    break;\
  }\
} while(0)
1163 
/*
 * STRING_CMP_VALUE_IC: case-insensitive counterpart of STRING_CMP_VALUE.
 * Calls string_cmp_ic() with the `encode` variable of the enclosing scope
 * and stores 1 in is_fail when it returns 0 (mismatch), 0 otherwise.
 */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
  is_fail = (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
              ? 1 : 0; \
} while(0)
1170 
1171 
/* Subject-text position predicates.  They expand to comparisons against the
   variables str and end, which must be in scope where the macro is used. */
1172 #define IS_EMPTY_STR (str == end)
1173 #define ON_STR_BEGIN(s) ((s) == str)
1174 #define ON_STR_END(s) ((s) == end)
/* DATA_ENSURE family: bounds checks on the current position s.
 *   DATA_ENSURE_CHECK1        true when at least one byte is left at s.
 *   DATA_ENSURE_CHECK(n)      true when n bytes are left at s.
 *   DATA_ENSURE(n)            jumps to the `fail` label when fewer than n
 *                             bytes are left.
 *   DATA_ENSURE_CONTINUE(n)   `continue`s the enclosing loop in that case.
 *   ABSENT_END_POS            the limit used by these checks.
 * The limit is right_range when
 * USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is defined and end
 * otherwise.
 * NOTE: DATA_ENSURE and DATA_ENSURE_CONTINUE expand to a bare `if` statement
 * (no do/while wrapper), so they must be used as complete statements and
 * never directly in front of an `else`. */
1175 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1176 # define DATA_ENSURE_CHECK1 (s < right_range)
1177 # define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
1178 # define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
1179 # define DATA_ENSURE_CONTINUE(n) if (s + (n) > right_range) continue
1180 # define ABSENT_END_POS right_range
1181 #else
1182 # define DATA_ENSURE_CHECK1 (s < end)
1183 # define DATA_ENSURE_CHECK(n) (s + (n) <= end)
1184 # define DATA_ENSURE(n) if (s + (n) > end) goto fail
1185 # define DATA_ENSURE_CONTINUE(n) if (s + (n) > end) continue
1186 # define ABSENT_END_POS end
1187 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1188 
1189 
1190 #ifdef USE_CAPTURE_HISTORY
1191 static int
1192 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1193  OnigStackType* stk_top, UChar* str, regex_t* reg)
1194 {
1195  int n, r;
1196  OnigCaptureTreeNode* child;
1197  OnigStackType* k = *kp;
1198 
1199  while (k < stk_top) {
1200  if (k->type == STK_MEM_START) {
1201  n = k->u.mem.num;
1202  if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1203  BIT_STATUS_AT(reg->capture_history, n) != 0) {
1204  child = history_node_new();
1205  CHECK_NULL_RETURN_MEMERR(child);
1206  child->group = n;
1207  child->beg = k->u.mem.pstr - str;
1208  r = history_tree_add_child(node, child);
1209  if (r != 0) {
1210  history_tree_free(child);
1211  return r;
1212  }
1213  *kp = (k + 1);
1214  r = make_capture_history_tree(child, kp, stk_top, str, reg);
1215  if (r != 0) return r;
1216 
1217  k = *kp;
1218  child->end = k->u.mem.pstr - str;
1219  }
1220  }
1221  else if (k->type == STK_MEM_END) {
1222  if (k->u.mem.num == node->group) {
1223  node->end = k->u.mem.pstr - str;
1224  *kp = k;
1225  return 0;
1226  }
1227  }
1228  k++;
1229  }
1230 
1231  return 1; /* 1: root node ending. */
1232 }
1233 #endif /* USE_CAPTURE_HISTORY */
1234 
1235 #ifdef USE_BACKREF_WITH_LEVEL
1236 static int mem_is_in_memp(int mem, int num, UChar* memp)
1237 {
1238  int i;
1239  MemNumType m;
1240 
1241  for (i = 0; i < num; i++) {
1242  GET_MEMNUM_INC(m, memp);
1243  if (mem == (int )m) return 1;
1244  }
1245  return 0;
1246 }
1247 
1248 static int backref_match_at_nested_level(regex_t* reg,
1249  OnigStackType* top, OnigStackType* stk_base,
1250  int ignore_case, int case_fold_flag,
1251  int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1252 {
1253  UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1254  int level;
1255  OnigStackType* k;
1256 
1257  level = 0;
1258  k = top;
1259  k--;
1260  while (k >= stk_base) {
1261  if (k->type == STK_CALL_FRAME) {
1262  level--;
1263  }
1264  else if (k->type == STK_RETURN) {
1265  level++;
1266  }
1267  else if (level == nest) {
1268  if (k->type == STK_MEM_START) {
1269  if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1270  pstart = k->u.mem.pstr;
1271  if (pend != NULL_UCHARP) {
1272  if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1273  p = pstart;
1274  ss = *s;
1275 
1276  if (ignore_case != 0) {
1277  if (string_cmp_ic(reg->enc, case_fold_flag,
1278  pstart, &ss, pend - pstart, send) == 0)
1279  return 0; /* or goto next_mem; */
1280  }
1281  else {
1282  while (p < pend) {
1283  if (*p++ != *ss++) return 0; /* or goto next_mem; */
1284  }
1285  }
1286 
1287  *s = ss;
1288  return 1;
1289  }
1290  }
1291  }
1292  else if (k->type == STK_MEM_END) {
1293  if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1294  pend = k->u.mem.pstr;
1295  }
1296  }
1297  }
1298  k--;
1299  }
1300 
1301  return 0;
1302 }
1303 #endif /* USE_BACKREF_WITH_LEVEL */
1304 
1305 
1306 #ifdef ONIG_DEBUG_STATISTICS
1307 
/* Timer backend for the per-opcode statistics.  Each branch defines the
   time stamps ts/te, GETTIME(t) to sample the clock into t, and
   TIMEDIFF(te,ts) to subtract two samples. */
1308 # ifdef _WIN32
1309 # include <windows.h>
/* freq is filled by QueryPerformanceFrequency() in onig_statistics_init(). */
1310 static LARGE_INTEGER ts, te, freq;
1311 # define GETTIME(t) QueryPerformanceCounter(&(t))
/* Counter delta multiplied by 1000000 and divided by the counter frequency. */
1312 # define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
1313  * 1000000 / freq.QuadPart)
1314 # else /* _WIN32 */
1315 
/* USE_TIMEOFDAY is defined unconditionally here, so the gettimeofday()
   branch below is the one compiled on non-Windows builds; the times()
   branch is only reached if this define is removed. */
1316 # define USE_TIMEOFDAY
1317 
1318 # ifdef USE_TIMEOFDAY
1319 # ifdef HAVE_SYS_TIME_H
1320 # include <sys/time.h>
1321 # endif
1322 # ifdef HAVE_UNISTD_H
1323 # include <unistd.h>
1324 # endif
1325 static struct timeval ts, te;
1326 # define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
/* tv_usec difference plus tv_sec difference scaled by 1000000. */
1327 # define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1328  (((te).tv_sec - (ts).tv_sec)*1000000))
1329 # else /* USE_TIMEOFDAY */
1330 # ifdef HAVE_SYS_TIMES_H
1331 # include <sys/times.h>
1332 # endif
1333 static struct tms ts, te;
1334 # define GETTIME(t) times(&(t))
/* Difference of the tms_utime fields; NOTE(review): the unit is whatever
   times() reports, not scaled like the other two variants -- confirm. */
1335 # define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1336 # endif /* USE_TIMEOFDAY */
1337 
1338 # endif /* _WIN32 */
1339 
/* Per-opcode statistics, indexed by opcode value (256 slots each). */
/* Number of times MOP_IN was entered for each opcode. */
1340 static int OpCounter[256];
/* For each opcode: how often it was the current opcode (OpCurr) at the
   moment MOP_IN was entered for OpPrevTarget. */
1341 static int OpPrevCounter[256];
/* Sum of TIMEDIFF(te, ts) accumulated by MOP_OUT for each opcode. */
1342 static unsigned long OpTime[256];
/* Opcode most recently passed to MOP_IN. */
1343 static int OpCurr = OP_FINISH;
/* Opcode whose predecessors are counted in OpPrevCounter. */
1344 static int OpPrevTarget = OP_FAIL;
/* Largest value of stk - stk_base observed by STACK_INC. */
1345 static int MaxStackDepth = 0;
1346 
/* MOP_IN(opcode): bookkeeping at the start of an opcode handler.  Updates
   OpPrevCounter for the opcode that was current so far, makes `opcode` the
   current one, bumps its counter and samples the start time into ts.
   `opcode` is expanded several times and without parentheses. */
1347 # define MOP_IN(opcode) do {\
1348  if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1349  OpCurr = opcode;\
1350  OpCounter[opcode]++;\
1351  GETTIME(ts);\
1352 } while(0)
1353 
/* MOP_OUT: bookkeeping at the end of an opcode handler.  Samples the end
   time into te and adds the elapsed time to the current opcode's total. */
1354 # define MOP_OUT do {\
1355  GETTIME(te);\
1356  OpTime[OpCurr] += TIMEDIFF(te, ts);\
1357 } while(0)
1358 
1359 extern void
1360 onig_statistics_init(void)
1361 {
1362  int i;
1363  for (i = 0; i < 256; i++) {
1364  OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1365  }
1366  MaxStackDepth = 0;
1367 # ifdef _WIN32
1368  QueryPerformanceFrequency(&freq);
1369 # endif
1370 }
1371 
1372 extern void
1373 onig_print_statistics(FILE* f)
1374 {
1375  int i;
1376  fprintf(f, " count prev time\n");
1377  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1378  fprintf(f, "%8d: %8d: %10lu: %s\n",
1379  OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1380  }
1381  fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1382 }
1383 
/* STACK_INC (statistics build): advance the stack top stk by one entry and
   record the new depth (stk - stk_base) in MaxStackDepth when it exceeds
   the largest depth seen so far. */
1384 # define STACK_INC do {\
1385  stk++;\
1386  if (stk - stk_base > MaxStackDepth) \
1387  MaxStackDepth = stk - stk_base;\
1388 } while(0)
1389 
1390 #else /* ONIG_DEBUG_STATISTICS */
/* Without ONIG_DEBUG_STATISTICS, STACK_INC only advances the stack top. */
1391 # define STACK_INC stk++
1392 
/* The per-opcode hooks expand to nothing in this configuration. */
1393 # define MOP_IN(opcode)
1394 # define MOP_OUT
1395 #endif /* ONIG_DEBUG_STATISTICS */
1396 
1397 
1398 #ifdef ONIG_DEBUG_MATCH
1399 static char *
1400 stack_type_str(int stack_type)
1401 {
1402  switch (stack_type) {
1403  case STK_ALT: return "Alt ";
1404  case STK_LOOK_BEHIND_NOT: return "LBNot ";
1405  case STK_POS_NOT: return "PosNot";
1406  case STK_MEM_START: return "MemS ";
1407  case STK_MEM_END: return "MemE ";
1408  case STK_REPEAT_INC: return "RepInc";
1409  case STK_STATE_CHECK_MARK: return "StChMk";
1410  case STK_NULL_CHECK_START: return "NulChS";
1411  case STK_NULL_CHECK_END: return "NulChE";
1412  case STK_MEM_END_MARK: return "MemEMk";
1413  case STK_POS: return "Pos ";
1414  case STK_STOP_BT: return "StopBt";
1415  case STK_REPEAT: return "Rep ";
1416  case STK_CALL_FRAME: return "Call ";
1417  case STK_RETURN: return "Ret ";
1418  case STK_VOID: return "Void ";
1419  case STK_ABSENT_POS: return "AbsPos";
1420  case STK_ABSENT: return "Absent";
1421  default: return " ";
1422  }
1423 }
1424 #endif
1425 
1426 /* match data(str - end) from position (sstart). */
1427 /* if sstart == str then set sprev to NULL. */
1428 static OnigPosition
1429 match_at(regex_t* reg, const UChar* str, const UChar* end,
1430 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1431  const UChar* right_range,
1432 #endif
1433  const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
1434 {
1435  static const UChar FinishCode[] = { OP_FINISH };
1436 
1437  int i, num_mem, pop_level;
1438  ptrdiff_t n, best_len;
1439  LengthType tlen, tlen2;
1440  MemNumType mem;
1441  RelAddrType addr;
1442  OnigOptionType option = reg->options;
1443  OnigEncoding encode = reg->enc;
1444  OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
1445  UChar *s, *q, *sbegin;
1446  UChar *p = reg->p;
1447  UChar *pkeep;
1448  char *alloca_base;
1449  char *xmalloc_base = NULL;
1450  OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
1451  OnigStackType *stkp; /* used as any purpose. */
1452  OnigStackIndex si;
1453  OnigStackIndex *repeat_stk;
1454  OnigStackIndex *mem_start_stk, *mem_end_stk;
1455 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1456  int scv;
1457  unsigned char* state_check_buff = msa->state_check_buff;
1458  int num_comb_exp_check = reg->num_comb_exp_check;
1459 #endif
1460 
1461 #if USE_TOKEN_THREADED_VM
1462 # define OP_OFFSET 1
1463 # define VM_LOOP JUMP;
1464 # define VM_LOOP_END
1465 # define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
1466 # define DEFAULT L_DEFAULT:
1467 # define NEXT sprev = sbegin; JUMP
1468 # define JUMP RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
1469 
1470  RB_GNUC_EXTENSION static const void *oplabels[] = {
1471  &&L_OP_FINISH, /* matching process terminator (no more alternative) */
1472  &&L_OP_END, /* pattern code terminator (success end) */
1473 
1474  &&L_OP_EXACT1, /* single byte, N = 1 */
1475  &&L_OP_EXACT2, /* single byte, N = 2 */
1476  &&L_OP_EXACT3, /* single byte, N = 3 */
1477  &&L_OP_EXACT4, /* single byte, N = 4 */
1478  &&L_OP_EXACT5, /* single byte, N = 5 */
1479  &&L_OP_EXACTN, /* single byte */
1480  &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
1481  &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
1482  &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
1483  &&L_OP_EXACTMB2N, /* mb-length = 2 */
1484  &&L_OP_EXACTMB3N, /* mb-length = 3 */
1485  &&L_OP_EXACTMBN, /* other length */
1486 
1487  &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */
1488  &&L_OP_EXACTN_IC, /* single byte, ignore case */
1489 
1490  &&L_OP_CCLASS,
1491  &&L_OP_CCLASS_MB,
1492  &&L_OP_CCLASS_MIX,
1493  &&L_OP_CCLASS_NOT,
1494  &&L_OP_CCLASS_MB_NOT,
1495  &&L_OP_CCLASS_MIX_NOT,
1496 
1497  &&L_OP_ANYCHAR, /* "." */
1498  &&L_OP_ANYCHAR_ML, /* "." multi-line */
1499  &&L_OP_ANYCHAR_STAR, /* ".*" */
1500  &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
1501  &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
1502  &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
1503 
1504  &&L_OP_WORD,
1505  &&L_OP_NOT_WORD,
1506  &&L_OP_WORD_BOUND,
1507  &&L_OP_NOT_WORD_BOUND,
1508 # ifdef USE_WORD_BEGIN_END
1509  &&L_OP_WORD_BEGIN,
1510  &&L_OP_WORD_END,
1511 # else
1512  &&L_DEFAULT,
1513  &&L_DEFAULT,
1514 # endif
1515  &&L_OP_ASCII_WORD,
1516  &&L_OP_NOT_ASCII_WORD,
1517  &&L_OP_ASCII_WORD_BOUND,
1518  &&L_OP_NOT_ASCII_WORD_BOUND,
1519 # ifdef USE_WORD_BEGIN_END
1520  &&L_OP_ASCII_WORD_BEGIN,
1521  &&L_OP_ASCII_WORD_END,
1522 # else
1523  &&L_DEFAULT,
1524  &&L_DEFAULT,
1525 # endif
1526 
1527  &&L_OP_BEGIN_BUF,
1528  &&L_OP_END_BUF,
1529  &&L_OP_BEGIN_LINE,
1530  &&L_OP_END_LINE,
1531  &&L_OP_SEMI_END_BUF,
1532  &&L_OP_BEGIN_POSITION,
1533 
1534  &&L_OP_BACKREF1,
1535  &&L_OP_BACKREF2,
1536  &&L_OP_BACKREFN,
1537  &&L_OP_BACKREFN_IC,
1538  &&L_OP_BACKREF_MULTI,
1539  &&L_OP_BACKREF_MULTI_IC,
1540 # ifdef USE_BACKREF_WITH_LEVEL
1541  &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
1542 # else
1543  &&L_DEFAULT,
1544 # endif
1545  &&L_OP_MEMORY_START,
1546  &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */
1547  &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */
1548 # ifdef USE_SUBEXP_CALL
1549  &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
1550 # else
1551  &&L_DEFAULT,
1552 # endif
1553  &&L_OP_MEMORY_END,
1554 # ifdef USE_SUBEXP_CALL
1555  &&L_OP_MEMORY_END_REC, /* push marker to stack */
1556 # else
1557  &&L_DEFAULT,
1558 # endif
1559 
1560  &&L_OP_KEEP,
1561 
1562  &&L_OP_FAIL, /* pop stack and move */
1563  &&L_OP_JUMP,
1564  &&L_OP_PUSH,
1565  &&L_OP_POP,
1566 # ifdef USE_OP_PUSH_OR_JUMP_EXACT
1567  &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
1568 # else
1569  &&L_DEFAULT,
1570 # endif
1571  &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
1572  &&L_OP_REPEAT, /* {n,m} */
1573  &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */
1574  &&L_OP_REPEAT_INC,
1575  &&L_OP_REPEAT_INC_NG, /* non greedy */
1576  &&L_OP_REPEAT_INC_SG, /* search and get in stack */
1577  &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
1578  &&L_OP_NULL_CHECK_START, /* null loop checker start */
1579  &&L_OP_NULL_CHECK_END, /* null loop checker end */
1580 # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1581  &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
1582 # else
1583  &&L_DEFAULT,
1584 # endif
1585 # ifdef USE_SUBEXP_CALL
1586  &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
1587 # else
1588  &&L_DEFAULT,
1589 # endif
1590 
1591  &&L_OP_PUSH_POS, /* (?=...) start */
1592  &&L_OP_POP_POS, /* (?=...) end */
1593  &&L_OP_PUSH_POS_NOT, /* (?!...) start */
1594  &&L_OP_FAIL_POS, /* (?!...) end */
1595  &&L_OP_PUSH_STOP_BT, /* (?>...) start */
1596  &&L_OP_POP_STOP_BT, /* (?>...) end */
1597  &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
1598  &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
1599  &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
1600  &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */
1601  &&L_OP_ABSENT, /* (?~...) start of inner loop */
1602  &&L_OP_ABSENT_END, /* (?~...) end */
1603 
1604 # ifdef USE_SUBEXP_CALL
1605  &&L_OP_CALL, /* \g<name> */
1606  &&L_OP_RETURN,
1607 # else
1608  &&L_DEFAULT,
1609  &&L_DEFAULT,
1610 # endif
1611  &&L_OP_CONDITION,
1612 
1613 # ifdef USE_COMBINATION_EXPLOSION_CHECK
1614  &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */
1615  &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
1616  &&L_OP_STATE_CHECK, /* check only */
1617 # else
1618  &&L_DEFAULT,
1619  &&L_DEFAULT,
1620  &&L_DEFAULT,
1621 # endif
1622 # ifdef USE_COMBINATION_EXPLOSION_CHECK
1623  &&L_OP_STATE_CHECK_ANYCHAR_STAR,
1624  &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
1625 # else
1626  &&L_DEFAULT,
1627  &&L_DEFAULT,
1628 # endif
1629  /* no need: IS_DYNAMIC_OPTION() == 0 */
1630 # if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
1631  &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */
1632  &&L_OP_SET_OPTION /* set option */
1633 # else
1634  &&L_DEFAULT,
1635  &&L_DEFAULT
1636 # endif
1637  };
1638 #else /* USE_TOKEN_THREADED_VM */
1639 
1640 # define OP_OFFSET 0
1641 # define VM_LOOP \
1642  while (1) { \
1643  OPCODE_EXEC_HOOK; \
1644  sbegin = s; \
1645  switch (*p++) {
1646 # define VM_LOOP_END } sprev = sbegin; }
1647 # define CASE(x) case x:
1648 # define DEFAULT default:
1649 # define NEXT break
1650 # define JUMP continue; break
1651 #endif /* USE_TOKEN_THREADED_VM */
1652 
1653 
1654 #ifdef USE_SUBEXP_CALL
1655 /* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
1656  etc. Additional space is required. */
1657 # define ADD_NUMMEM 1
1658 #else
1659 /* Stack #0 is not used. */
1660 # define ADD_NUMMEM 0
1661 #endif
1662 
1663  n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
1664 
1665  STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
1666  pop_level = reg->stack_pop_level;
1667  num_mem = reg->num_mem;
1668  repeat_stk = (OnigStackIndex* )alloca_base;
1669 
1670  mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
1671  mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
1672  {
1673  OnigStackIndex *pp = mem_start_stk;
1674  for (; pp < repeat_stk + n; pp += 2) {
1675  pp[0] = INVALID_STACK_INDEX;
1676  pp[1] = INVALID_STACK_INDEX;
1677  }
1678  }
1679 #ifndef USE_SUBEXP_CALL
1680  mem_start_stk--; /* for index start from 1,
1681  mem_start_stk[1]..mem_start_stk[num_mem] */
1682  mem_end_stk--; /* for index start from 1,
1683  mem_end_stk[1]..mem_end_stk[num_mem] */
1684 #endif
1685 
1686 #ifdef ONIG_DEBUG_MATCH
1687  fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
1688  (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
1689  fprintf(stderr, "size: %d, start offset: %d\n",
1690  (int )(end - str), (int )(sstart - str));
1691  fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
1692 #endif
1693 
1694  STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
1695  best_len = ONIG_MISMATCH;
1696  s = (UChar* )sstart;
1697  pkeep = (UChar* )sstart;
1698 
1699 
1700 #ifdef ONIG_DEBUG_MATCH
1701 # define OPCODE_EXEC_HOOK \
1702  if (s) { \
1703  UChar *op, *q, *bp, buf[50]; \
1704  int len; \
1705  op = p - OP_OFFSET; \
1706  fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
1707  bp = buf; \
1708  q = s; \
1709  if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
1710  for (i = 0; i < 7 && q < end; i++) { \
1711  len = enclen(encode, q, end); \
1712  while (len-- > 0) *bp++ = *q++; \
1713  } \
1714  if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
1715  } \
1716  xmemcpy(bp, "\"", 1); bp += 1; \
1717  *bp = 0; \
1718  fputs((char* )buf, stderr); \
1719  for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
1720  fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
1721  stk - stk_base - 1, \
1722  (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
1723  (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
1724  onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
1725  fprintf(stderr, "\n"); \
1726  }
1727 #else
1728 # define OPCODE_EXEC_HOOK ((void) 0)
1729 #endif
1730 
1731 
1732  VM_LOOP {
1733  CASE(OP_END) MOP_IN(OP_END);
1734  n = s - sstart;
1735  if (n > best_len) {
1736  OnigRegion* region;
1737 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1738  if (IS_FIND_LONGEST(option)) {
1739  if (n > msa->best_len) {
1740  msa->best_len = n;
1741  msa->best_s = (UChar* )sstart;
1742  }
1743  else
1744  goto end_best_len;
1745  }
1746 #endif
1747  best_len = n;
1748  region = msa->region;
1749  if (region) {
1750  region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
1751  region->end[0] = s - str;
1752  for (i = 1; i <= num_mem; i++) {
1753  if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1754  if (BIT_STATUS_AT(reg->bt_mem_start, i))
1755  region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1756  else
1757  region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1758 
1759  region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1760  ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1761  : (UChar* )((void* )mem_end_stk[i])) - str;
1762  }
1763  else {
1764  region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1765  }
1766  }
1767 
1768 #ifdef USE_CAPTURE_HISTORY
1769  if (reg->capture_history != 0) {
1770  int r;
1771  OnigCaptureTreeNode* node;
1772 
1773  if (IS_NULL(region->history_root)) {
1774  region->history_root = node = history_node_new();
1775  CHECK_NULL_RETURN_MEMERR(node);
1776  }
1777  else {
1778  node = region->history_root;
1779  history_tree_clear(node);
1780  }
1781 
1782  node->group = 0;
1783  node->beg = ((pkeep > s) ? s : pkeep) - str;
1784  node->end = s - str;
1785 
1786  stkp = stk_base;
1787  r = make_capture_history_tree(region->history_root, &stkp,
1788  stk, (UChar* )str, reg);
1789  if (r < 0) {
1790  best_len = r; /* error code */
1791  goto finish;
1792  }
1793  }
1794 #endif /* USE_CAPTURE_HISTORY */
1795  } /* if (region) */
1796  } /* n > best_len */
1797 
1798 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1799  end_best_len:
1800 #endif
1801  MOP_OUT;
1802 
1803  if (IS_FIND_CONDITION(option)) {
1804  if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1805  best_len = ONIG_MISMATCH;
1806  goto fail; /* for retry */
1807  }
1808  if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1809  goto fail; /* for retry */
1810  }
1811  }
1812 
1813  /* default behavior: return first-matching result. */
1814  goto finish;
1815  NEXT;
1816 
1817  CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
1818  DATA_ENSURE(1);
1819  if (*p != *s) goto fail;
1820  p++; s++;
1821  MOP_OUT;
1822  NEXT;
1823 
1824  CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
1825  {
1826  int len;
1827  UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1828 
1829  DATA_ENSURE(1);
1830  len = ONIGENC_MBC_CASE_FOLD(encode,
1831  /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1832  case_fold_flag,
1833  &s, end, lowbuf);
1834  DATA_ENSURE(0);
1835  q = lowbuf;
1836  while (len-- > 0) {
1837  if (*p != *q) {
1838  goto fail;
1839  }
1840  p++; q++;
1841  }
1842  }
1843  MOP_OUT;
1844  NEXT;
1845 
1846  CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
1847  DATA_ENSURE(2);
1848  if (*p != *s) goto fail;
1849  p++; s++;
1850  if (*p != *s) goto fail;
1851  sprev = s;
1852  p++; s++;
1853  MOP_OUT;
1854  JUMP;
1855 
1856  CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
1857  DATA_ENSURE(3);
1858  if (*p != *s) goto fail;
1859  p++; s++;
1860  if (*p != *s) goto fail;
1861  p++; s++;
1862  if (*p != *s) goto fail;
1863  sprev = s;
1864  p++; s++;
1865  MOP_OUT;
1866  JUMP;
1867 
1868  CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
1869  DATA_ENSURE(4);
1870  if (*p != *s) goto fail;
1871  p++; s++;
1872  if (*p != *s) goto fail;
1873  p++; s++;
1874  if (*p != *s) goto fail;
1875  p++; s++;
1876  if (*p != *s) goto fail;
1877  sprev = s;
1878  p++; s++;
1879  MOP_OUT;
1880  JUMP;
1881 
1882  CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
1883  DATA_ENSURE(5);
1884  if (*p != *s) goto fail;
1885  p++; s++;
1886  if (*p != *s) goto fail;
1887  p++; s++;
1888  if (*p != *s) goto fail;
1889  p++; s++;
1890  if (*p != *s) goto fail;
1891  p++; s++;
1892  if (*p != *s) goto fail;
1893  sprev = s;
1894  p++; s++;
1895  MOP_OUT;
1896  JUMP;
1897 
1898  CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
1899  GET_LENGTH_INC(tlen, p);
1900  DATA_ENSURE(tlen);
1901  while (tlen-- > 0) {
1902  if (*p++ != *s++) goto fail;
1903  }
1904  sprev = s - 1;
1905  MOP_OUT;
1906  JUMP;
1907 
1908  CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
1909  {
1910  int len;
1911  UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1912 
1913  GET_LENGTH_INC(tlen, p);
1914  endp = p + tlen;
1915 
1916  while (p < endp) {
1917  sprev = s;
1918  DATA_ENSURE(1);
1919  len = ONIGENC_MBC_CASE_FOLD(encode,
1920  /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1921  case_fold_flag,
1922  &s, end, lowbuf);
1923  DATA_ENSURE(0);
1924  q = lowbuf;
1925  while (len-- > 0) {
1926  if (*p != *q) goto fail;
1927  p++; q++;
1928  }
1929  }
1930  }
1931 
1932  MOP_OUT;
1933  JUMP;
1934 
1935  CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
1936  DATA_ENSURE(2);
1937  if (*p != *s) goto fail;
1938  p++; s++;
1939  if (*p != *s) goto fail;
1940  p++; s++;
1941  MOP_OUT;
1942  NEXT;
1943 
1944  CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
1945  DATA_ENSURE(4);
1946  if (*p != *s) goto fail;
1947  p++; s++;
1948  if (*p != *s) goto fail;
1949  p++; s++;
1950  sprev = s;
1951  if (*p != *s) goto fail;
1952  p++; s++;
1953  if (*p != *s) goto fail;
1954  p++; s++;
1955  MOP_OUT;
1956  JUMP;
1957 
1958  CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
1959  DATA_ENSURE(6);
1960  if (*p != *s) goto fail;
1961  p++; s++;
1962  if (*p != *s) goto fail;
1963  p++; s++;
1964  if (*p != *s) goto fail;
1965  p++; s++;
1966  if (*p != *s) goto fail;
1967  p++; s++;
1968  sprev = s;
1969  if (*p != *s) goto fail;
1970  p++; s++;
1971  if (*p != *s) goto fail;
1972  p++; s++;
1973  MOP_OUT;
1974  JUMP;
1975 
1976  CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
1977  GET_LENGTH_INC(tlen, p);
1978  DATA_ENSURE(tlen * 2);
1979  while (tlen-- > 0) {
1980  if (*p != *s) goto fail;
1981  p++; s++;
1982  if (*p != *s) goto fail;
1983  p++; s++;
1984  }
1985  sprev = s - 2;
1986  MOP_OUT;
1987  JUMP;
1988 
1989  CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
1990  GET_LENGTH_INC(tlen, p);
1991  DATA_ENSURE(tlen * 3);
1992  while (tlen-- > 0) {
1993  if (*p != *s) goto fail;
1994  p++; s++;
1995  if (*p != *s) goto fail;
1996  p++; s++;
1997  if (*p != *s) goto fail;
1998  p++; s++;
1999  }
2000  sprev = s - 3;
2001  MOP_OUT;
2002  JUMP;
2003 
2004  CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2005  GET_LENGTH_INC(tlen, p); /* mb-len */
2006  GET_LENGTH_INC(tlen2, p); /* string len */
2007  tlen2 *= tlen;
2008  DATA_ENSURE(tlen2);
2009  while (tlen2-- > 0) {
2010  if (*p != *s) goto fail;
2011  p++; s++;
2012  }
2013  sprev = s - tlen;
2014  MOP_OUT;
2015  JUMP;
2016 
2017  CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2018  DATA_ENSURE(1);
2019  if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2020  p += SIZE_BITSET;
2021  s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
2022  MOP_OUT;
2023  NEXT;
2024 
2025  CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2026  if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2027 
2028  cclass_mb:
2029  GET_LENGTH_INC(tlen, p);
2030  {
2031  OnigCodePoint code;
2032  UChar *ss;
2033  int mb_len;
2034 
2035  DATA_ENSURE(1);
2036  mb_len = enclen(encode, s, end);
2037  DATA_ENSURE(mb_len);
2038  ss = s;
2039  s += mb_len;
2040  code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2041 
2042 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2043  if (! onig_is_in_code_range(p, code)) goto fail;
2044 #else
2045  q = p;
2046  ALIGNMENT_RIGHT(q);
2047  if (! onig_is_in_code_range(q, code)) goto fail;
2048 #endif
2049  }
2050  p += tlen;
2051  MOP_OUT;
2052  NEXT;
2053 
2054  CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2055  DATA_ENSURE(1);
2056  if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2057  p += SIZE_BITSET;
2058  goto cclass_mb;
2059  }
2060  else {
2061  if (BITSET_AT(((BitSetRef )p), *s) == 0)
2062  goto fail;
2063 
2064  p += SIZE_BITSET;
2065  GET_LENGTH_INC(tlen, p);
2066  p += tlen;
2067  s++;
2068  }
2069  MOP_OUT;
2070  NEXT;
2071 
2072  CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
2073  DATA_ENSURE(1);
2074  if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
2075  p += SIZE_BITSET;
2076  s += enclen(encode, s, end);
2077  MOP_OUT;
2078  NEXT;
2079 
2080  CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
2081  DATA_ENSURE(1);
2082  if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2083  s++;
2084  GET_LENGTH_INC(tlen, p);
2085  p += tlen;
2086  goto cc_mb_not_success;
2087  }
2088 
2089  cclass_mb_not:
2090  GET_LENGTH_INC(tlen, p);
2091  {
2092  OnigCodePoint code;
2093  UChar *ss;
2094  int mb_len = enclen(encode, s, end);
2095 
2096  if (! DATA_ENSURE_CHECK(mb_len)) {
2097  DATA_ENSURE(1);
2098  s = (UChar* )end;
2099  p += tlen;
2100  goto cc_mb_not_success;
2101  }
2102 
2103  ss = s;
2104  s += mb_len;
2105  code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2106 
2107 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2108  if (onig_is_in_code_range(p, code)) goto fail;
2109 #else
2110  q = p;
2111  ALIGNMENT_RIGHT(q);
2112  if (onig_is_in_code_range(q, code)) goto fail;
2113 #endif
2114  }
2115  p += tlen;
2116 
2117  cc_mb_not_success:
2118  MOP_OUT;
2119  NEXT;
2120 
2121  CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
2122  DATA_ENSURE(1);
2123  if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2124  p += SIZE_BITSET;
2125  goto cclass_mb_not;
2126  }
2127  else {
2128  if (BITSET_AT(((BitSetRef )p), *s) != 0)
2129  goto fail;
2130 
2131  p += SIZE_BITSET;
2132  GET_LENGTH_INC(tlen, p);
2133  p += tlen;
2134  s++;
2135  }
2136  MOP_OUT;
2137  NEXT;
2138 
2139  CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
2140  DATA_ENSURE(1);
2141  n = enclen(encode, s, end);
2142  DATA_ENSURE(n);
2143  if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2144  s += n;
2145  MOP_OUT;
2146  NEXT;
2147 
2148  CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
2149  DATA_ENSURE(1);
2150  n = enclen(encode, s, end);
2151  DATA_ENSURE(n);
2152  s += n;
2153  MOP_OUT;
2154  NEXT;
2155 
2156  CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
2157  while (DATA_ENSURE_CHECK1) {
2158  STACK_PUSH_ALT(p, s, sprev, pkeep);
2159  n = enclen(encode, s, end);
2160  DATA_ENSURE(n);
2161  if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2162  sprev = s;
2163  s += n;
2164  }
2165  MOP_OUT;
2166  JUMP;
2167 
2168  CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
2169  while (DATA_ENSURE_CHECK1) {
2170  STACK_PUSH_ALT(p, s, sprev, pkeep);
2171  n = enclen(encode, s, end);
2172  if (n > 1) {
2173  DATA_ENSURE(n);
2174  sprev = s;
2175  s += n;
2176  }
2177  else {
2178  sprev = s;
2179  s++;
2180  }
2181  }
2182  MOP_OUT;
2183  JUMP;
2184 
2185  CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2186  while (DATA_ENSURE_CHECK1) {
2187  if (*p == *s) {
2188  STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2189  }
2190  n = enclen(encode, s, end);
2191  DATA_ENSURE(n);
2192  if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2193  sprev = s;
2194  s += n;
2195  }
2196  p++;
2197  MOP_OUT;
2198  NEXT;
2199 
2200  CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2201  while (DATA_ENSURE_CHECK1) {
2202  if (*p == *s) {
2203  STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2204  }
2205  n = enclen(encode, s, end);
2206  if (n > 1) {
2207  DATA_ENSURE(n);
2208  sprev = s;
2209  s += n;
2210  }
2211  else {
2212  sprev = s;
2213  s++;
2214  }
2215  }
2216  p++;
2217  MOP_OUT;
2218  NEXT;
2219 
2220 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2221  CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
2222  GET_STATE_CHECK_NUM_INC(mem, p);
2223  while (DATA_ENSURE_CHECK1) {
2224  STATE_CHECK_VAL(scv, mem);
2225  if (scv) goto fail;
2226 
2227  STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2228  n = enclen(encode, s, end);
2229  DATA_ENSURE(n);
2230  if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2231  sprev = s;
2232  s += n;
2233  }
2234  MOP_OUT;
2235  NEXT;
2236 
2237  CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
2238  MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
2239 
2240  GET_STATE_CHECK_NUM_INC(mem, p);
2241  while (DATA_ENSURE_CHECK1) {
2242  STATE_CHECK_VAL(scv, mem);
2243  if (scv) goto fail;
2244 
2245  STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2246  n = enclen(encode, s, end);
2247  if (n > 1) {
2248  DATA_ENSURE(n);
2249  sprev = s;
2250  s += n;
2251  }
2252  else {
2253  sprev = s;
2254  s++;
2255  }
2256  }
2257  MOP_OUT;
2258  NEXT;
2259 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2260 
2261  CASE(OP_WORD) MOP_IN(OP_WORD);
2262  DATA_ENSURE(1);
2263  if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2264  goto fail;
2265 
2266  s += enclen(encode, s, end);
2267  MOP_OUT;
2268  NEXT;
2269 
2270  CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
2271  DATA_ENSURE(1);
2272  if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2273  goto fail;
2274 
2275  s += enclen(encode, s, end);
2276  MOP_OUT;
2277  NEXT;
2278 
2279  CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
2280  DATA_ENSURE(1);
2281  if (ONIGENC_IS_MBC_WORD(encode, s, end))
2282  goto fail;
2283 
2284  s += enclen(encode, s, end);
2285  MOP_OUT;
2286  NEXT;
2287 
2288  CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
2289  DATA_ENSURE(1);
2290  if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2291  goto fail;
2292 
2293  s += enclen(encode, s, end);
2294  MOP_OUT;
2295  NEXT;
2296 
2297  CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
2298  if (ON_STR_BEGIN(s)) {
2299  DATA_ENSURE(1);
2300  if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2301  goto fail;
2302  }
2303  else if (ON_STR_END(s)) {
2304  if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2305  goto fail;
2306  }
2307  else {
2308  if (ONIGENC_IS_MBC_WORD(encode, s, end)
2309  == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2310  goto fail;
2311  }
2312  MOP_OUT;
2313  JUMP;
2314 
2315  CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
2316  if (ON_STR_BEGIN(s)) {
2317  DATA_ENSURE(1);
2318  if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2319  goto fail;
2320  }
2321  else if (ON_STR_END(s)) {
2322  if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2323  goto fail;
2324  }
2325  else {
2326  if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2327  == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2328  goto fail;
2329  }
2330  MOP_OUT;
2331  JUMP;
2332 
2333  CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
2334  if (ON_STR_BEGIN(s)) {
2335  if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
2336  goto fail;
2337  }
2338  else if (ON_STR_END(s)) {
2339  if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
2340  goto fail;
2341  }
2342  else {
2343  if (ONIGENC_IS_MBC_WORD(encode, s, end)
2344  != ONIGENC_IS_MBC_WORD(encode, sprev, end))
2345  goto fail;
2346  }
2347  MOP_OUT;
2348  JUMP;
2349 
2350  CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
2351  if (ON_STR_BEGIN(s)) {
2352  if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2353  goto fail;
2354  }
2355  else if (ON_STR_END(s)) {
2356  if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2357  goto fail;
2358  }
2359  else {
2360  if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2361  != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2362  goto fail;
2363  }
2364  MOP_OUT;
2365  JUMP;
2366 
2367 #ifdef USE_WORD_BEGIN_END
2368  CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
2369  if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
2370  if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2371  MOP_OUT;
2372  JUMP;
2373  }
2374  }
2375  goto fail;
2376  NEXT;
2377 
2378  CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
2379  if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
2380  if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
2381  MOP_OUT;
2382  JUMP;
2383  }
2384  }
2385  goto fail;
2386  NEXT;
2387 
2388  CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
2389  if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2390  if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
2391  MOP_OUT;
2392  JUMP;
2393  }
2394  }
2395  goto fail;
2396  NEXT;
2397 
2398  CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
2399  if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
2400  if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
2401  MOP_OUT;
2402  JUMP;
2403  }
2404  }
2405  goto fail;
2406  NEXT;
2407 #endif
2408 
2409  CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
2410  if (! ON_STR_BEGIN(s)) goto fail;
2411  if (IS_NOTBOS(msa->options)) goto fail;
2412 
2413  MOP_OUT;
2414  JUMP;
2415 
2416  CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
2417  if (! ON_STR_END(s)) goto fail;
2418  if (IS_NOTEOS(msa->options)) goto fail;
2419 
2420  MOP_OUT;
2421  JUMP;
2422 
2423  CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
2424  if (ON_STR_BEGIN(s)) {
2425  if (IS_NOTBOL(msa->options)) goto fail;
2426  MOP_OUT;
2427  JUMP;
2428  }
2429  else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
2430 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2431  && !(IS_NEWLINE_CRLF(option)
2432  && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
2433 #endif
2434  && !ON_STR_END(s)) {
2435  MOP_OUT;
2436  JUMP;
2437  }
2438  goto fail;
2439  NEXT;
2440 
2441  CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
2442  if (ON_STR_END(s)) {
2443 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2444  if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
2445 #endif
2446  if (IS_NOTEOL(msa->options)) goto fail;
2447  MOP_OUT;
2448  JUMP;
2449 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2450  }
2451 #endif
2452  }
2453  else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
2454  MOP_OUT;
2455  JUMP;
2456  }
2457  goto fail;
2458  NEXT;
2459 
2460  CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
2461  if (ON_STR_END(s)) {
2462 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2463  if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
2464 #endif
2465  if (IS_NOTEOL(msa->options)) goto fail;
2466  MOP_OUT;
2467  JUMP;
2468 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2469  }
2470 #endif
2471  }
2472  else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
2473  UChar* ss = s + enclen(encode, s, end);
2474  if (ON_STR_END(ss)) {
2475  MOP_OUT;
2476  JUMP;
2477  }
2478 #ifdef USE_CRNL_AS_LINE_TERMINATOR
2479  else if (IS_NEWLINE_CRLF(option)
2480  && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2481  ss += enclen(encode, ss, end);
2482  if (ON_STR_END(ss)) {
2483  MOP_OUT;
2484  JUMP;
2485  }
2486  }
2487 #endif
2488  }
2489  goto fail;
2490  NEXT;
2491 
2492  CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
2493  if (s != msa->gpos)
2494  goto fail;
2495 
2496  MOP_OUT;
2497  JUMP;
2498 
2499  CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
2500  GET_MEMNUM_INC(mem, p);
2501  STACK_PUSH_MEM_START(mem, s);
2502  MOP_OUT;
2503  JUMP;
2504 
2505  CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
2506  GET_MEMNUM_INC(mem, p);
2507  mem_start_stk[mem] = (OnigStackIndex )((void* )s);
2508  mem_end_stk[mem] = INVALID_STACK_INDEX;
2509  MOP_OUT;
2510  JUMP;
2511 
2512  CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
2513  GET_MEMNUM_INC(mem, p);
2514  STACK_PUSH_MEM_END(mem, s);
2515  MOP_OUT;
2516  JUMP;
2517 
2518  CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
2519  GET_MEMNUM_INC(mem, p);
2520  mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2521  MOP_OUT;
2522  JUMP;
2523 
2524  CASE(OP_KEEP) MOP_IN(OP_KEEP);
2525  pkeep = s;
2526  MOP_OUT;
2527  JUMP;
2528 
2529 #ifdef USE_SUBEXP_CALL
2530  CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
2531  GET_MEMNUM_INC(mem, p);
2532  STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2533  STACK_PUSH_MEM_END(mem, s);
2534  mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2535  MOP_OUT;
2536  JUMP;
2537 
2538  CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
2539  GET_MEMNUM_INC(mem, p);
2540  mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2541  STACK_GET_MEM_START(mem, stkp);
2542 
2543  if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2544  mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2545  else
2546  mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
2547 
2548  STACK_PUSH_MEM_END_MARK(mem);
2549  MOP_OUT;
2550  JUMP;
2551 #endif
2552 
2553  CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
2554  mem = 1;
2555  goto backref;
2556  NEXT;
2557 
2558  CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
2559  mem = 2;
2560  goto backref;
2561  NEXT;
2562 
2563  CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
2564  GET_MEMNUM_INC(mem, p);
2565  backref:
2566  {
2567  int len;
2568  UChar *pstart, *pend;
2569 
2570  /* if you want to remove following line,
2571  you should check in parse and compile time. */
2572  if (mem > num_mem) goto fail;
2573  if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2574  if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2575 
2576  if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2577  pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2578  else
2579  pstart = (UChar* )((void* )mem_start_stk[mem]);
2580 
2581  pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2582  ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2583  : (UChar* )((void* )mem_end_stk[mem]));
2584  n = pend - pstart;
2585  DATA_ENSURE(n);
2586  sprev = s;
2587  STRING_CMP(pstart, s, n);
2588  while (sprev + (len = enclen(encode, sprev, end)) < s)
2589  sprev += len;
2590 
2591  MOP_OUT;
2592  JUMP;
2593  }
2594 
2595  CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
2596  GET_MEMNUM_INC(mem, p);
2597  {
2598  int len;
2599  UChar *pstart, *pend;
2600 
2601  /* if you want to remove following line,
2602  you should check in parse and compile time. */
2603  if (mem > num_mem) goto fail;
2604  if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2605  if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2606 
2607  if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2608  pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2609  else
2610  pstart = (UChar* )((void* )mem_start_stk[mem]);
2611 
2612  pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2613  ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2614  : (UChar* )((void* )mem_end_stk[mem]));
2615  n = pend - pstart;
2616  DATA_ENSURE(n);
2617  sprev = s;
2618  STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end);
2619  while (sprev + (len = enclen(encode, sprev, end)) < s)
2620  sprev += len;
2621 
2622  MOP_OUT;
2623  JUMP;
2624  }
2625  NEXT;
2626 
2627  CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
2628  {
2629  int len, is_fail;
2630  UChar *pstart, *pend, *swork;
2631 
2632  GET_LENGTH_INC(tlen, p);
2633  for (i = 0; i < tlen; i++) {
2634  GET_MEMNUM_INC(mem, p);
2635 
2636  if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2637  if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2638 
2639  if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2640  pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2641  else
2642  pstart = (UChar* )((void* )mem_start_stk[mem]);
2643 
2644  pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2645  ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2646  : (UChar* )((void* )mem_end_stk[mem]));
2647  n = pend - pstart;
2648  DATA_ENSURE_CONTINUE(n);
2649  sprev = s;
2650  swork = s;
2651  STRING_CMP_VALUE(pstart, swork, n, is_fail);
2652  if (is_fail) continue;
2653  s = swork;
2654  while (sprev + (len = enclen(encode, sprev, end)) < s)
2655  sprev += len;
2656 
2657  p += (SIZE_MEMNUM * (tlen - i - 1));
2658  break; /* success */
2659  }
2660  if (i == tlen) goto fail;
2661  MOP_OUT;
2662  JUMP;
2663  }
2664  NEXT;
2665 
2666  CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
2667  {
2668  int len, is_fail;
2669  UChar *pstart, *pend, *swork;
2670 
2671  GET_LENGTH_INC(tlen, p);
2672  for (i = 0; i < tlen; i++) {
2673  GET_MEMNUM_INC(mem, p);
2674 
2675  if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2676  if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2677 
2678  if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2679  pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2680  else
2681  pstart = (UChar* )((void* )mem_start_stk[mem]);
2682 
2683  pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2684  ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2685  : (UChar* )((void* )mem_end_stk[mem]));
2686  n = pend - pstart;
2687  DATA_ENSURE_CONTINUE(n);
2688  sprev = s;
2689  swork = s;
2690  STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
2691  if (is_fail) continue;
2692  s = swork;
2693  while (sprev + (len = enclen(encode, sprev, end)) < s)
2694  sprev += len;
2695 
2696  p += (SIZE_MEMNUM * (tlen - i - 1));
2697  break; /* success */
2698  }
2699  if (i == tlen) goto fail;
2700  MOP_OUT;
2701  JUMP;
2702  }
2703 
2704 #ifdef USE_BACKREF_WITH_LEVEL
2705  CASE(OP_BACKREF_WITH_LEVEL)
2706  {
2707  int len;
2708  OnigOptionType ic;
2709  LengthType level;
2710 
2711  GET_OPTION_INC(ic, p);
2712  GET_LENGTH_INC(level, p);
2713  GET_LENGTH_INC(tlen, p);
2714 
2715  sprev = s;
2716  if (backref_match_at_nested_level(reg, stk, stk_base, ic,
2717  case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
2718  while (sprev + (len = enclen(encode, sprev, end)) < s)
2719  sprev += len;
2720 
2721  p += (SIZE_MEMNUM * tlen);
2722  }
2723  else
2724  goto fail;
2725 
2726  MOP_OUT;
2727  JUMP;
2728  }
2729 
2730 #endif
2731 
2732 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2733  CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
2734  GET_OPTION_INC(option, p);
2735  STACK_PUSH_ALT(p, s, sprev, pkeep);
2736  p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2737  MOP_OUT;
2738  JUMP;
2739 
2740  CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
2741  GET_OPTION_INC(option, p);
2742  MOP_OUT;
2743  JUMP;
2744 #endif
2745 
2746  CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
2747  GET_MEMNUM_INC(mem, p); /* mem: null check id */
2748  STACK_PUSH_NULL_CHECK_START(mem, s);
2749  MOP_OUT;
2750  JUMP;
2751 
2752  CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
2753  {
2754  int isnull;
2755 
2756  GET_MEMNUM_INC(mem, p); /* mem: null check id */
2757  STACK_NULL_CHECK(isnull, mem, s);
2758  if (isnull) {
2759 #ifdef ONIG_DEBUG_MATCH
2760  fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
2761  (int )mem, (uintptr_t )s, s);
2762 #endif
2763  null_check_found:
2764  /* empty loop founded, skip next instruction */
2765  switch (*p++) {
2766  case OP_JUMP:
2767  case OP_PUSH:
2768  p += SIZE_RELADDR;
2769  break;
2770  case OP_REPEAT_INC:
2771  case OP_REPEAT_INC_NG:
2772  case OP_REPEAT_INC_SG:
2773  case OP_REPEAT_INC_NG_SG:
2774  p += SIZE_MEMNUM;
2775  break;
2776  default:
2777  goto unexpected_bytecode_error;
2778  break;
2779  }
2780  }
2781  }
2782  MOP_OUT;
2783  JUMP;
2784 
2785 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2786  CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
2787  {
2788  int isnull;
2789 
2790  GET_MEMNUM_INC(mem, p); /* mem: null check id */
2791  STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2792  if (isnull) {
2793 # ifdef ONIG_DEBUG_MATCH
2794  fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
2795  (int )mem, (uintptr_t )s, s);
2796 # endif
2797  if (isnull == -1) goto fail;
2798  goto null_check_found;
2799  }
2800  }
2801  MOP_OUT;
2802  JUMP;
2803 #endif
2804 
2805 #ifdef USE_SUBEXP_CALL
2806  CASE(OP_NULL_CHECK_END_MEMST_PUSH)
2807  MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2808  {
2809  int isnull;
2810 
2811  GET_MEMNUM_INC(mem, p); /* mem: null check id */
2812 # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2813  STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2814 # else
2815  STACK_NULL_CHECK_REC(isnull, mem, s);
2816 # endif
2817  if (isnull) {
2818 # ifdef ONIG_DEBUG_MATCH
2819  fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
2820  (int )mem, (uintptr_t )s, s);
2821 # endif
2822  if (isnull == -1) goto fail;
2823  goto null_check_found;
2824  }
2825  else {
2826  STACK_PUSH_NULL_CHECK_END(mem);
2827  }
2828  }
2829  MOP_OUT;
2830  JUMP;
2831 #endif
2832 
2833  CASE(OP_JUMP) MOP_IN(OP_JUMP);
2834  GET_RELADDR_INC(addr, p);
2835  p += addr;
2836  MOP_OUT;
2837  CHECK_INTERRUPT_IN_MATCH_AT;
2838  JUMP;
2839 
2840  CASE(OP_PUSH) MOP_IN(OP_PUSH);
2841  GET_RELADDR_INC(addr, p);
2842  STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2843  MOP_OUT;
2844  JUMP;
2845 
2846 #ifdef USE_COMBINATION_EXPLOSION_CHECK
2847  CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
2848  GET_STATE_CHECK_NUM_INC(mem, p);
2849  STATE_CHECK_VAL(scv, mem);
2850  if (scv) goto fail;
2851 
2852  GET_RELADDR_INC(addr, p);
2853  STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
2854  MOP_OUT;
2855  JUMP;
2856 
2857  CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2858  GET_STATE_CHECK_NUM_INC(mem, p);
2859  GET_RELADDR_INC(addr, p);
2860  STATE_CHECK_VAL(scv, mem);
2861  if (scv) {
2862  p += addr;
2863  }
2864  else {
2865  STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
2866  }
2867  MOP_OUT;
2868  JUMP;
2869 
2870  CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
2871  GET_STATE_CHECK_NUM_INC(mem, p);
2872  STATE_CHECK_VAL(scv, mem);
2873  if (scv) goto fail;
2874 
2875  STACK_PUSH_STATE_CHECK(s, mem);
2876  MOP_OUT;
2877  JUMP;
2878 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
2879 
2880  CASE(OP_POP) MOP_IN(OP_POP);
2881  STACK_POP_ONE;
2882  MOP_OUT;
2883  JUMP;
2884 
2885 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
2886  CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
2887  GET_RELADDR_INC(addr, p);
2888  if (*p == *s && DATA_ENSURE_CHECK1) {
2889  p++;
2890  STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2891  MOP_OUT;
2892  JUMP;
2893  }
2894  p += (addr + 1);
2895  MOP_OUT;
2896  JUMP;
2897 #endif
2898 
2899  CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
2900  GET_RELADDR_INC(addr, p);
2901  if (*p == *s) {
2902  p++;
2903  STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2904  MOP_OUT;
2905  JUMP;
2906  }
2907  p++;
2908  MOP_OUT;
2909  JUMP;
2910 
2911  CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
2912  {
2913  GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2914  GET_RELADDR_INC(addr, p);
2915 
2916  STACK_ENSURE(1);
2917  repeat_stk[mem] = GET_STACK_INDEX(stk);
2918  STACK_PUSH_REPEAT(mem, p);
2919 
2920  if (reg->repeat_range[mem].lower == 0) {
2921  STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2922  }
2923  }
2924  MOP_OUT;
2925  JUMP;
2926 
2927  CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
2928  {
2929  GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2930  GET_RELADDR_INC(addr, p);
2931 
2932  STACK_ENSURE(1);
2933  repeat_stk[mem] = GET_STACK_INDEX(stk);
2934  STACK_PUSH_REPEAT(mem, p);
2935 
2936  if (reg->repeat_range[mem].lower == 0) {
2937  STACK_PUSH_ALT(p, s, sprev, pkeep);
2938  p += addr;
2939  }
2940  }
2941  MOP_OUT;
2942  JUMP;
2943 
2944  CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
2945  GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2946  si = repeat_stk[mem];
2947  stkp = STACK_AT(si);
2948 
2949  repeat_inc:
2950  stkp->u.repeat.count++;
2951  if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2952  /* end of repeat. Nothing to do. */
2953  }
2954  else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2955  STACK_PUSH_ALT(p, s, sprev, pkeep);
2956  p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2957  }
2958  else {
2959  p = stkp->u.repeat.pcode;
2960  }
2961  STACK_PUSH_REPEAT_INC(si);
2962  MOP_OUT;
2963  CHECK_INTERRUPT_IN_MATCH_AT;
2964  JUMP;
2965 
2966  CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
2967  GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2968  STACK_GET_REPEAT(mem, stkp);
2969  si = GET_STACK_INDEX(stkp);
2970  goto repeat_inc;
2971  NEXT;
2972 
2973  CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
2974  GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2975  si = repeat_stk[mem];
2976  stkp = STACK_AT(si);
2977 
2978  repeat_inc_ng:
2979  stkp->u.repeat.count++;
2980  if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2981  if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2982  UChar* pcode = stkp->u.repeat.pcode;
2983 
2984  STACK_PUSH_REPEAT_INC(si);
2985  STACK_PUSH_ALT(pcode, s, sprev, pkeep);
2986  }
2987  else {
2988  p = stkp->u.repeat.pcode;
2989  STACK_PUSH_REPEAT_INC(si);
2990  }
2991  }
2992  else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2993  STACK_PUSH_REPEAT_INC(si);
2994  }
2995  MOP_OUT;
2996  CHECK_INTERRUPT_IN_MATCH_AT;
2997  JUMP;
2998 
2999  CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
3000  GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3001  STACK_GET_REPEAT(mem, stkp);
3002  si = GET_STACK_INDEX(stkp);
3003  goto repeat_inc_ng;
3004  NEXT;
3005 
3006  CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3007  STACK_PUSH_POS(s, sprev, pkeep);
3008  MOP_OUT;
3009  JUMP;
3010 
3011  CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3012  {
3013  STACK_POS_END(stkp);
3014  s = stkp->u.state.pstr;
3015  sprev = stkp->u.state.pstr_prev;
3016  }
3017  MOP_OUT;
3018  JUMP;
3019 
3020  CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3021  GET_RELADDR_INC(addr, p);
3022  STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3023  MOP_OUT;
3024  JUMP;
3025 
3026  CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3027  STACK_POP_TIL_POS_NOT;
3028  goto fail;
3029  NEXT;
3030 
3031  CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3032  STACK_PUSH_STOP_BT;
3033  MOP_OUT;
3034  JUMP;
3035 
3036  CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3037  STACK_STOP_BT_END;
3038  MOP_OUT;
3039  JUMP;
3040 
3041  CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3042  GET_LENGTH_INC(tlen, p);
3043  s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3044  if (IS_NULL(s)) goto fail;
3045  sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3046  MOP_OUT;
3047  JUMP;
3048 
3049  CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
3050  GET_RELADDR_INC(addr, p);
3051  GET_LENGTH_INC(tlen, p);
3052  q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3053  if (IS_NULL(q)) {
3054  /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3055  If you want to change to fail, replace following line. */
3056  p += addr;
3057  /* goto fail; */
3058  }
3059  else {
3060  STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
3061  s = q;
3062  sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3063  }
3064  MOP_OUT;
3065  JUMP;
3066 
3067  CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
3068  STACK_POP_TIL_LOOK_BEHIND_NOT;
3069  goto fail;
3070  NEXT;
3071 
3072  CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
3073  /* Save the absent-start-pos and the original end-pos. */
3074  STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
3075  MOP_OUT;
3076  JUMP;
3077 
3078  CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
3079  {
3080  const UChar* aend = ABSENT_END_POS;
3081  UChar* absent;
3082  UChar* selfp = p - 1;
3083 
3084  STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
3085  GET_RELADDR_INC(addr, p);
3086 #ifdef ONIG_DEBUG_MATCH
3087  fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
3088 #endif
3089  if ((absent > aend) && (s > absent)) {
3090  /* An empty match occurred in (?~...) at the start point.
3091  * Never match. */
3092  STACK_POP;
3093  goto fail;
3094  }
3095  else if ((s >= aend) && (s > absent)) {
3096  if (s > aend) {
3097  /* Only one (or less) character matched in the last iteration.
3098  * This is not a possible point. */
3099  goto fail;
3100  }
3101  /* All possible points were found. Try matching after (?~...). */
3102  DATA_ENSURE(0);
3103  p += addr;
3104  }
3105  else {
3106  STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
3107  n = enclen(encode, s, end);
3108  STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
3109  STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
3110  STACK_PUSH_ABSENT;
3111  ABSENT_END_POS = aend;
3112  }
3113  }
3114  MOP_OUT;
3115  JUMP;
3116 
3117  CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
3118  /* The pattern inside (?~...) was matched.
3119  * Set the end-pos temporary and go to next iteration. */
3120  if (sprev < ABSENT_END_POS)
3121  ABSENT_END_POS = sprev;
3122 #ifdef ONIG_DEBUG_MATCH
3123  fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
3124 #endif
3125  STACK_POP_TIL_ABSENT;
3126  goto fail;
3127  NEXT;
3128 
3129 #ifdef USE_SUBEXP_CALL
3130  CASE(OP_CALL) MOP_IN(OP_CALL);
3131  GET_ABSADDR_INC(addr, p);
3132  STACK_PUSH_CALL_FRAME(p);
3133  p = reg->p + addr;
3134  MOP_OUT;
3135  JUMP;
3136 
3137  CASE(OP_RETURN) MOP_IN(OP_RETURN);
3138  STACK_RETURN(p);
3139  STACK_PUSH_RETURN;
3140  MOP_OUT;
3141  JUMP;
3142 #endif
3143 
3144  CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
3145  GET_MEMNUM_INC(mem, p);
3146  GET_RELADDR_INC(addr, p);
3147  if ((mem > num_mem) ||
3148  (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
3149  (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
3150  p += addr;
3151  }
3152  MOP_OUT;
3153  JUMP;
3154 
3155  CASE(OP_FINISH)
3156  goto finish;
3157  NEXT;
3158 
3159  CASE(OP_FAIL)
3160  if (0) {
3161  /* fall */
3162  fail:
3163  MOP_OUT;
3164  }
3165  MOP_IN(OP_FAIL);
3166  STACK_POP;
3167  p = stk->u.state.pcode;
3168  s = stk->u.state.pstr;
3169  sprev = stk->u.state.pstr_prev;
3170  pkeep = stk->u.state.pkeep;
3171 
3172 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3173  if (stk->u.state.state_check != 0) {
3174  stk->type = STK_STATE_CHECK_MARK;
3175  stk++;
3176  }
3177 #endif
3178 
3179  MOP_OUT;
3180  JUMP;
3181 
3182  DEFAULT
3183  goto bytecode_error;
3184  } VM_LOOP_END
3185 
3186  finish:
3187  STACK_SAVE;
3188  if (xmalloc_base) xfree(xmalloc_base);
3189  return best_len;
3190 
3191 #ifdef ONIG_DEBUG
3192  stack_error:
3193  STACK_SAVE;
3194  if (xmalloc_base) xfree(xmalloc_base);
3195  return ONIGERR_STACK_BUG;
3196 #endif
3197 
3198  bytecode_error:
3199  STACK_SAVE;
3200  if (xmalloc_base) xfree(xmalloc_base);
3201  return ONIGERR_UNDEFINED_BYTECODE;
3202 
3203  unexpected_bytecode_error:
3204  STACK_SAVE;
3205  if (xmalloc_base) xfree(xmalloc_base);
3206  return ONIGERR_UNEXPECTED_BYTECODE;
3207 }
3208 
3209 
3210 static UChar*
3211 slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
3212  const UChar* text, const UChar* text_end, UChar* text_range)
3213 {
3214  UChar *t, *p, *s, *end;
3215 
3216  end = (UChar* )text_end;
3217  end -= target_end - target - 1;
3218  if (end > text_range)
3219  end = text_range;
3220 
3221  s = (UChar* )text;
3222 
3223  if (enc->max_enc_len == enc->min_enc_len) {
3224  int n = enc->max_enc_len;
3225 
3226  while (s < end) {
3227  if (*s == *target) {
3228  p = s + 1;
3229  t = target + 1;
3230  if (target_end == t || memcmp(t, p, target_end - t) == 0)
3231  return s;
3232  }
3233  s += n;
3234  }
3235  return (UChar* )NULL;
3236  }
3237  while (s < end) {
3238  if (*s == *target) {
3239  p = s + 1;
3240  t = target + 1;
3241  if (target_end == t || memcmp(t, p, target_end - t) == 0)
3242  return s;
3243  }
3244  s += enclen(enc, s, text_end);
3245  }
3246 
3247  return (UChar* )NULL;
3248 }
3249 
3250 static int
3251 str_lower_case_match(OnigEncoding enc, int case_fold_flag,
3252  const UChar* t, const UChar* tend,
3253  const UChar* p, const UChar* end)
3254 {
3255  int lowlen;
3256  UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3257 
3258  while (t < tend) {
3259  lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
3260  q = lowbuf;
3261  while (lowlen > 0) {
3262  if (*t++ != *q++) return 0;
3263  lowlen--;
3264  }
3265  }
3266 
3267  return 1;
3268 }
3269 
3270 static UChar*
3271 slow_search_ic(OnigEncoding enc, int case_fold_flag,
3272  UChar* target, UChar* target_end,
3273  const UChar* text, const UChar* text_end, UChar* text_range)
3274 {
3275  UChar *s, *end;
3276 
3277  end = (UChar* )text_end;
3278  end -= target_end - target - 1;
3279  if (end > text_range)
3280  end = text_range;
3281 
3282  s = (UChar* )text;
3283 
3284  while (s < end) {
3285  if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3286  s, text_end))
3287  return s;
3288 
3289  s += enclen(enc, s, text_end);
3290  }
3291 
3292  return (UChar* )NULL;
3293 }
3294 
3295 static UChar*
3296 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
3297  const UChar* text, const UChar* adjust_text,
3298  const UChar* text_end, const UChar* text_start)
3299 {
3300  UChar *t, *p, *s;
3301 
3302  s = (UChar* )text_end;
3303  s -= (target_end - target);
3304  if (s > text_start)
3305  s = (UChar* )text_start;
3306  else
3307  s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
3308 
3309  while (s >= text) {
3310  if (*s == *target) {
3311  p = s + 1;
3312  t = target + 1;
3313  while (t < target_end) {
3314  if (*t != *p++)
3315  break;
3316  t++;
3317  }
3318  if (t == target_end)
3319  return s;
3320  }
3321  s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3322  }
3323 
3324  return (UChar* )NULL;
3325 }
3326 
3327 static UChar*
3328 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
3329  UChar* target, UChar* target_end,
3330  const UChar* text, const UChar* adjust_text,
3331  const UChar* text_end, const UChar* text_start)
3332 {
3333  UChar *s;
3334 
3335  s = (UChar* )text_end;
3336  s -= (target_end - target);
3337  if (s > text_start)
3338  s = (UChar* )text_start;
3339  else
3340  s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
3341 
3342  while (s >= text) {
3343  if (str_lower_case_match(enc, case_fold_flag,
3344  target, target_end, s, text_end))
3345  return s;
3346 
3347  s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3348  }
3349 
3350  return (UChar* )NULL;
3351 }
3352 
3353 #ifndef USE_SUNDAY_QUICK_SEARCH
3354 /* Boyer-Moore-Horspool search applied to a multibyte string */
3355 static UChar*
3356 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3357  const UChar* text, const UChar* text_end,
3358  const UChar* text_range)
3359 {
3360  const UChar *s, *se, *t, *p, *end;
3361  const UChar *tail;
3362  ptrdiff_t skip, tlen1;
3363 
3364 # ifdef ONIG_DEBUG_SEARCH
3365  fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3366  (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3367 # endif
3368 
3369  tail = target_end - 1;
3370  tlen1 = tail - target;
3371  end = text_range;
3372  if (end + tlen1 > text_end)
3373  end = text_end - tlen1;
3374 
3375  s = text;
3376 
3377  if (IS_NULL(reg->int_map)) {
3378  while (s < end) {
3379  p = se = s + tlen1;
3380  t = tail;
3381  while (*p == *t) {
3382  if (t == target) return (UChar* )s;
3383  p--; t--;
3384  }
3385  skip = reg->map[*se];
3386  t = s;
3387  do {
3388  s += enclen(reg->enc, s, end);
3389  } while ((s - t) < skip && s < end);
3390  }
3391  }
3392  else {
3393 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3394  while (s < end) {
3395  p = se = s + tlen1;
3396  t = tail;
3397  while (*p == *t) {
3398  if (t == target) return (UChar* )s;
3399  p--; t--;
3400  }
3401  skip = reg->int_map[*se];
3402  t = s;
3403  do {
3404  s += enclen(reg->enc, s, end);
3405  } while ((s - t) < skip && s < end);
3406  }
3407 # endif
3408  }
3409 
3410  return (UChar* )NULL;
3411 }
3412 
3413 /* Boyer-Moore-Horspool search */
3414 static UChar*
3415 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3416  const UChar* text, const UChar* text_end, const UChar* text_range)
3417 {
3418  const UChar *s, *t, *p, *end;
3419  const UChar *tail;
3420 
3421 # ifdef ONIG_DEBUG_SEARCH
3422  fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3423  (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3424 # endif
3425 
3426  end = text_range + (target_end - target) - 1;
3427  if (end > text_end)
3428  end = text_end;
3429 
3430  tail = target_end - 1;
3431  s = text + (target_end - target) - 1;
3432  if (IS_NULL(reg->int_map)) {
3433  while (s < end) {
3434  p = s;
3435  t = tail;
3436 # ifdef ONIG_DEBUG_SEARCH
3437  fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
3438  (intptr_t )(s - text), s);
3439 # endif
3440  while (*p == *t) {
3441  if (t == target) return (UChar* )p;
3442  p--; t--;
3443  }
3444  s += reg->map[*s];
3445  }
3446  }
3447  else { /* see int_map[] */
3448 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3449  while (s < end) {
3450  p = s;
3451  t = tail;
3452  while (*p == *t) {
3453  if (t == target) return (UChar* )p;
3454  p--; t--;
3455  }
3456  s += reg->int_map[*s];
3457  }
3458 # endif
3459  }
3460  return (UChar* )NULL;
3461 }
3462 
3463 /* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
3464 static UChar*
3465 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3466  const UChar* text, const UChar* text_end,
3467  const UChar* text_range)
3468 {
3469  const UChar *s, *se, *t, *end;
3470  const UChar *tail;
3471  ptrdiff_t skip, tlen1;
3472  OnigEncoding enc = reg->enc;
3473  int case_fold_flag = reg->case_fold_flag;
3474 
3475 # ifdef ONIG_DEBUG_SEARCH
3476  fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
3477  (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
3478 # endif
3479 
3480  tail = target_end - 1;
3481  tlen1 = tail - target;
3482  end = text_range;
3483  if (end + tlen1 > text_end)
3484  end = text_end - tlen1;
3485 
3486  s = text;
3487 
3488  if (IS_NULL(reg->int_map)) {
3489  while (s < end) {
3490  se = s + tlen1;
3491  if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3492  s, se + 1))
3493  return (UChar* )s;
3494  skip = reg->map[*se];
3495  t = s;
3496  do {
3497  s += enclen(reg->enc, s, end);
3498  } while ((s - t) < skip && s < end);
3499  }
3500  }
3501  else {
3502 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3503  while (s < end) {
3504  se = s + tlen1;
3505  if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3506  s, se + 1))
3507  return (UChar* )s;
3508  skip = reg->int_map[*se];
3509  t = s;
3510  do {
3511  s += enclen(reg->enc, s, end);
3512  } while ((s - t) < skip && s < end);
3513  }
3514 # endif
3515  }
3516 
3517  return (UChar* )NULL;
3518 }
3519 
3520 /* Boyer-Moore-Horspool search (ignore case) */
3521 static UChar*
3522 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3523  const UChar* text, const UChar* text_end, const UChar* text_range)
3524 {
3525  const UChar *s, *p, *end;
3526  const UChar *tail;
3527  OnigEncoding enc = reg->enc;
3528  int case_fold_flag = reg->case_fold_flag;
3529 
3530 # ifdef ONIG_DEBUG_SEARCH
3531  fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
3532  (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
3533 # endif
3534 
3535  end = text_range + (target_end - target) - 1;
3536  if (end > text_end)
3537  end = text_end;
3538 
3539  tail = target_end - 1;
3540  s = text + (target_end - target) - 1;
3541  if (IS_NULL(reg->int_map)) {
3542  while (s < end) {
3543  p = s - (target_end - target) + 1;
3544  if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3545  p, s + 1))
3546  return (UChar* )p;
3547  s += reg->map[*s];
3548  }
3549  }
3550  else { /* see int_map[] */
3551 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3552  while (s < end) {
3553  p = s - (target_end - target) + 1;
3554  if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3555  p, s + 1))
3556  return (UChar* )p;
3557  s += reg->int_map[*s];
3558  }
3559 # endif
3560  }
3561  return (UChar* )NULL;
3562 }
3563 
3564 #else /* USE_SUNDAY_QUICK_SEARCH */
3565 
3566 /* Sunday's quick search applied to a multibyte string */
3567 static UChar*
3568 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3569  const UChar* text, const UChar* text_end,
3570  const UChar* text_range)
3571 {
3572  const UChar *s, *se, *t, *p, *end;
3573  const UChar *tail;
3574  ptrdiff_t skip, tlen1;
3575  OnigEncoding enc = reg->enc;
3576 
3577 # ifdef ONIG_DEBUG_SEARCH
3578  fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3579  (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3580 # endif
3581 
3582  tail = target_end - 1;
3583  tlen1 = tail - target;
3584  end = text_range;
3585  if (end + tlen1 > text_end)
3586  end = text_end - tlen1;
3587 
3588  s = text;
3589 
3590  if (IS_NULL(reg->int_map)) {
3591  while (s < end) {
3592  p = se = s + tlen1;
3593  t = tail;
3594  while (*p == *t) {
3595  if (t == target) return (UChar* )s;
3596  p--; t--;
3597  }
3598  if (s + 1 >= end) break;
3599  skip = reg->map[se[1]];
3600  t = s;
3601  do {
3602  s += enclen(enc, s, end);
3603  } while ((s - t) < skip && s < end);
3604  }
3605  }
3606  else {
3607 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3608  while (s < end) {
3609  p = se = s + tlen1;
3610  t = tail;
3611  while (*p == *t) {
3612  if (t == target) return (UChar* )s;
3613  p--; t--;
3614  }
3615  if (s + 1 >= end) break;
3616  skip = reg->int_map[se[1]];
3617  t = s;
3618  do {
3619  s += enclen(enc, s, end);
3620  } while ((s - t) < skip && s < end);
3621  }
3622 # endif
3623  }
3624 
3625  return (UChar* )NULL;
3626 }
3627 
3628 /* Sunday's quick search */
3629 static UChar*
3630 bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3631  const UChar* text, const UChar* text_end, const UChar* text_range)
3632 {
3633  const UChar *s, *t, *p, *end;
3634  const UChar *tail;
3635  ptrdiff_t tlen1;
3636 
3637 # ifdef ONIG_DEBUG_SEARCH
3638  fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3639  (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3640 # endif
3641 
3642  tail = target_end - 1;
3643  tlen1 = tail - target;
3644  end = text_range + tlen1;
3645  if (end > text_end)
3646  end = text_end;
3647 
3648  s = text + tlen1;
3649  if (IS_NULL(reg->int_map)) {
3650  while (s < end) {
3651  p = s;
3652  t = tail;
3653  while (*p == *t) {
3654  if (t == target) return (UChar* )p;
3655  p--; t--;
3656  }
3657  if (s + 1 >= end) break;
3658  s += reg->map[s[1]];
3659  }
3660  }
3661  else { /* see int_map[] */
3662 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3663  while (s < end) {
3664  p = s;
3665  t = tail;
3666  while (*p == *t) {
3667  if (t == target) return (UChar* )p;
3668  p--; t--;
3669  }
3670  if (s + 1 >= end) break;
3671  s += reg->int_map[s[1]];
3672  }
3673 # endif
3674  }
3675  return (UChar* )NULL;
3676 }
3677 
3678 /* Sunday's quick search applied to a multibyte string (ignore case) */
3679 static UChar*
3680 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3681  const UChar* text, const UChar* text_end,
3682  const UChar* text_range)
3683 {
3684  const UChar *s, *se, *t, *end;
3685  const UChar *tail;
3686  ptrdiff_t skip, tlen1;
3687  OnigEncoding enc = reg->enc;
3688  int case_fold_flag = reg->case_fold_flag;
3689 
3690 # ifdef ONIG_DEBUG_SEARCH
3691  fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3692  (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3693 # endif
3694 
3695  tail = target_end - 1;
3696  tlen1 = tail - target;
3697  end = text_range;
3698  if (end + tlen1 > text_end)
3699  end = text_end - tlen1;
3700 
3701  s = text;
3702 
3703  if (IS_NULL(reg->int_map)) {
3704  while (s < end) {
3705  se = s + tlen1;
3706  if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3707  s, se + 1))
3708  return (UChar* )s;
3709  if (s + 1 >= end) break;
3710  skip = reg->map[se[1]];
3711  t = s;
3712  do {
3713  s += enclen(enc, s, end);
3714  } while ((s - t) < skip && s < end);
3715  }
3716  }
3717  else {
3718 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3719  while (s < end) {
3720  se = s + tlen1;
3721  if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3722  s, se + 1))
3723  return (UChar* )s;
3724  if (s + 1 >= end) break;
3725  skip = reg->int_map[se[1]];
3726  t = s;
3727  do {
3728  s += enclen(enc, s, end);
3729  } while ((s - t) < skip && s < end);
3730  }
3731 # endif
3732  }
3733 
3734  return (UChar* )NULL;
3735 }
3736 
3737 /* Sunday's quick search (ignore case) */
3738 static UChar*
3739 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3740  const UChar* text, const UChar* text_end, const UChar* text_range)
3741 {
3742  const UChar *s, *p, *end;
3743  const UChar *tail;
3744  ptrdiff_t tlen1;
3745  OnigEncoding enc = reg->enc;
3746  int case_fold_flag = reg->case_fold_flag;
3747 
3748 # ifdef ONIG_DEBUG_SEARCH
3749  fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3750  (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3751 # endif
3752 
3753  tail = target_end - 1;
3754  tlen1 = tail - target;
3755  end = text_range + tlen1;
3756  if (end > text_end)
3757  end = text_end;
3758 
3759  s = text + tlen1;
3760  if (IS_NULL(reg->int_map)) {
3761  while (s < end) {
3762  p = s - tlen1;
3763  if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3764  p, s + 1))
3765  return (UChar* )p;
3766  if (s + 1 >= end) break;
3767  s += reg->map[s[1]];
3768  }
3769  }
3770  else { /* see int_map[] */
3771 # if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3772  while (s < end) {
3773  p = s - tlen1;
3774  if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3775  p, s + 1))
3776  return (UChar* )p;
3777  if (s + 1 >= end) break;
3778  s += reg->int_map[s[1]];
3779  }
3780 # endif
3781  }
3782  return (UChar* )NULL;
3783 }
3784 #endif /* USE_SUNDAY_QUICK_SEARCH */
3785 
3786 #ifdef USE_INT_MAP_BACKWARD
3787 static int
3788 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
3789  int** skip)
3790 {
3791  int i, len;
3792 
3793  if (IS_NULL(*skip)) {
3794  *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3795  if (IS_NULL(*skip)) return ONIGERR_MEMORY;
3796  }
3797 
3798  len = (int )(end - s);
3799  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
3800  (*skip)[i] = len;
3801 
3802  for (i = len - 1; i > 0; i--)
3803  (*skip)[s[i]] = i;
3804 
3805  return 0;
3806 }
3807 
3808 static UChar*
3809 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
3810  const UChar* text, const UChar* adjust_text,
3811  const UChar* text_end, const UChar* text_start)
3812 {
3813  const UChar *s, *t, *p;
3814 
3815  s = text_end - (target_end - target);
3816  if (text_start < s)
3817  s = text_start;
3818  else
3819  s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
3820 
3821  while (s >= text) {
3822  p = s;
3823  t = target;
3824  while (t < target_end && *p == *t) {
3825  p++; t++;
3826  }
3827  if (t == target_end)
3828  return (UChar* )s;
3829 
3830  s -= reg->int_map_backward[*s];
3831  s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
3832  }
3833 
3834  return (UChar* )NULL;
3835 }
3836 #endif
3837 
3838 static UChar*
3839 map_search(OnigEncoding enc, UChar map[],
3840  const UChar* text, const UChar* text_range, const UChar* text_end)
3841 {
3842  const UChar *s = text;
3843 
3844  while (s < text_range) {
3845  if (map[*s]) return (UChar* )s;
3846 
3847  s += enclen(enc, s, text_end);
3848  }
3849  return (UChar* )NULL;
3850 }
3851 
3852 static UChar*
3853 map_search_backward(OnigEncoding enc, UChar map[],
3854  const UChar* text, const UChar* adjust_text,
3855  const UChar* text_start, const UChar* text_end)
3856 {
3857  const UChar *s = text_start;
3858 
3859  while (s >= text) {
3860  if (map[*s]) return (UChar* )s;
3861 
3862  s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3863  }
3864  return (UChar* )NULL;
3865 }
3866 
3867 extern OnigPosition
3868 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3869  OnigOptionType option)
3870 {
3871  ptrdiff_t r;
3872  UChar *prev;
3873  OnigMatchArg msa;
3874 
3875  MATCH_ARG_INIT(msa, option, region, at, at);
3876 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3877  {
3878  ptrdiff_t offset = at - str;
3879  STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3880  }
3881 #endif
3882 
3883  if (region) {
3884  r = onig_region_resize_clear(region, reg->num_mem + 1);
3885  }
3886  else
3887  r = 0;
3888 
3889  if (r == 0) {
3890  prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
3891  r = match_at(reg, str, end,
3892 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3893  end,
3894 #endif
3895  at, prev, &msa);
3896  }
3897 
3898  MATCH_ARG_FREE(msa);
3899  return r;
3900 }
3901 
3902 static int
3903 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3904  UChar* range, UChar** low, UChar** high, UChar** low_prev)
3905 {
3906  UChar *p, *pprev = (UChar* )NULL;
3907 
3908 #ifdef ONIG_DEBUG_SEARCH
3909  fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
3910  (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
3911 #endif
3912 
3913  p = s;
3914  if (reg->dmin > 0) {
3915  if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3916  p += reg->dmin;
3917  }
3918  else {
3919  UChar *q = p + reg->dmin;
3920 
3921  if (q >= end) return 0; /* fail */
3922  while (p < q) p += enclen(reg->enc, p, end);
3923  }
3924  }
3925 
3926  retry:
3927  switch (reg->optimize) {
3928  case ONIG_OPTIMIZE_EXACT:
3929  p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3930  break;
3931  case ONIG_OPTIMIZE_EXACT_IC:
3932  p = slow_search_ic(reg->enc, reg->case_fold_flag,
3933  reg->exact, reg->exact_end, p, end, range);
3934  break;
3935 
3936  case ONIG_OPTIMIZE_EXACT_BM:
3937  p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3938  break;
3939 
3940  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3941  p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3942  break;
3943 
3944  case ONIG_OPTIMIZE_EXACT_BM_IC:
3945  p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
3946  break;
3947 
3948  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
3949  p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
3950  break;
3951 
3952  case ONIG_OPTIMIZE_MAP:
3953  p = map_search(reg->enc, reg->map, p, range, end);
3954  break;
3955  }
3956 
3957  if (p && p < range) {
3958  if (p - reg->dmin < s) {
3959  retry_gate:
3960  pprev = p;
3961  p += enclen(reg->enc, p, end);
3962  goto retry;
3963  }
3964 
3965  if (reg->sub_anchor) {
3966  UChar* prev;
3967 
3968  switch (reg->sub_anchor) {
3969  case ANCHOR_BEGIN_LINE:
3970  if (!ON_STR_BEGIN(p)) {
3971  prev = onigenc_get_prev_char_head(reg->enc,
3972  (pprev ? pprev : str), p, end);
3973  if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
3974  goto retry_gate;
3975  }
3976  break;
3977 
3978  case ANCHOR_END_LINE:
3979  if (ON_STR_END(p)) {
3980 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3981  prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3982  (pprev ? pprev : str), p);
3983  if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
3984  goto retry_gate;
3985 #endif
3986  }
3987  else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
3988  goto retry_gate;
3989  break;
3990  }
3991  }
3992 
3993  if (reg->dmax == 0) {
3994  *low = p;
3995  if (low_prev) {
3996  if (*low > s)
3997  *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
3998  else
3999  *low_prev = onigenc_get_prev_char_head(reg->enc,
4000  (pprev ? pprev : str), p, end);
4001  }
4002  }
4003  else {
4004  if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4005  if (p < str + reg->dmax) {
4006  *low = (UChar* )str;
4007  if (low_prev)
4008  *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
4009  }
4010  else {
4011  *low = p - reg->dmax;
4012  if (*low > s) {
4013  *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4014  *low, end, (const UChar** )low_prev);
4015  if (low_prev && IS_NULL(*low_prev))
4016  *low_prev = onigenc_get_prev_char_head(reg->enc,
4017  (pprev ? pprev : s), *low, end);
4018  }
4019  else {
4020  if (low_prev)
4021  *low_prev = onigenc_get_prev_char_head(reg->enc,
4022  (pprev ? pprev : str), *low, end);
4023  }
4024  }
4025  }
4026  }
4027  /* no needs to adjust *high, *high is used as range check only */
4028  *high = p - reg->dmin;
4029 
4030 #ifdef ONIG_DEBUG_SEARCH
4031  fprintf(stderr,
4032  "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
4033  *low - str, *high - str, reg->dmin, reg->dmax);
4034 #endif
4035  return 1; /* success */
4036  }
4037 
4038  return 0; /* fail */
4039 }
4040 
4041 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
4042 
4043 static int
4044 backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
4045  UChar* s, const UChar* range, UChar* adjrange,
4046  UChar** low, UChar** high)
4047 {
4048  UChar *p;
4049 
4050  range += reg->dmin;
4051  p = s;
4052 
4053  retry:
4054  switch (reg->optimize) {
4055  case ONIG_OPTIMIZE_EXACT:
4056  exact_method:
4057  p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4058  range, adjrange, end, p);
4059  break;
4060 
4061  case ONIG_OPTIMIZE_EXACT_IC:
4062  case ONIG_OPTIMIZE_EXACT_BM_IC:
4063  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4064  p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4065  reg->exact, reg->exact_end,
4066  range, adjrange, end, p);
4067  break;
4068 
4069  case ONIG_OPTIMIZE_EXACT_BM:
4070  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4071 #ifdef USE_INT_MAP_BACKWARD
4072  if (IS_NULL(reg->int_map_backward)) {
4073  int r;
4074  if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
4075  goto exact_method;
4076 
4077  r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
4078  &(reg->int_map_backward));
4079  if (r) return r;
4080  }
4081  p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
4082  end, p);
4083 #else
4084  goto exact_method;
4085 #endif
4086  break;
4087 
4088  case ONIG_OPTIMIZE_MAP:
4089  p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
4090  break;
4091  }
4092 
4093  if (p) {
4094  if (reg->sub_anchor) {
4095  UChar* prev;
4096 
4097  switch (reg->sub_anchor) {
4098  case ANCHOR_BEGIN_LINE:
4099  if (!ON_STR_BEGIN(p)) {
4100  prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
4101  if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
4102  p = prev;
4103  goto retry;
4104  }
4105  }
4106  break;
4107 
4108  case ANCHOR_END_LINE:
4109  if (ON_STR_END(p)) {
4110 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4111  prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4112  if (IS_NULL(prev)) goto fail;
4113  if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
4114  p = prev;
4115  goto retry;
4116  }
4117 #endif
4118  }
4119  else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
4120  p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
4121  if (IS_NULL(p)) goto fail;
4122  goto retry;
4123  }
4124  break;
4125  }
4126  }
4127 
4128  /* no needs to adjust *high, *high is used as range check only */
4129  if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4130  *low = p - reg->dmax;
4131  *high = p - reg->dmin;
4132  *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
4133  }
4134 
4135 #ifdef ONIG_DEBUG_SEARCH
4136  fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
4137  (int )(*low - str), (int )(*high - str));
4138 #endif
4139  return 1; /* success */
4140  }
4141 
4142  fail:
4143 #ifdef ONIG_DEBUG_SEARCH
4144  fprintf(stderr, "backward_search_range: fail.\n");
4145 #endif
4146  return 0; /* fail */
4147 }
4148 
4149 
4150 extern OnigPosition
4151 onig_search(regex_t* reg, const UChar* str, const UChar* end,
4152  const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4153 {
4154  return onig_search_gpos(reg, str, end, start, start, range, region, option);
4155 }
4156 
/*
 * Searches [str, end) for a match of `reg`.
 *
 * The scan is a forward search when range > start and a backward search
 * otherwise.  `global_pos` is handed to MATCH_ARG_INIT and is also used to
 * narrow `range` when the regex is anchored with ANCHOR_BEGIN_POSITION.
 * `region` may be NULL; when given it is resized and cleared first.
 *
 * Returns the offset (s - str) of the position where match_at() succeeded,
 * ONIG_MISMATCH when nothing matched, or the nonzero/negative code produced
 * by onig_region_resize_clear() or match_at().
 */
4157 extern OnigPosition
4158 onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
4159  const UChar* global_pos,
4160  const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4161 {
4162  ptrdiff_t r;
4163  UChar *s, *prev;
4164  OnigMatchArg msa;
4165 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4166  const UChar *orig_start = start;
4167  const UChar *orig_range = range;
4168 #endif
4169 
4170 #ifdef ONIG_DEBUG_SEARCH
4171  fprintf(stderr,
4172  "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
4173  (uintptr_t )str, str, end - str, start - str, range - str);
4174 #endif
4175 
/* msa is not initialized yet, so early exits use the *_no_msa labels,
   which skip MATCH_ARG_FREE. */
4176  if (region) {
4177  r = onig_region_resize_clear(region, reg->num_mem + 1);
4178  if (r) goto finish_no_msa;
4179  }
4180 
/* A start position outside [str, end] is reported as a mismatch. */
4181  if (start > end || start < str) goto mismatch_no_msa;
4182 
4183 
/* MATCH_AND_RETURN_CHECK runs match_at() at s: a non-negative result jumps
   to `match` (unless USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE is defined and
   IS_FIND_LONGEST is set, in which case scanning continues), any other
   value except ONIG_MISMATCH jumps to `finish`, and ONIG_MISMATCH falls
   through.  The macro argument is forwarded to match_at() only when
   USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is defined. */
4184 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4185 # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4186 # define MATCH_AND_RETURN_CHECK(upper_range) \
4187  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4188  if (r != ONIG_MISMATCH) {\
4189  if (r >= 0) {\
4190  if (! IS_FIND_LONGEST(reg->options)) {\
4191  goto match;\
4192  }\
4193  }\
4194  else goto finish; /* error */ \
4195  }
4196 # else
4197 # define MATCH_AND_RETURN_CHECK(upper_range) \
4198  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4199  if (r != ONIG_MISMATCH) {\
4200  if (r >= 0) {\
4201  goto match;\
4202  }\
4203  else goto finish; /* error */ \
4204  }
4205 # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4206 #else
4207 # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4208 # define MATCH_AND_RETURN_CHECK(none) \
4209  r = match_at(reg, str, end, s, prev, &msa);\
4210  if (r != ONIG_MISMATCH) {\
4211  if (r >= 0) {\
4212  if (! IS_FIND_LONGEST(reg->options)) {\
4213  goto match;\
4214  }\
4215  }\
4216  else goto finish; /* error */ \
4217  }
4218 # else
4219 # define MATCH_AND_RETURN_CHECK(none) \
4220  r = match_at(reg, str, end, s, prev, &msa);\
4221  if (r != ONIG_MISMATCH) {\
4222  if (r >= 0) {\
4223  goto match;\
4224  }\
4225  else goto finish; /* error */ \
4226  }
4227 # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4228 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
4229 
4230 
4231  /* anchor optimize: resume search range */
4232  if (reg->anchor != 0 && str < end) {
4233  UChar *min_semi_end, *max_semi_end;
4234 
4235  if (reg->anchor & ANCHOR_BEGIN_POSITION) {
4236  /* search start-position only */
4237  begin_position:
4238  if (range > start)
4239  {
4240  if (global_pos > start)
4241  {
4242  if (global_pos < range)
4243  range = global_pos + 1;
4244  }
4245  else
4246  range = start + 1;
4247  }
4248  else
4249  range = start;
4250  }
4251  else if (reg->anchor & ANCHOR_BEGIN_BUF) {
4252  /* search str-position only */
4253  if (range > start) {
4254  if (start != str) goto mismatch_no_msa;
4255  range = str + 1;
4256  }
4257  else {
4258  if (range <= str) {
4259  start = str;
4260  range = str;
4261  }
4262  else
4263  goto mismatch_no_msa;
4264  }
4265  }
4266  else if (reg->anchor & ANCHOR_END_BUF) {
4267  min_semi_end = max_semi_end = (UChar* )end;
4268 
/* Shared by ANCHOR_END_BUF and ANCHOR_SEMI_END_BUF: clamp start/range using
   reg->anchor_dmin / reg->anchor_dmax measured from the (semi) end. */
4269  end_buf:
4270  if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
4271  goto mismatch_no_msa;
4272 
4273  if (range > start) {
4274  if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
4275  start = min_semi_end - reg->anchor_dmax;
4276  if (start < end)
4277  start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
4278  }
4279  if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
4280  range = max_semi_end - reg->anchor_dmin + 1;
4281  }
4282 
4283  if (start > range) goto mismatch_no_msa;
4284  /* If start == range, match with empty at end.
4285  Backward search is used. */
4286  }
4287  else {
4288  if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
4289  range = min_semi_end - reg->anchor_dmax;
4290  }
4291  if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
4292  start = max_semi_end - reg->anchor_dmin;
4293  start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
4294  }
4295  if (range > start) goto mismatch_no_msa;
4296  }
4297  }
4298  else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
4299  UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
4300 
4301  max_semi_end = (UChar* )end;
4302  if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
4303  min_semi_end = pre_end;
4304 
4305 #ifdef USE_CRNL_AS_LINE_TERMINATOR
4306  pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
4307  if (IS_NOT_NULL(pre_end) &&
4308  IS_NEWLINE_CRLF(reg->options) &&
4309  ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
4310  min_semi_end = pre_end;
4311  }
4312 #endif
4313  if (min_semi_end > str && start <= min_semi_end) {
4314  goto end_buf;
4315  }
4316  }
4317  else {
4318  min_semi_end = (UChar* )end;
4319  goto end_buf;
4320  }
4321  }
4322  else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
4323  goto begin_position;
4324  }
4325  }
4326  else if (str == end) { /* empty string */
4327  static const UChar address_for_empty_string[] = "";
4328 
4329 #ifdef ONIG_DEBUG_SEARCH
4330  fprintf(stderr, "onig_search: empty string.\n");
4331 #endif
4332 
/* Only attempted when reg->threshold_len is 0; str, start and end are
   redirected to a static empty buffer for this single match_at() attempt. */
4333  if (reg->threshold_len == 0) {
4334  start = end = str = address_for_empty_string;
4335  s = (UChar* )start;
4336  prev = (UChar* )NULL;
4337 
4338  MATCH_ARG_INIT(msa, option, region, start, start);
4339 #ifdef USE_COMBINATION_EXPLOSION_CHECK
4340  msa.state_check_buff = (void* )0;
4341  msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
4342 #endif
4343  MATCH_AND_RETURN_CHECK(end);
4344  goto mismatch;
4345  }
4346  goto mismatch_no_msa;
4347  }
4348 
4349 #ifdef ONIG_DEBUG_SEARCH
4350  fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
4351  (int )(end - str), (int )(start - str), (int )(range - str));
4352 #endif
4353 
/* From here on msa is live: exits go through `mismatch`, `finish` or
   `match`, all of which call MATCH_ARG_FREE. */
4354  MATCH_ARG_INIT(msa, option, region, start, global_pos);
4355 #ifdef USE_COMBINATION_EXPLOSION_CHECK
4356  {
4357  ptrdiff_t offset = (MIN(start, range) - str);
4358  STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4359  }
4360 #endif
4361 
4362  s = (UChar* )start;
4363  if (range > start) { /* forward search */
4364  if (s > str)
4365  prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4366  else
4367  prev = (UChar* )NULL;
4368 
4369  if (reg->optimize != ONIG_OPTIMIZE_NONE) {
4370  UChar *sch_range, *low, *high, *low_prev;
4371 
/* sch_range is the pre-search limit: `range` extended by reg->dmax, capped
   at `end` (and simply `end` when dmax is infinite). */
4372  sch_range = (UChar* )range;
4373  if (reg->dmax != 0) {
4374  if (reg->dmax == ONIG_INFINITE_DISTANCE)
4375  sch_range = (UChar* )end;
4376  else {
4377  sch_range += reg->dmax;
4378  if (sch_range > end) sch_range = (UChar* )end;
4379  }
4380  }
4381 
4382  if ((end - start) < reg->threshold_len)
4383  goto mismatch;
4384 
4385  if (reg->dmax != ONIG_INFINITE_DISTANCE) {
/* Finite dmax: each pre-search hit yields a [low, high] window of
   candidate positions, and match_at() is tried at every one of them. */
4386  do {
4387  if (! forward_search_range(reg, str, end, s, sch_range,
4388  &low, &high, &low_prev)) goto mismatch;
4389  if (s < low) {
4390  s = low;
4391  prev = low_prev;
4392  }
4393  while (s <= high) {
4394  MATCH_AND_RETURN_CHECK(orig_range);
4395  prev = s;
4396  s += enclen(reg->enc, s, end);
4397  }
4398  } while (s < range);
4399  goto mismatch;
4400  }
4401  else { /* check only. */
4402  if (! forward_search_range(reg, str, end, s, sch_range,
4403  &low, &high, (UChar** )NULL)) goto mismatch;
4404 
4405  if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
4406  do {
4407  MATCH_AND_RETURN_CHECK(orig_range);
4408  prev = s;
4409  s += enclen(reg->enc, s, end);
4410 
/* Unless look-behind / negative look-ahead anchors are present, advance s
   until the character before it is a newline or s reaches range. */
4411  if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
4412  while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
4413  && s < range) {
4414  prev = s;
4415  s += enclen(reg->enc, s, end);
4416  }
4417  }
4418  } while (s < range);
4419  goto mismatch;
4420  }
4421  }
4422  }
4423 
/* Plain forward scan: try every character position below range. */
4424  do {
4425  MATCH_AND_RETURN_CHECK(orig_range);
4426  prev = s;
4427  s += enclen(reg->enc, s, end);
4428  } while (s < range);
4429 
4430  if (s == range) { /* because empty match with /$/. */
4431  MATCH_AND_RETURN_CHECK(orig_range);
4432  }
4433  }
4434  else { /* backward search */
4435  if (reg->optimize != ONIG_OPTIMIZE_NONE) {
4436  UChar *low, *high, *adjrange, *sch_start;
4437 
4438  if (range < end)
4439  adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
4440  else
4441  adjrange = (UChar* )end;
4442 
4443  if (reg->dmax != ONIG_INFINITE_DISTANCE &&
4444  (end - range) >= reg->threshold_len) {
4445  do {
4446  sch_start = s + reg->dmax;
4447  if (sch_start > end) sch_start = (UChar* )end;
/* A result <= 0 (failure, or a non-positive code from building the skip
   table) is treated as a mismatch. */
4448  if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4449  &low, &high) <= 0)
4450  goto mismatch;
4451 
4452  if (s > high)
4453  s = high;
4454 
4455  while (s >= low) {
4456  prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4457  MATCH_AND_RETURN_CHECK(orig_start);
4458  s = prev;
4459  }
4460  } while (s >= range);
4461  goto mismatch;
4462  }
4463  else { /* check only. */
4464  if ((end - range) < reg->threshold_len) goto mismatch;
4465 
4466  sch_start = s;
4467  if (reg->dmax != 0) {
4468  if (reg->dmax == ONIG_INFINITE_DISTANCE)
4469  sch_start = (UChar* )end;
4470  else {
4471  sch_start += reg->dmax;
4472  if (sch_start > end) sch_start = (UChar* )end;
4473  else
4474  sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
4475  start, sch_start, end);
4476  }
4477  }
4478  if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4479  &low, &high) <= 0) goto mismatch;
4480  }
4481  }
4482 
/* Plain backward scan: try every character position down to range. */
4483  do {
4484  prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4485  MATCH_AND_RETURN_CHECK(orig_start);
4486  s = prev;
4487  } while (s >= range);
4488  }
4489 
4490  mismatch:
/* In find-longest mode a recorded best match (msa.best_len >= 0) is
   reported from msa.best_s instead of a mismatch. */
4491 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4492  if (IS_FIND_LONGEST(reg->options)) {
4493  if (msa.best_len >= 0) {
4494  s = msa.best_s;
4495  goto match;
4496  }
4497  }
4498 #endif
4499  r = ONIG_MISMATCH;
4500 
4501  finish:
4502  MATCH_ARG_FREE(msa);
4503 
4504  /* If result is mismatch and no FIND_NOT_EMPTY option,
4505  then the region is not set in match_at(). */
4506  if (IS_FIND_NOT_EMPTY(reg->options) && region) {
4507  onig_region_clear(region);
4508  }
4509 
4510 #ifdef ONIG_DEBUG
4511  if (r != ONIG_MISMATCH)
4512  fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
4513 #endif
4514  return r;
4515 
/* Exit paths taken before MATCH_ARG_INIT: nothing to free. */
4516  mismatch_no_msa:
4517  r = ONIG_MISMATCH;
4518  finish_no_msa:
4519 #ifdef ONIG_DEBUG
4520  if (r != ONIG_MISMATCH)
4521  fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
4522 #endif
4523  return r;
4524 
4525  match:
4526  MATCH_ARG_FREE(msa);
4527  return s - str;
4528 }
4529 
4530 extern OnigPosition
4531 onig_scan(regex_t* reg, const UChar* str, const UChar* end,
4532  OnigRegion* region, OnigOptionType option,
4533  int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
4534  void* callback_arg)
4535 {
4536  OnigPosition r;
4537  OnigPosition n;
4538  int rs;
4539  const UChar* start;
4540 
4541  n = 0;
4542  start = str;
4543  while (1) {
4544  r = onig_search(reg, str, end, start, end, region, option);
4545  if (r >= 0) {
4546  rs = scan_callback(n, r, region, callback_arg);
4547  n++;
4548  if (rs != 0)
4549  return rs;
4550 
4551  if (region->end[0] == start - str) {
4552  if (start >= end) break;
4553  start += enclen(reg->enc, start, end);
4554  }
4555  else
4556  start = str + region->end[0];
4557 
4558  if (start > end)
4559  break;
4560  }
4561  else if (r == ONIG_MISMATCH) {
4562  break;
4563  }
4564  else { /* error */
4565  return r;
4566  }
4567  }
4568 
4569  return n;
4570 }
4571 
4572 extern OnigEncoding
4573 onig_get_encoding(const regex_t* reg)
4574 {
4575  return reg->enc;
4576 }
4577 
4578 extern OnigOptionType
4579 onig_get_options(const regex_t* reg)
4580 {
4581  return reg->options;
4582 }
4583 
4584 extern OnigCaseFoldType
4585 onig_get_case_fold_flag(const regex_t* reg)
4586 {
4587  return reg->case_fold_flag;
4588 }
4589 
4590 extern const OnigSyntaxType*
4591 onig_get_syntax(const regex_t* reg)
4592 {
4593  return reg->syntax;
4594 }
4595 
4596 extern int
4597 onig_number_of_captures(const regex_t* reg)
4598 {
4599  return reg->num_mem;
4600 }
4601 
4602 extern int
4603 onig_number_of_capture_histories(const regex_t* reg)
4604 {
4605 #ifdef USE_CAPTURE_HISTORY
4606  int i, n;
4607 
4608  n = 0;
4609  for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
4610  if (BIT_STATUS_AT(reg->capture_history, i) != 0)
4611  n++;
4612  }
4613  return n;
4614 #else
4615  return 0;
4616 #endif
4617 }
4618 
4619 extern void
4620 onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)
4621 {
4622  *to = *from;
4623 }
#define RB_GNUC_EXTENSION
This is expanded to nothing for non-GCC compilers.
Definition: defines.h:89
#define xfree
Old name of ruby_xfree.
Definition: xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition: xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition: xmalloc.h:53
static bool rb_enc_asciicompat(rb_encoding *enc)
Queries if the passed encoding is in some sense compatible with ASCII.
Definition: encoding.h:782
Definition: win32.h:696