Ruby 3.1.4p223 (2023-03-30 revision HEAD)
yjit_codegen.c
1 // This file is a fragment of the yjit.o compilation unit. See yjit.c.
2 #include "internal.h"
3 #include "gc.h"
4 #include "internal/compile.h"
5 #include "internal/class.h"
6 #include "internal/hash.h"
7 #include "internal/object.h"
8 #include "internal/sanitizers.h"
9 #include "internal/string.h"
10 #include "internal/struct.h"
11 #include "internal/variable.h"
12 #include "internal/re.h"
13 #include "probes.h"
14 #include "probes_helper.h"
15 #include "yjit.h"
16 #include "yjit_iface.h"
17 #include "yjit_core.h"
18 #include "yjit_codegen.h"
19 #include "yjit_asm.h"
20 
21 // Map from YARV opcodes to code generation functions
22 static codegen_fn gen_fns[VM_INSTRUCTION_SIZE] = { NULL };
23 
24 // Map from method entries to code generation functions
25 static st_table *yjit_method_codegen_table = NULL;
26 
27 // Code for exiting back to the interpreter from the leave instruction
28 static void *leave_exit_code;
29 
30 // Code for full logic of returning from a C method and exiting to the interpreter
31 static uint32_t outline_full_cfunc_return_pos;
32 
33 // For implementing global code invalidation
34 struct codepage_patch {
35  uint32_t inline_patch_pos;
36  uint32_t outlined_target_pos;
37 };
38 
39 typedef rb_darray(struct codepage_patch) patch_array_t;
40 
41 static patch_array_t global_inval_patches = NULL;
42 
43 // Print the current source location for debugging purposes
44 RBIMPL_ATTR_MAYBE_UNUSED()
45 static void
46 jit_print_loc(jitstate_t *jit, const char *msg)
47 {
48  char *ptr;
49  long len;
50  VALUE path = rb_iseq_path(jit->iseq);
51  RSTRING_GETMEM(path, ptr, len);
52  fprintf(stderr, "%s %.*s:%u\n", msg, (int)len, ptr, rb_iseq_line_no(jit->iseq, jit->insn_idx));
53 }
54 
55 // dump an object for debugging purposes
56 RBIMPL_ATTR_MAYBE_UNUSED()
57 static void
58 jit_obj_info_dump(codeblock_t *cb, x86opnd_t opnd) {
59  push_regs(cb);
60  mov(cb, C_ARG_REGS[0], opnd);
61  call_ptr(cb, REG0, (void *)rb_obj_info_dump);
62  pop_regs(cb);
63 }
64 
65 // Get the current instruction's opcode
66 static int
67 jit_get_opcode(jitstate_t *jit)
68 {
69  return jit->opcode;
70 }
71 
72 // Get the index of the next instruction
73 static uint32_t
74 jit_next_insn_idx(jitstate_t *jit)
75 {
76  return jit->insn_idx + insn_len(jit_get_opcode(jit));
77 }
78 
79 // Get an instruction argument by index
80 static VALUE
81 jit_get_arg(jitstate_t *jit, size_t arg_idx)
82 {
83  RUBY_ASSERT(arg_idx + 1 < (size_t)insn_len(jit_get_opcode(jit)));
84  return *(jit->pc + arg_idx + 1);
85 }
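// (For reference, jit_get_arg relies on the iseq encoding where pc[0] holds
// the opcode and the operands follow it, so operand arg_idx lives at pc[arg_idx + 1].)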
86 
87 // Load a VALUE into a register and keep track of the reference if it is on the GC heap.
88 static void
89 jit_mov_gc_ptr(jitstate_t *jit, codeblock_t *cb, x86opnd_t reg, VALUE ptr)
90 {
91  RUBY_ASSERT(reg.type == OPND_REG && reg.num_bits == 64);
92 
93  // Load the pointer constant into the specified register
94  mov(cb, reg, const_ptr_opnd((void*)ptr));
95 
96  // The pointer immediate is encoded as the last part of the mov written out
97  uint32_t ptr_offset = cb->write_pos - sizeof(VALUE);
98 
99  if (!SPECIAL_CONST_P(ptr)) {
100  if (!rb_darray_append(&jit->block->gc_object_offsets, ptr_offset)) {
101  rb_bug("allocation failed");
102  }
103  }
104 }
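// (The offsets recorded above let the GC later locate the VALUEs embedded in
// generated code so it can mark them and rewrite them if the objects move.)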
105 
106 // Check if we are compiling the instruction at the stub PC
107 // Meaning we are compiling the instruction that is next to execute
108 static bool
109 jit_at_current_insn(jitstate_t *jit)
110 {
111  const VALUE *ec_pc = jit->ec->cfp->pc;
112  return (ec_pc == jit->pc);
113 }
114 
115 // Peek at the nth topmost value on the Ruby stack.
116 // Returns the topmost value when n == 0.
117 static VALUE
118 jit_peek_at_stack(jitstate_t *jit, ctx_t *ctx, int n)
119 {
120  RUBY_ASSERT(jit_at_current_insn(jit));
121 
122  // Note: this does not account for ctx->sp_offset because
123  // this is only available when hitting a stub, and while
124  // hitting a stub, cfp->sp needs to be up to date in case
125  // codegen functions trigger GC. See :stub-sp-flush:.
126  VALUE *sp = jit->ec->cfp->sp;
127 
128  return *(sp - 1 - n);
129 }
130 
131 static VALUE
132 jit_peek_at_self(jitstate_t *jit, ctx_t *ctx)
133 {
134  return jit->ec->cfp->self;
135 }
136 
137 RBIMPL_ATTR_MAYBE_UNUSED()
138 static VALUE
139 jit_peek_at_local(jitstate_t *jit, ctx_t *ctx, int n)
140 {
141  RUBY_ASSERT(jit_at_current_insn(jit));
142 
143  int32_t local_table_size = jit->iseq->body->local_table_size;
144  RUBY_ASSERT(n < (int)jit->iseq->body->local_table_size);
145 
146  const VALUE *ep = jit->ec->cfp->ep;
147  return ep[-VM_ENV_DATA_SIZE - local_table_size + n + 1];
148 }
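// (Roughly: with VM_ENV_DATA_SIZE == 3 and L == local_table_size, locals
// 0..L-1 live at ep[-(L + 2)] .. ep[-3], just below the env data slots at
// ep[-2..0], which is what the indexing expression above computes.)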
149 
150 // Save the incremented PC on the CFP
151 // This is necessary when callees can raise or allocate
152 static void
153 jit_save_pc(jitstate_t *jit, x86opnd_t scratch_reg)
154 {
155  codeblock_t *cb = jit->cb;
156  mov(cb, scratch_reg, const_ptr_opnd(jit->pc + insn_len(jit->opcode)));
157  mov(cb, mem_opnd(64, REG_CFP, offsetof(rb_control_frame_t, pc)), scratch_reg);
158 }
159 
160 // Save the current SP on the CFP
161 // This realigns the interpreter SP with the JIT SP
162 // Note: this will change the current value of REG_SP,
163 // which could invalidate memory operands
164 static void
165 jit_save_sp(jitstate_t *jit, ctx_t *ctx)
166 {
167  if (ctx->sp_offset != 0) {
168  x86opnd_t stack_pointer = ctx_sp_opnd(ctx, 0);
169  codeblock_t *cb = jit->cb;
170  lea(cb, REG_SP, stack_pointer);
171  mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG_SP);
172  ctx->sp_offset = 0;
173  }
174 }
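// (ctx->sp_offset tracks how far the virtual stack top has moved since
// cfp->sp was last written; flushing it here keeps the interpreter's view of
// the stack consistent for callees that walk or GC the frame.)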
175 
176 // jit_save_pc() + jit_save_sp(). Should be used before calling a routine that
177 // could:
178 // - Perform GC allocation
179 // - Take the VM lock through RB_VM_LOCK_ENTER()
180 // - Perform Ruby method call
181 static void
182 jit_prepare_routine_call(jitstate_t *jit, ctx_t *ctx, x86opnd_t scratch_reg)
183 {
184  jit->record_boundary_patch_point = true;
185  jit_save_pc(jit, scratch_reg);
186  jit_save_sp(jit, ctx);
187 }
188 
189 // Record the current codeblock write position so it can later be rewritten
190 // into a jump to the outlined block. Used to implement global code invalidation.
191 static void
192 record_global_inval_patch(const codeblock_t *cb, uint32_t outline_block_target_pos)
193 {
194  struct codepage_patch patch_point = { cb->write_pos, outline_block_target_pos };
195  if (!rb_darray_append(&global_inval_patches, patch_point)) rb_bug("allocation failed");
196 }
197 
198 static bool jit_guard_known_klass(jitstate_t *jit, ctx_t *ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit);
199 
200 #if YJIT_STATS
201 
202 // Add a comment at the current position in the code block
203 static void
204 _add_comment(codeblock_t *cb, const char *comment_str)
205 {
206  // We can't add comments to the outlined code block
207  if (cb == ocb)
208  return;
209 
210  // Avoid adding duplicate comment strings (can happen due to deferred codegen)
211  size_t num_comments = rb_darray_size(yjit_code_comments);
212  if (num_comments > 0) {
213  struct yjit_comment last_comment = rb_darray_get(yjit_code_comments, num_comments - 1);
214  if (last_comment.offset == cb->write_pos && strcmp(last_comment.comment, comment_str) == 0) {
215  return;
216  }
217  }
218 
219  struct yjit_comment new_comment = (struct yjit_comment){ cb->write_pos, comment_str };
220  rb_darray_append(&yjit_code_comments, new_comment);
221 }
222 
223 // Comments for generated machine code
224 #define ADD_COMMENT(cb, comment) _add_comment((cb), (comment))
225 
226 // Verify the ctx's types and mappings against the compile-time stack, self,
227 // and locals.
228 static void
229 verify_ctx(jitstate_t *jit, ctx_t *ctx)
230 {
231  // Only able to check types when at current insn
232  RUBY_ASSERT(jit_at_current_insn(jit));
233 
234  VALUE self_val = jit_peek_at_self(jit, ctx);
235  if (type_diff(yjit_type_of_value(self_val), ctx->self_type) == INT_MAX) {
236  rb_bug("verify_ctx: ctx type (%s) incompatible with actual value of self: %s", yjit_type_name(ctx->self_type), rb_obj_info(self_val));
237  }
238 
239  for (int i = 0; i < ctx->stack_size && i < MAX_TEMP_TYPES; i++) {
240  temp_type_mapping_t learned = ctx_get_opnd_mapping(ctx, OPND_STACK(i));
241  VALUE val = jit_peek_at_stack(jit, ctx, i);
242  val_type_t detected = yjit_type_of_value(val);
243 
244  if (learned.mapping.kind == TEMP_SELF) {
245  if (self_val != val) {
246  rb_bug("verify_ctx: stack value was mapped to self, but values did not match\n"
247  " stack: %s\n"
248  " self: %s",
249  rb_obj_info(val),
250  rb_obj_info(self_val));
251  }
252  }
253 
254  if (learned.mapping.kind == TEMP_LOCAL) {
255  int local_idx = learned.mapping.idx;
256  VALUE local_val = jit_peek_at_local(jit, ctx, local_idx);
257  if (local_val != val) {
258  rb_bug("verify_ctx: stack value was mapped to local, but values did not match\n"
259  " stack: %s\n"
260  " local %i: %s",
261  rb_obj_info(val),
262  local_idx,
263  rb_obj_info(local_val));
264  }
265  }
266 
267  if (type_diff(detected, learned.type) == INT_MAX) {
268  rb_bug("verify_ctx: ctx type (%s) incompatible with actual value on stack: %s", yjit_type_name(learned.type), rb_obj_info(val));
269  }
270  }
271 
272  int32_t local_table_size = jit->iseq->body->local_table_size;
273  for (int i = 0; i < local_table_size && i < MAX_TEMP_TYPES; i++) {
274  val_type_t learned = ctx->local_types[i];
275  VALUE val = jit_peek_at_local(jit, ctx, i);
276  val_type_t detected = yjit_type_of_value(val);
277 
278  if (type_diff(detected, learned) == INT_MAX) {
279  rb_bug("verify_ctx: ctx type (%s) incompatible with actual value of local: %s", yjit_type_name(learned), rb_obj_info(val));
280  }
281  }
282 }
283 
284 #else
285 
286 #define ADD_COMMENT(cb, comment) ((void)0)
287 #define verify_ctx(jit, ctx) ((void)0)
288 
289 #endif // if YJIT_STATS
290 
291 #if YJIT_STATS
292 
293 // Increment a profiling counter with counter_name
294 #define GEN_COUNTER_INC(cb, counter_name) _gen_counter_inc(cb, &(yjit_runtime_counters . counter_name))
295 static void
296 _gen_counter_inc(codeblock_t *cb, int64_t *counter)
297 {
298  if (!rb_yjit_opts.gen_stats) return;
299 
300 // Use REG1 because there might be a return value in REG0
301  mov(cb, REG1, const_ptr_opnd(counter));
302  cb_write_lock_prefix(cb); // for ractors.
303  add(cb, mem_opnd(64, REG1, 0), imm_opnd(1));
304 }
305 
306 // Increment a counter then take an existing side exit.
307 #define COUNTED_EXIT(jit, side_exit, counter_name) _counted_side_exit(jit, side_exit, &(yjit_runtime_counters . counter_name))
308 static uint8_t *
309 _counted_side_exit(jitstate_t* jit, uint8_t *existing_side_exit, int64_t *counter)
310 {
311  if (!rb_yjit_opts.gen_stats) return existing_side_exit;
312 
313  uint8_t *start = cb_get_ptr(jit->ocb, jit->ocb->write_pos);
314  _gen_counter_inc(jit->ocb, counter);
315  jmp_ptr(jit->ocb, existing_side_exit);
316  return start;
317 }
318 
319 #else
320 
321 #define GEN_COUNTER_INC(cb, counter_name) ((void)0)
322 #define COUNTED_EXIT(jit, side_exit, counter_name) side_exit
323 
324 #endif // if YJIT_STATS
325 
326 // Generate an exit to return to the interpreter
327 static uint32_t
328 yjit_gen_exit(VALUE *exit_pc, ctx_t *ctx, codeblock_t *cb)
329 {
330  const uint32_t code_pos = cb->write_pos;
331 
332  ADD_COMMENT(cb, "exit to interpreter");
333 
334 // Generate the code to exit to the interpreter
335  // Write the adjusted SP back into the CFP
336  if (ctx->sp_offset != 0) {
337  x86opnd_t stack_pointer = ctx_sp_opnd(ctx, 0);
338  lea(cb, REG_SP, stack_pointer);
339  mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG_SP);
340  }
341 
342  // Update CFP->PC
343  mov(cb, RAX, const_ptr_opnd(exit_pc));
344  mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), RAX);
345 
346  // Accumulate stats about interpreter exits
347 #if YJIT_STATS
348  if (rb_yjit_opts.gen_stats) {
349  mov(cb, RDI, const_ptr_opnd(exit_pc));
350  call_ptr(cb, RSI, (void *)&yjit_count_side_exit_op);
351  }
352 #endif
353 
354  pop(cb, REG_SP);
355  pop(cb, REG_EC);
356  pop(cb, REG_CFP);
357 
358  mov(cb, RAX, imm_opnd(Qundef));
359  ret(cb);
360 
361  return code_pos;
362 }
363 
364 // Generate a continuation for gen_leave() that exits to the interpreter at REG_CFP->pc.
365 static uint8_t *
366 yjit_gen_leave_exit(codeblock_t *cb)
367 {
368  uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
369 
370  // Note, gen_leave() fully reconstructs interpreter state and leaves the
371  // return value in RAX before coming here.
372 
373  // Every exit to the interpreter should be counted
374  GEN_COUNTER_INC(cb, leave_interp_return);
375 
376  pop(cb, REG_SP);
377  pop(cb, REG_EC);
378  pop(cb, REG_CFP);
379 
380  ret(cb);
381 
382  return code_ptr;
383 }
384 
385 // Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit
386 // to the interpreter when it cannot service a stub by generating new code.
387 // Before coming here, branch_stub_hit() takes care of fully reconstructing
388 // interpreter state.
389 static void
390 gen_code_for_exit_from_stub(void)
391 {
392  codeblock_t *cb = ocb;
393  code_for_exit_from_stub = cb_get_ptr(cb, cb->write_pos);
394 
395  GEN_COUNTER_INC(cb, exit_from_branch_stub);
396 
397  pop(cb, REG_SP);
398  pop(cb, REG_EC);
399  pop(cb, REG_CFP);
400 
401  mov(cb, RAX, imm_opnd(Qundef));
402  ret(cb);
403 }
404 
405 // :side-exit:
406 // Get an exit for the current instruction in the outlined block. The code
407 // for each instruction often begins with several guards before proceeding
408 // to do work. When guards fail, an option we have is to exit to the
409 // interpreter at an instruction boundary. The piece of code that takes
410 // care of reconstructing interpreter state and exiting out of generated
411 // code is called the side exit.
412 //
413 // No guards change the logic for reconstructing interpreter state at the
414 // moment, so there is one unique side exit for each context. Note that
415 // it's incorrect to jump to the side exit after any ctx stack push/pop operations
416 // since they change the logic required for reconstructing interpreter state.
417 static uint8_t *
418 yjit_side_exit(jitstate_t *jit, ctx_t *ctx)
419 {
420  if (!jit->side_exit_for_pc) {
421  codeblock_t *ocb = jit->ocb;
422  uint32_t pos = yjit_gen_exit(jit->pc, ctx, ocb);
423  jit->side_exit_for_pc = cb_get_ptr(ocb, pos);
424  }
425 
426  return jit->side_exit_for_pc;
427 }
428 
429 // Ensure that there is an exit for the start of the block being compiled.
430 // Block invalidation uses this exit.
431 static void
432 jit_ensure_block_entry_exit(jitstate_t *jit)
433 {
434  block_t *block = jit->block;
435  if (block->entry_exit) return;
436 
437  if (jit->insn_idx == block->blockid.idx) {
438  // We are compiling the first instruction in the block.
439  // Generate the exit with the cache in jitstate.
440  block->entry_exit = yjit_side_exit(jit, &block->ctx);
441  }
442  else {
443  VALUE *pc = yjit_iseq_pc_at_idx(block->blockid.iseq, block->blockid.idx);
444  uint32_t pos = yjit_gen_exit(pc, &block->ctx, ocb);
445  block->entry_exit = cb_get_ptr(ocb, pos);
446  }
447 }
448 
449 // Generate a runtime guard that ensures the PC is at the start of the iseq,
450 // otherwise take a side exit. This is to handle the situation of optional
451 // parameters. When a function with optional parameters is called, the entry
452 // PC for the method isn't necessarily 0, but we always generate code that
453 // assumes the entry point is 0.
454 static void
455 yjit_pc_guard(codeblock_t *cb, const rb_iseq_t *iseq)
456 {
457  RUBY_ASSERT(cb != NULL);
458 
459  mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, pc));
460  mov(cb, REG1, const_ptr_opnd(iseq->body->iseq_encoded));
461  xor(cb, REG0, REG1);
462 
463 // xor sets ZF when REG0 == REG1, so we can jz here
464  uint32_t pc_is_zero = cb_new_label(cb, "pc_is_zero");
465  jz_label(cb, pc_is_zero);
466 
467  // We're not starting at the first PC, so we need to exit.
468  GEN_COUNTER_INC(cb, leave_start_pc_non_zero);
469 
470  pop(cb, REG_SP);
471  pop(cb, REG_EC);
472  pop(cb, REG_CFP);
473 
474  mov(cb, RAX, imm_opnd(Qundef));
475  ret(cb);
476 
477  // PC should be at the beginning
478  cb_write_label(cb, pc_is_zero);
479  cb_link_labels(cb);
480 }
481 
482 // The code we generate in gen_send_cfunc() doesn't fire the c_return TracePoint event
483 // like the interpreter. When tracing for c_return is enabled, we patch the code after
484 // the C method return to call into this to fire the event.
485 static void
486 full_cfunc_return(rb_execution_context_t *ec, VALUE return_value)
487 {
488  rb_control_frame_t *cfp = ec->cfp;
489  RUBY_ASSERT_ALWAYS(cfp == GET_EC()->cfp);
490  const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(cfp);
491 
492  RUBY_ASSERT_ALWAYS(RUBYVM_CFUNC_FRAME_P(cfp));
493  RUBY_ASSERT_ALWAYS(me->def->type == VM_METHOD_TYPE_CFUNC);
494 
495  // CHECK_CFP_CONSISTENCY("full_cfunc_return"); TODO revive this
496 
497  // Pop the C func's frame and fire the c_return TracePoint event
498  // Note that this is the same order as vm_call_cfunc_with_frame().
499  rb_vm_pop_frame(ec);
500  EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_RETURN, cfp->self, me->def->original_id, me->called_id, me->owner, return_value);
501  // Note, this deviates from the interpreter in that users need to enable
502  // a c_return TracePoint for this DTrace hook to work. A reasonable change
503  // since the Ruby return event works this way as well.
504  RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec, me->owner, me->def->original_id);
505 
506  // Push return value into the caller's stack. We know that it's a frame that
507  // uses cfp->sp because we are patching a call done with gen_send_cfunc().
508  ec->cfp->sp[0] = return_value;
509  ec->cfp->sp++;
510 }
511 
512 // Landing code for when c_return tracing is enabled. See full_cfunc_return().
513 static void
514 gen_full_cfunc_return(void)
515 {
516  codeblock_t *cb = ocb;
517  outline_full_cfunc_return_pos = ocb->write_pos;
518 
519 // This chunk of code expects REG_EC to be filled properly and
520  // RAX to contain the return value of the C method.
521 
522  // Call full_cfunc_return()
523  mov(cb, C_ARG_REGS[0], REG_EC);
524  mov(cb, C_ARG_REGS[1], RAX);
525  call_ptr(cb, REG0, (void *)full_cfunc_return);
526 
527  // Count the exit
528  GEN_COUNTER_INC(cb, traced_cfunc_return);
529 
530  // Return to the interpreter
531  pop(cb, REG_SP);
532  pop(cb, REG_EC);
533  pop(cb, REG_CFP);
534 
535  mov(cb, RAX, imm_opnd(Qundef));
536  ret(cb);
537 }
538 
539 /*
540 Compile an interpreter entry block to be inserted into an iseq
541 Returns `NULL` if compilation fails.
542 */
543 static uint8_t *
544 yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq)
545 {
546  RUBY_ASSERT(cb != NULL);
547 
548  enum { MAX_PROLOGUE_SIZE = 1024 };
549 
550  // Check if we have enough executable memory
551  if (cb->write_pos + MAX_PROLOGUE_SIZE >= cb->mem_size) {
552  return NULL;
553  }
554 
555  const uint32_t old_write_pos = cb->write_pos;
556 
557  // Align the current write position to cache line boundaries
558  cb_align_pos(cb, 64);
559 
560  uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
561  ADD_COMMENT(cb, "yjit entry");
562 
563  push(cb, REG_CFP);
564  push(cb, REG_EC);
565  push(cb, REG_SP);
566 
567  // We are passed EC and CFP
568  mov(cb, REG_EC, C_ARG_REGS[0]);
569  mov(cb, REG_CFP, C_ARG_REGS[1]);
570 
571  // Load the current SP from the CFP into REG_SP
572  mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
573 
574  // Setup cfp->jit_return
575  // TODO: this could use an IP relative LEA instead of an 8 byte immediate
576  mov(cb, REG0, const_ptr_opnd(leave_exit_code));
577  mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
578 
579  // We're compiling iseqs that we *expect* to start at `insn_idx`. But in
580  // the case of optional parameters, the interpreter can set the pc to a
581  // different location depending on the optional parameters. If an iseq
582  // has optional parameters, we'll add a runtime check that the PC we've
583  // compiled for is the same PC that the interpreter wants us to run with.
584  // If they don't match, then we'll take a side exit.
585  if (iseq->body->param.flags.has_opt) {
586  yjit_pc_guard(cb, iseq);
587  }
588 
589  // Verify MAX_PROLOGUE_SIZE
590  RUBY_ASSERT_ALWAYS(cb->write_pos - old_write_pos <= MAX_PROLOGUE_SIZE);
591 
592  return code_ptr;
593 }
594 
595 // Generate code to check for interrupts and take a side-exit.
596 // Warning: this function clobbers REG0
597 static void
598 yjit_check_ints(codeblock_t *cb, uint8_t *side_exit)
599 {
600  // Check for interrupts
601  // see RUBY_VM_CHECK_INTS(ec) macro
602  ADD_COMMENT(cb, "RUBY_VM_CHECK_INTS(ec)");
603  mov(cb, REG0_32, member_opnd(REG_EC, rb_execution_context_t, interrupt_mask));
604  not(cb, REG0_32);
605  test(cb, member_opnd(REG_EC, rb_execution_context_t, interrupt_flag), REG0_32);
606  jnz_ptr(cb, side_exit);
607 }
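// (Roughly the C equivalent of the check emitted above:
//  if (ec->interrupt_flag & ~ec->interrupt_mask) goto side_exit;)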
608 
609 // Generate a stubbed unconditional jump to the next bytecode instruction.
610 // Blocks that are part of a guard chain can use this to share the same successor.
611 static void
612 jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)
613 {
614 // Reset the depth since in current usages we only ever jump to
615 // chain_depth > 0 from the same instruction.
616  ctx_t reset_depth = *current_context;
617  reset_depth.chain_depth = 0;
618 
619  blockid_t jump_block = { jit->iseq, jit_next_insn_idx(jit) };
620 
621  // We are at the end of the current instruction. Record the boundary.
622  if (jit->record_boundary_patch_point) {
623  uint32_t exit_pos = yjit_gen_exit(jit->pc + insn_len(jit->opcode), &reset_depth, jit->ocb);
624  record_global_inval_patch(jit->cb, exit_pos);
625  jit->record_boundary_patch_point = false;
626  }
627 
628  // Generate the jump instruction
629  gen_direct_jump(
630  jit,
631  &reset_depth,
632  jump_block
633  );
634 }
635 
636 // Compile a sequence of bytecode instructions for a given basic block version.
637 // Part of gen_block_version().
638 static block_t *
639 gen_single_block(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec)
640 {
641  RUBY_ASSERT(cb != NULL);
642  verify_blockid(blockid);
643 
644  // Allocate the new block
645  block_t *block = calloc(1, sizeof(block_t));
646  if (!block) {
647  return NULL;
648  }
649 
650  // Copy the starting context to avoid mutating it
651  ctx_t ctx_copy = *start_ctx;
652  ctx_t *ctx = &ctx_copy;
653 
654  // Limit the number of specialized versions for this block
655  *ctx = limit_block_versions(blockid, ctx);
656 
657  // Save the starting context on the block.
658  block->blockid = blockid;
659  block->ctx = *ctx;
660 
661  RUBY_ASSERT(!(blockid.idx == 0 && start_ctx->stack_size > 0));
662 
663  const rb_iseq_t *iseq = block->blockid.iseq;
664  const unsigned int iseq_size = iseq->body->iseq_size;
665  uint32_t insn_idx = block->blockid.idx;
666  const uint32_t starting_insn_idx = insn_idx;
667 
668  // Initialize a JIT state object
669  jitstate_t jit = {
670  .cb = cb,
671  .ocb = ocb,
672  .block = block,
673  .iseq = iseq,
674  .ec = ec
675  };
676 
677  // Mark the start position of the block
678  block->start_addr = cb_get_write_ptr(cb);
679 
680  // For each instruction to compile
681  while (insn_idx < iseq_size) {
682  // Get the current pc and opcode
683  VALUE *pc = yjit_iseq_pc_at_idx(iseq, insn_idx);
684  int opcode = yjit_opcode_at_pc(iseq, pc);
685  RUBY_ASSERT(opcode >= 0 && opcode < VM_INSTRUCTION_SIZE);
686 
687  // opt_getinlinecache wants to be in a block all on its own. Cut the block short
688  // if we run into it. See gen_opt_getinlinecache() for details.
689  if (opcode == BIN(opt_getinlinecache) && insn_idx > starting_insn_idx) {
690  jit_jump_to_next_insn(&jit, ctx);
691  break;
692  }
693 
694  // Set the current instruction
695  jit.insn_idx = insn_idx;
696  jit.opcode = opcode;
697  jit.pc = pc;
698  jit.side_exit_for_pc = NULL;
699 
701 // If the previous instruction requested to record the boundary
701  if (jit.record_boundary_patch_point) {
702  // Generate an exit to this instruction and record it
703  uint32_t exit_pos = yjit_gen_exit(jit.pc, ctx, ocb);
704  record_global_inval_patch(cb, exit_pos);
705  jit.record_boundary_patch_point = false;
706  }
707 
708  // Verify our existing assumption (DEBUG)
709  if (jit_at_current_insn(&jit)) {
710  verify_ctx(&jit, ctx);
711  }
712 
713  // Lookup the codegen function for this instruction
714  codegen_fn gen_fn = gen_fns[opcode];
715  codegen_status_t status = YJIT_CANT_COMPILE;
716  if (gen_fn) {
717  if (0) {
718  fprintf(stderr, "compiling %d: %s\n", insn_idx, insn_name(opcode));
719  print_str(cb, insn_name(opcode));
720  }
721 
722  // :count-placement:
723  // Count bytecode instructions that execute in generated code.
724 // Note that the increment happens even when the output takes a side exit.
725  GEN_COUNTER_INC(cb, exec_instruction);
726 
727  // Add a comment for the name of the YARV instruction
728  ADD_COMMENT(cb, insn_name(opcode));
729 
730  // Call the code generation function
731  status = gen_fn(&jit, ctx, cb);
732  }
733 
734  // If we can't compile this instruction
735  // exit to the interpreter and stop compiling
736  if (status == YJIT_CANT_COMPILE) {
737 // TODO: if the codegen function makes changes to ctx and then returns YJIT_CANT_COMPILE,
738  // the exit this generates would be wrong. We could save a copy of the entry context
739  // and assert that ctx is the same here.
740  uint32_t exit_off = yjit_gen_exit(jit.pc, ctx, cb);
741 
742  // If this is the first instruction in the block, then we can use
743  // the exit for block->entry_exit.
744  if (insn_idx == block->blockid.idx) {
745  block->entry_exit = cb_get_ptr(cb, exit_off);
746  }
747  break;
748  }
749 
750  // For now, reset the chain depth after each instruction as only the
751  // first instruction in the block can concern itself with the depth.
752  ctx->chain_depth = 0;
753 
754  // Move to the next instruction to compile
755  insn_idx += insn_len(opcode);
756 
757  // If the instruction terminates this block
758  if (status == YJIT_END_BLOCK) {
759  break;
760  }
761  }
762 
763  // Mark the end position of the block
764  block->end_addr = cb_get_write_ptr(cb);
765 
766  // Store the index of the last instruction in the block
767  block->end_idx = insn_idx;
768 
769  // We currently can't handle cases where the request is for a block that
770  // doesn't go to the next instruction.
771  RUBY_ASSERT(!jit.record_boundary_patch_point);
772 
773  // If code for the block doesn't fit, free the block and fail.
774  if (cb->dropped_bytes || ocb->dropped_bytes) {
775  yjit_free_block(block);
776  return NULL;
777  }
778 
779  if (YJIT_DUMP_MODE >= 2) {
780 // Dump list of compiled instructions
781  fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq);
782  for (uint32_t idx = block->blockid.idx; idx < insn_idx; ) {
783  int opcode = yjit_opcode_at_pc(iseq, yjit_iseq_pc_at_idx(iseq, idx));
784  fprintf(stderr, " %04d %s\n", idx, insn_name(opcode));
785  idx += insn_len(opcode);
786  }
787  }
788 
789  return block;
790 }
791 
792 static codegen_status_t gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb);
793 
794 static codegen_status_t
795 gen_nop(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
796 {
797  // Do nothing
798  return YJIT_KEEP_COMPILING;
799 }
800 
801 static codegen_status_t
802 gen_dup(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
803 {
804  // Get the top value and its type
805  x86opnd_t dup_val = ctx_stack_pop(ctx, 0);
806  temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
807 
808  // Push the same value on top
809  x86opnd_t loc0 = ctx_stack_push_mapping(ctx, mapping);
810  mov(cb, REG0, dup_val);
811  mov(cb, loc0, REG0);
812 
813  return YJIT_KEEP_COMPILING;
814 }
815 
816 // duplicate stack top n elements
817 static codegen_status_t
818 gen_dupn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
819 {
820  rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
821 
822  // In practice, seems to be only used for n==2
823  if (n != 2) {
824  return YJIT_CANT_COMPILE;
825  }
826 
827  x86opnd_t opnd1 = ctx_stack_opnd(ctx, 1);
828  x86opnd_t opnd0 = ctx_stack_opnd(ctx, 0);
829  temp_type_mapping_t mapping1 = ctx_get_opnd_mapping(ctx, OPND_STACK(1));
830  temp_type_mapping_t mapping0 = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
831 
832  x86opnd_t dst1 = ctx_stack_push_mapping(ctx, mapping1);
833  mov(cb, REG0, opnd1);
834  mov(cb, dst1, REG0);
835 
836  x86opnd_t dst0 = ctx_stack_push_mapping(ctx, mapping0);
837  mov(cb, REG0, opnd0);
838  mov(cb, dst0, REG0);
839 
840  return YJIT_KEEP_COMPILING;
841 }
842 
843 static void
844 stack_swap(ctx_t *ctx, codeblock_t *cb, int offset0, int offset1, x86opnd_t reg0, x86opnd_t reg1)
845 {
846  x86opnd_t opnd0 = ctx_stack_opnd(ctx, offset0);
847  x86opnd_t opnd1 = ctx_stack_opnd(ctx, offset1);
848 
849  temp_type_mapping_t mapping0 = ctx_get_opnd_mapping(ctx, OPND_STACK(offset0));
850  temp_type_mapping_t mapping1 = ctx_get_opnd_mapping(ctx, OPND_STACK(offset1));
851 
852  mov(cb, reg0, opnd0);
853  mov(cb, reg1, opnd1);
854  mov(cb, opnd0, reg1);
855  mov(cb, opnd1, reg0);
856 
857  ctx_set_opnd_mapping(ctx, OPND_STACK(offset0), mapping1);
858  ctx_set_opnd_mapping(ctx, OPND_STACK(offset1), mapping0);
859 }
860 
861 // Swap top 2 stack entries
862 static codegen_status_t
863 gen_swap(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
864 {
865  stack_swap(ctx, cb, 0, 1, REG0, REG1);
866  return YJIT_KEEP_COMPILING;
867 }
868 
869 // set Nth stack entry to stack top
870 static codegen_status_t
871 gen_setn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
872 {
873  rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
874 
875  // Set the destination
876  x86opnd_t top_val = ctx_stack_pop(ctx, 0);
877  x86opnd_t dst_opnd = ctx_stack_opnd(ctx, (int32_t)n);
878  mov(cb, REG0, top_val);
879  mov(cb, dst_opnd, REG0);
880 
881  temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
882  ctx_set_opnd_mapping(ctx, OPND_STACK(n), mapping);
883 
884  return YJIT_KEEP_COMPILING;
885 }
886 
887 // get nth stack value, then push it
888 static codegen_status_t
889 gen_topn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
890 {
891  int32_t n = (int32_t)jit_get_arg(jit, 0);
892 
893  // Get top n type / operand
894  x86opnd_t top_n_val = ctx_stack_opnd(ctx, n);
895  temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(n));
896 
897  x86opnd_t loc0 = ctx_stack_push_mapping(ctx, mapping);
898  mov(cb, REG0, top_n_val);
899  mov(cb, loc0, REG0);
900 
901  return YJIT_KEEP_COMPILING;
902 }
903 
904 static codegen_status_t
905 gen_pop(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
906 {
907  // Decrement SP
908  ctx_stack_pop(ctx, 1);
909  return YJIT_KEEP_COMPILING;
910 }
911 
912 // Pop n values off the stack
913 static codegen_status_t
914 gen_adjuststack(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
915 {
916  rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
917  ctx_stack_pop(ctx, n);
918  return YJIT_KEEP_COMPILING;
919 }
920 
921 // new array initialized from top N values
922 static codegen_status_t
923 gen_newarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
924 {
925  rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
926 
927  // Save the PC and SP because we are allocating
928  jit_prepare_routine_call(jit, ctx, REG0);
929 
930  x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)n));
931 
932  // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts);
933  mov(cb, C_ARG_REGS[0], REG_EC);
934  mov(cb, C_ARG_REGS[1], imm_opnd(n));
935  lea(cb, C_ARG_REGS[2], values_ptr);
936  call_ptr(cb, REG0, (void *)rb_ec_ary_new_from_values);
937 
938  ctx_stack_pop(ctx, n);
939  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
940  mov(cb, stack_ret, RAX);
941 
942  return YJIT_KEEP_COMPILING;
943 }
944 
945 // dup array
946 static codegen_status_t
947 gen_duparray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
948 {
949  VALUE ary = jit_get_arg(jit, 0);
950 
951  // Save the PC and SP because we are allocating
952  jit_prepare_routine_call(jit, ctx, REG0);
953 
954  // call rb_ary_resurrect(VALUE ary);
955  jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary);
956  call_ptr(cb, REG0, (void *)rb_ary_resurrect);
957 
958  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
959  mov(cb, stack_ret, RAX);
960 
961  return YJIT_KEEP_COMPILING;
962 }
963 
964 // dup hash
965 static codegen_status_t
966 gen_duphash(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
967 {
968  VALUE hash = jit_get_arg(jit, 0);
969 
970  // Save the PC and SP because we are allocating
971  jit_prepare_routine_call(jit, ctx, REG0);
972 
973  // call rb_hash_resurrect(VALUE hash);
974  jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], hash);
975  call_ptr(cb, REG0, (void *)rb_hash_resurrect);
976 
977  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
978  mov(cb, stack_ret, RAX);
979 
980  return YJIT_KEEP_COMPILING;
981 }
982 
983 VALUE rb_vm_splat_array(VALUE flag, VALUE ary);
984 
985 // call to_a on the array on the stack
986 static codegen_status_t
987 gen_splatarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
988 {
989  VALUE flag = (VALUE) jit_get_arg(jit, 0);
990 
991  // Save the PC and SP because the callee may allocate
992  // Note that this modifies REG_SP, which is why we do it first
993  jit_prepare_routine_call(jit, ctx, REG0);
994 
995  // Get the operands from the stack
996  x86opnd_t ary_opnd = ctx_stack_pop(ctx, 1);
997 
998  // Call rb_vm_splat_array(flag, ary)
999  jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], flag);
1000  mov(cb, C_ARG_REGS[1], ary_opnd);
1001  call_ptr(cb, REG1, (void *) rb_vm_splat_array);
1002 
1003  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
1004  mov(cb, stack_ret, RAX);
1005 
1006  return YJIT_KEEP_COMPILING;
1007 }
1008 
1009 // new range initialized from top 2 values
1010 static codegen_status_t
1011 gen_newrange(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1012 {
1013  rb_num_t flag = (rb_num_t)jit_get_arg(jit, 0);
1014 
1015  // rb_range_new() allocates and can raise
1016  jit_prepare_routine_call(jit, ctx, REG0);
1017 
1018  // val = rb_range_new(low, high, (int)flag);
1019  mov(cb, C_ARG_REGS[0], ctx_stack_opnd(ctx, 1));
1020  mov(cb, C_ARG_REGS[1], ctx_stack_opnd(ctx, 0));
1021  mov(cb, C_ARG_REGS[2], imm_opnd(flag));
1022  call_ptr(cb, REG0, (void *)rb_range_new);
1023 
1024  ctx_stack_pop(ctx, 2);
1025  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HEAP);
1026  mov(cb, stack_ret, RAX);
1027 
1028  return YJIT_KEEP_COMPILING;
1029 }
1030 
1031 static void
1032 guard_object_is_heap(codeblock_t *cb, x86opnd_t object_opnd, ctx_t *ctx, uint8_t *side_exit)
1033 {
1034  ADD_COMMENT(cb, "guard object is heap");
1035 
1036  // Test that the object is not an immediate
1037  test(cb, object_opnd, imm_opnd(RUBY_IMMEDIATE_MASK));
1038  jnz_ptr(cb, side_exit);
1039 
1040  // Test that the object is not false or nil
1041  cmp(cb, object_opnd, imm_opnd(Qnil));
1042  RUBY_ASSERT(Qfalse < Qnil);
1043  jbe_ptr(cb, side_exit);
1044 }
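// (Qfalse is 0 and Qnil is the next special constant above it, so a single
// unsigned compare-and-jbe against Qnil filters out both; the RUBY_ASSERT
// documents the ordering this relies on.)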
1045 
1046 static inline void
1047 guard_object_is_array(codeblock_t *cb, x86opnd_t object_opnd, x86opnd_t flags_opnd, ctx_t *ctx, uint8_t *side_exit)
1048 {
1049  ADD_COMMENT(cb, "guard object is array");
1050 
1051  // Pull out the type mask
1052  mov(cb, flags_opnd, member_opnd(object_opnd, struct RBasic, flags));
1053  and(cb, flags_opnd, imm_opnd(RUBY_T_MASK));
1054 
1055  // Compare the result with T_ARRAY
1056  cmp(cb, flags_opnd, imm_opnd(T_ARRAY));
1057  jne_ptr(cb, side_exit);
1058 }
1059 
1060 // push enough nils onto the stack to fill out an array
1061 static codegen_status_t
1062 gen_expandarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1063 {
1064  int flag = (int) jit_get_arg(jit, 1);
1065 
1066  // If this instruction has the splat flag, then bail out.
1067  if (flag & 0x01) {
1068  GEN_COUNTER_INC(cb, expandarray_splat);
1069  return YJIT_CANT_COMPILE;
1070  }
1071 
1072  // If this instruction has the postarg flag, then bail out.
1073  if (flag & 0x02) {
1074  GEN_COUNTER_INC(cb, expandarray_postarg);
1075  return YJIT_CANT_COMPILE;
1076  }
1077 
1078  uint8_t *side_exit = yjit_side_exit(jit, ctx);
1079 
1080  // num is the number of requested values. If there aren't enough in the
1081  // array then we're going to push on nils.
1082  int num = (int)jit_get_arg(jit, 0);
1083  val_type_t array_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1084  x86opnd_t array_opnd = ctx_stack_pop(ctx, 1);
1085 
1086  if (array_type.type == ETYPE_NIL) {
1087  // special case for a, b = nil pattern
1088  // push N nils onto the stack
1089  for (int i = 0; i < num; i++) {
1090  x86opnd_t push = ctx_stack_push(ctx, TYPE_NIL);
1091  mov(cb, push, imm_opnd(Qnil));
1092  }
1093  return YJIT_KEEP_COMPILING;
1094  }
1095 
1096  // Move the array from the stack into REG0 and check that it's an array.
1097  mov(cb, REG0, array_opnd);
1098  guard_object_is_heap(cb, REG0, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
1099  guard_object_is_array(cb, REG0, REG1, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
1100 
1101  // If we don't actually want any values, then just return.
1102  if (num == 0) {
1103  return YJIT_KEEP_COMPILING;
1104  }
1105 
1106  // Pull out the embed flag to check if it's an embedded array.
1107  x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1108  mov(cb, REG1, flags_opnd);
1109 
1110  // Move the length of the embedded array into REG1.
1111  and(cb, REG1, imm_opnd(RARRAY_EMBED_LEN_MASK));
1112  shr(cb, REG1, imm_opnd(RARRAY_EMBED_LEN_SHIFT));
1113 
1114  // Conditionally move the length of the heap array into REG1.
1115  test(cb, flags_opnd, imm_opnd(RARRAY_EMBED_FLAG));
1116  cmovz(cb, REG1, member_opnd(REG0, struct RArray, as.heap.len));
1117 
1118  // Only handle the case where the number of values in the array is greater
1119  // than or equal to the number of values requested.
1120  cmp(cb, REG1, imm_opnd(num));
1121  jl_ptr(cb, COUNTED_EXIT(jit, side_exit, expandarray_rhs_too_small));
1122 
1123  // Load the address of the embedded array into REG1.
1124  // (struct RArray *)(obj)->as.ary
1125  lea(cb, REG1, member_opnd(REG0, struct RArray, as.ary));
1126 
1127  // Conditionally load the address of the heap array into REG1.
1128  // (struct RArray *)(obj)->as.heap.ptr
1129  test(cb, flags_opnd, imm_opnd(RARRAY_EMBED_FLAG));
1130  cmovz(cb, REG1, member_opnd(REG0, struct RArray, as.heap.ptr));
1131 
1132  // Loop backward through the array and push each element onto the stack.
1133  for (int32_t i = (int32_t) num - 1; i >= 0; i--) {
1134  x86opnd_t top = ctx_stack_push(ctx, TYPE_UNKNOWN);
1135  mov(cb, REG0, mem_opnd(64, REG1, i * SIZEOF_VALUE));
1136  mov(cb, top, REG0);
1137  }
1138 
1139  return YJIT_KEEP_COMPILING;
1140 }
1141 
1142 // new hash initialized from top N values
1143 static codegen_status_t
1144 gen_newhash(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1145 {
1146  int32_t num = (int32_t)jit_get_arg(jit, 0);
1147 
1148  // Save the PC and SP because we are allocating
1149  jit_prepare_routine_call(jit, ctx, REG0);
1150 
1151  if (num) {
1152  // val = rb_hash_new_with_size(num / 2);
1153  mov(cb, C_ARG_REGS[0], imm_opnd(num / 2));
1154  call_ptr(cb, REG0, (void *)rb_hash_new_with_size);
1155 
1156  // save the allocated hash as we want to push it after insertion
1157  push(cb, RAX);
1158  push(cb, RAX); // alignment
1159 
1160  // rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val);
1161  mov(cb, C_ARG_REGS[0], imm_opnd(num));
1162  lea(cb, C_ARG_REGS[1], ctx_stack_opnd(ctx, num - 1));
1163  mov(cb, C_ARG_REGS[2], RAX);
1164  call_ptr(cb, REG0, (void *)rb_hash_bulk_insert);
1165 
1166  pop(cb, RAX); // alignment
1167  pop(cb, RAX);
1168 
1169  ctx_stack_pop(ctx, num);
1170  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
1171  mov(cb, stack_ret, RAX);
1172  }
1173  else {
1174  // val = rb_hash_new();
1175  call_ptr(cb, REG0, (void *)rb_hash_new);
1176 
1177  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
1178  mov(cb, stack_ret, RAX);
1179  }
1180 
1181  return YJIT_KEEP_COMPILING;
1182 }
1183 
1184 // Push a constant value to the stack, including type information.
1185 // The constant may be a heap object or a special constant.
1186 static void
1187 jit_putobject(jitstate_t *jit, ctx_t *ctx, VALUE arg)
1188 {
1189  val_type_t val_type = yjit_type_of_value(arg);
1190  x86opnd_t stack_top = ctx_stack_push(ctx, val_type);
1191 
1192  if (SPECIAL_CONST_P(arg)) {
1193  // Immediates will not move and do not need to be tracked for GC
1194  // Thanks to this we can mov directly to memory when possible.
1195 
1196  // NOTE: VALUE -> int64_t cast below is implementation defined.
1197 // Hopefully it preserves the bit pattern or raises a signal.
1198  // See N1256 section 6.3.1.3.
1199  x86opnd_t imm = imm_opnd((int64_t)arg);
1200 
1201  // 64-bit immediates can't be directly written to memory
1202  if (imm.num_bits <= 32) {
1203  mov(cb, stack_top, imm);
1204  }
1205  else {
1206  mov(cb, REG0, imm);
1207  mov(cb, stack_top, REG0);
1208  }
1209  }
1210  else {
1211  // Load the value to push into REG0
1212  // Note that this value may get moved by the GC
1213  jit_mov_gc_ptr(jit, cb, REG0, arg);
1214 
1215  // Write argument at SP
1216  mov(cb, stack_top, REG0);
1217  }
1218 }
1219 
1220 static codegen_status_t
1221 gen_putnil(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1222 {
1223  jit_putobject(jit, ctx, Qnil);
1224  return YJIT_KEEP_COMPILING;
1225 }
1226 
1227 static codegen_status_t
1228 gen_putobject(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1229 {
1230  VALUE arg = jit_get_arg(jit, 0);
1231 
1232  jit_putobject(jit, ctx, arg);
1233  return YJIT_KEEP_COMPILING;
1234 }
1235 
1236 static codegen_status_t
1237 gen_putstring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1238 {
1239  VALUE put_val = jit_get_arg(jit, 0);
1240 
1241  // Save the PC and SP because the callee will allocate
1242  jit_prepare_routine_call(jit, ctx, REG0);
1243 
1244  mov(cb, C_ARG_REGS[0], REG_EC);
1245  jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], put_val);
1246  call_ptr(cb, REG0, (void *)rb_ec_str_resurrect);
1247 
1248  x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_STRING);
1249  mov(cb, stack_top, RAX);
1250 
1251  return YJIT_KEEP_COMPILING;
1252 }
1253 
1254 static codegen_status_t
1255 gen_putobject_int2fix(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1256 {
1257  int opcode = jit_get_opcode(jit);
1258  int cst_val = (opcode == BIN(putobject_INT2FIX_0_))? 0:1;
1259 
1260  jit_putobject(jit, ctx, INT2FIX(cst_val));
1261  return YJIT_KEEP_COMPILING;
1262 }
1263 
1264 static codegen_status_t
1265 gen_putself(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1266 {
1267  // Load self from CFP
1268  mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
1269 
1270  // Write it on the stack
1271  x86opnd_t stack_top = ctx_stack_push_self(ctx);
1272  mov(cb, stack_top, REG0);
1273 
1274  return YJIT_KEEP_COMPILING;
1275 }
1276 
1277 static codegen_status_t
1278 gen_putspecialobject(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1279 {
1280  enum vm_special_object_type type = (enum vm_special_object_type)jit_get_arg(jit, 0);
1281 
1282  if (type == VM_SPECIAL_OBJECT_VMCORE) {
1283  x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_HEAP);
1284  jit_mov_gc_ptr(jit, cb, REG0, rb_mRubyVMFrozenCore);
1285  mov(cb, stack_top, REG0);
1286  return YJIT_KEEP_COMPILING;
1287  }
1288  else {
1289  // TODO: implement for VM_SPECIAL_OBJECT_CBASE and
1290  // VM_SPECIAL_OBJECT_CONST_BASE
1291  return YJIT_CANT_COMPILE;
1292  }
1293 }
1294 
1295 // Get EP at level from CFP
1296 static void
1297 gen_get_ep(codeblock_t *cb, x86opnd_t reg, uint32_t level)
1298 {
1299  // Load environment pointer EP from CFP
1300  mov(cb, reg, member_opnd(REG_CFP, rb_control_frame_t, ep));
1301 
1302  while (level--) {
1303  // Get the previous EP from the current EP
1304  // See GET_PREV_EP(ep) macro
1305  // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03))
1306  mov(cb, reg, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL));
1307  and(cb, reg, imm_opnd(~0x03));
1308  }
1309 }
1310 
1311 // Compute the index of a local variable from its slot index
1312 static uint32_t
1313 slot_to_local_idx(const rb_iseq_t *iseq, int32_t slot_idx)
1314 {
1315  // Convoluted rules from local_var_name() in iseq.c
1316  int32_t local_table_size = iseq->body->local_table_size;
1317  int32_t op = slot_idx - VM_ENV_DATA_SIZE;
1318  int32_t local_idx = local_table_size - op - 1;
1319  RUBY_ASSERT(local_idx >= 0 && local_idx < local_table_size);
1320  return (uint32_t)local_idx;
1321 }
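// (Example, assuming VM_ENV_DATA_SIZE == 3: the last local has slot_idx 3 and
// maps to local_idx local_table_size - 1, while the first local has
// slot_idx local_table_size + 2 and maps to local_idx 0.)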
1322 
1323 static codegen_status_t
1324 gen_getlocal_wc0(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1325 {
1326 // Compute the offset from EP to the local
1327  int32_t slot_idx = (int32_t)jit_get_arg(jit, 0);
1328  const int32_t offs = -(SIZEOF_VALUE * slot_idx);
1329  uint32_t local_idx = slot_to_local_idx(jit->iseq, slot_idx);
1330 
1331  // Load environment pointer EP (level 0) from CFP
1332  gen_get_ep(cb, REG0, 0);
1333 
1334  // Load the local from the EP
1335  mov(cb, REG0, mem_opnd(64, REG0, offs));
1336 
1337  // Write the local at SP
1338  x86opnd_t stack_top = ctx_stack_push_local(ctx, local_idx);
1339  mov(cb, stack_top, REG0);
1340 
1341  return YJIT_KEEP_COMPILING;
1342 }
1343 
1344 static codegen_status_t
1345 gen_getlocal_generic(ctx_t *ctx, uint32_t local_idx, uint32_t level)
1346 {
1347  gen_get_ep(cb, REG0, level);
1348 
1349  // Load the local from the block
1350  // val = *(vm_get_ep(GET_EP(), level) - idx);
1351  const int32_t offs = -(SIZEOF_VALUE * local_idx);
1352  mov(cb, REG0, mem_opnd(64, REG0, offs));
1353 
1354  // Write the local at SP
1355  x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
1356  mov(cb, stack_top, REG0);
1357 
1358  return YJIT_KEEP_COMPILING;
1359 }
1360 
1361 static codegen_status_t
1362 gen_getlocal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1363 {
1364  int32_t idx = (int32_t)jit_get_arg(jit, 0);
1365  int32_t level = (int32_t)jit_get_arg(jit, 1);
1366  return gen_getlocal_generic(ctx, idx, level);
1367 }
1368 
1369 static codegen_status_t
1370 gen_getlocal_wc1(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1371 {
1372  int32_t idx = (int32_t)jit_get_arg(jit, 0);
1373  return gen_getlocal_generic(ctx, idx, 1);
1374 }
1375 
1376 static codegen_status_t
1377 gen_setlocal_wc0(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1378 {
1379  /*
1380  vm_env_write(const VALUE *ep, int index, VALUE v)
1381  {
1382  VALUE flags = ep[VM_ENV_DATA_INDEX_FLAGS];
1383  if (LIKELY((flags & VM_ENV_FLAG_WB_REQUIRED) == 0)) {
1384  VM_STACK_ENV_WRITE(ep, index, v);
1385  }
1386  else {
1387  vm_env_write_slowpath(ep, index, v);
1388  }
1389  }
1390  */
1391 
1392  int32_t slot_idx = (int32_t)jit_get_arg(jit, 0);
1393  uint32_t local_idx = slot_to_local_idx(jit->iseq, slot_idx);
1394 
1395  // Load environment pointer EP (level 0) from CFP
1396  gen_get_ep(cb, REG0, 0);
1397 
1398  // flags & VM_ENV_FLAG_WB_REQUIRED
1399  x86opnd_t flags_opnd = mem_opnd(64, REG0, sizeof(VALUE) * VM_ENV_DATA_INDEX_FLAGS);
1400  test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED));
1401 
1402  // Create a side-exit to fall back to the interpreter
1403  uint8_t *side_exit = yjit_side_exit(jit, ctx);
1404 
1405  // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
1406  jnz_ptr(cb, side_exit);
1407 
1408  // Set the type of the local variable in the context
1409  val_type_t temp_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1410  ctx_set_local_type(ctx, local_idx, temp_type);
1411 
1412  // Pop the value to write from the stack
1413  x86opnd_t stack_top = ctx_stack_pop(ctx, 1);
1414  mov(cb, REG1, stack_top);
1415 
1416  // Write the value at the environment pointer
1417  const int32_t offs = -8 * slot_idx;
1418  mov(cb, mem_opnd(64, REG0, offs), REG1);
1419 
1420  return YJIT_KEEP_COMPILING;
1421 }
1422 
1423 // Push Qtrue or Qfalse depending on whether the given keyword was supplied by
1424 // the caller
1425 static codegen_status_t
1426 gen_checkkeyword(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1427 {
1428  // When a keyword is unspecified past index 32, a hash will be used
1429  // instead. This can only happen in iseqs taking more than 32 keywords.
1430  if (jit->iseq->body->param.keyword->num >= 32) {
1431  return YJIT_CANT_COMPILE;
1432  }
1433 
1434  // The EP offset to the undefined bits local
1435  int32_t bits_offset = (int32_t)jit_get_arg(jit, 0);
1436 
1437  // The index of the keyword we want to check
1438  int32_t index = (int32_t)jit_get_arg(jit, 1);
1439 
1440  // Load environment pointer EP
1441  gen_get_ep(cb, REG0, 0);
1442 
1443  // VALUE kw_bits = *(ep - bits);
1444  x86opnd_t bits_opnd = mem_opnd(64, REG0, sizeof(VALUE) * -bits_offset);
1445 
1446  // unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
1447  // if ((b & (0x01 << idx))) {
1448  //
1449  // We can skip the FIX2ULONG conversion by shifting the bit we test
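 // (kw_bits is a Fixnum, i.e. (bits << 1) | 1, so bit idx of the unboxed
 // value is bit idx + 1 of the boxed VALUE, hence the + 1 below.)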
1450  int64_t bit_test = 0x01 << (index + 1);
1451  test(cb, bits_opnd, imm_opnd(bit_test));
1452  mov(cb, REG0, imm_opnd(Qfalse));
1453  mov(cb, REG1, imm_opnd(Qtrue));
1454  cmovz(cb, REG0, REG1);
1455 
1456  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_IMM);
1457  mov(cb, stack_ret, REG0);
1458 
1459  return YJIT_KEEP_COMPILING;
1460 }
1461 
1462 static codegen_status_t
1463 gen_setlocal_generic(jitstate_t *jit, ctx_t *ctx, uint32_t local_idx, uint32_t level)
1464 {
1465  // Load environment pointer EP at level
1466  gen_get_ep(cb, REG0, level);
1467 
1468  // flags & VM_ENV_FLAG_WB_REQUIRED
1469  x86opnd_t flags_opnd = mem_opnd(64, REG0, sizeof(VALUE) * VM_ENV_DATA_INDEX_FLAGS);
1470  test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED));
1471 
1472  // Create a side-exit to fall back to the interpreter
1473  uint8_t *side_exit = yjit_side_exit(jit, ctx);
1474 
1475  // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
1476  jnz_ptr(cb, side_exit);
1477 
1478  // Pop the value to write from the stack
1479  x86opnd_t stack_top = ctx_stack_pop(ctx, 1);
1480  mov(cb, REG1, stack_top);
1481 
1482  // Write the value at the environment pointer
1483  const int32_t offs = -(SIZEOF_VALUE * local_idx);
1484  mov(cb, mem_opnd(64, REG0, offs), REG1);
1485 
1486  return YJIT_KEEP_COMPILING;
1487 }
1488 
1489 static codegen_status_t
1490 gen_setlocal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1491 {
1492  int32_t idx = (int32_t)jit_get_arg(jit, 0);
1493  int32_t level = (int32_t)jit_get_arg(jit, 1);
1494  return gen_setlocal_generic(jit, ctx, idx, level);
1495 }
1496 
1497 static codegen_status_t
1498 gen_setlocal_wc1(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1499 {
1500  int32_t idx = (int32_t)jit_get_arg(jit, 0);
1501  return gen_setlocal_generic(jit, ctx, idx, 1);
1502 }
1503 
1504 static void
1505 gen_jnz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1506 {
1507  switch (shape) {
1508  case SHAPE_NEXT0:
1509  case SHAPE_NEXT1:
1510  RUBY_ASSERT(false);
1511  break;
1512 
1513  case SHAPE_DEFAULT:
1514  jnz_ptr(cb, target0);
1515  break;
1516  }
1517 }
1518 
1519 static void
1520 gen_jz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1521 {
1522  switch (shape) {
1523  case SHAPE_NEXT0:
1524  case SHAPE_NEXT1:
1525  RUBY_ASSERT(false);
1526  break;
1527 
1528  case SHAPE_DEFAULT:
1529  jz_ptr(cb, target0);
1530  break;
1531  }
1532 }
1533 
1534 static void
1535 gen_jbe_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1536 {
1537  switch (shape) {
1538  case SHAPE_NEXT0:
1539  case SHAPE_NEXT1:
1540  RUBY_ASSERT(false);
1541  break;
1542 
1543  case SHAPE_DEFAULT:
1544  jbe_ptr(cb, target0);
1545  break;
1546  }
1547 }
1548 
1549 enum jcc_kinds {
1550  JCC_JNE,
1551  JCC_JNZ,
1552  JCC_JZ,
1553  JCC_JE,
1554  JCC_JBE,
1555  JCC_JNA,
1556 };
1557 
1558 // Generate a jump to a stub that recompiles the current YARV instruction on failure.
1559 // When depth_limit is exceeded, generate a jump to a side exit.
1560 static void
1561 jit_chain_guard(enum jcc_kinds jcc, jitstate_t *jit, const ctx_t *ctx, uint8_t depth_limit, uint8_t *side_exit)
1562 {
1563  branchgen_fn target0_gen_fn;
1564 
1565  switch (jcc) {
1566  case JCC_JNE:
1567  case JCC_JNZ:
1568  target0_gen_fn = gen_jnz_to_target0;
1569  break;
1570  case JCC_JZ:
1571  case JCC_JE:
1572  target0_gen_fn = gen_jz_to_target0;
1573  break;
1574  case JCC_JBE:
1575  case JCC_JNA:
1576  target0_gen_fn = gen_jbe_to_target0;
1577  break;
1578  default:
1579  rb_bug("yjit: unimplemented jump kind");
1580  break;
1581  };
1582 
1583  if (ctx->chain_depth < depth_limit) {
1584  ctx_t deeper = *ctx;
1585  deeper.chain_depth++;
1586 
1587  gen_branch(
1588  jit,
1589  ctx,
1590  (blockid_t) { jit->iseq, jit->insn_idx },
1591  &deeper,
1592  BLOCKID_NULL,
1593  NULL,
1594  target0_gen_fn
1595  );
1596  }
1597  else {
1598  target0_gen_fn(cb, side_exit, NULL, SHAPE_DEFAULT);
1599  }
1600 }
1601 
1602 enum {
1603  GETIVAR_MAX_DEPTH = 10, // up to 5 different classes, and embedded or not for each
1604  OPT_AREF_MAX_CHAIN_DEPTH = 2, // hashes and arrays
1605  SEND_MAX_DEPTH = 5, // up to 5 different classes
1606 };
1607 
1608 VALUE rb_vm_set_ivar_idx(VALUE obj, uint32_t idx, VALUE val);
1609 
1610 // Codegen for setting an instance variable.
1611 // Preconditions:
1612 // - receiver is in REG0
1613 // - receiver has the same class as CLASS_OF(comptime_receiver)
1614 // - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
1615 static codegen_status_t
1616 gen_set_ivar(jitstate_t *jit, ctx_t *ctx, VALUE recv, VALUE klass, ID ivar_name)
1617 {
1618  // Save the PC and SP because the callee may allocate
1619  // Note that this modifies REG_SP, which is why we do it first
1620  jit_prepare_routine_call(jit, ctx, REG0);
1621 
1622  // Get the operands from the stack
1623  x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
1624  x86opnd_t recv_opnd = ctx_stack_pop(ctx, 1);
1625 
1626  uint32_t ivar_index = rb_obj_ensure_iv_index_mapping(recv, ivar_name);
1627 
1628  // Call rb_vm_set_ivar_idx with the receiver, the index of the ivar, and the value
1629  mov(cb, C_ARG_REGS[0], recv_opnd);
1630  mov(cb, C_ARG_REGS[1], imm_opnd(ivar_index));
1631  mov(cb, C_ARG_REGS[2], val_opnd);
1632  call_ptr(cb, REG0, (void *)rb_vm_set_ivar_idx);
1633 
1634  x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1635  mov(cb, out_opnd, RAX);
1636 
1637  return YJIT_KEEP_COMPILING;
1638 }
1639 
1640 // Codegen for getting an instance variable.
1641 // Preconditions:
1642 // - receiver is in REG0
1643 // - receiver has the same class as CLASS_OF(comptime_receiver)
1644 // - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
1645 static codegen_status_t
1646 gen_get_ivar(jitstate_t *jit, ctx_t *ctx, const int max_chain_depth, VALUE comptime_receiver, ID ivar_name, insn_opnd_t reg0_opnd, uint8_t *side_exit)
1647 {
1648  VALUE comptime_val_klass = CLASS_OF(comptime_receiver);
1649  const ctx_t starting_context = *ctx; // make a copy for use with jit_chain_guard
1650 
1651  // If the class uses the default allocator, instances should all be T_OBJECT
1652  // NOTE: This assumes nobody changes the allocator of the class after allocation.
1653  // Eventually, we can encode whether an object is T_OBJECT or not
1654  // inside object shapes.
1655  if (!RB_TYPE_P(comptime_receiver, T_OBJECT) ||
1656  rb_get_alloc_func(comptime_val_klass) != rb_class_allocate_instance) {
1657  // General case. Call rb_ivar_get().
1658  // VALUE rb_ivar_get(VALUE obj, ID id)
1659  ADD_COMMENT(cb, "call rb_ivar_get()");
1660 
1661  // The function could raise exceptions.
1662  jit_prepare_routine_call(jit, ctx, REG1);
1663 
1664  mov(cb, C_ARG_REGS[0], REG0);
1665  mov(cb, C_ARG_REGS[1], imm_opnd((int64_t)ivar_name));
1666  call_ptr(cb, REG1, (void *)rb_ivar_get);
1667 
1668  if (!reg0_opnd.is_self) {
1669  (void)ctx_stack_pop(ctx, 1);
1670  }
1671  // Push the ivar on the stack
1672  x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1673  mov(cb, out_opnd, RAX);
1674 
1675  // Jump to next instruction. This allows guard chains to share the same successor.
1676  jit_jump_to_next_insn(jit, ctx);
1677  return YJIT_END_BLOCK;
1678  }
1679 
1680  /*
1681  // FIXME:
1682  // This check was added because of a failure in a test involving the
1683  // Nokogiri Document class where we see a T_DATA that still has the default
1684  // allocator.
1685  // Aaron Patterson argues that this is a bug in the C extension, because
1686  // people could call .allocate() on the class and still get a T_OBJECT
1687  // For now I added an extra dynamic check that the receiver is T_OBJECT
1688  // so we can safely pass all the tests in Shopify Core.
1689  //
1690  // Guard that the receiver is T_OBJECT
1691  // #define RB_BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & RUBY_T_MASK)
1692  ADD_COMMENT(cb, "guard receiver is T_OBJECT");
1693  mov(cb, REG1, member_opnd(REG0, struct RBasic, flags));
1694  and(cb, REG1, imm_opnd(RUBY_T_MASK));
1695  cmp(cb, REG1, imm_opnd(T_OBJECT));
1696  jit_chain_guard(JCC_JNE, jit, &starting_context, max_chain_depth, side_exit);
1697  */
1698 
1699  // FIXME: Mapping the index could fail when there are too many ivar names. If we're
1700  // compiling for a branch stub, that can cause the exception to be thrown from the
1701  // wrong PC.
1702  uint32_t ivar_index = rb_obj_ensure_iv_index_mapping(comptime_receiver, ivar_name);
1703 
1704  // Pop receiver if it's on the temp stack
1705  if (!reg0_opnd.is_self) {
1706  (void)ctx_stack_pop(ctx, 1);
1707  }
1708 
1709  // Compile time self is embedded and the ivar index lands within the object
1710  if (RB_FL_TEST_RAW(comptime_receiver, ROBJECT_EMBED) && ivar_index < ROBJECT_EMBED_LEN_MAX) {
1711  // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
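 // Embedded objects keep up to ROBJECT_EMBED_LEN_MAX ivars inline in as.ary;
 // larger objects spill into a separately allocated table (as.heap.ivptr).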
1712 
1713  // Guard that self is embedded
1714  // TODO: BT and JC is shorter
1715  ADD_COMMENT(cb, "guard embedded getivar");
1716  x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1717  test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
1718  jit_chain_guard(JCC_JZ, jit, &starting_context, max_chain_depth, COUNTED_EXIT(jit, side_exit, getivar_megamorphic));
1719 
1720  // Load the variable
1721  x86opnd_t ivar_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.ary) + ivar_index * SIZEOF_VALUE);
1722  mov(cb, REG1, ivar_opnd);
1723 
1724  // Guard that the variable is not Qundef
1725  cmp(cb, REG1, imm_opnd(Qundef));
1726  mov(cb, REG0, imm_opnd(Qnil));
1727  cmove(cb, REG1, REG0);
1728 
1729  // Push the ivar on the stack
1730  x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1731  mov(cb, out_opnd, REG1);
1732  }
1733  else {
1734  // Compile time value is *not* embedded.
1735 
1736  // Guard that value is *not* embedded
1737  // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
1738  ADD_COMMENT(cb, "guard extended getivar");
1739  x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1740  test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
1741  jit_chain_guard(JCC_JNZ, jit, &starting_context, max_chain_depth, COUNTED_EXIT(jit, side_exit, getivar_megamorphic));
1742 
1743  // check that the extended table is big enough
1744  if (ivar_index >= ROBJECT_EMBED_LEN_MAX + 1) {
1745  // Check that the slot is inside the extended table (num_slots > index)
1746  x86opnd_t num_slots = mem_opnd(32, REG0, offsetof(struct RObject, as.heap.numiv));
1747  cmp(cb, num_slots, imm_opnd(ivar_index));
1748  jle_ptr(cb, COUNTED_EXIT(jit, side_exit, getivar_idx_out_of_range));
1749  }
1750 
1751  // Get a pointer to the extended table
1752  x86opnd_t tbl_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.heap.ivptr));
1753  mov(cb, REG0, tbl_opnd);
1754 
1755  // Read the ivar from the extended table
1756  x86opnd_t ivar_opnd = mem_opnd(64, REG0, sizeof(VALUE) * ivar_index);
1757  mov(cb, REG0, ivar_opnd);
1758 
1759  // Check that the ivar is not Qundef
1760  cmp(cb, REG0, imm_opnd(Qundef));
1761  mov(cb, REG1, imm_opnd(Qnil));
1762  cmove(cb, REG0, REG1);
1763 
1764  // Push the ivar on the stack
1765  x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1766  mov(cb, out_opnd, REG0);
1767  }
1768 
1769  // Jump to next instruction. This allows guard chains to share the same successor.
1770  jit_jump_to_next_insn(jit, ctx);
1771  return YJIT_END_BLOCK;
1772 }
1773 
1774 static codegen_status_t
1775 gen_getinstancevariable(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1776 {
1777  // Defer compilation so we can specialize on a runtime `self`
1778  if (!jit_at_current_insn(jit)) {
1779  defer_compilation(jit, ctx);
1780  return YJIT_END_BLOCK;
1781  }
1782 
1783  ID ivar_name = (ID)jit_get_arg(jit, 0);
1784 
1785  VALUE comptime_val = jit_peek_at_self(jit, ctx);
1786  VALUE comptime_val_klass = CLASS_OF(comptime_val);
1787 
1788  // Generate a side exit
1789  uint8_t *side_exit = yjit_side_exit(jit, ctx);
1790 
1791  // Guard that the receiver has the same class as the one from compile time.
1792  mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
1793 
1794  jit_guard_known_klass(jit, ctx, comptime_val_klass, OPND_SELF, comptime_val, GETIVAR_MAX_DEPTH, side_exit);
1795 
1796  return gen_get_ivar(jit, ctx, GETIVAR_MAX_DEPTH, comptime_val, ivar_name, OPND_SELF, side_exit);
1797 }
1798 
1799 void rb_vm_setinstancevariable(const rb_iseq_t *iseq, VALUE obj, ID id, VALUE val, IVC ic);
1800 
1801 static codegen_status_t
1802 gen_setinstancevariable(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1803 {
1804  ID id = (ID)jit_get_arg(jit, 0);
1805  IVC ic = (IVC)jit_get_arg(jit, 1);
1806 
1807  // Save the PC and SP because the callee may allocate
1808  // Note that this modifies REG_SP, which is why we do it first
1809  jit_prepare_routine_call(jit, ctx, REG0);
1810 
1811  // Get the operands from the stack
1812  x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
1813 
1814  // Call rb_vm_setinstancevariable(iseq, obj, id, val, ic);
1815  mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
1816  mov(cb, C_ARG_REGS[3], val_opnd);
1817  mov(cb, C_ARG_REGS[2], imm_opnd(id));
1818  mov(cb, C_ARG_REGS[4], const_ptr_opnd(ic));
1819  jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], (VALUE)jit->iseq);
1820  call_ptr(cb, REG0, (void *)rb_vm_setinstancevariable);
1821 
1822  return YJIT_KEEP_COMPILING;
1823 }
1824 
1825 bool rb_vm_defined(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, rb_num_t op_type, VALUE obj, VALUE v);
1826 
1827 static codegen_status_t
1828 gen_defined(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1829 {
1830  rb_num_t op_type = (rb_num_t)jit_get_arg(jit, 0);
1831  VALUE obj = (VALUE)jit_get_arg(jit, 1);
1832  VALUE pushval = (VALUE)jit_get_arg(jit, 2);
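 // e.g. `defined?(@foo)` pushes the string "instance-variable" when @foo is
 // set and nil otherwise; pushval holds that result string.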
1833 
1834  // Save the PC and SP because the callee may allocate
1835  // Note that this modifies REG_SP, which is why we do it first
1836  jit_prepare_routine_call(jit, ctx, REG0);
1837 
1838  // Get the operands from the stack
1839  x86opnd_t v_opnd = ctx_stack_pop(ctx, 1);
1840 
1841  // Call vm_defined(ec, reg_cfp, op_type, obj, v)
1842  mov(cb, C_ARG_REGS[0], REG_EC);
1843  mov(cb, C_ARG_REGS[1], REG_CFP);
1844  mov(cb, C_ARG_REGS[2], imm_opnd(op_type));
1845  jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)obj);
1846  mov(cb, C_ARG_REGS[4], v_opnd);
1847  call_ptr(cb, REG0, (void *)rb_vm_defined);
1848 
1849  // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) {
1850  // val = pushval;
1851  // }
1852  jit_mov_gc_ptr(jit, cb, REG1, (VALUE)pushval);
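 // rb_vm_defined() returns a C bool, so only the low byte (AL) is meaningful;
 // select pushval when it is non-zero and Qnil otherwise.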
1853  cmp(cb, AL, imm_opnd(0));
1854  mov(cb, RAX, imm_opnd(Qnil));
1855  cmovnz(cb, RAX, REG1);
1856 
1857  // Push the return value onto the stack
1858  val_type_t out_type = SPECIAL_CONST_P(pushval) ? TYPE_IMM : TYPE_UNKNOWN;
1859  x86opnd_t stack_ret = ctx_stack_push(ctx, out_type);
1860  mov(cb, stack_ret, RAX);
1861 
1862  return YJIT_KEEP_COMPILING;
1863 }
1864 
1865 static codegen_status_t
1866 gen_checktype(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1867 {
1868  enum ruby_value_type type_val = (enum ruby_value_type)jit_get_arg(jit, 0);
1869  // Only three types are emitted by compile.c
1870  if (type_val == T_STRING || type_val == T_ARRAY || type_val == T_HASH) {
1871  val_type_t val_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1872  x86opnd_t val = ctx_stack_pop(ctx, 1);
1873 
1874  x86opnd_t stack_ret;
1875 
1876  // Check if we know from type information
1877  if ((type_val == T_STRING && val_type.type == ETYPE_STRING) ||
1878  (type_val == T_ARRAY && val_type.type == ETYPE_ARRAY) ||
1879  (type_val == T_HASH && val_type.type == ETYPE_HASH)) {
1880  // guaranteed type match
1881  stack_ret = ctx_stack_push(ctx, TYPE_TRUE);
1882  mov(cb, stack_ret, imm_opnd(Qtrue));
1883  return YJIT_KEEP_COMPILING;
1884  }
1885  else if (val_type.is_imm || val_type.type != ETYPE_UNKNOWN) {
1886  // guaranteed not to match T_STRING/T_ARRAY/T_HASH
1887  stack_ret = ctx_stack_push(ctx, TYPE_FALSE);
1888  mov(cb, stack_ret, imm_opnd(Qfalse));
1889  return YJIT_KEEP_COMPILING;
1890  }
1891 
1892  mov(cb, REG0, val);
1893  mov(cb, REG1, imm_opnd(Qfalse));
1894 
1895  uint32_t ret = cb_new_label(cb, "ret");
1896 
1897  if (!val_type.is_heap) {
1898  // if (SPECIAL_CONST_P(val)) {
1899  // Return Qfalse via REG1 if not on heap
1900  test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
1901  jnz_label(cb, ret);
1902  cmp(cb, REG0, imm_opnd(Qnil));
1903  jbe_label(cb, ret);
1904  }
1905 
1906  // Check type on object
1907  mov(cb, REG0, mem_opnd(64, REG0, offsetof(struct RBasic, flags)));
1908  and(cb, REG0, imm_opnd(RUBY_T_MASK));
1909  cmp(cb, REG0, imm_opnd(type_val));
1910  mov(cb, REG0, imm_opnd(Qtrue));
1911  // REG1 contains Qfalse from above
1912  cmove(cb, REG1, REG0);
1913 
1914  cb_write_label(cb, ret);
1915  stack_ret = ctx_stack_push(ctx, TYPE_IMM);
1916  mov(cb, stack_ret, REG1);
1917  cb_link_labels(cb);
1918 
1919  return YJIT_KEEP_COMPILING;
1920  }
1921  else {
1922  return YJIT_CANT_COMPILE;
1923  }
1924 }
1925 
1926 static codegen_status_t
1927 gen_concatstrings(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1928 {
1929  rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
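 // concatstrings pops the top n strings and pushes their concatenation,
 // e.g. for string interpolation like "#{a}#{b}".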
1930 
1931  // Save the PC and SP because we are allocating
1932  jit_prepare_routine_call(jit, ctx, REG0);
1933 
1934  x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)n));
1935 
1936  // call rb_str_concat_literals(long n, const VALUE *strings);
1937  mov(cb, C_ARG_REGS[0], imm_opnd(n));
1938  lea(cb, C_ARG_REGS[1], values_ptr);
1939  call_ptr(cb, REG0, (void *)rb_str_concat_literals);
1940 
1941  ctx_stack_pop(ctx, n);
1942  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
1943  mov(cb, stack_ret, RAX);
1944 
1945  return YJIT_KEEP_COMPILING;
1946 }
1947 
1948 static void
1949 guard_two_fixnums(ctx_t *ctx, uint8_t *side_exit)
1950 {
1951  // Get the stack operand types
1952  val_type_t arg1_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1953  val_type_t arg0_type = ctx_get_opnd_type(ctx, OPND_STACK(1));
1954 
1955  if (arg0_type.is_heap || arg1_type.is_heap) {
1956  jmp_ptr(cb, side_exit);
1957  return;
1958  }
1959 
1960  if (arg0_type.type != ETYPE_FIXNUM && arg0_type.type != ETYPE_UNKNOWN) {
1961  jmp_ptr(cb, side_exit);
1962  return;
1963  }
1964 
1965  if (arg1_type.type != ETYPE_FIXNUM && arg1_type.type != ETYPE_UNKNOWN) {
1966  jmp_ptr(cb, side_exit);
1967  return;
1968  }
1969 
1970  RUBY_ASSERT(!arg0_type.is_heap);
1971  RUBY_ASSERT(!arg1_type.is_heap);
1972  RUBY_ASSERT(arg0_type.type == ETYPE_FIXNUM || arg0_type.type == ETYPE_UNKNOWN);
1973  RUBY_ASSERT(arg1_type.type == ETYPE_FIXNUM || arg1_type.type == ETYPE_UNKNOWN);
1974 
1975  // Get stack operands without popping them
1976  x86opnd_t arg1 = ctx_stack_opnd(ctx, 0);
1977  x86opnd_t arg0 = ctx_stack_opnd(ctx, 1);
1978 
1979  // If not fixnums, fall back
1980  if (arg0_type.type != ETYPE_FIXNUM) {
1981  ADD_COMMENT(cb, "guard arg0 fixnum");
1982  test(cb, arg0, imm_opnd(RUBY_FIXNUM_FLAG));
1983  jz_ptr(cb, side_exit);
1984  }
1985  if (arg1_type.type != ETYPE_FIXNUM) {
1986  ADD_COMMENT(cb, "guard arg1 fixnum");
1987  test(cb, arg1, imm_opnd(RUBY_FIXNUM_FLAG));
1988  jz_ptr(cb, side_exit);
1989  }
1990 
1991  // Set stack types in context
1992  ctx_upgrade_opnd_type(ctx, OPND_STACK(0), TYPE_FIXNUM);
1993  ctx_upgrade_opnd_type(ctx, OPND_STACK(1), TYPE_FIXNUM);
1994 }
1995 
1996 // Conditional move operation used by comparison operators
1997 typedef void (*cmov_fn)(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
1998 
1999 static codegen_status_t
2000 gen_fixnum_cmp(jitstate_t *jit, ctx_t *ctx, cmov_fn cmov_op)
2001 {
2002  // Defer compilation so we can specialize based on a runtime receiver
2003  if (!jit_at_current_insn(jit)) {
2004  defer_compilation(jit, ctx);
2005  return YJIT_END_BLOCK;
2006  }
2007 
2008  VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2009  VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2010 
2011  if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2012  // Create a side-exit to fall back to the interpreter
2013  // Note: we generate the side-exit before popping operands from the stack
2014  uint8_t *side_exit = yjit_side_exit(jit, ctx);
2015 
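 // Note: BOP_LT is used as the redefinition key for all four fixnum
 // comparisons routed through this helper (see gen_opt_lt/le/ge/gt below).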
2016  if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_LT)) {
2017  return YJIT_CANT_COMPILE;
2018  }
2019 
2020  // Check that both operands are fixnums
2021  guard_two_fixnums(ctx, side_exit);
2022 
2023  // Get the operands from the stack
2024  x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2025  x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2026 
2027  // Compare the arguments
2028  xor(cb, REG0_32, REG0_32); // REG0 = Qfalse
2029  mov(cb, REG1, arg0);
2030  cmp(cb, REG1, arg1);
2031  mov(cb, REG1, imm_opnd(Qtrue));
2032  cmov_op(cb, REG0, REG1);
2033 
2034  // Push the output on the stack
2035  x86opnd_t dst = ctx_stack_push(ctx, TYPE_UNKNOWN);
2036  mov(cb, dst, REG0);
2037 
2038  return YJIT_KEEP_COMPILING;
2039  }
2040  else {
2041  return gen_opt_send_without_block(jit, ctx, cb);
2042  }
2043 }
2044 
2045 static codegen_status_t
2046 gen_opt_lt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2047 {
2048  return gen_fixnum_cmp(jit, ctx, cmovl);
2049 }
2050 
2051 static codegen_status_t
2052 gen_opt_le(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2053 {
2054  return gen_fixnum_cmp(jit, ctx, cmovle);
2055 }
2056 
2057 static codegen_status_t
2058 gen_opt_ge(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2059 {
2060  return gen_fixnum_cmp(jit, ctx, cmovge);
2061 }
2062 
2063 static codegen_status_t
2064 gen_opt_gt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2065 {
2066  return gen_fixnum_cmp(jit, ctx, cmovg);
2067 }
2068 
2069 // Implements specialized equality for either two fixnums or two strings
2070 // Returns true if code was generated, otherwise false
2071 static bool
2072 gen_equality_specialized(jitstate_t *jit, ctx_t *ctx, uint8_t *side_exit)
2073 {
2074  VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2075  VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2076 
2077  x86opnd_t a_opnd = ctx_stack_opnd(ctx, 1);
2078  x86opnd_t b_opnd = ctx_stack_opnd(ctx, 0);
2079 
2080  if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2081  if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_EQ)) {
2082  // if overridden, emit the generic version
2083  return false;
2084  }
2085 
2086  guard_two_fixnums(ctx, side_exit);
2087 
2088  mov(cb, REG0, a_opnd);
2089  cmp(cb, REG0, b_opnd);
2090 
2091  mov(cb, REG0, imm_opnd(Qfalse));
2092  mov(cb, REG1, imm_opnd(Qtrue));
2093  cmove(cb, REG0, REG1);
2094 
2095  // Push the output on the stack
2096  ctx_stack_pop(ctx, 2);
2097  x86opnd_t dst = ctx_stack_push(ctx, TYPE_IMM);
2098  mov(cb, dst, REG0);
2099 
2100  return true;
2101  }
2102  else if (CLASS_OF(comptime_a) == rb_cString &&
2103  CLASS_OF(comptime_b) == rb_cString) {
2104  if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_EQ)) {
2105  // if overridden, emit the generic version
2106  return false;
2107  }
2108 
2109  // Load a and b in preparation for call later
2110  mov(cb, C_ARG_REGS[0], a_opnd);
2111  mov(cb, C_ARG_REGS[1], b_opnd);
2112 
2113  // Guard that a is a String
2114  mov(cb, REG0, C_ARG_REGS[0]);
2115  jit_guard_known_klass(jit, ctx, rb_cString, OPND_STACK(1), comptime_a, SEND_MAX_DEPTH, side_exit);
2116 
2117  uint32_t ret = cb_new_label(cb, "ret");
2118 
2119  // If they are equal by identity, return true
2120  cmp(cb, C_ARG_REGS[0], C_ARG_REGS[1]);
2121  mov(cb, RAX, imm_opnd(Qtrue));
2122  je_label(cb, ret);
2123 
2124  // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard)
2125  if (ctx_get_opnd_type(ctx, OPND_STACK(0)).type != ETYPE_STRING) {
2126  mov(cb, REG0, C_ARG_REGS[1]);
2127  // Note: any T_STRING is valid here, but we check for a ::String for simplicity
2128  jit_guard_known_klass(jit, ctx, rb_cString, OPND_STACK(0), comptime_b, SEND_MAX_DEPTH, side_exit);
2129  }
2130 
2131  // Call rb_str_eql_internal(a, b)
2132  call_ptr(cb, REG0, (void *)rb_str_eql_internal);
2133 
2134  // Push the output on the stack
2135  cb_write_label(cb, ret);
2136  ctx_stack_pop(ctx, 2);
2137  x86opnd_t dst = ctx_stack_push(ctx, TYPE_IMM);
2138  mov(cb, dst, RAX);
2139  cb_link_labels(cb);
2140 
2141  return true;
2142  }
2143  else {
2144  return false;
2145  }
2146 }
2147 
2148 static codegen_status_t
2149 gen_opt_eq(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2150 {
2151  // Defer compilation so we can specialize based on a runtime receiver
2152  if (!jit_at_current_insn(jit)) {
2153  defer_compilation(jit, ctx);
2154  return YJIT_END_BLOCK;
2155  }
2156 
2157  // Create a side-exit to fall back to the interpreter
2158  uint8_t *side_exit = yjit_side_exit(jit, ctx);
2159 
2160  if (gen_equality_specialized(jit, ctx, side_exit)) {
2161  jit_jump_to_next_insn(jit, ctx);
2162  return YJIT_END_BLOCK;
2163  }
2164  else {
2165  return gen_opt_send_without_block(jit, ctx, cb);
2166  }
2167 }
2168 
2169 static codegen_status_t gen_send_general(jitstate_t *jit, ctx_t *ctx, struct rb_call_data *cd, rb_iseq_t *block);
2170 
2171 static codegen_status_t
2172 gen_opt_neq(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2173 {
2174  // opt_neq is passed two rb_call_data as arguments:
2175  // first for ==, second for !=
2176  struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 1);
2177  return gen_send_general(jit, ctx, cd, NULL);
2178 }
2179 
2180 static codegen_status_t
2181 gen_opt_aref(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2182 {
2183  struct rb_call_data * cd = (struct rb_call_data *)jit_get_arg(jit, 0);
2184  int32_t argc = (int32_t)vm_ci_argc(cd->ci);
2185 
2186  // Only JIT one arg calls like `ary[6]`
2187  if (argc != 1) {
2188  GEN_COUNTER_INC(cb, oaref_argc_not_one);
2189  return YJIT_CANT_COMPILE;
2190  }
2191 
2192  // Defer compilation so we can specialize based on a runtime receiver
2193  if (!jit_at_current_insn(jit)) {
2194  defer_compilation(jit, ctx);
2195  return YJIT_END_BLOCK;
2196  }
2197 
2198  // Remember the context on entry for adding guard chains
2199  const ctx_t starting_context = *ctx;
2200 
2201  // Specialize based on compile time values
2202  VALUE comptime_idx = jit_peek_at_stack(jit, ctx, 0);
2203  VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 1);
2204 
2205  // Create a side-exit to fall back to the interpreter
2206  uint8_t *side_exit = yjit_side_exit(jit, ctx);
2207 
2208  if (CLASS_OF(comptime_recv) == rb_cArray && RB_FIXNUM_P(comptime_idx)) {
2209  if (!assume_bop_not_redefined(jit, ARRAY_REDEFINED_OP_FLAG, BOP_AREF)) {
2210  return YJIT_CANT_COMPILE;
2211  }
2212 
2213  // Pop the stack operands
2214  x86opnd_t idx_opnd = ctx_stack_pop(ctx, 1);
2215  x86opnd_t recv_opnd = ctx_stack_pop(ctx, 1);
2216  mov(cb, REG0, recv_opnd);
2217 
2218  // if (SPECIAL_CONST_P(recv)) {
2219  // Bail if receiver is not a heap object
2220  test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
2221  jnz_ptr(cb, side_exit);
2222  cmp(cb, REG0, imm_opnd(Qfalse));
2223  je_ptr(cb, side_exit);
2224  cmp(cb, REG0, imm_opnd(Qnil));
2225  je_ptr(cb, side_exit);
2226 
2227  // Bail if recv has a class other than ::Array.
2228  // BOP_AREF check above is only good for ::Array.
2229  mov(cb, REG1, mem_opnd(64, REG0, offsetof(struct RBasic, klass)));
2230  mov(cb, REG0, const_ptr_opnd((void *)rb_cArray));
2231  cmp(cb, REG0, REG1);
2232  jit_chain_guard(JCC_JNE, jit, &starting_context, OPT_AREF_MAX_CHAIN_DEPTH, side_exit);
2233 
2234  // Bail if idx is not a FIXNUM
2235  mov(cb, REG1, idx_opnd);
2236  test(cb, REG1, imm_opnd(RUBY_FIXNUM_FLAG));
2237  jz_ptr(cb, COUNTED_EXIT(jit, side_exit, oaref_arg_not_fixnum));
2238 
2239  // Call VALUE rb_ary_entry_internal(VALUE ary, long offset).
2240  // It never raises or allocates, so we don't need to write to cfp->pc.
2241  {
2242  mov(cb, RDI, recv_opnd);
2243  sar(cb, REG1, imm_opnd(1)); // Convert fixnum to int
2244  mov(cb, RSI, REG1);
2245  call_ptr(cb, REG0, (void *)rb_ary_entry_internal);
2246 
2247  // Push the return value onto the stack
2248  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2249  mov(cb, stack_ret, RAX);
2250  }
2251 
2252  // Jump to next instruction. This allows guard chains to share the same successor.
2253  jit_jump_to_next_insn(jit, ctx);
2254  return YJIT_END_BLOCK;
2255  }
2256  else if (CLASS_OF(comptime_recv) == rb_cHash) {
2257  if (!assume_bop_not_redefined(jit, HASH_REDEFINED_OP_FLAG, BOP_AREF)) {
2258  return YJIT_CANT_COMPILE;
2259  }
2260 
2261  x86opnd_t key_opnd = ctx_stack_opnd(ctx, 0);
2262  x86opnd_t recv_opnd = ctx_stack_opnd(ctx, 1);
2263 
2264  // Guard that the receiver is a hash
2265  mov(cb, REG0, recv_opnd);
2266  jit_guard_known_klass(jit, ctx, rb_cHash, OPND_STACK(1), comptime_recv, OPT_AREF_MAX_CHAIN_DEPTH, side_exit);
2267 
2268  // Setup arguments for rb_hash_aref().
2269  mov(cb, C_ARG_REGS[0], REG0);
2270  mov(cb, C_ARG_REGS[1], key_opnd);
2271 
2272  // Prepare to call rb_hash_aref(). It might call #hash on the key.
2273  jit_prepare_routine_call(jit, ctx, REG0);
2274 
2275  call_ptr(cb, REG0, (void *)rb_hash_aref);
2276 
2277  // Pop the key and the receiver
2278  (void)ctx_stack_pop(ctx, 2);
2279 
2280  // Push the return value onto the stack
2281  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2282  mov(cb, stack_ret, RAX);
2283 
2284  // Jump to next instruction. This allows guard chains to share the same successor.
2285  jit_jump_to_next_insn(jit, ctx);
2286  return YJIT_END_BLOCK;
2287  }
2288  else {
2289  // General case. Call the [] method.
2290  return gen_opt_send_without_block(jit, ctx, cb);
2291  }
2292 }
2293 
2294 static codegen_status_t
2295 gen_opt_aset(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2296 {
2297  // Defer compilation so we can specialize on a runtime `self`
2298  if (!jit_at_current_insn(jit)) {
2299  defer_compilation(jit, ctx);
2300  return YJIT_END_BLOCK;
2301  }
2302 
2303  VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 2);
2304  VALUE comptime_key = jit_peek_at_stack(jit, ctx, 1);
2305 
2306  // Get the operands from the stack
2307  x86opnd_t recv = ctx_stack_opnd(ctx, 2);
2308  x86opnd_t key = ctx_stack_opnd(ctx, 1);
2309  x86opnd_t val = ctx_stack_opnd(ctx, 0);
2310 
2311  if (CLASS_OF(comptime_recv) == rb_cArray && FIXNUM_P(comptime_key)) {
2312  uint8_t *side_exit = yjit_side_exit(jit, ctx);
2313 
2314  // Guard receiver is an Array
2315  mov(cb, REG0, recv);
2316  jit_guard_known_klass(jit, ctx, rb_cArray, OPND_STACK(2), comptime_recv, SEND_MAX_DEPTH, side_exit);
2317 
2318  // Guard key is a fixnum
2319  mov(cb, REG0, key);
2320  jit_guard_known_klass(jit, ctx, rb_cInteger, OPND_STACK(1), comptime_key, SEND_MAX_DEPTH, side_exit);
2321 
2322  // Call rb_ary_store
2323  mov(cb, C_ARG_REGS[0], recv);
2324  mov(cb, C_ARG_REGS[1], key);
2325  sar(cb, C_ARG_REGS[1], imm_opnd(1)); // FIX2LONG(key)
2326  mov(cb, C_ARG_REGS[2], val);
2327 
2328  // We might allocate or raise
2329  jit_prepare_routine_call(jit, ctx, REG0);
2330 
2331  call_ptr(cb, REG0, (void *)rb_ary_store);
2332 
2333  // rb_ary_store returns void
2334  // stored value should still be on stack
2335  mov(cb, REG0, ctx_stack_opnd(ctx, 0));
2336 
2337  // Push the return value onto the stack
2338  ctx_stack_pop(ctx, 3);
2339  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2340  mov(cb, stack_ret, REG0);
2341 
2342  jit_jump_to_next_insn(jit, ctx);
2343  return YJIT_END_BLOCK;
2344  }
2345  else if (CLASS_OF(comptime_recv) == rb_cHash) {
2346  uint8_t *side_exit = yjit_side_exit(jit, ctx);
2347 
2348  // Guard receiver is a Hash
2349  mov(cb, REG0, recv);
2350  jit_guard_known_klass(jit, ctx, rb_cHash, OPND_STACK(2), comptime_recv, SEND_MAX_DEPTH, side_exit);
2351 
2352  // Call rb_hash_aset
2353  mov(cb, C_ARG_REGS[0], recv);
2354  mov(cb, C_ARG_REGS[1], key);
2355  mov(cb, C_ARG_REGS[2], val);
2356 
2357  // We might allocate or raise
2358  jit_prepare_routine_call(jit, ctx, REG0);
2359 
2360  call_ptr(cb, REG0, (void *)rb_hash_aset);
2361 
2362  // Push the return value onto the stack
2363  ctx_stack_pop(ctx, 3);
2364  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2365  mov(cb, stack_ret, RAX);
2366 
2367  jit_jump_to_next_insn(jit, ctx);
2368  return YJIT_END_BLOCK;
2369  }
2370  else {
2371  return gen_opt_send_without_block(jit, ctx, cb);
2372  }
2373 }
2374 
2375 static codegen_status_t
2376 gen_opt_and(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2377 {
2378  // Defer compilation so we can specialize on a runtime `self`
2379  if (!jit_at_current_insn(jit)) {
2380  defer_compilation(jit, ctx);
2381  return YJIT_END_BLOCK;
2382  }
2383 
2384  VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2385  VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2386 
2387  if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2388  // Create a side-exit to fall back to the interpreter
2389  // Note: we generate the side-exit before popping operands from the stack
2390  uint8_t *side_exit = yjit_side_exit(jit, ctx);
2391 
2392  if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_AND)) {
2393  return YJIT_CANT_COMPILE;
2394  }
2395 
2396  // Check that both operands are fixnums
2397  guard_two_fixnums(ctx, side_exit);
2398 
2399  // Get the operands and destination from the stack
2400  x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2401  x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2402 
2403  // Do the bitwise and arg0 & arg1
2404  mov(cb, REG0, arg0);
2405  and(cb, REG0, arg1);
2406 
2407  // Push the output on the stack
2408  x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2409  mov(cb, dst, REG0);
2410 
2411  return YJIT_KEEP_COMPILING;
2412  }
2413  else {
2414  // Delegate to send, call the method on the recv
2415  return gen_opt_send_without_block(jit, ctx, cb);
2416  }
2417 }
2418 
2419 static codegen_status_t
2420 gen_opt_or(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2421 {
2422  // Defer compilation so we can specialize on a runtime `self`
2423  if (!jit_at_current_insn(jit)) {
2424  defer_compilation(jit, ctx);
2425  return YJIT_END_BLOCK;
2426  }
2427 
2428  VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2429  VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2430 
2431  if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2432  // Create a side-exit to fall back to the interpreter
2433  // Note: we generate the side-exit before popping operands from the stack
2434  uint8_t *side_exit = yjit_side_exit(jit, ctx);
2435 
2436  if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_OR)) {
2437  return YJIT_CANT_COMPILE;
2438  }
2439 
2440  // Check that both operands are fixnums
2441  guard_two_fixnums(ctx, side_exit);
2442 
2443  // Get the operands and destination from the stack
2444  x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2445  x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2446 
2447  // Do the bitwise or arg0 | arg1
2448  mov(cb, REG0, arg0);
2449  or(cb, REG0, arg1);
2450 
2451  // Push the output on the stack
2452  x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2453  mov(cb, dst, REG0);
2454 
2455  return YJIT_KEEP_COMPILING;
2456  }
2457  else {
2458  // Delegate to send, call the method on the recv
2459  return gen_opt_send_without_block(jit, ctx, cb);
2460  }
2461 }
2462 
2463 static codegen_status_t
2464 gen_opt_minus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2465 {
2466  // Defer compilation so we can specialize on a runtime `self`
2467  if (!jit_at_current_insn(jit)) {
2468  defer_compilation(jit, ctx);
2469  return YJIT_END_BLOCK;
2470  }
2471 
2472  VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2473  VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2474 
2475  if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2476  // Create a side-exit to fall back to the interpreter
2477  // Note: we generate the side-exit before popping operands from the stack
2478  uint8_t *side_exit = yjit_side_exit(jit, ctx);
2479 
2480  if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS)) {
2481  return YJIT_CANT_COMPILE;
2482  }
2483 
2484  // Check that both operands are fixnums
2485  guard_two_fixnums(ctx, side_exit);
2486 
2487  // Get the operands and destination from the stack
2488  x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2489  x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2490 
2491  // Subtract arg0 - arg1 and test for overflow
2492  mov(cb, REG0, arg0);
2493  sub(cb, REG0, arg1);
2494  jo_ptr(cb, side_exit);
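 // The two fixnum tag bits cancelled out in the subtraction ((2a+1)-(2b+1) = 2(a-b)),
 // so add 1 back to retag the result as a fixnum.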
2495  add(cb, REG0, imm_opnd(1));
2496 
2497  // Push the output on the stack
2498  x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2499  mov(cb, dst, REG0);
2500 
2501  return YJIT_KEEP_COMPILING;
2502  }
2503  else {
2504  // Delegate to send, call the method on the recv
2505  return gen_opt_send_without_block(jit, ctx, cb);
2506  }
2507 }
2508 
2509 static codegen_status_t
2510 gen_opt_plus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2511 {
2512  // Defer compilation so we can specialize on a runtime `self`
2513  if (!jit_at_current_insn(jit)) {
2514  defer_compilation(jit, ctx);
2515  return YJIT_END_BLOCK;
2516  }
2517 
2518  VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2519  VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2520 
2521  if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2522  // Create a side-exit to fall back to the interpreter
2523  // Note: we generate the side-exit before popping operands from the stack
2524  uint8_t *side_exit = yjit_side_exit(jit, ctx);
2525 
2526  if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS)) {
2527  return YJIT_CANT_COMPILE;
2528  }
2529 
2530  // Check that both operands are fixnums
2531  guard_two_fixnums(ctx, side_exit);
2532 
2533  // Get the operands and destination from the stack
2534  x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2535  x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2536 
2537  // Add arg0 + arg1 and test for overflow
2538  mov(cb, REG0, arg0);
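 // A fixnum VALUE is (n << 1) | 1, so drop one tag bit before adding to keep
 // the result correctly tagged: (2a+1) - 1 + (2b+1) = 2(a+b) + 1.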
2539  sub(cb, REG0, imm_opnd(1));
2540  add(cb, REG0, arg1);
2541  jo_ptr(cb, side_exit);
2542 
2543  // Push the output on the stack
2544  x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2545  mov(cb, dst, REG0);
2546 
2547  return YJIT_KEEP_COMPILING;
2548  }
2549  else {
2550  // Delegate to send, call the method on the recv
2551  return gen_opt_send_without_block(jit, ctx, cb);
2552  }
2553 }
2554 
2555 static codegen_status_t
2556 gen_opt_mult(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2557 {
2558  // Delegate to send, call the method on the recv
2559  return gen_opt_send_without_block(jit, ctx, cb);
2560 }
2561 
2562 static codegen_status_t
2563 gen_opt_div(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2564 {
2565  // Delegate to send, call the method on the recv
2566  return gen_opt_send_without_block(jit, ctx, cb);
2567 }
2568 
2569 VALUE rb_vm_opt_mod(VALUE recv, VALUE obj);
2570 
2571 static codegen_status_t
2572 gen_opt_mod(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2573 {
2574  // Save the PC and SP because the callee may allocate bignums
2575  // Note that this modifies REG_SP, which is why we do it first
2576  jit_prepare_routine_call(jit, ctx, REG0);
2577 
2578  uint8_t *side_exit = yjit_side_exit(jit, ctx);
2579 
2580  // Get the operands from the stack
2581  x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2582  x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2583 
2584  // Call rb_vm_opt_mod(VALUE recv, VALUE obj)
2585  mov(cb, C_ARG_REGS[0], arg0);
2586  mov(cb, C_ARG_REGS[1], arg1);
2587  call_ptr(cb, REG0, (void *)rb_vm_opt_mod);
2588 
2589  // If val == Qundef, bail to do a method call
2590  cmp(cb, RAX, imm_opnd(Qundef));
2591  je_ptr(cb, side_exit);
2592 
2593  // Push the return value onto the stack
2594  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2595  mov(cb, stack_ret, RAX);
2596 
2597  return YJIT_KEEP_COMPILING;
2598 }
2599 
2600 static codegen_status_t
2601 gen_opt_ltlt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2602 {
2603  // Delegate to send, call the method on the recv
2604  return gen_opt_send_without_block(jit, ctx, cb);
2605 }
2606 
2607 static codegen_status_t
2608 gen_opt_nil_p(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2609 {
2610  // Delegate to send, call the method on the recv
2611  return gen_opt_send_without_block(jit, ctx, cb);
2612 }
2613 
2614 static codegen_status_t
2615 gen_opt_empty_p(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2616 {
2617  // Delegate to send, call the method on the recv
2618  return gen_opt_send_without_block(jit, ctx, cb);
2619 }
2620 
2621 static codegen_status_t
2622 gen_opt_str_freeze(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2623 {
2624  if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_FREEZE)) {
2625  return YJIT_CANT_COMPILE;
2626  }
2627 
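 // The frozen string literal is the instruction operand itself, so it can be
 // pushed directly without calling String#freeze.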
2628  VALUE str = jit_get_arg(jit, 0);
2629  jit_mov_gc_ptr(jit, cb, REG0, str);
2630 
2631  // Push the return value onto the stack
2632  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
2633  mov(cb, stack_ret, REG0);
2634 
2635  return YJIT_KEEP_COMPILING;
2636 }
2637 
2638 static codegen_status_t
2639 gen_opt_str_uminus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2640 {
2641  if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_UMINUS)) {
2642  return YJIT_CANT_COMPILE;
2643  }
2644 
2645  VALUE str = jit_get_arg(jit, 0);
2646  jit_mov_gc_ptr(jit, cb, REG0, str);
2647 
2648  // Push the return value onto the stack
2649  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
2650  mov(cb, stack_ret, REG0);
2651 
2652  return YJIT_KEEP_COMPILING;
2653 }
2654 
2655 static codegen_status_t
2656 gen_opt_not(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2657 {
2658  return gen_opt_send_without_block(jit, ctx, cb);
2659 }
2660 
2661 static codegen_status_t
2662 gen_opt_size(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2663 {
2664  return gen_opt_send_without_block(jit, ctx, cb);
2665 }
2666 
2667 static codegen_status_t
2668 gen_opt_length(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2669 {
2670  return gen_opt_send_without_block(jit, ctx, cb);
2671 }
2672 
2673 static codegen_status_t
2674 gen_opt_regexpmatch2(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2675 {
2676  return gen_opt_send_without_block(jit, ctx, cb);
2677 }
2678 
2679 static codegen_status_t
2680 gen_opt_case_dispatch(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2681 {
2682  // Normally this instruction would look up the key in a hash and jump to an
2683  // offset based on that.
2684  // Instead we can take the fallback case and continue with the next
2685  // instruction.
2686  // We'd hope that our jitted code will be sufficiently fast without the
2687  // hash lookup, at least for small hashes, but it's worth revisiting this
2688  // assumption in the future.
2689 
2690  ctx_stack_pop(ctx, 1);
2691 
2692  return YJIT_KEEP_COMPILING; // continue with the next instruction
2693 }
2694 
2695 static void
2696 gen_branchif_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2697 {
2698  switch (shape) {
2699  case SHAPE_NEXT0:
2700  jz_ptr(cb, target1);
2701  break;
2702 
2703  case SHAPE_NEXT1:
2704  jnz_ptr(cb, target0);
2705  break;
2706 
2707  case SHAPE_DEFAULT:
2708  jnz_ptr(cb, target0);
2709  jmp_ptr(cb, target1);
2710  break;
2711  }
2712 }
2713 
2714 static codegen_status_t
2715 gen_branchif(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2716 {
2717  int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2718 
2719  // Check for interrupts, but only on backward branches that may create loops
2720  if (jump_offset < 0) {
2721  uint8_t *side_exit = yjit_side_exit(jit, ctx);
2722  yjit_check_ints(cb, side_exit);
2723  }
2724 
2725  // Test if any bit (outside of the Qnil bit) is on
2726  // RUBY_Qfalse /* ...0000 0000 */
2727  // RUBY_Qnil /* ...0000 1000 */
2728  x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2729  test(cb, val_opnd, imm_opnd(~Qnil));
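 // Qfalse is 0 and Qnil is 8, so the zero flag is set here iff the value is falsy.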
2730 
2731  // Get the branch target instruction offsets
2732  uint32_t next_idx = jit_next_insn_idx(jit);
2733  uint32_t jump_idx = next_idx + jump_offset;
2734  blockid_t next_block = { jit->iseq, next_idx };
2735  blockid_t jump_block = { jit->iseq, jump_idx };
2736 
2737  // Generate the branch instructions
2738  gen_branch(
2739  jit,
2740  ctx,
2741  jump_block,
2742  ctx,
2743  next_block,
2744  ctx,
2745  gen_branchif_branch
2746  );
2747 
2748  return YJIT_END_BLOCK;
2749 }
2750 
2751 static void
2752 gen_branchunless_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2753 {
2754  switch (shape) {
2755  case SHAPE_NEXT0:
2756  jnz_ptr(cb, target1);
2757  break;
2758 
2759  case SHAPE_NEXT1:
2760  jz_ptr(cb, target0);
2761  break;
2762 
2763  case SHAPE_DEFAULT:
2764  jz_ptr(cb, target0);
2765  jmp_ptr(cb, target1);
2766  break;
2767  }
2768 }
2769 
2770 static codegen_status_t
2771 gen_branchunless(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2772 {
2773  int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2774 
2775  // Check for interrupts, but only on backward branches that may create loops
2776  if (jump_offset < 0) {
2777  uint8_t *side_exit = yjit_side_exit(jit, ctx);
2778  yjit_check_ints(cb, side_exit);
2779  }
2780 
2781  // Test if any bit (outside of the Qnil bit) is on
2782  // RUBY_Qfalse /* ...0000 0000 */
2783  // RUBY_Qnil /* ...0000 1000 */
2784  x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2785  test(cb, val_opnd, imm_opnd(~Qnil));
2786 
2787  // Get the branch target instruction offsets
2788  uint32_t next_idx = jit_next_insn_idx(jit);
2789  uint32_t jump_idx = next_idx + jump_offset;
2790  blockid_t next_block = { jit->iseq, next_idx };
2791  blockid_t jump_block = { jit->iseq, jump_idx };
2792 
2793  // Generate the branch instructions
2794  gen_branch(
2795  jit,
2796  ctx,
2797  jump_block,
2798  ctx,
2799  next_block,
2800  ctx,
2801  gen_branchunless_branch
2802  );
2803 
2804  return YJIT_END_BLOCK;
2805 }
2806 
2807 static void
2808 gen_branchnil_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2809 {
2810  switch (shape) {
2811  case SHAPE_NEXT0:
2812  jne_ptr(cb, target1);
2813  break;
2814 
2815  case SHAPE_NEXT1:
2816  je_ptr(cb, target0);
2817  break;
2818 
2819  case SHAPE_DEFAULT:
2820  je_ptr(cb, target0);
2821  jmp_ptr(cb, target1);
2822  break;
2823  }
2824 }
2825 
2826 static codegen_status_t
2827 gen_branchnil(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2828 {
2829  int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2830 
2831  // Check for interrupts, but only on backward branches that may create loops
2832  if (jump_offset < 0) {
2833  uint8_t *side_exit = yjit_side_exit(jit, ctx);
2834  yjit_check_ints(cb, side_exit);
2835  }
2836 
2837  // Test if the value is Qnil
2838  // RUBY_Qnil /* ...0000 1000 */
2839  x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2840  cmp(cb, val_opnd, imm_opnd(Qnil));
2841 
2842  // Get the branch target instruction offsets
2843  uint32_t next_idx = jit_next_insn_idx(jit);
2844  uint32_t jump_idx = next_idx + jump_offset;
2845  blockid_t next_block = { jit->iseq, next_idx };
2846  blockid_t jump_block = { jit->iseq, jump_idx };
2847 
2848  // Generate the branch instructions
2849  gen_branch(
2850  jit,
2851  ctx,
2852  jump_block,
2853  ctx,
2854  next_block,
2855  ctx,
2856  gen_branchnil_branch
2857  );
2858 
2859  return YJIT_END_BLOCK;
2860 }
2861 
2862 static codegen_status_t
2863 gen_jump(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2864 {
2865  int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2866 
2867  // Check for interrupts, but only on backward branches that may create loops
2868  if (jump_offset < 0) {
2869  uint8_t *side_exit = yjit_side_exit(jit, ctx);
2870  yjit_check_ints(cb, side_exit);
2871  }
2872 
2873  // Get the branch target instruction offsets
2874  uint32_t jump_idx = jit_next_insn_idx(jit) + jump_offset;
2875  blockid_t jump_block = { jit->iseq, jump_idx };
2876 
2877  // Generate the jump instruction
2878  gen_direct_jump(
2879  jit,
2880  ctx,
2881  jump_block
2882  );
2883 
2884  return YJIT_END_BLOCK;
2885 }
2886 
2887 /*
2888 Guard that self or a stack operand has the same class as `known_klass`, using
2889 `sample_instance` to speculate about the shape of the runtime value.
2890 FIXNUM and on-heap integers are treated as if they have distinct classes, and
2891 the guard generated for one will fail for the other.
2892 
2893 Recompile as a contingency if possible, or take a side exit as a last resort.
2894 */
2895 static bool
2896 jit_guard_known_klass(jitstate_t *jit, ctx_t *ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit)
2897 {
2898  val_type_t val_type = ctx_get_opnd_type(ctx, insn_opnd);
2899 
2900  if (known_klass == rb_cNilClass) {
2901  RUBY_ASSERT(!val_type.is_heap);
2902  if (val_type.type != ETYPE_NIL) {
2903  RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2904 
2905  ADD_COMMENT(cb, "guard object is nil");
2906  cmp(cb, REG0, imm_opnd(Qnil));
2907  jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2908 
2909  ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_NIL);
2910  }
2911  }
2912  else if (known_klass == rb_cTrueClass) {
2913  RUBY_ASSERT(!val_type.is_heap);
2914  if (val_type.type != ETYPE_TRUE) {
2915  RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2916 
2917  ADD_COMMENT(cb, "guard object is true");
2918  cmp(cb, REG0, imm_opnd(Qtrue));
2919  jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2920 
2921  ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_TRUE);
2922  }
2923  }
2924  else if (known_klass == rb_cFalseClass) {
2925  RUBY_ASSERT(!val_type.is_heap);
2926  if (val_type.type != ETYPE_FALSE) {
2927  RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2928 
2929  ADD_COMMENT(cb, "guard object is false");
2930  STATIC_ASSERT(qfalse_is_zero, Qfalse == 0);
2931  test(cb, REG0, REG0);
2932  jit_chain_guard(JCC_JNZ, jit, ctx, max_chain_depth, side_exit);
2933 
2934  ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FALSE);
2935  }
2936  }
2937  else if (known_klass == rb_cInteger && FIXNUM_P(sample_instance)) {
2938  RUBY_ASSERT(!val_type.is_heap);
2939  // We will guard fixnum and bignum as though they were separate classes
2940  // BIGNUM can be handled by the general else case below
2941  if (val_type.type != ETYPE_FIXNUM || !val_type.is_imm) {
2942  RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2943 
2944  ADD_COMMENT(cb, "guard object is fixnum");
2945  test(cb, REG0, imm_opnd(RUBY_FIXNUM_FLAG));
2946  jit_chain_guard(JCC_JZ, jit, ctx, max_chain_depth, side_exit);
2947  ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FIXNUM);
2948  }
2949  }
2950  else if (known_klass == rb_cSymbol && STATIC_SYM_P(sample_instance)) {
2951  RUBY_ASSERT(!val_type.is_heap);
2952  // We will guard STATIC vs DYNAMIC as though they were separate classes
2953  // DYNAMIC symbols can be handled by the general else case below
2954  if (val_type.type != ETYPE_SYMBOL || !val_type.is_imm) {
2955  RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2956 
2957  ADD_COMMENT(cb, "guard object is static symbol");
2958  STATIC_ASSERT(special_shift_is_8, RUBY_SPECIAL_SHIFT == 8);
2959  cmp(cb, REG0_8, imm_opnd(RUBY_SYMBOL_FLAG));
2960  jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2961  ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_STATIC_SYMBOL);
2962  }
2963  }
2964  else if (known_klass == rb_cFloat && FLONUM_P(sample_instance)) {
2965  RUBY_ASSERT(!val_type.is_heap);
2966  if (val_type.type != ETYPE_FLONUM || !val_type.is_imm) {
2967  RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2968 
2969  // We will guard flonum vs heap float as though they were separate classes
2970  ADD_COMMENT(cb, "guard object is flonum");
2971  mov(cb, REG1, REG0);
2972  and(cb, REG1, imm_opnd(RUBY_FLONUM_MASK));
2973  cmp(cb, REG1, imm_opnd(RUBY_FLONUM_FLAG));
2974  jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2975  ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FLONUM);
2976  }
2977  }
2978  else if (FL_TEST(known_klass, FL_SINGLETON) && sample_instance == rb_attr_get(known_klass, id__attached__)) {
2979  // Singleton classes are attached to one specific object, so we can
2980  // avoid one memory access (and potentially the is_heap check) by
2981  // looking for the expected object directly.
2982  // Note that if the sample instance has a singleton class that isn't
2983  // attached to the sample instance, it means the sample instance
2984  // has an empty singleton class that hasn't been materialized yet. In
2985  // this case, comparing against the sample instance doesn't guarantee
2986  // that its singleton class is empty, so we can't avoid the memory
2987  // access. As an example, `Object.new.singleton_class` is an object in
2988  // this situation.
2989  ADD_COMMENT(cb, "guard known object with singleton class");
2990  // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the object.
2991  jit_mov_gc_ptr(jit, cb, REG1, sample_instance);
2992  cmp(cb, REG0, REG1);
2993  jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2994  }
2995  else {
2996  RUBY_ASSERT(!val_type.is_imm);
2997 
2998  // Check that the receiver is a heap object
2999  // Note: if we get here, the class doesn't have immediate instances.
3000  if (!val_type.is_heap) {
3001  ADD_COMMENT(cb, "guard not immediate");
3002  RUBY_ASSERT(Qfalse < Qnil);
3003  test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
3004  jit_chain_guard(JCC_JNZ, jit, ctx, max_chain_depth, side_exit);
3005  cmp(cb, REG0, imm_opnd(Qnil));
3006  jit_chain_guard(JCC_JBE, jit, ctx, max_chain_depth, side_exit);
3007 
3008  ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_HEAP);
3009  }
3010 
3011  x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
3012 
3013  // Bail if receiver class is different from known_klass
3014  // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class.
3015  ADD_COMMENT(cb, "guard known class");
3016  jit_mov_gc_ptr(jit, cb, REG1, known_klass);
3017  cmp(cb, klass_opnd, REG1);
3018  jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
3019  }
3020 
3021  return true;
3022 }
3023 
3024 // Generate ancestry guard for protected callee.
3025 // Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee).
3026 static void
3027 jit_protected_callee_ancestry_guard(jitstate_t *jit, codeblock_t *cb, const rb_callable_method_entry_t *cme, uint8_t *side_exit)
3028 {
3029  // See vm_call_method().
3030  mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, self));
3031  jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], cme->defined_class);
3032  // Note: PC isn't written to the current control frame because rb_obj_is_kind_of() shouldn't raise.
3033  // VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass);
3034  call_ptr(cb, REG0, (void *)&rb_obj_is_kind_of);
3035  test(cb, RAX, RAX);
3036  jz_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_protected_check_failed));
3037 }
3038 
3039 // Return true when the codegen function generates code.
3040 // known_recv_klass is non-NULL when the caller has used jit_guard_known_klass().
3041 // See yjit_reg_method().
3042 typedef bool (*method_codegen_t)(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass);
3043 
3044 // Register a specialized codegen function for a particular method. Note that
3045 // if the function returns true, the code it generates runs without a
3046 // control frame and without interrupt checks. To avoid creating observable
3047 // behavior changes, the codegen function should only target simple code paths
3048 // that do not allocate and do not make method calls.
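// Used during codegen initialization, e.g.:
//   yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
//   yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);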
3049 static void
3050 yjit_reg_method(VALUE klass, const char *mid_str, method_codegen_t gen_fn)
3051 {
3052  ID mid = rb_intern(mid_str);
3053  const rb_method_entry_t *me = rb_method_entry_at(klass, mid);
3054 
3055  if (!me) {
3056  rb_bug("undefined optimized method: %s", rb_id2name(mid));
3057  }
3058 
3059  // For now, only cfuncs are supported
3060  RUBY_ASSERT(me && me->def);
3061  RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC);
3062 
3063  st_insert(yjit_method_codegen_table, (st_data_t)me->def->method_serial, (st_data_t)gen_fn);
3064 }
3065 
3066 // Codegen for rb_obj_not().
3067 // Note, caller is responsible for generating all the right guards, including
3068 // arity guards.
3069 static bool
3070 jit_rb_obj_not(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3071 {
3072  const val_type_t recv_opnd = ctx_get_opnd_type(ctx, OPND_STACK(0));
3073 
3074  if (recv_opnd.type == ETYPE_NIL || recv_opnd.type == ETYPE_FALSE) {
3075  ADD_COMMENT(cb, "rb_obj_not(nil_or_false)");
3076  ctx_stack_pop(ctx, 1);
3077  x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_TRUE);
3078  mov(cb, out_opnd, imm_opnd(Qtrue));
3079  }
3080  else if (recv_opnd.is_heap || recv_opnd.type != ETYPE_UNKNOWN) {
3081  // Note: recv_opnd.type != ETYPE_NIL && recv_opnd.type != ETYPE_FALSE.
3082  ADD_COMMENT(cb, "rb_obj_not(truthy)");
3083  ctx_stack_pop(ctx, 1);
3084  x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_FALSE);
3085  mov(cb, out_opnd, imm_opnd(Qfalse));
3086  }
3087  else {
3088  // jit_guard_known_klass() already ran on the receiver which should
3089  // have deduced the type of the receiver. This case should be
3090  // rare if not unreachable.
3091  return false;
3092  }
3093  return true;
3094 }
3095 
3096 // Codegen for rb_true()
3097 static bool
3098 jit_rb_true(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3099 {
3100  ADD_COMMENT(cb, "nil? == true");
3101  ctx_stack_pop(ctx, 1);
3102  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_TRUE);
3103  mov(cb, stack_ret, imm_opnd(Qtrue));
3104  return true;
3105 }
3106 
3107 // Codegen for rb_false()
3108 static bool
3109 jit_rb_false(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3110 {
3111  ADD_COMMENT(cb, "nil? == false");
3112  ctx_stack_pop(ctx, 1);
3113  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_FALSE);
3114  mov(cb, stack_ret, imm_opnd(Qfalse));
3115  return true;
3116 }
3117 
3118 // Codegen for rb_obj_equal()
3119 // object identity comparison
3120 static bool
3121 jit_rb_obj_equal(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3122 {
3123  ADD_COMMENT(cb, "equal?");
3124  x86opnd_t obj1 = ctx_stack_pop(ctx, 1);
3125  x86opnd_t obj2 = ctx_stack_pop(ctx, 1);
3126 
3127  mov(cb, REG0, obj1);
3128  cmp(cb, REG0, obj2);
3129  mov(cb, REG0, imm_opnd(Qtrue));
3130  mov(cb, REG1, imm_opnd(Qfalse));
3131  cmovne(cb, REG0, REG1);
3132 
3133  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_IMM);
3134  mov(cb, stack_ret, REG0);
3135  return true;
3136 }
3137 
3138 static VALUE
3139 yjit_str_bytesize(VALUE str)
3140 {
3141  return LONG2NUM(RSTRING_LEN(str));
3142 }
3143 
3144 static bool
3145 jit_rb_str_bytesize(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3146 {
3147  ADD_COMMENT(cb, "String#bytesize");
3148 
3149  x86opnd_t recv = ctx_stack_pop(ctx, 1);
3150  mov(cb, C_ARG_REGS[0], recv);
3151  call_ptr(cb, REG0, (void *)&yjit_str_bytesize);
3152 
3153  x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_FIXNUM);
3154  mov(cb, out_opnd, RAX);
3155 
3156  return true;
3157 }
3158 
3159 // Codegen for rb_str_to_s()
3160 // When String#to_s is called on a String instance, the method returns self and
3161 // most of the overhead comes from setting up the method call. We observed that
3162 // this situation happens a lot in some workloads.
3163 static bool
3164 jit_rb_str_to_s(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3165 {
3166  if (recv_known_klass && *recv_known_klass == rb_cString) {
3167  ADD_COMMENT(cb, "to_s on plain string");
3168  // The method returns the receiver, which is already on the stack.
3169  // No stack movement.
3170  return true;
3171  }
3172  return false;
3173 }
3174 
3175 static bool
3176 jit_thread_s_current(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3177 {
3178  ADD_COMMENT(cb, "Thread.current");
3179  ctx_stack_pop(ctx, 1);
3180 
3181  // ec->thread_ptr
3182  mov(cb, REG0, member_opnd(REG_EC, rb_execution_context_t, thread_ptr));
3183 
3184  // thread->self
3185  mov(cb, REG0, member_opnd(REG0, rb_thread_t, self));
3186 
3187  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HEAP);
3188  mov(cb, stack_ret, REG0);
3189  return true;
3190 }
3191 
3192 // Check if we know how to codegen for a particular cfunc method
3193 static method_codegen_t
3194 lookup_cfunc_codegen(const rb_method_definition_t *def)
3195 {
3196  method_codegen_t gen_fn;
3197  if (st_lookup(yjit_method_codegen_table, def->method_serial, (st_data_t *)&gen_fn)) {
3198  return gen_fn;
3199  }
3200  return NULL;
3201 }
3202 
3203 // Is anyone currently listening for :c_call and :c_return events?
3204 static bool
3205 c_method_tracing_currently_enabled(const jitstate_t *jit)
3206 {
3207  rb_event_flag_t tracing_events;
3208  if (rb_multi_ractor_p()) {
3209  tracing_events = ruby_vm_event_enabled_global_flags;
3210  }
3211  else {
3212  // At the time of writing, events are never removed from
3213  // ruby_vm_event_enabled_global_flags so always checking using it would
3214  // mean we don't compile even after tracing is disabled.
3215  tracing_events = rb_ec_ractor_hooks(jit->ec)->events;
3216  }
3217 
3218  return tracing_events & (RUBY_EVENT_C_CALL | RUBY_EVENT_C_RETURN);
3219 }
3220 
3221 static codegen_status_t
3222 gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3223 {
3224  const rb_method_cfunc_t *cfunc = UNALIGNED_MEMBER_PTR(cme->def, body.cfunc);
3225 
3226  // If the function expects a Ruby array of arguments
3227  if (cfunc->argc < 0 && cfunc->argc != -1) {
3228  GEN_COUNTER_INC(cb, send_cfunc_ruby_array_varg);
3229  return YJIT_CANT_COMPILE;
3230  }
3231 
3232  // If the argument count doesn't match
3233  if (cfunc->argc >= 0 && cfunc->argc != argc) {
3234  GEN_COUNTER_INC(cb, send_cfunc_argc_mismatch);
3235  return YJIT_CANT_COMPILE;
3236  }
3237 
3238  // Don't JIT functions that need C stack arguments for now
3239  if (cfunc->argc >= 0 && argc + 1 > NUM_C_ARG_REGS) {
3240  GEN_COUNTER_INC(cb, send_cfunc_toomany_args);
3241  return YJIT_CANT_COMPILE;
3242  }
3243 
3244  if (c_method_tracing_currently_enabled(jit)) {
3245  // Don't JIT if tracing c_call or c_return
3246  GEN_COUNTER_INC(cb, send_cfunc_tracing);
3247  return YJIT_CANT_COMPILE;
3248  }
3249 
3250  // Delegate to codegen for C methods if we have it.
3251  {
3252  method_codegen_t known_cfunc_codegen;
3253  if ((known_cfunc_codegen = lookup_cfunc_codegen(cme->def))) {
3254  if (known_cfunc_codegen(jit, ctx, ci, cme, block, argc, recv_known_klass)) {
3255  // cfunc codegen generated code. Terminate the block so
3256  // there aren't multiple calls in the same block.
3257  jit_jump_to_next_insn(jit, ctx);
3258  return YJIT_END_BLOCK;
3259  }
3260  }
3261  }
3262 
3263  // Callee method ID
3264  //ID mid = vm_ci_mid(ci);
3265  //printf("JITting call to C function \"%s\", argc: %lu\n", rb_id2name(mid), argc);
3266  //print_str(cb, "");
3267  //print_str(cb, "calling CFUNC:");
3268  //print_str(cb, rb_id2name(mid));
3269  //print_str(cb, "recv");
3270  //print_ptr(cb, recv);
3271 
3272  // Create a side-exit to fall back to the interpreter
3273  uint8_t *side_exit = yjit_side_exit(jit, ctx);
3274 
3275  // Check for interrupts
3276  yjit_check_ints(cb, side_exit);
3277 
3278  // Stack overflow check
3279  // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
3280  // REG_CFP <= REG_SP + 4 * sizeof(VALUE) + 2 * sizeof(rb_control_frame_t)
3281  lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + 2 * sizeof(rb_control_frame_t)));
3282  cmp(cb, REG_CFP, REG0);
3283  jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
3284 
3285  // Points to the receiver operand on the stack
3286  x86opnd_t recv = ctx_stack_opnd(ctx, argc);
3287 
3288  // Store incremented PC into current control frame in case callee raises.
3289  jit_save_pc(jit, REG0);
3290 
3291  if (block) {
3292  // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
3293  // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
3294  // with cfp->block_code.
3295  jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
3296  mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
3297  }
3298 
3299  // Increment the stack pointer by 3 (in the callee)
3300  // sp += 3
3301  lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 3));
3302 
3303  // Write method entry at sp[-3]
3304  // sp[-3] = me;
3305  // Put compile time cme into REG1. It's assumed to be valid because we are notified when
3306  // any cme we depend on becomes outdated. See rb_yjit_method_lookup_change().
3307  jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
3308  mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
3309 
3310  // Write block handler at sp[-2]
3311  // sp[-2] = block_handler;
3312  if (block) {
3313  // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
3314  lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
3315  or(cb, REG1, imm_opnd(1));
3316  mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
3317  }
3318  else {
3319  mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
3320  }
3321 
3322  // Write env flags at sp[-1]
3323  // sp[-1] = frame_type;
3324  uint64_t frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
3325  mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
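
    // For illustration, at this point the callee's environment header is fully
    // written. Relative to the callee sp computed into REG0 above:
    //
    //     sp[-3] = cme            (compile-time method entry)
    //     sp[-2] = block handler  (tagged iseq block or VM_BLOCK_HANDLER_NONE)
    //     sp[-1] = frame_type     (VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL)
    //
    // mirroring the layout vm_push_frame() builds for the interpreter.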
3326 
3327  // Allocate a new CFP (ec->cfp--)
3328  sub(
3329  cb,
3330  member_opnd(REG_EC, rb_execution_context_t, cfp),
3331  imm_opnd(sizeof(rb_control_frame_t))
3332  );
3333 
3334  // Setup the new frame
3335  // *cfp = (const struct rb_control_frame_struct) {
3336  // .pc = 0,
3337  // .sp = sp,
3338  // .iseq = 0,
3339  // .self = recv,
3340  // .ep = sp - 1,
3341  // .block_code = 0,
3342  // .__bp__ = sp,
3343  // };
3344  mov(cb, REG1, member_opnd(REG_EC, rb_execution_context_t, cfp));
3345  mov(cb, member_opnd(REG1, rb_control_frame_t, pc), imm_opnd(0));
3346  mov(cb, member_opnd(REG1, rb_control_frame_t, sp), REG0);
3347  mov(cb, member_opnd(REG1, rb_control_frame_t, iseq), imm_opnd(0));
3348  mov(cb, member_opnd(REG1, rb_control_frame_t, block_code), imm_opnd(0));
3349  mov(cb, member_opnd(REG1, rb_control_frame_t, __bp__), REG0);
3350  sub(cb, REG0, imm_opnd(sizeof(VALUE)));
3351  mov(cb, member_opnd(REG1, rb_control_frame_t, ep), REG0);
3352  mov(cb, REG0, recv);
3353  mov(cb, member_opnd(REG1, rb_control_frame_t, self), REG0);
3354 
3355  // Verify that we are calling the right function
3356  if (YJIT_CHECK_MODE > 0) {
3357  // Call check_cfunc_dispatch
3358  mov(cb, C_ARG_REGS[0], recv);
3359  jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], (VALUE)ci);
3360  mov(cb, C_ARG_REGS[2], const_ptr_opnd((void *)cfunc->func));
3361  jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)cme);
3362  call_ptr(cb, REG0, (void *)&check_cfunc_dispatch);
3363  }
3364 
3365  // Copy SP into RAX because REG_SP will get overwritten
3366  lea(cb, RAX, ctx_sp_opnd(ctx, 0));
3367 
3368  // Pop the C function arguments from the stack (in the caller)
3369  ctx_stack_pop(ctx, argc + 1);
3370 
3371  // Write interpreter SP into CFP.
3372  // Needed in case the callee yields to the block.
3373  jit_save_sp(jit, ctx);
3374 
3375  // Non-variadic method
3376  if (cfunc->argc >= 0) {
3377  // Copy the arguments from the stack to the C argument registers
3378  // self is the 0th argument and is at index argc from the stack top
3379  for (int32_t i = 0; i < argc + 1; ++i)
3380  {
3381  x86opnd_t stack_opnd = mem_opnd(64, RAX, -(argc + 1 - i) * SIZEOF_VALUE);
3382  x86opnd_t c_arg_reg = C_ARG_REGS[i];
3383  mov(cb, c_arg_reg, stack_opnd);
3384  }
3385  }
3386  // Variadic method
3387  if (cfunc->argc == -1) {
3388  // The method gets a pointer to the first argument
3389  // rb_f_puts(int argc, VALUE *argv, VALUE recv)
3390  mov(cb, C_ARG_REGS[0], imm_opnd(argc));
3391  lea(cb, C_ARG_REGS[1], mem_opnd(64, RAX, -(argc) * SIZEOF_VALUE));
3392  mov(cb, C_ARG_REGS[2], mem_opnd(64, RAX, -(argc + 1) * SIZEOF_VALUE));
3393  }
3394 
3395  // Call the C function
3396  // VALUE ret = (cfunc->func)(recv, argv[0], argv[1]);
3397  // cfunc comes from compile-time cme->def, which we assume to be stable.
3398  // Invalidation logic is in rb_yjit_method_lookup_change()
3399  call_ptr(cb, REG0, (void*)cfunc->func);
3400 
3401  // Record code position for TracePoint patching. See full_cfunc_return().
3402  record_global_inval_patch(cb, outline_full_cfunc_return_pos);
3403 
3404  // Push the return value on the Ruby stack
3405  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3406  mov(cb, stack_ret, RAX);
3407 
3408  // Pop the stack frame (ec->cfp++)
3409  add(
3410  cb,
3411  member_opnd(REG_EC, rb_execution_context_t, cfp),
3412  imm_opnd(sizeof(rb_control_frame_t))
3413  );
3414 
3415  // cfunc calls may corrupt types
3416  ctx_clear_local_types(ctx);
3417 
3418  // Note: the return block of gen_send_iseq() has ctx->sp_offset == 1
3419  // which allows for sharing the same successor.
3420 
3421  // Jump (fall through) to the call continuation block
3422  // We do this to end the current block after the call
3423  jit_jump_to_next_insn(jit, ctx);
3424  return YJIT_END_BLOCK;
3425 }
3426 
3427 static void
3428 gen_return_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
3429 {
3430  switch (shape) {
3431  case SHAPE_NEXT0:
3432  case SHAPE_NEXT1:
3433  RUBY_ASSERT(false);
3434  break;
3435 
3436  case SHAPE_DEFAULT:
3437  mov(cb, REG0, const_ptr_opnd(target0));
3438  mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
3439  break;
3440  }
3441 }
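
// Descriptive note on the mechanism above: target0 is the code address of the
// compiled return block, and the SHAPE_DEFAULT case stores it into
// cfp->jit_return on the callee frame that gen_send_iseq() pushes. When the
// callee's leave instruction later runs through gen_leave(), it pops that
// frame and jumps through the saved jit_return pointer, giving a JIT-to-JIT
// return that never re-enters the interpreter.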
3442 
3443 // If true, the iseq is leaf and it can be replaced by a single C call.
3444 static bool
3445 rb_leaf_invokebuiltin_iseq_p(const rb_iseq_t *iseq)
3446 {
3447  unsigned int invokebuiltin_len = insn_len(BIN(opt_invokebuiltin_delegate_leave));
3448  unsigned int leave_len = insn_len(BIN(leave));
3449 
3450  return (iseq->body->iseq_size == (invokebuiltin_len + leave_len) &&
3451  rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[0]) == BIN(opt_invokebuiltin_delegate_leave) &&
3452  rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[invokebuiltin_len]) == BIN(leave) &&
3453  iseq->body->builtin_inline_p
3454  );
3455 }
3456 
3457 // Return an rb_builtin_function if the iseq contains only that leaf builtin function.
3458 static const struct rb_builtin_function*
3459 rb_leaf_builtin_function(const rb_iseq_t *iseq)
3460 {
3461  if (!rb_leaf_invokebuiltin_iseq_p(iseq))
3462  return NULL;
3463  return (const struct rb_builtin_function *)iseq->body->iseq_encoded[1];
3464 }
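
// Illustration of what the two helpers above match (an example, not additional
// checks): an iseq qualifies only when its whole encoded body is
//
//     opt_invokebuiltin_delegate_leave  <bf>, <index>
//     leave
//
// so iseq_encoded[0] holds the address-encoded instruction, iseq_encoded[1]
// holds the struct rb_builtin_function pointer returned here, and the trailing
// leave sits right after the invokebuiltin instruction's operands.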
3465 
3466 static codegen_status_t
3467 gen_send_iseq(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, int32_t argc)
3468 {
3469  const rb_iseq_t *iseq = def_iseq_ptr(cme->def);
3470 
3471  // When you have keyword arguments, there is an extra object that gets
3472  // placed on the stack that represents a bitmap of the keywords that were not
3473  // specified at the call site. We need to keep track of the fact that this
3474  // value is present on the stack in order to properly set up the callee's
3475  // stack pointer.
3476  const bool doing_kw_call = iseq->body->param.flags.has_kw;
3477  const bool supplying_kws = vm_ci_flag(ci) & VM_CALL_KWARG;
3478 
3479  if (vm_ci_flag(ci) & VM_CALL_TAILCALL) {
3480  // We can't handle tailcalls
3481  GEN_COUNTER_INC(cb, send_iseq_tailcall);
3482  return YJIT_CANT_COMPILE;
3483  }
3484 
3485  // No support for callees with these parameters yet as they require allocation
3486  // or complex handling.
3487  if (iseq->body->param.flags.has_rest ||
3488  iseq->body->param.flags.has_post ||
3489  iseq->body->param.flags.has_kwrest) {
3490  GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3491  return YJIT_CANT_COMPILE;
3492  }
3493 
3494  // If we have keyword arguments being passed to a callee that only takes
3495  // positionals, then we need to allocate a hash. For now we're going to
3496  // call that too complex and bail.
3497  if (supplying_kws && !iseq->body->param.flags.has_kw) {
3498  GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3499  return YJIT_CANT_COMPILE;
3500  }
3501 
3502  // If we have a method accepting no kwargs (**nil), exit if we have passed
3503  // it any kwargs.
3504  if (supplying_kws && iseq->body->param.flags.accepts_no_kwarg) {
3505  GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3506  return YJIT_CANT_COMPILE;
3507  }
3508 
3509  // For computing number of locals to setup for the callee
3510  int num_params = iseq->body->param.size;
3511 
3512  // Block parameter handling. This mirrors setup_parameters_complex().
3513  if (iseq->body->param.flags.has_block) {
3514  if (iseq->body->local_iseq == iseq) {
3515  // Block argument is passed through EP and not set up as a local in
3516  // the callee.
3517  num_params--;
3518  }
3519  else {
3520  // In this case (param.flags.has_block && local_iseq != iseq),
3521  // the block argument is set up as a local variable and requires
3522  // materialization (allocation). Bail.
3523  GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3524  return YJIT_CANT_COMPILE;
3525  }
3526  }
3527 
3528  uint32_t start_pc_offset = 0;
3529 
3530  const int required_num = iseq->body->param.lead_num;
3531 
3532  // This struct represents the metadata about the caller-specified
3533  // keyword arguments.
3534  const struct rb_callinfo_kwarg *kw_arg = vm_ci_kwarg(ci);
3535  const int kw_arg_num = kw_arg ? kw_arg->keyword_len : 0;
3536 
3537  // Arity handling and optional parameter setup
3538  const int opts_filled = argc - required_num - kw_arg_num;
3539  const int opt_num = iseq->body->param.opt_num;
3540  const int opts_missing = opt_num - opts_filled;
3541 
3542  if (opts_filled < 0 || opts_filled > opt_num) {
3543  GEN_COUNTER_INC(cb, send_iseq_arity_error);
3544  return YJIT_CANT_COMPILE;
3545  }
3546 
3547  // If we have unfilled optional arguments and keyword arguments then we
3548  // would need to adjust the argument locations to account for that.
3549  // For now we aren't handling this case.
3550  if (doing_kw_call && opts_missing > 0) {
3551  GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3552  return YJIT_CANT_COMPILE;
3553  }
3554 
3555  if (opt_num > 0) {
3556  num_params -= opt_num - opts_filled;
3557  start_pc_offset = (uint32_t)iseq->body->param.opt_table[opts_filled];
3558  }
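
    // Worked example (illustrative only): for a callee with one required and
    // two optional positional parameters, e.g. `def m(a, b = 1, c = 2)`, called
    // with two positional arguments and no kwargs: required_num == 1,
    // opt_num == 2, opts_filled == 2 - 1 - 0 == 1, opts_missing == 1, and
    // start_pc_offset == opt_table[1], i.e. the callee is entered at the code
    // that computes the default for `c` only.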
3559 
3560  if (doing_kw_call) {
3561  // Here we're calling a method with keyword arguments and specifying
3562  // keyword arguments at this call site.
3563 
3564  // This struct represents the metadata about the callee-specified
3565  // keyword parameters.
3566  const struct rb_iseq_param_keyword *keyword = iseq->body->param.keyword;
3567 
3568  int required_kwargs_filled = 0;
3569 
3570  if (keyword->num > 30) {
3571  // We have so many keywords that (1 << num) encoded as a FIXNUM
3572  // (which shifts it left one more) no longer fits inside a 32-bit
3573  // immediate.
3574  GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3575  return YJIT_CANT_COMPILE;
3576  }
3577 
3578  // Check that the kwargs being passed are valid
3579  if (supplying_kws) {
3580  // This is the list of keyword arguments that the callee specified
3581  // in its initial declaration.
3582  const ID *callee_kwargs = keyword->table;
3583 
3584  // Here we're going to build up a list of the IDs that correspond to
3585  // the caller-specified keyword arguments. If they're not in the
3586  // same order as the order specified in the callee declaration, then
3587  // we're going to need to generate some code to swap values around
3588  // on the stack.
3589  ID *caller_kwargs = ALLOCA_N(VALUE, kw_arg->keyword_len);
3590  for (int kwarg_idx = 0; kwarg_idx < kw_arg->keyword_len; kwarg_idx++)
3591  caller_kwargs[kwarg_idx] = SYM2ID(kw_arg->keywords[kwarg_idx]);
3592 
3593  // First, we're going to be sure that the names of every
3594  // caller-specified keyword argument correspond to a name in the
3595  // list of callee-specified keyword parameters.
3596  for (int caller_idx = 0; caller_idx < kw_arg->keyword_len; caller_idx++) {
3597  int callee_idx;
3598 
3599  for (callee_idx = 0; callee_idx < keyword->num; callee_idx++) {
3600  if (caller_kwargs[caller_idx] == callee_kwargs[callee_idx]) {
3601  break;
3602  }
3603  }
3604 
3605  // If the keyword was never found, then we know we have a
3606  // mismatch in the names of the keyword arguments, so we need to
3607  // bail.
3608  if (callee_idx == keyword->num) {
3609  GEN_COUNTER_INC(cb, send_iseq_kwargs_mismatch);
3610  return YJIT_CANT_COMPILE;
3611  }
3612 
3613  // Keep a count to ensure all required kwargs are specified
3614  if (callee_idx < keyword->required_num) {
3615  required_kwargs_filled++;
3616  }
3617  }
3618  }
3619 
3620  RUBY_ASSERT(required_kwargs_filled <= keyword->required_num);
3621  if (required_kwargs_filled != keyword->required_num) {
3622  GEN_COUNTER_INC(cb, send_iseq_kwargs_mismatch);
3623  return YJIT_CANT_COMPILE;
3624  }
3625  }
3626 
3627  // Number of locals that are not parameters
3628  const int num_locals = iseq->body->local_table_size - num_params;
3629 
3630  // Create a side-exit to fall back to the interpreter
3631  uint8_t *side_exit = yjit_side_exit(jit, ctx);
3632 
3633  // Check for interrupts
3634  yjit_check_ints(cb, side_exit);
3635 
3636  const struct rb_builtin_function *leaf_builtin = rb_leaf_builtin_function(iseq);
3637 
3638  if (leaf_builtin && !block && leaf_builtin->argc + 1 /* for self */ + 1 /* for ec */ <= NUM_C_ARG_REGS) {
3639  ADD_COMMENT(cb, "inlined leaf builtin");
3640 
3641  jit_prepare_routine_call(jit, ctx, REG0);
3642 
3643  // Call the builtin func (ec, recv, arg1, arg2, ...)
3644  mov(cb, C_ARG_REGS[0], REG_EC);
3645 
3646  // Copy self and arguments
3647  for (int32_t i = 0; i < leaf_builtin->argc + 1; i++) {
3648  x86opnd_t stack_opnd = ctx_stack_opnd(ctx, leaf_builtin->argc - i);
3649  x86opnd_t c_arg_reg = C_ARG_REGS[i + 1];
3650  mov(cb, c_arg_reg, stack_opnd);
3651  }
3652  ctx_stack_pop(ctx, leaf_builtin->argc + 1);
3653  call_ptr(cb, REG0, (void *)leaf_builtin->func_ptr);
3654 
3655  // Push the return value
3656  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3657  mov(cb, stack_ret, RAX);
3658 
3659  // Note: assuming that the leaf builtin doesn't change local variables here.
3660  // Seems like a safe assumption.
3661 
3662  return YJIT_KEEP_COMPILING;
3663  }
3664 
3665  // Stack overflow check
3666  // Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2.
3667  // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
3668  ADD_COMMENT(cb, "stack overflow check");
3669  lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (num_locals + iseq->body->stack_max) + 2 * sizeof(rb_control_frame_t)));
3670  cmp(cb, REG_CFP, REG0);
3671  jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
3672 
3673  if (doing_kw_call) {
3674  // Here we're calling a method with keyword arguments and specifying
3675  // keyword arguments at this call site.
3676 
3677  // Number of positional arguments the callee expects before the first
3678  // keyword argument
3679  const int args_before_kw = required_num + opt_num;
3680 
3681  // This struct represents the metadata about the caller-specified
3682  // keyword arguments.
3683  int caller_keyword_len = 0;
3684  const VALUE *caller_keywords = NULL;
3685  if (vm_ci_kwarg(ci)) {
3686  caller_keyword_len = vm_ci_kwarg(ci)->keyword_len;
3687  caller_keywords = &vm_ci_kwarg(ci)->keywords[0];
3688  }
3689 
3690  // This struct represents the metadata about the callee-specified
3691  // keyword parameters.
3692  const struct rb_iseq_param_keyword *const keyword = iseq->body->param.keyword;
3693 
3694  ADD_COMMENT(cb, "keyword args");
3695 
3696  // This is the list of keyword arguments that the callee specified
3697  // in its initial declaration.
3698  const ID *callee_kwargs = keyword->table;
3699 
3700  int total_kwargs = keyword->num;
3701 
3702  // Here we're going to build up a list of the IDs that correspond to
3703  // the caller-specified keyword arguments. If they're not in the
3704  // same order as the order specified in the callee declaration, then
3705  // we're going to need to generate some code to swap values around
3706  // on the stack.
3707  ID *caller_kwargs = ALLOCA_N(VALUE, total_kwargs);
3708  int kwarg_idx;
3709  for (kwarg_idx = 0; kwarg_idx < caller_keyword_len; kwarg_idx++) {
3710  caller_kwargs[kwarg_idx] = SYM2ID(caller_keywords[kwarg_idx]);
3711  }
3712 
3713  int unspecified_bits = 0;
3714 
3715  for (int callee_idx = keyword->required_num; callee_idx < total_kwargs; callee_idx++) {
3716  bool already_passed = false;
3717  ID callee_kwarg = callee_kwargs[callee_idx];
3718 
3719  for (int caller_idx = 0; caller_idx < caller_keyword_len; caller_idx++) {
3720  if (caller_kwargs[caller_idx] == callee_kwarg) {
3721  already_passed = true;
3722  break;
3723  }
3724  }
3725 
3726  if (!already_passed) {
3727  // Reserve space on the stack for each default value we'll be
3728  // filling in (which is done just below). Also increments
3729  // argc so that the callee's SP is recorded correctly.
3730  argc++;
3731  x86opnd_t default_arg = ctx_stack_push(ctx, TYPE_UNKNOWN);
3732  VALUE default_value = keyword->default_values[callee_idx - keyword->required_num];
3733 
3734  if (default_value == Qundef) {
3735  // Qundef means that this value is not constant and must be
3736  // recalculated at runtime, so we record it in unspecified_bits
3737  // (Qnil is then used as a placeholder instead of Qundef).
3738  unspecified_bits |= 0x01 << (callee_idx - keyword->required_num);
3739  default_value = Qnil;
3740  }
3741 
3742  // GC might move default_value.
3743  jit_mov_gc_ptr(jit, cb, REG0, default_value);
3744  mov(cb, default_arg, REG0);
3745 
3746  caller_kwargs[kwarg_idx++] = callee_kwarg;
3747  }
3748  }
3749  RUBY_ASSERT(kwarg_idx == total_kwargs);
3750 
3751  // Next, we're going to loop through every keyword that was
3752  // specified by the caller and make sure that it's in the correct
3753  // place. If it's not, we're going to swap it around with another one.
3754  for (kwarg_idx = 0; kwarg_idx < total_kwargs; kwarg_idx++) {
3755  ID callee_kwarg = callee_kwargs[kwarg_idx];
3756 
3757  // If the argument is already in the right order, then we don't
3758  // need to generate any code since the expected value is already
3759  // in the right place on the stack.
3760  if (callee_kwarg == caller_kwargs[kwarg_idx]) continue;
3761 
3762  // In this case the argument is not in the right place, so we
3763  // need to find its position where it _should_ be and swap with
3764  // that location.
3765  for (int swap_idx = kwarg_idx + 1; swap_idx < total_kwargs; swap_idx++) {
3766  if (callee_kwarg == caller_kwargs[swap_idx]) {
3767  // First we're going to generate the code that is going
3768  // to perform the actual swapping at runtime.
3769  stack_swap(ctx, cb, argc - 1 - swap_idx - args_before_kw, argc - 1 - kwarg_idx - args_before_kw, REG1, REG0);
3770 
3771  // Next we're going to do some bookkeeping on our end so
3772  // that we know the order that the arguments are
3773  // actually in now.
3774  ID tmp = caller_kwargs[kwarg_idx];
3775  caller_kwargs[kwarg_idx] = caller_kwargs[swap_idx];
3776  caller_kwargs[swap_idx] = tmp;
3777 
3778  break;
3779  }
3780  }
3781  }
3782 
3783  // Keyword arguments cause a special extra local variable to be
3784  // pushed onto the stack that represents the parameters that weren't
3785  // explicitly given a value and have a non-constant default.
3786  mov(cb, ctx_stack_opnd(ctx, -1), imm_opnd(INT2FIX(unspecified_bits)));
3787  }
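
    // Worked example for the keyword handling above (illustrative only):
    // suppose the callee declares (a:, b: 1, c: nil) and the call site passes
    // (c: 3, a: 9). The first loop pushes one extra stack slot for `b`
    // (incrementing argc) and, because its default is a compile-time constant,
    // writes that constant into the slot; had the default required runtime
    // evaluation (Qundef), Qnil would be written instead and the corresponding
    // bit set in unspecified_bits. The second loop then emits stack swaps until
    // the values sit in callee declaration order (a, b, c), and the final mov
    // stores unspecified_bits as the extra FIXNUM value described earlier.
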
3788  // Points to the receiver operand on the stack
3789  x86opnd_t recv = ctx_stack_opnd(ctx, argc);
3790 
3791  // Store the updated SP on the current frame (pop arguments and receiver)
3792  ADD_COMMENT(cb, "store caller sp");
3793  lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
3794  mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
3795 
3796  // Store the next PC in the current frame
3797  jit_save_pc(jit, REG0);
3798 
3799  if (block) {
3800  // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
3801  // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
3802  // with cfp->block_code.
3803  jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
3804  mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
3805  }
3806 
3807  // Adjust the callee's stack pointer
3808  lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (3 + num_locals + doing_kw_call)));
3809 
3810  // Initialize local variables to Qnil
3811  for (int i = 0; i < num_locals; i++) {
3812  mov(cb, mem_opnd(64, REG0, sizeof(VALUE) * (i - num_locals - 3)), imm_opnd(Qnil));
3813  }
3814 
3815  ADD_COMMENT(cb, "push env");
3816  // Put compile time cme into REG1. It's assumed to be valid because we are notified when
3817  // any cme we depend on becomes outdated. See rb_yjit_method_lookup_change().
3818  jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
3819  // Write method entry at sp[-3]
3820  // sp[-3] = me;
3821  mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
3822 
3823  // Write block handler at sp[-2]
3824  // sp[-2] = block_handler;
3825  if (block) {
3826  // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
3827  lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
3828  or(cb, REG1, imm_opnd(1));
3829  mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
3830  }
3831  else {
3832  mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
3833  }
3834 
3835  // Write env flags at sp[-1]
3836  // sp[-1] = frame_type;
3837  uint64_t frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
3838  mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
3839 
3840  ADD_COMMENT(cb, "push callee CFP");
3841  // Allocate a new CFP (ec->cfp--)
3842  sub(cb, REG_CFP, imm_opnd(sizeof(rb_control_frame_t)));
3843  mov(cb, member_opnd(REG_EC, rb_execution_context_t, cfp), REG_CFP);
3844 
3845  // Setup the new frame
3846  // *cfp = (const struct rb_control_frame_struct) {
3847  // .pc = pc,
3848  // .sp = sp,
3849  // .iseq = iseq,
3850  // .self = recv,
3851  // .ep = sp - 1,
3852  // .block_code = 0,
3853  // .__bp__ = sp,
3854  // };
3855  mov(cb, REG1, recv);
3856  mov(cb, member_opnd(REG_CFP, rb_control_frame_t, self), REG1);
3857  mov(cb, REG_SP, REG0); // Switch to the callee's REG_SP
3858  mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
3859  mov(cb, member_opnd(REG_CFP, rb_control_frame_t, __bp__), REG0);
3860  sub(cb, REG0, imm_opnd(sizeof(VALUE)));
3861  mov(cb, member_opnd(REG_CFP, rb_control_frame_t, ep), REG0);
3862  jit_mov_gc_ptr(jit, cb, REG0, (VALUE)iseq);
3863  mov(cb, member_opnd(REG_CFP, rb_control_frame_t, iseq), REG0);
3864  mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), imm_opnd(0));
3865 
3866  // No need to set cfp->pc since the callee sets it whenever calling into routines
3867  // that could look at it through jit_save_pc().
3868  // mov(cb, REG0, const_ptr_opnd(start_pc));
3869  // mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0);
3870 
3871  // Stub so we can return to JITted code
3872  blockid_t return_block = { jit->iseq, jit_next_insn_idx(jit) };
3873 
3874  // Create a context for the callee
3875  ctx_t callee_ctx = DEFAULT_CTX;
3876 
3877  // Set the argument types in the callee's context
3878  for (int32_t arg_idx = 0; arg_idx < argc; ++arg_idx) {
3879  val_type_t arg_type = ctx_get_opnd_type(ctx, OPND_STACK(argc - arg_idx - 1));
3880  ctx_set_local_type(&callee_ctx, arg_idx, arg_type);
3881  }
3882  val_type_t recv_type = ctx_get_opnd_type(ctx, OPND_STACK(argc));
3883  ctx_upgrade_opnd_type(&callee_ctx, OPND_SELF, recv_type);
3884 
3885  // The callee might change locals through Kernel#binding and other means.
3886  ctx_clear_local_types(ctx);
3887 
3888  // Pop arguments and receiver in return context, push the return value
3889  // After the return, sp_offset will be 1. The codegen for leave writes
3890  // the return value in case of JIT-to-JIT return.
3891  ctx_t return_ctx = *ctx;
3892  ctx_stack_pop(&return_ctx, argc + 1);
3893  ctx_stack_push(&return_ctx, TYPE_UNKNOWN);
3894  return_ctx.sp_offset = 1;
3895  return_ctx.chain_depth = 0;
3896 
3897  // Write the JIT return address on the callee frame
3898  gen_branch(
3899  jit,
3900  ctx,
3901  return_block,
3902  &return_ctx,
3903  return_block,
3904  &return_ctx,
3905  gen_return_branch
3906  );
3907 
3908  //print_str(cb, "calling Ruby func:");
3909  //print_str(cb, rb_id2name(vm_ci_mid(ci)));
3910 
3911  // Directly jump to the entry point of the callee
3912  gen_direct_jump(
3913  jit,
3914  &callee_ctx,
3915  (blockid_t){ iseq, start_pc_offset }
3916  );
3917 
3918  return YJIT_END_BLOCK;
3919 }
3920 
3921 static codegen_status_t
3922 gen_struct_aref(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, VALUE comptime_recv, VALUE comptime_recv_klass) {
3923  if (vm_ci_argc(ci) != 0) {
3924  return YJIT_CANT_COMPILE;
3925  }
3926 
3927  const unsigned int off = cme->def->body.optimized.index;
3928 
3929  // Confidence checks
3930  RUBY_ASSERT_ALWAYS(RB_TYPE_P(comptime_recv, T_STRUCT));
3931  RUBY_ASSERT_ALWAYS((long)off < RSTRUCT_LEN(comptime_recv));
3932 
3933  // We are going to use an encoding that takes a 4-byte immediate which
3934  // limits the offset to INT32_MAX.
3935  {
3936  uint64_t native_off = (uint64_t)off * (uint64_t)SIZEOF_VALUE;
3937  if (native_off > (uint64_t)INT32_MAX) {
3938  return YJIT_CANT_COMPILE;
3939  }
3940  }
3941 
3942  // All structs from the same Struct class should have the same
3943  // length. So if our comptime_recv is embedded, all runtime
3944  // structs of the same class should be as well, and the same is
3945  // true of the converse.
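    // (For reference: RSTRUCT_EMBED_LEN_MASK being set means the fields are
    // stored inline in the RStruct object's as.ary; otherwise they live behind
    // the as.heap.ptr pointer. These are exactly the two code paths generated
    // below.)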
3946  bool embedded = FL_TEST_RAW(comptime_recv, RSTRUCT_EMBED_LEN_MASK);
3947 
3948  ADD_COMMENT(cb, "struct aref");
3949 
3950  x86opnd_t recv = ctx_stack_pop(ctx, 1);
3951 
3952  mov(cb, REG0, recv);
3953 
3954  if (embedded) {
3955  mov(cb, REG0, member_opnd_idx(REG0, struct RStruct, as.ary, off));
3956  }
3957  else {
3958  mov(cb, REG0, member_opnd(REG0, struct RStruct, as.heap.ptr));
3959  mov(cb, REG0, mem_opnd(64, REG0, SIZEOF_VALUE * off));
3960  }
3961 
3962  x86opnd_t ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3963  mov(cb, ret, REG0);
3964 
3965  jit_jump_to_next_insn(jit, ctx);
3966  return YJIT_END_BLOCK;
3967 }
3968 
3969 static codegen_status_t
3970 gen_struct_aset(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, VALUE comptime_recv, VALUE comptime_recv_klass) {
3971  if (vm_ci_argc(ci) != 1) {
3972  return YJIT_CANT_COMPILE;
3973  }
3974 
3975  const unsigned int off = cme->def->body.optimized.index;
3976 
3977  // Confidence checks
3978  RUBY_ASSERT_ALWAYS(RB_TYPE_P(comptime_recv, T_STRUCT));
3979  RUBY_ASSERT_ALWAYS((long)off < RSTRUCT_LEN(comptime_recv));
3980 
3981  ADD_COMMENT(cb, "struct aset");
3982 
3983  x86opnd_t val = ctx_stack_pop(ctx, 1);
3984  x86opnd_t recv = ctx_stack_pop(ctx, 1);
3985 
3986  mov(cb, C_ARG_REGS[0], recv);
3987  mov(cb, C_ARG_REGS[1], imm_opnd(off));
3988  mov(cb, C_ARG_REGS[2], val);
3989  call_ptr(cb, REG0, (void *)RSTRUCT_SET);
3990 
3991  x86opnd_t ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3992  mov(cb, ret, RAX);
3993 
3994  jit_jump_to_next_insn(jit, ctx);
3995  return YJIT_END_BLOCK;
3996 }
3997 
3998 const rb_callable_method_entry_t *
3999 rb_aliased_callable_method_entry(const rb_callable_method_entry_t *me);
4000 
4001 static codegen_status_t
4002 gen_send_general(jitstate_t *jit, ctx_t *ctx, struct rb_call_data *cd, rb_iseq_t *block)
4003 {
4004  // Relevant definitions:
4005  // rb_execution_context_t : vm_core.h
4006  // invoker, cfunc logic : method.h, vm_method.c
4007  // rb_callinfo : vm_callinfo.h
4008  // rb_callable_method_entry_t : method.h
4009  // vm_call_cfunc_with_frame : vm_insnhelper.c
4010  //
4011  // For a general overview for how the interpreter calls methods,
4012  // see vm_call_method().
4013 
4014  const struct rb_callinfo *ci = cd->ci; // info about the call site
4015 
4016  int32_t argc = (int32_t)vm_ci_argc(ci);
4017  ID mid = vm_ci_mid(ci);
4018 
4019  // Don't JIT calls with keyword splat
4020  if (vm_ci_flag(ci) & VM_CALL_KW_SPLAT) {
4021  GEN_COUNTER_INC(cb, send_kw_splat);
4022  return YJIT_CANT_COMPILE;
4023  }
4024 
4025  // Don't JIT calls that aren't simple
4026  // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
4027  if ((vm_ci_flag(ci) & VM_CALL_ARGS_SPLAT) != 0) {
4028  GEN_COUNTER_INC(cb, send_args_splat);
4029  return YJIT_CANT_COMPILE;
4030  }
4031  if ((vm_ci_flag(ci) & VM_CALL_ARGS_BLOCKARG) != 0) {
4032  GEN_COUNTER_INC(cb, send_block_arg);
4033  return YJIT_CANT_COMPILE;
4034  }
4035 
4036  // Defer compilation so we can specialize on class of receiver
4037  if (!jit_at_current_insn(jit)) {
4038  defer_compilation(jit, ctx);
4039  return YJIT_END_BLOCK;
4040  }
4041 
4042  VALUE comptime_recv = jit_peek_at_stack(jit, ctx, argc);
4043  VALUE comptime_recv_klass = CLASS_OF(comptime_recv);
4044 
4045  // Guard that the receiver has the same class as the one from compile time
4046  uint8_t *side_exit = yjit_side_exit(jit, ctx);
4047 
4048  // Points to the receiver operand on the stack
4049  x86opnd_t recv = ctx_stack_opnd(ctx, argc);
4050  insn_opnd_t recv_opnd = OPND_STACK(argc);
4051  mov(cb, REG0, recv);
4052  if (!jit_guard_known_klass(jit, ctx, comptime_recv_klass, recv_opnd, comptime_recv, SEND_MAX_DEPTH, side_exit)) {
4053  return YJIT_CANT_COMPILE;
4054  }
4055 
4056  // Do method lookup
4057  const rb_callable_method_entry_t *cme = rb_callable_method_entry(comptime_recv_klass, mid);
4058  if (!cme) {
4059  // TODO: counter
4060  return YJIT_CANT_COMPILE;
4061  }
4062 
4063  switch (METHOD_ENTRY_VISI(cme)) {
4064  case METHOD_VISI_PUBLIC:
4065  // Can always call public methods
4066  break;
4067  case METHOD_VISI_PRIVATE:
4068  if (!(vm_ci_flag(ci) & VM_CALL_FCALL)) {
4069  // Can only call private methods with FCALL callsites.
4070  // (at the moment they are callsites without a receiver or an explicit `self` receiver)
4071  return YJIT_CANT_COMPILE;
4072  }
4073  break;
4074  case METHOD_VISI_PROTECTED:
4075  jit_protected_callee_ancestry_guard(jit, cb, cme, side_exit);
4076  break;
4077  case METHOD_VISI_UNDEF:
4078  RUBY_ASSERT(false && "cmes should always have a visibility");
4079  break;
4080  }
4081 
4082  // Register block for invalidation
4083  RUBY_ASSERT(cme->called_id == mid);
4084  assume_method_lookup_stable(comptime_recv_klass, cme, jit);
4085 
4086  // To handle the aliased method case (VM_METHOD_TYPE_ALIAS)
4087  while (true) {
4088  // switch on the method type
4089  switch (cme->def->type) {
4090  case VM_METHOD_TYPE_ISEQ:
4091  return gen_send_iseq(jit, ctx, ci, cme, block, argc);
4092  case VM_METHOD_TYPE_CFUNC:
4093  if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
4094  GEN_COUNTER_INC(cb, send_cfunc_kwargs);
4095  return YJIT_CANT_COMPILE;
4096  }
4097  return gen_send_cfunc(jit, ctx, ci, cme, block, argc, &comptime_recv_klass);
4098  case VM_METHOD_TYPE_IVAR:
4099  if (argc != 0) {
4100  // Argument count mismatch. Getters take no arguments.
4101  GEN_COUNTER_INC(cb, send_getter_arity);
4102  return YJIT_CANT_COMPILE;
4103  }
4104  if (c_method_tracing_currently_enabled(jit)) {
4105  // Can't generate code for firing c_call and c_return events
4106  // :attr-tracing:
4107  // Handling the C method tracing events for attr_accessor
4108  // methods is easier than regular C methods as we know the
4109  // "method" we are calling into never enables those tracing
4110  // events. Once global invalidation runs, the code for the
4111  // attr_accessor is invalidated and we exit at the closest
4112  // instruction boundary which is always outside of the body of
4113  // the attr_accessor code.
4114  GEN_COUNTER_INC(cb, send_cfunc_tracing);
4115  return YJIT_CANT_COMPILE;
4116  }
4117 
4118  mov(cb, REG0, recv);
4119 
4120  ID ivar_name = cme->def->body.attr.id;
4121  return gen_get_ivar(jit, ctx, SEND_MAX_DEPTH, comptime_recv, ivar_name, recv_opnd, side_exit);
4122  case VM_METHOD_TYPE_ATTRSET:
4123  if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
4124  GEN_COUNTER_INC(cb, send_attrset_kwargs);
4125  return YJIT_CANT_COMPILE;
4126  }
4127  else if (argc != 1 || !RB_TYPE_P(comptime_recv, T_OBJECT)) {
4128  GEN_COUNTER_INC(cb, send_ivar_set_method);
4129  return YJIT_CANT_COMPILE;
4130  }
4131  else if (c_method_tracing_currently_enabled(jit)) {
4132  // Can't generate code for firing c_call and c_return events
4133  // See :attr-tracing:
4134  GEN_COUNTER_INC(cb, send_cfunc_tracing);
4135  return YJIT_CANT_COMPILE;
4136  }
4137  else {
4138  ID ivar_name = cme->def->body.attr.id;
4139  return gen_set_ivar(jit, ctx, comptime_recv, comptime_recv_klass, ivar_name);
4140  }
4141  // Block method, e.g. define_method(:foo) { :my_block }
4142  case VM_METHOD_TYPE_BMETHOD:
4143  GEN_COUNTER_INC(cb, send_bmethod);
4144  return YJIT_CANT_COMPILE;
4145  case VM_METHOD_TYPE_ZSUPER:
4146  GEN_COUNTER_INC(cb, send_zsuper_method);
4147  return YJIT_CANT_COMPILE;
4148  case VM_METHOD_TYPE_ALIAS: {
4149  // Retrieve the aliased method and re-enter the switch
4150  cme = rb_aliased_callable_method_entry(cme);
4151  continue;
4152  }
4153  case VM_METHOD_TYPE_UNDEF:
4154  GEN_COUNTER_INC(cb, send_undef_method);
4155  return YJIT_CANT_COMPILE;
4156  case VM_METHOD_TYPE_NOTIMPLEMENTED:
4157  GEN_COUNTER_INC(cb, send_not_implemented_method);
4158  return YJIT_CANT_COMPILE;
4159  // Send family of methods, e.g. call/apply
4160  case VM_METHOD_TYPE_OPTIMIZED:
4161  switch (cme->def->body.optimized.type) {
4162  case OPTIMIZED_METHOD_TYPE_SEND:
4163  GEN_COUNTER_INC(cb, send_optimized_method_send);
4164  return YJIT_CANT_COMPILE;
4165  case OPTIMIZED_METHOD_TYPE_CALL:
4166  GEN_COUNTER_INC(cb, send_optimized_method_call);
4167  return YJIT_CANT_COMPILE;
4168  case OPTIMIZED_METHOD_TYPE_BLOCK_CALL:
4169  GEN_COUNTER_INC(cb, send_optimized_method_block_call);
4170  return YJIT_CANT_COMPILE;
4171  case OPTIMIZED_METHOD_TYPE_STRUCT_AREF:
4172  return gen_struct_aref(jit, ctx, ci, cme, comptime_recv, comptime_recv_klass);
4173  case OPTIMIZED_METHOD_TYPE_STRUCT_ASET:
4174  return gen_struct_aset(jit, ctx, ci, cme, comptime_recv, comptime_recv_klass);
4175  default:
4176  rb_bug("unknown optimized method type (%d)", cme->def->body.optimized.type);
4177  UNREACHABLE_RETURN(YJIT_CANT_COMPILE);
4178  }
4179  case VM_METHOD_TYPE_MISSING:
4180  GEN_COUNTER_INC(cb, send_missing_method);
4181  return YJIT_CANT_COMPILE;
4182  case VM_METHOD_TYPE_REFINED:
4183  GEN_COUNTER_INC(cb, send_refined_method);
4184  return YJIT_CANT_COMPILE;
4185  // no default case so compiler issues a warning if this is not exhaustive
4186  }
4187 
4188  // Unreachable
4189  RUBY_ASSERT(false);
4190  }
4191 }
4192 
4193 static codegen_status_t
4194 gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4195 {
4196  struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4197  return gen_send_general(jit, ctx, cd, NULL);
4198 }
4199 
4200 static codegen_status_t
4201 gen_send(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4202 {
4203  struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4204  rb_iseq_t *block = (rb_iseq_t *)jit_get_arg(jit, 1);
4205  return gen_send_general(jit, ctx, cd, block);
4206 }
4207 
4208 static codegen_status_t
4209 gen_invokesuper(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4210 {
4211  struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4212  rb_iseq_t *block = (rb_iseq_t *)jit_get_arg(jit, 1);
4213 
4214  // Defer compilation so we can specialize on class of receiver
4215  if (!jit_at_current_insn(jit)) {
4216  defer_compilation(jit, ctx);
4217  return YJIT_END_BLOCK;
4218  }
4219 
4220  const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(jit->ec->cfp);
4221  if (!me) {
4222  return YJIT_CANT_COMPILE;
4223  }
4224 
4225  // FIXME: We should track and invalidate this block when this cme is invalidated
4226  VALUE current_defined_class = me->defined_class;
4227  ID mid = me->def->original_id;
4228 
4229  if (me != rb_callable_method_entry(current_defined_class, me->called_id)) {
4230  // Though we likely could generate this call, as we are only concerned
4231  // with the method entry remaining valid, assume_method_lookup_stable
4232  // below requires that the method lookup matches as well
4233  return YJIT_CANT_COMPILE;
4234  }
4235 
4236  // vm_search_normal_superclass
4237  if (BUILTIN_TYPE(current_defined_class) == T_ICLASS && FL_TEST_RAW(RBASIC(current_defined_class)->klass, RMODULE_IS_REFINEMENT)) {
4238  return YJIT_CANT_COMPILE;
4239  }
4240  VALUE comptime_superclass = RCLASS_SUPER(RCLASS_ORIGIN(current_defined_class));
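    // (This mirrors vm_search_normal_superclass(): going through RCLASS_ORIGIN
    // keeps `super` from restarting in the defining class's own prepended
    // modules, and RCLASS_SUPER of that origin is where the lookup continues.
    // The refinement case checked just above is handled specially by the
    // interpreter, so it is simply not compiled here.)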
4241 
4242  const struct rb_callinfo *ci = cd->ci;
4243  int32_t argc = (int32_t)vm_ci_argc(ci);
4244 
4245  // Don't JIT calls that aren't simple
4246  // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
4247  if ((vm_ci_flag(ci) & VM_CALL_ARGS_SPLAT) != 0) {
4248  GEN_COUNTER_INC(cb, send_args_splat);
4249  return YJIT_CANT_COMPILE;
4250  }
4251  if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
4252  GEN_COUNTER_INC(cb, send_keywords);
4253  return YJIT_CANT_COMPILE;
4254  }
4255  if ((vm_ci_flag(ci) & VM_CALL_KW_SPLAT) != 0) {
4256  GEN_COUNTER_INC(cb, send_kw_splat);
4257  return YJIT_CANT_COMPILE;
4258  }
4259  if ((vm_ci_flag(ci) & VM_CALL_ARGS_BLOCKARG) != 0) {
4260  GEN_COUNTER_INC(cb, send_block_arg);
4261  return YJIT_CANT_COMPILE;
4262  }
4263 
4264  // Ensure we haven't rebound this method onto an incompatible class.
4265  // In the interpreter we try to avoid making this check by performing some
4266  // cheaper calculations first, but since we specialize on the method entry
4267  // and so only have to do this once at compile time, it is fine to always
4268  // check and side exit here.
4269  VALUE comptime_recv = jit_peek_at_stack(jit, ctx, argc);
4270  if (!rb_obj_is_kind_of(comptime_recv, current_defined_class)) {
4271  return YJIT_CANT_COMPILE;
4272  }
4273 
4274  // Do method lookup
4275  const rb_callable_method_entry_t *cme = rb_callable_method_entry(comptime_superclass, mid);
4276 
4277  if (!cme) {
4278  return YJIT_CANT_COMPILE;
4279  }
4280 
4281  // Check that we'll be able to write this method dispatch before generating checks
4282  switch (cme->def->type) {
4283  case VM_METHOD_TYPE_ISEQ:
4284  case VM_METHOD_TYPE_CFUNC:
4285  break;
4286  default:
4287  // others unimplemented
4288  return YJIT_CANT_COMPILE;
4289  }
4290 
4291  // Guard that the receiver has the same class as the one from compile time
4292  uint8_t *side_exit = yjit_side_exit(jit, ctx);
4293 
4294  if (jit->ec->cfp->ep[VM_ENV_DATA_INDEX_ME_CREF] != (VALUE)me) {
4295  // This will be the case for super within a block
4296  return YJIT_CANT_COMPILE;
4297  }
4298 
4299  ADD_COMMENT(cb, "guard known me");
4300  mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, ep));
4301  x86opnd_t ep_me_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_ME_CREF);
4302  jit_mov_gc_ptr(jit, cb, REG1, (VALUE)me);
4303  cmp(cb, ep_me_opnd, REG1);
4304  jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_me_changed));
4305 
4306  if (!block) {
4307  // Guard no block passed
4308  // rb_vm_frame_block_handler(GET_EC()->cfp) == VM_BLOCK_HANDLER_NONE
4309  // note, we assume VM_ASSERT(VM_ENV_LOCAL_P(ep))
4310  //
4311  // TODO: this could properly forward the current block handler, but
4312  // would require changes to gen_send_*
4313  ADD_COMMENT(cb, "guard no block given");
4314  // EP is in REG0 from above
4315  x86opnd_t ep_specval_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL);
4316  cmp(cb, ep_specval_opnd, imm_opnd(VM_BLOCK_HANDLER_NONE));
4317  jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_block));
4318  }
4319 
4320  // Points to the receiver operand on the stack
4321  x86opnd_t recv = ctx_stack_opnd(ctx, argc);
4322  mov(cb, REG0, recv);
4323 
4324  // We need to assume that both our current method entry and the super
4325  // method entry we invoke remain stable
4326  assume_method_lookup_stable(current_defined_class, me, jit);
4327  assume_method_lookup_stable(comptime_superclass, cme, jit);
4328 
4329  // Method calls may corrupt types
4330  ctx_clear_local_types(ctx);
4331 
4332  switch (cme->def->type) {
4333  case VM_METHOD_TYPE_ISEQ:
4334  return gen_send_iseq(jit, ctx, ci, cme, block, argc);
4335  case VM_METHOD_TYPE_CFUNC:
4336  return gen_send_cfunc(jit, ctx, ci, cme, block, argc, NULL);
4337  default:
4338  break;
4339  }
4340 
4341  RUBY_ASSERT_ALWAYS(false);
4342 }
4343 
4344 static codegen_status_t
4345 gen_leave(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4346 {
4347  // Only the return value should be on the stack
4348  RUBY_ASSERT(ctx->stack_size == 1);
4349 
4350  // Create a side-exit to fall back to the interpreter
4351  uint8_t *side_exit = yjit_side_exit(jit, ctx);
4352 
4353  // Load environment pointer EP from CFP
4354  mov(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, ep));
4355 
4356  // Check for interrupts
4357  ADD_COMMENT(cb, "check for interrupts");
4358  yjit_check_ints(cb, COUNTED_EXIT(jit, side_exit, leave_se_interrupt));
4359 
4360  // Load the return value
4361  mov(cb, REG0, ctx_stack_pop(ctx, 1));
4362 
4363  // Pop the current frame (ec->cfp++)
4364  // Note: the return PC is already in the previous CFP
4365  add(cb, REG_CFP, imm_opnd(sizeof(rb_control_frame_t)));
4366  mov(cb, member_opnd(REG_EC, rb_execution_context_t, cfp), REG_CFP);
4367 
4368  // Reload REG_SP for the caller and write the return value.
4369  // Top of the stack is REG_SP[0] since the caller has sp_offset=1.
4370  mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
4371  mov(cb, mem_opnd(64, REG_SP, 0), REG0);
4372 
4373  // Jump to the JIT return address on the frame that was just popped
4374  const int32_t offset_to_jit_return = -((int32_t)sizeof(rb_control_frame_t)) + (int32_t)offsetof(rb_control_frame_t, jit_return);
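    // In other words, this addresses ((rb_control_frame_t *)REG_CFP)[-1].jit_return:
    // REG_CFP was already incremented past the frame being left, so the negative
    // frame-size offset reaches back into that just-popped frame, whose jit_return
    // slot was filled in by gen_return_branch() when the call was made.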
4375  jmp_rm(cb, mem_opnd(64, REG_CFP, offset_to_jit_return));
4376 
4377  return YJIT_END_BLOCK;
4378 }
4379 
4380 RUBY_EXTERN rb_serial_t ruby_vm_global_constant_state;
4381 
4382 static codegen_status_t
4383 gen_getglobal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4384 {
4385  ID gid = jit_get_arg(jit, 0);
4386 
4387  // Save the PC and SP because we might make a Ruby call for warning
4388  jit_prepare_routine_call(jit, ctx, REG0);
4389 
4390  mov(cb, C_ARG_REGS[0], imm_opnd(gid));
4391 
4392  call_ptr(cb, REG0, (void *)&rb_gvar_get);
4393 
4394  x86opnd_t top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4395  mov(cb, top, RAX);
4396 
4397  return YJIT_KEEP_COMPILING;
4398 }
4399 
4400 static codegen_status_t
4401 gen_setglobal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4402 {
4403  ID gid = jit_get_arg(jit, 0);
4404 
4405  // Save the PC and SP because we might make a Ruby call for
4406  // Kernel#set_trace_var
4407  jit_prepare_routine_call(jit, ctx, REG0);
4408 
4409  mov(cb, C_ARG_REGS[0], imm_opnd(gid));
4410 
4411  x86opnd_t val = ctx_stack_pop(ctx, 1);
4412 
4413  mov(cb, C_ARG_REGS[1], val);
4414 
4415  call_ptr(cb, REG0, (void *)&rb_gvar_set);
4416 
4417  return YJIT_KEEP_COMPILING;
4418 }
4419 
4420 static codegen_status_t
4421 gen_anytostring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4422 {
4423  // Save the PC and SP because rb_obj_as_string_result() below may
4424  // allocate (when the receiver's #to_s did not return a String)
4425  jit_prepare_routine_call(jit, ctx, REG0);
4426 
4427  x86opnd_t str = ctx_stack_pop(ctx, 1);
4428  x86opnd_t val = ctx_stack_pop(ctx, 1);
4429 
4430  mov(cb, C_ARG_REGS[0], str);
4431  mov(cb, C_ARG_REGS[1], val);
4432 
4433  call_ptr(cb, REG0, (void *)&rb_obj_as_string_result);
4434 
4435  // Push the return value
4436  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
4437  mov(cb, stack_ret, RAX);
4438 
4439  return YJIT_KEEP_COMPILING;
4440 }
4441 
4442 static codegen_status_t
4443 gen_objtostring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4444 {
4445  if (!jit_at_current_insn(jit)) {
4446  defer_compilation(jit, ctx);
4447  return YJIT_END_BLOCK;
4448  }
4449 
4450  x86opnd_t recv = ctx_stack_opnd(ctx, 0);
4451  VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 0);
4452 
4453  if (RB_TYPE_P(comptime_recv, T_STRING)) {
4454  uint8_t *side_exit = yjit_side_exit(jit, ctx);
4455 
4456  mov(cb, REG0, recv);
4457  jit_guard_known_klass(jit, ctx, CLASS_OF(comptime_recv), OPND_STACK(0), comptime_recv, SEND_MAX_DEPTH, side_exit);
4458  // No work needed. The string value is already on the top of the stack.
4459  return YJIT_KEEP_COMPILING;
4460  }
4461  else {
4462  struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4463  return gen_send_general(jit, ctx, cd, NULL);
4464  }
4465 }
4466 
4467 static codegen_status_t
4468 gen_toregexp(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4469 {
4470  rb_num_t opt = jit_get_arg(jit, 0);
4471  rb_num_t cnt = jit_get_arg(jit, 1);
4472 
4473  // Save the PC and SP because this allocates an object and could
4474  // raise an exception.
4475  jit_prepare_routine_call(jit, ctx, REG0);
4476 
4477  x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)cnt));
4478  ctx_stack_pop(ctx, cnt);
4479 
4480  mov(cb, C_ARG_REGS[0], imm_opnd(0));
4481  mov(cb, C_ARG_REGS[1], imm_opnd(cnt));
4482  lea(cb, C_ARG_REGS[2], values_ptr);
4483  call_ptr(cb, REG0, (void *)&rb_ary_tmp_new_from_values);
4484 
4485  // Save the array so we can clear it later
4486  push(cb, RAX);
4487  push(cb, RAX); // Alignment
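    // (The duplicate push keeps RSP 16-byte aligned across the following calls,
    // as the SysV x86-64 ABI requires; the saved array value itself is only
    // needed once.)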
4488  mov(cb, C_ARG_REGS[0], RAX);
4489  mov(cb, C_ARG_REGS[1], imm_opnd(opt));
4490  call_ptr(cb, REG0, (void *)&rb_reg_new_ary);
4491 
4492  // The actual regex is in RAX now. Pop the temp array from
4493  // rb_ary_tmp_new_from_values into C arg regs so we can clear it
4494  pop(cb, REG1); // Alignment
4495  pop(cb, C_ARG_REGS[0]);
4496 
4497  // The value we want to push on the stack is in RAX right now
4498  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4499  mov(cb, stack_ret, RAX);
4500 
4501  // Clear the temp array.
4502  call_ptr(cb, REG0, (void *)&rb_ary_clear);
4503 
4504  return YJIT_KEEP_COMPILING;
4505 }
4506 
4507 static codegen_status_t
4508 gen_intern(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4509 {
4510  // Save the PC and SP because we might allocate
4511  jit_prepare_routine_call(jit, ctx, REG0);
4512 
4513  x86opnd_t str = ctx_stack_pop(ctx, 1);
4514 
4515  mov(cb, C_ARG_REGS[0], str);
4516 
4517  call_ptr(cb, REG0, (void *)&rb_str_intern);
4518 
4519  // Push the return value
4520  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4521  mov(cb, stack_ret, RAX);
4522 
4523  return YJIT_KEEP_COMPILING;
4524 }
4525 
4526 static codegen_status_t
4527 gen_getspecial(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4528 {
4529  // This takes two arguments, key and type
4530  // key is only used when type == 0
4531  // A non-zero type determines which type of backref to fetch
4532  //rb_num_t key = jit_get_arg(jit, 0);
4533  rb_num_t type = jit_get_arg(jit, 1);
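    // For example (encoding assumed from the two decoding branches below rather
    // than quoted from the compiler): $& reaches here as type == ('&' << 1) | 1
    // and takes the "special" branch, while $1 reaches here as type == (1 << 1)
    // and falls through to the nth-match branch.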
4534 
4535  if (type == 0) {
4536  // not yet implemented
4537  return YJIT_CANT_COMPILE;
4538  }
4539  else if (type & 0x01) {
4540  // Fetch a "special" backref based on a char encoded by shifting by 1
4541 
4542  // Can raise if matchdata uninitialized
4543  jit_prepare_routine_call(jit, ctx, REG0);
4544 
4545  // call rb_backref_get()
4546  ADD_COMMENT(cb, "rb_backref_get");
4547  call_ptr(cb, REG0, (void *)rb_backref_get);
4548  mov(cb, C_ARG_REGS[0], RAX);
4549 
4550  switch (type >> 1) {
4551  case '&':
4552  ADD_COMMENT(cb, "rb_reg_last_match");
4553  call_ptr(cb, REG0, (void *)rb_reg_last_match);
4554  break;
4555  case '`':
4556  ADD_COMMENT(cb, "rb_reg_match_pre");
4557  call_ptr(cb, REG0, (void *)rb_reg_match_pre);
4558  break;
4559  case '\'':
4560  ADD_COMMENT(cb, "rb_reg_match_post");
4561  call_ptr(cb, REG0, (void *)rb_reg_match_post);
4562  break;
4563  case '+':
4564  ADD_COMMENT(cb, "rb_reg_match_last");
4565  call_ptr(cb, REG0, (void *)rb_reg_match_last);
4566  break;
4567  default:
4568  rb_bug("invalid back-ref");
4569  }
4570 
4571  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4572  mov(cb, stack_ret, RAX);
4573 
4574  return YJIT_KEEP_COMPILING;
4575  }
4576  else {
4577  // Fetch the N-th match from the last backref based on type shifted by 1
4578 
4579  // Can raise if matchdata uninitialized
4580  jit_prepare_routine_call(jit, ctx, REG0);
4581 
4582  // call rb_backref_get()
4583  ADD_COMMENT(cb, "rb_backref_get");
4584  call_ptr(cb, REG0, (void *)rb_backref_get);
4585 
4586  // rb_reg_nth_match((int)(type >> 1), backref);
4587  ADD_COMMENT(cb, "rb_reg_nth_match");
4588  mov(cb, C_ARG_REGS[0], imm_opnd(type >> 1));
4589  mov(cb, C_ARG_REGS[1], RAX);
4590  call_ptr(cb, REG0, (void *)rb_reg_nth_match);
4591 
4592  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4593  mov(cb, stack_ret, RAX);
4594 
4595  return YJIT_KEEP_COMPILING;
4596  }
4597 }
4598 
4599 VALUE
4600 rb_vm_getclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, ICVARC ic);
4601 
4602 static codegen_status_t
4603 gen_getclassvariable(jitstate_t* jit, ctx_t* ctx, codeblock_t* cb)
4604 {
4605  // rb_vm_getclassvariable can raise exceptions.
4606  jit_prepare_routine_call(jit, ctx, REG0);
4607 
4608  mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, iseq));
4609  mov(cb, C_ARG_REGS[1], REG_CFP);
4610  mov(cb, C_ARG_REGS[2], imm_opnd(jit_get_arg(jit, 0)));
4611  mov(cb, C_ARG_REGS[3], imm_opnd(jit_get_arg(jit, 1)));
4612 
4613  call_ptr(cb, REG0, (void *)rb_vm_getclassvariable);
4614 
4615  x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4616  mov(cb, stack_top, RAX);
4617 
4618  return YJIT_KEEP_COMPILING;
4619 }
4620 
4621 VALUE
4622 rb_vm_setclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, VALUE val, ICVARC ic);
4623 
4624 static codegen_status_t
4625 gen_setclassvariable(jitstate_t* jit, ctx_t* ctx, codeblock_t* cb)
4626 {
4627  // rb_vm_setclassvariable can raise exceptions.
4628  jit_prepare_routine_call(jit, ctx, REG0);
4629 
4630  mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, iseq));
4631  mov(cb, C_ARG_REGS[1], REG_CFP);
4632  mov(cb, C_ARG_REGS[2], imm_opnd(jit_get_arg(jit, 0)));
4633  mov(cb, C_ARG_REGS[3], ctx_stack_pop(ctx, 1));
4634  mov(cb, C_ARG_REGS[4], imm_opnd(jit_get_arg(jit, 1)));
4635 
4636  call_ptr(cb, REG0, (void *)rb_vm_setclassvariable);
4637 
4638  return YJIT_KEEP_COMPILING;
4639 }
4640 
4641 static codegen_status_t
4642 gen_opt_getinlinecache(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4643 {
4644  VALUE jump_offset = jit_get_arg(jit, 0);
4645  VALUE const_cache_as_value = jit_get_arg(jit, 1);
4646  IC ic = (IC)const_cache_as_value;
4647 
4648  // See vm_ic_hit_p(). The same conditions are checked in yjit_constant_ic_update().
4649  struct iseq_inline_constant_cache_entry *ice = ic->entry;
4650  if (!ice || // cache not filled
4651  GET_IC_SERIAL(ice) != ruby_vm_global_constant_state /* cache out of date */) {
4652  // In these cases, leave a block that unconditionally side exits
4653  // for the interpreter to invalidate.
4654  return YJIT_CANT_COMPILE;
4655  }
4656 
4657  // Make sure there is an exit for this block as the interpreter might want
4658  // to invalidate this block from yjit_constant_ic_update().
4659  jit_ensure_block_entry_exit(jit);
4660 
4661  if (ice->ic_cref) {
4662  // Cache is keyed on a certain lexical scope. Use the interpreter's cache.
4663  uint8_t *side_exit = yjit_side_exit(jit, ctx);
4664 
4665  // Call function to verify the cache. It doesn't allocate or call methods.
4666  bool rb_vm_ic_hit_p(IC ic, const VALUE *reg_ep);
4667  mov(cb, C_ARG_REGS[0], const_ptr_opnd((void *)ic));
4668  mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, ep));
4669  call_ptr(cb, REG0, (void *)rb_vm_ic_hit_p);
4670 
4671  // Check the result. _Bool is one byte in SysV.
4672  test(cb, AL, AL);
4673  jz_ptr(cb, COUNTED_EXIT(jit, side_exit, opt_getinlinecache_miss));
4674 
4675  // Push ic->entry->value
4676  mov(cb, REG0, const_ptr_opnd((void *)ic));
4677  mov(cb, REG0, member_opnd(REG0, struct iseq_inline_constant_cache, entry));
4678  x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4679  mov(cb, REG0, member_opnd(REG0, struct iseq_inline_constant_cache_entry, value));
4680  mov(cb, stack_top, REG0);
4681  }
4682  else {
4683  // Optimize for single ractor mode.
4684  // FIXME: This leaks when st_insert raises NoMemoryError
4685  if (!assume_single_ractor_mode(jit)) return YJIT_CANT_COMPILE;
4686 
4687  // Invalidate output code on any and all constant writes
4688  // FIXME: This leaks when st_insert raises NoMemoryError
4689  assume_stable_global_constant_state(jit);
4690 
4691  jit_putobject(jit, ctx, ice->value);
4692  }
4693 
4694  // Jump over the code for filling the cache
4695  uint32_t jump_idx = jit_next_insn_idx(jit) + (int32_t)jump_offset;
4696  gen_direct_jump(
4697  jit,
4698  ctx,
4699  (blockid_t){ .iseq = jit->iseq, .idx = jump_idx }
4700  );
4701 
4702  return YJIT_END_BLOCK;
4703 }
4704 
4705 // Push the explicit block parameter onto the temporary stack. Part of the
4706 // interpreter's scheme for avoiding Proc allocations when delegating
4707 // explicit block parameters.
4708 static codegen_status_t
4709 gen_getblockparamproxy(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4710 {
4711  // A mirror of the interpreter code. Checking for the case
4712  // where it's pushing rb_block_param_proxy.
4713  uint8_t *side_exit = yjit_side_exit(jit, ctx);
4714 
4715  // EP level
4716  uint32_t level = (uint32_t)jit_get_arg(jit, 1);
4717 
4718  // Load environment pointer EP from CFP
4719  gen_get_ep(cb, REG0, level);
4720 
4721  // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero
4722  test(cb, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_FLAGS), imm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM));
4723  jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_param_modified));
4724 
4725  // Load the block handler for the current frame
4726  // note, VM_ASSERT(VM_ENV_LOCAL_P(ep))
4727  mov(cb, REG0, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL));
4728 
4729  // Block handler is a tagged pointer. Look at the tag. 0x03 is from VM_BH_ISEQ_BLOCK_P().
4730  and(cb, REG0_8, imm_opnd(0x3));
4731 
4732  // Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null.
4733  cmp(cb, REG0_8, imm_opnd(0x1));
4734  jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_handler_not_iseq));
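    // (Tag scheme as assumed here, matching the capture code in gen_send_iseq
    // and gen_send_cfunc: an iseq block handler is &cfp->self tagged with 0x1,
    // so (bh & 0x3) == 0x1 identifies it; 0x3 would be an ifunc block, and
    // clear low bits a heap object such as a Proc.)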
4735 
4736  // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr.
4737  mov(cb, REG0, const_ptr_opnd((void *)rb_block_param_proxy));
4738  RUBY_ASSERT(!SPECIAL_CONST_P(rb_block_param_proxy));
4739  x86opnd_t top = ctx_stack_push(ctx, TYPE_HEAP);
4740  mov(cb, top, REG0);
4741 
4742  return YJIT_KEEP_COMPILING;
4743 }
4744 
4745 static codegen_status_t
4746 gen_invokebuiltin(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4747 {
4748  const struct rb_builtin_function *bf = (struct rb_builtin_function *)jit_get_arg(jit, 0);
4749 
4750  // ec, self, and arguments
4751  if (bf->argc + 2 > NUM_C_ARG_REGS) {
4752  return YJIT_CANT_COMPILE;
4753  }
4754 
4755  // If the calls don't allocate, do they need up to date PC, SP?
4756  jit_prepare_routine_call(jit, ctx, REG0);
4757 
4758  // Call the builtin func (ec, recv, arg1, arg2, ...)
4759  mov(cb, C_ARG_REGS[0], REG_EC);
4760  mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
4761 
4762  // Copy arguments from locals
4763  for (int32_t i = 0; i < bf->argc; i++) {
4764  x86opnd_t stack_opnd = ctx_stack_opnd(ctx, bf->argc - i - 1);
4765  x86opnd_t c_arg_reg = C_ARG_REGS[2 + i];
4766  mov(cb, c_arg_reg, stack_opnd);
4767  }
4768 
4769  call_ptr(cb, REG0, (void *)bf->func_ptr);
4770 
4771  // Push the return value
4772  ctx_stack_pop(ctx, bf->argc);
4773  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4774  mov(cb, stack_ret, RAX);
4775 
4776  return YJIT_KEEP_COMPILING;
4777 }
4778 
4779 // opt_invokebuiltin_delegate calls a builtin function, like
4780 // invokebuiltin does, but instead of taking arguments from the top of the
4781 // stack uses the argument locals (and self) from the current method.
4782 static codegen_status_t
4783 gen_opt_invokebuiltin_delegate(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4784 {
4785  const struct rb_builtin_function *bf = (struct rb_builtin_function *)jit_get_arg(jit, 0);
4786  int32_t start_index = (int32_t)jit_get_arg(jit, 1);
4787 
4788  // ec, self, and arguments
4789  if (bf->argc + 2 > NUM_C_ARG_REGS) {
4790  return YJIT_CANT_COMPILE;
4791  }
4792 
4793  // If the calls don't allocate, do they need up to date PC, SP?
4794  jit_prepare_routine_call(jit, ctx, REG0);
4795 
4796  if (bf->argc > 0) {
4797  // Load environment pointer EP from CFP
4798  mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, ep));
4799  }
4800 
4801  // Call the builtin func (ec, recv, arg1, arg2, ...)
4802  mov(cb, C_ARG_REGS[0], REG_EC);
4803  mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
4804 
4805  // Copy arguments from locals
4806  for (int32_t i = 0; i < bf->argc; i++) {
4807  const int32_t offs = -jit->iseq->body->local_table_size - VM_ENV_DATA_SIZE + 1 + start_index + i;
4808  x86opnd_t local_opnd = mem_opnd(64, REG0, offs * SIZEOF_VALUE);
4809  x86opnd_t c_arg_reg = C_ARG_REGS[i + 2];
4810  mov(cb, c_arg_reg, local_opnd);
4811  }
4812  call_ptr(cb, REG0, (void *)bf->func_ptr);
4813 
4814  // Push the return value
4815  x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4816  mov(cb, stack_ret, RAX);
4817 
4818  return YJIT_KEEP_COMPILING;
4819 }
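
// Worked example of the offs calculation above, assuming VM_ENV_DATA_SIZE is 3
// (its value in current CRuby): for a method with local_table_size == 2 and
// start_index == 0, the copy loop reads
//
//     i == 0:  offs == -2 - 3 + 1 + 0 + 0 == -4  ->  ep[-4]  (first local)
//     i == 1:  offs == -2 - 3 + 1 + 0 + 1 == -3  ->  ep[-3]  (second local)
//
// i.e. the argument locals sit directly below the environment data slots that
// end at the EP, matching the interpreter's EP-relative local addressing.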
4820 
4821 static int tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data);
4822 static void invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq);
4823 
4824 // Invalidate all generated code and patch C method return code to contain
4825 // logic for firing the c_return TracePoint event. Once rb_vm_barrier()
4826 // returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which
 4827 // means they are inside a C routine. If there is any generated code on the stack,
 4828 // it is waiting for a return from a C routine. For every routine call, we
 4829 // patch in an exit after the body of the containing VM instruction. This makes
 4830 // it so all the invalidated code exits as soon as execution logically reaches
4831 // the next VM instruction. The interpreter takes care of firing the tracing
4832 // event if it so happens that the next VM instruction has one attached.
4833 //
4834 // The c_return event needs special handling as our codegen never outputs code
4835 // that contains tracing logic. If we let the normal output code run until the
4836 // start of the next VM instruction by relying on the patching scheme above, we
4837 // would fail to fire the c_return event. The interpreter doesn't fire the
4838 // event at an instruction boundary, so simply exiting to the interpreter isn't
4839 // enough. To handle it, we patch in the full logic at the return address. See
4840 // full_cfunc_return().
4841 //
4842 // In addition to patching, we prevent future entries into invalidated code by
4843 // removing all live blocks from their iseq.
4844 void
4845 rb_yjit_tracing_invalidate_all(void)
4846 {
4847  if (!rb_yjit_enabled_p()) return;
4848 
4849  // Stop other ractors since we are going to patch machine code.
4850  RB_VM_LOCK_ENTER();
4851  rb_vm_barrier();
4852 
4853  // Make it so all live block versions are no longer valid branch targets
4854  rb_objspace_each_objects(tracing_invalidate_all_i, NULL);
4855 
4856  // Apply patches
4857  const uint32_t old_pos = cb->write_pos;
4858  rb_darray_for(global_inval_patches, patch_idx) {
4859  struct codepage_patch patch = rb_darray_get(global_inval_patches, patch_idx);
4860  cb_set_pos(cb, patch.inline_patch_pos);
4861  uint8_t *jump_target = cb_get_ptr(ocb, patch.outlined_target_pos);
4862  jmp_ptr(cb, jump_target);
4863  }
4864  cb_set_pos(cb, old_pos);
4865 
4866  // Freeze invalidated part of the codepage. We only want to wait for
4867  // running instances of the code to exit from now on, so we shouldn't
4868  // change the code. There could be other ractors sleeping in
4869  // branch_stub_hit(), for example. We could harden this by changing memory
4870  // protection on the frozen range.
4871  RUBY_ASSERT_ALWAYS(yjit_codepage_frozen_bytes <= old_pos && "frozen bytes should increase monotonically");
4872  yjit_codepage_frozen_bytes = old_pos;
4873 
4874  cb_mark_all_executable(ocb);
4875  cb_mark_all_executable(cb);
4876  RB_VM_LOCK_LEAVE();
4877 }
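
// Illustration of the patch application loop above (a conceptual sketch, not
// literal disassembly). Each entry in global_inval_patches pairs a position in
// the inline block (cb), recorded after the body of a VM instruction that
// calls into a C routine, with a pre-generated exit in the outlined block
// (ocb). Applying the patch turns the straight-line fall-through into a jump:
//
//     before:  [generated body of insn]  [fall through to next insn's code]
//     after:   [generated body of insn]  [jmp -> exit stub in ocb]
//
// so frames returning into invalidated code leave for the interpreter at the
// next VM instruction boundary.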
4878 
4879 static int
4880 tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data)
4881 {
4882  VALUE v = (VALUE)vstart;
4883  for (; v != (VALUE)vend; v += stride) {
4884  void *ptr = asan_poisoned_object_p(v);
4885  asan_unpoison_object(v, false);
4886 
4887  if (rb_obj_is_iseq(v)) {
4888  rb_iseq_t *iseq = (rb_iseq_t *)v;
4889  invalidate_all_blocks_for_tracing(iseq);
4890  }
4891 
4892  asan_poison_object_if(ptr, v);
4893  }
4894  return 0;
4895 }
4896 
4897 static void
4898 invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq)
4899 {
4900  struct rb_iseq_constant_body *body = iseq->body;
4901  if (!body) return; // iseq yet to be initialized
4902 
4903  ASSERT_vm_locking();
4904 
 4905  // Empty all blocks on the iseq so we don't compile new blocks that jump to the
 4906  // invalidated region.
 4907  // TODO: Leaking the blocks for now since we might have situations where
 4908  // a different ractor is waiting in branch_stub_hit(). If we free the block,
 4909  // that ractor can wake up with a dangling block.
4910  rb_darray_for(body->yjit_blocks, version_array_idx) {
4911  rb_yjit_block_array_t version_array = rb_darray_get(body->yjit_blocks, version_array_idx);
4912  rb_darray_for(version_array, version_idx) {
4913  // Stop listening for invalidation events like basic operation redefinition.
4914  block_t *block = rb_darray_get(version_array, version_idx);
4915  yjit_unlink_method_lookup_dependency(block);
4916  yjit_block_assumptions_free(block);
4917  }
4918  rb_darray_free(version_array);
4919  }
4920  rb_darray_free(body->yjit_blocks);
4921  body->yjit_blocks = NULL;
4922 
4923 #if USE_MJIT
4924  // Reset output code entry point
4925  body->jit_func = NULL;
4926 #endif
4927 }
4928 
4929 static void
4930 yjit_reg_op(int opcode, codegen_fn gen_fn)
4931 {
4932  RUBY_ASSERT(opcode >= 0 && opcode < VM_INSTRUCTION_SIZE);
4933  // Check that the op wasn't previously registered
4934  RUBY_ASSERT(gen_fns[opcode] == NULL);
4935 
4936  gen_fns[opcode] = gen_fn;
4937 }
4938 
4939 void
4940 yjit_init_codegen(void)
4941 {
4942  // Initialize the code blocks
4943  uint32_t mem_size = rb_yjit_opts.exec_mem_size * 1024 * 1024;
4944  uint8_t *mem_block = alloc_exec_mem(mem_size);
4945 
4946  cb = &block;
4947  cb_init(cb, mem_block, mem_size/2);
4948 
4949  ocb = &outline_block;
4950  cb_init(ocb, mem_block + mem_size/2, mem_size/2);
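
    // Resulting split of the executable region (a sketch of the layout set up
    // just above):
    //
    //     mem_block                       mem_block + mem_size/2
    //     |-------- cb: inline code ------|-------- ocb: outlined code -------|
    //
    // The inline half holds the fast paths that blocks execute directly; the
    // outlined half holds side exits and other rarely executed code.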
4951 
4952  // Generate the interpreter exit code for leave
4953  leave_exit_code = yjit_gen_leave_exit(cb);
4954 
4955  // Generate full exit code for C func
4956  gen_full_cfunc_return();
4957  cb_mark_all_executable(cb);
4958 
4959  // Map YARV opcodes to the corresponding codegen functions
4960  yjit_reg_op(BIN(nop), gen_nop);
4961  yjit_reg_op(BIN(dup), gen_dup);
4962  yjit_reg_op(BIN(dupn), gen_dupn);
4963  yjit_reg_op(BIN(swap), gen_swap);
4964  yjit_reg_op(BIN(setn), gen_setn);
4965  yjit_reg_op(BIN(topn), gen_topn);
4966  yjit_reg_op(BIN(pop), gen_pop);
4967  yjit_reg_op(BIN(adjuststack), gen_adjuststack);
4968  yjit_reg_op(BIN(newarray), gen_newarray);
4969  yjit_reg_op(BIN(duparray), gen_duparray);
4970  yjit_reg_op(BIN(duphash), gen_duphash);
4971  yjit_reg_op(BIN(splatarray), gen_splatarray);
4972  yjit_reg_op(BIN(expandarray), gen_expandarray);
4973  yjit_reg_op(BIN(newhash), gen_newhash);
4974  yjit_reg_op(BIN(newrange), gen_newrange);
4975  yjit_reg_op(BIN(concatstrings), gen_concatstrings);
4976  yjit_reg_op(BIN(putnil), gen_putnil);
4977  yjit_reg_op(BIN(putobject), gen_putobject);
4978  yjit_reg_op(BIN(putstring), gen_putstring);
4979  yjit_reg_op(BIN(putobject_INT2FIX_0_), gen_putobject_int2fix);
4980  yjit_reg_op(BIN(putobject_INT2FIX_1_), gen_putobject_int2fix);
4981  yjit_reg_op(BIN(putself), gen_putself);
4982  yjit_reg_op(BIN(putspecialobject), gen_putspecialobject);
4983  yjit_reg_op(BIN(getlocal), gen_getlocal);
4984  yjit_reg_op(BIN(getlocal_WC_0), gen_getlocal_wc0);
4985  yjit_reg_op(BIN(getlocal_WC_1), gen_getlocal_wc1);
4986  yjit_reg_op(BIN(setlocal), gen_setlocal);
4987  yjit_reg_op(BIN(setlocal_WC_0), gen_setlocal_wc0);
4988  yjit_reg_op(BIN(setlocal_WC_1), gen_setlocal_wc1);
4989  yjit_reg_op(BIN(getinstancevariable), gen_getinstancevariable);
4990  yjit_reg_op(BIN(setinstancevariable), gen_setinstancevariable);
4991  yjit_reg_op(BIN(defined), gen_defined);
4992  yjit_reg_op(BIN(checktype), gen_checktype);
4993  yjit_reg_op(BIN(checkkeyword), gen_checkkeyword);
4994  yjit_reg_op(BIN(opt_lt), gen_opt_lt);
4995  yjit_reg_op(BIN(opt_le), gen_opt_le);
4996  yjit_reg_op(BIN(opt_ge), gen_opt_ge);
4997  yjit_reg_op(BIN(opt_gt), gen_opt_gt);
4998  yjit_reg_op(BIN(opt_eq), gen_opt_eq);
4999  yjit_reg_op(BIN(opt_neq), gen_opt_neq);
5000  yjit_reg_op(BIN(opt_aref), gen_opt_aref);
5001  yjit_reg_op(BIN(opt_aset), gen_opt_aset);
5002  yjit_reg_op(BIN(opt_and), gen_opt_and);
5003  yjit_reg_op(BIN(opt_or), gen_opt_or);
5004  yjit_reg_op(BIN(opt_minus), gen_opt_minus);
5005  yjit_reg_op(BIN(opt_plus), gen_opt_plus);
5006  yjit_reg_op(BIN(opt_mult), gen_opt_mult);
5007  yjit_reg_op(BIN(opt_div), gen_opt_div);
5008  yjit_reg_op(BIN(opt_mod), gen_opt_mod);
5009  yjit_reg_op(BIN(opt_ltlt), gen_opt_ltlt);
5010  yjit_reg_op(BIN(opt_nil_p), gen_opt_nil_p);
5011  yjit_reg_op(BIN(opt_empty_p), gen_opt_empty_p);
5012  yjit_reg_op(BIN(opt_str_freeze), gen_opt_str_freeze);
5013  yjit_reg_op(BIN(opt_str_uminus), gen_opt_str_uminus);
5014  yjit_reg_op(BIN(opt_not), gen_opt_not);
5015  yjit_reg_op(BIN(opt_size), gen_opt_size);
5016  yjit_reg_op(BIN(opt_length), gen_opt_length);
5017  yjit_reg_op(BIN(opt_regexpmatch2), gen_opt_regexpmatch2);
5018  yjit_reg_op(BIN(opt_getinlinecache), gen_opt_getinlinecache);
5019  yjit_reg_op(BIN(invokebuiltin), gen_invokebuiltin);
5020  yjit_reg_op(BIN(opt_invokebuiltin_delegate), gen_opt_invokebuiltin_delegate);
5021  yjit_reg_op(BIN(opt_invokebuiltin_delegate_leave), gen_opt_invokebuiltin_delegate);
5022  yjit_reg_op(BIN(opt_case_dispatch), gen_opt_case_dispatch);
5023  yjit_reg_op(BIN(branchif), gen_branchif);
5024  yjit_reg_op(BIN(branchunless), gen_branchunless);
5025  yjit_reg_op(BIN(branchnil), gen_branchnil);
5026  yjit_reg_op(BIN(jump), gen_jump);
5027  yjit_reg_op(BIN(getblockparamproxy), gen_getblockparamproxy);
5028  yjit_reg_op(BIN(opt_send_without_block), gen_opt_send_without_block);
5029  yjit_reg_op(BIN(send), gen_send);
5030  yjit_reg_op(BIN(invokesuper), gen_invokesuper);
5031  yjit_reg_op(BIN(leave), gen_leave);
5032  yjit_reg_op(BIN(getglobal), gen_getglobal);
5033  yjit_reg_op(BIN(setglobal), gen_setglobal);
5034  yjit_reg_op(BIN(anytostring), gen_anytostring);
5035  yjit_reg_op(BIN(objtostring), gen_objtostring);
5036  yjit_reg_op(BIN(toregexp), gen_toregexp);
5037  yjit_reg_op(BIN(intern), gen_intern);
5038  yjit_reg_op(BIN(getspecial), gen_getspecial);
5039  yjit_reg_op(BIN(getclassvariable), gen_getclassvariable);
5040  yjit_reg_op(BIN(setclassvariable), gen_setclassvariable);
5041 
5042  yjit_method_codegen_table = st_init_numtable();
5043 
5044  // Specialization for C methods. See yjit_reg_method() for details.
5045  yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
5046 
5047  yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);
5048  yjit_reg_method(rb_mKernel, "nil?", jit_rb_false);
5049 
5050  yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal);
5051  yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal);
5052  yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal);
5053  yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal);
5054  yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal);
5055  yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal);
5056 
5057  // rb_str_to_s() methods in string.c
5058  yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);
5059  yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s);
5060  yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize);
5061 
5062  // Thread.current
5063  yjit_reg_method(rb_singleton_class(rb_cThread), "current", jit_thread_s_current);
5064 }