Ruby  3.1.4p223 (2023-03-30 revision HEAD)
mjit_worker.c
1 /**********************************************************************
2 
3  mjit_worker.c - Worker for MRI method JIT compiler
4 
5  Copyright (C) 2017 Vladimir Makarov <vmakarov@redhat.com>.
6 
7 **********************************************************************/
8 
9 // NOTE: All functions in this file are executed on MJIT worker. So don't
10 // call Ruby methods (C functions that may call rb_funcall) or trigger
11 // GC (using ZALLOC, xmalloc, xfree, etc.) in this file.
12 
13 /* However, note that calling `free` for resources `xmalloc`-ed in mjit.c,
14  which is currently done in some places, is sometimes problematic in the
15  following situations:
16 
17  * malloc library could be different between interpreter and extensions
18  on Windows (perhaps not applicable to MJIT because CC is the same)
19  * xmalloc -> free leaks extra space used for USE_GC_MALLOC_OBJ_INFO_DETAILS
20  (not enabled by default)
21 
22  ...in short, it's usually not a problem in MJIT. But maybe it's worth
23  fixing for consistency or for USE_GC_MALLOC_OBJ_INFO_DETAILS support.
24 */
25 
26 /* We utilize widely used C compilers (GCC and LLVM Clang) to
27  implement MJIT. We feed them a C code generated from ISEQ. The
28  industrial C compilers are slower than regular JIT engines.
29  Generated code performance of the used C compilers has a higher
30  priority over the compilation speed.
31 
32  So our major goal is to minimize the ISEQ compilation time when we
33  use a widely used optimization level (-O2). It is achieved by
34 
35  o Using a precompiled version of the header
36  o Keeping all files in `/tmp`. On modern Linux `/tmp` is a file
37  system in memory. So it is pretty fast
38  o Implementing MJIT as a multi-threaded code because we want to
39  compile ISEQs in parallel with iseq execution to speed up Ruby
40  code execution. MJIT has one thread (*worker*) to do
41  parallel compilations:
42  o It prepares a precompiled code of the minimized header.
43  It starts at the MRI execution start
44  o It generates PIC object files of ISEQs
45  o It takes one JIT unit from a priority queue unless it is empty.
46  o It translates the JIT unit ISEQ into C-code using the precompiled
47  header, calls CC and load PIC code when it is ready
48  o Currently MJIT put ISEQ in the queue when ISEQ is called
49  o MJIT can reorder ISEQs in the queue if some ISEQ has been called
50  many times and its compilation did not start yet
51  o MRI reuses the machine code if it already exists for ISEQ
52  o The machine code we generate can stop and switch to the ISEQ
53  interpretation if some condition is not satisfied as the machine
54  code can be speculative or some exception raises
55  o Speculative machine code can be canceled.
56 
57  Here is a diagram showing the MJIT organization:
58 
59                  _______
60                 |header |
61                 |_______|
62                     |                         MRI building
63       --------------|----------------------------------------
64                     |                         MRI execution
65                     |
66        _____________|_____
67       |             |     |
68       |          ___V__   |  CC      ____________________
69       |         |      |----------->| precompiled header |
70       |         |      |  |         |____________________|
71       |         |      |  |              |
72       |         | MJIT |  |              |
73       |         |      |  |              |
74       |         |      |  |          ____V___  CC  __________
75       |         |______|----------->| C code |--->| .so file |
76       |                   |         |________|    |__________|
77       |                   |                             |
78       |                   |                             |
79       | MRI machine code  |<-----------------------------
80       |___________________|             loading
81 
82 */
83 
84 #ifdef __sun
85 #define __EXTENSIONS__ 1
86 #endif
87 
88 #include "vm_core.h"
89 #include "vm_callinfo.h"
90 #include "mjit.h"
91 #include "gc.h"
92 #include "ruby_assert.h"
93 #include "ruby/debug.h"
94 #include "ruby/thread.h"
95 #include "ruby/version.h"
96 #include "builtin.h"
97 #include "insns.inc"
98 #include "insns_info.inc"
99 #include "internal/compile.h"
100 
101 #ifdef _WIN32
102 #include <winsock2.h>
103 #include <windows.h>
104 #else
105 #include <sys/wait.h>
106 #include <sys/time.h>
107 #include <dlfcn.h>
108 #endif
109 #include <errno.h>
110 #ifdef HAVE_FCNTL_H
111 #include <fcntl.h>
112 #endif
113 #ifdef HAVE_SYS_PARAM_H
114 # include <sys/param.h>
115 #endif
116 #include "dln.h"
117 
118 #include "ruby/util.h"
119 #undef strdup // ruby_strdup may trigger GC
120 
121 #ifndef MAXPATHLEN
122 # define MAXPATHLEN 1024
123 #endif
124 
#ifdef _WIN32
// Map the POSIX dynamic-loading calls used in this file onto the Win32
// loader. `flag` is accepted for source compatibility and ignored.
#define dlopen(name,flag) ((void*)LoadLibrary(name))
#define dlerror() strerror(rb_w32_map_errno(GetLastError()))
#define dlsym(handle,name) ((void*)GetProcAddress((handle),(name)))
// Callers treat a non-zero dlclose() result as failure, while FreeLibrary()
// returns zero on failure, hence the negation.
#define dlclose(handle) (!FreeLibrary(handle))
#define RTLD_NOW -1

// waitpid() emulation on a process HANDLE: wait until the process is
// signaled, store its exit code through `stat_loc`, close the handle, and
// evaluate to `pid`. `options` is ignored.
// Every use of `pid` is parenthesized so that an expression argument is cast
// as a whole. NOTE: `pid` is evaluated several times; pass an expression
// without side effects.
#define waitpid(pid,stat_loc,options) (WaitForSingleObject((HANDLE)(pid), INFINITE), GetExitCodeProcess((HANDLE)(pid), (LPDWORD)(stat_loc)), CloseHandle((HANDLE)(pid)), (pid))
#define WIFEXITED(S) ((S) != STILL_ACTIVE)
#define WEXITSTATUS(S) (S)
#define WIFSIGNALED(S) (0)
typedef intptr_t pid_t;
#endif
138 
// Atomically set function pointer if possible.
// Expands to ATOMIC_PTR_EXCHANGE(var, val) with the previous value discarded.
#define MJIT_ATOMIC_SET(var, val) (void)ATOMIC_PTR_EXCHANGE(var, val)

// Prefix of the names of temporary files generated by MJIT.
#define MJIT_TMP_PREFIX "_ruby_mjit_"

// JIT compaction requires the header transformation because linking multiple .o files
// doesn't work without having `static` in the same function definitions. We currently
// don't support transforming the MJIT header on Windows.
// USE_JIT_COMPACTION is therefore 0 on Windows and 1 everywhere else.
#ifdef _WIN32
# define USE_JIT_COMPACTION 0
#else
# define USE_JIT_COMPACTION 1
#endif
152 
// The unit structure that holds metadata of ISeq for MJIT.
struct rb_mjit_unit {
    // Link node used to chain this unit into a struct rb_mjit_unit_list.
    struct list_node unode;
    // Unique order number of unit.
    int id;
    // Dlopen handle of the loaded object file. NULL while the unit is still in the queue.
    void *handle;
    // ISeq this unit was created for. NULL once the ISeq has been GCed.
    rb_iseq_t *iseq;
#if defined(_WIN32)
    // DLL cannot be removed while loaded on Windows. If this is set, it'll be lazily deleted.
    char *so_file;
#endif
    // Only used by unload_units. Flag to check this unit is currently on stack or not.
    bool used_code_p;
    // True if this is still in active_units but it's to be lazily removed
    bool stale_p;
    // mjit_compile's optimization switches
    struct rb_mjit_compile_info compile_info;
    // captured CC values, they should be marked with iseq.
    // The array is released with free() in free_unit().
    const struct rb_callcache **cc_entries;
    unsigned int cc_entries_size; // iseq->body->ci_size + ones of inlined iseqs
};
175 
176 // Linked list of struct rb_mjit_unit.
178  struct list_head head;
179  int length; // the list length
180 };
181 
// Native (non-GVL) mutex primitives, defined outside this file.
extern void rb_native_mutex_lock(rb_nativethread_lock_t *lock);
extern void rb_native_mutex_unlock(rb_nativethread_lock_t *lock);
extern void rb_native_mutex_initialize(rb_nativethread_lock_t *lock);
extern void rb_native_mutex_destroy(rb_nativethread_lock_t *lock);

// Native condition variable primitives, defined outside this file.
extern void rb_native_cond_initialize(rb_nativethread_cond_t *cond);
extern void rb_native_cond_destroy(rb_nativethread_cond_t *cond);
extern void rb_native_cond_signal(rb_nativethread_cond_t *cond);
extern void rb_native_cond_broadcast(rb_nativethread_cond_t *cond);
extern void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lock_t *mutex);

// process.c
// Used by exec_process() to wait for the compiler process while holding
// vm->waitpid_lock.
extern rb_pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options, rb_nativethread_cond_t *cond);
195 
// A copy of MJIT portion of MRI options since MJIT initialization. We
// need them as MJIT threads still can work when the most MRI data were
// freed.
struct mjit_options mjit_opts;

// true if MJIT is enabled.
bool mjit_enabled = false;
// true if JIT-ed code should be called. When `ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS`
// and `mjit_call_p == false`, any JIT-ed code execution is cancelled as soon as possible.
bool mjit_call_p = false;

// Priority queue of iseqs waiting for JIT compilation.
// It is stored as a unit list (list head plus length), not as a bare pointer.
static struct rb_mjit_unit_list unit_queue = { LIST_HEAD_INIT(unit_queue.head) };
// List of units which are successfully compiled.
static struct rb_mjit_unit_list active_units = { LIST_HEAD_INIT(active_units.head) };
// List of compacted so files which will be cleaned up by `free_list()` in `mjit_finish()`.
static struct rb_mjit_unit_list compact_units = { LIST_HEAD_INIT(compact_units.head) };
// List of units before recompilation and just waiting for dlclose().
static struct rb_mjit_unit_list stale_units = { LIST_HEAD_INIT(stale_units.head) };
// The number of so far processed ISEQs, used to generate unique id.
static int current_unit_num;
// A mutex for conditionals and critical sections.
static rb_nativethread_lock_t mjit_engine_mutex;
// A thread conditional to wake up `mjit_finish` at the end of PCH thread.
static rb_nativethread_cond_t mjit_pch_wakeup;
// A thread conditional to wake up the client if there is a change in
// executed unit status.
static rb_nativethread_cond_t mjit_client_wakeup;
// A thread conditional to wake up a worker if we have something
// to add or we need to stop MJIT engine.
static rb_nativethread_cond_t mjit_worker_wakeup;
// A thread conditional to wake up workers at the end of GC.
static rb_nativethread_cond_t mjit_gc_wakeup;
// Greater than 0 when GC is working.
static int in_gc = 0;
// True when JIT is working.
static bool in_jit = false;
// True when active_units has at least one stale_p=true unit.
static bool pending_stale_p = false;
// The times when unload_units is requested. unload_units is called after some requests.
static int unload_requests = 0;
// The total number of unloaded units.
static int total_unloads = 0;
// Set to true to stop worker.
static bool stop_worker_p;
// Set to true if worker is stopped.
static bool worker_stopped = true;

// Path of "/tmp", which can be changed to $TMP in MinGW.
static char *tmp_dir;

// Used C compiler path.
static const char *cc_path;
// Used C compiler flags.
static const char **cc_common_args;
// Used C compiler flags added by --mjit-debug=...
static char **cc_added_args;
// Name of the precompiled header file.
static char *pch_file;
// The process id which should delete the pch_file on mjit_finish.
static rb_pid_t pch_owner_pid;
// Status of the precompiled header creation. The status is
// shared by the workers and the pch thread.
static enum {PCH_NOT_READY, PCH_FAILED, PCH_SUCCESS} pch_status;

#ifndef _MSC_VER
// Name of the header file.
static char *header_file;
#endif

#ifdef _WIN32
// Linker option to enable libruby.
static char *libruby_pathflag;
#endif
271 
272 #include "mjit_config.h"
273 
// Flags for producing position-independent shared objects. They are enabled
// for GCC, and for clang only on FreeBSD or glibc platforms; elsewhere the
// macro expands to nothing. MJIT_CFLAGS_PIPE records whether "-pipe" is
// part of the flags.
#if defined(__GNUC__) && \
     (!defined(__clang__) || \
      (defined(__clang__) && (defined(__FreeBSD__) || defined(__GLIBC__))))
# define GCC_PIC_FLAGS "-Wfatal-errors", "-fPIC", "-shared", "-w", "-pipe",
# define MJIT_CFLAGS_PIPE 1
#else
# define GCC_PIC_FLAGS /* empty */
# define MJIT_CFLAGS_PIPE 0
#endif

// Use `-nodefaultlibs -nostdlib` for GCC where possible, which does not work on mingw, cygwin, AIX, and OpenBSD.
// This seems to improve MJIT performance on GCC.
#if defined __GNUC__ && !defined __clang__ && !defined(_WIN32) && !defined(__CYGWIN__) && !defined(_AIX) && !defined(__OpenBSD__)
# define GCC_NOSTDLIB_FLAGS "-nodefaultlibs", "-nostdlib",
#else
# define GCC_NOSTDLIB_FLAGS // empty
#endif

// All tables below are NULL-terminated string arrays so that they can be
// concatenated with form_args().

// Template for the common compiler arguments (see cc_common_args).
static const char *const CC_COMMON_ARGS[] = {
    MJIT_CC_COMMON MJIT_CFLAGS GCC_PIC_FLAGS
    NULL
};

// Code generation flags; CC_CODEFLAG_ARGS selects one of these two.
static const char *const CC_DEBUG_ARGS[] = {MJIT_DEBUGFLAGS NULL};
static const char *const CC_OPTIMIZE_ARGS[] = {MJIT_OPTFLAGS NULL};

// Arguments used when producing the shared object.
static const char *const CC_LDSHARED_ARGS[] = {MJIT_LDSHARED GCC_PIC_FLAGS NULL};
static const char *const CC_DLDFLAGS_ARGS[] = {MJIT_DLDFLAGS NULL};
// `CC_LINKER_ARGS` are linker flags which must be passed to `-c` as well.
static const char *const CC_LINKER_ARGS[] = {
#if defined __GNUC__ && !defined __clang__ && !defined(__OpenBSD__)
    "-nostartfiles",
#endif
    GCC_NOSTDLIB_FLAGS NULL
};

// Libraries passed when producing the shared object.
static const char *const CC_LIBS[] = {
#if defined(_WIN32) || defined(__CYGWIN__)
    MJIT_LIBS // mswin, mingw, cygwin
#endif
#if defined __GNUC__ && !defined __clang__
# if defined(_WIN32)
    "-lmsvcrt", // mingw
# endif
    "-lgcc", // mingw, cygwin, and GCC platforms using `-nodefaultlibs -nostdlib`
#endif
#if defined __ANDROID__
    "-lm", // to avoid 'cannot locate symbol "modf" referenced by .../_ruby_mjit_XXX.so"'
#endif
    NULL
};

// Debug flags when mjit_opts.debug is set, optimization flags otherwise.
#define CC_CODEFLAG_ARGS (mjit_opts.debug ? CC_DEBUG_ARGS : CC_OPTIMIZE_ARGS)
327 
328 // Print the arguments according to FORMAT to stderr only if MJIT
329 // verbose option value is more or equal to LEVEL.
330 PRINTF_ARGS(static void, 2, 3)
331 verbose(int level, const char *format, ...)
332 {
333  if (mjit_opts.verbose >= level) {
334  va_list args;
335  size_t len = strlen(format);
336  char *full_format = alloca(sizeof(char) * (len + 2));
337 
338  // Creating `format + '\n'` to atomically print format and '\n'.
339  memcpy(full_format, format, len);
340  full_format[len] = '\n';
341  full_format[len+1] = '\0';
342 
343  va_start(args, format);
344  vfprintf(stderr, full_format, args);
345  va_end(args);
346  }
347 }
348 
349 PRINTF_ARGS(static void, 1, 2)
350 mjit_warning(const char *format, ...)
351 {
352  if (mjit_opts.warnings || mjit_opts.verbose) {
353  va_list args;
354 
355  fprintf(stderr, "MJIT warning: ");
356  va_start(args, format);
357  vfprintf(stderr, format, args);
358  va_end(args);
359  fprintf(stderr, "\n");
360  }
361 }
362 
363 // Add unit node to the tail of doubly linked `list`. It should be not in
364 // the list before.
365 static void
366 add_to_list(struct rb_mjit_unit *unit, struct rb_mjit_unit_list *list)
367 {
368  (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_unit_queue, list == &unit_queue);
369  (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_active_units, list == &active_units);
370  (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_compact_units, list == &compact_units);
371  (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_stale_units, list == &stale_units);
372 
373  list_add_tail(&list->head, &unit->unode);
374  list->length++;
375 }
376 
377 static void
378 remove_from_list(struct rb_mjit_unit *unit, struct rb_mjit_unit_list *list)
379 {
380 #if USE_DEBUG_COUNTER
381  rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_unit_queue, -1, list == &unit_queue);
382  rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_active_units, -1, list == &active_units);
383  rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_compact_units, -1, list == &compact_units);
384  rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_stale_units, -1, list == &stale_units);
385 #endif
386 
387  list_del(&unit->unode);
388  list->length--;
389 }
390 
// Delete `filename`; on failure, report the reason through mjit_warning().
static void
remove_file(const char *filename)
{
    if (remove(filename) == 0) return;

    mjit_warning("failed to remove \"%s\": %s", filename, strerror(errno));
}
398 
// Lazily delete .so files.
// Only Windows records a pending file name in the unit; on other platforms
// this function does nothing.
static void
clean_temp_files(struct rb_mjit_unit *unit)
{
#if defined(_WIN32)
    char *pending = unit->so_file;
    if (pending == NULL) return;

    // Detach the name from the unit before removing the file and freeing it.
    unit->so_file = NULL;
    // unit->so_file is set only when mjit_opts.save_temps is false.
    remove_file(pending);
    free(pending);
#endif
}
414 
415 // This is called in the following situations:
416 // 1) On dequeue or `unload_units()`, associated ISeq is already GCed.
417 // 2) The unit is not called often and unloaded by `unload_units()`.
418 // 3) Freeing lists on `mjit_finish()`.
419 //
420 // `jit_func` value does not matter for 1 and 3 since the unit won't be used anymore.
421 // For the situation 2, this sets the ISeq's JIT state to NOT_COMPILED_JIT_ISEQ_FUNC
422 // to prevent the situation that the same methods are continuously compiled.
423 static void
424 free_unit(struct rb_mjit_unit *unit)
425 {
426  if (unit->iseq) { // ISeq is not GCed
427  unit->iseq->body->jit_func = (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
428  unit->iseq->body->jit_unit = NULL;
429  }
430  if (unit->cc_entries) {
431  void *entries = (void *)unit->cc_entries;
432  free(entries);
433  }
434  if (unit->handle && dlclose(unit->handle)) { // handle is NULL if it's in queue
435  mjit_warning("failed to close handle for u%d: %s", unit->id, dlerror());
436  }
437  clean_temp_files(unit);
438  free(unit);
439 }
440 
// Start a critical section. Use message `msg` to print debug info at `level`.
// Logs before and after acquiring mjit_engine_mutex, so a blocked lock shows
// up as "Locking" without a matching "Locked".
static inline void
CRITICAL_SECTION_START(int level, const char *msg)
{
    verbose(level, "Locking %s", msg);
    rb_native_mutex_lock(&mjit_engine_mutex);
    verbose(level, "Locked %s", msg);
}
449 
// Finish the current critical section. Use message `msg` to print
// debug info at `level`.
// Note that the "Unlocked" message is printed while mjit_engine_mutex is
// still held; the mutex is released right after it.
static inline void
CRITICAL_SECTION_FINISH(int level, const char *msg)
{
    verbose(level, "Unlocked %s", msg);
    rb_native_mutex_unlock(&mjit_engine_mutex);
}
458 
// Format "<tmp_dir>/<prefix>p<pid>u<id><suffix>" into `str`, writing at most
// `size` bytes. The current process id and `id` make the name unique.
// Returns the snprintf() result, which callers can compare against `size`
// to detect truncation.
static int
sprint_uniq_filename(char *str, size_t size, unsigned long id, const char *prefix, const char *suffix)
{
    return snprintf(str, size, "%s/%sp%"PRI_PIDT_PREFIX"uu%lu%s", tmp_dir, prefix, getpid(), id, suffix);
}
464 
// Return the current time in milliseconds as a double.
// macOS delegates to ruby_real_ms_time(); other platforms read
// clock_gettime() when available (CLOCK_MONOTONIC if defined, otherwise
// CLOCK_REALTIME) and fall back to gettimeofday().
#ifdef __APPLE__
double ruby_real_ms_time(void);
# define real_ms_time() ruby_real_ms_time()
#else
static double
real_ms_time(void)
{
# ifdef HAVE_CLOCK_GETTIME
#  ifdef CLOCK_MONOTONIC
    const clockid_t clock_id = CLOCK_MONOTONIC;
#  else
    const clockid_t clock_id = CLOCK_REALTIME;
#  endif
    struct timespec now;

    clock_gettime(clock_id, &now);
    return now.tv_nsec / 1000000.0 + now.tv_sec * 1000.0;
# else
    struct timeval now;

    gettimeofday(&now, NULL);
    return now.tv_usec / 1000.0 + now.tv_sec * 1000.0;
# endif
}
#endif
491 
492 // Return the best unit from list. The best is the first
493 // high priority unit or the unit whose iseq has the biggest number
494 // of calls so far.
495 static struct rb_mjit_unit *
496 get_from_list(struct rb_mjit_unit_list *list)
497 {
498  while (in_gc) {
499  verbose(3, "Waiting wakeup from GC");
500  rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
501  }
502  in_jit = true; // Lock GC
503 
504  // Find iseq with max total_calls
505  struct rb_mjit_unit *unit = NULL, *next, *best = NULL;
506  list_for_each_safe(&list->head, unit, next, unode) {
507  if (unit->iseq == NULL) { // ISeq is GCed.
508  remove_from_list(unit, list);
509  free_unit(unit);
510  continue;
511  }
512 
513  if (best == NULL || best->iseq->body->total_calls < unit->iseq->body->total_calls) {
514  best = unit;
515  }
516  }
517 
518  in_jit = false; // Unlock GC
519  verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
520  rb_native_cond_signal(&mjit_client_wakeup);
521 
522  if (best) {
523  remove_from_list(best, list);
524  }
525  return best;
526 }
527 
// Count the entries of the NULL-terminated array `args`; the terminating
// NULL itself is not counted.
static size_t
args_len(char *const *args)
{
    size_t count = 0;

    while (args[count] != NULL) {
        count++;
    }
    return count;
}
538 
539 // Concatenate `num` passed NULL-terminated arrays of strings, put the
540 // result (with NULL end marker) into the heap, and return the result.
541 static char **
542 form_args(int num, ...)
543 {
544  va_list argp;
545  size_t len, n;
546  int i;
547  char **args, **res, **tmp;
548 
549  va_start(argp, num);
550  res = NULL;
551  for (i = len = 0; i < num; i++) {
552  args = va_arg(argp, char **);
553  n = args_len(args);
554  if ((tmp = (char **)realloc(res, sizeof(char *) * (len + n + 1))) == NULL) {
555  free(res);
556  res = NULL;
557  break;
558  }
559  res = tmp;
560  MEMCPY(res + len, args, char *, n + 1);
561  len += n;
562  }
563  va_end(argp);
564  return res;
565 }
566 
567 COMPILER_WARNING_PUSH
568 #if __has_warning("-Wdeprecated-declarations") || RBIMPL_COMPILER_IS(GCC)
569 COMPILER_WARNING_IGNORED(-Wdeprecated-declarations)
570 #endif
571 // Start an OS process of absolute executable path with arguments `argv`.
572 // Return PID of the process.
573 static pid_t
574 start_process(const char *abspath, char *const *argv)
575 {
576  // Not calling non-async-signal-safe functions between vfork
577  // and execv for safety
578  int dev_null = rb_cloexec_open(ruby_null_device, O_WRONLY, 0);
579  if (dev_null < 0) {
580  verbose(1, "MJIT: Failed to open a null device: %s", strerror(errno));
581  return -1;
582  }
583  if (mjit_opts.verbose >= 2) {
584  const char *arg;
585  fprintf(stderr, "Starting process: %s", abspath);
586  for (int i = 0; (arg = argv[i]) != NULL; i++)
587  fprintf(stderr, " %s", arg);
588  fprintf(stderr, "\n");
589  }
590 
591  pid_t pid;
592 #ifdef _WIN32
593  extern HANDLE rb_w32_start_process(const char *abspath, char *const *argv, int out_fd);
594  int out_fd = 0;
595  if (mjit_opts.verbose <= 1) {
596  // Discard cl.exe's outputs like:
597  // _ruby_mjit_p12u3.c
598  // Creating library C:.../_ruby_mjit_p12u3.lib and object C:.../_ruby_mjit_p12u3.exp
599  out_fd = dev_null;
600  }
601 
602  pid = (pid_t)rb_w32_start_process(abspath, argv, out_fd);
603  if (pid == 0) {
604  verbose(1, "MJIT: Failed to create process: %s", dlerror());
605  return -1;
606  }
607 #else
608  if ((pid = vfork()) == 0) { /* TODO: reuse some function in process.c */
609  umask(0077);
610  if (mjit_opts.verbose == 0) {
611  // CC can be started in a thread using a file which has been
612  // already removed while MJIT is finishing. Discard the
613  // messages about missing files.
614  dup2(dev_null, STDERR_FILENO);
615  dup2(dev_null, STDOUT_FILENO);
616  }
617  (void)close(dev_null);
618  pid = execv(abspath, argv); // Pid will be negative on an error
619  // Even if we successfully found CC to compile PCH we still can
620  // fail with loading the CC in very rare cases for some reasons.
621  // Stop the forked process in this case.
622  verbose(1, "MJIT: Error in execv: %s", abspath);
623  _exit(1);
624  }
625 #endif
626  (void)close(dev_null);
627  return pid;
628 }
629 COMPILER_WARNING_POP
630 
631 // Execute an OS process of executable PATH with arguments ARGV.
632 // Return -1 or -2 if failed to execute, otherwise exit code of the process.
633 // TODO: Use a similar function in process.c
634 static int
635 exec_process(const char *path, char *const argv[])
636 {
637  int stat, exit_code = -2;
638  rb_vm_t *vm = WAITPID_USE_SIGCHLD ? GET_VM() : 0;
639  rb_nativethread_cond_t cond;
640 
641  if (vm) {
643  rb_native_mutex_lock(&vm->waitpid_lock);
644  }
645 
646  pid_t pid = start_process(path, argv);
647  for (;pid > 0;) {
648  pid_t r = vm ? ruby_waitpid_locked(vm, pid, &stat, 0, &cond)
649  : waitpid(pid, &stat, 0);
650  if (r == -1) {
651  if (errno == EINTR) continue;
652  fprintf(stderr, "[%"PRI_PIDT_PREFIX"d] waitpid(%lu): %s (SIGCHLD=%d,%u)\n",
653  getpid(), (unsigned long)pid, strerror(errno),
654  RUBY_SIGCHLD, SIGCHLD_LOSSY);
655  break;
656  }
657  else if (r == pid) {
658  if (WIFEXITED(stat)) {
659  exit_code = WEXITSTATUS(stat);
660  break;
661  }
662  else if (WIFSIGNALED(stat)) {
663  exit_code = -1;
664  break;
665  }
666  }
667  }
668 
669  if (vm) {
670  rb_native_mutex_unlock(&vm->waitpid_lock);
671  rb_native_cond_destroy(&cond);
672  }
673  return exit_code;
674 }
675 
// Remove the shared object `so_file` that belongs to `unit`.
// On Windows the file cannot be removed while it is in use, so a copy of
// its name is stored in the unit for clean_temp_files() to delete later.
// Elsewhere the file is removed immediately and `unit` is not touched.
static void
remove_so_file(const char *so_file, struct rb_mjit_unit *unit)
{
#if defined(_WIN32)
    // Windows can't remove files while it's used.
    char *name_copy = strdup(so_file);

    unit->so_file = name_copy; // lazily delete on `clean_temp_files()`
    if (name_copy == NULL)
        mjit_warning("failed to allocate memory to lazily remove '%s': %s", so_file, strerror(errno));
#else
    remove_file(so_file);
#endif
}
688 
689 // Print _mjitX, but make a human-readable funcname when --mjit-debug is used
690 static void
691 sprint_funcname(char *funcname, const struct rb_mjit_unit *unit)
692 {
693  const rb_iseq_t *iseq = unit->iseq;
694  if (iseq == NULL || (!mjit_opts.debug && !mjit_opts.debug_flags)) {
695  sprintf(funcname, "_mjit%d", unit->id);
696  return;
697  }
698 
699  // Generate a short path
700  const char *path = RSTRING_PTR(rb_iseq_path(iseq));
701  const char *lib = "/lib/";
702  const char *version = "/" STRINGIZE(RUBY_API_VERSION_MAJOR) "." STRINGIZE(RUBY_API_VERSION_MINOR) "." STRINGIZE(RUBY_API_VERSION_TEENY) "/";
703  while (strstr(path, lib)) // skip "/lib/"
704  path = strstr(path, lib) + strlen(lib);
705  while (strstr(path, version)) // skip "/x.y.z/"
706  path = strstr(path, version) + strlen(version);
707 
708  // Annotate all-normalized method names
709  const char *method = RSTRING_PTR(iseq->body->location.label);
710  if (!strcmp(method, "[]")) method = "AREF";
711  if (!strcmp(method, "[]=")) method = "ASET";
712 
713  // Print and normalize
714  sprintf(funcname, "_mjit%d_%s_%s", unit->id, path, method);
715  for (size_t i = 0; i < strlen(funcname); i++) {
716  char c = funcname[i];
717  if (!(('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || c == '_')) {
718  funcname[i] = '_';
719  }
720  }
721 }
722 
723 static const rb_iseq_t **compiling_iseqs = NULL;
724 
725 static bool
726 set_compiling_iseqs(const rb_iseq_t *iseq)
727 {
728  compiling_iseqs = calloc(iseq->body->iseq_size + 2, sizeof(rb_iseq_t *)); // 2: 1 (unit->iseq) + 1 (NULL end)
729  if (compiling_iseqs == NULL)
730  return false;
731 
732  compiling_iseqs[0] = iseq;
733  int i = 1;
734 
735  unsigned int pos = 0;
736  while (pos < iseq->body->iseq_size) {
737  int insn = rb_vm_insn_decode(iseq->body->iseq_encoded[pos]);
738  if (insn == BIN(opt_send_without_block) || insn == BIN(opt_size)) {
739  CALL_DATA cd = (CALL_DATA)iseq->body->iseq_encoded[pos + 1];
740  extern const rb_iseq_t *rb_mjit_inlinable_iseq(const struct rb_callinfo *ci, const struct rb_callcache *cc);
741  const rb_iseq_t *iseq = rb_mjit_inlinable_iseq(cd->ci, cd->cc);
742  if (iseq != NULL) {
743  compiling_iseqs[i] = iseq;
744  i++;
745  }
746  }
747  pos += insn_len(insn);
748  }
749  return true;
750 }
751 
752 static void
753 free_compiling_iseqs(void)
754 {
756 #ifdef _MSC_VER
757  RBIMPL_WARNING_IGNORED(4090); /* suppress false warning by MSVC */
758 #endif
759  free(compiling_iseqs);
761  compiling_iseqs = NULL;
762 }
763 
764 bool
765 rb_mjit_compiling_iseq_p(const rb_iseq_t *iseq)
766 {
767  assert(compiling_iseqs != NULL);
768  int i = 0;
769  while (compiling_iseqs[i]) {
770  if (compiling_iseqs[i] == iseq) return true;
771  i++;
772  }
773  return false;
774 }
775 
// Flags for opening a generated C file: write-only, and O_CREAT|O_EXCL so
// that the open fails if the file already exists. O_BINARY is added where
// the platform defines it.
static const int c_file_access_mode =
#ifdef O_BINARY
    O_BINARY|
#endif
    O_WRONLY|O_EXCL|O_CREAT;

// Copy `len` bytes of `str` to `p` and evaluate to the position right after
// the copied bytes. No NUL terminator is written.
#define append_str2(p, str, len) ((char *)memcpy((p), str, (len))+(len))
// Same, with the length taken from sizeof(str) minus the terminating NUL;
// `str` must therefore be an array, not a pointer.
#define append_str(p, str) append_str2(p, str, sizeof(str)-1)
// Same, for a string literal whose length is computed by rb_strlen_lit.
#define append_lit(p, str) append_str2(p, str, rb_strlen_lit(str))
785 
786 #ifdef _MSC_VER
// Compile C file to so. It returns true if it succeeds. (mswin)
//
// The per-file compiler options are built on the stack, the compiler at
// `cc_path` is run synchronously through exec_process(), and on success the
// by-product files next to `so_file` are removed unless
// mjit_opts.save_temps is set.
static bool
compile_c_to_so(const char *c_file, const char *so_file)
{
    // Slots 0-6 are filled in below; "-link" and libruby_pathflag follow them.
    const char *files[] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, "-link", libruby_pathflag, NULL };
    char *p;

    // files[0] = "-Fe*.dll"
    files[0] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fe") + strlen(so_file) + 1));
    p = append_lit(p, "-Fe");
    p = append_str2(p, so_file, strlen(so_file));
    *p = '\0';

    // files[1] = "-Fo*.obj"
    // We don't need .obj file, but it's somehow created to cwd without -Fo and we want to control the output directory.
    files[1] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fo") + strlen(so_file) - rb_strlen_lit(DLEXT) + rb_strlen_lit(".obj") + 1));
    // obj_file points just past "-Fo", i.e. at the bare .obj path inside files[1].
    char *obj_file = p = append_lit(p, "-Fo");
    p = append_str2(p, so_file, strlen(so_file) - rb_strlen_lit(DLEXT));
    p = append_lit(p, ".obj");
    *p = '\0';

    // files[2] = "-Yu*.pch"
    files[2] = p = alloca(sizeof(char) * (rb_strlen_lit("-Yu") + strlen(pch_file) + 1));
    p = append_lit(p, "-Yu");
    p = append_str2(p, pch_file, strlen(pch_file));
    *p = '\0';

    // files[3] = "C:/.../rb_mjit_header-*.obj"
    // pch_file with its ".pch" extension replaced by ".obj" (same length).
    files[3] = p = alloca(sizeof(char) * (strlen(pch_file) + 1));
    p = append_str2(p, pch_file, strlen(pch_file) - strlen(".pch"));
    p = append_lit(p, ".obj");
    *p = '\0';

    // files[4] = "-Tc*.c"
    files[4] = p = alloca(sizeof(char) * (rb_strlen_lit("-Tc") + strlen(c_file) + 1));
    p = append_lit(p, "-Tc");
    p = append_str2(p, c_file, strlen(c_file));
    *p = '\0';

    // files[5] = "-Fd*.pdb"
    // Generate .pdb file in temporary directory instead of cwd.
    files[5] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fd") + strlen(so_file) - rb_strlen_lit(DLEXT) + rb_strlen_lit(".pdb") + 1));
    p = append_lit(p, "-Fd");
    p = append_str2(p, so_file, strlen(so_file) - rb_strlen_lit(DLEXT));
    p = append_lit(p, ".pdb");
    *p = '\0';

    // files[6] = "-Z7"
    // Put this last to override any debug options that came previously.
    files[6] = p = alloca(sizeof(char) * rb_strlen_lit("-Z7") + 1);
    p = append_lit(p, "-Z7");
    *p = '\0';

    char **args = form_args(5, CC_LDSHARED_ARGS, CC_CODEFLAG_ARGS,
                            files, CC_LIBS, CC_DLDFLAGS_ARGS);
    if (args == NULL)
        return false;

    int exit_code = exec_process(cc_path, args);
    free(args);

    if (exit_code == 0) {
        // remove never-used files (.obj, .lib, .exp, .pdb). XXX: Is there any way not to generate this?
        if (!mjit_opts.save_temps) {
            char *before_dot;
            remove_file(obj_file);

            // Rewrite the extension of obj_file in place. Every replacement
            // has the same length as ".obj", so the terminating NUL stays valid.
            before_dot = obj_file + strlen(obj_file) - rb_strlen_lit(".obj");
            append_lit(before_dot, ".lib"); remove_file(obj_file);
            append_lit(before_dot, ".exp"); remove_file(obj_file);
            append_lit(before_dot, ".pdb"); remove_file(obj_file);
        }
    }
    else {
        verbose(2, "compile_c_to_so: compile error: %d", exit_code);
    }
    return exit_code == 0;
}
865 #else // _MSC_VER
866 
867 // The function producing the pre-compiled header.
868 static void
869 make_pch(void)
870 {
871  const char *rest_args[] = {
872 # ifdef __clang__
873  "-emit-pch",
874  "-c",
875 # endif
876  // -nodefaultlibs is a linker flag, but it may affect cc1 behavior on Gentoo, which should NOT be changed on pch:
877  // https://gitweb.gentoo.org/proj/gcc-patches.git/tree/7.3.0/gentoo/13_all_default-ssp-fix.patch
878  GCC_NOSTDLIB_FLAGS
879  "-o", pch_file, header_file,
880  NULL,
881  };
882 
883  verbose(2, "Creating precompiled header");
884  char **args = form_args(4, cc_common_args, CC_CODEFLAG_ARGS, cc_added_args, rest_args);
885  if (args == NULL) {
886  mjit_warning("making precompiled header failed on forming args");
887  CRITICAL_SECTION_START(3, "in make_pch");
888  pch_status = PCH_FAILED;
889  CRITICAL_SECTION_FINISH(3, "in make_pch");
890  return;
891  }
892 
893  int exit_code = exec_process(cc_path, args);
894  free(args);
895 
896  CRITICAL_SECTION_START(3, "in make_pch");
897  if (exit_code == 0) {
898  pch_status = PCH_SUCCESS;
899  }
900  else {
901  mjit_warning("Making precompiled header failed on compilation. Stopping MJIT worker...");
902  pch_status = PCH_FAILED;
903  }
904  /* wakeup `mjit_finish` */
905  rb_native_cond_broadcast(&mjit_pch_wakeup);
906  CRITICAL_SECTION_FINISH(3, "in make_pch");
907 }
908 
909 // Compile .c file to .so file. It returns true if it succeeds. (non-mswin)
910 // Not compiling .c to .so directly because it fails on MinGW, and this helps
911 // to generate no .dSYM on macOS.
912 static bool
913 compile_c_to_so(const char *c_file, const char *so_file)
914 {
915  char* o_file = alloca(strlen(c_file) + 1);
916  strcpy(o_file, c_file);
917  o_file[strlen(c_file) - 1] = 'o';
918 
919  const char *o_args[] = {
920  "-o", o_file, c_file,
921 # ifdef __clang__
922  "-include-pch", pch_file,
923 # endif
924  "-c", NULL
925  };
926  char **args = form_args(5, cc_common_args, CC_CODEFLAG_ARGS, cc_added_args, o_args, CC_LINKER_ARGS);
927  if (args == NULL) return false;
928  int exit_code = exec_process(cc_path, args);
929  free(args);
930  if (exit_code != 0) {
931  verbose(2, "compile_c_to_so: failed to compile .c to .o: %d", exit_code);
932  return false;
933  }
934 
935  const char *so_args[] = {
936  "-o", so_file,
937 # ifdef _WIN32
938  libruby_pathflag,
939 # endif
940  o_file, NULL
941  };
942 # if defined(__MACH__)
943  extern VALUE rb_libruby_selfpath;
944  const char *loader_args[] = {"-bundle_loader", StringValuePtr(rb_libruby_selfpath), NULL};
945 # else
946  const char *loader_args[] = {NULL};
947 # endif
948  args = form_args(7, CC_LDSHARED_ARGS, CC_CODEFLAG_ARGS, so_args, loader_args, CC_LIBS, CC_DLDFLAGS_ARGS, CC_LINKER_ARGS);
949  if (args == NULL) return false;
950  exit_code = exec_process(cc_path, args);
951  free(args);
952  if (!mjit_opts.save_temps) remove_file(o_file);
953  if (exit_code != 0) {
954  verbose(2, "compile_c_to_so: failed to link .o to .so: %d", exit_code);
955  }
956  return exit_code == 0;
957 }
958 #endif // _MSC_VER
959 
960 #if USE_JIT_COMPACTION
961 static void compile_prelude(FILE *f);
962 
963 static bool
964 compile_compact_jit_code(char* c_file)
965 {
966  FILE *f;
967  int fd = rb_cloexec_open(c_file, c_file_access_mode, 0600);
968  if (fd < 0 || (f = fdopen(fd, "w")) == NULL) {
969  int e = errno;
970  if (fd >= 0) (void)close(fd);
971  verbose(1, "Failed to fopen '%s', giving up JIT for it (%s)", c_file, strerror(e));
972  return false;
973  }
974 
975  compile_prelude(f);
976 
977  // wait until mjit_gc_exit_hook is called
978  CRITICAL_SECTION_START(3, "before mjit_compile to wait GC finish");
979  while (in_gc) {
980  verbose(3, "Waiting wakeup from GC");
981  rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
982  }
983  // We need to check again here because we could've waited on GC above
984  bool iseq_gced = false;
985  struct rb_mjit_unit *child_unit = 0, *next;
986  list_for_each_safe(&active_units.head, child_unit, next, unode) {
987  if (child_unit->iseq == NULL) { // ISeq is GC-ed
988  iseq_gced = true;
989  verbose(1, "JIT compaction: A method for JIT code u%d is obsoleted. Compaction will be skipped.", child_unit->id);
990  remove_from_list(child_unit, &active_units);
991  free_unit(child_unit); // unload it without waiting for throttled unload_units to retry compaction quickly
992  }
993  }
994  in_jit = !iseq_gced;
995  CRITICAL_SECTION_FINISH(3, "before mjit_compile to wait GC finish");
996  if (!in_jit) {
997  fclose(f);
998  if (!mjit_opts.save_temps)
999  remove_file(c_file);
1000  return false;
1001  }
1002 
1003  // This entire loop lock GC so that we do not need to consider a case that
1004  // ISeq is GC-ed in a middle of re-compilation. It takes 3~4ms with 100 methods
1005  // on my machine. It's not too bad compared to compilation time of C (7200~8000ms),
1006  // but it might be larger if we use a larger --jit-max-cache.
1007  //
1008  // TODO: Consider using a more granular lock after we implement inlining across
1009  // compacted functions (not done yet).
1010  bool success = true;
1011  list_for_each(&active_units.head, child_unit, unode) {
1012  CRITICAL_SECTION_START(3, "before set_compiling_iseqs");
1013  success &= set_compiling_iseqs(child_unit->iseq);
1014  CRITICAL_SECTION_FINISH(3, "after set_compiling_iseqs");
1015  if (!success) continue;
1016 
1017  char funcname[MAXPATHLEN];
1018  sprint_funcname(funcname, child_unit);
1019 
1020  long iseq_lineno = 0;
1021  if (FIXNUM_P(child_unit->iseq->body->location.first_lineno))
1022  // FIX2INT may fallback to rb_num2long(), which is a method call and dangerous in MJIT worker. So using only FIX2LONG.
1023  iseq_lineno = FIX2LONG(child_unit->iseq->body->location.first_lineno);
1024  const char *sep = "@";
1025  const char *iseq_label = RSTRING_PTR(child_unit->iseq->body->location.label);
1026  const char *iseq_path = RSTRING_PTR(rb_iseq_path(child_unit->iseq));
1027  if (!iseq_label) iseq_label = sep = "";
1028  fprintf(f, "\n/* %s%s%s:%ld */\n", iseq_label, sep, iseq_path, iseq_lineno);
1029  success &= mjit_compile(f, child_unit->iseq, funcname, child_unit->id);
1030 
1031  CRITICAL_SECTION_START(3, "before compiling_iseqs free");
1032  free_compiling_iseqs();
1033  CRITICAL_SECTION_FINISH(3, "after compiling_iseqs free");
1034  }
1035 
1036  // release blocking mjit_gc_start_hook
1037  CRITICAL_SECTION_START(3, "after mjit_compile to wakeup client for GC");
1038  in_jit = false;
1039  verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
1040  rb_native_cond_signal(&mjit_client_wakeup);
1041  CRITICAL_SECTION_FINISH(3, "in worker to wakeup client for GC");
1042 
1043  fclose(f);
1044  return success;
1045 }
1046 
1047 // Compile all cached .c files and build a single .so file. Reload all JIT func from it.
1048 // This improves the code locality for better performance in terms of iTLB and iCache.
1049 static void
1050 compact_all_jit_code(void)
1051 {
1052  struct rb_mjit_unit *unit, *cur = 0;
1053  static const char c_ext[] = ".c";
1054  static const char so_ext[] = DLEXT;
1055  char c_file[MAXPATHLEN], so_file[MAXPATHLEN];
1056 
1057  // Abnormal use case of rb_mjit_unit that doesn't have ISeq
1058  unit = calloc(1, sizeof(struct rb_mjit_unit)); // To prevent GC, don't use ZALLOC
1059  if (unit == NULL) return;
1060  unit->id = current_unit_num++;
1061  sprint_uniq_filename(c_file, (int)sizeof(c_file), unit->id, MJIT_TMP_PREFIX, c_ext);
1062  sprint_uniq_filename(so_file, (int)sizeof(so_file), unit->id, MJIT_TMP_PREFIX, so_ext);
1063 
1064  bool success = compile_compact_jit_code(c_file);
1065  double start_time = real_ms_time();
1066  if (success) {
1067  success = compile_c_to_so(c_file, so_file);
1068  if (!mjit_opts.save_temps)
1069  remove_file(c_file);
1070  }
1071  double end_time = real_ms_time();
1072 
1073  if (success) {
1074  void *handle = dlopen(so_file, RTLD_NOW);
1075  if (handle == NULL) {
1076  mjit_warning("failure in loading code from compacted '%s': %s", so_file, dlerror());
1077  free(unit);
1078  return;
1079  }
1080  unit->handle = handle;
1081 
1082  // lazily dlclose handle (and .so file for win32) on `mjit_finish()`.
1083  add_to_list(unit, &compact_units);
1084 
1085  if (!mjit_opts.save_temps)
1086  remove_so_file(so_file, unit);
1087 
1088  CRITICAL_SECTION_START(3, "in compact_all_jit_code to read list");
1089  list_for_each(&active_units.head, cur, unode) {
1090  void *func;
1091  char funcname[MAXPATHLEN];
1092  sprint_funcname(funcname, cur);
1093 
1094  if ((func = dlsym(handle, funcname)) == NULL) {
1095  mjit_warning("skipping to reload '%s' from '%s': %s", funcname, so_file, dlerror());
1096  continue;
1097  }
1098 
1099  if (cur->iseq) { // Check whether GCed or not
1100  // Usage of jit_code might be not in a critical section.
1101  MJIT_ATOMIC_SET(cur->iseq->body->jit_func, (mjit_func_t)func);
1102  }
1103  }
1104  CRITICAL_SECTION_FINISH(3, "in compact_all_jit_code to read list");
1105  verbose(1, "JIT compaction (%.1fms): Compacted %d methods %s -> %s", end_time - start_time, active_units.length, c_file, so_file);
1106  }
1107  else {
1108  free(unit);
1109  verbose(1, "JIT compaction failure (%.1fms): Failed to compact methods", end_time - start_time);
1110  }
1111 }
1112 #endif // USE_JIT_COMPACTION
1113 
1114 static void *
1115 load_func_from_so(const char *so_file, const char *funcname, struct rb_mjit_unit *unit)
1116 {
1117  void *handle, *func;
1118 
1119  handle = dlopen(so_file, RTLD_NOW);
1120  if (handle == NULL) {
1121  mjit_warning("failure in loading code from '%s': %s", so_file, dlerror());
1122  return (void *)NOT_COMPILED_JIT_ISEQ_FUNC;
1123  }
1124 
1125  func = dlsym(handle, funcname);
1126  unit->handle = handle;
1127  return func;
1128 }
1129 
1130 #ifndef __clang__
// Return a pointer just past the part of `s` that names the header itself.
// With GCC-compatible compilers a trailing ".gch" is excluded, provided at
// least one character precedes it; otherwise the end of the string is returned.
static const char *
header_name_end(const char *s)
{
    const size_t len = strlen(s);
# ifdef __GNUC__ // don't chomp .pch for mswin
    static const char gch_ext[] = ".gch";
    const size_t ext_len = sizeof(gch_ext) - 1;

    // chomp .gch suffix
    if (len > ext_len && strcmp(s + len - ext_len, gch_ext) == 0) {
        return s + len - ext_len;
    }
# endif
    return s + len;
}
1145 #endif
1146 
1147 // Print platform-specific prerequisites in generated code.
1148 static void
1149 compile_prelude(FILE *f)
1150 {
1151 #ifndef __clang__ // -include-pch is used for Clang
1152  const char *s = pch_file;
1153  const char *e = header_name_end(s);
1154 
1155  fprintf(f, "#include \"");
1156  // print pch_file except .gch for gcc, but keep .pch for mswin
1157  for (; s < e; s++) {
1158  switch (*s) {
1159  case '\\': case '"':
1160  fputc('\\', f);
1161  }
1162  fputc(*s, f);
1163  }
1164  fprintf(f, "\"\n");
1165 #endif
1166 
1167 #ifdef _WIN32
1168  fprintf(f, "void _pei386_runtime_relocator(void){}\n");
1169  fprintf(f, "int __stdcall DllMainCRTStartup(void* hinstDLL, unsigned int fdwReason, void* lpvReserved) { return 1; }\n");
1170 #endif
1171 }
1172 
1173 // Compile ISeq in UNIT and return function pointer of JIT-ed code.
1174 // It may return NOT_COMPILED_JIT_ISEQ_FUNC if something went wrong.
1175 static mjit_func_t
1176 convert_unit_to_func(struct rb_mjit_unit *unit)
1177 {
1178  static const char c_ext[] = ".c";
1179  static const char so_ext[] = DLEXT;
1180  char c_file[MAXPATHLEN], so_file[MAXPATHLEN], funcname[MAXPATHLEN];
1181 
1182  sprint_uniq_filename(c_file, (int)sizeof(c_file), unit->id, MJIT_TMP_PREFIX, c_ext);
1183  sprint_uniq_filename(so_file, (int)sizeof(so_file), unit->id, MJIT_TMP_PREFIX, so_ext);
1184  sprint_funcname(funcname, unit);
1185 
1186  FILE *f;
1187  int fd = rb_cloexec_open(c_file, c_file_access_mode, 0600);
1188  if (fd < 0 || (f = fdopen(fd, "w")) == NULL) {
1189  int e = errno;
1190  if (fd >= 0) (void)close(fd);
1191  verbose(1, "Failed to fopen '%s', giving up JIT for it (%s)", c_file, strerror(e));
1192  return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1193  }
1194 
1195  // print #include of MJIT header, etc.
1196  compile_prelude(f);
1197 
1198  // wait until mjit_gc_exit_hook is called
1199  CRITICAL_SECTION_START(3, "before mjit_compile to wait GC finish");
1200  while (in_gc) {
1201  verbose(3, "Waiting wakeup from GC");
1202  rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
1203  }
1204  // We need to check again here because we could've waited on GC above
1205  in_jit = (unit->iseq != NULL);
1206  if (in_jit)
1207  in_jit &= set_compiling_iseqs(unit->iseq);
1208  CRITICAL_SECTION_FINISH(3, "before mjit_compile to wait GC finish");
1209  if (!in_jit) {
1210  fclose(f);
1211  if (!mjit_opts.save_temps)
1212  remove_file(c_file);
1213  return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1214  }
1215 
1216  // To make MJIT worker thread-safe against GC.compact, copy ISeq values while `in_jit` is true.
1217  long iseq_lineno = 0;
1218  if (FIXNUM_P(unit->iseq->body->location.first_lineno))
1219  // FIX2INT may fallback to rb_num2long(), which is a method call and dangerous in MJIT worker. So using only FIX2LONG.
1220  iseq_lineno = FIX2LONG(unit->iseq->body->location.first_lineno);
1221  char *iseq_label = alloca(RSTRING_LEN(unit->iseq->body->location.label) + 1);
1222  char *iseq_path = alloca(RSTRING_LEN(rb_iseq_path(unit->iseq)) + 1);
1223  strcpy(iseq_label, RSTRING_PTR(unit->iseq->body->location.label));
1224  strcpy(iseq_path, RSTRING_PTR(rb_iseq_path(unit->iseq)));
1225 
1226  verbose(2, "start compilation: %s@%s:%ld -> %s", iseq_label, iseq_path, iseq_lineno, c_file);
1227  fprintf(f, "/* %s@%s:%ld */\n\n", iseq_label, iseq_path, iseq_lineno);
1228  bool success = mjit_compile(f, unit->iseq, funcname, unit->id);
1229 
1230  // release blocking mjit_gc_start_hook
1231  CRITICAL_SECTION_START(3, "after mjit_compile to wakeup client for GC");
1232  free_compiling_iseqs();
1233  in_jit = false;
1234  verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
1235  rb_native_cond_signal(&mjit_client_wakeup);
1236  CRITICAL_SECTION_FINISH(3, "in worker to wakeup client for GC");
1237 
1238  fclose(f);
1239  if (!success) {
1240  if (!mjit_opts.save_temps)
1241  remove_file(c_file);
1242  verbose(1, "JIT failure: %s@%s:%ld -> %s", iseq_label, iseq_path, iseq_lineno, c_file);
1243  return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1244  }
1245 
1246  double start_time = real_ms_time();
1247  success = compile_c_to_so(c_file, so_file);
1248  if (!mjit_opts.save_temps)
1249  remove_file(c_file);
1250  double end_time = real_ms_time();
1251 
1252  if (!success) {
1253  verbose(2, "Failed to generate so: %s", so_file);
1254  return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1255  }
1256 
1257  void *func = load_func_from_so(so_file, funcname, unit);
1258  if (!mjit_opts.save_temps)
1259  remove_so_file(so_file, unit);
1260 
1261  if ((uintptr_t)func > (uintptr_t)LAST_JIT_ISEQ_FUNC) {
1262  verbose(1, "JIT success (%.1fms): %s@%s:%ld -> %s",
1263  end_time - start_time, iseq_label, iseq_path, iseq_lineno, c_file);
1264  }
1265  return (mjit_func_t)func;
1266 }
1267 
1268 // To see cc_entries using index returned by `mjit_capture_cc_entries` in mjit_compile.c
1269 const struct rb_callcache **
1270 mjit_iseq_cc_entries(const struct rb_iseq_constant_body *const body)
1271 {
1272  return body->jit_unit->cc_entries;
1273 }
1274 
1275 // Capture cc entries of `captured_iseq` and append them to `compiled_iseq->jit_unit->cc_entries`.
1276 // This is needed when `captured_iseq` is inlined by `compiled_iseq` and GC needs to mark inlined cc.
1277 //
1278 // Index to refer to `compiled_iseq->jit_unit->cc_entries` is returned instead of the address
1279 // because old addresses may be invalidated by `realloc` later. -1 is returned on failure.
1280 //
1281 // This assumes that it's safe to reference cc without acquiring GVL.
1282 int
1283 mjit_capture_cc_entries(const struct rb_iseq_constant_body *compiled_iseq, const struct rb_iseq_constant_body *captured_iseq)
1284 {
1285  struct rb_mjit_unit *unit = compiled_iseq->jit_unit;
1286  unsigned int new_entries_size = unit->cc_entries_size + captured_iseq->ci_size;
1287  VM_ASSERT(captured_iseq->ci_size > 0);
1288 
1289  // Allocate new cc_entries and append them to unit->cc_entries
1290  const struct rb_callcache **cc_entries;
1291  int cc_entries_index = unit->cc_entries_size;
1292  if (unit->cc_entries_size == 0) {
1293  VM_ASSERT(unit->cc_entries == NULL);
1294  unit->cc_entries = cc_entries = malloc(sizeof(struct rb_callcache *) * new_entries_size);
1295  if (cc_entries == NULL) return -1;
1296  }
1297  else {
1298  void *cc_ptr = (void *)unit->cc_entries; // get rid of bogus warning by VC
1299  cc_entries = realloc(cc_ptr, sizeof(struct rb_callcache *) * new_entries_size);
1300  if (cc_entries == NULL) return -1;
1301  unit->cc_entries = cc_entries;
1302  cc_entries += cc_entries_index;
1303  }
1304  unit->cc_entries_size = new_entries_size;
1305 
1306  // Capture cc to cc_enties
1307  for (unsigned int i = 0; i < captured_iseq->ci_size; i++) {
1308  cc_entries[i] = captured_iseq->call_data[i].cc;
1309  }
1310 
1311  return cc_entries_index;
1312 }
1313 
1314 // Set up field `used_code_p` for unit iseqs whose iseq on the stack of ec.
1315 static void
1316 mark_ec_units(rb_execution_context_t *ec)
1317 {
1318  const rb_control_frame_t *cfp;
1319 
1320  if (ec->vm_stack == NULL)
1321  return;
1322  for (cfp = RUBY_VM_END_CONTROL_FRAME(ec) - 1; ; cfp = RUBY_VM_NEXT_CONTROL_FRAME(cfp)) {
1323  const rb_iseq_t *iseq;
1324  if (cfp->pc && (iseq = cfp->iseq) != NULL
1325  && imemo_type((VALUE) iseq) == imemo_iseq
1326  && (iseq->body->jit_unit) != NULL) {
1327  iseq->body->jit_unit->used_code_p = true;
1328  }
1329 
1330  if (cfp == ec->cfp)
1331  break; // reached the most recent cfp
1332  }
1333 }
1334 
1335 // MJIT info related to an existing continutaion.
1336 struct mjit_cont {
1337  rb_execution_context_t *ec; // continuation ec
1338  struct mjit_cont *prev, *next; // used to form lists
1339 };
1340 
1341 // Double linked list of registered continuations. This is used to detect
1342 // units which are in use in unload_units.
1343 static struct mjit_cont *first_cont;
1344 
1345 // Unload JIT code of some units to satisfy the maximum permitted
1346 // number of units with a loaded code.
1347 static void
1348 unload_units(void)
1349 {
1350  struct rb_mjit_unit *unit = 0, *next;
1351  struct mjit_cont *cont;
1352  int units_num = active_units.length;
1353 
1354  // For now, we don't unload units when ISeq is GCed. We should
1355  // unload such ISeqs first here.
1356  list_for_each_safe(&active_units.head, unit, next, unode) {
1357  if (unit->iseq == NULL) { // ISeq is GCed.
1358  remove_from_list(unit, &active_units);
1359  free_unit(unit);
1360  }
1361  }
1362 
1363  // Detect units which are in use and can't be unloaded.
1364  list_for_each(&active_units.head, unit, unode) {
1365  assert(unit->iseq != NULL && unit->handle != NULL);
1366  unit->used_code_p = false;
1367  }
1368  // All threads have a root_fiber which has a mjit_cont. Other normal fibers also
1369  // have a mjit_cont. Thus we can check ISeqs in use by scanning ec of mjit_conts.
1370  for (cont = first_cont; cont != NULL; cont = cont->next) {
1371  mark_ec_units(cont->ec);
1372  }
1373  // TODO: check stale_units and unload unused ones! (note that the unit is not associated to ISeq anymore)
1374 
1375  // Unload units whose total_calls is smaller than any total_calls in unit_queue.
1376  // TODO: make the algorithm more efficient
1377  long unsigned prev_queue_calls = -1;
1378  while (true) {
1379  // Calculate the next max total_calls in unit_queue
1380  long unsigned max_queue_calls = 0;
1381  list_for_each(&unit_queue.head, unit, unode) {
1382  if (unit->iseq != NULL && max_queue_calls < unit->iseq->body->total_calls
1383  && unit->iseq->body->total_calls < prev_queue_calls) {
1384  max_queue_calls = unit->iseq->body->total_calls;
1385  }
1386  }
1387  prev_queue_calls = max_queue_calls;
1388 
1389  bool unloaded_p = false;
1390  list_for_each_safe(&active_units.head, unit, next, unode) {
1391  if (unit->used_code_p) // We can't unload code on stack.
1392  continue;
1393 
1394  if (max_queue_calls > unit->iseq->body->total_calls) {
1395  verbose(2, "Unloading unit %d (calls=%lu, threshold=%lu)",
1396  unit->id, unit->iseq->body->total_calls, max_queue_calls);
1397  assert(unit->handle != NULL);
1398  remove_from_list(unit, &active_units);
1399  free_unit(unit);
1400  unloaded_p = true;
1401  }
1402  }
1403  if (!unloaded_p) break;
1404  }
1405 
1406  if (units_num > active_units.length) {
1407  verbose(1, "Too many JIT code -- %d units unloaded", units_num - active_units.length);
1408  total_unloads += units_num - active_units.length;
1409  }
1410 }
1411 
1412 static void mjit_add_iseq_to_process(const rb_iseq_t *iseq, const struct rb_mjit_compile_info *compile_info, bool worker_p);
1413 
1414 // The function implementing a worker. It is executed in a separate
1415 // thread by rb_thread_create_mjit_thread. It compiles precompiled header
1416 // and then compiles requested ISeqs.
1417 void
1418 mjit_worker(void)
1419 {
1420  // Allow only `max_cache_size / 100` times (default: 100) of compaction.
1421  // Note: GC of compacted code has not been implemented yet.
1422  int max_compact_size = mjit_opts.max_cache_size / 100;
1423  if (max_compact_size < 10) max_compact_size = 10;
1424 
1425  // Run unload_units after it's requested `max_cache_size / 10` (default: 10) times.
1426  // This throttles the call to mitigate locking in unload_units. It also throttles JIT compaction.
1427  int throttle_threshold = mjit_opts.max_cache_size / 10;
1428 
1429 #ifndef _MSC_VER
1430  if (pch_status == PCH_NOT_READY) {
1431  make_pch();
1432  }
1433 #endif
1434  if (pch_status == PCH_FAILED) {
1435  mjit_enabled = false;
1436  CRITICAL_SECTION_START(3, "in worker to update worker_stopped");
1437  worker_stopped = true;
1438  verbose(3, "Sending wakeup signal to client in a mjit-worker");
1439  rb_native_cond_signal(&mjit_client_wakeup);
1440  CRITICAL_SECTION_FINISH(3, "in worker to update worker_stopped");
1441  return; // TODO: do the same thing in the latter half of mjit_finish
1442  }
1443 
1444  // main worker loop
1445  while (!stop_worker_p) {
1446  struct rb_mjit_unit *unit;
1447 
1448  // Wait until a unit becomes available
1449  CRITICAL_SECTION_START(3, "in worker dequeue");
1450  while ((list_empty(&unit_queue.head) || active_units.length >= mjit_opts.max_cache_size) && !stop_worker_p) {
1451  rb_native_cond_wait(&mjit_worker_wakeup, &mjit_engine_mutex);
1452  verbose(3, "Getting wakeup from client");
1453 
1454  // Lazily move active_units to stale_units to avoid race conditions around active_units with compaction
1455  if (pending_stale_p) {
1456  pending_stale_p = false;
1457  struct rb_mjit_unit *next;
1458  list_for_each_safe(&active_units.head, unit, next, unode) {
1459  if (unit->stale_p) {
1460  unit->stale_p = false;
1461  remove_from_list(unit, &active_units);
1462  add_to_list(unit, &stale_units);
1463  // Lazily put it to unit_queue as well to avoid race conditions on jit_unit with mjit_compile.
1464  mjit_add_iseq_to_process(unit->iseq, &unit->iseq->body->jit_unit->compile_info, true);
1465  }
1466  }
1467  }
1468 
1469  // Unload some units as needed
1470  if (unload_requests >= throttle_threshold) {
1471  while (in_gc) {
1472  verbose(3, "Waiting wakeup from GC");
1473  rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
1474  }
1475  in_jit = true; // Lock GC
1476 
1477  RB_DEBUG_COUNTER_INC(mjit_unload_units);
1478  unload_units();
1479  unload_requests = 0;
1480 
1481  in_jit = false; // Unlock GC
1482  verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
1483  rb_native_cond_signal(&mjit_client_wakeup);
1484  }
1485  if (active_units.length == mjit_opts.max_cache_size && mjit_opts.wait) { // Sometimes all methods may be in use
1486  mjit_opts.max_cache_size++; // avoid infinite loop on `rb_mjit_wait_call`. Note that --jit-wait is just for testing.
1487  verbose(1, "No units can be unloaded -- incremented max-cache-size to %d for --jit-wait", mjit_opts.max_cache_size);
1488  }
1489  }
1490  unit = get_from_list(&unit_queue);
1491  CRITICAL_SECTION_FINISH(3, "in worker dequeue");
1492 
1493  if (unit) {
1494  // JIT compile
1495  mjit_func_t func = convert_unit_to_func(unit);
1496  (void)RB_DEBUG_COUNTER_INC_IF(mjit_compile_failures, func == (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC);
1497 
1498  CRITICAL_SECTION_START(3, "in jit func replace");
1499  while (in_gc) { // Make sure we're not GC-ing when touching ISeq
1500  verbose(3, "Waiting wakeup from GC");
1501  rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
1502  }
1503  if (unit->iseq) { // Check whether GCed or not
1504  if ((uintptr_t)func > (uintptr_t)LAST_JIT_ISEQ_FUNC) {
1505  add_to_list(unit, &active_units);
1506  }
1507  // Usage of jit_code might be not in a critical section.
1508  MJIT_ATOMIC_SET(unit->iseq->body->jit_func, func);
1509  }
1510  else {
1511  free_unit(unit);
1512  }
1513  CRITICAL_SECTION_FINISH(3, "in jit func replace");
1514 
1515 #if USE_JIT_COMPACTION
1516  // Combine .o files to one .so and reload all jit_func to improve memory locality.
1517  if (compact_units.length < max_compact_size
1518  && ((!mjit_opts.wait && unit_queue.length == 0 && active_units.length > 1)
1519  || (active_units.length == mjit_opts.max_cache_size && compact_units.length * throttle_threshold <= total_unloads))) { // throttle compaction by total_unloads
1520  compact_all_jit_code();
1521  }
1522 #endif
1523  }
1524  }
1525 
1526  // To keep mutex unlocked when it is destroyed by mjit_finish, don't wrap CRITICAL_SECTION here.
1527  worker_stopped = true;
1528 }
#define FIX2LONG
Old name of RB_FIX2LONG.
Definition: long.h:46
#define FIXNUM_P
Old name of RB_FIXNUM_P.
Defines RBIMPL_HAS_BUILTIN.
int rb_cloexec_open(const char *pathname, int flags, mode_t mode)
Opens a file that closes on exec.
Definition: io.c:314
#define rb_strlen_lit(str)
Length of a string literal.
Definition: string.h:1756
#define strdup(s)
Just another name of ruby_strdup.
Definition: util.h:176
#define RUBY_API_VERSION_TEENY
Teeny version.
Definition: version.h:76
#define RUBY_API_VERSION_MAJOR
Major version.
Definition: version.h:64
#define RUBY_API_VERSION_MINOR
Minor version.
Definition: version.h:70
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition: memory.h:366
#define PRI_PIDT_PREFIX
A rb_sprintf() format prefix to be used for a pid_t parameter.
Definition: pid_t.h:38
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
Definition: rstring.h:82
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
Definition: rstring.h:497
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
Definition: rstring.h:483
void rb_native_mutex_lock(rb_nativethread_lock_t *lock)
Just another name of rb_nativethread_lock_lock.
void rb_native_cond_initialize(rb_nativethread_cond_t *cond)
Fills the passed condition variable with an initial value.
void rb_native_cond_broadcast(rb_nativethread_cond_t *cond)
Signals a condition variable.
void rb_native_mutex_initialize(rb_nativethread_lock_t *lock)
Just another name of rb_nativethread_lock_initialize.
void rb_native_mutex_unlock(rb_nativethread_lock_t *lock)
Just another name of rb_nativethread_lock_unlock.
void rb_native_mutex_destroy(rb_nativethread_lock_t *lock)
Just another name of rb_nativethread_lock_destroy.
void rb_native_cond_destroy(rb_nativethread_cond_t *cond)
Destroys the passed condition variable.
void rb_native_cond_signal(rb_nativethread_cond_t *cond)
Signals a condition variable.
void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lock_t *mutex)
Waits for the passed condition variable to be signalled.
uintptr_t VALUE
Type that represents a Ruby object.
Definition: value.h:40
#define RBIMPL_WARNING_IGNORED(flag)
Suppresses a warning.
Definition: warning_push.h:80
#define RBIMPL_WARNING_PUSH()
Pushes compiler warning state.
Definition: warning_push.h:55
#define RBIMPL_WARNING_POP()
Pops compiler warning state.
Definition: warning_push.h:62