Ruby  3.1.4p223 (2023-03-30 revision HEAD)
thread_pthread.c
1 /* -*-c-*- */
2 /**********************************************************************
3 
4  thread_pthread.c -
5 
6  $Author$
7 
8  Copyright (C) 2004-2007 Koichi Sasada
9 
10 **********************************************************************/
11 
12 #ifdef THREAD_SYSTEM_DEPENDENT_IMPLEMENTATION
13 
14 #include "gc.h"
15 #include "mjit.h"
16 
17 #ifdef HAVE_SYS_RESOURCE_H
18 #include <sys/resource.h>
19 #endif
20 #ifdef HAVE_THR_STKSEGMENT
21 #include <thread.h>
22 #endif
23 #if defined(HAVE_FCNTL_H)
24 #include <fcntl.h>
25 #elif defined(HAVE_SYS_FCNTL_H)
26 #include <sys/fcntl.h>
27 #endif
28 #ifdef HAVE_SYS_PRCTL_H
29 #include <sys/prctl.h>
30 #endif
31 #if defined(HAVE_SYS_TIME_H)
32 #include <sys/time.h>
33 #endif
34 #if defined(__HAIKU__)
35 #include <kernel/OS.h>
36 #endif
37 #ifdef __linux__
38 #include <sys/syscall.h> /* for SYS_gettid */
39 #endif
40 #include <time.h>
41 #include <signal.h>
42 
43 #if defined(HAVE_SYS_EVENTFD_H) && defined(HAVE_EVENTFD)
44 # define USE_EVENTFD (1)
45 # include <sys/eventfd.h>
46 #else
47 # define USE_EVENTFD (0)
48 #endif
49 
50 #if defined(SIGVTALRM) && !defined(__CYGWIN__) && !defined(__EMSCRIPTEN__)
51 # define USE_UBF_LIST 1
52 #endif
53 
54 /*
55  * UBF_TIMER and ubf_list both use SIGVTALRM.
56  *
57  * UBF_TIMER has NOTHING to do with thread timeslices (TIMER_INTERRUPT_MASK)
58  *
59  * UBF_TIMER is to close TOCTTOU signal race on programs where we
60  * cannot rely on GVL contention (vm->gvl.timer) to perform wakeups
61  * while a thread is doing blocking I/O on sockets or pipes. With
62  * rb_thread_call_without_gvl and similar functions:
63  *
64  * (1) Check interrupts.
65  * (2) release GVL.
66  * (2a) signal received
67  * (3) call func with data1 (blocks for a long time without ubf_timer)
68  * (4) acquire GVL.
69  * Other Ruby threads cannot run in parallel anymore.
70  * (5) Check interrupts.
71  *
72  * We need UBF_TIMER to break out of (3) if (2a) happens.
73  *
74  * ubf_list wakeups may be triggered on gvl_yield.
75  *
76  * If we have vm->gvl.timer (on GVL contention), we don't need UBF_TIMER
77  * as it can perform the same tasks while doing timeslices.
78  */
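/*
 * A minimal caller-side sketch (not part of this file) of the pattern the
 * steps above describe, using the public rb_thread_call_without_gvl() API.
 * The names blocking_read(), cancel_read() and req are hypothetical:
 *
 *   static void *
 *   blocking_read(void *data1)   // step (3): may block in read(2) on a socket
 *   {
 *       struct req *r = data1;
 *       r->ret = read(r->fd, r->buf, r->len);
 *       return NULL;
 *   }
 *
 *   static void
 *   cancel_read(void *data2)     // ubf: called when the thread is interrupted
 *   {
 *       // e.g. shut down r->fd so the pending read(2) returns
 *   }
 *
 *   // (1)/(2) happen before blocking_read runs, (4)/(5) after it returns.
 *   // If a signal sneaks in at (2a), the periodic SIGVTALRM from UBF_TIMER
 *   // gives the VM another chance to notice it and invoke cancel_read, so
 *   // step (3) does not block forever.
 *   rb_thread_call_without_gvl(blocking_read, &req, cancel_read, &req);
 */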
79 #define UBF_TIMER_NONE 0
80 #define UBF_TIMER_POSIX 1
81 #define UBF_TIMER_PTHREAD 2
82 
83 #ifndef UBF_TIMER
84 # if defined(HAVE_TIMER_SETTIME) && defined(HAVE_TIMER_CREATE) && \
85  defined(CLOCK_MONOTONIC) && defined(USE_UBF_LIST)
86  /* preferred */
87 # define UBF_TIMER UBF_TIMER_POSIX
88 # elif defined(USE_UBF_LIST)
89  /* safe, but inefficient */
90 # define UBF_TIMER UBF_TIMER_PTHREAD
91 # else
92  /* we'll be racy without SIGVTALRM for ubf_list */
93 # define UBF_TIMER UBF_TIMER_NONE
94 # endif
95 #endif
96 
97 enum rtimer_state {
98  /* alive, after timer_create: */
99  RTIMER_DISARM,
100  RTIMER_ARMING,
101  RTIMER_ARMED,
102 
103  RTIMER_DEAD
104 };
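/*
 * Observed transitions (see ubf_timer_create/arm/disarm/destroy below):
 *
 *   RTIMER_DEAD    --ubf_timer_create-->   RTIMER_DISARM
 *   RTIMER_DISARM  --ubf_timer_arm-->      RTIMER_ARMING --> RTIMER_ARMED
 *   RTIMER_ARMED   --ubf_timer_disarm-->   RTIMER_DISARM
 *   any live state --ubf_timer_destroy-->  RTIMER_DEAD
 */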
105 
106 #if UBF_TIMER == UBF_TIMER_POSIX
107 static const struct itimerspec zero;
108 static struct {
109  rb_atomic_t state_; /* rtimer_state */
110  rb_pid_t owner;
111  timer_t timerid;
112 } timer_posix = {
113  /* .state = */ RTIMER_DEAD,
114 };
115 
116 #define TIMER_STATE_DEBUG 0
117 
118 static const char *
119 rtimer_state_name(enum rtimer_state state)
120 {
121  switch (state) {
122  case RTIMER_DISARM: return "disarm";
123  case RTIMER_ARMING: return "arming";
124  case RTIMER_ARMED: return "armed";
125  case RTIMER_DEAD: return "dead";
126  default: rb_bug("unreachable");
127  }
128 }
129 
130 static enum rtimer_state
131 timer_state_exchange(enum rtimer_state state)
132 {
133  enum rtimer_state prev = ATOMIC_EXCHANGE(timer_posix.state_, state);
134  if (TIMER_STATE_DEBUG) fprintf(stderr, "state (exc): %s->%s\n", rtimer_state_name(prev), rtimer_state_name(state));
135  return prev;
136 }
137 
138 static enum rtimer_state
139 timer_state_cas(enum rtimer_state expected_prev, enum rtimer_state state)
140 {
141  enum rtimer_state prev = ATOMIC_CAS(timer_posix.state_, expected_prev, state);
142 
143  if (TIMER_STATE_DEBUG) {
144  if (prev == expected_prev) {
145  fprintf(stderr, "state (cas): %s->%s\n", rtimer_state_name(prev), rtimer_state_name(state));
146  }
147  else {
148  fprintf(stderr, "state (cas): %s (expected:%s)\n", rtimer_state_name(prev), rtimer_state_name(expected_prev));
149  }
150  }
151 
152  return prev;
153 }
154 
155 #elif UBF_TIMER == UBF_TIMER_PTHREAD
156 static void *timer_pthread_fn(void *);
157 static struct {
158  int low[2];
159  rb_atomic_t armed; /* boolean */
160  rb_pid_t owner;
161  pthread_t thid;
162 } timer_pthread = {
163  { -1, -1 },
164 };
165 #endif
166 
167 static const rb_hrtime_t *sigwait_timeout(rb_thread_t *, int sigwait_fd,
168  const rb_hrtime_t *,
169  int *drained_p);
170 static void ubf_timer_disarm(void);
171 static void threadptr_trap_interrupt(rb_thread_t *);
172 static void clear_thread_cache_altstack(void);
173 static void ubf_wakeup_all_threads(void);
174 static int ubf_threads_empty(void);
175 
176 #define TIMER_THREAD_CREATED_P() (signal_self_pipe.owner_process == getpid())
177 
178 /* for testing, and in case we come across a platform w/o pipes: */
179 #define BUSY_WAIT_SIGNALS (0)
180 
181 /*
182  * sigwait_th is the thread which owns sigwait_fd and sleeps on it
183  * (using ppoll). MJIT worker can be sigwait_th==0, so we initialize
184  * it to THREAD_INVALID at startup and fork time. It is the ONLY thread
185  * allowed to read from sigwait_fd, otherwise starvation can occur.
186  */
187 #define THREAD_INVALID ((const rb_thread_t *)-1)
188 static const rb_thread_t *sigwait_th;
189 
190 #ifdef HAVE_SCHED_YIELD
191 #define native_thread_yield() (void)sched_yield()
192 #else
193 #define native_thread_yield() ((void)0)
194 #endif
195 
196 #if defined(HAVE_PTHREAD_CONDATTR_SETCLOCK) && \
197  defined(CLOCK_REALTIME) && defined(CLOCK_MONOTONIC) && \
198  defined(HAVE_CLOCK_GETTIME)
199 static pthread_condattr_t condattr_mono;
200 static pthread_condattr_t *condattr_monotonic = &condattr_mono;
201 #else
202 static const void *const condattr_monotonic = NULL;
203 #endif
204 
205 /* 100ms. 10ms is too small for user level thread scheduling
206  * on recent Linux (tested on 2.6.35)
207  */
208 #define TIME_QUANTUM_MSEC (100)
209 #define TIME_QUANTUM_USEC (TIME_QUANTUM_MSEC * 1000)
210 #define TIME_QUANTUM_NSEC (TIME_QUANTUM_USEC * 1000)
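/* i.e. one timeslice is 100 ms == 100,000 us == 100,000,000 ns */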
211 
212 static rb_hrtime_t native_cond_timeout(rb_nativethread_cond_t *, rb_hrtime_t);
213 static int native_cond_timedwait(rb_nativethread_cond_t *cond, pthread_mutex_t *mutex, const rb_hrtime_t *abs);
214 
215 /*
216  * Designate the next gvl.timer thread; favor the last thread in
217  * the waitq since it will stay in the waitq the longest
218  */
219 static int
220 designate_timer_thread(rb_global_vm_lock_t *gvl)
221 {
222  native_thread_data_t *last;
223 
224  last = list_tail(&gvl->waitq, native_thread_data_t, node.ubf);
225  if (last) {
226  rb_native_cond_signal(&last->cond.gvlq);
227  return TRUE;
228  }
229  return FALSE;
230 }
231 
232 /*
233  * We become the designated timer thread to kick vm->gvl.owner
234  * periodically. Keep the old timeout unless it already expired.
235  */
236 static void
237 do_gvl_timer(rb_global_vm_lock_t *gvl, rb_thread_t *th)
238 {
239  rb_vm_t *vm = GET_VM();
240  static rb_hrtime_t abs;
241  native_thread_data_t *nd = &th->native_thread_data;
242 
243  gvl->timer = th;
244 
245  /* take over wakeups from UBF_TIMER */
246  ubf_timer_disarm();
247 
248  if (gvl->timer_err == ETIMEDOUT) {
249  abs = native_cond_timeout(&nd->cond.gvlq, TIME_QUANTUM_NSEC);
250  }
251  gvl->timer_err = native_cond_timedwait(&nd->cond.gvlq, &gvl->lock, &abs);
252 
253  ubf_wakeup_all_threads();
254  ruby_sigchld_handler(vm);
255 
256  if (UNLIKELY(rb_signal_buff_size())) {
257  if (th == vm->ractor.main_thread) {
258  RUBY_VM_SET_TRAP_INTERRUPT(th->ec);
259  }
260  else {
261  threadptr_trap_interrupt(vm->ractor.main_thread);
262  }
263  }
264 
265  /*
266  * Timeslice. Warning: the process may fork while this
267  * thread is contending for GVL:
268  */
269  if (gvl->owner) {
270  // strictly speaking, accessing "gvl->owner" is not thread-safe
271  RUBY_VM_SET_TIMER_INTERRUPT(gvl->owner->ec);
272  }
273  gvl->timer = 0;
274 }
275 
276 static void
277 gvl_acquire_common(rb_global_vm_lock_t *gvl, rb_thread_t *th)
278 {
279  if (gvl->owner) {
280  native_thread_data_t *nd = &th->native_thread_data;
281 
282  VM_ASSERT(th->unblock.func == 0 &&
283  "we must not be in ubf_list and GVL waitq at the same time");
284 
285  list_add_tail(&gvl->waitq, &nd->node.gvl);
286 
287  do {
288  if (!gvl->timer) {
289  do_gvl_timer(gvl, th);
290  }
291  else {
292  rb_native_cond_wait(&nd->cond.gvlq, &gvl->lock);
293  }
294  } while (gvl->owner);
295 
296  list_del_init(&nd->node.gvl);
297 
298  if (gvl->need_yield) {
299  gvl->need_yield = 0;
300  rb_native_cond_signal(&gvl->switch_cond);
301  }
302  }
303  else { /* reset timer if uncontended */
304  gvl->timer_err = ETIMEDOUT;
305  }
306  gvl->owner = th;
307  if (!gvl->timer) {
308  if (!designate_timer_thread(gvl) && !ubf_threads_empty()) {
309  rb_thread_wakeup_timer_thread(-1);
310  }
311  }
312 }
313 
314 static void
315 gvl_acquire(rb_global_vm_lock_t *gvl, rb_thread_t *th)
316 {
317  rb_native_mutex_lock(&gvl->lock);
318  gvl_acquire_common(gvl, th);
319  rb_native_mutex_unlock(&gvl->lock);
320 }
321 
322 static const native_thread_data_t *
323 gvl_release_common(rb_global_vm_lock_t *gvl)
324 {
325  native_thread_data_t *next;
326  gvl->owner = 0;
327  next = list_top(&gvl->waitq, native_thread_data_t, node.ubf);
328  if (next) rb_native_cond_signal(&next->cond.gvlq);
329 
330  return next;
331 }
332 
333 static void
334 gvl_release(rb_global_vm_lock_t *gvl)
335 {
336  rb_native_mutex_lock(&gvl->lock);
337  gvl_release_common(gvl);
338  rb_native_mutex_unlock(&gvl->lock);
339 }
340 
341 static void
342 gvl_yield(rb_global_vm_lock_t *gvl, rb_thread_t *th)
343 {
344  const native_thread_data_t *next;
345 
346  /*
347  * Perhaps other threads are stuck in blocking region w/o GVL, too,
348  * (perhaps looping in io_close_fptr) so we kick them:
349  */
350  ubf_wakeup_all_threads();
351  rb_native_mutex_lock(&gvl->lock);
352  next = gvl_release_common(gvl);
353 
354  /* Another thread is already processing a GVL yield. */
355  if (UNLIKELY(gvl->wait_yield)) {
356  while (gvl->wait_yield)
357  rb_native_cond_wait(&gvl->switch_wait_cond, &gvl->lock);
358  }
359  else if (next) {
360  /* Wait until another thread takes the GVL. */
361  gvl->need_yield = 1;
362  gvl->wait_yield = 1;
363  while (gvl->need_yield)
364  rb_native_cond_wait(&gvl->switch_cond, &gvl->lock);
365  gvl->wait_yield = 0;
366  rb_native_cond_broadcast(&gvl->switch_wait_cond);
367  }
368  else {
369  rb_native_mutex_unlock(&gvl->lock);
370  native_thread_yield();
371  rb_native_mutex_lock(&gvl->lock);
372  rb_native_cond_broadcast(&gvl->switch_wait_cond);
373  }
374  gvl_acquire_common(gvl, th);
375  rb_native_mutex_unlock(&gvl->lock);
376 }
377 
378 void
379 rb_gvl_init(rb_global_vm_lock_t *gvl)
380 {
381  rb_native_mutex_initialize(&gvl->lock);
382  rb_native_cond_initialize(&gvl->switch_cond);
383  rb_native_cond_initialize(&gvl->switch_wait_cond);
384  list_head_init(&gvl->waitq);
385  gvl->owner = 0;
386  gvl->timer = 0;
387  gvl->timer_err = ETIMEDOUT;
388  gvl->need_yield = 0;
389  gvl->wait_yield = 0;
390 }
391 
392 static void
393 gvl_destroy(rb_global_vm_lock_t *gvl)
394 {
395  /*
396  * only called once at VM shutdown (not atfork); another thread
397  * may still grab vm->gvl.lock when calling gvl_release at
398  * the end of thread_start_func_2
399  */
400  if (0) {
401  rb_native_cond_destroy(&gvl->switch_wait_cond);
402  rb_native_cond_destroy(&gvl->switch_cond);
403  rb_native_mutex_destroy(&gvl->lock);
404  }
405  clear_thread_cache_altstack();
406 }
407 
408 #if defined(HAVE_WORKING_FORK)
409 static void thread_cache_reset(void);
410 static void
411 gvl_atfork(rb_global_vm_lock_t *gvl)
412 {
413  thread_cache_reset();
414  rb_gvl_init(gvl);
415  gvl_acquire(gvl, GET_THREAD());
416 }
417 #endif
418 
419 #define NATIVE_MUTEX_LOCK_DEBUG 0
420 
421 static void
422 mutex_debug(const char *msg, void *lock)
423 {
424  if (NATIVE_MUTEX_LOCK_DEBUG) {
425  int r;
426  static pthread_mutex_t dbglock = PTHREAD_MUTEX_INITIALIZER;
427 
428  if ((r = pthread_mutex_lock(&dbglock)) != 0) {exit(EXIT_FAILURE);}
429  fprintf(stdout, "%s: %p\n", msg, lock);
430  if ((r = pthread_mutex_unlock(&dbglock)) != 0) {exit(EXIT_FAILURE);}
431  }
432 }
433 
434 void
435 rb_native_mutex_lock(pthread_mutex_t *lock)
436 {
437  int r;
438  mutex_debug("lock", lock);
439  if ((r = pthread_mutex_lock(lock)) != 0) {
440  rb_bug_errno("pthread_mutex_lock", r);
441  }
442 }
443 
444 void
445 rb_native_mutex_unlock(pthread_mutex_t *lock)
446 {
447  int r;
448  mutex_debug("unlock", lock);
449  if ((r = pthread_mutex_unlock(lock)) != 0) {
450  rb_bug_errno("pthread_mutex_unlock", r);
451  }
452 }
453 
454 int
455 rb_native_mutex_trylock(pthread_mutex_t *lock)
456 {
457  int r;
458  mutex_debug("trylock", lock);
459  if ((r = pthread_mutex_trylock(lock)) != 0) {
460  if (r == EBUSY) {
461  return EBUSY;
462  }
463  else {
464  rb_bug_errno("pthread_mutex_trylock", r);
465  }
466  }
467  return 0;
468 }
469 
470 void
471 rb_native_mutex_initialize(pthread_mutex_t *lock)
472 {
473  int r = pthread_mutex_init(lock, 0);
474  mutex_debug("init", lock);
475  if (r != 0) {
476  rb_bug_errno("pthread_mutex_init", r);
477  }
478 }
479 
480 void
481 rb_native_mutex_destroy(pthread_mutex_t *lock)
482 {
483  int r = pthread_mutex_destroy(lock);
484  mutex_debug("destroy", lock);
485  if (r != 0) {
486  rb_bug_errno("pthread_mutex_destroy", r);
487  }
488 }
489 
490 void
491 rb_native_cond_initialize(rb_nativethread_cond_t *cond)
492 {
493  int r = pthread_cond_init(cond, condattr_monotonic);
494  if (r != 0) {
495  rb_bug_errno("pthread_cond_init", r);
496  }
497 }
498 
499 void
500 rb_native_cond_destroy(rb_nativethread_cond_t *cond)
501 {
502  int r = pthread_cond_destroy(cond);
503  if (r != 0) {
504  rb_bug_errno("pthread_cond_destroy", r);
505  }
506 }
507 
508 /*
509  * In OS X 10.7 (Lion), pthread_cond_signal and pthread_cond_broadcast return
510  * EAGAIN after retrying 8192 times. You can see them in the following page:
511  *
512  * http://www.opensource.apple.com/source/Libc/Libc-763.11/pthreads/pthread_cond.c
513  *
514  * The following rb_native_cond_signal and rb_native_cond_broadcast functions
515  * need to retry until the pthread functions no longer return EAGAIN.
516  */
517 
518 void
519 rb_native_cond_signal(rb_nativethread_cond_t *cond)
520 {
521  int r;
522  do {
523  r = pthread_cond_signal(cond);
524  } while (r == EAGAIN);
525  if (r != 0) {
526  rb_bug_errno("pthread_cond_signal", r);
527  }
528 }
529 
530 void
531 rb_native_cond_broadcast(rb_nativethread_cond_t *cond)
532 {
533  int r;
534  do {
535  r = pthread_cond_broadcast(cond);
536  } while (r == EAGAIN);
537  if (r != 0) {
538  rb_bug_errno("pthread_cond_broadcast", r);
539  }
540 }
541 
542 void
543 rb_native_cond_wait(rb_nativethread_cond_t *cond, pthread_mutex_t *mutex)
544 {
545  int r = pthread_cond_wait(cond, mutex);
546  if (r != 0) {
547  rb_bug_errno("pthread_cond_wait", r);
548  }
549 }
550 
551 static int
552 native_cond_timedwait(rb_nativethread_cond_t *cond, pthread_mutex_t *mutex, const rb_hrtime_t *abs)
553 {
554  int r;
555  struct timespec ts;
556 
557  /*
558  * An old Linux may return EINTR, even though POSIX says
559  * "These functions shall not return an error code of [EINTR]".
560  * http://pubs.opengroup.org/onlinepubs/009695399/functions/pthread_cond_timedwait.html
561  * Let's hide it from arch generic code.
562  */
563  do {
564  rb_hrtime2timespec(&ts, abs);
565  r = pthread_cond_timedwait(cond, mutex, &ts);
566  } while (r == EINTR);
567 
568  if (r != 0 && r != ETIMEDOUT) {
569  rb_bug_errno("pthread_cond_timedwait", r);
570  }
571 
572  return r;
573 }
574 
575 void
576 rb_native_cond_timedwait(rb_nativethread_cond_t *cond, pthread_mutex_t *mutex, unsigned long msec)
577 {
578  rb_hrtime_t hrmsec = native_cond_timeout(cond, RB_HRTIME_PER_MSEC * msec);
579  native_cond_timedwait(cond, mutex, &hrmsec);
580 }
581 
582 static rb_hrtime_t
583 native_cond_timeout(rb_nativethread_cond_t *cond, const rb_hrtime_t rel)
584 {
585  if (condattr_monotonic) {
586  return rb_hrtime_add(rb_hrtime_now(), rel);
587  }
588  else {
589  struct timespec ts;
590 
591  rb_timespec_now(&ts);
592  return rb_hrtime_add(rb_timespec2hrtime(&ts), rel);
593  }
594 }
595 
596 #define native_cleanup_push pthread_cleanup_push
597 #define native_cleanup_pop pthread_cleanup_pop
598 
599 #ifdef RB_THREAD_LOCAL_SPECIFIER
600 static RB_THREAD_LOCAL_SPECIFIER rb_thread_t *ruby_native_thread;
601 #else
602 static pthread_key_t ruby_native_thread_key;
603 #endif
604 
605 static void
606 null_func(int i)
607 {
608  /* null */
609 }
610 
611 rb_thread_t *
612 ruby_thread_from_native(void)
613 {
614 #ifdef RB_THREAD_LOCAL_SPECIFIER
615  return ruby_native_thread;
616 #else
617  return pthread_getspecific(ruby_native_thread_key);
618 #endif
619 }
620 
621 int
622 ruby_thread_set_native(rb_thread_t *th)
623 {
624  if (th && th->ec) {
625  rb_ractor_set_current_ec(th->ractor, th->ec);
626  }
627 #ifdef RB_THREAD_LOCAL_SPECIFIER
628  ruby_native_thread = th;
629  return 1;
630 #else
631  return pthread_setspecific(ruby_native_thread_key, th) == 0;
632 #endif
633 }
634 
635 static void native_thread_init(rb_thread_t *th);
636 
637 void
638 Init_native_thread(rb_thread_t *th)
639 {
640 #if defined(HAVE_PTHREAD_CONDATTR_SETCLOCK)
641  if (condattr_monotonic) {
642  int r = pthread_condattr_init(condattr_monotonic);
643  if (r == 0) {
644  r = pthread_condattr_setclock(condattr_monotonic, CLOCK_MONOTONIC);
645  }
646  if (r) condattr_monotonic = NULL;
647  }
648 #endif
649 
650 #ifndef RB_THREAD_LOCAL_SPECIFIER
651  if (pthread_key_create(&ruby_native_thread_key, 0) == EAGAIN) {
652  rb_bug("pthread_key_create failed (ruby_native_thread_key)");
653  }
654  if (pthread_key_create(&ruby_current_ec_key, 0) == EAGAIN) {
655  rb_bug("pthread_key_create failed (ruby_current_ec_key)");
656  }
657 #endif
658  th->thread_id = pthread_self();
659  ruby_thread_set_native(th);
660  fill_thread_id_str(th);
661  native_thread_init(th);
662  posix_signal(SIGVTALRM, null_func);
663 }
664 
665 #ifdef RB_THREAD_T_HAS_NATIVE_ID
666 static int
667 get_native_thread_id(void)
668 {
669 #ifdef __linux__
670  return (int)syscall(SYS_gettid);
671 #elif defined(__FreeBSD__)
672  return pthread_getthreadid_np();
673 #endif
674 }
675 #endif
676 
677 static void
678 native_thread_init(rb_thread_t *th)
679 {
680  native_thread_data_t *nd = &th->native_thread_data;
681 
682 #ifdef RB_THREAD_T_HAS_NATIVE_ID
683  th->tid = get_native_thread_id();
684 #endif
685 #ifdef USE_UBF_LIST
686  list_node_init(&nd->node.ubf);
687 #endif
688  rb_native_cond_initialize(&nd->cond.gvlq);
689  if (&nd->cond.gvlq != &nd->cond.intr)
690  rb_native_cond_initialize(&nd->cond.intr);
691 }
692 
693 #ifndef USE_THREAD_CACHE
694 #define USE_THREAD_CACHE 1
695 #endif
696 
697 static void
698 native_thread_destroy(rb_thread_t *th)
699 {
700  native_thread_data_t *nd = &th->native_thread_data;
701 
702  rb_native_cond_destroy(&nd->cond.gvlq);
703  if (&nd->cond.gvlq != &nd->cond.intr)
704  rb_native_cond_destroy(&nd->cond.intr);
705 
706  /*
707  * prevent false positive from ruby_thread_has_gvl_p if that
708  * gets called from an interposing function wrapper
709  */
710  if (USE_THREAD_CACHE)
711  ruby_thread_set_native(0);
712 }
713 
714 #if USE_THREAD_CACHE
715 static rb_thread_t *register_cached_thread_and_wait(void *);
716 #endif
717 
718 #if defined HAVE_PTHREAD_GETATTR_NP || defined HAVE_PTHREAD_ATTR_GET_NP
719 #define STACKADDR_AVAILABLE 1
720 #elif defined HAVE_PTHREAD_GET_STACKADDR_NP && defined HAVE_PTHREAD_GET_STACKSIZE_NP
721 #define STACKADDR_AVAILABLE 1
722 #undef MAINSTACKADDR_AVAILABLE
723 #define MAINSTACKADDR_AVAILABLE 1
724 void *pthread_get_stackaddr_np(pthread_t);
725 size_t pthread_get_stacksize_np(pthread_t);
726 #elif defined HAVE_THR_STKSEGMENT || defined HAVE_PTHREAD_STACKSEG_NP
727 #define STACKADDR_AVAILABLE 1
728 #elif defined HAVE_PTHREAD_GETTHRDS_NP
729 #define STACKADDR_AVAILABLE 1
730 #elif defined __HAIKU__
731 #define STACKADDR_AVAILABLE 1
732 #endif
733 
734 #ifndef MAINSTACKADDR_AVAILABLE
735 # ifdef STACKADDR_AVAILABLE
736 # define MAINSTACKADDR_AVAILABLE 1
737 # else
738 # define MAINSTACKADDR_AVAILABLE 0
739 # endif
740 #endif
741 #if MAINSTACKADDR_AVAILABLE && !defined(get_main_stack)
742 # define get_main_stack(addr, size) get_stack(addr, size)
743 #endif
744 
745 #ifdef STACKADDR_AVAILABLE
746 /*
747  * Get the initial address and size of current thread's stack
748  */
749 static int
750 get_stack(void **addr, size_t *size)
751 {
752 #define CHECK_ERR(expr) \
753  {int err = (expr); if (err) return err;}
754 #ifdef HAVE_PTHREAD_GETATTR_NP /* Linux */
755  pthread_attr_t attr;
756  size_t guard = 0;
757  STACK_GROW_DIR_DETECTION;
758  CHECK_ERR(pthread_getattr_np(pthread_self(), &attr));
759 # ifdef HAVE_PTHREAD_ATTR_GETSTACK
760  CHECK_ERR(pthread_attr_getstack(&attr, addr, size));
761  STACK_DIR_UPPER((void)0, (void)(*addr = (char *)*addr + *size));
762 # else
763  CHECK_ERR(pthread_attr_getstackaddr(&attr, addr));
764  CHECK_ERR(pthread_attr_getstacksize(&attr, size));
765 # endif
766 # ifdef HAVE_PTHREAD_ATTR_GETGUARDSIZE
767  CHECK_ERR(pthread_attr_getguardsize(&attr, &guard));
768 # else
769  guard = getpagesize();
770 # endif
771  *size -= guard;
772  pthread_attr_destroy(&attr);
773 #elif defined HAVE_PTHREAD_ATTR_GET_NP /* FreeBSD, DragonFly BSD, NetBSD */
774  pthread_attr_t attr;
775  CHECK_ERR(pthread_attr_init(&attr));
776  CHECK_ERR(pthread_attr_get_np(pthread_self(), &attr));
777 # ifdef HAVE_PTHREAD_ATTR_GETSTACK
778  CHECK_ERR(pthread_attr_getstack(&attr, addr, size));
779 # else
780  CHECK_ERR(pthread_attr_getstackaddr(&attr, addr));
781  CHECK_ERR(pthread_attr_getstacksize(&attr, size));
782 # endif
783  STACK_DIR_UPPER((void)0, (void)(*addr = (char *)*addr + *size));
784  pthread_attr_destroy(&attr);
785 #elif (defined HAVE_PTHREAD_GET_STACKADDR_NP && defined HAVE_PTHREAD_GET_STACKSIZE_NP) /* MacOS X */
786  pthread_t th = pthread_self();
787  *addr = pthread_get_stackaddr_np(th);
788  *size = pthread_get_stacksize_np(th);
789 #elif defined HAVE_THR_STKSEGMENT || defined HAVE_PTHREAD_STACKSEG_NP
790  stack_t stk;
791 # if defined HAVE_THR_STKSEGMENT /* Solaris */
792  CHECK_ERR(thr_stksegment(&stk));
793 # else /* OpenBSD */
794  CHECK_ERR(pthread_stackseg_np(pthread_self(), &stk));
795 # endif
796  *addr = stk.ss_sp;
797  *size = stk.ss_size;
798 #elif defined HAVE_PTHREAD_GETTHRDS_NP /* AIX */
799  pthread_t th = pthread_self();
800  struct __pthrdsinfo thinfo;
801  char reg[256];
802  int regsiz=sizeof(reg);
803  CHECK_ERR(pthread_getthrds_np(&th, PTHRDSINFO_QUERY_ALL,
804  &thinfo, sizeof(thinfo),
805  &reg, &regsiz));
806  *addr = thinfo.__pi_stackaddr;
807  /* Must not use thinfo.__pi_stacksize for size.
808  It is around 3KB smaller than the correct size
809  calculated by thinfo.__pi_stackend - thinfo.__pi_stackaddr. */
810  *size = thinfo.__pi_stackend - thinfo.__pi_stackaddr;
811  STACK_DIR_UPPER((void)0, (void)(*addr = (char *)*addr + *size));
812 #elif defined __HAIKU__
813  thread_info info;
814  STACK_GROW_DIR_DETECTION;
815  CHECK_ERR(get_thread_info(find_thread(NULL), &info));
816  *addr = info.stack_base;
817  *size = (uintptr_t)info.stack_end - (uintptr_t)info.stack_base;
818  STACK_DIR_UPPER((void)0, (void)(*addr = (char *)*addr + *size));
819 #else
820 #error STACKADDR_AVAILABLE is defined but not implemented.
821 #endif
822  return 0;
823 #undef CHECK_ERR
824 }
825 #endif
826 
827 static struct {
828  rb_nativethread_id_t id;
829  size_t stack_maxsize;
830  VALUE *stack_start;
831 } native_main_thread;
832 
833 #ifdef STACK_END_ADDRESS
834 extern void *STACK_END_ADDRESS;
835 #endif
836 
837 enum {
838  RUBY_STACK_SPACE_LIMIT = 1024 * 1024, /* 1024KB */
839  RUBY_STACK_SPACE_RATIO = 5
840 };
841 
842 static size_t
843 space_size(size_t stack_size)
844 {
845  size_t space_size = stack_size / RUBY_STACK_SPACE_RATIO;
846  if (space_size > RUBY_STACK_SPACE_LIMIT) {
847  return RUBY_STACK_SPACE_LIMIT;
848  }
849  else {
850  return space_size;
851  }
852 }
853 
854 #ifdef __linux__
855 static __attribute__((noinline)) void
856 reserve_stack(volatile char *limit, size_t size)
857 {
858 # ifdef C_ALLOCA
859 # error needs alloca()
860 # endif
861  struct rlimit rl;
862  volatile char buf[0x100];
863  enum {stack_check_margin = 0x1000}; /* for -fstack-check */
864 
865  STACK_GROW_DIR_DETECTION;
866 
867  if (!getrlimit(RLIMIT_STACK, &rl) && rl.rlim_cur == RLIM_INFINITY)
868  return;
869 
870  if (size < stack_check_margin) return;
871  size -= stack_check_margin;
872 
873  size -= sizeof(buf); /* margin */
874  if (IS_STACK_DIR_UPPER()) {
875  const volatile char *end = buf + sizeof(buf);
876  limit += size;
877  if (limit > end) {
878  /* |<-bottom (=limit(a)) top->|
879  * | .. |<-buf 256B |<-end | stack check |
880  * | 256B | =size= | margin (4KB)|
881  * | =size= limit(b)->| 256B | |
882  * | | alloca(sz) | | |
883  * | .. |<-buf |<-limit(c) [sz-1]->0> | |
884  */
885  size_t sz = limit - end;
886  limit = alloca(sz);
887  limit[sz-1] = 0;
888  }
889  }
890  else {
891  limit -= size;
892  if (buf > limit) {
893  /* |<-top (=limit(a)) bottom->|
894  * | .. | 256B buf->| | stack check |
895  * | 256B | =size= | margin (4KB)|
896  * | =size= limit(b)->| 256B | |
897  * | | alloca(sz) | | |
898  * | .. | buf->| limit(c)-><0> | |
899  */
900  size_t sz = buf - limit;
901  limit = alloca(sz);
902  limit[0] = 0;
903  }
904  }
905 }
906 #else
907 # define reserve_stack(limit, size) ((void)(limit), (void)(size))
908 #endif
909 
910 #undef ruby_init_stack
911 void
912 ruby_init_stack(volatile VALUE *addr)
913 {
914  native_main_thread.id = pthread_self();
915 
916 #if MAINSTACKADDR_AVAILABLE
917  if (native_main_thread.stack_maxsize) return;
918  {
919  void* stackaddr;
920  size_t size;
921  if (get_main_stack(&stackaddr, &size) == 0) {
922  native_main_thread.stack_maxsize = size;
923  native_main_thread.stack_start = stackaddr;
924  reserve_stack(stackaddr, size);
925  goto bound_check;
926  }
927  }
928 #endif
929 #ifdef STACK_END_ADDRESS
930  native_main_thread.stack_start = STACK_END_ADDRESS;
931 #else
932  if (!native_main_thread.stack_start ||
933  STACK_UPPER((VALUE *)(void *)&addr,
934  native_main_thread.stack_start > addr,
935  native_main_thread.stack_start < addr)) {
936  native_main_thread.stack_start = (VALUE *)addr;
937  }
938 #endif
939  {
940 #if defined(HAVE_GETRLIMIT)
941 #if defined(PTHREAD_STACK_DEFAULT)
942 # if PTHREAD_STACK_DEFAULT < RUBY_STACK_SPACE*5
943 # error "PTHREAD_STACK_DEFAULT is too small"
944 # endif
945  size_t size = PTHREAD_STACK_DEFAULT;
946 #else
947  size_t size = RUBY_VM_THREAD_VM_STACK_SIZE;
948 #endif
949  size_t space;
950  int pagesize = getpagesize();
951  struct rlimit rlim;
952  STACK_GROW_DIR_DETECTION;
953  if (getrlimit(RLIMIT_STACK, &rlim) == 0) {
954  size = (size_t)rlim.rlim_cur;
955  }
956  addr = native_main_thread.stack_start;
957  if (IS_STACK_DIR_UPPER()) {
958  space = ((size_t)((char *)addr + size) / pagesize) * pagesize - (size_t)addr;
959  }
960  else {
961  space = (size_t)addr - ((size_t)((char *)addr - size) / pagesize + 1) * pagesize;
962  }
963  native_main_thread.stack_maxsize = space;
964 #endif
965  }
966 
967 #if MAINSTACKADDR_AVAILABLE
968  bound_check:
969 #endif
970  /* If addr is out of the estimated main-thread stack range, */
971  /* it should be on a co-routine (alternative stack). [Feature #2294] */
972  {
973  void *start, *end;
974  STACK_GROW_DIR_DETECTION;
975 
976  if (IS_STACK_DIR_UPPER()) {
977  start = native_main_thread.stack_start;
978  end = (char *)native_main_thread.stack_start + native_main_thread.stack_maxsize;
979  }
980  else {
981  start = (char *)native_main_thread.stack_start - native_main_thread.stack_maxsize;
982  end = native_main_thread.stack_start;
983  }
984 
985  if ((void *)addr < start || (void *)addr > end) {
986  /* out of range */
987  native_main_thread.stack_start = (VALUE *)addr;
988  native_main_thread.stack_maxsize = 0; /* unknown */
989  }
990  }
991 }
992 
993 #define CHECK_ERR(expr) \
994  {int err = (expr); if (err) {rb_bug_errno(#expr, err);}}
995 
996 static int
997 native_thread_init_stack(rb_thread_t *th)
998 {
999  rb_nativethread_id_t curr = pthread_self();
1000 
1001  if (pthread_equal(curr, native_main_thread.id)) {
1002  th->ec->machine.stack_start = native_main_thread.stack_start;
1003  th->ec->machine.stack_maxsize = native_main_thread.stack_maxsize;
1004  }
1005  else {
1006 #ifdef STACKADDR_AVAILABLE
1007  void *start;
1008  size_t size;
1009 
1010  if (get_stack(&start, &size) == 0) {
1011  uintptr_t diff = (uintptr_t)start - (uintptr_t)&curr;
1012  th->ec->machine.stack_start = (VALUE *)&curr;
1013  th->ec->machine.stack_maxsize = size - diff;
1014  }
1015 #else
1016  rb_raise(rb_eNotImpError, "ruby engine can initialize only in the main thread");
1017 #endif
1018  }
1019 
1020  return 0;
1021 }
1022 
1023 #ifndef __CYGWIN__
1024 #define USE_NATIVE_THREAD_INIT 1
1025 #endif
1026 
1027 static void *
1028 thread_start_func_1(void *th_ptr)
1029 {
1030  rb_thread_t *th = th_ptr;
1031  RB_ALTSTACK_INIT(void *altstack, th->altstack);
1032 #if USE_THREAD_CACHE
1033  thread_start:
1034 #endif
1035  {
1036 #if !defined USE_NATIVE_THREAD_INIT
1037  VALUE stack_start;
1038 #endif
1039 
1040  fill_thread_id_str(th);
1041 #if defined USE_NATIVE_THREAD_INIT
1042  native_thread_init_stack(th);
1043 #endif
1044  native_thread_init(th);
1045  /* run */
1046 #if defined USE_NATIVE_THREAD_INIT
1047  thread_start_func_2(th, th->ec->machine.stack_start);
1048 #else
1049  thread_start_func_2(th, &stack_start);
1050 #endif
1051  }
1052 #if USE_THREAD_CACHE
1053  /* cache thread */
1054  if ((th = register_cached_thread_and_wait(RB_ALTSTACK(altstack))) != 0) {
1055  goto thread_start;
1056  }
1057 #else
1058  RB_ALTSTACK_FREE(altstack);
1059 #endif
1060  return 0;
1061 }
1062 
1063 struct cached_thread_entry {
1064  rb_nativethread_cond_t cond;
1065  rb_nativethread_id_t thread_id;
1066  rb_thread_t *th;
1067  void *altstack;
1068  struct list_node node;
1069 };
1070 
1071 #if USE_THREAD_CACHE
1072 static rb_nativethread_lock_t thread_cache_lock = RB_NATIVETHREAD_LOCK_INIT;
1073 static LIST_HEAD(cached_thread_head);
1074 
1075 # if defined(HAVE_WORKING_FORK)
1076 static void
1077 thread_cache_reset(void)
1078 {
1079  rb_native_mutex_initialize(&thread_cache_lock);
1080  list_head_init(&cached_thread_head);
1081 }
1082 # endif
1083 
1084 /*
1085  * number of seconds to cache for; 1-5s should be sufficient to obviate
1086  * the need for a thread pool in many network programs (taking into account
1087  * worst case network latency across the globe) without wasting memory
1088  */
1089 #ifndef THREAD_CACHE_TIME
1090 # define THREAD_CACHE_TIME ((rb_hrtime_t)3 * RB_HRTIME_PER_SEC)
1091 #endif
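/*
 * Because of the #ifndef above, THREAD_CACHE_TIME can be overridden at build
 * time; for example (hypothetically) passing
 * -DTHREAD_CACHE_TIME='((rb_hrtime_t)5 * RB_HRTIME_PER_SEC)' in CPPFLAGS
 * would keep idle native threads cached for 5 seconds instead of 3.
 */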
1092 
1093 static rb_thread_t *
1094 register_cached_thread_and_wait(void *altstack)
1095 {
1096  rb_hrtime_t end = THREAD_CACHE_TIME;
1097  struct cached_thread_entry entry;
1098 
1099  rb_native_cond_initialize(&entry.cond);
1100  entry.altstack = altstack;
1101  entry.th = NULL;
1102  entry.thread_id = pthread_self();
1103  end = native_cond_timeout(&entry.cond, end);
1104 
1105  rb_native_mutex_lock(&thread_cache_lock);
1106  {
1107  list_add(&cached_thread_head, &entry.node);
1108 
1109  native_cond_timedwait(&entry.cond, &thread_cache_lock, &end);
1110 
1111  if (entry.th == NULL) { /* unused */
1112  list_del(&entry.node);
1113  }
1114  }
1115  rb_native_mutex_unlock(&thread_cache_lock);
1116 
1117  rb_native_cond_destroy(&entry.cond);
1118  if (!entry.th) {
1119  RB_ALTSTACK_FREE(entry.altstack);
1120  }
1121 
1122  return entry.th;
1123 }
1124 #else
1125 # if defined(HAVE_WORKING_FORK)
1126 static void thread_cache_reset(void) { }
1127 # endif
1128 #endif
1129 
1130 static int
1131 use_cached_thread(rb_thread_t *th)
1132 {
1133 #if USE_THREAD_CACHE
1134  struct cached_thread_entry *entry;
1135 
1136  rb_native_mutex_lock(&thread_cache_lock);
1137  entry = list_pop(&cached_thread_head, struct cached_thread_entry, node);
1138  if (entry) {
1139  entry->th = th;
1140  /* th->thread_id must be set before signal for Thread#name= */
1141  th->thread_id = entry->thread_id;
1142  fill_thread_id_str(th);
1143  rb_native_cond_signal(&entry->cond);
1144  }
1145  rb_native_mutex_unlock(&thread_cache_lock);
1146  return !!entry;
1147 #endif
1148  return 0;
1149 }
1150 
1151 static void
1152 clear_thread_cache_altstack(void)
1153 {
1154 #if USE_THREAD_CACHE
1155  struct cached_thread_entry *entry;
1156 
1157  rb_native_mutex_lock(&thread_cache_lock);
1158  list_for_each(&cached_thread_head, entry, node) {
1159  void MAYBE_UNUSED(*altstack) = entry->altstack;
1160  entry->altstack = 0;
1161  RB_ALTSTACK_FREE(altstack);
1162  }
1163  rb_native_mutex_unlock(&thread_cache_lock);
1164 #endif
1165 }
1166 
1167 static int
1168 native_thread_create(rb_thread_t *th)
1169 {
1170  int err = 0;
1171 
1172  if (use_cached_thread(th)) {
1173  thread_debug("create (use cached thread): %p\n", (void *)th);
1174  }
1175  else {
1176  pthread_attr_t attr;
1177  const size_t stack_size = th->vm->default_params.thread_machine_stack_size + th->vm->default_params.thread_vm_stack_size;
1178  const size_t space = space_size(stack_size);
1179 
1180 #ifdef USE_SIGALTSTACK
1181  th->altstack = rb_allocate_sigaltstack();
1182 #endif
1183  th->ec->machine.stack_maxsize = stack_size - space;
1184 
1185  CHECK_ERR(pthread_attr_init(&attr));
1186 
1187 # ifdef PTHREAD_STACK_MIN
1188  thread_debug("create - stack size: %lu\n", (unsigned long)stack_size);
1189  CHECK_ERR(pthread_attr_setstacksize(&attr, stack_size));
1190 # endif
1191 
1192 # ifdef HAVE_PTHREAD_ATTR_SETINHERITSCHED
1193  CHECK_ERR(pthread_attr_setinheritsched(&attr, PTHREAD_INHERIT_SCHED));
1194 # endif
1195  CHECK_ERR(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED));
1196 
1197  err = pthread_create(&th->thread_id, &attr, thread_start_func_1, th);
1198  thread_debug("create: %p (%d)\n", (void *)th, err);
1199  /* should be done in the created thread */
1200  fill_thread_id_str(th);
1201  CHECK_ERR(pthread_attr_destroy(&attr));
1202  }
1203  return err;
1204 }
1205 
1206 #if USE_NATIVE_THREAD_PRIORITY
1207 
1208 static void
1209 native_thread_apply_priority(rb_thread_t *th)
1210 {
1211 #if defined(_POSIX_PRIORITY_SCHEDULING) && (_POSIX_PRIORITY_SCHEDULING > 0)
1212  struct sched_param sp;
1213  int policy;
1214  int priority = 0 - th->priority;
1215  int max, min;
1216  pthread_getschedparam(th->thread_id, &policy, &sp);
1217  max = sched_get_priority_max(policy);
1218  min = sched_get_priority_min(policy);
1219 
1220  if (min > priority) {
1221  priority = min;
1222  }
1223  else if (max < priority) {
1224  priority = max;
1225  }
1226 
1227  sp.sched_priority = priority;
1228  pthread_setschedparam(th->thread_id, policy, &sp);
1229 #else
1230  /* not touched */
1231 #endif
1232 }
1233 
1234 #endif /* USE_NATIVE_THREAD_PRIORITY */
1235 
1236 static int
1237 native_fd_select(int n, rb_fdset_t *readfds, rb_fdset_t *writefds, rb_fdset_t *exceptfds, struct timeval *timeout, rb_thread_t *th)
1238 {
1239  return rb_fd_select(n, readfds, writefds, exceptfds, timeout);
1240 }
1241 
1242 static void
1243 ubf_pthread_cond_signal(void *ptr)
1244 {
1245  rb_thread_t *th = (rb_thread_t *)ptr;
1246  thread_debug("ubf_pthread_cond_signal (%p)\n", (void *)th);
1247  rb_native_cond_signal(&th->native_thread_data.cond.intr);
1248 }
1249 
1250 static void
1251 native_cond_sleep(rb_thread_t *th, rb_hrtime_t *rel)
1252 {
1253  rb_nativethread_lock_t *lock = &th->interrupt_lock;
1254  rb_nativethread_cond_t *cond = &th->native_thread_data.cond.intr;
1255 
1256  /* Solaris cond_timedwait() returns EINVAL if an argument is greater than
1257  * current_time + 100,000,000. So we cap it at 100,000,000. This is
1258  * considered a kind of spurious wakeup. The caller of native_sleep
1259  * should handle spurious wakeups.
1260  *
1261  * See also [Bug #1341] [ruby-core:29702]
1262  * http://download.oracle.com/docs/cd/E19683-01/816-0216/6m6ngupgv/index.html
1263  */
1264  const rb_hrtime_t max = (rb_hrtime_t)100000000 * RB_HRTIME_PER_SEC;
1265 
1266  GVL_UNLOCK_BEGIN(th);
1267  {
1268  rb_native_mutex_lock(lock);
1269  th->unblock.func = ubf_pthread_cond_signal;
1270  th->unblock.arg = th;
1271 
1272  if (RUBY_VM_INTERRUPTED(th->ec)) {
1273  /* interrupted. return immediately */
1274  thread_debug("native_sleep: interrupted before sleep\n");
1275  }
1276  else {
1277  if (!rel) {
1278  rb_native_cond_wait(cond, lock);
1279  }
1280  else {
1281  rb_hrtime_t end;
1282 
1283  if (*rel > max) {
1284  *rel = max;
1285  }
1286 
1287  end = native_cond_timeout(cond, *rel);
1288  native_cond_timedwait(cond, lock, &end);
1289  }
1290  }
1291  th->unblock.func = 0;
1292 
1293  rb_native_mutex_unlock(lock);
1294  }
1295  GVL_UNLOCK_END(th);
1296 
1297  thread_debug("native_sleep done\n");
1298 }
1299 
1300 #ifdef USE_UBF_LIST
1301 static LIST_HEAD(ubf_list_head);
1302 static rb_nativethread_lock_t ubf_list_lock = RB_NATIVETHREAD_LOCK_INIT;
1303 
1304 static void
1305 ubf_list_atfork(void)
1306 {
1307  list_head_init(&ubf_list_head);
1308  rb_native_mutex_initialize(&ubf_list_lock);
1309 }
1310 
1311 /* The thread 'th' is registered so that we keep trying to unblock it. */
1312 static void
1313 register_ubf_list(rb_thread_t *th)
1314 {
1315  struct list_node *node = &th->native_thread_data.node.ubf;
1316 
1317  if (list_empty((struct list_head*)node)) {
1318  rb_native_mutex_lock(&ubf_list_lock);
1319  list_add(&ubf_list_head, node);
1320  rb_native_mutex_unlock(&ubf_list_lock);
1321  }
1322 }
1323 
1324 /* The thread 'th' is unblocked. It no longer needs to be registered. */
1325 static void
1326 unregister_ubf_list(rb_thread_t *th)
1327 {
1328  struct list_node *node = &th->native_thread_data.node.ubf;
1329 
1330  /* we can't allow re-entry into ubf_list_head */
1331  VM_ASSERT(th->unblock.func == 0);
1332 
1333  if (!list_empty((struct list_head*)node)) {
1334  rb_native_mutex_lock(&ubf_list_lock);
1335  list_del_init(node);
1336  if (list_empty(&ubf_list_head) && !rb_signal_buff_size()) {
1337  ubf_timer_disarm();
1338  }
1339  rb_native_mutex_unlock(&ubf_list_lock);
1340  }
1341 }
1342 
1343 /*
1344  * Send a signal so that the target thread returns from its blocking syscall.
1345  * Almost any signal would do, but we chose SIGVTALRM.
1346  */
1347 static void
1348 ubf_wakeup_thread(rb_thread_t *th)
1349 {
1350  thread_debug("thread_wait_queue_wakeup (%"PRI_THREAD_ID")\n", thread_id_str(th));
1351  pthread_kill(th->thread_id, SIGVTALRM);
1352 }
1353 
1354 static void
1355 ubf_select(void *ptr)
1356 {
1357  rb_thread_t *th = (rb_thread_t *)ptr;
1358  rb_global_vm_lock_t *gvl = rb_ractor_gvl(th->ractor);
1359  const rb_thread_t *cur = ruby_thread_from_native(); /* may be 0 */
1360 
1361  register_ubf_list(th);
1362 
1363  /*
1364  * ubf_wakeup_thread() doesn't guarantee that it wakes up the target thread.
1365  * Therefore, we repeatedly call ubf_wakeup_thread() until the target thread
1366  * leaves the blocking region. We must have a timer to perform this operation.
1367  * We use double-checked locking here because this function may be called
1368  * while vm->gvl.lock is held in do_gvl_timer.
1369  * There is also no need to start a timer if we're the designated
1370  * sigwait_th thread, otherwise we can deadlock with a thread
1371  * in unblock_function_clear.
1372  */
1373  if (cur != gvl->timer && cur != sigwait_th) {
1374  /*
1375  * Double-checked locking above was to prevent nested locking
1376  * by the SAME thread. We use trylock here to prevent deadlocks
1377  * between DIFFERENT threads
1378  */
1379  if (rb_native_mutex_trylock(&gvl->lock) == 0) {
1380  if (!gvl->timer) {
1381  rb_thread_wakeup_timer_thread(-1);
1382  }
1383  rb_native_mutex_unlock(&gvl->lock);
1384  }
1385  }
1386 
1387  ubf_wakeup_thread(th);
1388 }
1389 
1390 static int
1391 ubf_threads_empty(void)
1392 {
1393  return list_empty(&ubf_list_head);
1394 }
1395 
1396 static void
1397 ubf_wakeup_all_threads(void)
1398 {
1399  rb_thread_t *th;
1400  native_thread_data_t *dat;
1401 
1402  if (!ubf_threads_empty()) {
1403  rb_native_mutex_lock(&ubf_list_lock);
1404  list_for_each(&ubf_list_head, dat, node.ubf) {
1405  th = container_of(dat, rb_thread_t, native_thread_data);
1406  ubf_wakeup_thread(th);
1407  }
1408  rb_native_mutex_unlock(&ubf_list_lock);
1409  }
1410 }
1411 
1412 #else /* USE_UBF_LIST */
1413 #define register_ubf_list(th) (void)(th)
1414 #define unregister_ubf_list(th) (void)(th)
1415 #define ubf_select 0
1416 static void ubf_wakeup_all_threads(void) { return; }
1417 static int ubf_threads_empty(void) { return 1; }
1418 #define ubf_list_atfork() do {} while (0)
1419 #endif /* USE_UBF_LIST */
1420 
1421 #define TT_DEBUG 0
1422 #define WRITE_CONST(fd, str) (void)(write((fd),(str),sizeof(str)-1)<0)
1423 
1424 static struct {
1425  /* pipes are closed in forked children when owner_process does not match */
1426  int normal[2]; /* [0] == sigwait_fd */
1427  int ub_main[2]; /* unblock main thread from native_ppoll_sleep */
1428 
1429  /* volatile for signal handler use: */
1430  volatile rb_pid_t owner_process;
1431 } signal_self_pipe = {
1432  {-1, -1},
1433  {-1, -1},
1434 };
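/*
 * With USE_EVENTFD, setup_communication_pipe_internal() below stores the same
 * eventfd descriptor in both slots of a pair, which is why
 * close_invalidate_pair() closes it only once when fds[0] == fds[1].
 */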
1435 
1436 /* only use signal-safe system calls here */
1437 static void
1438 rb_thread_wakeup_timer_thread_fd(int fd)
1439 {
1440 #if USE_EVENTFD
1441  const uint64_t buff = 1;
1442 #else
1443  const char buff = '!';
1444 #endif
1445  ssize_t result;
1446 
1447  /* already opened */
1448  if (fd >= 0) {
1449  retry:
1450  if ((result = write(fd, &buff, sizeof(buff))) <= 0) {
1451  int e = errno;
1452  switch (e) {
1453  case EINTR: goto retry;
1454  case EAGAIN:
1455 #if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN
1456  case EWOULDBLOCK:
1457 #endif
1458  break;
1459  default:
1460  async_bug_fd("rb_thread_wakeup_timer_thread: write", e, fd);
1461  }
1462  }
1463  if (TT_DEBUG) WRITE_CONST(2, "rb_thread_wakeup_timer_thread: write\n");
1464  }
1465  else {
1466  /* ignore wakeup */
1467  }
1468 }
1469 
1470 /*
1471  * This ensures we get a SIGVTALRM in TIME_QUANTUM_MSEC if our
1472  * process could not react to the original signal in time.
1473  */
1474 static void
1475 ubf_timer_arm(rb_pid_t current) /* async signal safe */
1476 {
1477 #if UBF_TIMER == UBF_TIMER_POSIX
1478  if ((!current || timer_posix.owner == current) &&
1479  timer_state_cas(RTIMER_DISARM, RTIMER_ARMING) == RTIMER_DISARM) {
1480  struct itimerspec it;
1481 
1482  it.it_interval.tv_sec = it.it_value.tv_sec = 0;
1483  it.it_interval.tv_nsec = it.it_value.tv_nsec = TIME_QUANTUM_NSEC;
1484 
1485  if (timer_settime(timer_posix.timerid, 0, &it, 0))
1486  rb_async_bug_errno("timer_settime (arm)", errno);
1487 
1488  switch (timer_state_cas(RTIMER_ARMING, RTIMER_ARMED)) {
1489  case RTIMER_DISARM:
1490  /* somebody requested a disarm while we were arming */
1491  /* may race harmlessly with ubf_timer_destroy */
1492  (void)timer_settime(timer_posix.timerid, 0, &zero, 0);
1493 
1494  case RTIMER_ARMING: return; /* success */
1495  case RTIMER_ARMED:
1496  /*
1497  * it is possible for another thread to disarm, and for
1498  * a third thread to finish re-arming, before we get
1499  * here; in that case we wasted a syscall with timer_settime, but
1500  * that is probably unavoidable in a signal handler.
1501  */
1502  return;
1503  case RTIMER_DEAD:
1504  /* may race harmlessly with ubf_timer_destroy */
1505  (void)timer_settime(timer_posix.timerid, 0, &zero, 0);
1506  return;
1507  default:
1508  rb_async_bug_errno("UBF_TIMER_POSIX unknown state", ERANGE);
1509  }
1510  }
1511 #elif UBF_TIMER == UBF_TIMER_PTHREAD
1512  if (!current || current == timer_pthread.owner) {
1513  if (ATOMIC_EXCHANGE(timer_pthread.armed, 1) == 0)
1514  rb_thread_wakeup_timer_thread_fd(timer_pthread.low[1]);
1515  }
1516 #endif
1517 }
1518 
1519 void
1520 rb_thread_wakeup_timer_thread(int sig)
1521 {
1522  rb_pid_t current;
1523 
1524  /* non-sighandler path */
1525  if (sig <= 0) {
1526  rb_thread_wakeup_timer_thread_fd(signal_self_pipe.normal[1]);
1527  if (sig < 0) {
1528  ubf_timer_arm(0);
1529  }
1530  return;
1531  }
1532 
1533  /* must be safe inside sighandler, so no mutex */
1534  current = getpid();
1535  if (signal_self_pipe.owner_process == current) {
1536  rb_thread_wakeup_timer_thread_fd(signal_self_pipe.normal[1]);
1537 
1538  /*
1539  * system_working check is required because vm and main_thread are
1540  * freed during shutdown
1541  */
1542  if (system_working > 0) {
1543  volatile rb_execution_context_t *ec;
1544  rb_vm_t *vm = GET_VM();
1545  rb_thread_t *mth;
1546 
1547  /*
1548  * FIXME: root VM and main_thread should be static and not
1549  * on heap for maximum safety (and startup/shutdown speed)
1550  */
1551  if (!vm) return;
1552  mth = vm->ractor.main_thread;
1553  if (!mth || system_working <= 0) return;
1554 
1555  /* this relies on GC for grace period before cont_free */
1556  ec = ACCESS_ONCE(rb_execution_context_t *, mth->ec);
1557 
1558  if (ec) {
1559  RUBY_VM_SET_TRAP_INTERRUPT(ec);
1560  ubf_timer_arm(current);
1561 
1562  /* some ubfs can interrupt single-threaded process directly */
1563  if (vm->ubf_async_safe && mth->unblock.func) {
1564  (mth->unblock.func)(mth->unblock.arg);
1565  }
1566  }
1567  }
1568  }
1569 }
1570 
1571 #define CLOSE_INVALIDATE_PAIR(expr) \
1572  close_invalidate_pair(expr,"close_invalidate: "#expr)
1573 static void
1574 close_invalidate(int *fdp, const char *msg)
1575 {
1576  int fd = *fdp;
1577 
1578  *fdp = -1;
1579  if (close(fd) < 0) {
1580  async_bug_fd(msg, errno, fd);
1581  }
1582 }
1583 
1584 static void
1585 close_invalidate_pair(int fds[2], const char *msg)
1586 {
1587  if (USE_EVENTFD && fds[0] == fds[1]) {
1588  close_invalidate(&fds[0], msg);
1589  fds[1] = -1;
1590  }
1591  else {
1592  close_invalidate(&fds[0], msg);
1593  close_invalidate(&fds[1], msg);
1594  }
1595 }
1596 
1597 static void
1598 set_nonblock(int fd)
1599 {
1600  int oflags;
1601  int err;
1602 
1603  oflags = fcntl(fd, F_GETFL);
1604  if (oflags == -1)
1605  rb_sys_fail(0);
1606  oflags |= O_NONBLOCK;
1607  err = fcntl(fd, F_SETFL, oflags);
1608  if (err == -1)
1609  rb_sys_fail(0);
1610 }
1611 
1612 /* communication pipe with timer thread and signal handler */
1613 static int
1614 setup_communication_pipe_internal(int pipes[2])
1615 {
1616  int err;
1617 
1618  if (pipes[0] >= 0 || pipes[1] >= 0) {
1619  VM_ASSERT(pipes[0] >= 0);
1620  VM_ASSERT(pipes[1] >= 0);
1621  return 0;
1622  }
1623 
1624  /*
1625  * Don't bother with eventfd on ancient Linux 2.6.22..2.6.26 which were
1626  * missing EFD_* flags; they can fall back to pipe
1627  */
1628 #if USE_EVENTFD && defined(EFD_NONBLOCK) && defined(EFD_CLOEXEC)
1629  pipes[0] = pipes[1] = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC);
1630  if (pipes[0] >= 0) {
1631  rb_update_max_fd(pipes[0]);
1632  return 0;
1633  }
1634 #endif
1635 
1636  err = rb_cloexec_pipe(pipes);
1637  if (err != 0) {
1638  rb_warn("pipe creation failed for timer: %s, scheduling broken",
1639  strerror(errno));
1640  return -1;
1641  }
1642  rb_update_max_fd(pipes[0]);
1643  rb_update_max_fd(pipes[1]);
1644  set_nonblock(pipes[0]);
1645  set_nonblock(pipes[1]);
1646  return 0;
1647 }
1648 
1649 #if !defined(SET_CURRENT_THREAD_NAME) && defined(__linux__) && defined(PR_SET_NAME)
1650 # define SET_CURRENT_THREAD_NAME(name) prctl(PR_SET_NAME, name)
1651 #endif
1652 
1653 enum {
1654  THREAD_NAME_MAX =
1655 #if defined(__linux__)
1656  16
1657 #elif defined(__APPLE__)
1658 /* Undocumented, and main thread seems unlimited */
1659  64
1660 #else
1661  16
1662 #endif
1663 };
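/*
 * The Linux limit matches the kernel's 16-byte comm buffer used by
 * prctl(PR_SET_NAME): 15 visible characters plus the terminating NUL.
 * native_set_thread_name() below truncates longer generated names and
 * marks them with a trailing '*'.
 */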
1664 
1665 static VALUE threadptr_invoke_proc_location(rb_thread_t *th);
1666 
1667 static void
1668 native_set_thread_name(rb_thread_t *th)
1669 {
1670 #ifdef SET_CURRENT_THREAD_NAME
1671  VALUE loc;
1672  if (!NIL_P(loc = th->name)) {
1673  SET_CURRENT_THREAD_NAME(RSTRING_PTR(loc));
1674  }
1675  else if ((loc = threadptr_invoke_proc_location(th)) != Qnil) {
1676  char *name, *p;
1677  char buf[THREAD_NAME_MAX];
1678  size_t len;
1679  int n;
1680 
1681  name = RSTRING_PTR(RARRAY_AREF(loc, 0));
1682  p = strrchr(name, '/'); /* show only the basename of the path. */
1683  if (p && p[1])
1684  name = p + 1;
1685 
1686  n = snprintf(buf, sizeof(buf), "%s:%d", name, NUM2INT(RARRAY_AREF(loc, 1)));
1687  RB_GC_GUARD(loc);
1688 
1689  len = (size_t)n;
1690  if (len >= sizeof(buf)) {
1691  buf[sizeof(buf)-2] = '*';
1692  buf[sizeof(buf)-1] = '\0';
1693  }
1694  SET_CURRENT_THREAD_NAME(buf);
1695  }
1696 #endif
1697 }
1698 
1699 static void
1700 native_set_another_thread_name(rb_nativethread_id_t thread_id, VALUE name)
1701 {
1702 #if defined SET_ANOTHER_THREAD_NAME || defined SET_CURRENT_THREAD_NAME
1703  char buf[THREAD_NAME_MAX];
1704  const char *s = "";
1705 # if !defined SET_ANOTHER_THREAD_NAME
1706  if (!pthread_equal(pthread_self(), thread_id)) return;
1707 # endif
1708  if (!NIL_P(name)) {
1709  long n;
1710  RSTRING_GETMEM(name, s, n);
1711  if (n >= (int)sizeof(buf)) {
1712  memcpy(buf, s, sizeof(buf)-1);
1713  buf[sizeof(buf)-1] = '\0';
1714  s = buf;
1715  }
1716  }
1717 # if defined SET_ANOTHER_THREAD_NAME
1718  SET_ANOTHER_THREAD_NAME(thread_id, s);
1719 # elif defined SET_CURRENT_THREAD_NAME
1720  SET_CURRENT_THREAD_NAME(s);
1721 # endif
1722 #endif
1723 }
1724 
1725 #if defined(RB_THREAD_T_HAS_NATIVE_ID) || defined(__APPLE__)
1726 static VALUE
1727 native_thread_native_thread_id(rb_thread_t *target_th)
1728 {
1729 #ifdef RB_THREAD_T_HAS_NATIVE_ID
1730  int tid = target_th->tid;
1731  if (tid == 0) return Qnil;
1732  return INT2FIX(tid);
1733 #elif defined(__APPLE__)
1734  uint64_t tid;
1735  int e = pthread_threadid_np(target_th->thread_id, &tid);
1736  if (e != 0) rb_syserr_fail(e, "pthread_threadid_np");
1737  return ULL2NUM((unsigned long long)tid);
1738 #endif
1739 }
1740 # define USE_NATIVE_THREAD_NATIVE_THREAD_ID 1
1741 #else
1742 # define USE_NATIVE_THREAD_NATIVE_THREAD_ID 0
1743 #endif
1744 
1745 static void
1746 ubf_timer_invalidate(void)
1747 {
1748 #if UBF_TIMER == UBF_TIMER_PTHREAD
1749  CLOSE_INVALIDATE_PAIR(timer_pthread.low);
1750 #endif
1751 }
1752 
1753 static void
1754 ubf_timer_pthread_create(rb_pid_t current)
1755 {
1756 #if UBF_TIMER == UBF_TIMER_PTHREAD
1757  int err;
1758  if (timer_pthread.owner == current)
1759  return;
1760 
1761  if (setup_communication_pipe_internal(timer_pthread.low) < 0)
1762  return;
1763 
1764  err = pthread_create(&timer_pthread.thid, 0, timer_pthread_fn, GET_VM());
1765  if (!err)
1766  timer_pthread.owner = current;
1767  else
1768  rb_warn("pthread_create failed for timer: %s, signals racy",
1769  strerror(err));
1770 #endif
1771 }
1772 
1773 static void
1774 ubf_timer_create(rb_pid_t current)
1775 {
1776 #if UBF_TIMER == UBF_TIMER_POSIX
1777 # if defined(__sun)
1778 # define UBF_TIMER_CLOCK CLOCK_REALTIME
1779 # else /* Tested Linux and FreeBSD: */
1780 # define UBF_TIMER_CLOCK CLOCK_MONOTONIC
1781 # endif
1782 
1783  struct sigevent sev;
1784 
1785  sev.sigev_notify = SIGEV_SIGNAL;
1786  sev.sigev_signo = SIGVTALRM;
1787  sev.sigev_value.sival_ptr = &timer_posix;
1788 
1789  if (!timer_create(UBF_TIMER_CLOCK, &sev, &timer_posix.timerid)) {
1790  rb_atomic_t prev = timer_state_exchange(RTIMER_DISARM);
1791 
1792  if (prev != RTIMER_DEAD) {
1793  rb_bug("timer_posix was not dead: %u\n", (unsigned)prev);
1794  }
1795  timer_posix.owner = current;
1796  }
1797  else {
1798  rb_warn("timer_create failed: %s, signals racy", strerror(errno));
1799  }
1800 #endif
1801  if (UBF_TIMER == UBF_TIMER_PTHREAD)
1802  ubf_timer_pthread_create(current);
1803 }
1804 
1805 static void
1806 rb_thread_create_timer_thread(void)
1807 {
1808  /* we only create the pipe, and lazy-spawn */
1809  rb_pid_t current = getpid();
1810  rb_pid_t owner = signal_self_pipe.owner_process;
1811 
1812  if (owner && owner != current) {
1813  CLOSE_INVALIDATE_PAIR(signal_self_pipe.normal);
1814  CLOSE_INVALIDATE_PAIR(signal_self_pipe.ub_main);
1815  ubf_timer_invalidate();
1816  }
1817 
1818  if (setup_communication_pipe_internal(signal_self_pipe.normal) < 0) return;
1819  if (setup_communication_pipe_internal(signal_self_pipe.ub_main) < 0) return;
1820 
1821  ubf_timer_create(current);
1822  if (owner != current) {
1823  /* validate pipe on this process */
1824  sigwait_th = THREAD_INVALID;
1825  signal_self_pipe.owner_process = current;
1826  }
1827 }
1828 
1829 static void
1830 ubf_timer_disarm(void)
1831 {
1832 #if UBF_TIMER == UBF_TIMER_POSIX
1833  rb_atomic_t prev;
1834 
1835  if (timer_posix.owner && timer_posix.owner != getpid()) return;
1836  prev = timer_state_cas(RTIMER_ARMED, RTIMER_DISARM);
1837  switch (prev) {
1838  case RTIMER_DISARM: return; /* likely */
1839  case RTIMER_ARMING: return; /* ubf_timer_arm will disarm itself */
1840  case RTIMER_ARMED:
1841  if (timer_settime(timer_posix.timerid, 0, &zero, 0)) {
1842  int err = errno;
1843 
1844  if (err == EINVAL) {
1845  prev = timer_state_cas(RTIMER_DISARM, RTIMER_DISARM);
1846 
1847  /* main thread may have killed the timer */
1848  if (prev == RTIMER_DEAD) return;
1849 
1850  rb_bug_errno("timer_settime (disarm)", err);
1851  }
1852  }
1853  return;
1854  case RTIMER_DEAD: return; /* stay dead */
1855  default:
1856  rb_bug("UBF_TIMER_POSIX bad state: %u\n", (unsigned)prev);
1857  }
1858 
1859 #elif UBF_TIMER == UBF_TIMER_PTHREAD
1860  ATOMIC_SET(timer_pthread.armed, 0);
1861 #endif
1862 }
1863 
1864 static void
1865 ubf_timer_destroy(void)
1866 {
1867 #if UBF_TIMER == UBF_TIMER_POSIX
1868  if (timer_posix.owner == getpid()) {
1869  rb_atomic_t expect = RTIMER_DISARM;
1870  size_t i, max = 10000000;
1871 
1872  /* prevent signal handler from arming: */
1873  for (i = 0; i < max; i++) {
1874  switch (timer_state_cas(expect, RTIMER_DEAD)) {
1875  case RTIMER_DISARM:
1876  if (expect == RTIMER_DISARM) goto done;
1877  expect = RTIMER_DISARM;
1878  break;
1879  case RTIMER_ARMING:
1880  native_thread_yield(); /* let another thread finish arming */
1881  expect = RTIMER_ARMED;
1882  break;
1883  case RTIMER_ARMED:
1884  if (expect == RTIMER_ARMED) {
1885  if (timer_settime(timer_posix.timerid, 0, &zero, 0))
1886  rb_bug_errno("timer_settime (destroy)", errno);
1887  goto done;
1888  }
1889  expect = RTIMER_ARMED;
1890  break;
1891  case RTIMER_DEAD:
1892  rb_bug("RTIMER_DEAD unexpected");
1893  }
1894  }
1895  rb_bug("timed out waiting for timer to arm");
1896 done:
1897  if (timer_delete(timer_posix.timerid) < 0)
1898  rb_sys_fail("timer_delete");
1899 
1900  VM_ASSERT(timer_state_exchange(RTIMER_DEAD) == RTIMER_DEAD);
1901  }
1902 #elif UBF_TIMER == UBF_TIMER_PTHREAD
1903  int err;
1904 
1905  timer_pthread.owner = 0;
1906  ubf_timer_disarm();
1907  rb_thread_wakeup_timer_thread_fd(timer_pthread.low[1]);
1908  err = pthread_join(timer_pthread.thid, 0);
1909  if (err) {
1910  rb_raise(rb_eThreadError, "native_thread_join() failed (%d)", err);
1911  }
1912 #endif
1913 }
1914 
1915 static int
1916 native_stop_timer_thread(void)
1917 {
1918  int stopped;
1919  stopped = --system_working <= 0;
1920  if (stopped)
1921  ubf_timer_destroy();
1922 
1923  if (TT_DEBUG) fprintf(stderr, "stop timer thread\n");
1924  return stopped;
1925 }
1926 
1927 static void
1928 native_reset_timer_thread(void)
1929 {
1930  if (TT_DEBUG) fprintf(stderr, "reset timer thread\n");
1931 }
1932 
1933 #ifdef HAVE_SIGALTSTACK
1934 int
1935 ruby_stack_overflowed_p(const rb_thread_t *th, const void *addr)
1936 {
1937  void *base;
1938  size_t size;
1939  const size_t water_mark = 1024 * 1024;
1940  STACK_GROW_DIR_DETECTION;
1941 
1942 #ifdef STACKADDR_AVAILABLE
1943  if (get_stack(&base, &size) == 0) {
1944 # ifdef __APPLE__
1945  if (pthread_equal(th->thread_id, native_main_thread.id)) {
1946  struct rlimit rlim;
1947  if (getrlimit(RLIMIT_STACK, &rlim) == 0 && rlim.rlim_cur > size) {
1948  size = (size_t)rlim.rlim_cur;
1949  }
1950  }
1951 # endif
1952  base = (char *)base + STACK_DIR_UPPER(+size, -size);
1953  }
1954  else
1955 #endif
1956  if (th) {
1957  size = th->ec->machine.stack_maxsize;
1958  base = (char *)th->ec->machine.stack_start - STACK_DIR_UPPER(0, size);
1959  }
1960  else {
1961  return 0;
1962  }
1963  size /= RUBY_STACK_SPACE_RATIO;
1964  if (size > water_mark) size = water_mark;
1965  if (IS_STACK_DIR_UPPER()) {
1966  if (size > ~(size_t)base+1) size = ~(size_t)base+1;
1967  if (addr > base && addr <= (void *)((char *)base + size)) return 1;
1968  }
1969  else {
1970  if (size > (size_t)base) size = (size_t)base;
1971  if (addr > (void *)((char *)base - size) && addr <= base) return 1;
1972  }
1973  return 0;
1974 }
1975 #endif
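
/*
 * The check above boils down to: does the faulting address fall inside a
 * small guard band just past the usable end of the stack?  A condensed
 * sketch for a downward-growing stack; overflowed_p, stack_limit and the
 * assumed ratio of 5 (standing in for RUBY_STACK_SPACE_RATIO) are
 * illustrative only.
 */
#if 0 /* illustrative sketch only */
#include <stdbool.h>
#include <stddef.h>

static bool
overflowed_p(const void *addr, const char *stack_limit, size_t stack_size)
{
    /* Reserve a fraction of the stack as the guard band, capped at 1 MiB
     * like `water_mark` above. */
    size_t band = stack_size / 5;
    if (band > 1024 * 1024) band = 1024 * 1024;

    /* stack_limit is the lowest usable address; a fault within `band`
     * bytes at or below it is treated as stack overflow. */
    return (const char *)addr >  stack_limit - band
        && (const char *)addr <= stack_limit;
}
#endif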
1976 
1977 int
1978 rb_reserved_fd_p(int fd)
1979 {
1980  /* no false-positive if out-of-FD at startup */
1981  if (fd < 0)
1982  return 0;
1983 
1984 #if UBF_TIMER == UBF_TIMER_PTHREAD
1985  if (fd == timer_pthread.low[0] || fd == timer_pthread.low[1])
1986  goto check_pid;
1987 #endif
1988  if (fd == signal_self_pipe.normal[0] || fd == signal_self_pipe.normal[1])
1989  goto check_pid;
1990  if (fd == signal_self_pipe.ub_main[0] || fd == signal_self_pipe.ub_main[1])
1991  goto check_pid;
1992  return 0;
1993 check_pid:
1994  if (signal_self_pipe.owner_process == getpid()) /* async-signal-safe */
1995  return 1;
1996  return 0;
1997 }
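
/*
 * Typical use from C extensions: when closing descriptors wholesale (for
 * example after fork), skip anything the VM has reserved for its signal and
 * timer pipes.  close_fds_from() below is a hypothetical helper, not part
 * of this file.
 */
#if 0 /* illustrative sketch only */
#include <unistd.h>

static void
close_fds_from(int lowfd, int maxfd)
{
    int fd;
    for (fd = lowfd; fd <= maxfd; fd++) {
        if (rb_reserved_fd_p(fd)) continue; /* leave the VM's pipes alone */
        (void)close(fd);
    }
}
#endif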
1998 
1999 rb_nativethread_id_t
2000 rb_nativethread_self(void)
2001 {
2002  return pthread_self();
2003 }
2004 
2005 #if USE_MJIT
2006 /* A function that wraps actual worker function, for pthread abstraction. */
2007 static void *
2008 mjit_worker(void *arg)
2009 {
2010  void (*worker_func)(void) = (void(*)(void))arg;
2011 
2012 #ifdef SET_CURRENT_THREAD_NAME
2013  SET_CURRENT_THREAD_NAME("ruby-mjitworker"); /* 16 byte including NUL */
2014 #endif
2015  worker_func();
2016  return NULL;
2017 }
2018 
2019 /* Launch MJIT thread. Returns FALSE if it fails to create thread. */
2020 int
2021 rb_thread_create_mjit_thread(void (*worker_func)(void))
2022 {
2023  pthread_attr_t attr;
2024  pthread_t worker_pid;
2025  int ret = FALSE;
2026 
2027  if (pthread_attr_init(&attr) != 0) return ret;
2028 
2029  /* jit_worker thread is not to be joined */
2030  if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) == 0
2031  && pthread_create(&worker_pid, &attr, mjit_worker, (void *)worker_func) == 0) {
2032  ret = TRUE;
2033  }
2034  pthread_attr_destroy(&attr);
2035  return ret;
2036 }
2037 #endif
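
/*
 * The create-detached pattern above, in isolation: initialize an attribute
 * object, mark the thread detached so it never needs pthread_join(), and
 * destroy the attribute regardless of the outcome.  start_routine and
 * spawn_detached are placeholders for illustration.
 */
#if 0 /* illustrative sketch only */
#include <pthread.h>
#include <stddef.h>

static void *start_routine(void *arg) { (void)arg; return NULL; }

static int
spawn_detached(void)
{
    pthread_attr_t attr;
    pthread_t tid;
    int ok = 0;

    if (pthread_attr_init(&attr) != 0) return 0;
    if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) == 0
        && pthread_create(&tid, &attr, start_routine, NULL) == 0) {
        ok = 1; /* thread cleans itself up when start_routine returns */
    }
    pthread_attr_destroy(&attr);
    return ok;
}
#endif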
2038 
2039 int
2040 rb_sigwait_fd_get(const rb_thread_t *th)
2041 {
2042  if (signal_self_pipe.normal[0] >= 0) {
2043  VM_ASSERT(signal_self_pipe.owner_process == getpid());
2044  /*
2045  * no need to keep firing the timer if any thread is sleeping
2046  * on the signal self-pipe
2047  */
2048  ubf_timer_disarm();
2049 
2050  if (ATOMIC_PTR_CAS(sigwait_th, THREAD_INVALID, th) == THREAD_INVALID) {
2051  return signal_self_pipe.normal[0];
2052  }
2053  }
2054  return -1; /* avoid thundering herd and work stealing/starvation */
2055 }
2056 
2057 void
2058 rb_sigwait_fd_put(const rb_thread_t *th, int fd)
2059 {
2060  const rb_thread_t *old;
2061 
2062  VM_ASSERT(signal_self_pipe.normal[0] == fd);
2063  old = ATOMIC_PTR_EXCHANGE(sigwait_th, THREAD_INVALID);
2064  if (old != th) assert(old == th);
2065 }
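
/*
 * rb_sigwait_fd_get/put implement single ownership of one shared fd: only
 * the thread that swaps the sentinel for itself may read it, everyone else
 * backs off, and put() restores the sentinel.  A condensed sketch of that
 * handoff with C11 atomics; `owner`, `sentinel_obj`, try_acquire and
 * release are hypothetical names.
 */
#if 0 /* illustrative sketch only */
#include <stdatomic.h>
#include <stddef.h>

static char sentinel_obj;                    /* "nobody owns the fd" */
#define SENTINEL ((void *)&sentinel_obj)

static void *_Atomic owner = SENTINEL;

static int
try_acquire(void *self, int shared_fd)
{
    void *expect = SENTINEL;
    /* Only the thread that wins SENTINEL -> self may use shared_fd. */
    if (atomic_compare_exchange_strong(&owner, &expect, self))
        return shared_fd;
    return -1;                               /* someone else holds it */
}

static void
release(void *self)
{
    /* Hand the fd back by restoring the sentinel; in a debug build one
     * would assert that the previous owner really was `self`. */
    void *prev = atomic_exchange(&owner, SENTINEL);
    (void)self;
    (void)prev;
}
#endif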
2066 
2067 #ifndef HAVE_PPOLL
2068 /* TODO: don't ignore sigmask */
2069 static int
2070 ruby_ppoll(struct pollfd *fds, nfds_t nfds,
2071  const struct timespec *ts, const sigset_t *sigmask)
2072 {
2073  int timeout_ms;
2074 
2075  if (ts) {
2076  int tmp, tmp2;
2077 
2078  if (ts->tv_sec > INT_MAX/1000)
2079  timeout_ms = INT_MAX;
2080  else {
2081  tmp = (int)(ts->tv_sec * 1000);
2082  /* round up 1ns to 1ms to avoid excessive wakeups for <1ms sleep */
2083  tmp2 = (int)((ts->tv_nsec + 999999L) / (1000L * 1000L));
2084  if (INT_MAX - tmp < tmp2)
2085  timeout_ms = INT_MAX;
2086  else
2087  timeout_ms = (int)(tmp + tmp2);
2088  }
2089  }
2090  else
2091  timeout_ms = -1;
2092 
2093  return poll(fds, nfds, timeout_ms);
2094 }
2095 # define ppoll(fds,nfds,ts,sigmask) ruby_ppoll((fds),(nfds),(ts),(sigmask))
2096 #endif
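
/*
 * The conversion above rounds the timespec up to whole milliseconds so a
 * sub-millisecond request still sleeps instead of busy-looping: for example
 * { .tv_sec = 0, .tv_nsec = 1 } becomes a 1 ms poll() timeout and
 * { .tv_sec = 2, .tv_nsec = 500000 } becomes 2001 ms.  A standalone version
 * of just that arithmetic (timespec_to_poll_timeout is a hypothetical name):
 */
#if 0 /* illustrative sketch only */
#include <limits.h>
#include <time.h>

static int
timespec_to_poll_timeout(const struct timespec *ts)
{
    int ms, rem;

    if (!ts) return -1;                          /* block indefinitely */
    if (ts->tv_sec > INT_MAX / 1000) return INT_MAX;
    ms = (int)(ts->tv_sec * 1000);
    rem = (int)((ts->tv_nsec + 999999L) / 1000000L); /* round up to 1 ms */
    return (INT_MAX - ms < rem) ? INT_MAX : ms + rem;
}
#endif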
2097 
2098 void
2099 rb_sigwait_sleep(rb_thread_t *th, int sigwait_fd, const rb_hrtime_t *rel)
2100 {
2101  struct pollfd pfd;
2102  struct timespec ts;
2103 
2104  pfd.fd = sigwait_fd;
2105  pfd.events = POLLIN;
2106 
2107  if (!BUSY_WAIT_SIGNALS && ubf_threads_empty()) {
2108  (void)ppoll(&pfd, 1, rb_hrtime2timespec(&ts, rel), 0);
2109  check_signals_nogvl(th, sigwait_fd);
2110  }
2111  else {
2112  rb_hrtime_t to = RB_HRTIME_MAX, end;
2113  int n = 0;
2114 
2115  if (rel) {
2116  to = *rel;
2117  end = rb_hrtime_add(rb_hrtime_now(), to);
2118  }
2119  /*
2120  * tricky: this needs to return on spurious wakeup (no auto-retry).
2121  * But we also need to distinguish between periodic quantum
2122  * wakeups, so we care about the result of consume_communication_pipe
2123  *
2124  * We want to avoid spurious wakeup for Mutex#sleep compatibility
2125  * [ruby-core:88102]
2126  */
2127  for (;;) {
2128  const rb_hrtime_t *sto = sigwait_timeout(th, sigwait_fd, &to, &n);
2129 
2130  if (n) return;
2131  n = ppoll(&pfd, 1, rb_hrtime2timespec(&ts, sto), 0);
2132  if (check_signals_nogvl(th, sigwait_fd))
2133  return;
2134  if (n || (th && RUBY_VM_INTERRUPTED(th->ec)))
2135  return;
2136  if (rel && hrtime_update_expire(&to, end))
2137  return;
2138  }
2139  }
2140 }
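
/*
 * The retry loop above re-derives the remaining timeout from an absolute
 * deadline on every iteration, so spurious wakeups shorten the sleep rather
 * than restart it.  The same idea with CLOCK_MONOTONIC and hypothetical
 * helpers (now_ns, remaining_ns):
 */
#if 0 /* illustrative sketch only */
#include <stdint.h>
#include <time.h>

static uint64_t
now_ns(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

/* Nanoseconds still left before `deadline`, or 0 if it already passed.
 * Callers loop:  while ((left = remaining_ns(deadline)) > 0) wait(left); */
static uint64_t
remaining_ns(uint64_t deadline)
{
    uint64_t now = now_ns();
    return deadline > now ? deadline - now : 0;
}
#endif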
2141 
2142 /*
2143  * we need to guarantee wakeups from native_ppoll_sleep because
2144  * ubf_select may not be going through ubf_list if other threads
2145  * are all sleeping.
2146  */
2147 static void
2148 ubf_ppoll_sleep(void *ignore)
2149 {
2150  rb_thread_wakeup_timer_thread_fd(signal_self_pipe.ub_main[1]);
2151 }
2152 
2153 /*
2154  * Single CPU setups benefit from explicit sched_yield() before ppoll(),
2155  * since threads may be too starved to enter the GVL waitqueue for
2156  * us to detect contention. Instead, we want to kick other threads
2157  * so they can run and possibly prevent us from entering slow paths
2158  * in ppoll() or similar syscalls.
2159  *
2160  * Confirmed on FreeBSD 11.2 and Linux 4.19.
2161  * [ruby-core:90417] [Bug #15398]
2162  */
2163 #define GVL_UNLOCK_BEGIN_YIELD(th) do { \
2164  const native_thread_data_t *next; \
2165  rb_global_vm_lock_t *gvl = rb_ractor_gvl(th->ractor); \
2166  RB_GC_SAVE_MACHINE_CONTEXT(th); \
2167  rb_native_mutex_lock(&gvl->lock); \
2168  next = gvl_release_common(gvl); \
2169  rb_native_mutex_unlock(&gvl->lock); \
2170  if (!next && rb_ractor_living_thread_num(th->ractor) > 1) { \
2171  native_thread_yield(); \
2172  }
2173 
2174 /*
2175  * This function does not exclusively acquire sigwait_fd, so it
2176  * cannot safely read from it. However, it can be woken up in
2177  * 4 ways:
2178  *
2179  * 1) ubf_ppoll_sleep (from another thread)
2180  * 2) rb_thread_wakeup_timer_thread (from signal handler)
2181  * 3) any unmasked signal hitting the process
2182  * 4) periodic ubf timer wakeups (after 3)
2183  */
2184 static void
2185 native_ppoll_sleep(rb_thread_t *th, rb_hrtime_t *rel)
2186 {
2187  rb_native_mutex_lock(&th->interrupt_lock);
2188  th->unblock.func = ubf_ppoll_sleep;
2189  rb_native_mutex_unlock(&th->interrupt_lock);
2190 
2191  GVL_UNLOCK_BEGIN_YIELD(th);
2192 
2193  if (!RUBY_VM_INTERRUPTED(th->ec)) {
2194  struct pollfd pfd[2];
2195  struct timespec ts;
2196 
2197  pfd[0].fd = signal_self_pipe.normal[0]; /* sigwait_fd */
2198  pfd[1].fd = signal_self_pipe.ub_main[0];
2199  pfd[0].events = pfd[1].events = POLLIN;
2200  if (ppoll(pfd, 2, rb_hrtime2timespec(&ts, rel), 0) > 0) {
2201  if (pfd[1].revents & POLLIN) {
2202  (void)consume_communication_pipe(pfd[1].fd);
2203  }
2204  }
2205  /*
2206  * do not read the sigwait_fd here; leave it to uplevel callers
2207  * or other threads, otherwise we may steal wakeups and starve
2208  * other threads
2209  */
2210  }
2211  unblock_function_clear(th);
2212  GVL_UNLOCK_END(th);
2213 }
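
/*
 * A condensed sketch of the two-descriptor wait used above: watch both fds,
 * but only drain the one this thread owns and leave the other for whichever
 * thread holds it.  wait_two() and its parameters are hypothetical, and the
 * owned fd is assumed to be non-blocking.
 */
#if 0 /* illustrative sketch only */
#include <poll.h>
#include <unistd.h>

static void
wait_two(int watched_only_fd, int owned_fd, int timeout_ms)
{
    struct pollfd pfd[2];

    pfd[0].fd = watched_only_fd;   /* can wake us, but is never read here */
    pfd[1].fd = owned_fd;          /* ours to drain */
    pfd[0].events = pfd[1].events = POLLIN;

    if (poll(pfd, 2, timeout_ms) > 0 && (pfd[1].revents & POLLIN)) {
        char buf[64];
        while (read(owned_fd, buf, sizeof(buf)) > 0)
            ; /* consume queued wakeup bytes */
    }
}
#endif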
2214 
2215 static void
2216 native_sleep(rb_thread_t *th, rb_hrtime_t *rel)
2217 {
2218  int sigwait_fd = rb_sigwait_fd_get(th);
2219  rb_ractor_blocking_threads_inc(th->ractor, __FILE__, __LINE__);
2220 
2221  if (sigwait_fd >= 0) {
2222  rb_native_mutex_lock(&th->interrupt_lock);
2223  th->unblock.func = ubf_sigwait;
2224  rb_native_mutex_unlock(&th->interrupt_lock);
2225 
2226  GVL_UNLOCK_BEGIN_YIELD(th);
2227 
2228  if (!RUBY_VM_INTERRUPTED(th->ec)) {
2229  rb_sigwait_sleep(th, sigwait_fd, rel);
2230  }
2231  else {
2232  check_signals_nogvl(th, sigwait_fd);
2233  }
2234  unblock_function_clear(th);
2235  GVL_UNLOCK_END(th);
2236  rb_sigwait_fd_put(th, sigwait_fd);
2237  rb_sigwait_fd_migrate(th->vm);
2238  }
2239  else if (th == th->vm->ractor.main_thread) { /* always able to handle signals */
2240  native_ppoll_sleep(th, rel);
2241  }
2242  else {
2243  native_cond_sleep(th, rel);
2244  }
2245 
2246  rb_ractor_blocking_threads_dec(th->ractor, __FILE__, __LINE__);
2247 }
2248 
2249 #if UBF_TIMER == UBF_TIMER_PTHREAD
2250 static void *
2251 timer_pthread_fn(void *p)
2252 {
2253  rb_vm_t *vm = p;
2254  pthread_t main_thread_id = vm->ractor.main_thread->thread_id;
2255  struct pollfd pfd;
2256  int timeout = -1;
2257  int ccp;
2258 
2259  pfd.fd = timer_pthread.low[0];
2260  pfd.events = POLLIN;
2261 
2262  while (system_working > 0) {
2263  (void)poll(&pfd, 1, timeout);
2264  ccp = consume_communication_pipe(pfd.fd);
2265 
2266  if (system_working > 0) {
2267  if (ATOMIC_CAS(timer_pthread.armed, 1, 1)) {
2268  pthread_kill(main_thread_id, SIGVTALRM);
2269 
2270  if (rb_signal_buff_size() || !ubf_threads_empty()) {
2271  timeout = TIME_QUANTUM_MSEC;
2272  }
2273  else {
2274  ATOMIC_SET(timer_pthread.armed, 0);
2275  timeout = -1;
2276  }
2277  }
2278  else if (ccp) {
2279  pthread_kill(main_thread_id, SIGVTALRM);
2280  ATOMIC_SET(timer_pthread.armed, 0);
2281  timeout = -1;
2282  }
2283  }
2284  }
2285 
2286  return 0;
2287 }
2288 #endif /* UBF_TIMER_PTHREAD */
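
/*
 * The pthread-based timer is driven by the self-pipe idiom: the loop above
 * polls the read end with a timeout and other threads wake (or re-arm) it
 * by writing a byte to the write end.  A minimal wakeup/drain pair with
 * hypothetical names; the read end is assumed to be non-blocking.
 */
#if 0 /* illustrative sketch only */
#include <errno.h>
#include <unistd.h>

static void
wake_timer_thread(int write_fd)
{
    const char buf = '!';
    /* One byte is enough; a full pipe just means a wakeup is already queued. */
    while (write(write_fd, &buf, 1) < 0 && errno == EINTR)
        ;
}

static void
drain_wakeups(int read_fd)
{
    char buf[64];
    /* Consume everything queued so the next poll() blocks again. */
    while (read(read_fd, buf, sizeof(buf)) > 0)
        ;
}
#endif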
2289 
2290 static VALUE
2291 ubf_caller(void *ignore)
2292 {
2293  rb_thread_sleep_forever();
2294 
2295  return Qfalse;
2296 }
2297 
2298 /*
2299  * Called if and only if one thread is running, and
2300  * the unblock function is NOT async-signal-safe
2301  * This assumes USE_THREAD_CACHE is true for performance reasons
2302  */
2303 static VALUE
2304 rb_thread_start_unblock_thread(void)
2305 {
2306  return rb_thread_create(ubf_caller, 0);
2307 }
2308 #endif /* THREAD_SYSTEM_DEPENDENT_IMPLEMENTATION */