Table of Contents
我们的 redis-mgr 会默认对集群中的第一个twemproxy使用 -v 5 打印 notice 级别的日志, 切日志使用的是, kill -HUP.
线上发现两次切日志之后twemproxy停止响应, gdb:
Loaded symbols for /lib64/ld-linux-x86-64.so.2 __lll_lock_wait_private () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:97 97 ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S: No such file or directory. in ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S (gdb) bt #0 __lll_lock_wait_private () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:97 #1 0x00007f0393918b2d in _L_lock_1934 () at tzset.c:138 #2 0x00007f03939188e7 in __tz_convert (timer=0x7f0393bfd9f4, use_localtime=1, tp=0x7f0393c00360) at tzset.c:619 #3 0x0000000000410fba in _log (file=0x431036 "nc_signal.c", line=122, panic=0, fmt=0x43101c "signal %d (%s) received%s") at nc_log.c:140 #4 0x000000000041016c in signal_handler (signo=<value optimized out>) at nc_signal.c:122 #5 <signal handler called> #6 0x00007f03939552d5 in *__GI___xstat (vers=<value optimized out>, name=<value optimized out>, buf=0x7fff2a266fc0) at ../sysdeps/unix/sysv/linux/wordsize-64/xstat.c:38 #7 0x00007f03939193f0 in __tzfile_read (file=0x7f03939c5b7c "/etc/localtime", extra=0, extrap=<value optimized out>) at tzfile.c:173 #8 0x00007f03939187a4 in tzset_internal (always=<value optimized out>, explicit=<value optimized out>) at tzset.c:439 #9 0x00007f0393918909 in __tz_convert (timer=0x7fff2a267158, use_localtime=1, tp=0x7f0393c00360) at tzset.c:624 #10 0x0000000000410fba in _log (file=0x42ed0c "nc_server.c", line=645, panic=0, fmt=0x42ece0 "key '%.*s' on dist %d maps to server '%.*s'") at nc_log.c:140 #11 0x000000000040887e in server_pool_server (ctx=0x1c0e090, pool=0x1c0e360, key=0x1c150c2 "key:__rand_int__\r\n", keylen=16) at nc_server.c:644 #12 server_pool_conn (ctx=0x1c0e090, pool=0x1c0e360, key=0x1c150c2 "key:__rand_int__\r\n", keylen=16) at nc_server.c:668 #13 0x000000000040ae04 in req_forward (ctx=0x1c0e090, conn=0x1c14e70, msg=0x1c14f70, nmsg=<value optimized out>) at nc_request.c:482 #14 req_recv_done (ctx=0x1c0e090, conn=0x1c14e70, msg=0x1c14f70, nmsg=<value optimized out>) at nc_request.c:524 #15 0x0000000000409f00 in msg_parsed (ctx=0x1c0e090, conn=0x1c14e70) at nc_message.c:451 #16 msg_parse (ctx=0x1c0e090, conn=0x1c14e70) at nc_message.c:581 #17 msg_recv_chain (ctx=0x1c0e090, conn=0x1c14e70) at nc_message.c:642 #18 msg_recv (ctx=0x1c0e090, conn=0x1c14e70) at nc_message.c:681 #19 0x0000000000406179 in core_recv (arg=0x1c14e70, events=<value optimized out>) at nc_core.c:158 #20 core_core (arg=0x1c14e70, events=<value optimized out>) at nc_core.c:293 #21 0x000000000041bbb8 in event_wait (evb=0x1c0e570, timeout=<value optimized out>) at nc_epoll.c:269 #22 0x0000000000405f89 in core_loop (ctx=0x1c0e090) at nc_core.c:316 #23 0x0000000000412a38 in nc_run (argc=<value optimized out>, argv=0x7fff2a267690) at nc.c:531 #24 main (argc=<value optimized out>, argv=0x7fff2a267690) at nc.c:580 (gdb)
both of the normal Program flow and the signal_handler call __tz_convert(), it looks that __tz_convert use a lock and this make the signal_handler never return, let's have a look __tz_convert
nc_signal.c:
void signal_handler(int signo) { struct signal *sig; void (*action)(void); char *actionstr; bool done; switch (signo) { case SIGHUP: actionstr = ", reopening log file"; action = log_reopen; break; } loga("signal %d (%s) received%s", signo, sig->signame, actionstr); if (action != NULL) { action(); } if (done) { exit(1); } }
loga 打印日志的时候, 会调用localtime():
void _log(const char *file, int line, int panic, const char *fmt, ...) { ... t = time(NULL); local = localtime(&t); timestr = asctime(local); len += nc_scnprintf(buf + len, size - len, "[%.*s] %s:%d ", strlen(timestr) - 1, timestr, file, line); va_start(args, fmt); len += nc_vscnprintf(buf + len, size - len, fmt, args); va_end(args); buf[len++] = '\n'; n = nc_write(l->fd, buf, len); }
首先 localtime() 不是线程安全的, 同时它也不是signal-safe的, 所以在signal_handler里面使用localtime()就可能造成死锁.
glibc/time/localtime.c:
22 struct tm _tmbuf; 23 24 25 /* Return the `struct tm' representation of *T in local time, 26 using *TP to store the result. */ 27 struct tm * 28 __localtime_r (t, tp) 29 const time_t *t; 30 struct tm *tp; 31 { 32 return __tz_convert (t, 1, tp); 33 } 34 weak_alias (__localtime_r, localtime_r) 35 36 37 /* Return the `struct tm' representation of *T in local time. */ 38 struct tm * 39 localtime (t) 40 const time_t *t; 41 { 42 return __tz_convert (t, 1, &_tmbuf); 43 }
localtime() 和 localtime_r() 都是简单的调用 __tz_convert(), 区别只是在于 localtime() 使用了一个全局变量 _tmbuf , 所以不是线程安全的.
但是 __tz_convert() 不是 signal-safe 的.
glibc/time/tzset.c:
39 /* This locks all the state variables in tzfile.c and this file. */ 40 __libc_lock_define_initialized (static, tzset_lock) ... 613 /* Return the `struct tm' representation of *TIMER in the local timezone. 614 Use local time if USE_LOCALTIME is nonzero, UTC otherwise. */ 615 struct tm * 616 __tz_convert (const time_t *timer, int use_localtime, struct tm *tp) 617 { 618 long int leap_correction; 619 int leap_extra_secs; 620 621 if (timer == NULL) 622 { 623 __set_errno (EINVAL); 624 return NULL; 625 } 626 627 __libc_lock_lock (tzset_lock); 628 629 /* Update internal database according to current TZ setting. 630 POSIX.1 8.3.7.2 says that localtime_r is not required to set tzname. 631 This is a good idea since this allows at least a bit more parallelism. */ 632 tzset_internal (tp == &_tmbuf && use_localtime, 1);
这里用了一个锁 tzset_lock, 它是用来保护对 tzset_internal() 的调用的, tzset_internal 是一个类似单态的模式, 第一次调用这个函数的时候初始化一些变量:
399 static void 400 internal_function 401 tzset_internal (always, explicit) 402 int always; 403 int explicit; 404 { 405 static int is_initialized; 406 const char *tz; 407 408 if (is_initialized && !always) 409 return; 410 is_initialized = 1; ... 465 __tzset_parse_tz (tz); 466 }
tzset_internal 是用于初始化这个程序的时区设置, 并设置到 tz_rules 这个全局变量:
48 /* This structure contains all the information about a 49 timezone given in the POSIX standard TZ envariable. */ 50 typedef struct 51 { 52 const char *name; 53 54 /* When to change. */ 55 enum { J0, J1, M } type; /* Interpretation of: */ 56 unsigned short int m, n, d; /* Month, week, day. */ 57 int secs; /* Time of day. */ 58 59 long int offset; /* Seconds east of GMT (west if < 0). */ 60 61 /* We cache the computed time of change for a 62 given year so we don't have to recompute it. */ 63 time_t change; /* When to change to this zone. */ 64 int computed_for; /* Year above is computed for. */ 65 } tz_rule; 66 67 /* tz_rules[0] is standard, tz_rules[1] is daylight. */ 68 static tz_rule tz_rules[2];
glibc作为一个库, 其实有三个地方可以初始化一些变量:
所以glibc的 localtime() 选择在第一次调用时初始化, 熟悉单态模式的话就了解, 这里必须加一个锁. 就是这个锁导致 localtime() 不是signal-safe的.
free/malloc也用了锁:
2907 __libc_free (void *mem) 2908 { ... 2942 _int_free (ar_ptr, p, 0); 2943 } 3798 static void 3799 _int_free (mstate av, mchunkptr p, int have_lock) 3919 else if (!chunk_is_mmapped(p)) { 3920 if (! have_lock) { 3921 (void)mutex_lock(&av->mutex); 3922 locked = 1; 3923 } ... 4052 if (! have_lock) { 4053 assert (locked); 4054 (void)mutex_unlock(&av->mutex); 4055 } 4056 }
ning@ning-laptop ~/idning/langtest/c/signal-safe$ cat test.c #include <signal.h> #include <stdio.h> #include <time.h> #include <unistd.h> void handler(int signum) { char result[100]; time_t now; struct tm time1; now = time(NULL); localtime_r(&now, &time1); strftime(result, 100, "%T", &time1); printf("At %s, user pressed Ctrl-C\n", result); } int main (void) { time_t now; struct tm ltime; int i; if (signal(SIGHUP, handler) == SIG_IGN){ return; } now = time(NULL); for(i=0 ; ; i++){ localtime_r(&now, <ime); if(i%1000 == 0){ printf("%d\n", i); } } return 0; } ning@ning-laptop ~/idning/langtest/c/signal-safe$ cat Makefile all: cc test.c ./a.out & sleep 1 pkill -HUP -f 'a.out' ps aux | grep a.out gdb -p `pidof a.out`
(gdb) bt #0 __lll_lock_wait_private () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:97 #1 0x00002b928c8bfb2d in _L_lock_1934 () at tzset.c:138 #2 0x00002b928c8bf8e7 in __tz_convert (timer=0x2b928cba49f4, use_localtime=1, tp=0x7fff5cef1840) at tzset.c:619 #3 0x0000000000400708 in handler () #4 <signal handler called> #5 _IO_str_init_static_internal (sf=0x7fff5cef1d90, ptr=0x7de354 "8", size=<value optimized out>, pstart=0x0) at strops.c:63 #6 0x00002b928c88d265 in _IO_vsscanf (string=0x7de354 "8", format=0x2b928c96cb58 "%hu%n:%hu%n:%hu%n", args=0x7fff5cef1eb0) at iovsscanf.c:44 #7 0x00002b928c87a5b8 in __sscanf (s=0x0, format=0x7de354 "8") at sscanf.c:34 #8 0x00002b928c8bee32 in __tzset_parse_tz (tz=0x7de354 "8") at tzset.c:212 #9 0x00002b928c8c00ac in __tzfile_compute (timer=1402735871, use_localtime=<value optimized out>, leap_correct=0x7fff5cef2090, leap_hit=0x7fff5cef209c, tp=0x7fff5cef20c0) at tzfile.c:682 #10 0x00002b928c8bf9a7 in __tz_convert (timer=0x7fff5cef2100, use_localtime=1, tp=0x7fff5cef20c0) at tzset.c:627 #11 0x00000000004007a1 in main ()
这个栈和twemproxy出现的栈是一样的.
有没有``signal-safe`` + thread-safe 的localtime实现?
至少是可以自己来实现这样一个逻辑的.
修改signal_handler, 只使用signal-safe的函数:
pull request 在此: https://github.com/twitter/twemproxy/pull/231
http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap03.html
定义
3.313 Reentrant Function A function whose effect, when called by two or more threads, is guaranteed to be as if the threads each executed the function one after another in an undefined order, even if the actual execution is interleaved. Thread-Safe A function that may be safely invoked concurrently by multiple threads. Each function defined in the System Interfaces volume of IEEE Std 1003.1-2001 is thread-safe unless explicitly stated otherwise. Examples are any "pure" function, a function which holds a mutex locked while it is accessing static storage, or objects shared among threads. 3.26 Async-Signal-Safe Function A function that may be invoked, without restriction, from signal-catching functions. No function is async-signal-safe unless explicitly described as such.
可重入函数一定是线程安全的
可重入函数一定是Async-Signal-Safe的.
因为malloc/free不是signal-safe, 而gblic 的 vsnprintf用了malloc, 所以sprintf也不是signal-safe
最好严格遵循 man 7 signal 的列表, 注意不同系统中async-signal-safe函数列表不同, 比如这里是FreeBSD的库中 signal-safe情况:
http://www.openbsd.org/cgi-bin/man.cgi?query=signal
这里有glibc 的各个函数列表:
http://www.gnu.org/software/libc/manual/html_node/Library-Summary.html#Library-Summary
下面文档定义了glibc中几种safe概念:
http://www.gnu.org/software/libc/manual/html_node/POSIX-Safety-Concepts.html
MT-Safe : Thread-Safe functions are safe to call in the presence of other threads
AS-Safe : Async-Signal-Safe functions are safe to call from asynchronous signal handlers. AS, in AS-Safe, stands for Asynchronous Signal.
- AC-Safe : Async-Cancel-Safe functions are safe to call when asynchronous cancellation is enabled. AC in AC-Safe stands for Asynchronous Cancellation.
- The POSIX standard defines only three functions to be AC-Safe: pthread_cancel, pthread_setcancelstate, and pthread_setcanceltype
下面文档定义了glibc中, 导致unsafe的原因:
http://www.gnu.org/software/libc/manual/html_node/Unsafe-Features.html
- lock Functions marked with lock as an AS-Unsafe feature may be interrupted by a signal while holding a non-recursive lock. If the signal handler calls another such function that takes the same lock, the result is a deadlock.
- corrupt Functions marked with corrupt as an AS-Unsafe feature may corrupt data structures and misbehave when they interrupt,
- heap Functions marked with heap may call heap memory management functions from the malloc/free family of functions and are only as safe as those functions.
例如 vsnprintf 就是AS-Unsafe, 因为它用了malloc/free这样的heap函数:
— Function: int vsnprintf (char *s, size_t size, const char *template, va_list ap) Preliminary: | MT-Safe locale | AS-Unsafe heap | AC-Unsafe mem | See POSIX Safety Concepts.
所有printf系列函数都是 AS-Unsafe 的.
FreeBSD中更多的signal-safe函数:
ffs, htonl, htons, memccpy, memchr, memcmp, memcpy, memmove, memset, ntohl, ntohs, stpcpy, stpncpy, strcat, strchr, strcmp, strcpy, strcspn, strlen, strncat, strncmp, strncpy, strnlen, strpbrk, strrchr, strspn, strstr, strtok_r, wcpcpy, wcpncpy, wcscat, wcschr, wcscmp, wcscpy, wcscspn, wcslen, wcsncat, wcsncmp, wcsncpy, wcsnlen, wcspbrk, wcsrchr, wcsspn, wcsstr, wcstok, wmemchr, wmemcmp, wmemcpy, wmemmove, wmemset
上面这些函数在目前已知的实现中, 都是signal-safe的, 参考下文, 这些函数已经加到signal-safe函数列表中, 参见: http://austingroupbugs.net/view.php?id=692#c1609
nginx自己实现了一套printf:
u_char * ngx_vslprintf(u_char *buf, u_char *last, const char *fmt, va_list args) { u_char *p, zero; ngx_uint_t width, sign, hex, max_width, frac_width, n; while (*fmt && buf < last) { /* * "buf < last" means that we could copy at least one character: * the plain character, "%%", "%c", and minus without the checking */ if (*fmt == '%') { i64 = 0; ui64 = 0;
ngx还使用了时间cache, 从而能够在打日志的时候使用正确的时间字符串.
ngx 中时间cache是通过timer信号来更新的, 略复杂.
也是实现了一族safe函数.
size_t my_safe_snprintf(char* to, size_t n, const char* fmt, ...) ATTRIBUTE_FORMAT(printf, 3, 4); /** A (very) limited version of snprintf, which writes the result to STDERR. @sa my_safe_snprintf Implemented with simplicity, and async-signal-safety in mind. @note Has an internal buffer capacity of 512 bytes, which should suffice for our signal handling routines. */ size_t my_safe_printf_stderr(const char* fmt, ...) ATTRIBUTE_FORMAT(printf, 1, 2);