Crash on thread id wrap around
On GNU/Linux, if the target reuses the TID of a thread that GDB still
has in its list marked as THREAD_EXITED, GDB crashes, like:
(gdb) continue
Continuing.
src/gdb/thread.c:789: internal-error: set_running: Assertion `tp->state != THREAD_EXITED' failed.
A problem internal to GDB has been detected,
further debugging may prove unreliable.
Quit this debugging session? (y or n) FAIL: gdb.threads/tid-reuse.exp: continue to breakpoint: after_reuse_time (GDB internal error)
Here:
(top-gdb) bt
#0 internal_error (file=0x953dd8 "src/gdb/thread.c", line=789, fmt=0x953da0 "%s: Assertion `%s' failed.")
at src/gdb/common/errors.c:54
#1 0x0000000000638514 in set_running (ptid=..., running=1) at src/gdb/thread.c:789
#2 0x00000000004bda42 in linux_handle_extended_wait (lp=0x16f5760, status=0, stopping=0) at src/gdb/linux-nat.c:2114
#3 0x00000000004bfa24 in linux_nat_filter_event (lwpid=20570, status=198015) at src/gdb/linux-nat.c:3127
#4 0x00000000004c070e in linux_nat_wait_1 (ops=0xe193d0, ptid=..., ourstatus=0x7fffffffd2c0, target_options=1) at src/gdb/linux-nat.c:3478
#5 0x00000000004c1015 in linux_nat_wait (ops=0xe193d0, ptid=..., ourstatus=0x7fffffffd2c0, target_options=1) at src/gdb/linux-nat.c:3722
#6 0x00000000004c92d2 in thread_db_wait (ops=0xd80b60 <thread_db_ops>, ptid=..., ourstatus=0x7fffffffd2c0, options=1)
at src/gdb/linux-thread-db.c:1525
#7 0x000000000066db43 in delegate_wait (self=0xd80b60 <thread_db_ops>, arg1=..., arg2=0x7fffffffd2c0, arg3=1) at src/gdb/target-delegates.c:116
#8 0x000000000067e54b in target_wait (ptid=..., status=0x7fffffffd2c0, options=1) at src/gdb/target.c:2206
#9 0x0000000000625111 in fetch_inferior_event (client_data=0x0) at src/gdb/infrun.c:3275
#10 0x0000000000648a3b in inferior_event_handler (event_type=INF_REG_EVENT, client_data=0x0) at src/gdb/inf-loop.c:56
#11 0x00000000004c2ecb in handle_target_event (error=0, client_data=0x0) at src/gdb/linux-nat.c:4655
I managed to come up with a test that reliably reproduces this. It
spawns enough threads for the pid number space to wrap around, so
could potentially take a while. On my box that's 4 seconds; on
gcc110, a PPC box which has max_pid set to 65536, it's over 10
seconds. So I made the test compute how long that would take, and cap
the time waited if it would be unreasonably long.
Tested on x86_64 Fedora 20.
gdb/ChangeLog:
2015-04-01 Pedro Alves <palves@redhat.com>
* linux-thread-db.c (record_thread): Readd the thread to gdb's
list if it was marked exited.
gdb/testsuite/ChangeLog:
2015-04-01 Pedro Alves <palves@redhat.com>
* gdb.threads/tid-reuse.c: New file.
* gdb.threads/tid-reuse.exp: New file.
2015-04-01 12:38:06 +00:00
|
|
|
/* This testcase is part of GDB, the GNU debugger.
|
|
|
|
|
2016-01-01 04:33:14 +00:00
|
|
|
Copyright 2015-2016 Free Software Foundation, Inc.
|
Crash on thread id wrap around
On GNU/Linux, if the target reuses the TID of a thread that GDB still
has in its list marked as THREAD_EXITED, GDB crashes, like:
(gdb) continue
Continuing.
src/gdb/thread.c:789: internal-error: set_running: Assertion `tp->state != THREAD_EXITED' failed.
A problem internal to GDB has been detected,
further debugging may prove unreliable.
Quit this debugging session? (y or n) FAIL: gdb.threads/tid-reuse.exp: continue to breakpoint: after_reuse_time (GDB internal error)
Here:
(top-gdb) bt
#0 internal_error (file=0x953dd8 "src/gdb/thread.c", line=789, fmt=0x953da0 "%s: Assertion `%s' failed.")
at src/gdb/common/errors.c:54
#1 0x0000000000638514 in set_running (ptid=..., running=1) at src/gdb/thread.c:789
#2 0x00000000004bda42 in linux_handle_extended_wait (lp=0x16f5760, status=0, stopping=0) at src/gdb/linux-nat.c:2114
#3 0x00000000004bfa24 in linux_nat_filter_event (lwpid=20570, status=198015) at src/gdb/linux-nat.c:3127
#4 0x00000000004c070e in linux_nat_wait_1 (ops=0xe193d0, ptid=..., ourstatus=0x7fffffffd2c0, target_options=1) at src/gdb/linux-nat.c:3478
#5 0x00000000004c1015 in linux_nat_wait (ops=0xe193d0, ptid=..., ourstatus=0x7fffffffd2c0, target_options=1) at src/gdb/linux-nat.c:3722
#6 0x00000000004c92d2 in thread_db_wait (ops=0xd80b60 <thread_db_ops>, ptid=..., ourstatus=0x7fffffffd2c0, options=1)
at src/gdb/linux-thread-db.c:1525
#7 0x000000000066db43 in delegate_wait (self=0xd80b60 <thread_db_ops>, arg1=..., arg2=0x7fffffffd2c0, arg3=1) at src/gdb/target-delegates.c:116
#8 0x000000000067e54b in target_wait (ptid=..., status=0x7fffffffd2c0, options=1) at src/gdb/target.c:2206
#9 0x0000000000625111 in fetch_inferior_event (client_data=0x0) at src/gdb/infrun.c:3275
#10 0x0000000000648a3b in inferior_event_handler (event_type=INF_REG_EVENT, client_data=0x0) at src/gdb/inf-loop.c:56
#11 0x00000000004c2ecb in handle_target_event (error=0, client_data=0x0) at src/gdb/linux-nat.c:4655
I managed to come up with a test that reliably reproduces this. It
spawns enough threads for the pid number space to wrap around, so
could potentially take a while. On my box that's 4 seconds; on
gcc110, a PPC box which has max_pid set to 65536, it's over 10
seconds. So I made the test compute how long that would take, and cap
the time waited if it would be unreasonably long.
Tested on x86_64 Fedora 20.
gdb/ChangeLog:
2015-04-01 Pedro Alves <palves@redhat.com>
* linux-thread-db.c (record_thread): Readd the thread to gdb's
list if it was marked exited.
gdb/testsuite/ChangeLog:
2015-04-01 Pedro Alves <palves@redhat.com>
* gdb.threads/tid-reuse.c: New file.
* gdb.threads/tid-reuse.exp: New file.
2015-04-01 12:38:06 +00:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; either version 3 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
|
|
|
|
|
|
|
#define _GNU_SOURCE
|
|
|
|
#include <assert.h>
|
|
|
|
#include <pthread.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <limits.h>
|
|
|
|
|
|
|
|
/* How many threads fit in the target's thread number space. */
|
|
|
|
long tid_max = -1;  /* -1 means unknown; main only assigns it under __linux__.  */
|
|
|
|
|
|
|
|
/* Number of threads spawned. */
|
|
|
|
unsigned long thread_counter;  /* Incremented by spawner_thread_func, read by main.  */
|
|
|
|
|
|
|
|
/* How long (in seconds) it takes to spawn as many threads as fit in the thread
|
|
|
|
number space. On systems where thread IDs are just monotonically
|
|
|
|
incremented, this is enough for the tid numbers to wrap around. On
|
|
|
|
targets that randomize thread IDs, this is enough time to give each
|
|
|
|
number in the thread number space some chance of reuse. It'll be
|
|
|
|
capped to a lower value if we can't compute it. */
|
|
|
|
unsigned int reuse_time = -1;  /* -1 converts to UINT_MAX; main caps the value at 60.  */
|
|
|
|
|
|
|
|
/* Thread entry point for the threads created by spawner_thread_func.
   Sleeps for one microsecond and returns NULL.  ARG is unused.  */

void *
do_nothing_thread_func (void *arg)
{
  usleep (1);
  return NULL;
}
|
|
|
|
|
|
|
|
void *
|
|
|
|
spawner_thread_func (void *arg)
|
|
|
|
{
|
|
|
|
while (1)
|
|
|
|
{
|
|
|
|
pthread_t child;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
thread_counter++;
|
|
|
|
|
|
|
|
rc = pthread_create (&child, NULL, do_nothing_thread_func, NULL);
|
|
|
|
assert (rc == 0);
|
|
|
|
|
|
|
|
rc = pthread_join (child, NULL);
|
|
|
|
assert (rc == 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Empty marker function.  main calls it once the counting period is
   over and REUSE_TIME has been computed and printed.
   NOTE(review): presumably a breakpoint location for the test script;
   confirm against tid-reuse.exp.  */

void
after_count (void)
{
}
|
|
|
|
|
|
|
|
/* Empty marker function.  main calls it after sleeping for REUSE_TIME
   seconds, i.e. once enough time has passed for TID reuse to occur.
   The test continues to a breakpoint placed on this function.  */

void
after_reuse_time (void)
{
}
|
|
|
|
|
|
|
|
#ifdef __linux__
|
|
|
|
|
|
|
|
/* Get the running system's configured pid_max.

   Reads the first line of /proc/sys/kernel/pid_max and parses it as a
   decimal integer.  Returns that value on success.  Returns -1 if the
   file cannot be opened (a message is printed to stderr in that
   case), if no line can be read, or if the line does not start with a
   positive number smaller than INT_MAX.  */

static int
linux_proc_get_pid_max (void)
{
  static const char filename[] = "/proc/sys/kernel/pid_max";
  FILE *file;
  char buf[100];
  int retval = -1;

  file = fopen (filename, "r");
  if (file == NULL)
    {
      fprintf (stderr, "unable to open %s\n", filename);
      return -1;
    }

  if (fgets (buf, sizeof (buf), file) != NULL)
    {
      char *end;
      long value = strtol (buf, &end, 10);

      /* Reject lines with no digits and values that do not fit the
         int return type.  INT_MAX itself is rejected as well: where
         long and int have the same width, that is what strtol returns
         for an out-of-range number.  */
      if (end != buf && value > 0 && value < INT_MAX)
        retval = (int) value;
    }

  fclose (file);
  return retval;
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
int
|
|
|
|
main (int argc, char *argv[])
|
|
|
|
{
|
|
|
|
pthread_t child;
|
|
|
|
int rc;
|
|
|
|
unsigned int reuse_time_raw = 0;
|
|
|
|
|
|
|
|
rc = pthread_create (&child, NULL, spawner_thread_func, NULL);
|
|
|
|
assert (rc == 0);
|
|
|
|
|
|
|
|
#define COUNT_TIME 2
|
|
|
|
sleep (COUNT_TIME);
|
|
|
|
|
|
|
|
#ifdef __linux__
|
|
|
|
tid_max = linux_proc_get_pid_max ();
|
|
|
|
#endif
|
|
|
|
/* If we don't know how many threads it would take to use the whole
|
|
|
|
number space on this system, just run the test for a bit. */
|
|
|
|
if (tid_max > 0)
|
|
|
|
{
|
|
|
|
reuse_time_raw = tid_max / ((float) thread_counter / COUNT_TIME) + 0.5;
|
|
|
|
|
|
|
|
/* Give it a bit more, just in case. */
|
|
|
|
reuse_time = reuse_time_raw + 3;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* 4 seconds were sufficient on the machine this was first observed,
|
|
|
|
an Intel i7-2620M @ 2.70GHz running Linux 3.18.7, with
|
|
|
|
pid_max=32768. Going forward, as machines get faster, this will
|
|
|
|
need less time, unless pid_max is set to a very high number. To
|
|
|
|
avoid unreasonably long test time, cap to an upper bound. */
|
|
|
|
if (reuse_time > 60)
|
|
|
|
reuse_time = 60;
|
|
|
|
printf ("thread_counter=%lu, tid_max = %ld, reuse_time_raw=%u, reuse_time=%u\n",
|
|
|
|
thread_counter, tid_max, reuse_time_raw, reuse_time);
|
|
|
|
after_count ();
|
|
|
|
|
|
|
|
sleep (reuse_time);
|
|
|
|
|
|
|
|
after_reuse_time ();
|
|
|
|
return 0;
|
|
|
|
}
|