Move logic out of symbol_find_demangled_name

This patch moves most of the demangling logic out of
symbol_find_demangled_name into the various language_defn objects.

The simplest way to do this seemed to be to add a new method to
language_defn.  This is a shame given the existing la_demangle, but
given Ada's unusual needs, and the differing demangling options
between languages, la_demangle didn't seem to fit.

In order to make this work, I made enum language order-sensitive.
This helps preserve the current ordering of demangling operations.

2016-06-23  Tom Tromey  <tom@tromey.com>

	* symtab.c (symbol_find_demangled_name): Loop over languages and
	use language_sniff_from_mangled_name.
	* rust-lang.c (rust_sniff_from_mangled_name): New function.
	(rust_language_defn): Update.
	* p-lang.c (pascal_language_defn): Update.
	* opencl-lang.c (opencl_language_defn): Update.
	* objc-lang.c (objc_sniff_from_mangled_name): New function.
	(objc_language_defn): Update.
	* m2-lang.c (m2_language_defn): Update.
	* language.h (struct language_defn) <la_sniff_from_mangled_name>: New
	field.
	(language_sniff_from_mangled_name): Declare.
	* language.c (language_sniff_from_mangled_name): New function.
	(unknown_language_defn, auto_language_defn, local_language_defn):
	Update.
	* jv-lang.c (java_sniff_from_mangled_name): New function.
	(java_language_defn): Use it.
	* go-lang.c (go_sniff_from_mangled_name): New function.
	(go_language_defn): Use it.
	* f-lang.c (f_language_defn): Update.
	* defs.h (enum language): Reorder.
	* d-lang.c (d_sniff_from_mangled_name): New function.
	(d_language_defn): Use it.
	* cp-support.h (gdb_sniff_from_mangled_name): Declare.
	* cp-support.c (gdb_sniff_from_mangled_name): New function.
	* c-lang.c (c_language_defn, cplus_language_defn)
	(asm_language_defn, minimal_language_defn): Update.
	* ada-lang.c (ada_sniff_from_mangled_name): New function.
	(ada_language_defn): Use it.
This commit is contained in:
Tom Tromey 2016-05-26 15:04:07 -06:00
parent 56618e20bc
commit 8b302db80c
18 changed files with 225 additions and 99 deletions

View file

@ -1,3 +1,35 @@
2016-06-23 Tom Tromey <tom@tromey.com>
* symtab.c (symbol_find_demangled_name): Loop over languages and
use language_sniff_from_mangled_name.
* rust-lang.c (rust_sniff_from_mangled_name): New function.
(rust_language_defn): Update.
* p-lang.c (pascal_language_defn): Update.
* opencl-lang.c (opencl_language_defn): Update.
* objc-lang.c (objc_sniff_from_mangled_name): New function.
(objc_language_defn): Update.
* m2-lang.c (m2_language_defn): Update.
* language.h (struct language_defn) <la_sniff_from_mangled_name>: New
field.
(language_sniff_from_mangled_name): Declare.
* language.c (language_sniff_from_mangled_name): New function.
(unknown_language_defn, auto_language_defn, local_language_defn):
Update.
* jv-lang.c (java_sniff_from_mangled_name): New function.
(java_language_defn): Use it.
* go-lang.c (go_sniff_from_mangled_name): New function.
(go_language_defn): Use it.
* f-lang.c (f_language_defn): Update.
* defs.h (enum language): Reorder.
* d-lang.c (d_sniff_from_mangled_name): New function.
(d_language_defn): Use it.
* cp-support.h (gdb_sniff_from_mangled_name): Declare.
* cp-support.c (gdb_sniff_from_mangled_name): New function.
* c-lang.c (c_language_defn, cplus_language_defn)
(asm_language_defn, minimal_language_defn): Update.
* ada-lang.c (ada_sniff_from_mangled_name): New function.
(ada_language_defn): Use it.
2016-06-23 Tom Tromey <tom@tromey.com>
* ada-lang.c (ada_extensions): New array.

View file

@ -1452,6 +1452,45 @@ ada_la_decode (const char *encoded, int options)
return xstrdup (ada_decode (encoded));
}
/* Implement la_sniff_from_mangled_name for Ada.

   Returns 1 when ada_decode recognizes MANGLED, 0 otherwise.  *OUT
   is set to NULL in both cases: no decoded string is ever handed
   back to the caller.  */

static int
ada_sniff_from_mangled_name (const char *mangled, char **out)
{
  const char *decoded = ada_decode (mangled);

  *out = NULL;

  /* Not recognized: ada_decode handed the input back unchanged,
     returned nothing, or produced a result starting with '<'.  */
  if (decoded == mangled || decoded == NULL || decoded[0] == '<')
    return 0;

  /* Report the symbol as recognized, but deliberately leave *OUT
     set to NULL.  Two reasons for that:

     1. For Ada, we prefer computing the symbol's decoded name
     on the fly rather than pre-compute it, in order to save
     memory (Ada projects are typically very large).

     2. There are some areas in the definition of the GNAT
     encoding where, with a bit of bad luck, we might be able
     to decode a non-Ada symbol, generating an incorrect
     demangled name (Eg: names ending with "TB" for instance
     are identified as task bodies and so stripped from
     the decoded name returned).

     Returning 1 without a decoded string gives us a little bit of
     the best of both worlds.  Because Ada is tried last, we should
     not affect any of the other languages that were able to
     demangle the symbol before us; Ada symbols still get recognized
     as such; and even if a non-Ada symbol is incorrectly recognized,
     which should be rare, any routing through the Ada language
     should be transparent (Ada tries to behave much like C/C++
     with non-Ada symbols).  */
  return 1;
}
/* Returns non-zero iff SYM_NAME matches NAME, ignoring any trailing
suffixes that encode debugging information or leading _ada_ on
SYM_NAME (see is_name_suffix commentary for the debugging
@ -14086,6 +14125,7 @@ const struct language_defn ada_language_defn = {
ada_lookup_symbol_nonlocal, /* Looking up non-local symbols. */
basic_lookup_transparent_type, /* lookup_transparent_type */
ada_la_decode, /* Language specific symbol demangler */
ada_sniff_from_mangled_name,
NULL, /* Language specific
class_name_from_physname */
ada_op_print_tab, /* expression operators for printing */

View file

@ -856,6 +856,7 @@ const struct language_defn c_language_defn =
basic_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */
basic_lookup_transparent_type,/* lookup_transparent_type */
NULL, /* Language specific symbol demangler */
NULL,
NULL, /* Language specific
class_name_from_physname */
c_op_print_tab, /* expression operators for printing */
@ -989,6 +990,7 @@ const struct language_defn cplus_language_defn =
cp_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */
cp_lookup_transparent_type, /* lookup_transparent_type */
gdb_demangle, /* Language specific symbol demangler */
gdb_sniff_from_mangled_name,
cp_class_name_from_physname, /* Language specific
class_name_from_physname */
c_op_print_tab, /* expression operators for printing */
@ -1040,6 +1042,7 @@ const struct language_defn asm_language_defn =
basic_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */
basic_lookup_transparent_type,/* lookup_transparent_type */
NULL, /* Language specific symbol demangler */
NULL,
NULL, /* Language specific
class_name_from_physname */
c_op_print_tab, /* expression operators for printing */
@ -1091,6 +1094,7 @@ const struct language_defn minimal_language_defn =
basic_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */
basic_lookup_transparent_type,/* lookup_transparent_type */
NULL, /* Language specific symbol demangler */
NULL,
NULL, /* Language specific
class_name_from_physname */
c_op_print_tab, /* expression operators for printing */

View file

@ -1627,6 +1627,15 @@ gdb_demangle (const char *name, int options)
return result;
}
/* See cp-support.h.  */

int
gdb_sniff_from_mangled_name (const char *mangled, char **demangled)
{
  /* Demangle with the options used for names installed into
     symbols.  */
  char *result = gdb_demangle (mangled, DMGL_PARAMS | DMGL_ANSI);

  /* *DEMANGLED is set whether or not demangling succeeded; it is
     NULL exactly when we report failure.  */
  *demangled = result;
  if (result == NULL)
    return 0;
  return 1;
}
/* Don't allow just "maintenance cplus". */
static void

View file

@ -158,4 +158,8 @@ extern struct cmd_list_element *maint_cplus_cmd_list;
char *gdb_demangle (const char *name, int options);
/* Like gdb_demangle, but suitable for use as la_sniff_from_mangled_name. */
int gdb_sniff_from_mangled_name (const char *mangled, char **demangled);
#endif /* CP_SUPPORT_H */

View file

@ -55,6 +55,15 @@ d_demangle (const char *symbol, int options)
return gdb_demangle (symbol, options | DMGL_DLANG);
}
/* la_sniff_from_mangled_name implementation for D.

   Stores the result of d_demangle (with no extra options) in
   *DEMANGLED and returns non-zero iff that result is not NULL.  */

static int
d_sniff_from_mangled_name (const char *mangled, char **demangled)
{
  char *d_name = d_demangle (mangled, 0);

  *demangled = d_name;
  return d_name != NULL ? 1 : 0;
}
/* Table mapping opcodes into strings for printing operators
and precedences of the operators. */
static const struct op_print d_op_print_tab[] =
@ -223,6 +232,7 @@ static const struct language_defn d_language_defn =
d_lookup_symbol_nonlocal,
basic_lookup_transparent_type,
d_demangle, /* Language specific symbol demangler. */
d_sniff_from_mangled_name,
NULL, /* Language specific
class_name_from_physname. */
d_op_print_tab, /* Expression operators for printing. */

View file

@ -194,26 +194,36 @@ extern void quit_serial_event_clear (void);
/* * Languages represented in the symbol table and elsewhere.
This should probably be in language.h, but since enum's can't
be forward declared to satisfy opaque references before their
actual definition, needs to be here. */
actual definition, needs to be here.
The constants here are in priority order. In particular,
demangling is attempted according to this order.
Note that there's ambiguity between the mangling schemes of some of
these languages, so some symbols could be successfully demangled by
several languages. For that reason, the constants here are sorted
in the order we'll attempt demangling them. For example: Java and
Rust use C++ mangling, so must come after C++; Ada must come last
(see ada_sniff_from_mangled_name). */
enum language
{
language_unknown, /* Language not known */
language_auto, /* Placeholder for automatic setting */
language_c, /* C */
language_objc, /* Objective-C */
language_cplus, /* C++ */
language_java, /* Java */
language_d, /* D */
language_go, /* Go */
language_objc, /* Objective-C */
language_java, /* Java */
language_fortran, /* Fortran */
language_m2, /* Modula-2 */
language_asm, /* Assembly language */
language_pascal, /* Pascal */
language_ada, /* Ada */
language_opencl, /* OpenCL */
language_rust, /* Rust */
language_minimal, /* All other languages, minimal support only */
language_ada, /* Ada */
nr_languages
};

View file

@ -268,7 +268,14 @@ const struct language_defn f_language_defn =
NULL, /* name_of_this */
cp_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */
basic_lookup_transparent_type,/* lookup_transparent_type */
/* We could support demangling here to provide module namespaces
also for inferiors with only minimal symbol table (ELF symbols).
Just the mangling standard is not standardized across compilers
and there is no DW_AT_producer available for inferiors with only
the ELF symbols to check the mangling kind. */
NULL, /* Language specific symbol demangler */
NULL,
NULL, /* Language specific
class_name_from_physname */
f_op_print_tab, /* expression operators for printing */

View file

@ -385,6 +385,15 @@ go_demangle (const char *mangled_name, int options)
return result;
}
/* la_sniff_from_mangled_name for Go.

   *DEMANGLED receives whatever go_demangle returns for MANGLED
   (options 0); the return value is 1 if that is non-NULL, else 0.  */

static int
go_sniff_from_mangled_name (const char *mangled, char **demangled)
{
  char *go_name = go_demangle (mangled, 0);
  int recognized = (go_name != NULL);

  *demangled = go_name;
  return recognized;
}
/* Given a Go symbol, return its package or NULL if unknown.
Space for the result is malloc'd, caller must free. */
@ -584,6 +593,7 @@ static const struct language_defn go_language_defn =
basic_lookup_symbol_nonlocal,
basic_lookup_transparent_type,
go_demangle, /* Language specific symbol demangler. */
go_sniff_from_mangled_name,
NULL, /* Language specific
class_name_from_physname. */
go_op_print_tab, /* Expression operators for printing. */

View file

@ -1018,6 +1018,15 @@ static char *java_demangle (const char *mangled, int options)
return gdb_demangle (mangled, options | DMGL_JAVA);
}
/* la_sniff_from_mangled_name for Java.

   Runs java_demangle on MANGLED with DMGL_PARAMS | DMGL_ANSI,
   stores the result in *DEMANGLED, and returns non-zero iff the
   result is not NULL.  */

static int
java_sniff_from_mangled_name (const char *mangled, char **demangled)
{
  const int options = DMGL_PARAMS | DMGL_ANSI;
  char *java_name = java_demangle (mangled, options);

  *demangled = java_name;
  if (java_name == NULL)
    return 0;
  return 1;
}
/* Find the member function name of the demangled name NAME. NAME
must be a method name including arguments, in order to correctly
locate the last component.
@ -1194,6 +1203,7 @@ const struct language_defn java_language_defn =
basic_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */
basic_lookup_transparent_type,/* lookup_transparent_type */
java_demangle, /* Language specific symbol demangler */
java_sniff_from_mangled_name,
java_class_name_from_physname,/* Language specific class name */
java_op_print_tab, /* expression operators for printing */
0, /* not c-style arrays */

View file

@ -663,6 +663,23 @@ language_demangle (const struct language_defn *current_language,
return NULL;
}
/* See language.h.  */

int
language_sniff_from_mangled_name (const struct language_defn *lang,
                                  const char *mangled, char **demangled)
{
  gdb_assert (lang != NULL);

  /* Delegate to the language's own sniffer when it has one.  */
  if (lang->la_sniff_from_mangled_name != NULL)
    return lang->la_sniff_from_mangled_name (mangled, demangled);

  /* No sniffer for this language: report "not recognized", making
     sure *DEMANGLED is NULL as a 0 return requires.  */
  *demangled = NULL;
  return 0;
}
/* Return class name from physname or NULL. */
char *
language_class_name_from_physname (const struct language_defn *lang,
@ -843,6 +860,7 @@ const struct language_defn unknown_language_defn =
basic_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */
basic_lookup_transparent_type,/* lookup_transparent_type */
unk_lang_demangle, /* Language specific symbol demangler */
NULL,
unk_lang_class_name, /* Language specific
class_name_from_physname */
unk_op_print_tab, /* expression operators for printing */
@ -891,6 +909,7 @@ const struct language_defn auto_language_defn =
basic_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */
basic_lookup_transparent_type,/* lookup_transparent_type */
unk_lang_demangle, /* Language specific symbol demangler */
NULL,
unk_lang_class_name, /* Language specific
class_name_from_physname */
unk_op_print_tab, /* expression operators for printing */
@ -937,6 +956,7 @@ const struct language_defn local_language_defn =
basic_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */
basic_lookup_transparent_type,/* lookup_transparent_type */
unk_lang_demangle, /* Language specific symbol demangler */
NULL,
unk_lang_class_name, /* Language specific
class_name_from_physname */
unk_op_print_tab, /* expression operators for printing */

View file

@ -292,6 +292,22 @@ struct language_defn
/* Return demangled language symbol, or NULL. */
char *(*la_demangle) (const char *mangled, int options);
/* Demangle a symbol according to this language's rules. Unlike
la_demangle, this does not take any options.
*DEMANGLED will be set by this function.
If this function returns 0, then *DEMANGLED must always be set
to NULL.
If this function returns 1, the implementation may set this to
a xmalloc'd string holding the demangled form. However, it is
not required to. The string, if any, is owned by the caller.
The resulting string should be of the form that will be
installed into a symbol. */
int (*la_sniff_from_mangled_name) (const char *mangled, char **demangled);
/* Return class name of a mangled method name or NULL. */
char *(*la_class_name_from_physname) (const char *physname);
@ -565,6 +581,13 @@ extern CORE_ADDR skip_language_trampoline (struct frame_info *, CORE_ADDR pc);
extern char *language_demangle (const struct language_defn *current_language,
const char *mangled, int options);
/* A wrapper for la_sniff_from_mangled_name. The arguments and result
are as for the method. */
extern int language_sniff_from_mangled_name (const struct language_defn *lang,
const char *mangled,
char **demangled);
/* Return class name from physname, or NULL. */
extern char *language_class_name_from_physname (const struct language_defn *,
const char *physname);

View file

@ -381,6 +381,7 @@ const struct language_defn m2_language_defn =
basic_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */
basic_lookup_transparent_type,/* lookup_transparent_type */
NULL, /* Language specific symbol demangler */
NULL,
NULL, /* Language specific
class_name_from_physname */
m2_op_print_tab, /* expression operators for printing */

View file

@ -281,6 +281,15 @@ objc_demangle (const char *mangled, int options)
return NULL; /* Not an objc mangled name. */
}
/* la_sniff_from_mangled_name for ObjC.

   *DEMANGLED is set to the result of objc_demangle on MANGLED
   (options 0).  Returns 1 when that result is non-NULL, and 0
   when it is NULL.  */

static int
objc_sniff_from_mangled_name (const char *mangled, char **demangled)
{
  char *objc_name = objc_demangle (mangled, 0);

  *demangled = objc_name;
  return objc_name == NULL ? 0 : 1;
}
/* Determine if we are currently in the Objective-C dispatch function.
If so, get the address of the method function that the dispatcher
would call and use that as the function to step into instead. Also
@ -381,6 +390,7 @@ const struct language_defn objc_language_defn = {
basic_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */
basic_lookup_transparent_type,/* lookup_transparent_type */
objc_demangle, /* Language specific symbol demangler */
objc_sniff_from_mangled_name,
NULL, /* Language specific
class_name_from_physname */
objc_op_print_tab, /* Expression operators for printing */

View file

@ -1070,6 +1070,7 @@ const struct language_defn opencl_language_defn =
basic_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */
basic_lookup_transparent_type,/* lookup_transparent_type */
NULL, /* Language specific symbol demangler */
NULL,
NULL, /* Language specific
class_name_from_physname */
c_op_print_tab, /* expression operators for printing */

View file

@ -444,6 +444,7 @@ const struct language_defn pascal_language_defn =
basic_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */
basic_lookup_transparent_type,/* lookup_transparent_type */
NULL, /* Language specific symbol demangler */
NULL,
NULL, /* Language specific class_name_from_physname */
pascal_op_print_tab, /* expression operators for printing */
1, /* c-style arrays */

View file

@ -25,6 +25,7 @@
#include "c-lang.h"
#include "charset.h"
#include "cp-support.h"
#include "demangle.h"
#include "gdbarch.h"
#include "infcall.h"
#include "objfiles.h"
@ -1988,6 +1989,17 @@ rust_lookup_symbol_nonlocal (const struct language_defn *langdef,
/* la_sniff_from_mangled_name for Rust.

   Calls gdb_demangle with DMGL_PARAMS | DMGL_ANSI, stores the
   result in *DEMANGLED, and returns non-zero iff the result is
   not NULL.  */

static int
rust_sniff_from_mangled_name (const char *mangled, char **demangled)
{
  char *rust_name = gdb_demangle (mangled, DMGL_PARAMS | DMGL_ANSI);
  int recognized = 0;

  if (rust_name != NULL)
    recognized = 1;

  *demangled = rust_name;
  return recognized;
}
static const struct exp_descriptor exp_descriptor_rust =
{
rust_print_subexp,
@ -2030,6 +2042,7 @@ static const struct language_defn rust_language_defn =
rust_lookup_symbol_nonlocal, /* lookup_symbol_nonlocal */
basic_lookup_transparent_type,/* lookup_transparent_type */
gdb_demangle, /* Language specific symbol demangler */
rust_sniff_from_mangled_name,
NULL, /* Language specific
class_name_from_physname */
c_op_print_tab, /* expression operators for printing */

View file

@ -747,110 +747,31 @@ symbol_find_demangled_name (struct general_symbol_info *gsymbol,
const char *mangled)
{
char *demangled = NULL;
int i;
int recognized;
if (gsymbol->language == language_unknown)
gsymbol->language = language_auto;
if (gsymbol->language == language_objc
|| gsymbol->language == language_auto)
if (gsymbol->language != language_auto)
{
demangled =
objc_demangle (mangled, 0);
if (demangled != NULL)
const struct language_defn *lang = language_def (gsymbol->language);
language_sniff_from_mangled_name (lang, mangled, &demangled);
return demangled;
}
for (i = language_unknown; i < nr_languages; ++i)
{
enum language l = (enum language) i;
const struct language_defn *lang = language_def (l);
if (language_sniff_from_mangled_name (lang, mangled, &demangled))
{
gsymbol->language = language_objc;
gsymbol->language = l;
return demangled;
}
}
if (gsymbol->language == language_cplus
|| gsymbol->language == language_rust
|| gsymbol->language == language_auto)
{
demangled =
gdb_demangle (mangled, DMGL_PARAMS | DMGL_ANSI);
if (demangled != NULL)
{
gsymbol->language = language_cplus;
return demangled;
}
}
if (gsymbol->language == language_java)
{
demangled =
gdb_demangle (mangled,
DMGL_PARAMS | DMGL_ANSI | DMGL_JAVA);
if (demangled != NULL)
{
gsymbol->language = language_java;
return demangled;
}
}
if (gsymbol->language == language_d
|| gsymbol->language == language_auto)
{
demangled = d_demangle(mangled, 0);
if (demangled != NULL)
{
gsymbol->language = language_d;
return demangled;
}
}
/* FIXME(dje): Continually adding languages here is clumsy.
Better to just call la_demangle if !auto, and if auto then call
a utility routine that tries successive languages in turn and reports
which one it finds. I realize the la_demangle options may be different
for different languages but there's already a FIXME for that. */
if (gsymbol->language == language_go
|| gsymbol->language == language_auto)
{
demangled = go_demangle (mangled, 0);
if (demangled != NULL)
{
gsymbol->language = language_go;
return demangled;
}
}
/* We could support `gsymbol->language == language_fortran' here to provide
module namespaces also for inferiors with only minimal symbol table (ELF
symbols). Just the mangling standard is not standardized across compilers
and there is no DW_AT_producer available for inferiors with only the ELF
symbols to check the mangling kind. */
/* Check for Ada symbols last. See comment below explaining why. */
if (gsymbol->language == language_auto)
{
const char *demangled = ada_decode (mangled);
if (demangled != mangled && demangled != NULL && demangled[0] != '<')
{
/* Set the gsymbol language to Ada, but still return NULL.
Two reasons for that:
1. For Ada, we prefer computing the symbol's decoded name
on the fly rather than pre-compute it, in order to save
memory (Ada projects are typically very large).
2. There are some areas in the definition of the GNAT
encoding where, with a bit of bad luck, we might be able
to decode a non-Ada symbol, generating an incorrect
demangled name (Eg: names ending with "TB" for instance
are identified as task bodies and so stripped from
the decoded name returned).
Returning NULL, here, helps us get a little bit of
the best of both worlds. Because we're last, we should
not affect any of the other languages that were able to
demangle the symbol before us; we get to correctly tag
Ada symbols as such; and even if we incorrectly tagged
a non-Ada symbol, which should be rare, any routing
through the Ada language should be transparent (Ada
tries to behave much like C/C++ with non-Ada symbols). */
gsymbol->language = language_ada;
return NULL;
}
}
return NULL;
}