From 7bfd842d05042bfa697b1ded0c8f9bf3e58171fe Mon Sep 17 00:00:00 2001 From: Nick Clifton Date: Mon, 28 May 2012 14:20:19 +0000 Subject: [PATCH] * read.c (read_symbol_name): New function. Reads a symbol name. Allows escape codes in names. (s_comm_internal): Use read_symbol_name. (s_globl, s_lsym, s_set, s_weakref): Likewise. * doc/as.texinfo: Document support for multibyte characters in symbol names. * gas/elf/syms.s: New test - checks the generation of multibyte symbol names. * gas/elf/syms.d: New file - expected readelf output. * gas/elf/elf.exp: Add syms. * readelf.c (print_symbol): Display multibyte characters in symbol names. (process_section_headers): Use print_symbol. * ld-ifunc/ifunc-13a-i386.s: Fix use of .global directive. * ld-ifunc/ifunc-15-i386.s: Likewise. --- binutils/ChangeLog | 6 + binutils/readelf.c | 125 +++++++------ gas/ChangeLog | 9 + gas/doc/as.texinfo | 13 +- gas/read.c | 238 ++++++++++++++----------- gas/testsuite/ChangeLog | 7 + gas/testsuite/gas/elf/elf.exp | 2 + gas/testsuite/gas/elf/syms.d | 18 ++ gas/testsuite/gas/elf/syms.s | 5 + ld/testsuite/ChangeLog | 5 + ld/testsuite/ld-ifunc/ifunc-13a-i386.s | 2 +- ld/testsuite/ld-ifunc/ifunc-15-i386.s | 2 +- 12 files changed, 260 insertions(+), 172 deletions(-) create mode 100644 gas/testsuite/gas/elf/syms.d create mode 100644 gas/testsuite/gas/elf/syms.s diff --git a/binutils/ChangeLog b/binutils/ChangeLog index c19e45c227..2d2bc08085 100644 --- a/binutils/ChangeLog +++ b/binutils/ChangeLog @@ -1,3 +1,9 @@ +2012-05-28 Nick Clifton + + * readelf.c (print_symbol): Display multibyte characters in symbol + names. + (process_section_headers): Use print_symbol. + 2012-05-18 Andreas Schwab * aclocal.m4: Regenerate. 
diff --git a/binutils/readelf.c b/binutils/readelf.c index 212f70e6c2..762a1a8700 100644 --- a/binutils/readelf.c +++ b/binutils/readelf.c @@ -48,6 +48,7 @@ #ifdef HAVE_ZLIB_H #include <zlib.h> #endif +#include <wchar.h> #if __GNUC__ >= 2 /* Define BFD64 here, even if our default architecture is 32 bit ELF @@ -383,93 +384,89 @@ print_vma (bfd_vma vma, print_mode mode) return 0; } -/* Display a symbol on stdout. Handles the display of non-printing characters. +/* Display a symbol on stdout. Handles the display of control characters and + multibyte characters. - If DO_WIDE is not true then format the symbol to be at most WIDTH characters, - truncating as necessary. If WIDTH is negative then format the string to be - exactly - WIDTH characters, truncating or padding as necessary. + Display at most abs(WIDTH) characters, truncating as necessary, unless do_wide is true. + + If WIDTH is negative then ensure that the output is at least (- WIDTH) characters, + padding as necessary. Returns the number of emitted characters. */ static unsigned int print_symbol (int width, const char *symbol) { - const char *c; bfd_boolean extra_padding = FALSE; - unsigned int num_printed = 0; + int num_printed = 0; + mbstate_t state; + int width_remaining; - if (do_wide) - { - /* Set the width to a very large value. This simplifies the - code below. */ - width = INT_MAX; - } - else if (width < 0) + if (width < 0) { /* Keep the width positive. This also helps. */ width = - width; extra_padding = TRUE; - } + } - while (width) + if (do_wide) + /* Set the remaining width to a very large value. + This simplifies the code below. */ + width_remaining = INT_MAX; + else + width_remaining = width; + + /* Initialise the multibyte conversion state. */ + memset (& state, 0, sizeof (state)); + + while (width_remaining) { - int len; + size_t n; + wchar_t w; + const char c = *symbol++; - c = symbol; - - /* Look for non-printing symbols inside the symbol's name. 
- This test is triggered in particular by the names generated - by the assembler for local labels. */ - while (ISPRINT (*c)) - c++; - - len = c - symbol; - - if (len) - { - if (len > width) - len = width; - - printf ("%.*s", len, symbol); - - width -= len; - num_printed += len; - } - - if (*c == 0 || width == 0) + if (c == 0) break; - /* Now display the non-printing character, if - there is room left in which to dipslay it. */ - if ((unsigned char) *c < 32) + /* Do not print control characters directly as they can affect terminal + settings. Such characters usually appear in the names generated + by the assembler for local labels. */ + if (ISCNTRL (c)) { - if (width < 2) + if (width_remaining < 2) break; - printf ("^%c", *c + 0x40); - - width -= 2; + printf ("^%c", c + 0x40); + width_remaining -= 2; num_printed += 2; } + else if (ISPRINT (c)) + { + putchar (c); + width_remaining --; + num_printed ++; + } else { - if (width < 6) - break; + /* Let printf do the hard work of displaying multibyte characters. */ + printf ("%.1s", symbol - 1); + width_remaining --; + num_printed ++; - printf ("<0x%.2x>", (unsigned char) *c); - - width -= 6; - num_printed += 6; + /* Try to find out how many bytes made up the character that was + just printed. Advance the symbol pointer past the bytes that + were displayed. */ + n = mbrtowc (& w, symbol - 1, MB_CUR_MAX, & state); + if (n != (size_t) -1 && n != (size_t) -2 && n > 0) + symbol += (n - 1); } - - symbol = c + 1; } - if (extra_padding && width > 0) + if (extra_padding && num_printed < width) { /* Fill in the remaining spaces. 
*/ - printf ("%-*s", width, " "); - num_printed += 2; + printf ("%-*s", width - num_printed, " "); + num_printed = width; } return num_printed; @@ -4737,21 +4734,21 @@ process_section_headers (FILE * file) i < elf_header.e_shnum; i++, section++) { + printf (" [%2u] ", i); if (do_section_details) { - printf (" [%2u] %s\n", - i, - SECTION_NAME (section)); + print_symbol (INT_MAX, SECTION_NAME (section)); + putchar ('\n'); if (is_32bit_elf || do_wide) printf (" %-15.15s ", get_section_type_name (section->sh_type)); } else - printf ((do_wide ? " [%2u] %-17s %-15s " - : " [%2u] %-17.17s %-15.15s "), - i, - SECTION_NAME (section), - get_section_type_name (section->sh_type)); + { + print_symbol (-17, SECTION_NAME (section)); + printf (" %-15.15s ", + get_section_type_name (section->sh_type)); + } if (is_32bit_elf) { diff --git a/gas/ChangeLog b/gas/ChangeLog index caef2baa55..a32f87c2ff 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,12 @@ +2012-05-28 Nick Clifton + + * read.c (read_symbol_name): New function. Reads a symbol names. + Allows escape codes in names. + (s_comm_internal): Use read_symbol_name. + (s_globl, s_lsym, s_set, s_weakref): Likewise. + * doc/as.texinfo: Document support for multibyte characters in + symbol names. + 2012-05-21 Mike Frysinger * config/tc-mips.c (mips_after_parse_args): Assert that arch_info diff --git a/gas/doc/as.texinfo b/gas/doc/as.texinfo index 694c8067d9..72b5d0522f 100644 --- a/gas/doc/as.texinfo +++ b/gas/doc/as.texinfo @@ -2485,10 +2485,10 @@ On most machines, you can also use @code{$} in symbol names; exceptions are noted in @ref{Machine Dependencies}. @end ifset No symbol may begin with a digit. Case is significant. -There is no length limit: all characters are significant. Symbols are -delimited by characters not in that set, or by the beginning of a file -(since the source program must end with a newline, the end of a file is -not a possible symbol delimiter). @xref{Symbols}. 
+There is no length limit: all characters are significant. Multibyte characters +are supported. Symbols are delimited by characters not in that set, or by the +beginning of a file (since the source program must end with a newline, the end +of a file is not a possible symbol delimiter). @xref{Symbols}. @cindex length of symbols @node Statements @@ -3414,6 +3414,11 @@ on the H8/300), and underscores. Case of letters is significant: @code{foo} is a different symbol name than @code{Foo}. +Multibyte characters are supported. To generate a symbol name containing +multibyte characters, enclose it within double quotes and use escape codes. +@xref{Strings}. Generating a multibyte symbol name from a label is not +currently supported. + Each symbol has exactly one name. Each name in an assembly language program refers to exactly one symbol. You may use that symbol name any number of times in a program. diff --git a/gas/read.c b/gas/read.c index 4ff33132f8..cf7f7529f6 100644 --- a/gas/read.c +++ b/gas/read.c @@ -40,6 +40,7 @@ #include "obstack.h" #include "ecoff.h" #include "dw2gencfi.h" +#include "wchar.h" #ifndef TC_START_LABEL #define TC_START_LABEL(x,y,z) (x == ':') @@ -1583,13 +1584,106 @@ s_altmacro (int on) macro_set_alternate (on); } +/* Read a symbol name from input_line_pointer. + + Stores the symbol name in a buffer and returns a pointer to this buffer. + The buffer is xmalloc'ed. It is the caller's responsibility to free + this buffer. + + The name is not left in the i_l_p buffer as it may need processing + to handle escape characters. + + Advances i_l_p to the next non-whitespace character. + + If a symbol name could not be read, the routine issues an error + message, skips to the end of the line and returns NULL. 
*/ + +static char * +read_symbol_name (void) +{ + char * name; + char * start; + char c; + + c = *input_line_pointer++; + + if (c == '"') + { +#define SYM_NAME_CHUNK_LEN 128 + ptrdiff_t len = SYM_NAME_CHUNK_LEN; + char * name_end; + unsigned int C; + + start = name = xmalloc (len + 1); + + name_end = name + SYM_NAME_CHUNK_LEN; + + while (is_a_char (C = next_char_of_string ())) + { + if (name >= name_end) + { + ptrdiff_t sofar; + + sofar = name - start; + len += SYM_NAME_CHUNK_LEN; + start = xrealloc (start, len + 1); + name_end = start + len; + name = start + sofar; + } + + *name++ = (char) C; + } + *name = 0; + + /* Since quoted symbol names can contain non-ASCII characters, + check the string and warn if it cannot be recognised by the + current character set. */ + if (mbstowcs (NULL, name, len) == (size_t) -1) + as_warn (_("symbol name not recognised in the current locale")); + } + else if (is_name_beginner (c) || c == '\001') + { + ptrdiff_t len; + + name = input_line_pointer - 1; + + /* We accept \001 in a name in case this is + being called with a constructed string. */ + while (is_part_of_name (c = *input_line_pointer++) + || c == '\001') + ; + + len = (input_line_pointer - name) - 1; + start = xmalloc (len + 1); + + memcpy (start, name, len); + start[len] = 0; + + /* Skip a name ender char if one is present. */ + if (! is_name_ender (c)) + --input_line_pointer; + } + else + name = start = NULL; + + if (name == start) + { + as_bad (_("expected symbol name")); + ignore_rest_of_line (); + return NULL; + } + + SKIP_WHITESPACE (); + + return start; +} + + symbolS * s_comm_internal (int param, symbolS *(*comm_parse_extra) (int, symbolS *, addressT)) { char *name; - char c; - char *p; offsetT temp, size; symbolS *symbolP = NULL; char *stop = NULL; @@ -1599,20 +1693,8 @@ s_comm_internal (int param, if (flag_mri) stop = mri_comment_field (&stopc); - name = input_line_pointer; - c = get_symbol_end (); - /* Just after name is now '\0'. 
*/ - p = input_line_pointer; - *p = c; - - if (name == p) - { - as_bad (_("expected symbol name")); - ignore_rest_of_line (); - goto out; - } - - SKIP_WHITESPACE (); + if ((name = read_symbol_name ()) == NULL) + goto out; /* Accept an optional comma after the name. The comma used to be required, but Irix 5 cc does not generate it for .lcomm. */ @@ -1635,7 +1717,6 @@ s_comm_internal (int param, goto out; } - *p = 0; symbolP = symbol_find_or_make (name); if ((S_IS_DEFINED (symbolP) || symbol_equated_p (symbolP)) && !S_IS_COMMON (symbolP)) @@ -1644,7 +1725,6 @@ s_comm_internal (int param, { symbolP = NULL; as_bad (_("symbol `%s' is already defined"), name); - *p = c; ignore_rest_of_line (); goto out; } @@ -1662,7 +1742,6 @@ s_comm_internal (int param, as_warn (_("size of \"%s\" is already %ld; not changing to %ld"), name, (long) size, (long) temp); - *p = c; if (comm_parse_extra != NULL) symbolP = (*comm_parse_extra) (param, symbolP, size); else @@ -1676,6 +1755,8 @@ s_comm_internal (int param, out: if (flag_mri) mri_comment_end (stop, stopc); + if (name != NULL) + free (name); return symbolP; } @@ -2179,12 +2260,12 @@ s_globl (int ignore ATTRIBUTE_UNUSED) do { - name = input_line_pointer; - c = get_symbol_end (); + if ((name = read_symbol_name ()) == NULL) + return; + symbolP = symbol_find_or_make (name); S_SET_EXTERNAL (symbolP); - *input_line_pointer = c; SKIP_WHITESPACE (); c = *input_line_pointer; if (c == ',') @@ -2194,6 +2275,8 @@ s_globl (int ignore ATTRIBUTE_UNUSED) if (is_end_of_line[(unsigned char) *input_line_pointer]) c = '\n'; } + + free (name); } while (c == ','); @@ -2580,33 +2663,17 @@ void s_lsym (int ignore ATTRIBUTE_UNUSED) { char *name; - char c; - char *p; expressionS exp; symbolS *symbolP; /* We permit ANY defined expression: BSD4.2 demands constants. 
*/ - name = input_line_pointer; - c = get_symbol_end (); - p = input_line_pointer; - *p = c; - - if (name == p) - { - as_bad (_("expected symbol name")); - ignore_rest_of_line (); - return; - } - - SKIP_WHITESPACE (); + if ((name = read_symbol_name ()) == NULL) + return; if (*input_line_pointer != ',') { - *p = 0; as_bad (_("expected comma after \"%s\""), name); - *p = c; - ignore_rest_of_line (); - return; + goto err_out; } input_line_pointer++; @@ -2616,11 +2683,9 @@ s_lsym (int ignore ATTRIBUTE_UNUSED) && exp.X_op != O_register) { as_bad (_("bad expression")); - ignore_rest_of_line (); - return; + goto err_out; } - *p = 0; symbolP = symbol_find_or_make (name); if (S_GET_SEGMENT (symbolP) == undefined_section) @@ -2638,8 +2703,14 @@ s_lsym (int ignore ATTRIBUTE_UNUSED) as_bad (_("symbol `%s' is already defined"), name); } - *p = c; demand_empty_rest_of_line (); + free (name); + return; + + err_out: + ignore_rest_of_line (); + free (name); + return; } /* Read a line into an sb. Returns the character that ended the line @@ -3283,42 +3354,25 @@ void s_set (int equiv) { char *name; - char delim; - char *end_name; /* Especial apologies for the random logic: this just grew, and could be parsed much more simply! Dean in haste. 
*/ - name = input_line_pointer; - delim = get_symbol_end (); - end_name = input_line_pointer; - *end_name = delim; - - if (name == end_name) - { - as_bad (_("expected symbol name")); - ignore_rest_of_line (); - return; - } - - SKIP_WHITESPACE (); + if ((name = read_symbol_name ()) == NULL) + return; if (*input_line_pointer != ',') { - *end_name = 0; as_bad (_("expected comma after \"%s\""), name); - *end_name = delim; ignore_rest_of_line (); + free (name); return; } input_line_pointer++; - *end_name = 0; - assign_symbol (name, equiv); - *end_name = delim; - demand_empty_rest_of_line (); + free (name); } void @@ -3622,23 +3676,12 @@ void s_weakref (int ignore ATTRIBUTE_UNUSED) { char *name; - char delim; - char *end_name; symbolS *symbolP; symbolS *symbolP2; expressionS exp; - name = input_line_pointer; - delim = get_symbol_end (); - end_name = input_line_pointer; - - if (name == end_name) - { - as_bad (_("expected symbol name")); - *end_name = delim; - ignore_rest_of_line (); - return; - } + if ((name = read_symbol_name ()) == NULL) + return; symbolP = symbol_find_or_make (name); @@ -3647,41 +3690,27 @@ s_weakref (int ignore ATTRIBUTE_UNUSED) if (!S_IS_VOLATILE (symbolP)) { as_bad (_("symbol `%s' is already defined"), name); - *end_name = delim; - ignore_rest_of_line (); - return; + goto err_out; } symbolP = symbol_clone (symbolP, 1); S_CLEAR_VOLATILE (symbolP); } - *end_name = delim; - SKIP_WHITESPACE (); if (*input_line_pointer != ',') { - *end_name = 0; as_bad (_("expected comma after \"%s\""), name); - *end_name = delim; - ignore_rest_of_line (); - return; + goto err_out; } input_line_pointer++; SKIP_WHITESPACE (); + free (name); - name = input_line_pointer; - delim = get_symbol_end (); - end_name = input_line_pointer; - - if (name == end_name) - { - as_bad (_("expected symbol name")); - ignore_rest_of_line (); - return; - } + if ((name = read_symbol_name ()) == NULL) + return; if ((symbolP2 = symbol_find_noref (name, 1)) == NULL && (symbolP2 = 
md_undefined_symbol (name)) == NULL) @@ -3712,6 +3741,7 @@ s_weakref (int ignore ATTRIBUTE_UNUSED) while (symp != symbolP) { char *old_loop = loop; + symp = symbol_get_value_expression (symp)->X_add_symbol; loop = concat (loop, " => ", S_GET_NAME (symp), (const char *) NULL); @@ -3722,8 +3752,7 @@ s_weakref (int ignore ATTRIBUTE_UNUSED) S_GET_NAME (symbolP), loop); free (loop); - - *end_name = delim; + free (name); ignore_rest_of_line (); return; } @@ -3734,8 +3763,6 @@ s_weakref (int ignore ATTRIBUTE_UNUSED) /* symbolP2 = symp; */ } - *end_name = delim; - memset (&exp, 0, sizeof (exp)); exp.X_op = O_symbol; exp.X_add_symbol = symbolP2; @@ -3746,6 +3773,13 @@ s_weakref (int ignore ATTRIBUTE_UNUSED) S_SET_WEAKREFR (symbolP); demand_empty_rest_of_line (); + free (name); + return; + + err_out: + ignore_rest_of_line (); + free (name); + return; } diff --git a/gas/testsuite/ChangeLog b/gas/testsuite/ChangeLog index 13a6344552..e19086abc5 100644 --- a/gas/testsuite/ChangeLog +++ b/gas/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2012-05-28 Nick Clifton + + * gas/elf/syms.s: New test - checks the generation of multibyte + symbol names. + * gas/elf/syms.d: New file - expected readelf output. + * gas/elf/elf.exp: Add syms. + 2012-05-25 Alan Modra * gas/lns/lns-big-delta.s: Add nops. 
diff --git a/gas/testsuite/gas/elf/elf.exp b/gas/testsuite/gas/elf/elf.exp index 736eec9836..b437730ca9 100644 --- a/gas/testsuite/gas/elf/elf.exp +++ b/gas/testsuite/gas/elf/elf.exp @@ -184,6 +184,8 @@ if { [is_elf_format] } then { run_dump_test "bad-size" run_dump_test "bad-group" + run_dump_test "syms" + load_lib gas-dg.exp dg-init dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/err-*.s $srcdir/$subdir/warn-*.s]] "" "" diff --git a/gas/testsuite/gas/elf/syms.d b/gas/testsuite/gas/elf/syms.d new file mode 100644 index 0000000000..40f7706d56 --- /dev/null +++ b/gas/testsuite/gas/elf/syms.d @@ -0,0 +1,18 @@ +#readelf: -S -s -p .strtab +#name: Multibyte symbol names +# The following targets use an unusual .set syntax... +#not-target: alpha*-*-* h8300-*-* + +#... +Section Headers: +#... + \[ .\] sec.*tion.* +#... +Symbol table.* +#... + ..: .*sy.*mbol +#... +String dump.* +#... + \[......\] sy.*mbol +#pass diff --git a/gas/testsuite/gas/elf/syms.s b/gas/testsuite/gas/elf/syms.s new file mode 100644 index 0000000000..977c6bb7c9 --- /dev/null +++ b/gas/testsuite/gas/elf/syms.s @@ -0,0 +1,5 @@ + .section "sec\xa5\xc2tion" + + .set "sy\xa5\xc2mbol", . + + .string8 "str\xa5\xc2ing" diff --git a/ld/testsuite/ChangeLog b/ld/testsuite/ChangeLog index 086123bb73..751a3873b1 100644 --- a/ld/testsuite/ChangeLog +++ b/ld/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2012-05-28 Nick Clifton + + * ld-ifunc/ifunc-13a-i386.s: Fix use of .global directive. + * ld-ifunc/ifunc-15-i386.s: Likewise. 
+ 2012-05-28 Alan Modra PR ld/14170 diff --git a/ld/testsuite/ld-ifunc/ifunc-13a-i386.s b/ld/testsuite/ld-ifunc/ifunc-13a-i386.s index eb893af3d3..5bda920b7f 100644 --- a/ld/testsuite/ld-ifunc/ifunc-13a-i386.s +++ b/ld/testsuite/ld-ifunc/ifunc-13a-i386.s @@ -1,6 +1,6 @@ .text .type foo, @function - .global + .global foo foo: movl xxx@GOT(%ebx), %eax ret diff --git a/ld/testsuite/ld-ifunc/ifunc-15-i386.s b/ld/testsuite/ld-ifunc/ifunc-15-i386.s index 5ee4fab859..ea541e2978 100644 --- a/ld/testsuite/ld-ifunc/ifunc-15-i386.s +++ b/ld/testsuite/ld-ifunc/ifunc-15-i386.s @@ -1,6 +1,6 @@ .text .type foo, @function - .global + .global foo foo: movl ifunc@GOT(%ebx), %eax ret