* read.c (read_symbol_name): New function. Reads a symbol name.

Allows escape codes in names.
	(s_comm_internal): Use read_symbol_name.
	(s_globl, s_lsym, s_set, s_weakref): Likewise.
	* doc/as.texinfo: Document support for multibyte characters in
	symbol names.

	* gas/elf/syms.s: New test - checks the generation of multibyte
	symbol names.
	* gas/elf/syms.d: New file - expected readelf output.
	* gas/elf/elf.exp: Add syms.

	* readelf.c (print_symbol): Display multibyte characters in symbol
	names.
	(process_section_headers): Use print_symbol.

	* ld-ifunc/ifunc-13a-i386.s: Fix use of .global directive.
	* ld-ifunc/ifunc-15a-i386.s: Likewise.
This commit is contained in:
Nick Clifton 2012-05-28 14:20:19 +00:00
parent e54e67a9e9
commit 7bfd842d05
12 changed files with 260 additions and 172 deletions

View file

@ -1,3 +1,9 @@
2012-05-28 Nick Clifton <nickc@redhat.com>
* readelf.c (print_symbol): Display multibyte characters in symbol
names.
(process_section_headers): Use print_symbol.
2012-05-18 Andreas Schwab <schwab@linux-m68k.org> 2012-05-18 Andreas Schwab <schwab@linux-m68k.org>
* aclocal.m4: Regenerate. * aclocal.m4: Regenerate.

View file

@ -48,6 +48,7 @@
#ifdef HAVE_ZLIB_H #ifdef HAVE_ZLIB_H
#include <zlib.h> #include <zlib.h>
#endif #endif
#include <wchar.h>
#if __GNUC__ >= 2 #if __GNUC__ >= 2
/* Define BFD64 here, even if our default architecture is 32 bit ELF /* Define BFD64 here, even if our default architecture is 32 bit ELF
@ -383,93 +384,89 @@ print_vma (bfd_vma vma, print_mode mode)
return 0; return 0;
} }
/* Display a symbol on stdout. Handles the display of non-printing characters. /* Display a symbol on stdout. Handles the display of control characters and
multibyte characters.
If DO_WIDE is not true then format the symbol to be at most WIDTH characters, Display at most abs(WIDTH) characters, truncating as necessary, unless do_wide is true.
truncating as necessary. If WIDTH is negative then format the string to be
exactly - WIDTH characters, truncating or padding as necessary. If WIDTH is negative then ensure that the output is at least (- WIDTH) characters,
padding as necessary.
Returns the number of emitted characters. */ Returns the number of emitted characters. */
static unsigned int static unsigned int
print_symbol (int width, const char *symbol) print_symbol (int width, const char *symbol)
{ {
const char *c;
bfd_boolean extra_padding = FALSE; bfd_boolean extra_padding = FALSE;
unsigned int num_printed = 0; int num_printed = 0;
mbstate_t state;
int width_remaining;
if (do_wide) if (width < 0)
{
/* Set the width to a very large value. This simplifies the
code below. */
width = INT_MAX;
}
else if (width < 0)
{ {
/* Keep the width positive. This also helps. */ /* Keep the width positive. This also helps. */
width = - width; width = - width;
extra_padding = TRUE; extra_padding = TRUE;
} }
while (width) if (do_wide)
/* Set the remaining width to a very large value.
This simplifies the code below. */
width_remaining = INT_MAX;
else
width_remaining = width;
/* Initialise the multibyte conversion state. */
memset (& state, 0, sizeof (state));
while (width_remaining)
{ {
int len; size_t n;
wchar_t w;
const char c = *symbol++;
c = symbol; if (c == 0)
/* Look for non-printing symbols inside the symbol's name.
This test is triggered in particular by the names generated
by the assembler for local labels. */
while (ISPRINT (*c))
c++;
len = c - symbol;
if (len)
{
if (len > width)
len = width;
printf ("%.*s", len, symbol);
width -= len;
num_printed += len;
}
if (*c == 0 || width == 0)
break; break;
/* Now display the non-printing character, if /* Do not print control characters directly as they can affect terminal
there is room left in which to dipslay it. */ settings. Such characters usually appear in the names generated
if ((unsigned char) *c < 32) by the assembler for local labels. */
if (ISCNTRL (c))
{ {
if (width < 2) if (width_remaining < 2)
break; break;
printf ("^%c", *c + 0x40); printf ("^%c", c + 0x40);
width_remaining -= 2;
width -= 2;
num_printed += 2; num_printed += 2;
} }
else if (ISPRINT (c))
{
putchar (c);
width_remaining --;
num_printed ++;
}
else else
{ {
if (width < 6) /* Let printf do the hard work of displaying multibyte characters. */
break; printf ("%.1s", symbol - 1);
width_remaining --;
num_printed ++;
printf ("<0x%.2x>", (unsigned char) *c); /* Try to find out how many bytes made up the character that was
just printed. Advance the symbol pointer past the bytes that
width -= 6; were displayed. */
num_printed += 6; n = mbrtowc (& w, symbol - 1, MB_CUR_MAX, & state);
if (n != (size_t) -1 && n != (size_t) -2 && n > 0)
symbol += (n - 1);
} }
symbol = c + 1;
} }
if (extra_padding && width > 0) if (extra_padding && num_printed < width)
{ {
/* Fill in the remaining spaces. */ /* Fill in the remaining spaces. */
printf ("%-*s", width, " "); printf ("%-*s", width - num_printed, " ");
num_printed += 2; num_printed = width;
} }
return num_printed; return num_printed;
@ -4737,21 +4734,21 @@ process_section_headers (FILE * file)
i < elf_header.e_shnum; i < elf_header.e_shnum;
i++, section++) i++, section++)
{ {
printf (" [%2u] ", i);
if (do_section_details) if (do_section_details)
{ {
printf (" [%2u] %s\n", print_symbol (INT_MAX, SECTION_NAME (section));
i, putchar ('\n');
SECTION_NAME (section));
if (is_32bit_elf || do_wide) if (is_32bit_elf || do_wide)
printf (" %-15.15s ", printf (" %-15.15s ",
get_section_type_name (section->sh_type)); get_section_type_name (section->sh_type));
} }
else else
printf ((do_wide ? " [%2u] %-17s %-15s " {
: " [%2u] %-17.17s %-15.15s "), print_symbol (-17, SECTION_NAME (section));
i, printf (" %-15.15s ",
SECTION_NAME (section), get_section_type_name (section->sh_type));
get_section_type_name (section->sh_type)); }
if (is_32bit_elf) if (is_32bit_elf)
{ {

View file

@ -1,3 +1,12 @@
2012-05-28 Nick Clifton <nickc@redhat.com>
* read.c (read_symbol_name): New function. Reads a symbol name.
Allows escape codes in names.
(s_comm_internal): Use read_symbol_name.
(s_globl, s_lsym, s_set, s_weakref): Likewise.
* doc/as.texinfo: Document support for multibyte characters in
symbol names.
2012-05-21 Mike Frysinger <vapier@gentoo.org> 2012-05-21 Mike Frysinger <vapier@gentoo.org>
* config/tc-mips.c (mips_after_parse_args): Assert that arch_info * config/tc-mips.c (mips_after_parse_args): Assert that arch_info

View file

@ -2485,10 +2485,10 @@ On most machines, you can also use @code{$} in symbol names; exceptions
are noted in @ref{Machine Dependencies}. are noted in @ref{Machine Dependencies}.
@end ifset @end ifset
No symbol may begin with a digit. Case is significant. No symbol may begin with a digit. Case is significant.
There is no length limit: all characters are significant. Symbols are There is no length limit: all characters are significant. Multibyte characters
delimited by characters not in that set, or by the beginning of a file are supported. Symbols are delimited by characters not in that set, or by the
(since the source program must end with a newline, the end of a file is beginning of a file (since the source program must end with a newline, the end
not a possible symbol delimiter). @xref{Symbols}. of a file is not a possible symbol delimiter). @xref{Symbols}.
@cindex length of symbols @cindex length of symbols
@node Statements @node Statements
@ -3414,6 +3414,11 @@ on the H8/300), and underscores.
Case of letters is significant: @code{foo} is a different symbol name Case of letters is significant: @code{foo} is a different symbol name
than @code{Foo}. than @code{Foo}.
Multibyte characters are supported. To generate a symbol name containing
multibyte characters, enclose it within double quotes and use escape codes.
@xref{Strings}. Generating a multibyte symbol name from a label is not
currently supported.
Each symbol has exactly one name. Each name in an assembly language program Each symbol has exactly one name. Each name in an assembly language program
refers to exactly one symbol. You may use that symbol name any number of times refers to exactly one symbol. You may use that symbol name any number of times
in a program. in a program.

View file

@ -40,6 +40,7 @@
#include "obstack.h" #include "obstack.h"
#include "ecoff.h" #include "ecoff.h"
#include "dw2gencfi.h" #include "dw2gencfi.h"
#include "wchar.h"
#ifndef TC_START_LABEL #ifndef TC_START_LABEL
#define TC_START_LABEL(x,y,z) (x == ':') #define TC_START_LABEL(x,y,z) (x == ':')
@ -1583,13 +1584,106 @@ s_altmacro (int on)
macro_set_alternate (on); macro_set_alternate (on);
} }
/* Read a symbol name from input_line_pointer.

   The name is copied into a freshly xmalloc'ed buffer and a pointer to
   that buffer is returned.  It is the caller's responsibility to free
   the buffer.

   The name is not left in the input_line_pointer buffer because a
   quoted name may need processing to handle escape characters.

   Two forms are accepted:
     - a double-quoted string, read with next_char_of_string ();
     - a bare name starting with a name-beginner character (or \001)
       and continuing with name characters (or \001).

   On success input_line_pointer is advanced to the next
   non-whitespace character after the name.

   If a symbol name could not be read (including an empty quoted
   name), the routine issues an error message, skips to the end of
   the line and returns NULL.  */

static char *
read_symbol_name (void)
{
  char * name;
  char * start;
  char c;

  c = *input_line_pointer++;

  if (c == '"')
    {
#define SYM_NAME_CHUNK_LEN 128
      ptrdiff_t len = SYM_NAME_CHUNK_LEN;
      char * name_end;
      unsigned int C;

      /* One extra byte is always reserved for the terminating NUL.  */
      start = name = xmalloc (len + 1);

      name_end = name + SYM_NAME_CHUNK_LEN;

      while (is_a_char (C = next_char_of_string ()))
        {
          if (name >= name_end)
            {
              /* Buffer full: grow it by another chunk and re-derive the
                 write pointer, since xrealloc may move the block.  */
              ptrdiff_t sofar;

              sofar = name - start;
              len += SYM_NAME_CHUNK_LEN;
              start = xrealloc (start, len + 1);
              name_end = start + len;
              name = start + sofar;
            }

          *name++ = (char) C;
        }
      *name = 0;

      /* Since quoted symbol names can contain non-ASCII characters,
         check the string and warn if it cannot be recognised by the
         current character set.  The scan must begin at START: NAME
         now points at the terminating NUL, so checking from NAME
         would only ever examine an empty string.  */
      if (mbstowcs (NULL, start, len) == (size_t) -1)
        as_warn (_("symbol name not recognised in the current locale"));
    }
  else if (is_name_beginner (c) || c == '\001')
    {
      ptrdiff_t len;

      name = input_line_pointer - 1;

      /* We accept \001 in a name in case this is
         being called with a constructed string.  */
      while (is_part_of_name (c = *input_line_pointer++)
             || c == '\001')
        ;

      /* input_line_pointer is now one past the character that ended
         the name, hence the extra - 1.  */
      len = (input_line_pointer - name) - 1;
      start = xmalloc (len + 1);

      memcpy (start, name, len);
      start[len] = 0;

      /* Skip a name ender char if one is present.  */
      if (! is_name_ender (c))
        --input_line_pointer;
    }
  else
    name = start = NULL;

  /* NAME == START means either that nothing name-like was found (both
     NULL) or that a quoted name turned out to be empty.  */
  if (name == start)
    {
      as_bad (_("expected symbol name"));
      ignore_rest_of_line ();
      /* Release the buffer allocated for an empty quoted name; this is
         a no-op when START is NULL.  */
      free (start);
      return NULL;
    }

  SKIP_WHITESPACE ();

  return start;
}
symbolS * symbolS *
s_comm_internal (int param, s_comm_internal (int param,
symbolS *(*comm_parse_extra) (int, symbolS *, addressT)) symbolS *(*comm_parse_extra) (int, symbolS *, addressT))
{ {
char *name; char *name;
char c;
char *p;
offsetT temp, size; offsetT temp, size;
symbolS *symbolP = NULL; symbolS *symbolP = NULL;
char *stop = NULL; char *stop = NULL;
@ -1599,20 +1693,8 @@ s_comm_internal (int param,
if (flag_mri) if (flag_mri)
stop = mri_comment_field (&stopc); stop = mri_comment_field (&stopc);
name = input_line_pointer; if ((name = read_symbol_name ()) == NULL)
c = get_symbol_end (); goto out;
/* Just after name is now '\0'. */
p = input_line_pointer;
*p = c;
if (name == p)
{
as_bad (_("expected symbol name"));
ignore_rest_of_line ();
goto out;
}
SKIP_WHITESPACE ();
/* Accept an optional comma after the name. The comma used to be /* Accept an optional comma after the name. The comma used to be
required, but Irix 5 cc does not generate it for .lcomm. */ required, but Irix 5 cc does not generate it for .lcomm. */
@ -1635,7 +1717,6 @@ s_comm_internal (int param,
goto out; goto out;
} }
*p = 0;
symbolP = symbol_find_or_make (name); symbolP = symbol_find_or_make (name);
if ((S_IS_DEFINED (symbolP) || symbol_equated_p (symbolP)) if ((S_IS_DEFINED (symbolP) || symbol_equated_p (symbolP))
&& !S_IS_COMMON (symbolP)) && !S_IS_COMMON (symbolP))
@ -1644,7 +1725,6 @@ s_comm_internal (int param,
{ {
symbolP = NULL; symbolP = NULL;
as_bad (_("symbol `%s' is already defined"), name); as_bad (_("symbol `%s' is already defined"), name);
*p = c;
ignore_rest_of_line (); ignore_rest_of_line ();
goto out; goto out;
} }
@ -1662,7 +1742,6 @@ s_comm_internal (int param,
as_warn (_("size of \"%s\" is already %ld; not changing to %ld"), as_warn (_("size of \"%s\" is already %ld; not changing to %ld"),
name, (long) size, (long) temp); name, (long) size, (long) temp);
*p = c;
if (comm_parse_extra != NULL) if (comm_parse_extra != NULL)
symbolP = (*comm_parse_extra) (param, symbolP, size); symbolP = (*comm_parse_extra) (param, symbolP, size);
else else
@ -1676,6 +1755,8 @@ s_comm_internal (int param,
out: out:
if (flag_mri) if (flag_mri)
mri_comment_end (stop, stopc); mri_comment_end (stop, stopc);
if (name != NULL)
free (name);
return symbolP; return symbolP;
} }
@ -2179,12 +2260,12 @@ s_globl (int ignore ATTRIBUTE_UNUSED)
do do
{ {
name = input_line_pointer; if ((name = read_symbol_name ()) == NULL)
c = get_symbol_end (); return;
symbolP = symbol_find_or_make (name); symbolP = symbol_find_or_make (name);
S_SET_EXTERNAL (symbolP); S_SET_EXTERNAL (symbolP);
*input_line_pointer = c;
SKIP_WHITESPACE (); SKIP_WHITESPACE ();
c = *input_line_pointer; c = *input_line_pointer;
if (c == ',') if (c == ',')
@ -2194,6 +2275,8 @@ s_globl (int ignore ATTRIBUTE_UNUSED)
if (is_end_of_line[(unsigned char) *input_line_pointer]) if (is_end_of_line[(unsigned char) *input_line_pointer])
c = '\n'; c = '\n';
} }
free (name);
} }
while (c == ','); while (c == ',');
@ -2580,33 +2663,17 @@ void
s_lsym (int ignore ATTRIBUTE_UNUSED) s_lsym (int ignore ATTRIBUTE_UNUSED)
{ {
char *name; char *name;
char c;
char *p;
expressionS exp; expressionS exp;
symbolS *symbolP; symbolS *symbolP;
/* We permit ANY defined expression: BSD4.2 demands constants. */ /* We permit ANY defined expression: BSD4.2 demands constants. */
name = input_line_pointer; if ((name = read_symbol_name ()) == NULL)
c = get_symbol_end (); return;
p = input_line_pointer;
*p = c;
if (name == p)
{
as_bad (_("expected symbol name"));
ignore_rest_of_line ();
return;
}
SKIP_WHITESPACE ();
if (*input_line_pointer != ',') if (*input_line_pointer != ',')
{ {
*p = 0;
as_bad (_("expected comma after \"%s\""), name); as_bad (_("expected comma after \"%s\""), name);
*p = c; goto err_out;
ignore_rest_of_line ();
return;
} }
input_line_pointer++; input_line_pointer++;
@ -2616,11 +2683,9 @@ s_lsym (int ignore ATTRIBUTE_UNUSED)
&& exp.X_op != O_register) && exp.X_op != O_register)
{ {
as_bad (_("bad expression")); as_bad (_("bad expression"));
ignore_rest_of_line (); goto err_out;
return;
} }
*p = 0;
symbolP = symbol_find_or_make (name); symbolP = symbol_find_or_make (name);
if (S_GET_SEGMENT (symbolP) == undefined_section) if (S_GET_SEGMENT (symbolP) == undefined_section)
@ -2638,8 +2703,14 @@ s_lsym (int ignore ATTRIBUTE_UNUSED)
as_bad (_("symbol `%s' is already defined"), name); as_bad (_("symbol `%s' is already defined"), name);
} }
*p = c;
demand_empty_rest_of_line (); demand_empty_rest_of_line ();
free (name);
return;
err_out:
ignore_rest_of_line ();
free (name);
return;
} }
/* Read a line into an sb. Returns the character that ended the line /* Read a line into an sb. Returns the character that ended the line
@ -3283,42 +3354,25 @@ void
s_set (int equiv) s_set (int equiv)
{ {
char *name; char *name;
char delim;
char *end_name;
/* Especial apologies for the random logic: /* Especial apologies for the random logic:
this just grew, and could be parsed much more simply! this just grew, and could be parsed much more simply!
Dean in haste. */ Dean in haste. */
name = input_line_pointer; if ((name = read_symbol_name ()) == NULL)
delim = get_symbol_end (); return;
end_name = input_line_pointer;
*end_name = delim;
if (name == end_name)
{
as_bad (_("expected symbol name"));
ignore_rest_of_line ();
return;
}
SKIP_WHITESPACE ();
if (*input_line_pointer != ',') if (*input_line_pointer != ',')
{ {
*end_name = 0;
as_bad (_("expected comma after \"%s\""), name); as_bad (_("expected comma after \"%s\""), name);
*end_name = delim;
ignore_rest_of_line (); ignore_rest_of_line ();
free (name);
return; return;
} }
input_line_pointer++; input_line_pointer++;
*end_name = 0;
assign_symbol (name, equiv); assign_symbol (name, equiv);
*end_name = delim;
demand_empty_rest_of_line (); demand_empty_rest_of_line ();
free (name);
} }
void void
@ -3622,23 +3676,12 @@ void
s_weakref (int ignore ATTRIBUTE_UNUSED) s_weakref (int ignore ATTRIBUTE_UNUSED)
{ {
char *name; char *name;
char delim;
char *end_name;
symbolS *symbolP; symbolS *symbolP;
symbolS *symbolP2; symbolS *symbolP2;
expressionS exp; expressionS exp;
name = input_line_pointer; if ((name = read_symbol_name ()) == NULL)
delim = get_symbol_end (); return;
end_name = input_line_pointer;
if (name == end_name)
{
as_bad (_("expected symbol name"));
*end_name = delim;
ignore_rest_of_line ();
return;
}
symbolP = symbol_find_or_make (name); symbolP = symbol_find_or_make (name);
@ -3647,41 +3690,27 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
if (!S_IS_VOLATILE (symbolP)) if (!S_IS_VOLATILE (symbolP))
{ {
as_bad (_("symbol `%s' is already defined"), name); as_bad (_("symbol `%s' is already defined"), name);
*end_name = delim; goto err_out;
ignore_rest_of_line ();
return;
} }
symbolP = symbol_clone (symbolP, 1); symbolP = symbol_clone (symbolP, 1);
S_CLEAR_VOLATILE (symbolP); S_CLEAR_VOLATILE (symbolP);
} }
*end_name = delim;
SKIP_WHITESPACE (); SKIP_WHITESPACE ();
if (*input_line_pointer != ',') if (*input_line_pointer != ',')
{ {
*end_name = 0;
as_bad (_("expected comma after \"%s\""), name); as_bad (_("expected comma after \"%s\""), name);
*end_name = delim; goto err_out;
ignore_rest_of_line ();
return;
} }
input_line_pointer++; input_line_pointer++;
SKIP_WHITESPACE (); SKIP_WHITESPACE ();
free (name);
name = input_line_pointer; if ((name = read_symbol_name ()) == NULL)
delim = get_symbol_end (); return;
end_name = input_line_pointer;
if (name == end_name)
{
as_bad (_("expected symbol name"));
ignore_rest_of_line ();
return;
}
if ((symbolP2 = symbol_find_noref (name, 1)) == NULL if ((symbolP2 = symbol_find_noref (name, 1)) == NULL
&& (symbolP2 = md_undefined_symbol (name)) == NULL) && (symbolP2 = md_undefined_symbol (name)) == NULL)
@ -3712,6 +3741,7 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
while (symp != symbolP) while (symp != symbolP)
{ {
char *old_loop = loop; char *old_loop = loop;
symp = symbol_get_value_expression (symp)->X_add_symbol; symp = symbol_get_value_expression (symp)->X_add_symbol;
loop = concat (loop, " => ", S_GET_NAME (symp), loop = concat (loop, " => ", S_GET_NAME (symp),
(const char *) NULL); (const char *) NULL);
@ -3722,8 +3752,7 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
S_GET_NAME (symbolP), loop); S_GET_NAME (symbolP), loop);
free (loop); free (loop);
free (name);
*end_name = delim;
ignore_rest_of_line (); ignore_rest_of_line ();
return; return;
} }
@ -3734,8 +3763,6 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
/* symbolP2 = symp; */ /* symbolP2 = symp; */
} }
*end_name = delim;
memset (&exp, 0, sizeof (exp)); memset (&exp, 0, sizeof (exp));
exp.X_op = O_symbol; exp.X_op = O_symbol;
exp.X_add_symbol = symbolP2; exp.X_add_symbol = symbolP2;
@ -3746,6 +3773,13 @@ s_weakref (int ignore ATTRIBUTE_UNUSED)
S_SET_WEAKREFR (symbolP); S_SET_WEAKREFR (symbolP);
demand_empty_rest_of_line (); demand_empty_rest_of_line ();
free (name);
return;
err_out:
ignore_rest_of_line ();
free (name);
return;
} }

View file

@ -1,3 +1,10 @@
2012-05-28 Nick Clifton <nickc@redhat.com>
* gas/elf/syms.s: New test - checks the generation of multibyte
symbol names.
* gas/elf/syms.d: New file - expected readelf output.
* gas/elf/elf.exp: Add syms.
2012-05-25 Alan Modra <amodra@gmail.com> 2012-05-25 Alan Modra <amodra@gmail.com>
* gas/lns/lns-big-delta.s: Add nops. * gas/lns/lns-big-delta.s: Add nops.

View file

@ -184,6 +184,8 @@ if { [is_elf_format] } then {
run_dump_test "bad-size" run_dump_test "bad-size"
run_dump_test "bad-group" run_dump_test "bad-group"
run_dump_test "syms"
load_lib gas-dg.exp load_lib gas-dg.exp
dg-init dg-init
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/err-*.s $srcdir/$subdir/warn-*.s]] "" "" dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/err-*.s $srcdir/$subdir/warn-*.s]] "" ""

View file

@ -0,0 +1,18 @@
#readelf: -S -s -p .strtab
#name: Multibyte symbol names
# The following targets use an unusual .set syntax...
#not-target: alpha*-*-* h8300-*-*
#...
Section Headers:
#...
\[ .\] sec.*tion.*
#...
Symbol table.*
#...
..: .*sy.*mbol
#...
String dump.*
#...
\[......\] sy.*mbol
#pass

View file

@ -0,0 +1,5 @@
.section "sec\xa5\xc2tion"
.set "sy\xa5\xc2mbol", .
.string8 "str\xa5\xc2ing"

View file

@ -1,3 +1,8 @@
2012-05-28 Nick Clifton <nickc@redhat.com>
* ld-ifunc/ifunc-13a-i386.s: Fix use of .global directive.
* ld-ifunc/ifunc-15a-i386.s: Likewise.
2012-05-28 Alan Modra <amodra@gmail.com> 2012-05-28 Alan Modra <amodra@gmail.com>
PR ld/14170 PR ld/14170

View file

@ -1,6 +1,6 @@
.text .text
.type foo, @function .type foo, @function
.global .global foo
foo: foo:
movl xxx@GOT(%ebx), %eax movl xxx@GOT(%ebx), %eax
ret ret

View file

@ -1,6 +1,6 @@
.text .text
.type foo, @function .type foo, @function
.global .global foo
foo: foo:
movl ifunc@GOT(%ebx), %eax movl ifunc@GOT(%ebx), %eax
ret ret