From Craig Silverstein: First cut at detecting ODR violations.

This commit is contained in:
Ian Lance Taylor 2007-11-13 20:02:32 +00:00
parent 37715c4c6b
commit 70e654ba48
4 changed files with 263 additions and 107 deletions

View file

@ -180,6 +180,10 @@ queue_middle_tasks(const General_options& options,
(*input_objects->dynobj_begin())->name().c_str());
}
// See if any of the input definitions violate the One Definition Rule.
// TODO: if this is too slow, do this as a task, rather than inline.
symtab->detect_odr_violations();
// Define some sections and symbols needed for a dynamic link. This
// handles some cases we want to see before we read the relocs.
layout->create_initial_dynamic_sections(input_objects, symtab);

View file

@ -119,14 +119,76 @@ static const unsigned int def_flag = 0 << def_undef_or_common_shift;
static const unsigned int undef_flag = 1 << def_undef_or_common_shift;
static const unsigned int common_flag = 2 << def_undef_or_common_shift;
// This convenience function combines all the flags based on facts
// about the symbol.
static unsigned int
symbol_to_bits(elfcpp::STB binding, bool is_dynamic,
unsigned int shndx, elfcpp::STT type)
{
unsigned int bits;
switch (binding)
{
case elfcpp::STB_GLOBAL:
bits = global_flag;
break;
case elfcpp::STB_WEAK:
bits = weak_flag;
break;
case elfcpp::STB_LOCAL:
// We should only see externally visible symbols in the symbol
// table.
gold_error(_("invalid STB_LOCAL symbol in external symbols"));
bits = global_flag;
default:
// Any target which wants to handle STB_LOOS, etc., needs to
// define a resolve method.
gold_error(_("unsupported symbol binding"));
bits = global_flag;
}
if (is_dynamic)
bits |= dynamic_flag;
else
bits |= regular_flag;
switch (shndx)
{
case elfcpp::SHN_UNDEF:
bits |= undef_flag;
break;
case elfcpp::SHN_COMMON:
bits |= common_flag;
break;
default:
if (type == elfcpp::STT_COMMON)
bits |= common_flag;
else
bits |= def_flag;
break;
}
return bits;
}
// Resolve a symbol. This is called the second and subsequent times
// we see a symbol. TO is the pre-existing symbol. SYM is the new
// symbol, seen in OBJECT. VERSION of the version of SYM.
// we see a symbol. TO is the pre-existing symbol. ORIG_SYM is the
// new symbol, seen in OBJECT. SYM is almost always identical to
// ORIG_SYM, but may be munged (for instance, if we determine the
// symbol is in a to-be-discarded section, we'll set sym's shndx to
// UNDEFINED). VERSION of the version of SYM.
template<int size, bool big_endian>
void
Symbol_table::resolve(Sized_symbol<size>* to,
const elfcpp::Sym<size, big_endian>& sym,
const elfcpp::Sym<size, big_endian>& orig_sym,
Object* object, const char* version)
{
if (object->target()->has_resolve())
@ -150,53 +212,10 @@ Symbol_table::resolve(Sized_symbol<size>* to,
to->set_in_dyn();
}
unsigned int frombits;
switch (sym.get_st_bind())
{
case elfcpp::STB_GLOBAL:
frombits = global_flag;
break;
case elfcpp::STB_WEAK:
frombits = weak_flag;
break;
case elfcpp::STB_LOCAL:
gold_error(_("%s: invalid STB_LOCAL symbol %s in external symbols"),
object->name().c_str(), to->name());
frombits = global_flag;
break;
default:
gold_error(_("%s: unsupported symbol binding %d for symbol %s"),
object->name().c_str(),
static_cast<int>(sym.get_st_bind()), to->name());
frombits = global_flag;
break;
}
if (!object->is_dynamic())
frombits |= regular_flag;
else
frombits |= dynamic_flag;
switch (sym.get_st_shndx())
{
case elfcpp::SHN_UNDEF:
frombits |= undef_flag;
break;
case elfcpp::SHN_COMMON:
frombits |= common_flag;
break;
default:
if (sym.get_st_type() == elfcpp::STT_COMMON)
frombits |= common_flag;
else
frombits |= def_flag;
break;
}
unsigned int frombits = symbol_to_bits(sym.get_st_bind(),
object->is_dynamic(),
sym.get_st_shndx(),
sym.get_st_type());
bool adjust_common_sizes;
if (Symbol_table::should_override(to, frombits, object,
@ -214,6 +233,34 @@ Symbol_table::resolve(Sized_symbol<size>* to,
if (adjust_common_sizes && sym.get_st_size() > to->symsize())
to->set_symsize(sym.get_st_size());
}
// A new weak undefined reference, merging with an old weak
// reference, could be a One Definition Rule (ODR) violation --
// especially if the types or sizes of the references differ. We'll
// store such pairs and look them up later to make sure they
// actually refer to the same lines of code. (Note: not all ODR
// violations can be found this way, and not everything this finds
// is an ODR violation. But it's helpful to warn about.)
// We use orig_sym here because we want the symbol exactly as it
// appears in the object file, not munged via our future processing.
if (orig_sym.get_st_bind() == elfcpp::STB_WEAK
&& to->binding() == elfcpp::STB_WEAK
&& orig_sym.get_st_shndx() != elfcpp::SHN_UNDEF
&& to->shndx() != elfcpp::SHN_UNDEF
&& orig_sym.get_st_size() != 0 // Ignore weird 0-sized symbols.
&& to->symsize() != 0
&& (orig_sym.get_st_type() != to->type()
|| orig_sym.get_st_size() != to->symsize())
// C does not have a concept of ODR, so we only need to do this
// on C++ symbols. These have (mangled) names starting with _Z.
&& to->name()[0] == '_' && to->name()[1] == 'Z')
{
Symbol_location from_location
= { object, orig_sym.get_st_shndx(), orig_sym.get_st_value() };
Symbol_location to_location = { to->object(), to->shndx(), to->value() };
this->candidate_odr_violations_[to->name()].insert(from_location);
this->candidate_odr_violations_[to->name()].insert(to_location);
}
}
// Handle the core of symbol resolution. This is called with the
@ -229,51 +276,11 @@ Symbol_table::should_override(const Symbol* to, unsigned int frombits,
{
*adjust_common_sizes = false;
unsigned int tobits;
switch (to->binding())
{
case elfcpp::STB_GLOBAL:
tobits = global_flag;
break;
case elfcpp::STB_WEAK:
tobits = weak_flag;
break;
case elfcpp::STB_LOCAL:
// We should only see externally visible symbols in the symbol
// table.
gold_unreachable();
default:
// Any target which wants to handle STB_LOOS, etc., needs to
// define a resolve method.
gold_unreachable();
}
if (to->source() == Symbol::FROM_OBJECT
&& to->object()->is_dynamic())
tobits |= dynamic_flag;
else
tobits |= regular_flag;
switch (to->shndx())
{
case elfcpp::SHN_UNDEF:
tobits |= undef_flag;
break;
case elfcpp::SHN_COMMON:
tobits |= common_flag;
break;
default:
if (to->type() == elfcpp::STT_COMMON)
tobits |= common_flag;
else
tobits |= def_flag;
break;
}
unsigned int tobits = symbol_to_bits(to->binding(),
(to->source() == Symbol::FROM_OBJECT
&& to->object()->is_dynamic()),
to->shndx(),
to->type());
// FIXME: Warn if either but not both of TO and SYM are STT_TLS.
@ -719,6 +726,7 @@ void
Symbol_table::resolve<32, false>(
Sized_symbol<32>* to,
const elfcpp::Sym<32, false>& sym,
const elfcpp::Sym<32, false>& orig_sym,
Object* object,
const char* version);
#endif
@ -729,6 +737,7 @@ void
Symbol_table::resolve<32, true>(
Sized_symbol<32>* to,
const elfcpp::Sym<32, true>& sym,
const elfcpp::Sym<32, true>& orig_sym,
Object* object,
const char* version);
#endif
@ -739,6 +748,7 @@ void
Symbol_table::resolve<64, false>(
Sized_symbol<64>* to,
const elfcpp::Sym<64, false>& sym,
const elfcpp::Sym<64, false>& orig_sym,
Object* object,
const char* version);
#endif
@ -749,6 +759,7 @@ void
Symbol_table::resolve<64, true>(
Sized_symbol<64>* to,
const elfcpp::Sym<64, true>& sym,
const elfcpp::Sym<64, true>& orig_sym,
Object* object,
const char* version);
#endif

View file

@ -23,10 +23,12 @@
#include "gold.h"
#include <stdint.h>
#include <set>
#include <string>
#include <utility>
#include "object.h"
#include "dwarf_reader.h"
#include "dynobj.h"
#include "output.h"
#include "target.h"
@ -343,7 +345,7 @@ Symbol_table::resolve(Sized_symbol<size>* to, const Sized_symbol<size>* from,
esym.put_st_info(from->binding(), from->type());
esym.put_st_other(from->visibility(), from->nonvis());
esym.put_st_shndx(from->shndx());
this->resolve(to, esym.sym(), from->object(), version);
this->resolve(to, esym.sym(), esym.sym(), from->object(), version);
if (from->in_reg())
to->set_in_reg();
if (from->in_dyn())
@ -372,6 +374,11 @@ Symbol_table::resolve(Sized_symbol<size>* to, const Sized_symbol<size>* from,
// object file as a forwarder, and record it in the forwarders_ map.
// Note that entries in the hash table will never be marked as
// forwarders.
//
// SYM and ORIG_SYM are almost always the same. ORIG_SYM is the
// symbol exactly as it existed in the input file. SYM is usually
// that as well, but can be modified, for instance if we determine
// it's in a to-be-discarded section.
template<int size, bool big_endian>
Sized_symbol<size>*
@ -381,7 +388,8 @@ Symbol_table::add_from_object(Object* object,
const char *version,
Stringpool::Key version_key,
bool def,
const elfcpp::Sym<size, big_endian>& sym)
const elfcpp::Sym<size, big_endian>& sym,
const elfcpp::Sym<size, big_endian>& orig_sym)
{
Symbol* const snull = NULL;
std::pair<typename Symbol_table_type::iterator, bool> ins =
@ -416,7 +424,7 @@ Symbol_table::add_from_object(Object* object,
was_undefined = ret->is_undefined();
was_common = ret->is_common();
this->resolve(ret, sym, object, version);
this->resolve(ret, sym, orig_sym, object, version);
if (def)
{
@ -456,7 +464,7 @@ Symbol_table::add_from_object(Object* object,
ret = this->get_sized_symbol SELECT_SIZE_NAME(size) (
insdef.first->second
SELECT_SIZE(size));
this->resolve(ret, sym, object, version);
this->resolve(ret, sym, orig_sym, object, version);
ins.first->second = ret;
}
else
@ -571,7 +579,7 @@ Symbol_table::add_from_relobj(
Stringpool::Key name_key;
name = this->namepool_.add(name, true, &name_key);
res = this->add_from_object(relobj, name, name_key, NULL, 0,
false, *psym);
false, *psym, sym);
}
else
{
@ -590,7 +598,7 @@ Symbol_table::add_from_relobj(
ver = this->namepool_.add(ver, true, &ver_key);
res = this->add_from_object(relobj, name, name_key, ver, ver_key,
def, *psym);
def, *psym, sym);
}
(*sympointers)[i] = res;
@ -659,7 +667,7 @@ Symbol_table::add_from_dynobj(
Stringpool::Key name_key;
name = this->namepool_.add(name, true, &name_key);
res = this->add_from_object(dynobj, name, name_key, NULL, 0,
false, sym);
false, sym, sym);
}
else
{
@ -693,7 +701,7 @@ Symbol_table::add_from_dynobj(
{
// This symbol does not have a version.
res = this->add_from_object(dynobj, name, name_key, NULL, 0,
false, sym);
false, sym, sym);
}
else
{
@ -723,14 +731,14 @@ Symbol_table::add_from_dynobj(
if (sym.get_st_shndx() == elfcpp::SHN_ABS
&& name_key == version_key)
res = this->add_from_object(dynobj, name, name_key, NULL, 0,
false, sym);
false, sym, sym);
else
{
const bool def = (!hidden
&& (sym.get_st_shndx()
!= elfcpp::SHN_UNDEF));
res = this->add_from_object(dynobj, name, name_key, version,
version_key, def, sym);
version_key, def, sym, sym);
}
}
}
@ -1794,6 +1802,97 @@ Symbol_table::sized_write_section_symbol(const Output_section* os,
of->write_output_view(offset, sym_size, pov);
}
// Check candidate_odr_violations_ to find symbols with the same name
// but apparently different definitions (different source-file/line-no).
void
Symbol_table::detect_odr_violations() const
{
if (parameters->get_size() == 32)
{
if (!parameters->is_big_endian())
{
#ifdef HAVE_TARGET_32_LITTLE
this->sized_detect_odr_violations<32, false>();
#else
gold_unreachable();
#endif
}
else
{
#ifdef HAVE_TARGET_32_BIG
this->sized_detect_odr_violations<32, true>();
#else
gold_unreachable();
#endif
}
}
else if (parameters->get_size() == 64)
{
if (!parameters->is_big_endian())
{
#ifdef HAVE_TARGET_64_LITTLE
this->sized_detect_odr_violations<64, false>();
#else
gold_unreachable();
#endif
}
else
{
#ifdef HAVE_TARGET_64_BIG
this->sized_detect_odr_violations<64, true>();
#else
gold_unreachable();
#endif
}
}
else
gold_unreachable();
}
// Implement detect_odr_violations.
template<int size, bool big_endian>
void
Symbol_table::sized_detect_odr_violations() const
{
for (Odr_map::const_iterator it = candidate_odr_violations_.begin();
it != candidate_odr_violations_.end();
++it)
{
const char* symbol_name = it->first;
// We use a sorted set so the output is deterministic.
std::set<std::string> line_nums;
Unordered_set<Symbol_location, Symbol_location_hash>::const_iterator
locs;
for (locs = it->second.begin(); locs != it->second.end(); ++locs)
{
// We need to lock the object in order to read it. This
// means that we can not run inside a Task. If we want to
// run this in a Task for better performance, we will need
// one Task for object, plus appropriate locking to ensure
// that we don't conflict with other uses of the object.
locs->object->lock();
Dwarf_line_info<size, big_endian> line_info(locs->object);
locs->object->unlock();
std::string lineno = line_info.addr2line(locs->shndx, locs->offset);
if (!lineno.empty())
line_nums.insert(lineno);
}
if (line_nums.size() > 1)
{
gold_warning(_("symbol %s defined in multiple places "
"(possible ODR violation):"), symbol_name);
for (std::set<std::string>::const_iterator it2 = line_nums.begin();
it2 != line_nums.end();
++it2)
fprintf(stderr, " %s\n", it2->c_str());
}
}
}
// Warnings functions.
// Add a new warning.

View file

@ -895,7 +895,7 @@ class Warnings
};
// A mapping from warning symbol names (canonicalized in
// Symbol_table's namepool_ field) to
// Symbol_table's namepool_ field) to warning information.
typedef Unordered_map<const char*, Warning_location> Warning_table;
Warning_table warnings_;
@ -968,7 +968,7 @@ class Symbol_table
// Define a set of symbols in output segments.
void
define_symbols(const Layout*, const Target*, int count,
const Define_symbol_in_segment*);
const Define_symbol_in_segment*);
// Define SYM using a COPY reloc. POSD is the Output_data where the
// symbol should be defined--typically a .dyn.bss section. VALUE is
@ -1023,6 +1023,11 @@ class Symbol_table
size_t relnum, off_t reloffset) const
{ this->warnings_.issue_warning(sym, relinfo, relnum, reloffset); }
// Check candidate_odr_violations_ to find symbols with the same name
// but apparently different definitions (different source-file/line-no).
void
detect_odr_violations() const;
// SYM is defined using a COPY reloc. Return the dynamic object
// where the original definition was found.
Dynobj*
@ -1070,13 +1075,15 @@ class Symbol_table
Sized_symbol<size>*
add_from_object(Object*, const char *name, Stringpool::Key name_key,
const char *version, Stringpool::Key version_key,
bool def, const elfcpp::Sym<size, big_endian>& sym);
bool def, const elfcpp::Sym<size, big_endian>& sym,
const elfcpp::Sym<size, big_endian>& orig_sym);
// Resolve symbols.
template<int size, bool big_endian>
void
resolve(Sized_symbol<size>* to,
const elfcpp::Sym<size, big_endian>& sym,
const elfcpp::Sym<size, big_endian>& orig_sym,
Object*, const char* version);
template<int size, bool big_endian>
@ -1157,6 +1164,11 @@ class Symbol_table
void
do_allocate_commons(const General_options&, Layout*);
// Implement detect_odr_violations.
template<int size, bool big_endian>
void
sized_detect_odr_violations() const;
// Finalize symbols specialized for size.
template<int size>
off_t
@ -1208,6 +1220,33 @@ class Symbol_table
// they are defined.
typedef Unordered_map<const Symbol*, Dynobj*> Copied_symbol_dynobjs;
// A map from symbol name (as a pointer into the namepool) to all
// the locations the symbols is (weakly) defined (and certain other
// conditions are met). This map will be used later to detect
// possible One Definition Rule (ODR) violations.
struct Symbol_location
{
Object* object; // Object where the symbol is defined.
unsigned int shndx; // Section-in-object where the symbol is defined.
off_t offset; // Offset-in-section where the symbol is defined.
bool operator==(const Symbol_location& that) const
{
return (this->object == that.object
&& this->shndx == that.shndx
&& this->offset == that.offset);
}
};
struct Symbol_location_hash
{
size_t operator()(const Symbol_location& loc) const
{ return reinterpret_cast<uintptr_t>(loc.object) ^ loc.offset ^ loc.shndx; }
};
typedef Unordered_map<const char*,
Unordered_set<Symbol_location, Symbol_location_hash> >
Odr_map;
// We increment this every time we see a new undefined symbol, for
// use in archive groups.
int saw_undefined_;
@ -1242,6 +1281,9 @@ class Symbol_table
Commons_type commons_;
// Manage symbol warnings.
Warnings warnings_;
// Manage potential One Definition Rule (ODR) violations.
Odr_map candidate_odr_violations_;
// When we emit a COPY reloc for a symbol, we define it in an
// Output_data. When it's time to emit version information for it,
// we need to know the dynamic object in which we found the original