From 70e654ba489ea64e5ba72c63fc56e8a2d5dfd894 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 13 Nov 2007 20:02:32 +0000 Subject: [PATCH] From Craig Silverstein: First cut at detecting ODR violations. --- gold/gold.cc | 4 + gold/resolve.cc | 199 +++++++++++++++++++++++++----------------------- gold/symtab.cc | 119 ++++++++++++++++++++++++++--- gold/symtab.h | 48 +++++++++++- 4 files changed, 263 insertions(+), 107 deletions(-) diff --git a/gold/gold.cc b/gold/gold.cc index 83bb3f6065..a215d0f31d 100644 --- a/gold/gold.cc +++ b/gold/gold.cc @@ -180,6 +180,10 @@ queue_middle_tasks(const General_options& options, (*input_objects->dynobj_begin())->name().c_str()); } + // See if any of the input definitions violate the One Definition Rule. + // TODO: if this is too slow, do this as a task, rather than inline. + symtab->detect_odr_violations(); + // Define some sections and symbols needed for a dynamic link. This // handles some cases we want to see before we read the relocs. layout->create_initial_dynamic_sections(input_objects, symtab); diff --git a/gold/resolve.cc b/gold/resolve.cc index 7d141c0253..980831b795 100644 --- a/gold/resolve.cc +++ b/gold/resolve.cc @@ -119,14 +119,76 @@ static const unsigned int def_flag = 0 << def_undef_or_common_shift; static const unsigned int undef_flag = 1 << def_undef_or_common_shift; static const unsigned int common_flag = 2 << def_undef_or_common_shift; +// This convenience function combines all the flags based on facts +// about the symbol. + +static unsigned int +symbol_to_bits(elfcpp::STB binding, bool is_dynamic, + unsigned int shndx, elfcpp::STT type) +{ + unsigned int bits; + + switch (binding) + { + case elfcpp::STB_GLOBAL: + bits = global_flag; + break; + + case elfcpp::STB_WEAK: + bits = weak_flag; + break; + + case elfcpp::STB_LOCAL: + // We should only see externally visible symbols in the symbol + // table. + gold_error(_("invalid STB_LOCAL symbol in external symbols")); + bits = global_flag; + + default: + // Any target which wants to handle STB_LOOS, etc., needs to + // define a resolve method. + gold_error(_("unsupported symbol binding")); + bits = global_flag; + } + + if (is_dynamic) + bits |= dynamic_flag; + else + bits |= regular_flag; + + switch (shndx) + { + case elfcpp::SHN_UNDEF: + bits |= undef_flag; + break; + + case elfcpp::SHN_COMMON: + bits |= common_flag; + break; + + default: + if (type == elfcpp::STT_COMMON) + bits |= common_flag; + else + bits |= def_flag; + break; + } + + return bits; +} + // Resolve a symbol. This is called the second and subsequent times -// we see a symbol. TO is the pre-existing symbol. SYM is the new -// symbol, seen in OBJECT. VERSION of the version of SYM. +// we see a symbol. TO is the pre-existing symbol. ORIG_SYM is the +// new symbol, seen in OBJECT. SYM is almost always identical to +// ORIG_SYM, but may be munged (for instance, if we determine the +// symbol is in a to-be-discarded section, we'll set sym's shndx to +// UNDEFINED). VERSION of the version of SYM. template void Symbol_table::resolve(Sized_symbol* to, const elfcpp::Sym& sym, + const elfcpp::Sym& orig_sym, Object* object, const char* version) { if (object->target()->has_resolve()) @@ -150,53 +212,10 @@ Symbol_table::resolve(Sized_symbol* to, to->set_in_dyn(); } - unsigned int frombits; - switch (sym.get_st_bind()) - { - case elfcpp::STB_GLOBAL: - frombits = global_flag; - break; - - case elfcpp::STB_WEAK: - frombits = weak_flag; - break; - - case elfcpp::STB_LOCAL: - gold_error(_("%s: invalid STB_LOCAL symbol %s in external symbols"), - object->name().c_str(), to->name()); - frombits = global_flag; - break; - - default: - gold_error(_("%s: unsupported symbol binding %d for symbol %s"), - object->name().c_str(), - static_cast(sym.get_st_bind()), to->name()); - frombits = global_flag; - break; - } - - if (!object->is_dynamic()) - frombits |= regular_flag; - else - frombits |= dynamic_flag; - - switch (sym.get_st_shndx()) - { - case elfcpp::SHN_UNDEF: - frombits |= undef_flag; - break; - - case elfcpp::SHN_COMMON: - frombits |= common_flag; - break; - - default: - if (sym.get_st_type() == elfcpp::STT_COMMON) - frombits |= common_flag; - else - frombits |= def_flag; - break; - } + unsigned int frombits = symbol_to_bits(sym.get_st_bind(), + object->is_dynamic(), + sym.get_st_shndx(), + sym.get_st_type()); bool adjust_common_sizes; if (Symbol_table::should_override(to, frombits, object, @@ -214,6 +233,34 @@ Symbol_table::resolve(Sized_symbol* to, if (adjust_common_sizes && sym.get_st_size() > to->symsize()) to->set_symsize(sym.get_st_size()); } + + // A new weak undefined reference, merging with an old weak + // reference, could be a One Definition Rule (ODR) violation -- + // especially if the types or sizes of the references differ. We'll + // store such pairs and look them up later to make sure they + // actually refer to the same lines of code. (Note: not all ODR + // violations can be found this way, and not everything this finds + // is an ODR violation. But it's helpful to warn about.) + // We use orig_sym here because we want the symbol exactly as it + // appears in the object file, not munged via our future processing. + if (orig_sym.get_st_bind() == elfcpp::STB_WEAK + && to->binding() == elfcpp::STB_WEAK + && orig_sym.get_st_shndx() != elfcpp::SHN_UNDEF + && to->shndx() != elfcpp::SHN_UNDEF + && orig_sym.get_st_size() != 0 // Ignore weird 0-sized symbols. + && to->symsize() != 0 + && (orig_sym.get_st_type() != to->type() + || orig_sym.get_st_size() != to->symsize()) + // C does not have a concept of ODR, so we only need to do this + // on C++ symbols. These have (mangled) names starting with _Z. + && to->name()[0] == '_' && to->name()[1] == 'Z') + { + Symbol_location from_location + = { object, orig_sym.get_st_shndx(), orig_sym.get_st_value() }; + Symbol_location to_location = { to->object(), to->shndx(), to->value() }; + this->candidate_odr_violations_[to->name()].insert(from_location); + this->candidate_odr_violations_[to->name()].insert(to_location); + } } // Handle the core of symbol resolution. This is called with the @@ -229,51 +276,11 @@ Symbol_table::should_override(const Symbol* to, unsigned int frombits, { *adjust_common_sizes = false; - unsigned int tobits; - switch (to->binding()) - { - case elfcpp::STB_GLOBAL: - tobits = global_flag; - break; - - case elfcpp::STB_WEAK: - tobits = weak_flag; - break; - - case elfcpp::STB_LOCAL: - // We should only see externally visible symbols in the symbol - // table. - gold_unreachable(); - - default: - // Any target which wants to handle STB_LOOS, etc., needs to - // define a resolve method. - gold_unreachable(); - } - - if (to->source() == Symbol::FROM_OBJECT - && to->object()->is_dynamic()) - tobits |= dynamic_flag; - else - tobits |= regular_flag; - - switch (to->shndx()) - { - case elfcpp::SHN_UNDEF: - tobits |= undef_flag; - break; - - case elfcpp::SHN_COMMON: - tobits |= common_flag; - break; - - default: - if (to->type() == elfcpp::STT_COMMON) - tobits |= common_flag; - else - tobits |= def_flag; - break; - } + unsigned int tobits = symbol_to_bits(to->binding(), + (to->source() == Symbol::FROM_OBJECT + && to->object()->is_dynamic()), + to->shndx(), + to->type()); // FIXME: Warn if either but not both of TO and SYM are STT_TLS. @@ -719,6 +726,7 @@ void Symbol_table::resolve<32, false>( Sized_symbol<32>* to, const elfcpp::Sym<32, false>& sym, + const elfcpp::Sym<32, false>& orig_sym, Object* object, const char* version); #endif @@ -729,6 +737,7 @@ void Symbol_table::resolve<32, true>( Sized_symbol<32>* to, const elfcpp::Sym<32, true>& sym, + const elfcpp::Sym<32, true>& orig_sym, Object* object, const char* version); #endif @@ -739,6 +748,7 @@ void Symbol_table::resolve<64, false>( Sized_symbol<64>* to, const elfcpp::Sym<64, false>& sym, + const elfcpp::Sym<64, false>& orig_sym, Object* object, const char* version); #endif @@ -749,6 +759,7 @@ void Symbol_table::resolve<64, true>( Sized_symbol<64>* to, const elfcpp::Sym<64, true>& sym, + const elfcpp::Sym<64, true>& orig_sym, Object* object, const char* version); #endif diff --git a/gold/symtab.cc b/gold/symtab.cc index e3face79df..be88534843 100644 --- a/gold/symtab.cc +++ b/gold/symtab.cc @@ -23,10 +23,12 @@ #include "gold.h" #include +#include #include #include #include "object.h" +#include "dwarf_reader.h" #include "dynobj.h" #include "output.h" #include "target.h" @@ -343,7 +345,7 @@ Symbol_table::resolve(Sized_symbol* to, const Sized_symbol* from, esym.put_st_info(from->binding(), from->type()); esym.put_st_other(from->visibility(), from->nonvis()); esym.put_st_shndx(from->shndx()); - this->resolve(to, esym.sym(), from->object(), version); + this->resolve(to, esym.sym(), esym.sym(), from->object(), version); if (from->in_reg()) to->set_in_reg(); if (from->in_dyn()) @@ -372,6 +374,11 @@ Symbol_table::resolve(Sized_symbol* to, const Sized_symbol* from, // object file as a forwarder, and record it in the forwarders_ map. // Note that entries in the hash table will never be marked as // forwarders. +// +// SYM and ORIG_SYM are almost always the same. ORIG_SYM is the +// symbol exactly as it existed in the input file. SYM is usually +// that as well, but can be modified, for instance if we determine +// it's in a to-be-discarded section. template Sized_symbol* @@ -381,7 +388,8 @@ Symbol_table::add_from_object(Object* object, const char *version, Stringpool::Key version_key, bool def, - const elfcpp::Sym& sym) + const elfcpp::Sym& sym, + const elfcpp::Sym& orig_sym) { Symbol* const snull = NULL; std::pair ins = @@ -416,7 +424,7 @@ Symbol_table::add_from_object(Object* object, was_undefined = ret->is_undefined(); was_common = ret->is_common(); - this->resolve(ret, sym, object, version); + this->resolve(ret, sym, orig_sym, object, version); if (def) { @@ -456,7 +464,7 @@ Symbol_table::add_from_object(Object* object, ret = this->get_sized_symbol SELECT_SIZE_NAME(size) ( insdef.first->second SELECT_SIZE(size)); - this->resolve(ret, sym, object, version); + this->resolve(ret, sym, orig_sym, object, version); ins.first->second = ret; } else @@ -571,7 +579,7 @@ Symbol_table::add_from_relobj( Stringpool::Key name_key; name = this->namepool_.add(name, true, &name_key); res = this->add_from_object(relobj, name, name_key, NULL, 0, - false, *psym); + false, *psym, sym); } else { @@ -590,7 +598,7 @@ Symbol_table::add_from_relobj( ver = this->namepool_.add(ver, true, &ver_key); res = this->add_from_object(relobj, name, name_key, ver, ver_key, - def, *psym); + def, *psym, sym); } (*sympointers)[i] = res; @@ -659,7 +667,7 @@ Symbol_table::add_from_dynobj( Stringpool::Key name_key; name = this->namepool_.add(name, true, &name_key); res = this->add_from_object(dynobj, name, name_key, NULL, 0, - false, sym); + false, sym, sym); } else { @@ -693,7 +701,7 @@ Symbol_table::add_from_dynobj( { // This symbol does not have a version. res = this->add_from_object(dynobj, name, name_key, NULL, 0, - false, sym); + false, sym, sym); } else { @@ -723,14 +731,14 @@ Symbol_table::add_from_dynobj( if (sym.get_st_shndx() == elfcpp::SHN_ABS && name_key == version_key) res = this->add_from_object(dynobj, name, name_key, NULL, 0, - false, sym); + false, sym, sym); else { const bool def = (!hidden && (sym.get_st_shndx() != elfcpp::SHN_UNDEF)); res = this->add_from_object(dynobj, name, name_key, version, - version_key, def, sym); + version_key, def, sym, sym); } } } @@ -1794,6 +1802,97 @@ Symbol_table::sized_write_section_symbol(const Output_section* os, of->write_output_view(offset, sym_size, pov); } +// Check candidate_odr_violations_ to find symbols with the same name +// but apparently different definitions (different source-file/line-no). + +void +Symbol_table::detect_odr_violations() const +{ + if (parameters->get_size() == 32) + { + if (!parameters->is_big_endian()) + { +#ifdef HAVE_TARGET_32_LITTLE + this->sized_detect_odr_violations<32, false>(); +#else + gold_unreachable(); +#endif + } + else + { +#ifdef HAVE_TARGET_32_BIG + this->sized_detect_odr_violations<32, true>(); +#else + gold_unreachable(); +#endif + } + } + else if (parameters->get_size() == 64) + { + if (!parameters->is_big_endian()) + { +#ifdef HAVE_TARGET_64_LITTLE + this->sized_detect_odr_violations<64, false>(); +#else + gold_unreachable(); +#endif + } + else + { +#ifdef HAVE_TARGET_64_BIG + this->sized_detect_odr_violations<64, true>(); +#else + gold_unreachable(); +#endif + } + } + else + gold_unreachable(); +} + +// Implement detect_odr_violations. + +template +void +Symbol_table::sized_detect_odr_violations() const +{ + for (Odr_map::const_iterator it = candidate_odr_violations_.begin(); + it != candidate_odr_violations_.end(); + ++it) + { + const char* symbol_name = it->first; + // We use a sorted set so the output is deterministic. + std::set line_nums; + + Unordered_set::const_iterator + locs; + for (locs = it->second.begin(); locs != it->second.end(); ++locs) + { + // We need to lock the object in order to read it. This + // means that we can not run inside a Task. If we want to + // run this in a Task for better performance, we will need + // one Task for object, plus appropriate locking to ensure + // that we don't conflict with other uses of the object. + locs->object->lock(); + Dwarf_line_info line_info(locs->object); + locs->object->unlock(); + std::string lineno = line_info.addr2line(locs->shndx, locs->offset); + if (!lineno.empty()) + line_nums.insert(lineno); + } + + if (line_nums.size() > 1) + { + gold_warning(_("symbol %s defined in multiple places " + "(possible ODR violation):"), symbol_name); + for (std::set::const_iterator it2 = line_nums.begin(); + it2 != line_nums.end(); + ++it2) + fprintf(stderr, " %s\n", it2->c_str()); + } + } +} + // Warnings functions. // Add a new warning. diff --git a/gold/symtab.h b/gold/symtab.h index 81ad2d91f2..367813fc61 100644 --- a/gold/symtab.h +++ b/gold/symtab.h @@ -895,7 +895,7 @@ class Warnings }; // A mapping from warning symbol names (canonicalized in - // Symbol_table's namepool_ field) to + // Symbol_table's namepool_ field) to warning information. typedef Unordered_map Warning_table; Warning_table warnings_; @@ -968,7 +968,7 @@ class Symbol_table // Define a set of symbols in output segments. void define_symbols(const Layout*, const Target*, int count, - const Define_symbol_in_segment*); + const Define_symbol_in_segment*); // Define SYM using a COPY reloc. POSD is the Output_data where the // symbol should be defined--typically a .dyn.bss section. VALUE is @@ -1023,6 +1023,11 @@ class Symbol_table size_t relnum, off_t reloffset) const { this->warnings_.issue_warning(sym, relinfo, relnum, reloffset); } + // Check candidate_odr_violations_ to find symbols with the same name + // but apparently different definitions (different source-file/line-no). + void + detect_odr_violations() const; + // SYM is defined using a COPY reloc. Return the dynamic object // where the original definition was found. Dynobj* @@ -1070,13 +1075,15 @@ class Symbol_table Sized_symbol* add_from_object(Object*, const char *name, Stringpool::Key name_key, const char *version, Stringpool::Key version_key, - bool def, const elfcpp::Sym& sym); + bool def, const elfcpp::Sym& sym, + const elfcpp::Sym& orig_sym); // Resolve symbols. template void resolve(Sized_symbol* to, const elfcpp::Sym& sym, + const elfcpp::Sym& orig_sym, Object*, const char* version); template @@ -1157,6 +1164,11 @@ class Symbol_table void do_allocate_commons(const General_options&, Layout*); + // Implement detect_odr_violations. + template + void + sized_detect_odr_violations() const; + // Finalize symbols specialized for size. template off_t @@ -1208,6 +1220,33 @@ class Symbol_table // they are defined. typedef Unordered_map Copied_symbol_dynobjs; + // A map from symbol name (as a pointer into the namepool) to all + // the locations the symbols is (weakly) defined (and certain other + // conditions are met). This map will be used later to detect + // possible One Definition Rule (ODR) violations. + struct Symbol_location + { + Object* object; // Object where the symbol is defined. + unsigned int shndx; // Section-in-object where the symbol is defined. + off_t offset; // Offset-in-section where the symbol is defined. + bool operator==(const Symbol_location& that) const + { + return (this->object == that.object + && this->shndx == that.shndx + && this->offset == that.offset); + } + }; + + struct Symbol_location_hash + { + size_t operator()(const Symbol_location& loc) const + { return reinterpret_cast(loc.object) ^ loc.offset ^ loc.shndx; } + }; + + typedef Unordered_map > + Odr_map; + // We increment this every time we see a new undefined symbol, for // use in archive groups. int saw_undefined_; @@ -1242,6 +1281,9 @@ class Symbol_table Commons_type commons_; // Manage symbol warnings. Warnings warnings_; + // Manage potential One Definition Rule (ODR) violations. + Odr_map candidate_odr_violations_; + // When we emit a COPY reloc for a symbol, we define it in an // Output_data. When it's time to emit version information for it, // we need to know the dynamic object in which we found the original