* gprof.c (long_options): Add "--function-ordering" and
"--file-ordering" options. (usage): Add new options to usage message. (main): Handle new options. * gprof.h (STYLE_FUNCTION_ORDER): Define. (STYLE_FILE_ORDER): Define. (function_mapping_file): Declare. * cg_arcs.c (arcs, numarcs): New globals. (arc_add): Put new arcs into the arc array so the function/file ordering code can examine them. * cg_arcs.h (struct arc): New field "has_been_placed". (arcs, numarcs): Declare new globals. * core.c (symbol_map, symbol_map_count): New globals. (read_function_mappings): New function to read in a function to object map file. (core_init): Call read_function_mappings if a function mapping file exists. (core_create_function_syms): Handle function to object file mappings. * symtab.h (struct sym): New fields "mapped", "has_been_placed", "nuses", "prev". * cg_print.c (cmp_arc_count): New function for sorting arcs. (cmp_fun_nuses): Likewise for functions. (cg_print_function_ordering): New function to print a suggested function ordering. (cg_print_file_ordering): Likewise for ordering .o files. (order_and_dump_functions_by_arcs): Helper function for function and object file ordering code. Gprof changes for mentor vm work.
This commit is contained in:
parent
71128bd7a9
commit
64c50fc5db
9 changed files with 908 additions and 5 deletions
|
@ -1,3 +1,34 @@
|
|||
Sat Dec 30 10:11:03 1995 Jeffrey A Law (law@cygnus.com)
|
||||
|
||||
* gprof.c (long_options): Add "--function-ordering" and
|
||||
"--file-ordering" options.
|
||||
(usage): Add new options to usage message.
|
||||
(main): Handle new options.
|
||||
* gprof.h (STYLE_FUNCTION_ORDER): Define.
|
||||
(STYLE_FILE_ORDER): Define.
|
||||
(function_mapping_file): Declare.
|
||||
* cg_arcs.c (arcs, numarcs): New globals.
|
||||
(arc_add): Put new arcs into the arc array so the function/file
|
||||
ordering code can examine them.
|
||||
* cg_arcs.h (struct arc): New field "has_been_placed".
|
||||
(arcs, numarcs): Declare new globals.
|
||||
* core.c (symbol_map, symbol_map_count): New globals.
|
||||
(read_function_mappings): New function to read in a function
|
||||
to object map file.
|
||||
(core_init): Call read_function_mappings if a function mapping
|
||||
file exists.
|
||||
(core_create_function_syms): Handle function to object file
|
||||
mappings.
|
||||
* symtab.h (struct sym): New fields "mapped", "has_been_placed",
|
||||
"nuses", "prev".
|
||||
* cg_print.c (cmp_arc_count): New function for sorting arcs.
|
||||
(cmp_fun_nuses): Likewise for functions.
|
||||
(cg_print_function_ordering): New function to print a suggested
|
||||
function ordering.
|
||||
(cg_print_file_ordering): Likewise for ordering .o files.
|
||||
(order_and_dump_functions_by_arcs): Helper function for function
|
||||
and object file ordering code.
|
||||
|
||||
Sun Dec 24 21:32:27 1995 Jeffrey A Law (law@cygnus.com)
|
||||
|
||||
* core.c (core_sym_class): Ignore symbols without BSF_FUNCTION
|
||||
|
|
|
@ -27,6 +27,8 @@
|
|||
|
||||
Sym *cycle_header;
|
||||
int num_cycles;
|
||||
Arc **arcs;
|
||||
int numarcs;
|
||||
|
||||
/*
|
||||
* Return TRUE iff PARENT has an arc to covers the address
|
||||
|
@ -65,7 +67,8 @@ void
|
|||
DEFUN (arc_add, (parent, child, count),
|
||||
Sym * parent AND Sym * child AND int count)
|
||||
{
|
||||
Arc *arc;
|
||||
static int maxarcs = 0;
|
||||
Arc *arc, **newarcs;
|
||||
|
||||
DBG (TALLYDEBUG, printf ("[arc_add] %d arcs from %s to %s\n",
|
||||
count, parent->name, child->name));
|
||||
|
@ -85,6 +88,37 @@ DEFUN (arc_add, (parent, child, count),
|
|||
arc->child = child;
|
||||
arc->count = count;
|
||||
|
||||
/* If this isn't an arc for a recursive call to parent, then add it
|
||||
to the array of arcs. */
|
||||
if (parent != child)
|
||||
{
|
||||
/* If we've exhausted space in our current array, get a new one
|
||||
and copy the contents. We might want to throttle the doubling
|
||||
factor one day. */
|
||||
if (numarcs == maxarcs)
|
||||
{
|
||||
/* Determine how much space we want to allocate. */
|
||||
if (maxarcs == 0)
|
||||
maxarcs = 1;
|
||||
maxarcs *= 2;
|
||||
|
||||
/* Allocate the new array. */
|
||||
newarcs = (Arc **)xmalloc(sizeof (Arc *) * maxarcs);
|
||||
|
||||
/* Copy the old array's contents into the new array. */
|
||||
bcopy (arcs, newarcs, numarcs * sizeof (Arc *));
|
||||
|
||||
/* Free up the old array. */
|
||||
free (arcs);
|
||||
|
||||
/* And make the new array be the current array. */
|
||||
arcs = newarcs;
|
||||
}
|
||||
|
||||
/* Place this arc in the arc array. */
|
||||
arcs[numarcs++] = arc;
|
||||
}
|
||||
|
||||
/* prepend this child to the children of this parent: */
|
||||
arc->next_child = parent->cg.children;
|
||||
parent->cg.children = arc;
|
||||
|
|
|
@ -20,6 +20,7 @@ typedef struct arc
|
|||
double child_time; /* child-time inherited along arc */
|
||||
struct arc *next_parent; /* next parent of CHILD */
|
||||
struct arc *next_child; /* next child of PARENT */
|
||||
int has_been_placed; /* have this arc's functions been placed? */
|
||||
}
|
||||
Arc;
|
||||
|
||||
|
@ -29,5 +30,7 @@ extern Sym *cycle_header; /* cycle headers */
|
|||
extern void arc_add PARAMS ((Sym * parent, Sym * child, int count));
|
||||
extern Arc *arc_lookup PARAMS ((Sym * parent, Sym * child));
|
||||
extern Sym **cg_assemble PARAMS ((void));
|
||||
extern Arc **arcs;
|
||||
extern int numarcs;
|
||||
|
||||
#endif /* cg_arcs_h */
|
||||
|
|
616
gprof/cg_print.c
616
gprof/cg_print.c
|
@ -11,6 +11,9 @@
|
|||
#define EQUALTO 0
|
||||
#define GREATERTHAN 1
|
||||
|
||||
static void order_and_dump_functions_by_arcs PARAMS ((Arc **, unsigned long,
|
||||
int, Arc **,
|
||||
unsigned long *));
|
||||
/* declarations of automatically generated functions to output blurbs: */
|
||||
extern void bsd_callg_blurb PARAMS ((FILE * fp));
|
||||
extern void fsf_callg_blurb PARAMS ((FILE * fp));
|
||||
|
@ -654,3 +657,616 @@ DEFUN_VOID (cg_print_index)
|
|||
}
|
||||
free (name_sorted_syms);
|
||||
}
|
||||
|
||||
/* Compare two arcs based on their usage counts. We want to sort
|
||||
in descending order. */
|
||||
static int
|
||||
DEFUN (cmp_arc_count, (left, right), const PTR left AND const PTR right)
|
||||
{
|
||||
const Arc **npp1 = (const Arc **) left;
|
||||
const Arc **npp2 = (const Arc **) right;
|
||||
|
||||
if ((*npp1)->count > (*npp2)->count)
|
||||
return -1;
|
||||
else if ((*npp1)->count < (*npp2)->count)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Compare two funtions based on their usage counts. We want to sort
|
||||
in descending order. */
|
||||
static int
|
||||
DEFUN (cmp_fun_nuses, (left, right), const PTR left AND const PTR right)
|
||||
{
|
||||
const Sym **npp1 = (const Sym **) left;
|
||||
const Sym **npp2 = (const Sym **) right;
|
||||
|
||||
if ((*npp1)->nuses > (*npp2)->nuses)
|
||||
return -1;
|
||||
else if ((*npp1)->nuses < (*npp2)->nuses)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Print a suggested function ordering based on the profiling data.
|
||||
|
||||
We perform 4 major steps when ordering functions:
|
||||
|
||||
* Group unused functions together and place them at the
|
||||
end of the function order.
|
||||
|
||||
* Search the highest use arcs (those which account for 90% of
|
||||
the total arc count) for functions which have several parents.
|
||||
|
||||
Group those with the most call sites together (currently the
|
||||
top 1.25% which have at least five different call sites).
|
||||
|
||||
These are emitted at the start of the function order.
|
||||
|
||||
* Use a greedy placement algorithm to place functions which
|
||||
occur in the top 99% of the arcs in the profile. Some provisions
|
||||
are made to handle high usage arcs where the parent and/or
|
||||
child has already been placed.
|
||||
|
||||
* Run the same greedy placement algorithm on the remaining
|
||||
arcs to place the leftover functions.
|
||||
|
||||
|
||||
The various "magic numbers" should (one day) be tuneable by command
|
||||
line options. They were arrived at by benchmarking a few applications
|
||||
with various values to see which values produced better overall function
|
||||
orderings.
|
||||
|
||||
Of course, profiling errors, machine limitations (PA long calls), and
|
||||
poor cutoff values for the placement algorithm may limit the usefullness
|
||||
of the resulting function order. Improvements would be greatly appreciated.
|
||||
|
||||
Suggestions:
|
||||
|
||||
* Place the functions with many callers near the middle of the
|
||||
list to reduce long calls.
|
||||
|
||||
* Propagate arc usage changes as functions are placed. Ie if
|
||||
func1 and func2 are placed together, arcs to/from those arcs
|
||||
to the same parent/child should be combined, then resort the
|
||||
arcs to choose the next one.
|
||||
|
||||
* Implement some global positioning algorithm to place the
|
||||
chains made by the greedy local positioning algorithm. Probably
|
||||
by examining arcs which haven't been placed yet to tie two
|
||||
chains together.
|
||||
|
||||
* Take a function's size and time into account in the algorithm;
|
||||
size in particular is important on the PA (long calls). Placing
|
||||
many small functions onto their own page may be wise.
|
||||
|
||||
* Use better profiling information; many published algorithms
|
||||
are based on call sequences through time, rather than just
|
||||
arc counts.
|
||||
|
||||
* Prodecure cloning could improve performance when a small number
|
||||
of arcs account for most of the calls to a particular function.
|
||||
|
||||
* Use relocation information to avoid moving unused functions
|
||||
completely out of the code stream; this would avoid severe lossage
|
||||
when the profile data bears little resemblance to actual runs.
|
||||
|
||||
* Propagation of arc usages should also improve .o link line
|
||||
ordering which shares the same arc placement algorithm with
|
||||
the function ordering code (in fact it is a degenerate case
|
||||
of function ordering). */
|
||||
|
||||
void
|
||||
DEFUN_VOID (cg_print_function_ordering)
|
||||
{
|
||||
unsigned long index, used, unused, scratch_index;
|
||||
unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
|
||||
#ifdef __GNU_C__
|
||||
unsigned long long total_arcs, tmp_arcs_count;
|
||||
#else
|
||||
unsigned long total_arcs, tmp_arcs_count;
|
||||
#endif
|
||||
Sym **unused_syms, **used_syms, **scratch_syms;
|
||||
Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
|
||||
|
||||
index = 0;
|
||||
used = 0;
|
||||
unused = 0;
|
||||
scratch_index = 0;
|
||||
unplaced_arc_count = 0;
|
||||
high_arc_count = 0;
|
||||
scratch_arc_count = 0;
|
||||
|
||||
/* First group all the unused functions together. */
|
||||
unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
|
||||
used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
|
||||
scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
|
||||
high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
|
||||
scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
|
||||
unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
|
||||
|
||||
/* Walk through all the functions; mark those which are never
|
||||
called as placed (we'll emit them as a group later). */
|
||||
for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
|
||||
{
|
||||
if (symtab.base[index].ncalls == 0)
|
||||
{
|
||||
/* Filter out gprof generated names. */
|
||||
if (strcmp (symtab.base[index].name, "<locore>")
|
||||
&& strcmp (symtab.base[index].name, "<hicore>"))
|
||||
{
|
||||
unused_syms[unused++] = &symtab.base[index];
|
||||
symtab.base[index].has_been_placed = 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
used_syms[used++] = &symtab.base[index];
|
||||
symtab.base[index].has_been_placed = 0;
|
||||
symtab.base[index].next = 0;
|
||||
symtab.base[index].prev = 0;
|
||||
symtab.base[index].nuses = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Sort the arcs from most used to least used. */
|
||||
qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
|
||||
|
||||
/* Compute the total arc count. Also mark arcs as unplaced.
|
||||
|
||||
Note we don't compensate for overflow if that happens!
|
||||
Overflow is much less likely when this file is compiled
|
||||
with GCC as it can double-wide integers via long long. */
|
||||
total_arcs = 0;
|
||||
for (index = 0; index < numarcs; index++)
|
||||
{
|
||||
total_arcs += arcs[index]->count;
|
||||
arcs[index]->has_been_placed = 0;
|
||||
}
|
||||
|
||||
/* We want to pull out those functions which are referenced
|
||||
by many highly used arcs and emit them as a group. This
|
||||
could probably use some tuning. */
|
||||
tmp_arcs_count = 0;
|
||||
for (index = 0; index < numarcs; index++)
|
||||
{
|
||||
tmp_arcs_count += arcs[index]->count;
|
||||
|
||||
/* Count how many times each parent and child are used up
|
||||
to our threshhold of arcs (90%). */
|
||||
if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
|
||||
break;
|
||||
|
||||
arcs[index]->child->nuses++;
|
||||
}
|
||||
|
||||
/* Now sort a temporary symbol table based on the number of
|
||||
times each function was used in the highest used arcs. */
|
||||
bcopy (used_syms, scratch_syms, used * sizeof (Sym *));
|
||||
qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
|
||||
|
||||
/* Now pick out those symbols we're going to emit as
|
||||
a group. We take up to 1.25% of the used symbols. */
|
||||
for (index = 0; index < used / 80; index++)
|
||||
{
|
||||
Sym *sym = scratch_syms[index];
|
||||
Arc *arc;
|
||||
|
||||
/* If we hit symbols that aren't used from many call sites,
|
||||
then we can quit. We choose five as the low limit for
|
||||
no particular reason. */
|
||||
if (sym->nuses == 5)
|
||||
break;
|
||||
|
||||
/* We're going to need the arcs between these functions.
|
||||
Unfortunately, we don't know all these functions
|
||||
until we're done. So we keep track of all the arcs
|
||||
to the functions we care about, then prune out those
|
||||
which are uninteresting.
|
||||
|
||||
An interesting variation would be to quit when we found
|
||||
multi-call site functions which account for some percentage
|
||||
of the arcs. */
|
||||
|
||||
arc = sym->cg.children;
|
||||
while (arc)
|
||||
{
|
||||
if (arc->parent != arc->child)
|
||||
scratch_arcs[scratch_arc_count++] = arc;
|
||||
arc->has_been_placed = 1;
|
||||
arc = arc->next_child;
|
||||
}
|
||||
|
||||
arc = sym->cg.parents;
|
||||
while (arc)
|
||||
{
|
||||
if (arc->parent != arc->child)
|
||||
scratch_arcs[scratch_arc_count++] = arc;
|
||||
arc->has_been_placed = 1;
|
||||
arc = arc->next_parent;
|
||||
}
|
||||
|
||||
/* Keep track of how many symbols we're going to place. */
|
||||
scratch_index = index;
|
||||
|
||||
/* A lie, but it makes identifying these functions easier
|
||||
later. */
|
||||
sym->has_been_placed = 1;
|
||||
}
|
||||
|
||||
/* Now walk through the temporary arcs and copy those we care about
|
||||
into the high arcs array. */
|
||||
for (index = 0; index < scratch_arc_count; index++)
|
||||
{
|
||||
Arc *arc = scratch_arcs[index];
|
||||
|
||||
/* If this arc refers to highly used functions, then
|
||||
then we want to keep it. */
|
||||
if (arc->child->has_been_placed
|
||||
&& arc->parent->has_been_placed)
|
||||
{
|
||||
high_arcs[high_arc_count++] = scratch_arcs[index];
|
||||
|
||||
/* We need to turn of has_been_placed since we're going to
|
||||
use the main arc placement algorithm on these arcs. */
|
||||
arc->child->has_been_placed = 0;
|
||||
arc->parent->has_been_placed = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Dump the multi-site high usage functions which are not going
|
||||
to be ordered by the main ordering algorithm. */
|
||||
for (index = 0; index < scratch_index; index++)
|
||||
{
|
||||
if (scratch_syms[index]->has_been_placed)
|
||||
printf ("%s\n", scratch_syms[index]->name);
|
||||
}
|
||||
|
||||
/* Now we can order the multi-site high use functions based on the
|
||||
arcs between them. */
|
||||
qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
|
||||
order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
|
||||
unplaced_arcs, &unplaced_arc_count);
|
||||
|
||||
/* Order and dump the high use functions left, these typically
|
||||
have only a few call sites. */
|
||||
order_and_dump_functions_by_arcs (arcs, numarcs, 0,
|
||||
unplaced_arcs, &unplaced_arc_count);
|
||||
|
||||
/* Now place the rarely used functions. */
|
||||
order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
|
||||
scratch_arcs, &scratch_arc_count);
|
||||
|
||||
/* Output any functions not emitted by the order_and_dump calls. */
|
||||
for (index = 0; index < used; index++)
|
||||
if (used_syms[index]->has_been_placed == 0)
|
||||
printf("%s\n", used_syms[index]->name);
|
||||
|
||||
/* Output the unused functions. */
|
||||
for (index = 0; index < unused; index++)
|
||||
printf("%s\n", unused_syms[index]->name);
|
||||
|
||||
unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
|
||||
used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
|
||||
scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
|
||||
high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
|
||||
scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
|
||||
unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
|
||||
|
||||
free (unused_syms);
|
||||
free (used_syms);
|
||||
free (scratch_syms);
|
||||
free (high_arcs);
|
||||
free (scratch_arcs);
|
||||
free (unplaced_arcs);
|
||||
}
|
||||
|
||||
/* Place functions based on the arcs in ARCS with NUMARCS entries;
|
||||
place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
|
||||
|
||||
If ALL is nonzero, then place all functions referenced by ARCS,
|
||||
else only place those referenced in the top 99% of the arcs in ARCS. */
|
||||
|
||||
#define MOST 0.99
|
||||
static void
|
||||
order_and_dump_functions_by_arcs (arcs, numarcs, all,
|
||||
unplaced_arcs, unplaced_arc_count)
|
||||
Arc **arcs;
|
||||
unsigned long numarcs;
|
||||
int all;
|
||||
Arc **unplaced_arcs;
|
||||
unsigned long *unplaced_arc_count;
|
||||
{
|
||||
#ifdef __GNU_C__
|
||||
unsigned long long tmp_arcs, total_arcs;
|
||||
#else
|
||||
unsigned long tmp_arcs, total_arcs;
|
||||
#endif
|
||||
unsigned int index;
|
||||
|
||||
/* If needed, compute the total arc count.
|
||||
|
||||
Note we don't compensate for overflow if that happens! */
|
||||
if (! all)
|
||||
{
|
||||
total_arcs = 0;
|
||||
for (index = 0; index < numarcs; index++)
|
||||
total_arcs += arcs[index]->count;
|
||||
}
|
||||
else
|
||||
total_arcs = 0;
|
||||
|
||||
tmp_arcs = 0;
|
||||
for (index = 0; index < numarcs; index++)
|
||||
{
|
||||
Sym *sym1, *sym2;
|
||||
Sym *child, *parent;
|
||||
|
||||
tmp_arcs += arcs[index]->count;
|
||||
|
||||
/* Ignore this arc if it's already been placed. */
|
||||
if (arcs[index]->has_been_placed)
|
||||
continue;
|
||||
|
||||
child = arcs[index]->child;
|
||||
parent = arcs[index]->parent;
|
||||
|
||||
/* If we're not using all arcs, and this is a rarely used
|
||||
arc, then put it on the unplaced_arc list. Similarly
|
||||
if both the parent and child of this arc have been placed. */
|
||||
if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
|
||||
|| child->has_been_placed || parent->has_been_placed)
|
||||
{
|
||||
unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If all slots in the parent and child are full, then there isn't
|
||||
anything we can do right now. We'll place this arc on the
|
||||
unplaced arc list in the hope that a global positioning
|
||||
algorithm can use it to place function chains. */
|
||||
if (parent->next && parent->prev && child->next && child->prev)
|
||||
{
|
||||
unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If the parent is unattached, then find the closest
|
||||
place to attach it onto child's chain. Similarly
|
||||
for the opposite case. */
|
||||
if (!parent->next && !parent->prev)
|
||||
{
|
||||
int next_count = 0;
|
||||
int prev_count = 0;
|
||||
Sym *prev = child;
|
||||
Sym *next = child;
|
||||
|
||||
/* Walk to the beginning and end of the child's chain. */
|
||||
while (next->next)
|
||||
{
|
||||
next = next->next;
|
||||
next_count++;
|
||||
}
|
||||
|
||||
while (prev->prev)
|
||||
{
|
||||
prev = prev->prev;
|
||||
prev_count++;
|
||||
}
|
||||
|
||||
/* Choose the closest. */
|
||||
child = next_count < prev_count ? next : prev;
|
||||
}
|
||||
else if (! child->next && !child->prev)
|
||||
{
|
||||
int next_count = 0;
|
||||
int prev_count = 0;
|
||||
Sym *prev = parent;
|
||||
Sym *next = parent;
|
||||
|
||||
while (next->next)
|
||||
{
|
||||
next = next->next;
|
||||
next_count++;
|
||||
}
|
||||
|
||||
while (prev->prev)
|
||||
{
|
||||
prev = prev->prev;
|
||||
prev_count++;
|
||||
}
|
||||
|
||||
parent = prev_count < next_count ? prev : next;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Couldn't find anywhere to attach the functions,
|
||||
put the arc on the unplaced arc list. */
|
||||
unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Make sure we don't tie two ends together. */
|
||||
sym1 = parent;
|
||||
if (sym1->next)
|
||||
while (sym1->next)
|
||||
sym1 = sym1->next;
|
||||
else
|
||||
while (sym1->prev)
|
||||
sym1 = sym1->prev;
|
||||
|
||||
sym2 = child;
|
||||
if (sym2->next)
|
||||
while (sym2->next)
|
||||
sym2 = sym2->next;
|
||||
else
|
||||
while (sym2->prev)
|
||||
sym2 = sym2->prev;
|
||||
|
||||
if (sym1 == child
|
||||
&& sym2 == parent)
|
||||
{
|
||||
/* This would tie two ends together. */
|
||||
unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
|
||||
continue;
|
||||
}
|
||||
|
||||
if (parent->next)
|
||||
{
|
||||
/* Must attach to the parent's prev field. */
|
||||
if (! child->next)
|
||||
{
|
||||
/* parent-prev and child-next */
|
||||
parent->prev = child;
|
||||
child->next = parent;
|
||||
arcs[index]->has_been_placed = 1;
|
||||
}
|
||||
}
|
||||
else if (parent->prev)
|
||||
{
|
||||
/* Must attach to the parent's next field. */
|
||||
if (! child->prev)
|
||||
{
|
||||
/* parent-next and child-prev */
|
||||
parent->next = child;
|
||||
child->prev = parent;
|
||||
arcs[index]->has_been_placed = 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Can attach to either field in the parent, depends
|
||||
on where we've got space in the child. */
|
||||
if (child->prev)
|
||||
{
|
||||
/* parent-prev and child-next */
|
||||
parent->prev = child;
|
||||
child->next = parent;
|
||||
arcs[index]->has_been_placed = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* parent-next and child-prev */
|
||||
parent->next = child;
|
||||
child->prev = parent;
|
||||
arcs[index]->has_been_placed = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Dump the chains of functions we've made. */
|
||||
for (index = 0; index < numarcs; index++)
|
||||
{
|
||||
Sym *sym;
|
||||
if (arcs[index]->parent->has_been_placed
|
||||
|| arcs[index]->child->has_been_placed)
|
||||
continue;
|
||||
|
||||
sym = arcs[index]->parent;
|
||||
|
||||
/* If this symbol isn't attached to any other
|
||||
symbols, then we've got a rarely used arc.
|
||||
|
||||
Skip it for now, we'll deal with them later. */
|
||||
if (sym->next == NULL
|
||||
&& sym->prev == NULL)
|
||||
continue;
|
||||
|
||||
/* Get to the start of this chain. */
|
||||
while (sym->prev)
|
||||
sym = sym->prev;
|
||||
|
||||
while (sym)
|
||||
{
|
||||
/* Mark it as placed. */
|
||||
sym->has_been_placed = 1;
|
||||
printf ("%s\n", sym->name);
|
||||
sym = sym->next;
|
||||
}
|
||||
}
|
||||
|
||||
/* If we want to place all the arcs, then output those which weren't
|
||||
placed by the main algorithm. */
|
||||
if (all)
|
||||
for (index = 0; index < numarcs; index++)
|
||||
{
|
||||
Sym *sym;
|
||||
if (arcs[index]->parent->has_been_placed
|
||||
|| arcs[index]->child->has_been_placed)
|
||||
continue;
|
||||
|
||||
sym = arcs[index]->parent;
|
||||
|
||||
sym->has_been_placed = 1;
|
||||
printf ("%s\n", sym->name);
|
||||
}
|
||||
}
|
||||
|
||||
/* Print a suggested .o ordering for files on a link line based
|
||||
on profiling information. This uses the function placement
|
||||
code for the bulk of its work. */
|
||||
|
||||
/* One entry of the function mapping table: pairs a function's name
   with the name of the file it is mapped to.  */
struct function_map
{
  char *function_name;          /* name of the function */
  char *file_name;              /* name of the file it is mapped to */
};
|
||||
|
||||
void
|
||||
DEFUN_VOID (cg_print_file_ordering)
|
||||
{
|
||||
unsigned long scratch_arc_count, index;
|
||||
Arc **scratch_arcs;
|
||||
extern struct function_map *symbol_map;
|
||||
extern int symbol_map_count;
|
||||
char *last;
|
||||
|
||||
scratch_arc_count = 0;
|
||||
|
||||
scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
|
||||
for (index = 0; index < numarcs; index++)
|
||||
{
|
||||
if (! arcs[index]->parent->mapped
|
||||
|| ! arcs[index]->child->mapped)
|
||||
arcs[index]->has_been_placed = 1;
|
||||
}
|
||||
|
||||
order_and_dump_functions_by_arcs (arcs, numarcs, 0,
|
||||
scratch_arcs, &scratch_arc_count);
|
||||
|
||||
/* Output .o's not handled by the main placement algorithm. */
|
||||
for (index = 0; index < symtab.len; index++)
|
||||
{
|
||||
if (symtab.base[index].mapped
|
||||
&& ! symtab.base[index].has_been_placed)
|
||||
printf ("%s\n", symtab.base[index].name);
|
||||
}
|
||||
|
||||
/* Now output any .o's that didn't have any text symbols. */
|
||||
last = NULL;
|
||||
for (index = 0; index < symbol_map_count; index++)
|
||||
{
|
||||
int index2;
|
||||
|
||||
/* Don't bother searching if this symbol is the
|
||||
same as the previous one. */
|
||||
if (last && !strcmp (last, symbol_map[index].file_name))
|
||||
continue;
|
||||
|
||||
for (index2 = 0; index2 < symtab.len; index2++)
|
||||
{
|
||||
if (! symtab.base[index2].mapped)
|
||||
continue;
|
||||
|
||||
if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
|
||||
break;
|
||||
}
|
||||
|
||||
/* If we didn't find it in the symbol table, then it must be a .o
|
||||
with no text symbols. Output it last. */
|
||||
if (index2 == symtab.len)
|
||||
printf ("%s\n", symbol_map[index].file_name);
|
||||
last = symbol_map[index].file_name;
|
||||
}
|
||||
}
|
||||
|
|
146
gprof/core.c
146
gprof/core.c
|
@ -9,6 +9,98 @@ asymbol **core_syms;
|
|||
asection *core_text_sect;
|
||||
PTR core_text_space;
|
||||
|
||||
/* For mapping symbols to specific .o files during file ordering.
   Each entry pairs a function's name with the name of the object
   file it is mapped to.  */
struct function_map
{
  char *function_name;          /* name of the function */
  char *file_name;              /* name of the file it is mapped to */
};

/* The mapping table itself.  */
struct function_map *symbol_map;

/* Number of entries in SYMBOL_MAP.  */
int symbol_map_count;
|
||||
|
||||
static void
|
||||
DEFUN (read_function_mappings, (filename), const char *filename)
|
||||
{
|
||||
FILE *file = fopen (filename, "r");
|
||||
char dummy[1024];
|
||||
int count = 0;
|
||||
|
||||
if (!file)
|
||||
{
|
||||
fprintf (stderr, "%s: could not open %s.\n", whoami, filename);
|
||||
done (1);
|
||||
}
|
||||
|
||||
/* First parse the mapping file so we know how big we need to
|
||||
make our tables. We also do some sanity checks at this
|
||||
time. */
|
||||
while (!feof (file))
|
||||
{
|
||||
int matches;
|
||||
|
||||
matches = fscanf (file, "%[^\n:]", dummy);
|
||||
if (!matches)
|
||||
{
|
||||
fprintf (stderr, "%s: unable to parse mapping file %s.\n",
|
||||
whoami, filename);
|
||||
done (1);
|
||||
}
|
||||
|
||||
/* Just skip messages about files with no symbols. */
|
||||
if (!strncmp (dummy, "No symbols in ", 14))
|
||||
{
|
||||
fscanf (file, "\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Don't care what else is on this line at this point. */
|
||||
fscanf (file, "%[^\n]\n", dummy);
|
||||
count++;
|
||||
}
|
||||
|
||||
/* Now we know how big we need to make our table. */
|
||||
symbol_map = xmalloc (count * sizeof (struct function_map));
|
||||
|
||||
/* Rewind the input file so we can read it again. */
|
||||
rewind (file);
|
||||
|
||||
/* Read each entry and put it into the table. */
|
||||
count = 0;
|
||||
while (!feof (file))
|
||||
{
|
||||
int matches;
|
||||
char *tmp;
|
||||
|
||||
matches = fscanf (file, "%[^\n:]", dummy);
|
||||
if (!matches)
|
||||
{
|
||||
fprintf (stderr, "%s: unable to parse mapping file %s.\n",
|
||||
whoami, filename);
|
||||
done (1);
|
||||
}
|
||||
|
||||
/* Just skip messages about files with no symbols. */
|
||||
if (!strncmp (dummy, "No symbols in ", 14))
|
||||
{
|
||||
fscanf (file, "\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
/* dummy has the filename, go ahead and copy it. */
|
||||
symbol_map[count].file_name = xmalloc (strlen (dummy) + 1);
|
||||
strcpy (symbol_map[count].file_name, dummy);
|
||||
|
||||
/* Now we need the function name. */
|
||||
fscanf (file, "%[^\n]\n", dummy);
|
||||
tmp = strrchr (dummy, ' ') + 1;
|
||||
symbol_map[count].function_name = xmalloc (strlen (tmp) + 1);
|
||||
strcpy (symbol_map[count].function_name, tmp);
|
||||
count++;
|
||||
}
|
||||
|
||||
/* Record the size of the map table for future reference. */
|
||||
symbol_map_count = count;
|
||||
}
|
||||
|
||||
void
|
||||
DEFUN (core_init, (a_out_name), const char *a_out_name)
|
||||
|
@ -59,6 +151,9 @@ DEFUN (core_init, (a_out_name), const char *a_out_name)
|
|||
bfd_errmsg (bfd_get_error ()));
|
||||
done (1);
|
||||
}
|
||||
|
||||
if (function_mapping_file)
|
||||
read_function_mappings (function_mapping_file);
|
||||
}
|
||||
|
||||
|
||||
|
@ -232,7 +327,7 @@ DEFUN (core_create_function_syms, (core_bfd), bfd * core_bfd)
|
|||
bfd_vma min_vma = ~0, max_vma = 0;
|
||||
const char *filename, *func_name;
|
||||
int class;
|
||||
long i;
|
||||
long i, j, found, skip;
|
||||
|
||||
/* pass 1 - determine upper bound on number of function names: */
|
||||
symtab.len = 0;
|
||||
|
@ -242,7 +337,24 @@ DEFUN (core_create_function_syms, (core_bfd), bfd * core_bfd)
|
|||
{
|
||||
continue;
|
||||
}
|
||||
++symtab.len;
|
||||
|
||||
/* This should be replaced with a binary search or hashed
|
||||
search. Gross.
|
||||
|
||||
Don't create a symtab entry for a function that has
|
||||
a mapping to a file, unless it's the first function
|
||||
in the file. */
|
||||
skip = 0;
|
||||
for (j = 0; j < symbol_map_count; j++)
|
||||
if (!strcmp (core_syms[i]->name, symbol_map[j].function_name))
|
||||
{
|
||||
if (j > 0 && ! strcmp (symbol_map [j].file_name,
|
||||
symbol_map [j - 1].file_name))
|
||||
skip = 1;
|
||||
break;
|
||||
}
|
||||
if (!skip)
|
||||
++symtab.len;
|
||||
}
|
||||
|
||||
if (symtab.len == 0)
|
||||
|
@ -267,13 +379,41 @@ DEFUN (core_create_function_syms, (core_bfd), bfd * core_bfd)
|
|||
core_syms[i]->value, core_syms[i]->name));
|
||||
continue;
|
||||
}
|
||||
/* This should be replaced with a binary search or hashed
|
||||
search. Gross. */
|
||||
|
||||
skip = 0;
|
||||
found = 0;
|
||||
for (j = 0; j < symbol_map_count; j++)
|
||||
if (!strcmp (core_syms[i]->name, symbol_map[j].function_name))
|
||||
{
|
||||
if (j > 0 && ! strcmp (symbol_map [j].file_name,
|
||||
symbol_map [j - 1].file_name))
|
||||
skip = 1;
|
||||
else
|
||||
found = j;
|
||||
break;
|
||||
}
|
||||
|
||||
if (skip)
|
||||
continue;
|
||||
|
||||
sym_init (symtab.limit);
|
||||
|
||||
/* symbol offsets are always section-relative: */
|
||||
|
||||
symtab.limit->addr = core_syms[i]->value + core_syms[i]->section->vma;
|
||||
symtab.limit->name = core_syms[i]->name;
|
||||
if (symbol_map_count
|
||||
&& !strcmp (core_syms[i]->name, symbol_map[found].function_name))
|
||||
{
|
||||
symtab.limit->name = symbol_map[found].file_name;
|
||||
symtab.limit->mapped = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
symtab.limit->name = core_syms[i]->name;
|
||||
symtab.limit->mapped = 0;
|
||||
}
|
||||
|
||||
#ifdef __osf__
|
||||
/*
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#define VERSION "2.6"
|
||||
|
||||
const char *whoami;
|
||||
const char *function_mapping_file;
|
||||
const char *a_out_name = A_OUTNAME;
|
||||
long hz = HZ_WRONG;
|
||||
|
||||
|
@ -89,6 +90,8 @@ static struct option long_options[] =
|
|||
{"no-graph", optional_argument, 0, 'Q'},
|
||||
{"exec-counts", optional_argument, 0, 'C'},
|
||||
{"no-exec-counts", optional_argument, 0, 'Z'},
|
||||
{"function-ordering", no_argument, 0, 'r'},
|
||||
{"file-ordering", required_argument, 0, 'R'},
|
||||
{"file-info", no_argument, 0, 'i'},
|
||||
{"sum", no_argument, 0, 's'},
|
||||
|
||||
|
@ -136,6 +139,7 @@ Usage: %s [-[abcDhilLsTvwxyz]] [-[ACeEfFJnNOpPqQZ][name]] [-I dirs]\n\
|
|||
[--[no-]annotated-source[=name]] [--[no-]exec-counts[=name]]\n\
|
||||
[--[no-]flat-profile[=name]] [--[no-]graph[=name]]\n\
|
||||
[--[no-]time=name] [--all-lines] [--brief] [--debug[=level]]\n\
|
||||
[--function-ordering] [--file-ordering]\n\
|
||||
[--directory-path=dirs] [--display-unused-functions]\n\
|
||||
[--file-format=name] [--file-info] [--help] [--line] [--min-count=n]\n\
|
||||
[--no-static] [--print-path] [--separate-files]\n\
|
||||
|
@ -322,6 +326,15 @@ DEFUN (main, (argc, argv), int argc AND char **argv)
|
|||
output_style |= STYLE_CALL_GRAPH;
|
||||
user_specified |= STYLE_CALL_GRAPH;
|
||||
break;
|
||||
case 'r':
|
||||
output_style |= STYLE_FUNCTION_ORDER;
|
||||
user_specified |= STYLE_FUNCTION_ORDER;
|
||||
break;
|
||||
case 'R':
|
||||
output_style |= STYLE_FILE_ORDER;
|
||||
user_specified |= STYLE_FILE_ORDER;
|
||||
function_mapping_file = optarg;
|
||||
break;
|
||||
case 'Q':
|
||||
if (optarg)
|
||||
{
|
||||
|
@ -391,6 +404,16 @@ DEFUN (main, (argc, argv), int argc AND char **argv)
|
|||
}
|
||||
}
|
||||
|
||||
/* Don't allow both ordering options, they modify the arc data in-place. */
|
||||
if ((user_specified & STYLE_FUNCTION_ORDER)
|
||||
&& (user_specified & STYLE_FILE_ORDER))
|
||||
{
|
||||
fprintf (stderr,"\
|
||||
%s: Only one of --function-ordering and --file-ordering may be specified.\n",
|
||||
whoami);
|
||||
done (1);
|
||||
}
|
||||
|
||||
/* append value of GPROF_PATH to source search list if set: */
|
||||
str = (char *) getenv ("GPROF_PATH");
|
||||
if (str)
|
||||
|
@ -581,6 +604,14 @@ DEFUN (main, (argc, argv), int argc AND char **argv)
|
|||
{
|
||||
print_annotated_source ();
|
||||
}
|
||||
if (output_style & STYLE_FUNCTION_ORDER)
|
||||
{
|
||||
cg_print_function_ordering ();
|
||||
}
|
||||
if (output_style & STYLE_FILE_ORDER)
|
||||
{
|
||||
cg_print_file_ordering ();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -79,6 +79,8 @@
|
|||
#define STYLE_EXEC_COUNTS (1<<3)
|
||||
#define STYLE_ANNOTATED_SOURCE (1<<4)
|
||||
#define STYLE_GMON_INFO (1<<5)
|
||||
#define STYLE_FUNCTION_ORDER (1<<6)
|
||||
#define STYLE_FILE_ORDER (1<<7)
|
||||
|
||||
#define ANYDEBUG (1<<0) /* 1 */
|
||||
#define DFNDEBUG (1<<1) /* 2 */
|
||||
|
@ -111,6 +113,7 @@ typedef int bool;
|
|||
typedef unsigned char UNIT[2]; /* unit of profiling */
|
||||
|
||||
extern const char *whoami; /* command-name, for error messages */
|
||||
extern const char *function_mapping_file; /* file mapping functions to files */
|
||||
extern const char *a_out_name; /* core filename */
|
||||
extern long hz; /* ticks per second */
|
||||
|
||||
|
|
|
@ -394,6 +394,46 @@ cumulative data in the file @file{gmon.sum}.
|
|||
@item -T
|
||||
The @samp{-T} option causes @code{gprof} to print its output in
|
||||
``traditional'' BSD style.
|
||||
|
||||
@item --function-ordering
|
||||
The @samp{--function-ordering} option causes @code{gprof} to print a
|
||||
suggested function ordering for the program based on profiling data.
|
||||
This option suggests an ordering which may improve paging, TLB and
|
||||
cache behavior for the program on systems which support arbitrary
|
||||
ordering of functions in an executable.
|
||||
|
||||
The exact details of how to force the linker to place functions
|
||||
in a particular order are system dependent and out of the scope of this
|
||||
manual.
|
||||
|
||||
@item --file-ordering @var{map_file}
|
||||
The @samp{--file-ordering} option causes @code{gprof} to print a
|
||||
suggested .o link line ordering for the program based on profiling data.
|
||||
This option suggests an ordering which may improve paging, TLB and
|
||||
cache behavior for the program on systems which do not support arbitrary
|
||||
ordering of functions in an executable.
|
||||
|
||||
Use of the @samp{-a} argument is highly recommended with this option.
|
||||
|
||||
The @var{map_file} argument is a pathname to a file which provides
|
||||
function name to object file mappings. The format of the file is similar to
|
||||
the output of the program @code{nm}.
|
||||
|
||||
@smallexample
|
||||
@group
|
||||
c-parse.o:00000000 T yyparse
|
||||
c-parse.o:00000004 C yyerrflag
|
||||
c-lang.o:00000000 T maybe_objc_method_name
|
||||
c-lang.o:00000000 T print_lang_statistics
|
||||
c-lang.o:00000000 T recognize_objc_keyword
|
||||
c-decl.o:00000000 T print_lang_identifier
|
||||
c-decl.o:00000000 T print_lang_type
|
||||
@dots{}
|
||||
|
||||
@end group
|
||||
@end smallexample
|
||||
|
||||
GNU @code{nm} @samp{--extern-only} @samp{--defined-only} @samp{-v} @samp{--print-file-name} can be used to create @var{map_file}.
|
||||
@end table
|
||||
|
||||
@node Flat Profile
|
||||
|
|
|
@ -36,9 +36,14 @@ typedef struct sym
|
|||
int line_num; /* source line number */
|
||||
unsigned int is_func:1, /* is this a function entry point? */
|
||||
is_static:1, /* is this a local (static) symbol? */
|
||||
is_bb_head:1; /* is this the head of a basic-blk? */
|
||||
is_bb_head:1, /* is this the head of a basic-blk? */
|
||||
mapped:1, /* this symbol was mapped to another name */
|
||||
has_been_placed:1; /* have we placed this symbol? */
|
||||
int ncalls; /* how many times executed */
|
||||
int nuses; /* how many times this symbol appears in
|
||||
a particular context */
|
||||
struct sym *next; /* for building chains of syms */
|
||||
struct sym *prev; /* for building chains of syms */
|
||||
|
||||
/* profile-specific information: */
|
||||
|
||||
|
|
Loading…
Reference in a new issue