* merge.c (struct sec_merge_sec_info): Update comment.

(struct sec_merge_hash_entry): Remove entsize.
(sec_merge_hash_lookup): Only adjust alignment when creating.
(sec_merge_emit): Remove register keyword.
(cmplengthentry, last4_eq, last_eq): Delete.
(strrevcmp, strrevcmp_align, is_suffix): New.
(merge_strings): Use them to implement fast suffix merging.
* elf-strtab.c (struct elf_strtab_hash_entry): Update comments.
Make "len" signed.
(_bfd_elf_strtab_add): Lose on >2G strings.
(_bfd_elf_strtab_emit): Don't emit strings with len < 0.
(cmplengthentry, last4_eq): Delete.
(strrevcmp, is_suffix): New.
(_bfd_elf_strtab_finalize): Rework to implement fast suffix merging.
2003-10-29 22:59:37 +00:00 · 2003-10-29 22:59:37 +00:00 · ddb2b44229
commit ddb2b44229
parent 6d6d8e9cc3
3 changed files with 200 additions and 264 deletions
--- a/bfd/ChangeLog
+++ b/bfd/ChangeLog
@ -1,3 +1,23 @@
+2003-10-30  Lars Knoll  <lars@trolltech.com>
+	    Michael Matz  <matz@suse.de>
+	    Jakub Jelinek  <jakub@redhat.com>
+	    Alan Modra  <amodra@bigpond.net.au>
+
+	* merge.c (struct sec_merge_sec_info): Update comment.
+	(struct sec_merge_hash_entry): Remove entsize.
+	(sec_merge_hash_lookup): Only adjust alignment when creating.
+	(sec_merge_emit): Remove register keyword.
+	(cmplengthentry, last4_eq, last_eq): Delete.
+	(strrevcmp, strrevcmp_align, is_suffix): New.
+	(merge_strings): Use them to implement fast suffix merging.
+	* elf-strtab.c (struct elf_strtab_hash_entry): Update comments.
+	Make "len" signed.
+	(_bfd_elf_strtab_add): Lose on >2G strings.
+	(_bfd_elf_strtab_emit): Don't emit strings with len < 0.
+	(cmplengthentry, last4_eq): Delete.
+	(strrevcmp, is_suffix): New.
+	(_bfd_elf_strtab_finalize): Rework to implement fast suffix merging.
+
 2003-10-29  Daniel Jacobowitz  <drow@mvista.com>

        * elf32-arm.h (elf32_arm_final_link_relocate): Move check for
--- a/bfd/elf-strtab.c
+++ b/bfd/elf-strtab.c
@ -1,5 +1,5 @@
 /* ELF strtab with GC and suffix merging support.
-   Copyright 2001, 2002 Free Software Foundation, Inc.
+   Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
   Written by Jakub Jelinek <jakub@redhat.com>.

   This file is part of BFD, the Binary File Descriptor library.
@ -30,15 +30,14 @@
 struct elf_strtab_hash_entry
 {
  struct bfd_hash_entry root;
-  /* Length of this entry.  */
-  unsigned int len;
+  /* Length of this entry.  This includes the zero terminator.  */
+  int len;
  unsigned int refcount;
  union {
    /* Index within the merged section.  */
    bfd_size_type index;
-    /* Entry this is a suffix of (if len is 0).  */
+    /* Entry this is a suffix of (if len < 0).  */
    struct elf_strtab_hash_entry *suffix;
-    struct elf_strtab_hash_entry *next;
  } u;
 };

@ -158,6 +157,8 @@ _bfd_elf_strtab_add (struct elf_strtab_hash *tab,
  if (entry->len == 0)
    {
      entry->len = strlen (str) + 1;
+      /* Strings of 2G or more do not fit in the signed length.  */
+      BFD_ASSERT (entry->len > 0);
      if (tab->size == tab->alloced)
 	{
 	  bfd_size_type amt = sizeof (struct elf_strtab_hash_entry *);
@ -235,14 +236,14 @@ _bfd_elf_strtab_emit (register bfd *abfd, struct elf_strtab_hash *tab)
  for (i = 1; i < tab->size; ++i)
    {
      register const char *str;
-      register size_t len;
+      register unsigned int len;

-      str = tab->array[i]->root.string;
-      len = tab->array[i]->len;
      BFD_ASSERT (tab->array[i]->refcount == 0);
-      if (len == 0)
+      len = tab->array[i]->len;
+      if ((int) len < 0)
 	continue;

+      str = tab->array[i]->root.string;
      if (bfd_bwrite (str, len, abfd) != len)
 	return FALSE;

@ -253,40 +254,41 @@ _bfd_elf_strtab_emit (register bfd *abfd, struct elf_strtab_hash *tab)
  return TRUE;
 }

-/* Compare two elf_strtab_hash_entry structures.  This is called via qsort.  */
+/* Compare two elf_strtab_hash_entry structures.  Called via qsort.  */

 static int
-cmplengthentry (const void *a, const void *b)
+strrevcmp (const void *a, const void *b)
 {
  struct elf_strtab_hash_entry *A = *(struct elf_strtab_hash_entry **) a;
  struct elf_strtab_hash_entry *B = *(struct elf_strtab_hash_entry **) b;
+  unsigned int lenA = A->len;
+  unsigned int lenB = B->len;
+  const unsigned char *s = A->root.string + lenA - 1;
+  const unsigned char *t = B->root.string + lenB - 1;
+  int l = lenA < lenB ? lenA : lenB;

-  if (A->len < B->len)
-    return 1;
-  else if (A->len > B->len)
-    return -1;
-
-  return memcmp (A->root.string, B->root.string, A->len);
+  while (l)
+    {
+      if (*s != *t)
+	return (int) *s - (int) *t;
+      s--;
+      t--;
+      l--;
+    }
+  return lenA - lenB;
 }

-static int
-last4_eq (const void *a, const void *b)
+static inline int
+is_suffix (const struct elf_strtab_hash_entry *A,
+	   const struct elf_strtab_hash_entry *B)
 {
-  const struct elf_strtab_hash_entry *A = a;
-  const struct elf_strtab_hash_entry *B = b;
-
-  if (memcmp (A->root.string + A->len - 5, B->root.string + B->len - 5, 4)
-      != 0)
-    /* This was a hashtable collision.  */
-    return 0;
-
  if (A->len <= B->len)
    /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
       not to be equal by the hash table.  */
    return 0;

  return memcmp (A->root.string + (A->len - B->len),
-		 B->root.string, B->len - 5) == 0;
+		 B->root.string, B->len - 1) == 0;
 }

 /* This function assigns final string table offsets for used strings,
@ -295,10 +297,8 @@ last4_eq (const void *a, const void *b)
 void
 _bfd_elf_strtab_finalize (struct elf_strtab_hash *tab)
 {
-  struct elf_strtab_hash_entry **array, **a, **end, *e;
-  htab_t last4tab = NULL;
+  struct elf_strtab_hash_entry **array, **a, *e;
  bfd_size_type size, amt;
-  struct elf_strtab_hash_entry *last[256], **last_ptr[256];

  /* GCC 2.91.66 (egcs-1.1.2) on i386 miscompiles this function when i is
     a 64-bit bfd_size_type: a 64-bit target or --enable-64-bit-bfd.
@ -306,105 +306,71 @@ _bfd_elf_strtab_finalize (struct elf_strtab_hash *tab)
     cycles.  */
  size_t i;

-  /* Now sort the strings by length, longest first.  */
-  array = NULL;
+  /* Sort the strings by suffix and length.  */
  amt = tab->size * sizeof (struct elf_strtab_hash_entry *);
  array = bfd_malloc (amt);
  if (array == NULL)
    goto alloc_failure;

-  memset (last, 0, sizeof (last));
-  for (i = 0; i < 256; ++i)
-    last_ptr[i] = &last[i];
  for (i = 1, a = array; i < tab->size; ++i)
-    if (tab->array[i]->refcount)
-      *a++ = tab->array[i];
-    else
-      tab->array[i]->len = 0;
-
-  size = a - array;
-
-  qsort (array, size, sizeof (struct elf_strtab_hash_entry *), cmplengthentry);
-
-  last4tab = htab_create_alloc (size * 4, NULL, last4_eq, NULL, calloc, free);
-  if (last4tab == NULL)
-    goto alloc_failure;
-
-  /* Now insert the strings into hash tables (strings with last 4 characters
-     and strings with last character equal), look for longer strings which
-     we're suffix of.  */
-  for (a = array, end = array + size; a < end; a++)
    {
-      register hashval_t hash;
-      unsigned int c;
-      unsigned int j;
-      const unsigned char *s;
-      void **p;
-
-      e = *a;
-      if (e->len > 4)
+      e = tab->array[i];
+      if (e->refcount)
 	{
-	  s = e->root.string + e->len - 1;
-	  hash = 0;
-	  for (j = 0; j < 4; j++)
-	    {
-	      c = *--s;
-	      hash += c + (c << 17);
-	      hash ^= hash >> 2;
-	    }
-	  p = htab_find_slot_with_hash (last4tab, e, hash, INSERT);
-	  if (p == NULL)
-	    goto alloc_failure;
-	  if (*p)
-	    {
-	      struct elf_strtab_hash_entry *ent;
-
-	      ent = *p;
-	      e->u.suffix = ent;
-	      e->len = 0;
-	      continue;
-	    }
-	  else
-	    *p = e;
+	  *a++ = e;
+	  /* Adjust the length to not include the zero terminator.  */
+	  e->len -= 1;
 	}
      else
+	e->len = 0;
+    }
+
+  size = a - array;
+  if (size != 0)
+    {
+      qsort (array, size, sizeof (struct elf_strtab_hash_entry *), strrevcmp);
+
+      /* Loop over the sorted array and merge suffixes.  Start from the
+	 end because we want, e.g.,
+
+	 s1 -> "d"
+	 s2 -> "bcd"
+	 s3 -> "abcd"
+
+	 to end up as
+
+	 s3 -> "abcd"
+	 s2 _____^
+	 s1 _______^
+
+	 i.e. we don't want s1 pointing into the old s2.  */
+      e = *--a;
+      e->len += 1;
+      while (--a >= array)
 	{
-	  struct elf_strtab_hash_entry *tem;
+	  struct elf_strtab_hash_entry *cmp = *a;

-	  c = e->root.string[e->len - 2] & 0xff;
-
-	  for (tem = last[c]; tem; tem = tem->u.next)
-	    if (tem->len > e->len
-		&& memcmp (tem->root.string + (tem->len - e->len),
-			   e->root.string, e->len - 1) == 0)
-	      break;
-	  if (tem)
+	  cmp->len += 1;
+	  if (is_suffix (e, cmp))
 	    {
-	      e->u.suffix = tem;
-	      e->len = 0;
-	      continue;
+	      cmp->u.suffix = e;
+	      cmp->len = -cmp->len;
 	    }
+	  else
+	    e = cmp;
 	}
-
-      c = e->root.string[e->len - 2] & 0xff;
-      /* Put longest strings first.  */
-      *last_ptr[c] = e;
-      last_ptr[c] = &e->u.next;
-      e->u.next = NULL;
    }

 alloc_failure:
  if (array)
    free (array);
-  if (last4tab)
-    htab_delete (last4tab);

-  /* Now assign positions to the strings we want to keep.  */
+  /* Assign positions to the strings we want to keep.  */
  size = 1;
  for (i = 1; i < tab->size; ++i)
    {
      e = tab->array[i];
-      if (e->refcount && e->len)
+      if (e->refcount && e->len > 0)
 	{
 	  e->u.index = size;
 	  size += e->len;
@ -413,12 +379,11 @@ alloc_failure:

  tab->sec_size = size;

-  /* And now adjust the rest.  */
+  /* Adjust the rest.  */
  for (i = 1; i < tab->size; ++i)
    {
      e = tab->array[i];
-      if (e->refcount && ! e->len)
-	e->u.index = e->u.suffix->u.index
-		     + (e->u.suffix->len - strlen (e->root.string) - 1);
+      if (e->refcount && e->len < 0)
+	e->u.index = e->u.suffix->u.index + (e->u.suffix->len + e->len);
    }
 }
--- a/bfd/merge.c
+++ b/bfd/merge.c
@ -34,7 +34,7 @@ struct sec_merge_sec_info;
 struct sec_merge_hash_entry
 {
  struct bfd_hash_entry root;
-  /* Length of this entry.  */
+  /* Length of this entry.  This includes the zero terminator.  */
  unsigned int len;
  /* Start of this string needs to be aligned to
     alignment octets (not 1 << align).  */
@ -43,8 +43,6 @@ struct sec_merge_hash_entry
  {
    /* Index within the merged section.  */
    bfd_size_type index;
-    /* Entity size (if present in suffix hash tables).  */
-    unsigned int entsize;
    /* Entry this is a suffix of (if alignment is 0).  */
    struct sec_merge_hash_entry *suffix;
  } u;
@ -205,9 +203,12 @@ sec_merge_hash_lookup (struct sec_merge_hash *table, const char *string,
 	     alignment, we need to insert another copy.  */
 	  if (hashp->alignment < alignment)
 	    {
-	      /*  Mark the less aligned copy as deleted.  */
-	      hashp->len = 0;
-	      hashp->alignment = 0;
+	      if (create)
+		{
+		  /*  Mark the less aligned copy as deleted.  */
+		  hashp->len = 0;
+		  hashp->alignment = 0;
+		}
 	      break;
 	    }
 	  return hashp;
@ -287,7 +288,7 @@ sec_merge_add (struct sec_merge_hash *tab, const char *str,
 }

 static bfd_boolean
-sec_merge_emit (register bfd *abfd, struct sec_merge_hash_entry *entry)
+sec_merge_emit (bfd *abfd, struct sec_merge_hash_entry *entry)
 {
  struct sec_merge_sec_info *secinfo = entry->secinfo;
  asection *sec = secinfo->sec;
@ -420,79 +421,6 @@ _bfd_merge_section (bfd *abfd, void **psinfo, asection *sec, void **psecinfo)
  return FALSE;
 }

-/* Compare two sec_merge_hash_entry structures.  This is called via qsort.  */
-
-static int
-cmplengthentry (const void *a, const void *b)
-{
-  struct sec_merge_hash_entry * A = *(struct sec_merge_hash_entry **) a;
-  struct sec_merge_hash_entry * B = *(struct sec_merge_hash_entry **) b;
-
-  if (A->len < B->len)
-    return 1;
-  else if (A->len > B->len)
-    return -1;
-
-  return memcmp (A->root.string, B->root.string, A->len);
-}
-
-static int
-last4_eq (const void *a, const void *b)
-{
-  struct sec_merge_hash_entry * A = (struct sec_merge_hash_entry *) a;
-  struct sec_merge_hash_entry * B = (struct sec_merge_hash_entry *) b;
-
-  if (memcmp (A->root.string + A->len - 5 * A->u.entsize,
-	      B->root.string + B->len - 5 * A->u.entsize,
-	      4 * A->u.entsize) != 0)
-    /* This was a hashtable collision.  */
-    return 0;
-
-  if (A->len <= B->len)
-    /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
-       not to be equal by the hash table.  */
-    return 0;
-
-  if (A->alignment < B->alignment
-      || ((A->len - B->len) & (B->alignment - 1)))
-    /* The suffix is not sufficiently aligned.  */
-    return 0;
-
-  return memcmp (A->root.string + (A->len - B->len),
-		 B->root.string, B->len - 5 * A->u.entsize) == 0;
-}
-
-static int
-last_eq (const void *a, const void *b)
-{
-  struct sec_merge_hash_entry * A = (struct sec_merge_hash_entry *) a;
-  struct sec_merge_hash_entry * B = (struct sec_merge_hash_entry *) b;
-
-  if (B->len >= 5 * A->u.entsize)
-    /* Longer strings are just pushed into the hash table,
-       they'll be used when looking up for very short strings.  */
-    return 0;
-
-  if (memcmp (A->root.string + A->len - 2 * A->u.entsize,
-	      B->root.string + B->len - 2 * A->u.entsize,
-	      A->u.entsize) != 0)
-    /* This was a hashtable collision.  */
-    return 0;
-
-  if (A->len <= B->len)
-    /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
-       not to be equal by the hash table.  */
-    return 0;
-
-  if (A->alignment < B->alignment
-      || ((A->len - B->len) & (B->alignment - 1)))
-    /* The suffix is not sufficiently aligned.  */
-    return 0;
-
-  return memcmp (A->root.string + (A->len - B->len),
-		 B->root.string, B->len - 2 * A->u.entsize) == 0;
-}
-
 /* Record one section into the hash table.  */
 static bfd_boolean
 record_section (struct sec_merge_info *sinfo,
@ -534,7 +462,7 @@ record_section (struct sec_merge_info *sinfo,
 			goto error_return;
 		    }
 		  p++;
-	        }
+		}
 	    }
 	  else
 	    {
@ -576,18 +504,81 @@ error_return:
  return FALSE;
 }

+static int
+strrevcmp (const void *a, const void *b)
+{
+  struct sec_merge_hash_entry *A = *(struct sec_merge_hash_entry **) a;
+  struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b;
+  unsigned int lenA = A->len;
+  unsigned int lenB = B->len;
+  const unsigned char *s = A->root.string + lenA - 1;
+  const unsigned char *t = B->root.string + lenB - 1;
+  int l = lenA < lenB ? lenA : lenB;
+
+  while (l)
+    {
+      if (*s != *t)
+	return (int) *s - (int) *t;
+      s--;
+      t--;
+      l--;
+    }
+  return lenA - lenB;
+}
+
+/* Like strrevcmp, but for the case where all strings have the same
+   alignment > entsize.  */
+
+static int
+strrevcmp_align (const void *a, const void *b)
+{
+  struct sec_merge_hash_entry *A = *(struct sec_merge_hash_entry **) a;
+  struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b;
+  unsigned int lenA = A->len;
+  unsigned int lenB = B->len;
+  const unsigned char *s = A->root.string + lenA - 1;
+  const unsigned char *t = B->root.string + lenB - 1;
+  int l = lenA < lenB ? lenA : lenB;
+  int tail_align = (lenA & (A->alignment - 1)) - (lenB & (A->alignment - 1));
+
+  if (tail_align != 0)
+    return tail_align;
+
+  while (l)
+    {
+      if (*s != *t)
+	return (int) *s - (int) *t;
+      s--;
+      t--;
+      l--;
+    }
+  return lenA - lenB;
+}
+
+static inline int
+is_suffix (const struct sec_merge_hash_entry *A,
+	   const struct sec_merge_hash_entry *B)
+{
+  if (A->len <= B->len)
+    /* B cannot be a suffix of A unless A is equal to B, and the hash table
+       guarantees that A and B are not equal.  */
+    return 0;
+
+  return memcmp (A->root.string + (A->len - B->len),
+		 B->root.string, B->len) == 0;
+}
+
 /* This is a helper function for _bfd_merge_sections.  It attempts to
   merge strings matching suffixes of longer strings.  */
 static void
 merge_strings (struct sec_merge_info *sinfo)
 {
-  struct sec_merge_hash_entry **array, **a, **end, *e;
+  struct sec_merge_hash_entry **array, **a, *e;
  struct sec_merge_sec_info *secinfo;
-  htab_t lasttab = NULL, last4tab = NULL;
  bfd_size_type size, amt;
+  unsigned int alignment = 0;

-  /* Now sort the strings by length, longest first.  */
-  array = NULL;
+  /* Now sort the strings.  */
  amt = sinfo->htab->size * sizeof (struct sec_merge_hash_entry *);
  array = (struct sec_merge_hash_entry **) bfd_malloc (amt);
  if (array == NULL)
@ -595,90 +586,50 @@ merge_strings (struct sec_merge_info *sinfo)

  for (e = sinfo->htab->first, a = array; e; e = e->next)
    if (e->alignment)
-      *a++ = e;
+      {
+	*a++ = e;
+	/* Adjust the length to not include the zero terminator.  */
+	e->len -= sinfo->htab->entsize;
+	if (alignment != e->alignment)
+	  {
+	    if (alignment == 0)
+	      alignment = e->alignment;
+	    else
+	      alignment = (unsigned) -1;
+	  }
+      }

  sinfo->htab->size = a - array;
-
-  qsort (array, (size_t) sinfo->htab->size,
-	 sizeof (struct sec_merge_hash_entry *), cmplengthentry);
-
-  last4tab = htab_create_alloc ((size_t) sinfo->htab->size * 4,
-				NULL, last4_eq, NULL, calloc, free);
-  lasttab = htab_create_alloc ((size_t) sinfo->htab->size * 4,
-			       NULL, last_eq, NULL, calloc, free);
-  if (lasttab == NULL || last4tab == NULL)
-    goto alloc_failure;
-
-  /* Now insert the strings into hash tables (strings with last 4 characters
-     and strings with last character equal), look for longer strings which
-     we're suffix of.  */
-  for (a = array, end = array + sinfo->htab->size; a < end; a++)
+  if (sinfo->htab->size != 0)
    {
-      register hashval_t hash;
-      unsigned int c;
-      unsigned int i;
-      const unsigned char *s;
-      void **p;
+      qsort (array, (size_t) sinfo->htab->size,
+	     sizeof (struct sec_merge_hash_entry *),
+	     (alignment != (unsigned) -1 && alignment > sinfo->htab->entsize
+	      ? strrevcmp_align : strrevcmp));

-      e = *a;
-      e->u.entsize = sinfo->htab->entsize;
-      if (e->len <= e->u.entsize)
-	break;
-      if (e->len > 4 * e->u.entsize)
+      /* Loop over the sorted array and merge suffixes.  */
+      e = *--a;
+      e->len += sinfo->htab->entsize;
+      while (--a >= array)
 	{
-	  s = (const unsigned char *) (e->root.string + e->len - e->u.entsize);
-	  hash = 0;
-	  for (i = 0; i < 4 * e->u.entsize; i++)
-	    {
-	      c = *--s;
-	      hash += c + (c << 17);
-	      hash ^= hash >> 2;
-	    }
-	  p = htab_find_slot_with_hash (last4tab, e, hash, INSERT);
-	  if (p == NULL)
-	    goto alloc_failure;
-	  if (*p)
-	    {
-	      struct sec_merge_hash_entry *ent;
+	  struct sec_merge_hash_entry *cmp = *a;

-	      ent = (struct sec_merge_hash_entry *) *p;
-	      e->u.suffix = ent;
-	      e->alignment = 0;
-	      continue;
+	  cmp->len += sinfo->htab->entsize;
+	  if (e->alignment >= cmp->alignment
+	      && !((e->len - cmp->len) & (cmp->alignment - 1))
+	      && is_suffix (e, cmp))
+	    {
+	      cmp->u.suffix = e;
+	      cmp->alignment = 0;
 	    }
 	  else
-	    *p = e;
+	    e = cmp;
 	}
-      s = (const unsigned char *) (e->root.string + e->len - e->u.entsize);
-      hash = 0;
-      for (i = 0; i < e->u.entsize; i++)
-	{
-	  c = *--s;
-	  hash += c + (c << 17);
-	  hash ^= hash >> 2;
-	}
-      p = htab_find_slot_with_hash (lasttab, e, hash, INSERT);
-      if (p == NULL)
-	goto alloc_failure;
-      if (*p)
-	{
-	  struct sec_merge_hash_entry *ent;
-
-	  ent = (struct sec_merge_hash_entry *) *p;
-	  e->u.suffix = ent;
-	  e->alignment = 0;
-	}
-      else
-	*p = e;
    }

 alloc_failure:
  if (array)
    free (array);
-  if (lasttab)
-    htab_delete (lasttab);
-  if (last4tab)
-    htab_delete (last4tab);

  /* Now assign positions to the strings we want to keep.  */
  size = 0;