2003-11-07 Andrew Cagney <cagney@redhat.com>
* bcache.h: Update copyright. Add comments on bcache VS hashtab. * bcache.c (struct bstring): Make "length" an unsigned short, add "half_hash". (struct bcache): Add "half_hash_error_count". (bcache): Compute and save the "half_hash". Compare the "half_hash" before comparing the length. Update half_hash_error_count.
This commit is contained in:
parent
f168dd8007
commit
49df298f1f
3 changed files with 131 additions and 19 deletions
|
@ -1,3 +1,13 @@
|
|||
2003-11-07 Andrew Cagney <cagney@redhat.com>
|
||||
|
||||
* bcache.h: Update copyright. Add comments on bcache VS hashtab.
|
||||
* bcache.c (struct bstring): Make "length" an unsigned short, add
|
||||
"half_hash".
|
||||
(struct bcache): Add "half_hash_error_count".
|
||||
(bcache): Compute and save the "half_hash". Compare the
|
||||
"half_hash" before comparing the length. Update
|
||||
half_hash_error_count.
|
||||
|
||||
2003-11-07 Andrew Cagney <cagney@redhat.com>
|
||||
|
||||
* inftarg.c (child_xfer_partial): New function
|
||||
|
|
39
gdb/bcache.c
39
gdb/bcache.c
|
@ -38,8 +38,15 @@
|
|||
|
||||
struct bstring
|
||||
{
|
||||
/* Hash chain. */
|
||||
struct bstring *next;
|
||||
size_t length;
|
||||
/* Assume the data length is no more than 64k. */
|
||||
unsigned short length;
|
||||
/* The half hash hack. This contains the upper 16 bits of the hash
|
||||
value and is used as a pre-check when comparing two strings and
|
||||
avoids the need to do length or memcmp calls. It proves to be
|
||||
roughly 100% effective. */
|
||||
unsigned short half_hash;
|
||||
|
||||
union
|
||||
{
|
||||
|
@ -79,6 +86,10 @@ struct bcache
|
|||
expand_hash_count. */
|
||||
unsigned long expand_count;
|
||||
unsigned long expand_hash_count;
|
||||
/* Number of times that the half-hash compare hit (compare the upper
|
||||
16 bits of hash values) hit, but the corresponding combined
|
||||
length/data compare missed. */
|
||||
unsigned long half_hash_miss_count;
|
||||
};
|
||||
|
||||
/* The old hash function was stolen from SDBM. This is what DB 3.0 uses now,
|
||||
|
@ -187,6 +198,8 @@ expand_hash_table (struct bcache *bcache)
|
|||
void *
|
||||
bcache (const void *addr, int length, struct bcache *bcache)
|
||||
{
|
||||
unsigned long full_hash;
|
||||
unsigned short half_hash;
|
||||
int hash_index;
|
||||
struct bstring *s;
|
||||
|
||||
|
@ -197,13 +210,24 @@ bcache (const void *addr, int length, struct bcache *bcache)
|
|||
bcache->total_count++;
|
||||
bcache->total_size += length;
|
||||
|
||||
hash_index = hash (addr, length) % bcache->num_buckets;
|
||||
full_hash = hash (addr, length);
|
||||
half_hash = (full_hash >> 16);
|
||||
hash_index = full_hash % bcache->num_buckets;
|
||||
|
||||
/* Search the hash bucket for a string identical to the caller's. */
|
||||
/* Search the hash bucket for a string identical to the caller's.
|
||||
As a short-circuit first compare the upper part of each hash
|
||||
values. */
|
||||
for (s = bcache->bucket[hash_index]; s; s = s->next)
|
||||
if (s->length == length
|
||||
&& ! memcmp (&s->d.data, addr, length))
|
||||
return &s->d.data;
|
||||
{
|
||||
if (s->half_hash == half_hash)
|
||||
{
|
||||
if (s->length == length
|
||||
&& ! memcmp (&s->d.data, addr, length))
|
||||
return &s->d.data;
|
||||
else
|
||||
bcache->half_hash_miss_count++;
|
||||
}
|
||||
}
|
||||
|
||||
/* The user's string isn't in the list. Insert it after *ps. */
|
||||
{
|
||||
|
@ -212,6 +236,7 @@ bcache (const void *addr, int length, struct bcache *bcache)
|
|||
memcpy (&new->d.data, addr, length);
|
||||
new->length = length;
|
||||
new->next = bcache->bucket[hash_index];
|
||||
new->half_hash = half_hash;
|
||||
bcache->bucket[hash_index] = new;
|
||||
|
||||
bcache->unique_count++;
|
||||
|
@ -378,6 +403,8 @@ print_bcache_statistics (struct bcache *c, char *type)
|
|||
c->expand_count);
|
||||
printf_filtered (" Hash table hashes: %lu\n",
|
||||
c->total_count + c->expand_hash_count);
|
||||
printf_filtered (" Half hash misses: %lu\n",
|
||||
c->half_hash_miss_count);
|
||||
printf_filtered (" Hash table population: ");
|
||||
print_percentage (occupied_buckets, c->num_buckets);
|
||||
printf_filtered (" Median hash chain length: %3d\n",
|
||||
|
|
101
gdb/bcache.h
101
gdb/bcache.h
|
@ -2,7 +2,7 @@
|
|||
Written by Fred Fish <fnf@cygnus.com>
|
||||
Rewritten by Jim Blandy <jimb@cygnus.com>
|
||||
|
||||
Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
|
||||
Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GDB.
|
||||
|
||||
|
@ -48,20 +48,95 @@
|
|||
You shouldn't modify the strings you get from a bcache, because:
|
||||
|
||||
- You don't necessarily know who you're sharing space with. If I
|
||||
stick eight bytes of text in a bcache, and then stick an
|
||||
eight-byte structure in the same bcache, there's no guarantee
|
||||
those two objects don't actually comprise the same sequence of
|
||||
bytes. If they happen to, the bcache will use a single byte
|
||||
string for both of them. Then, modifying the structure will
|
||||
change the string. In bizarre ways.
|
||||
stick eight bytes of text in a bcache, and then stick an eight-byte
|
||||
structure in the same bcache, there's no guarantee those two
|
||||
objects don't actually comprise the same sequence of bytes. If
|
||||
they happen to, the bcache will use a single byte string for both
|
||||
of them. Then, modifying the structure will change the string. In
|
||||
bizarre ways.
|
||||
|
||||
- Even if you know for some other reason that all that's okay,
|
||||
there's another problem. A bcache stores all its strings in a
|
||||
hash table. If you modify a string's contents, you will probably
|
||||
change its hash value. This means that the modified string is
|
||||
now in the wrong place in the hash table, and future bcache
|
||||
probes will never find it. So by mutating a string, you give up
|
||||
any chance of sharing its space with future duplicates. */
|
||||
there's another problem. A bcache stores all its strings in a hash
|
||||
table. If you modify a string's contents, you will probably change
|
||||
its hash value. This means that the modified string is now in the
|
||||
wrong place in the hash table, and future bcache probes will never
|
||||
find it. So by mutating a string, you give up any chance of
|
||||
sharing its space with future duplicates.
|
||||
|
||||
|
||||
Size of bcache VS hashtab:
|
||||
|
||||
For bcache, the most critical cost is size (or more exactly the
|
||||
overhead added by the bcache). It turns out that the bcache is
|
||||
remarkably efficient.
|
||||
|
||||
Assuming a 32-bit system (the hash table slots are 4 bytes),
|
||||
ignoring alignment, and limit strings to 255 bytes (1 byte length)
|
||||
we get ...
|
||||
|
||||
bcache: This uses a separate linked list to track the hash chain.
|
||||
The numbers show roughly 100% occupancy of the hash table and an
|
||||
average chain length of 4. Spreading the slot cost over the 4
|
||||
chain elements:
|
||||
|
||||
4 (slot) / 4 (chain length) + 1 (length) + 4 (chain) = 6 bytes
|
||||
|
||||
hashtab: This uses a more traditional re-hash algorithm where the
|
||||
chain is maintained within the hash table. The table occupancy is
|
||||
kept below 75% but we'll assume its perfect:
|
||||
|
||||
4 (slot) x 4/3 (occupancy) + 1 (length) = 6 1/3 bytes
|
||||
|
||||
So a perfect hashtab has just slightly larger than an average
|
||||
bcache.
|
||||
|
||||
It turns out that an average hashtab is far worse. Two things
|
||||
hurt:
|
||||
|
||||
- Hashtab's occupancy is more like 50% (it ranges between 38% and
|
||||
75%) giving a per slot cost of 4x2 vs 4x4/3.
|
||||
|
||||
- the string structure needs to be aligned to 8 bytes which for
|
||||
hashtab wastes 7 bytes, while for bcache wastes only 3.
|
||||
|
||||
This gives:
|
||||
|
||||
hashtab: 4 x 2 + 1 + 7 = 16 bytes
|
||||
|
||||
bcache 4 / 4 + 1 + 4 + 3 = 9 bytes
|
||||
|
||||
The numbers of GDB debugging GDB support this. ~40% vs ~70% overhead.
|
||||
|
||||
|
||||
Speed of bcache VS hashtab (the half hash hack):
|
||||
|
||||
While hashtab has a typical chain length of 1, bcache has a chain
|
||||
length of round 4. This means that the bcache will require
|
||||
something like double the number of compares after that initial
|
||||
hash. In both cases the comparison takes the form:
|
||||
|
||||
a.length == b.length && memcmp (a.data, b.data, a.length) == 0
|
||||
|
||||
That is lengths are checked before doing the memcmp.
|
||||
|
||||
For GDB debugging GDB, it turned out that all lengths were 24 bytes
|
||||
(no C++ so only psymbols were cached) and hence, all compares
|
||||
required a call to memcmp. As a hack, two bytes of padding
|
||||
(mentioned above) are used to store the upper 16 bits of the
|
||||
string's hash value and then that is used in the comparison vis:
|
||||
|
||||
a.half_hash == b.half_hash && a.length == b.length && memcmp
|
||||
(a.data, b.data, a.length)
|
||||
|
||||
The numbers from GDB debugging GDB show this to be a remarkable
|
||||
100% effective (only necessary length and memcmp tests being
|
||||
performed).
|
||||
|
||||
Mind you, looking at the wall clock, the same GDB debugging GDB
|
||||
showed only marginal speed up (0.780 vs 0.773s). Seems GDB is too
|
||||
busy doing something else :-(
|
||||
|
||||
*/
|
||||
|
||||
|
||||
struct bcache;
|
||||
|
|
Loading…
Reference in a new issue