* elf32-spu.c (spu_elf_size_stubs): Revert 2008-01-28 doubling
	of _ovly_buf_table size.
	(spu_elf_build_stubs): Use low bit of .size as "present" bit.
	Adjust initialisations relating to _ovly_buf_table.
ld/
	* emultempl/spu_ovl.S: Use low bit of _ovly_table.size as
	a "present" bit rather than low bit of .buf.  Correct indexing
	into _ovly_buf_table.  Use relative loads and stores to access
	overlay manager local vars.
	* emultempl/spu_ovl.o: Regenerate.
This commit is contained in:
Alan Modra 2008-02-07 01:26:56 +00:00
parent 464f5c14b7
commit 2e444beaa0
5 changed files with 50 additions and 32 deletions

View file

@ -1,3 +1,10 @@
2008-02-07 Alan Modra <amodra@bigpond.net.au>
* elf32-spu.c (spu_elf_size_stubs): Revert 2008-01-28 doubling
of _ovly_buf_table size.
(spu_elf_build_stubs): Use low bit of .size as "present" bit.
Adjust initialisations relating to _ovly_buf_table.
2008-02-04 Bob Wilson <bob.wilson@acm.org> 2008-02-04 Bob Wilson <bob.wilson@acm.org>
* elf32-xtensa (elf_xtensa_relocate_section): After finding an invalid * elf32-xtensa (elf_xtensa_relocate_section): After finding an invalid

View file

@ -1202,7 +1202,7 @@ spu_elf_size_stubs (bfd *output_bfd,
|| !bfd_set_section_alignment (ibfd, htab->ovtab, 4)) || !bfd_set_section_alignment (ibfd, htab->ovtab, 4))
return 0; return 0;
htab->ovtab->size = htab->num_overlays * 16 + 16 + htab->num_buf * 2 * 4; htab->ovtab->size = htab->num_overlays * 16 + 16 + htab->num_buf * 4;
(*place_spu_section) (htab->ovtab, NULL, ".data"); (*place_spu_section) (htab->ovtab, NULL, ".data");
htab->toe = bfd_make_section_anyway_with_flags (ibfd, ".toe", SEC_ALLOC); htab->toe = bfd_make_section_anyway_with_flags (ibfd, ".toe", SEC_ALLOC);
@ -1373,8 +1373,8 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms)
/* Write out _ovly_table. */ /* Write out _ovly_table. */
p = htab->ovtab->contents; p = htab->ovtab->contents;
/* set low bit of .buf to mark non-overlay area as present. */ /* set low bit of .size to mark non-overlay area as present. */
p[15] = 1; p[7] = 1;
for (s = obfd->sections; s != NULL; s = s->next) for (s = obfd->sections; s != NULL; s = s->next)
{ {
unsigned int ovl_index = spu_elf_section_data (s)->u.o.ovl_index; unsigned int ovl_index = spu_elf_section_data (s)->u.o.ovl_index;
@ -1387,7 +1387,7 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms)
bfd_put_32 (htab->ovtab->owner, s->vma, p + off); bfd_put_32 (htab->ovtab->owner, s->vma, p + off);
bfd_put_32 (htab->ovtab->owner, (s->size + 15) & -16, p + off + 4); bfd_put_32 (htab->ovtab->owner, (s->size + 15) & -16, p + off + 4);
/* file_off written later in spu_elf_modify_program_headers. */ /* file_off written later in spu_elf_modify_program_headers. */
bfd_put_32 (htab->ovtab->owner, ovl_buf * 2, p + off + 12); bfd_put_32 (htab->ovtab->owner, ovl_buf, p + off + 12);
} }
} }
@ -1407,12 +1407,12 @@ spu_elf_build_stubs (struct bfd_link_info *info, int emit_syms)
if (h == NULL) if (h == NULL)
return FALSE; return FALSE;
h->root.u.def.value = htab->num_overlays * 16 + 16; h->root.u.def.value = htab->num_overlays * 16 + 16;
h->size = htab->num_buf * 2 * 4; h->size = htab->num_buf * 4;
h = define_ovtab_symbol (htab, "_ovly_buf_table_end"); h = define_ovtab_symbol (htab, "_ovly_buf_table_end");
if (h == NULL) if (h == NULL)
return FALSE; return FALSE;
h->root.u.def.value = htab->num_overlays * 16 + 16 + htab->num_buf * 2 * 4; h->root.u.def.value = htab->num_overlays * 16 + 16 + htab->num_buf * 4;
h->size = 0; h->size = 0;
h = define_ovtab_symbol (htab, "_EAR_"); h = define_ovtab_symbol (htab, "_EAR_");

View file

@ -1,3 +1,11 @@
2008-02-07 Alan Modra <amodra@bigpond.net.au>
* emultempl/spu_ovl.S: Use low bit of _ovly_table.size as
a "present" bit rather than low bit of .buf. Correct indexing
into _ovly_buf_table. Use relative loads and stores to access
overlay manager local vars.
* emultempl/spu_ovl.o: Regenerate.
2008-02-04 H.J. Lu <hongjiu.lu@intel.com> 2008-02-04 H.J. Lu <hongjiu.lu@intel.com>
PR 5715 PR 5715

View file

@ -1,6 +1,6 @@
/* Overlay manager for SPU. /* Overlay manager for SPU.
Copyright 2006, 2007 Free Software Foundation, Inc. Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of the GNU Binutils. This file is part of the GNU Binutils.
@ -46,12 +46,13 @@
#define cgbits reserved2 #define cgbits reserved2
#define off3 reserved2 #define off3 reserved2
#define off4 reserved2 #define off4 reserved2
#define addr4 reserved2
#define off5 reserved2 #define off5 reserved2
#define tagstat reserved2 #define tagstat reserved2
#define reserved3 $77 #define reserved3 $77
#define buf1 reserved3 #define size1 reserved3
#define buf2 reserved3 #define size2 reserved3
#define rv3 reserved3 #define rv3 reserved3
#define ealo reserved3 #define ealo reserved3
#define cmd reserved3 #define cmd reserved3
@ -145,18 +146,18 @@ __ovly_return:
#nop; lnop #nop; lnop
#nop; lnop #nop; lnop
#nop #nop
rotqbyi buf1, vma, 12 # 1,4 14 rotqbyi size1, vma, 4 # 1,4 14
#nop #nop
stqd save3, -48($sp) # 1,6 15 stqd save3, -48($sp) # 1,6 15
#nop #nop
stqd save2, -32($sp) # 1,6 16 stqd save2, -32($sp) # 1,6 16
#nop #nop
stqd save1, -16($sp) # 1,6 17 stqd save1, -16($sp) # 1,6 17
andi present1, buf1, 1 # 0,2 18 andi present1, size1, 1 # 0,2 18
stqd ovl, (__ovly_current - __ovly_return)($lr) # 1,6 18 stqr ovl, __ovly_current # 1,6 18
#nop; lnop #nop; lnop
#nop #nop
brz present1, __ovly_load_event # 1,4 20 brz present1, do_load # 1,4 20
ovly_ret9: ovly_ret9:
#nop #nop
bi target # 1,4 21 bi target # 1,4 21
@ -197,11 +198,11 @@ __ovly_load:
#lnop #lnop
#nop; lnop #nop; lnop
#nop #nop
lqd cur, (__ovly_current - __ovly_return)(rv1) # 1,6 2 lqr cur, __ovly_current # 1,6 2
shli off2, ovl, 4 # 0,4 3 shli off2, ovl, 4 # 0,4 3
stqd ovl, (__ovly_current - __ovly_return)(rv1) # 1,6 3 stqr ovl, __ovly_current # 1,6 3
ceq rv2, $lr, rv1 # 0,2 4 ceq rv2, $lr, rv1 # 0,2 4
lqd rv3, (__rv_pattern - __ovly_return)(rv1) # 1,6 4 lqr rv3, __rv_pattern # 1,6 4
#nop; lnop #nop; lnop
#nop; lnop #nop; lnop
#nop #nop
@ -214,11 +215,11 @@ __ovly_load:
ila rv1, __ovly_return # 0,2 1 ila rv1, __ovly_return # 0,2 1
stqd save2, -32($sp) # 1,6 1 stqd save2, -32($sp) # 1,6 1
shli off2, ovl, 4 # 0,4 2 shli off2, ovl, 4 # 0,4 2
lqa cur, __ovly_current # 1,6 2 lqr cur, __ovly_current # 1,6 2
nop nop
stqa ovl, __ovly_current # 1,6 3 stqr ovl, __ovly_current # 1,6 3
ceq rv2, $lr, rv1 # 0,2 4 ceq rv2, $lr, rv1 # 0,2 4
lqd rv3, (__rv_pattern - __ovly_return)(rv1) # 1,6 4 lqr rv3, __rv_pattern # 1,6 4
#nop #nop
hbr ovly_load9, target # 1,15 5 hbr ovly_load9, target # 1,15 5
#nop #nop
@ -237,18 +238,18 @@ __ovly_load:
#nop #nop
rotqmbyi rv6, $lr, -8 # 1,4 12 rotqmbyi rv6, $lr, -8 # 1,4 12
#nop #nop
rotqbyi buf2, vma, 12 # 1,4 13 rotqbyi size2, vma, 4 # 1,4 13
#nop #nop
lqd save3, -48($sp) # 1,6 14 lqd save3, -48($sp) # 1,6 14
#nop; lnop #nop; lnop
or rv7, rv4, rv6 # 0,2 16 or rv7, rv4, rv6 # 0,2 16
lqd save2, -32($sp) # 1,6 16 lqd save2, -32($sp) # 1,6 16
andi present2, buf2, 1 # 0,2 17 andi present2, size2, 1 # 0,2 17
lnop # 1,0 17 lnop # 1,0 17
selb $lr, rv7, $lr, rv5 # 0,2 18 selb $lr, rv7, $lr, rv5 # 0,2 18
lqd save1, -16($sp) # 1,6 18 lqd save1, -16($sp) # 1,6 18
#nop #nop
brz present2, __ovly_load_event # 1,4 19 brz present2, do_load # 1,4 19
ovly_load9: ovly_load9:
#nop #nop
bi target # 1,4 20 bi target # 1,4 20
@ -266,6 +267,7 @@ ovly_load9:
.global __ovly_load_event .global __ovly_load_event
.type __ovly_load_event, @function .type __ovly_load_event, @function
__ovly_load_event: __ovly_load_event:
do_load:
#nop #nop
rotqbyi sz, vma, 8 # 1,4 0 rotqbyi sz, vma, 8 # 1,4 0
#nop #nop
@ -273,7 +275,7 @@ __ovly_load_event:
#nop #nop
lqa ea64, _EAR_ # 1,6 2 lqa ea64, _EAR_ # 1,6 2
#nop #nop
lqd cgshuf, (__cg_pattern - __ovly_return)($lr) # 1,6 3 lqr cgshuf, __cg_pattern # 1,6 3
/* We could predict the branch at the end of this loop by adding a few /* We could predict the branch at the end of this loop by adding a few
instructions, and there are plenty of free cycles to do so without instructions, and there are plenty of free cycles to do so without
@ -316,13 +318,13 @@ __ovly_xfer_loop:
brnz osize, __ovly_xfer_loop # 1,4 24 brnz osize, __ovly_xfer_loop # 1,4 24
/* Now update our data structures while waiting for DMA to complete.
Low bit of .buf needs to be cleared on the _ovly_table entry Low bit of .size needs to be cleared on the _ovly_table entry
corresponding to the evicted overlay, and set on the entry for the corresponding to the evicted overlay, and set on the entry for the
newly loaded overlay. Note that no overlay may in fact be evicted newly loaded overlay. Note that no overlay may in fact be evicted
as _ovly_buf_table[] starts with all zeros. Don't zap .buf entry as _ovly_buf_table[] starts with all zeros. Don't zap .size entry
for zero index! Also of course update the _ovly_buf_table entry. */ for zero index! Also of course update the _ovly_buf_table entry. */
#nop #nop
lqd newovl, (__ovly_current - __ovly_return)($lr) # 1,6 25 lqr newovl, __ovly_current # 1,6 25
#nop; lnop #nop; lnop
#nop; lnop #nop; lnop
#nop; lnop #nop; lnop
@ -333,7 +335,7 @@ __ovly_xfer_loop:
ila tab3, _ovly_table - 16 # 0,2 32 ila tab3, _ovly_table - 16 # 0,2 32
#lnop #lnop
#nop #nop
fsmbi pbyte, 1 # 1,4 33 fsmbi pbyte, 0x100 # 1,4 33
#nop; lnop #nop; lnop
#nop #nop
lqx vma, tab3, off3 # 1,6 35 lqx vma, tab3, off3 # 1,6 35
@ -351,7 +353,7 @@ __ovly_xfer_loop:
#nop; lnop #nop; lnop
shli off4, buf3, 2 # 1,4 45 shli off4, buf3, 2 # 1,4 45
#lnop #lnop
ila tab4, _ovly_buf_table # 0,2 46 ila tab4, _ovly_buf_table - 4 # 0,2 46
#lnop #lnop
#nop; lnop #nop; lnop
#nop; lnop #nop; lnop
@ -359,13 +361,14 @@ __ovly_xfer_loop:
lqx map, tab4, off4 # 1,6 49 lqx map, tab4, off4 # 1,6 49
#nop #nop
cwx genwi, tab4, off4 # 1,4 50 cwx genwi, tab4, off4 # 1,4 50
#nop; lnop a addr4, tab4, off4 # 0,2 51
#lnop
#nop; lnop #nop; lnop
#nop; lnop #nop; lnop
#nop; lnop #nop; lnop
#nop #nop
rotqby oldovl, map, off4 # 1,4 55 rotqby oldovl, map, addr4 # 1,4 55
nop #nop
shufb newmap, newovl, map, genwi # 0,4 56 shufb newmap, newovl, map, genwi # 0,4 56
#if MFC_TAG_ID < 16 #if MFC_TAG_ID < 16
ila newmask, 1 << MFC_TAG_ID # 0,2 57 ila newmask, 1 << MFC_TAG_ID # 0,2 57
@ -375,7 +378,7 @@ __ovly_xfer_loop:
#lnop #lnop
#nop; lnop #nop; lnop
#nop; lnop #nop; lnop
stqx newmap, tab4, off4 # 1,6 60 stqd newmap, 0(addr4) # 1,6 60
/* Save app's tagmask, wait for DMA complete, restore mask. */ /* Save app's tagmask, wait for DMA complete, restore mask. */
ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61 ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61

Binary file not shown.