old-cross-binutils/ld/testsuite/ld-x86-64/tlspic3.s
H.J. Lu e2cbcd9156 Support x86-64 TLS code sequences without PLT
We can generate x86-64 TLS code sequences for general and local dynamic
models without PLT, which uses indirect call via GOT:

call *__tls_get_addr@GOTPCREL(%rip)

instead of direct call:

call __tls_get_addr[@PLT]

Since direct call is 4-byte long and indirect call is 5-byte long, the
extra one byte must be handled properly.

For general dynamic model, one 0x66 prefix before call instruction is
removed to make room for indirect call.  For local dynamic model, we
simply use 5-byte indirect call.

TLS linker optimization is updated to recognize new instruction patterns.
For local dynamic model to local exec model transition, we generate
4 0x66 prefixes, instead of 3, before mov instruction in 64-bit and
generate a 5-byte nop, instead of 4-byte, before mov instruction in
32-bit.  Since linker may convert

call *__tls_get_addr@GOTPCREL(%rip)

to

addr32 call __tls_get_addr

when producing static executable, both patterns are recognized.

bfd/

	* elf64-x86-64.c (elf_x86_64_link_hash_entry): Add tls_get_addr.
	(elf_x86_64_link_hash_newfunc): Initialize tls_get_addr to 2.
	(elf_x86_64_check_tls_transition): Check indirect call and
	direct call with the addr32 prefix for general and local dynamic
	models.  Set the tls_get_addr field.
	(elf_x86_64_convert_load_reloc): Always use addr32 prefix for
	indirect __tls_get_addr call via GOT.
	(elf_x86_64_relocate_section): Handle GD->LE, GD->IE and LD->LE
	transitions with indirect call and direct call with the addr32
	prefix.

ld/

	* testsuite/ld-x86-64/pass.out: New file.
	* testsuite/ld-x86-64/tls-def1.c: Likewise.
	* testsuite/ld-x86-64/tls-gd1.S: Likewise.
	* testsuite/ld-x86-64/tls-ld1.S: Likewise.
	* testsuite/ld-x86-64/tls-main1.c: Likewise.
	* testsuite/ld-x86-64/tls.exp: Likewise.
	* testsuite/ld-x86-64/tlsbin2-nacl.rd: Likewise.
	* testsuite/ld-x86-64/tlsbin2.dd: Likewise.
	* testsuite/ld-x86-64/tlsbin2.rd: Likewise.
	* testsuite/ld-x86-64/tlsbin2.sd: Likewise.
	* testsuite/ld-x86-64/tlsbin2.td: Likewise.
	* testsuite/ld-x86-64/tlsbinpic2.s: Likewise.
	* testsuite/ld-x86-64/tlsgd10.dd: Likewise.
	* testsuite/ld-x86-64/tlsgd10.s: Likewise.
	* testsuite/ld-x86-64/tlsgd11.dd: Likewise.
	* testsuite/ld-x86-64/tlsgd11.s: Likewise.
	* testsuite/ld-x86-64/tlsgd12.d: Likewise.
	* testsuite/ld-x86-64/tlsgd12.s: Likewise.
	* testsuite/ld-x86-64/tlsgd13.d: Likewise.
	* testsuite/ld-x86-64/tlsgd13.s: Likewise.
	* testsuite/ld-x86-64/tlsgd14.dd: Likewise.
	* testsuite/ld-x86-64/tlsgd14.s: Likewise.
	* testsuite/ld-x86-64/tlsgd5c.s: Likewise.
	* testsuite/ld-x86-64/tlsgd6c.s: Likewise.
	* testsuite/ld-x86-64/tlsgd9.dd: Likewise.
	* testsuite/ld-x86-64/tlsgd9.s: Likewise.
	* testsuite/ld-x86-64/tlsld4.dd: Likewise.
	* testsuite/ld-x86-64/tlsld4.s: Likewise.
	* testsuite/ld-x86-64/tlsld5.dd: Likewise.
	* testsuite/ld-x86-64/tlsld5.s: Likewise.
	* testsuite/ld-x86-64/tlsld6.dd: Likewise.
	* testsuite/ld-x86-64/tlsld6.s: Likewise.
	* testsuite/ld-x86-64/tlspic2-nacl.rd: Likewise.
	* testsuite/ld-x86-64/tlspic2.dd: Likewise.
	* testsuite/ld-x86-64/tlspic2.rd: Likewise.
	* testsuite/ld-x86-64/tlspic2.sd: Likewise.
	* testsuite/ld-x86-64/tlspic2.td: Likewise.
	* testsuite/ld-x86-64/tlspic3.s: Likewise.
	* testsuite/ld-x86-64/tlspie2.s: Likewise.
	* testsuite/ld-x86-64/tlspie2a.d: Likewise.
	* testsuite/ld-x86-64/tlspie2b.d: Likewise.
	* testsuite/ld-x86-64/tlspie2c.d: Likewise.
	* testsuite/ld-x86-64/tlsgd5.dd: Updated.
	* testsuite/ld-x86-64/tlsgd6.dd: Likewise.
	* testsuite/ld-x86-64/x86-64.exp: Run libtlspic2.so, tlsbin2,
	tlsgd5b, tlsgd6b, tlsld4, tlsld5, tlsld6, tlsgd9, tlsgd10,
	tlsgd11, tlsgd14, tlsgd12, tlsgd13, tlspie2a, tlspie2b and
	tlspie2c.
2016-06-06 11:07:16 -07:00

290 lines
5.9 KiB
ArmAsm

/* Force .data aligned to 4K, so .got very likely gets at 0x102190
(0x60 bytes .tdata and 0x130 bytes .dynamic) */
.data
.balign 4096
/* Initialized thread-local variables.  Three groups of eight 4-byte
values follow (24 * 4 = 0x60 bytes in total):
sg1-sg8  declared .globl, default visibility;
sl1-sl8  no .globl directive, so they stay local to this file;
sh1-sh8  declared .globl and then marked .hidden.
The distinct initial values (17.., 65.., 257..) make each group easy
to tell apart.  */
.section ".tdata", "awT", @progbits
.globl sg1, sg2, sg3, sg4, sg5, sg6, sg7, sg8
.globl sh1, sh2, sh3, sh4, sh5, sh6, sh7, sh8
.hidden sh1, sh2, sh3, sh4, sh5, sh6, sh7, sh8
/* Global variables.  */
sg1: .long 17
sg2: .long 18
sg3: .long 19
sg4: .long 20
sg5: .long 21
sg6: .long 22
sg7: .long 23
sg8: .long 24
/* File-local variables.  */
sl1: .long 65
sl2: .long 66
sl3: .long 67
sl4: .long 68
sl5: .long 69
sl6: .long 70
sl7: .long 71
sl8: .long 72
/* Hidden variables defined in this file.  */
sh1: .long 257
sh2: .long 258
sh3: .long 259
sh4: .long 260
sh5: .long 261
sh6: .long 262
sh7: .long 263
sh8: .long 264
/* Force .text aligned to 4K, so it very likely gets at 0x1000. */
.text
.balign 4096
.globl fn1
.type fn1,@function
/* fn1: a straight-line collection of TLS access sequences.  The first
half calls __tls_get_addr indirectly through its GOT slot
(call *__tls_get_addr@GOTPCREL(%rip)); the second half repeats the
GD and LD cases in the -mcmodel=large form, which calls through a
PLT offset added to the GOT base held in %r15.  Sequences are
separated by nop padding.
sH1, sH2 and sH5 are referenced but not defined in this file;
presumably another object in the same test link defines them as
hidden symbols -- confirm against the test driver.  */
fn1:
pushq %rbp
movq %rsp, %rbp
nop;nop;nop;nop
/* GD */
/* Each GD sequence below has the same shape: a 0x66 prefix byte, leaq
sym@tlsgd(%rip) into %rdi, then a 0x66 prefix and a rex64 prefix on
the indirect call.  */
.byte 0x66
leaq sg1@tlsgd(%rip), %rdi
.byte 0x66
rex64
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop;nop;nop
/* GD -> IE because variable is referenced through IE too */
.byte 0x66
leaq sg2@tlsgd(%rip), %rdi
.byte 0x66
rex64
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop;nop;nop
/* GD against local variable */
.byte 0x66
leaq sl1@tlsgd(%rip), %rdi
.byte 0x66
rex64
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop;nop;nop
/* GD -> IE against local variable referenced through IE too */
.byte 0x66
leaq sl2@tlsgd(%rip), %rdi
.byte 0x66
rex64
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop;nop;nop
/* GD against hidden and local variable */
.byte 0x66
leaq sh1@tlsgd(%rip), %rdi
.byte 0x66
rex64
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop;nop;nop
/* GD -> IE against hidden and local variable referenced through
IE too */
.byte 0x66
leaq sh2@tlsgd(%rip), %rdi
.byte 0x66
rex64
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop;nop;nop
/* GD against hidden but not local variable */
.byte 0x66
leaq sH1@tlsgd(%rip), %rdi
.byte 0x66
rex64
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop;nop;nop
/* GD -> IE against hidden but not local variable referenced through
IE too */
.byte 0x66
leaq sH2@tlsgd(%rip), %rdi
.byte 0x66
rex64
call *__tls_get_addr@GOTPCREL(%rip)
nop;nop;nop;nop
/* LD */
/* Each LD sequence uses an unprefixed leaq sym@tlsld(%rip) into %rdi and
an unprefixed indirect call; %rax after the call is then used as the
base register for two @dtpoff-relative leaq instructions.  */
leaq sl1@tlsld(%rip), %rdi
call *__tls_get_addr@GOTPCREL(%rip)
nop
leaq sl1@dtpoff(%rax), %rdx
nop;nop
leaq 2+sl2@dtpoff(%rax), %r9
nop;nop;nop;nop
/* LD against hidden and local variables */
leaq sh1@tlsld(%rip), %rdi
call *__tls_get_addr@GOTPCREL(%rip)
nop
leaq sh1@dtpoff(%rax), %rdx
nop;nop
leaq sh2@dtpoff+3(%rax), %rcx
nop;nop;nop;nop
/* LD against hidden but not local variables */
leaq sH1@tlsld(%rip), %rdi
call *__tls_get_addr@GOTPCREL(%rip)
nop
leaq sH1@dtpoff(%rax), %r12
nop;nop
leaq sH2@dtpoff+1(%rax), %rcx
nop;nop
/* IE against global var */
/* The next four IE sequences read %fs:0 into a register and then add
the sym@gottpoff(%rip) GOT entry to it.  */
movq %fs:0, %rcx
nop;nop
addq sg2@gottpoff(%rip), %rcx
nop;nop;nop;nop
/* IE against local var */
movq %fs:0, %r14
nop;nop
addq sl2@gottpoff(%rip), %r14
nop;nop;nop;nop
/* IE against hidden and local var */
movq %fs:0, %rcx
nop;nop
addq sh2@gottpoff(%rip), %rcx
nop;nop;nop;nop
/* IE against hidden but not local var */
movq %fs:0, %rcx
nop;nop
addq sH2@gottpoff(%rip), %rcx
nop;nop;nop;nop
/* Direct access through %fs */
/* These variants load the sym@gottpoff(%rip) GOT entry first and use it
as the address in an %fs-relative load.  */
/* IE against global var */
movq sg5@gottpoff(%rip), %rcx
nop;nop
movq %fs:(%rcx), %rdx
nop;nop;nop;nop
/* IE against local var */
movq sl5@gottpoff(%rip), %r10
nop;nop
movq %fs:(%r10), %r12
nop;nop;nop;nop
/* IE against hidden and local var */
movq sh5@gottpoff(%rip), %rdx
nop;nop
movq %fs:(%rdx), %rdx
nop;nop;nop;nop
/* IE against hidden but not local var */
movq sH5@gottpoff(%rip), %rcx
nop;nop
movq %fs:(%rcx), %rdx
nop;nop;nop;nop
/* Set up the GOT base for the large-model sequences: %r11 receives the
distance from label 1 to _GLOBAL_OFFSET_TABLE_, %r15 receives the
address of label 1, and their sum leaves the GOT address in %r15.
%r15 is pushed twice here and popped twice before returning;
presumably the duplicate push keeps %rsp 16-byte aligned for the
calls below -- confirm.  */
1: movabsq $_GLOBAL_OFFSET_TABLE_-1b, %r11
pushq %r15
pushq %r15
leaq 1b(%rip), %r15
addq %r11, %r15
nop;nop
/* -mcmodel=large sequences */
/* In every large-model sequence the call target is built in %rax from
the 64-bit immediate __tls_get_addr@pltoff plus the GOT base in %r15,
and is then called indirectly through the register.  */
/* -mcmodel=large GD */
leaq sg1@tlsgd(%rip), %rdi
movabsq $__tls_get_addr@pltoff, %rax
addq %r15, %rax
call *%rax
nop;nop;nop;nop
/* -mcmodel=large GD -> IE because variable is referenced through IE too */
leaq sg2@tlsgd(%rip), %rdi
movabsq $__tls_get_addr@pltoff, %rax
addq %r15, %rax
call *%rax
nop;nop;nop;nop
/* -mcmodel=large GD against local variable */
leaq sl1@tlsgd(%rip), %rdi
movabsq $__tls_get_addr@pltoff, %rax
addq %r15, %rax
call *%rax
nop;nop;nop;nop
/* -mcmodel=large GD -> IE against local variable referenced through IE too */
leaq sl2@tlsgd(%rip), %rdi
movabsq $__tls_get_addr@pltoff, %rax
addq %r15, %rax
call *%rax
nop;nop;nop;nop
/* -mcmodel=large GD against hidden and local variable */
leaq sh1@tlsgd(%rip), %rdi
movabsq $__tls_get_addr@pltoff, %rax
addq %r15, %rax
call *%rax
nop;nop;nop;nop
/* -mcmodel=large GD -> IE against hidden and local variable referenced through
IE too */
leaq sh2@tlsgd(%rip), %rdi
movabsq $__tls_get_addr@pltoff, %rax
addq %r15, %rax
call *%rax
nop;nop;nop;nop
/* -mcmodel=large GD against hidden but not local variable */
leaq sH1@tlsgd(%rip), %rdi
movabsq $__tls_get_addr@pltoff, %rax
addq %r15, %rax
call *%rax
nop;nop;nop;nop
/* -mcmodel=large GD -> IE against hidden but not local variable referenced through
IE too */
leaq sH2@tlsgd(%rip), %rdi
movabsq $__tls_get_addr@pltoff, %rax
addq %r15, %rax
call *%rax
nop;nop;nop;nop
/* -mcmodel=large LD */
leaq sl1@tlsld(%rip), %rdi
movabsq $__tls_get_addr@pltoff, %rax
addq %r15, %rax
call *%rax
nop;nop
leaq sl1@dtpoff(%rax), %rdx
nop;nop
leaq 2+sl2@dtpoff(%rax), %r9
nop;nop;nop;nop
/* -mcmodel=large LD against hidden and local variables */
leaq sh1@tlsld(%rip), %rdi
movabsq $__tls_get_addr@pltoff, %rax
addq %r15, %rax
call *%rax
nop;nop
leaq sh1@dtpoff(%rax), %rdx
nop;nop
leaq sh2@dtpoff+3(%rax), %rcx
nop;nop;nop;nop
/* -mcmodel=large LD against hidden but not local variables */
leaq sH1@tlsld(%rip), %rdi
movabsq $__tls_get_addr@pltoff, %rax
addq %r15, %rax
call *%rax
nop;nop
leaq sH1@dtpoff(%rax), %r12
nop;nop
leaq sH2@dtpoff+1(%rax), %rcx
nop;nop
/* Undo the two pushes of %r15, then tear down the frame and return.  */
popq %r15
popq %r15
leave
ret