e2cbcd9156
We can generate x86-64 TLS code sequences for general and local dynamic models without PLT, which uses indirect call via GOT: call *__tls_get_addr@GOTPCREL(%rip) instead of direct call: call __tls_get_addr[@PLT]. Since direct call is 4-byte long and indirect call is 5-byte long, the extra one byte must be handled properly. For general dynamic model, one 0x66 prefix before call instruction is removed to make room for indirect call. For local dynamic model, we simply use 5-byte indirect call. TLS linker optimization is updated to recognize new instruction patterns. For local dynamic model to local exec model transition, we generate 4 0x66 prefixes, instead of 3, before mov instruction in 64-bit and generate a 5-byte nop, instead of 4-byte, before mov instruction in 32-bit. Since linker may convert call *__tls_get_addr@GOTPCREL(%rip) to addr32 call __tls_get_addr when producing static executable, both patterns are recognized. bfd/ * elf64-x86-64.c (elf_x86_64_link_hash_entry): Add tls_get_addr. (elf_x86_64_link_hash_newfunc): Initialize tls_get_addr to 2. (elf_x86_64_check_tls_transition): Check indirect call and direct call with the addr32 prefix for general and local dynamic models. Set the tls_get_addr field. (elf_x86_64_convert_load_reloc): Always use addr32 prefix for indirect __tls_get_addr call via GOT. (elf_x86_64_relocate_section): Handle GD->LE, GD->IE and LD->LE transitions with indirect call and direct call with the addr32 prefix. ld/ * testsuite/ld-x86-64/pass.out: New file. * testsuite/ld-x86-64/tls-def1.c: Likewise. * testsuite/ld-x86-64/tls-gd1.S: Likewise. * testsuite/ld-x86-64/tls-ld1.S: Likewise. * testsuite/ld-x86-64/tls-main1.c: Likewise. * testsuite/ld-x86-64/tls.exp: Likewise. * testsuite/ld-x86-64/tlsbin2-nacl.rd: Likewise. * testsuite/ld-x86-64/tlsbin2.dd: Likewise. * testsuite/ld-x86-64/tlsbin2.rd: Likewise. * testsuite/ld-x86-64/tlsbin2.sd: Likewise. * testsuite/ld-x86-64/tlsbin2.td: Likewise. * testsuite/ld-x86-64/tlsbinpic2.s: Likewise. 
* testsuite/ld-x86-64/tlsgd10.dd: Likewise. * testsuite/ld-x86-64/tlsgd10.s: Likewise. * testsuite/ld-x86-64/tlsgd11.dd: Likewise. * testsuite/ld-x86-64/tlsgd11.s: Likewise. * testsuite/ld-x86-64/tlsgd12.d: Likewise. * testsuite/ld-x86-64/tlsgd12.s: Likewise. * testsuite/ld-x86-64/tlsgd13.d: Likewise. * testsuite/ld-x86-64/tlsgd13.s: Likewise. * testsuite/ld-x86-64/tlsgd14.dd: Likewise. * testsuite/ld-x86-64/tlsgd14.s: Likewise. * testsuite/ld-x86-64/tlsgd5c.s: Likewise. * testsuite/ld-x86-64/tlsgd6c.s: Likewise. * testsuite/ld-x86-64/tlsgd9.dd: Likewise. * testsuite/ld-x86-64/tlsgd9.s: Likewise. * testsuite/ld-x86-64/tlsld4.dd: Likewise. * testsuite/ld-x86-64/tlsld4.s: Likewise. * testsuite/ld-x86-64/tlsld5.dd: Likewise. * testsuite/ld-x86-64/tlsld5.s: Likewise. * testsuite/ld-x86-64/tlsld6.dd: Likewise. * testsuite/ld-x86-64/tlsld6.s: Likewise. * testsuite/ld-x86-64/tlspic2-nacl.rd: Likewise. * testsuite/ld-x86-64/tlspic2.dd: Likewise. * testsuite/ld-x86-64/tlspic2.rd: Likewise. * testsuite/ld-x86-64/tlspic2.sd: Likewise. * testsuite/ld-x86-64/tlspic2.td: Likewise. * testsuite/ld-x86-64/tlspic3.s: Likewise. * testsuite/ld-x86-64/tlspie2.s: Likewise. * testsuite/ld-x86-64/tlspie2a.d: Likewise. * testsuite/ld-x86-64/tlspie2b.d: Likewise. * testsuite/ld-x86-64/tlspie2c.d: Likewise. * testsuite/ld-x86-64/tlsgd5.dd: Updated. * testsuite/ld-x86-64/tlsgd6.dd: Likewise. * testsuite/ld-x86-64/x86-64.exp: Run libtlspic2.so, tlsbin2, tlsgd5b, tlsgd6b, tlsld4, tlsld5, tlsld6, tlsgd9, tlsgd10, tlsgd11, tlsgd14, tlsgd12, tlsgd13, tlspie2a, tlspie2b and tlspie2c.
146 lines
2.9 KiB
ArmAsm
146 lines
2.9 KiB
ArmAsm
/* Data layout for the TLS test: a page-aligned .data section followed by
   a .tdata section holding three groups of 32-bit thread-local variables
   (sg*, sl*, sh*).  */
/* Force .data aligned to 4K, so that .got very likely gets at
|
|
0x5021a0 (0x60 bytes .tdata and 0x140 bytes .dynamic) */
|
|
.data
|
|
.balign 4096
|
|
/* Initialized thread-local data; the "awT" flags mark the section as
   allocatable, writable and TLS.  The 24 .long entries below occupy
   24 * 4 = 0x60 bytes, matching the .tdata size quoted above.  */
.section ".tdata", "awT", @progbits
|
|
/* sg1..sg8: global binding, default visibility.  */
.globl sg1, sg2, sg3, sg4, sg5, sg6, sg7, sg8
|
|
/* sh1..sh8: global binding, but marked hidden by the .hidden directive
   that follows.  */
.globl sh1, sh2, sh3, sh4, sh5, sh6, sh7, sh8
|
|
.hidden sh1, sh2, sh3, sh4, sh5, sh6, sh7, sh8
|
|
/* Global variables, initial values 17..24.  */
sg1: .long 17
|
|
sg2: .long 18
|
|
sg3: .long 19
|
|
sg4: .long 20
|
|
sg5: .long 21
|
|
sg6: .long 22
|
|
sg7: .long 23
|
|
sg8: .long 24
|
|
/* sl1..sl8 have no .globl directive in the visible source, so they remain
   local symbols.  Initial values 65..72.  */
sl1: .long 65
|
|
sl2: .long 66
|
|
sl3: .long 67
|
|
sl4: .long 68
|
|
sl5: .long 69
|
|
sl6: .long 70
|
|
sl7: .long 71
|
|
sl8: .long 72
|
|
/* Hidden variables, initial values 257..264.  */
sh1: .long 257
|
|
sh2: .long 258
|
|
sh3: .long 259
|
|
sh4: .long 260
|
|
sh5: .long 261
|
|
sh6: .long 262
|
|
sh7: .long 263
|
|
sh8: .long 264
|
|
/* Force .text aligned to 4K, so it very likely gets at 0x401000. */
|
|
.text
|
|
.balign 4096
|
|
/* fn2: straight-line collection of x86-64 TLS access sequences.

   After a frame-pointer prologue the body runs one sequence per case named
   in the comments below (GD, LD, IE, and direct %fs access, against
   global, local and hidden variables), each followed by nop padding, and
   then tears the frame down and returns.  None of the loaded or computed
   values is consumed afterwards; presumably the function exists only to
   hand the linker instruction patterns to rewrite, not to compute a
   result -- confirm against the test driver.

   Every __tls_get_addr call uses the GOT-indirect form
   call *__tls_get_addr@GOTPCREL(%rip) rather than a direct or PLT call.

   Symbols: sg* (global), sl* (local) and sh* (hidden) are the .tdata
   variables of this file; sG1, sG2 and sG5 are not defined in the visible
   part of this file and are presumably supplied by another object.

   In:     nothing is read from argument registers.
   Out:    no return value is set deliberately.
   Writes: %rax, %rcx, %rdx, %rdi, %r9, %r10, %r11, %r12 and flags, plus
           whatever __tls_get_addr itself clobbers.

   NOTE(review): %r12 is callee-saved under the System V AMD64 ABI and is
   written below without being saved or restored.  Presumably harmless if
   this object is only linked and disassembled, never called -- confirm
   before reusing the code in a runnable test.  */
.globl fn2
|
|
.type fn2,@function
|
|
fn2:
|
|
/* Frame-pointer prologue; undone by the leave at the end.  */
pushq %rbp
|
|
movq %rsp, %rbp
|
|
|
|
/* GD -> IE because variable is not defined in executable */
|
|
/* Shape shared by every general-dynamic case below: a 0x66-prefixed leaq
   of sym@tlsgd(%rip) into %rdi, then a call to __tls_get_addr through its
   GOT slot, prefixed with 0x66 and rex64.  The prefixes look like pure
   padding that fixes the sequence length for the linker's TLS rewriting
   -- TODO confirm the exact length against the x86-64 TLS ABI document.  */
.byte 0x66
|
|
leaq sG1@tlsgd(%rip), %rdi
|
|
.byte 0x66
|
|
rex64
|
|
call *__tls_get_addr@GOTPCREL(%rip)
|
|
/* Four nops end each test case; the shorter nop;nop runs appear between
   the instructions of a single case.  */
nop;nop;nop;nop
|
|
|
|
/* GD -> IE because variable is not defined in executable where
|
|
the variable is referenced through IE too */
|
|
/* sG2 is the symbol that is also referenced with @gottpoff in the
   "IE against global var" case further down.  */
.byte 0x66
|
|
leaq sG2@tlsgd(%rip), %rdi
|
|
.byte 0x66
|
|
rex64
|
|
call *__tls_get_addr@GOTPCREL(%rip)
|
|
nop;nop;nop;nop
|
|
|
|
/* GD -> LE with global variable defined in executable */
|
|
.byte 0x66
|
|
leaq sg1@tlsgd(%rip), %rdi
|
|
.byte 0x66
|
|
rex64
|
|
call *__tls_get_addr@GOTPCREL(%rip)
|
|
nop;nop;nop;nop
|
|
|
|
/* GD -> LE with local variable defined in executable */
|
|
.byte 0x66
|
|
leaq sl1@tlsgd(%rip), %rdi
|
|
.byte 0x66
|
|
rex64
|
|
call *__tls_get_addr@GOTPCREL(%rip)
|
|
nop;nop;nop;nop
|
|
|
|
/* GD -> LE with hidden variable defined in executable */
|
|
.byte 0x66
|
|
leaq sh1@tlsgd(%rip), %rdi
|
|
.byte 0x66
|
|
rex64
|
|
call *__tls_get_addr@GOTPCREL(%rip)
|
|
nop;nop;nop;nop
|
|
|
|
/* LD -> LE */
|
|
/* Local-dynamic shape: an unprefixed leaq of sym@tlsld(%rip) into %rdi and
   an unprefixed GOT-indirect call, followed by @dtpoff-based leaq
   instructions that use %rax (the call's return register) as base.  */
leaq sl1@tlsld(%rip), %rdi
|
|
call *__tls_get_addr@GOTPCREL(%rip)
|
|
nop;nop
|
|
/* Addend written before the symbol (1+sl1@dtpoff).  */
leaq 1+sl1@dtpoff(%rax), %rdx
|
|
nop;nop
|
|
/* Addend written after the relocation operator (sl2@dtpoff+2).  */
leaq sl2@dtpoff+2(%rax), %r9
|
|
nop;nop;nop;nop
|
|
|
|
/* LD -> LE against hidden variables */
|
|
leaq sh1@tlsld(%rip), %rdi
|
|
call *__tls_get_addr@GOTPCREL(%rip)
|
|
nop;nop
|
|
/* One @dtpoff reference without an addend, then one with addend 3.  */
leaq sh1@dtpoff(%rax), %rdx
|
|
nop;nop
|
|
leaq 3+sh2@dtpoff(%rax), %rcx
|
|
nop;nop;nop;nop
|
|
|
|
/* IE against global var */
|
|
/* Initial-exec shape used by the next four cases: load the quadword at
   %fs:0 (presumably the thread pointer -- per the x86-64 TLS ABI, confirm)
   into a register, then add the value stored at sym@gottpoff(%rip).  */
movq %fs:0, %r9
|
|
nop;nop
|
|
addq sG2@gottpoff(%rip), %r9
|
|
nop;nop;nop;nop
|
|
|
|
/* IE -> LE against global var defined in exec */
|
|
movq %fs:0, %r10
|
|
nop;nop
|
|
addq sg1@gottpoff(%rip), %r10
|
|
nop;nop;nop;nop
|
|
|
|
/* IE -> LE against local var */
|
|
movq %fs:0, %rax
|
|
nop;nop
|
|
addq sl1@gottpoff(%rip), %rax
|
|
nop;nop;nop;nop
|
|
|
|
/* IE -> LE against hidden var */
|
|
movq %fs:0, %rcx
|
|
nop;nop
|
|
addq sh1@gottpoff(%rip), %rcx
|
|
nop;nop;nop;nop
|
|
|
|
/* Direct access through %fs */
|
|
|
|
/* IE against global var */
|
|
/* Shape used by the remaining cases: load the value at sym@gottpoff(%rip)
   into a register, then use that register as an %fs-relative address to
   read the variable itself.  */
movq sG5@gottpoff(%rip), %rcx
|
|
nop;nop
|
|
movq %fs:(%rcx), %rdx
|
|
nop;nop;nop;nop
|
|
|
|
/* IE->LE against local var */
|
|
movq sl5@gottpoff(%rip), %r11
|
|
nop;nop
|
|
/* NOTE(review): this overwrites callee-saved %r12 without saving it.  */
movq %fs:(%r11), %r12
|
|
nop;nop;nop;nop
|
|
|
|
/* IE->LE against hidden var */
|
|
/* Same register (%rdx) serves as both the offset and the destination.  */
movq sh5@gottpoff(%rip), %rdx
|
|
nop;nop
|
|
movq %fs:(%rdx), %rdx
|
|
nop;nop;nop;nop
|
|
|
|
/* Epilogue: restore %rsp and %rbp saved by the prologue, then return.  */
leave
|
|
ret
|