Add amdfam10 instructions

This commit is contained in:
Michael Meissner 2006-07-13 22:25:48 +00:00
parent 06d65a1d1e
commit 050dfa73de
13 changed files with 1286 additions and 1007 deletions

View file

@ -1,3 +1,17 @@
2006-07-13 Dwarakanath Rajagopal <dwarak.rajagopal@amd.com>
Michael Meissner <michael.meissner@amd.com>
* config/tc-i386.h (PROCESSOR_AMDFAM10): New processor_type.
(CpuSSE4a, CpuABM, CpuAmdFam10): New Cpu directives.
* config/tc-i386.c (cpu_arch): Add support for AmdFam10
architecture.
(i386_align_code): Ditto.
(md_assemble): Add support for insertq/extrq instructions,
swapping as needed for intel syntax.
(swap_imm_operands): New function to swap immediate operands.
(swap_operands): Deal with 4 operand instructions.
(build_modrm_byte): Add support for insertq instruction.
2006-07-13 H.J. Lu <hongjiu.lu@intel.com>
* config/tc-i386.h (Size64): Fix a typo in comment.

View file

@ -89,6 +89,7 @@ static const reg_entry *parse_register PARAMS ((char *reg_string,
static char *parse_insn PARAMS ((char *, char *));
static char *parse_operands PARAMS ((char *, const char *));
static void swap_operands PARAMS ((void));
static void swap_imm_operands PARAMS ((void));
static void optimize_imm PARAMS ((void));
static void optimize_disp PARAMS ((void));
static int match_template PARAMS ((void));
@ -491,6 +492,9 @@ static const arch_entry cpu_arch[] =
{"k8", PROCESSOR_K8,
Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
|CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2},
{"amdfam10", PROCESSOR_AMDFAM10,
Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
|CpuSledgehammer|CpuAmdFam10|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2|CpuSSE3|CpuSSE4a|CpuABM},
{".mmx", PROCESSOR_UNKNOWN,
CpuMMX},
{".sse", PROCESSOR_UNKNOWN,
@ -508,7 +512,11 @@ static const arch_entry cpu_arch[] =
{".pacifica", PROCESSOR_UNKNOWN,
CpuSVME},
{".svme", PROCESSOR_UNKNOWN,
CpuSVME}
CpuSVME},
{".sse4a", PROCESSOR_UNKNOWN,
CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuSSE4a},
{".abm", PROCESSOR_UNKNOWN,
CpuABM}
};
const pseudo_typeS md_pseudo_table[] =
@ -741,7 +749,7 @@ i386_align_code (fragP, count)
1. For PROCESSOR_I486, PROCESSOR_PENTIUM and PROCESSOR_GENERIC32,
f32_patt will be used.
2. For PROCESSOR_K8 in 64bit, NOPs with 0x66 prefixe will be used.
2. For PROCESSOR_K8 and PROCESSOR_AMDFAM10 in 64bit, NOPs with 0x66 prefix will be used.
3. For PROCESSOR_MEROM, alt_long_patt will be used.
4. For PROCESSOR_PENTIUMPRO, PROCESSOR_PENTIUM4, PROCESSOR_NOCONA,
PROCESSOR_YONAH, PROCESSOR_MEROM, PROCESSOR_K6, PROCESSOR_ATHLON
@ -812,6 +820,7 @@ i386_align_code (fragP, count)
case PROCESSOR_ATHLON:
case PROCESSOR_K8:
case PROCESSOR_GENERIC64:
case PROCESSOR_AMDFAM10:
patt = alt_short_patt;
break;
case PROCESSOR_I486:
@ -840,6 +849,7 @@ i386_align_code (fragP, count)
case PROCESSOR_K6:
case PROCESSOR_ATHLON:
case PROCESSOR_K8:
case PROCESSOR_AMDFAM10:
case PROCESSOR_GENERIC32:
/* We use cpu_arch_isa_flags to check if we CAN optimize
for Cpu686. */
@ -1733,15 +1743,27 @@ md_assemble (line)
if (line == NULL)
return;
/* The order of the immediates should be reversed
for 2 immediates extrq and insertq instructions */
if ((i.imm_operands == 2) &&
((strcmp (mnemonic, "extrq") == 0)
|| (strcmp (mnemonic, "insertq") == 0)))
{
swap_imm_operands ();
/* "extrq" and "insertq" are the only two instructions whose operands
have to be reversed even though they have two immediate operands.
*/
if (intel_syntax)
swap_operands ();
}
/* Now we've parsed the mnemonic into a set of templates, and have the
operands at hand. */
/* All intel opcodes have reversed operands except for "bound" and
"enter". We also don't reverse intersegment "jmp" and "call"
instructions with 2 immediate operands so that the immediate segment
precedes the offset, as it does when in AT&T mode. "enter" and the
intersegment "jmp" and "call" instructions are the only ones that
have two immediate operands. */
precedes the offset, as it does when in AT&T mode. */
if (intel_syntax && i.operands > 1
&& (strcmp (mnemonic, "bound") != 0)
&& (strcmp (mnemonic, "invlpga") != 0)
@ -2271,6 +2293,27 @@ parse_operands (l, mnemonic)
return l;
}
static void
swap_imm_operands ()
{
union i386_op temp_op;
unsigned int temp_type;
enum bfd_reloc_code_real temp_reloc;
int xchg1 = 0;
int xchg2 = 1;
temp_type = i.types[xchg2];
i.types[xchg2] = i.types[xchg1];
i.types[xchg1] = temp_type;
temp_op = i.op[xchg2];
i.op[xchg2] = i.op[xchg1];
i.op[xchg1] = temp_op;
temp_reloc = i.reloc[xchg2];
i.reloc[xchg2] = i.reloc[xchg1];
i.reloc[xchg1] = temp_reloc;
}
static void
swap_operands ()
{
@ -2280,6 +2323,26 @@ swap_operands ()
int xchg1 = 0;
int xchg2 = 0;
if (i.operands == 4)
/* There will be two exchanges in a 4 operand instruction.
The first exchange is done inside this block (1st and 4th operand).
The next exchange is done outside this block (2nd and 3rd operand).  */
{
xchg1 = 0;
xchg2 = 3;
temp_type = i.types[xchg2];
i.types[xchg2] = i.types[xchg1];
i.types[xchg1] = temp_type;
temp_op = i.op[xchg2];
i.op[xchg2] = i.op[xchg1];
i.op[xchg1] = temp_op;
temp_reloc = i.reloc[xchg2];
i.reloc[xchg2] = i.reloc[xchg1];
i.reloc[xchg1] = temp_reloc;
xchg1 = 1;
xchg2 = 2;
}
if (i.operands == 2)
{
xchg1 = 0;
@ -3281,6 +3344,10 @@ build_modrm_byte ()
| SReg2 | SReg3
| Control | Debug | Test))
? 0 : 1);
/* In 4 operand instructions with 2 immediate operands, the first two
   operands are the immediate bytes, so the source operand is the one
   that follows the immediates.  Note the comparison on imm_operands:
   the previous `i.imm_operands=2' was an assignment, which made the
   test always true and clobbered the immediate count.  */
if (i.operands == 4 && i.imm_operands == 2)
  source++;
dest = source + 1;
i.rm.mode = 3;

View file

@ -91,8 +91,8 @@ extern const char extra_symbol_chars[];
extern const char *i386_comment_chars;
#define tc_comment_chars i386_comment_chars
#define MAX_OPERANDS 3 /* max operands per insn */
#define MAX_IMMEDIATE_OPERANDS 2/* max immediates per insn (lcall, ljmp) */
#define MAX_OPERANDS 4 /* max operands per insn */
#define MAX_IMMEDIATE_OPERANDS 2/* max immediates per insn (lcall, ljmp, insertq, extrq) */
#define MAX_MEMORY_OPERANDS 2 /* max memory refs per insn (string ops) */
/* Prefixes will be emitted in the order defined below.
@ -185,6 +185,9 @@ typedef struct
#define CpuSVME 0x80000 /* AMD Secure Virtual Machine Ext-s required */
#define CpuVMX 0x100000 /* VMX Instructions required */
#define CpuMNI 0x200000 /* Merom New Instructions required */
#define CpuSSE4a 0x400000 /* SSE4a New Instructions required */
#define CpuABM 0x800000 /* ABM New Instructions required */
#define CpuAmdFam10 0x1000000 /* AmdFam10 New Instructions required */
/* These flags are set by gas depending on the flag_code. */
#define Cpu64 0x4000000 /* 64bit support required */
@ -192,8 +195,8 @@ typedef struct
/* The default value for unknown CPUs - enable all features to avoid problems. */
#define CpuUnknownFlags (Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 \
|CpuP4|CpuSledgehammer|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuPNI|CpuVMX \
|Cpu3dnow|Cpu3dnowA|CpuK6|CpuAthlon|CpuPadLock|CpuSVME|CpuMNI)
|CpuP4|CpuSledgehammer|CpuAmdFam10|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuPNI|CpuVMX \
|Cpu3dnow|Cpu3dnowA|CpuK6|CpuAthlon|CpuPadLock|CpuSVME|CpuMNI|CpuABM|CpuSSE4a)
/* the bits in opcode_modifier are used to generate the final opcode from
the base_opcode. These bits also are used to detect alternate forms of
@ -240,7 +243,7 @@ typedef struct
by OR'ing together all of the possible type masks. (e.g.
'operand_types[i] = Reg|Imm' specifies that operand i can be
either a register or an immediate operand. */
unsigned int operand_types[3];
unsigned int operand_types[4];
/* operand_types[i] bits */
/* register */
@ -391,7 +394,8 @@ enum processor_type
PROCESSOR_ATHLON,
PROCESSOR_K8,
PROCESSOR_GENERIC32,
PROCESSOR_GENERIC64
PROCESSOR_GENERIC64,
PROCESSOR_AMDFAM10
};
/* x86 arch names, types and features */

View file

@ -1,3 +1,11 @@
2006-07-13 Dwarakanath Rajagopal <dwarak.rajagopal@amd.com>
Michael Meissner <michael.meissner@amd.com>
* gas/i386/amdfam10.s: New file for amdfam10 instructions.
* gas/i386/amdfam10.d: Ditto.
* gas/i386/x86-64-amdfam10.s: Ditto.
* gas/i386/x86-64-amdfam10.d: Ditto.
2006-07-12 Nick Clifton <nickc@redhat.com>
* gas/sh/basic.exp: Run "too_large" dump test.

View file

@ -0,0 +1,22 @@
#objdump: -dw
#name: i386 amdfam10
.*: +file format .*
Disassembly of section .text:
0+000 <foo>:
0: f3 0f bd 19[ ]+lzcnt \(%ecx\),%ebx
4: f3 66 0f bd 19[ ]+lzcnt \(%ecx\),%bx
9: f3 0f bd d9[ ]+lzcnt %ecx,%ebx
d: f3 66 0f bd d9[ ]+lzcnt %cx,%bx
12: 0f b8 19[ ]+popcnt \(%ecx\),%ebx
15: 66 0f b8 19[ ]+popcnt \(%ecx\),%bx
19: 0f b8 d9[ ]+popcnt %ecx,%ebx
1c: 66 0f b8 d9[ ]+popcnt %cx,%bx
20: 66 0f 79 ca[ ]+extrq %xmm2,%xmm1
24: 66 0f 78 c1 02 04[ ]*extrq \$0x4,\$0x2,%xmm1
2a: f2 0f 79 ca[ ]+insertq %xmm2,%xmm1
2e: f2 0f 78 ca 02 04[ ]*insertq \$0x4,\$0x2,%xmm2,%xmm1
34: f2 0f 2b 09[ ]+movntsd %xmm1,\(%ecx\)
38: f3 0f 2b 09[ ]+movntss %xmm1,\(%ecx\)

View file

@ -0,0 +1,18 @@
#AMDFAM10 New Instructions
.text
foo:
lzcnt (%ecx),%ebx
lzcnt (%ecx),%bx
lzcnt %ecx,%ebx
lzcnt %cx,%bx
popcnt (%ecx),%ebx
popcnt (%ecx),%bx
popcnt %ecx,%ebx
popcnt %cx,%bx
extrq %xmm2,%xmm1
extrq $4,$2,%xmm1
insertq %xmm2,%xmm1
insertq $4,$2,%xmm2,%xmm1
movntsd %xmm1,(%ecx)
movntss %xmm1,(%ecx)

View file

@ -68,6 +68,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_32_check]]
run_dump_test "crx"
run_list_test "cr-err" ""
run_dump_test "svme"
run_dump_test "amdfam10"
run_dump_test "merom"
run_dump_test "rep"
run_dump_test "rep-suffix"
@ -141,6 +142,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_64_check]] t
run_list_test "x86-64-inval-seg" "-al"
run_dump_test "x86-64-branch"
run_dump_test "svme64"
run_dump_test "x86-64-amdfam10"
run_dump_test "x86-64-vmx"
run_dump_test "immed64"
run_dump_test "x86-64-prescott"

View file

@ -0,0 +1,26 @@
#objdump: -dw
#name: x86-64 amdfam10
.*: +file format .*
Disassembly of section .text:
0+000 <foo>:
0: f3 48 0f bd 19[ ]+lzcnt \(%rcx\),%rbx
5: f3 0f bd 19[ ]+lzcnt \(%rcx\),%ebx
9: f3 66 0f bd 19[ ]+lzcnt \(%rcx\),%bx
e: f3 48 0f bd d9[ ]+lzcnt %rcx,%rbx
13: f3 0f bd d9[ ]+lzcnt %ecx,%ebx
17: f3 66 0f bd d9[ ]+lzcnt %cx,%bx
1c: 48 0f b8 19[ ]+popcnt \(%rcx\),%rbx
20: 0f b8 19[ ]+popcnt \(%rcx\),%ebx
23: 66 0f b8 19[ ]+popcnt \(%rcx\),%bx
27: 48 0f b8 d9[ ]+popcnt %rcx,%rbx
2b: 0f b8 d9[ ]+popcnt %ecx,%ebx
2e: 66 0f b8 d9[ ]+popcnt %cx,%bx
32: 66 0f 79 ca[ ]+extrq %xmm2,%xmm1
36: 66 0f 78 c1 02 04[ ]+extrq \$0x4,\$0x2,%xmm1
3c: f2 0f 79 ca[ ]+insertq %xmm2,%xmm1
40: f2 0f 78 ca 02 04[ ]+insertq \$0x4,\$0x2,%xmm2,%xmm1
46: f2 0f 2b 09[ ]+movntsd %xmm1,\(%rcx\)
4a: f3 0f 2b 09[ ]+movntss %xmm1,\(%rcx\)

View file

@ -0,0 +1,22 @@
#AMDFAM10 New Instructions
.text
foo:
lzcnt (%rcx),%rbx
lzcnt (%rcx),%ebx
lzcnt (%rcx),%bx
lzcnt %rcx,%rbx
lzcnt %ecx,%ebx
lzcnt %cx,%bx
popcnt (%rcx),%rbx
popcnt (%rcx),%ebx
popcnt (%rcx),%bx
popcnt %rcx,%rbx
popcnt %ecx,%ebx
popcnt %cx,%bx
extrq %xmm2,%xmm1
extrq $4,$2,%xmm1
insertq %xmm2,%xmm1
insertq $4,$2,%xmm2,%xmm1
movntsd %xmm1,(%rcx)
movntss %xmm1,(%rcx)

View file

@ -1,3 +1,8 @@
2006-07-10 Dwarakanath Rajagopal <dwarak.rajagopal@amd.com>
Michael Meissner <michael.meissner@amd.com>
* i386.h: Add amdfam10 new instructions (SSE4a and ABM instructions).
2006-06-12 H.J. Lu <hongjiu.lu@intel.com>
* i386.h (i386_optab): Add "nop" with memory reference.

View file

@ -1462,6 +1462,20 @@ static const template i386_optab[] =
{"vmsave", 0, 0x0f01, 0xdb, CpuSVME, NoSuf|ImmExt, { 0, 0, 0 } },
{"vmsave", 1, 0x0f01, 0xdb, CpuSVME, NoSuf|ImmExt, { AnyMem, 0, 0 } },
/* SSE4a instructions */
{"movntsd", 2, 0xf20f2b, X, CpuSSE4a, NoSuf|IgnoreSize|Modrm, { RegXMM, LongMem, 0 } },
{"movntss", 2, 0xf30f2b, X, CpuSSE4a, NoSuf|IgnoreSize|Modrm, { RegXMM, WordMem, 0 } },
{"extrq", 3, 0x660f78, 0, CpuSSE4a, NoSuf|IgnoreSize|Modrm, { Imm8, Imm8, RegXMM } },
{"extrq", 2, 0x660f79, X, CpuSSE4a, NoSuf|IgnoreSize|Modrm, { RegXMM, RegXMM} },
{"insertq", 2, 0xf20f79, X, CpuSSE4a, NoSuf|IgnoreSize|Modrm, { RegXMM, RegXMM} },
{"insertq", 4, 0xf20f78, X, CpuSSE4a, NoSuf|IgnoreSize|Modrm, { Imm8, Imm8, RegXMM, RegXMM} },
/* ABM instructions */
{"popcnt", 2, 0x0fb8, X, CpuABM, wlq_Suf|Modrm, { WordReg|WordMem, WordReg, 0} },
{"lzcnt", 2, 0xf30fbd, X, CpuABM, wlq_Suf|Modrm, { WordReg|WordMem, WordReg, 0} },
/* VIA PadLock extensions. */
{"xstore-rng",0, 0x000fa7, 0xc0, Cpu686|CpuPadLock, NoSuf|IsString|ImmExt, { 0, 0, 0} },
{"xcrypt-ecb",0, 0xf30fa7, 0xc8, Cpu686|CpuPadLock, NoSuf|IsString|ImmExt, { 0, 0, 0} },

View file

@ -1,3 +1,13 @@
2006-07-10 Dwarakanath Rajagopal <dwarak.rajagopal@amd.com>
Michael Meissner <michael.meissner@amd.com>
* i386-dis.c (dis386): Add support for 4 operand instructions. Add
support for amdfam10 SSE4a/ABM instructions. Modify all
initializer macros to have additional arguments. Disallow REP
prefix for non-string instructions.
(print_insn): Ditto.
2006-07-05 Julian Brown <julian@codesourcery.com>
* arm-dis.c (coprocessor): Alter fmsrr disassembly syntax.

File diff suppressed because it is too large Load diff