From fba9bfed2dd8e2fd7e73a2c2c5a33954a3d1cb78 Mon Sep 17 00:00:00 2001 From: "Frank Ch. Eigler" Date: Sat, 7 Feb 1998 00:12:14 +0000 Subject: [PATCH] - Added almost all code needed for PKE0/1 simulation. Considers clarifications given in SCEI question/answer batches #1 and #2. --- sim/mips/sky-pke.c | 1272 +++++++++++++++++++++++++++++++++++++++++--- sim/mips/sky-pke.h | 290 +++++++++- 2 files changed, 1462 insertions(+), 100 deletions(-) diff --git a/sim/mips/sky-pke.c b/sim/mips/sky-pke.c index b90d196f76..94b650257b 100644 --- a/sim/mips/sky-pke.c +++ b/sim/mips/sky-pke.c @@ -1,7 +1,12 @@ /* Copyright (C) 1998, Cygnus Solutions */ -#include "sky-pke.h" #include +#include "sky-pke.h" +#include "sky-dma.h" +#include "sim-assert.h" +#include "sky-vu0.h" +#include "sky-vu1.h" +#include "sky-gpuif.h" /* Imported functions */ @@ -16,6 +21,13 @@ static int pke_io_read_buffer(device*, void*, int, address_word, static int pke_io_write_buffer(device*, const void*, int, address_word, unsigned, sim_cpu*, sim_cia); static void pke_issue(struct pke_device*); +static void pke_pc_advance(struct pke_device*, int num_words); +static unsigned_4* pke_pc_operand(struct pke_device*, int word_num); +static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int word_num); +static int pke_track_write(struct pke_device*, const void* src, int len, + address_word dest, unsigned_4 sourceaddr); +static void pke_attach(SIM_DESC sd, struct pke_device* me); + /* Static data */ @@ -24,10 +36,9 @@ struct pke_device pke0_device = { { "pke0", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */ 0, 0, /* ID, flags */ - PKE0_REGISTER_WINDOW_START, PKE0_FIFO_START, /* memory-mapping addresses */ {}, /* regs */ NULL, 0, 0, NULL, /* FIFO */ - 0 /* pc */ + 0, 0 /* pc */ }; @@ -35,10 +46,9 @@ struct pke_device pke1_device = { { "pke1", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */ 1, 0, /* ID, flags */ - PKE1_REGISTER_WINDOW_START, PKE1_FIFO_START, /* memory-mapping addresses */ {}, /* 
regs */ NULL, 0, 0, NULL, /* FIFO */ - 0 /* pc */ + 0, 0 /* pc */ }; @@ -46,65 +56,23 @@ struct pke_device pke1_device = /* External functions */ -/* Attach PKE0 addresses to main memory */ +/* Attach PKE addresses to main memory */ void pke0_attach(SIM_DESC sd) { - sim_core_attach (sd, - NULL, - 0 /*level*/, - access_read_write, - 0 /*space ???*/, - pke0_device.register_memory_addr, - PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/, - 0 /*modulo*/, - (device*) &pke0_device, - NULL /*buffer*/); - - sim_core_attach (sd, - NULL, - 0 /*level*/, - access_read_write, - 0 /*space ???*/, - pke0_device.fifo_memory_addr, - sizeof(quadword) /*nr_bytes*/, - 0 /*modulo*/, - (device*) &pke1_device, - NULL /*buffer*/); + pke_attach(sd, & pke0_device); } - -/* Attach PKE1 addresses to main memory */ - -void +void pke1_attach(SIM_DESC sd) { - sim_core_attach (sd, - NULL, - 0 /*level*/, - access_read_write, - 0 /*space ???*/, - pke1_device.register_memory_addr, - PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/, - 0 /*modulo*/, - (device*) &pke1_device, - NULL /*buffer*/); - - sim_core_attach (sd, - NULL, - 0 /*level*/, - access_read_write, - 0 /*space ???*/, - pke1_device.fifo_memory_addr, - sizeof(quadword) /*nr_bytes*/, - 0 /*modulo*/, - (device*) &pke1_device, - NULL /*buffer*/); + pke_attach(sd, & pke1_device); } -/* Issue a PKE0 instruction if possible */ + +/* Issue a PKE instruction if possible */ void pke0_issue() @@ -112,9 +80,6 @@ pke0_issue() pke_issue(& pke0_device); } - -/* Issue a PKE1 instruction if possible */ - void pke1_issue() { @@ -126,6 +91,50 @@ pke1_issue() /* Internal functions */ +/* Attach PKE memory regions to simulator */ + +void +pke_attach(SIM_DESC sd, struct pke_device* me) +{ + /* register file */ + sim_core_attach (sd, + NULL, + 0 /*level*/, + access_read_write, + 0 /*space ???*/, + (me->pke_number == 0) ? 
PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START, + PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/, + 0 /*modulo*/, + (device*) &pke0_device, + NULL /*buffer*/); + + /* FIFO port */ + sim_core_attach (sd, + NULL, + 0 /*level*/, + access_read_write, + 0 /*space ???*/, + (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR, + sizeof(quadword) /*nr_bytes*/, + 0 /*modulo*/, + (device*) &pke1_device, + NULL /*buffer*/); + + /* source-addr tracking word */ + sim_core_attach (sd, + NULL, + 0 /*level*/, + access_read_write, + 0 /*space ???*/, + (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR, + sizeof(unsigned_4) /*nr_bytes*/, + 0 /*modulo*/, + NULL, + zalloc(sizeof(unsigned_4)) /*buffer*/); +} + + + /* Handle a PKE read; return no. of bytes read */ int @@ -134,12 +143,18 @@ pke_io_read_buffer(device *me_, int space, address_word addr, unsigned nr_bytes, - sim_cpu *processor, + sim_cpu *cpu, sim_cia cia) { /* downcast to gather embedding pke_device struct */ struct pke_device* me = (struct pke_device*) me_; + /* find my address ranges */ + address_word my_reg_start = + (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START; + address_word my_fifo_addr = + (me->pke_number == 0) ? 
PKE0_FIFO_ADDR : PKE1_FIFO_ADDR; + /* enforce that an access does not span more than one quadword */ address_word low = ADDR_TRUNC_QW(addr); address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1); @@ -147,33 +162,66 @@ pke_io_read_buffer(device *me_, return 0; /* classify address & handle */ - if(addr >= me->register_memory_addr && - addr < me->register_memory_addr + PKE_REGISTER_WINDOW_SIZE) + if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE)) { /* register bank */ - int reg_num = ADDR_TRUNC_QW(addr - me->register_memory_addr) >> 4; + int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4; + int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */ int readable = 1; + quadword result; - /* ensure readibility of register: all okay except PKE1-only ones read on PKE0 */ + /* clear result */ + result[0] = result[1] = result[2] = result[3] = 0; + + /* handle reads to individual registers; clear `readable' on error */ switch(reg_num) { + /* handle common case of register reading, side-effect free */ + /* PKE1-only registers*/ case PKE_REG_BASE: case PKE_REG_OFST: case PKE_REG_TOPS: case PKE_REG_TOP: case PKE_REG_DBF: - if(me->pke_number == 0) /* PKE0 cannot access these registers */ + if(me->pke_number == 0) readable = 0; + /* fall through */ + /* PKE0 & PKE1 common registers*/ + case PKE_REG_STAT: + case PKE_REG_ERR: + case PKE_REG_MARK: + case PKE_REG_CYCLE: + case PKE_REG_MODE: + case PKE_REG_NUM: + case PKE_REG_MASK: + case PKE_REG_CODE: + case PKE_REG_ITOPS: + case PKE_REG_ITOP: + case PKE_REG_R0: + case PKE_REG_R1: + case PKE_REG_R2: + case PKE_REG_R3: + case PKE_REG_C0: + case PKE_REG_C1: + case PKE_REG_C2: + case PKE_REG_C3: + result[0] = me->regs[reg_num][0]; + break; + + /* handle common case of write-only registers */ + case PKE_REG_FBRST: + readable = 0; + break; + + default: + ASSERT(0); /* test above should prevent this possibility */ } - /* perform read & return */ + /* perform transfer & return */ 
if(readable) { - /* find byte-offset inside register bank */ - int reg_byte = ADDR_OFFSET_QW(addr); - void* src = ((unsigned_1*) (& me->regs[reg_num])) + reg_byte; /* copy the bits */ - memcpy(dest, src, nr_bytes); + memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes); /* okay */ return nr_bytes; } @@ -185,16 +233,18 @@ pke_io_read_buffer(device *me_, /* NOTREACHED */ } - else if(addr >= me->fifo_memory_addr && - addr < me->fifo_memory_addr + sizeof(quadword)) + else if(addr >= my_fifo_addr && + addr < my_fifo_addr + sizeof(quadword)) { /* FIFO */ - /* XXX: FIFO is not readable. */ - return 0; + /* FIFO is not readable: return a word of zeroes */ + memset(dest, 0, nr_bytes); + return nr_bytes; } /* NOTREACHED */ + return 0; } @@ -206,12 +256,18 @@ pke_io_write_buffer(device *me_, int space, address_word addr, unsigned nr_bytes, - sim_cpu *processor, + sim_cpu *cpu, sim_cia cia) { /* downcast to gather embedding pke_device struct */ struct pke_device* me = (struct pke_device*) me_; + /* find my address ranges */ + address_word my_reg_start = + (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START; + address_word my_fifo_addr = + (me->pke_number == 0) ? 
PKE0_FIFO_ADDR : PKE1_FIFO_ADDR; + /* enforce that an access does not span more than one quadword */ address_word low = ADDR_TRUNC_QW(addr); address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1); @@ -219,33 +275,108 @@ pke_io_write_buffer(device *me_, return 0; /* classify address & handle */ - if(addr >= me->register_memory_addr && - addr < me->register_memory_addr + PKE_REGISTER_WINDOW_SIZE) + if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE)) { /* register bank */ - int reg_num = ADDR_TRUNC_QW(addr - me->register_memory_addr) >> 4; + int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4; + int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */ int writeable = 1; + quadword input; - /* ensure readibility of register: all okay except PKE1-only ones read on PKE0 */ + /* clear input */ + input[0] = input[1] = input[2] = input[3] = 0; + + /* write user-given bytes into input */ + memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes); + + /* handle writes to individual registers; clear `writeable' on error */ switch(reg_num) { + case PKE_REG_FBRST: + /* XXX: order of evaluation? STP && STC ?? */ + if(BIT_MASK_GET(input[0], 0, 0)) /* RST bit */ + { + /* clear FIFO: also prevents re-execution attempt of + possible stalled instruction */ + me->fifo_num_elements = me->fifo_pc; + /* clear registers */ + memset(me->regs, 0, sizeof(me->regs)); + me->flags = 0; + me->qw_pc = 0; + } + if(BIT_MASK_GET(input[0], 1, 1)) /* FBK bit */ + { + PKE_REG_MASK_SET(me, STAT, PFS, 1); + } + if(BIT_MASK_GET(input[0], 2, 2)) /* STP bit */ + { + /* XXX: how to safely abort "currently executing" (=> stalled) instruction? 
*/ + PKE_REG_MASK_SET(me, STAT, PSS, 1); + } + if(BIT_MASK_GET(input[0], 2, 2)) /* STC bit */ + { + /* clear a bunch of status bits */ + PKE_REG_MASK_SET(me, STAT, PSS, 0); + PKE_REG_MASK_SET(me, STAT, PFS, 0); + PKE_REG_MASK_SET(me, STAT, PIS, 0); + PKE_REG_MASK_SET(me, STAT, INT, 0); + PKE_REG_MASK_SET(me, STAT, ER0, 0); + PKE_REG_MASK_SET(me, STAT, ER1, 0); + /* will allow resumption of possible stalled instruction */ + } + break; + + case PKE_REG_ERR: + /* copy bottom three bits */ + BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input[0], 0, 2)); + break; + + case PKE_REG_MARK: + /* copy bottom sixteen bits */ + PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input[0], 0, 15)); + /* reset MRK bit in STAT */ + PKE_REG_MASK_SET(me, STAT, MRK, 0); + break; + + /* handle common case of read-only registers */ + /* PKE1-only registers - not really necessary to handle separately */ case PKE_REG_BASE: case PKE_REG_OFST: case PKE_REG_TOPS: case PKE_REG_TOP: case PKE_REG_DBF: - if(me->pke_number == 0) /* PKE0 cannot access these registers */ + if(me->pke_number == 0) writeable = 0; + /* fall through */ + /* PKE0 & PKE1 common registers*/ + case PKE_REG_STAT: + /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */ + case PKE_REG_CYCLE: + case PKE_REG_MODE: + case PKE_REG_NUM: + case PKE_REG_MASK: + case PKE_REG_CODE: + case PKE_REG_ITOPS: + case PKE_REG_ITOP: + case PKE_REG_R0: + case PKE_REG_R1: + case PKE_REG_R2: + case PKE_REG_R3: + case PKE_REG_C0: + case PKE_REG_C1: + case PKE_REG_C2: + case PKE_REG_C3: + writeable = 0; + break; + + default: + ASSERT(0); /* test above should prevent this possibility */ } - /* perform write & return */ - if(writeable) + /* perform return */ + if(writeable) { - /* find byte-offset inside register bank */ - int reg_byte = ADDR_OFFSET_QW(addr); - void* dest = ((unsigned_1*) (& me->regs[reg_num])) + reg_byte; - /* copy the bits */ - memcpy(dest, src, nr_bytes); + /* okay */ return nr_bytes; } else 
@@ -256,10 +387,11 @@ pke_io_write_buffer(device *me_, /* NOTREACHED */ } - else if(addr >= me->fifo_memory_addr && - addr < me->fifo_memory_addr + sizeof(quadword)) + else if(addr >= my_fifo_addr && + addr < my_fifo_addr + sizeof(quadword)) { /* FIFO */ + struct fifo_quadword* fqw; /* assert transfer size == 128 bits */ if(nr_bytes != sizeof(quadword)) @@ -283,24 +415,988 @@ pke_io_write_buffer(device *me_, } /* add new quadword at end of FIFO */ - memcpy(& me->fifo[++me->fifo_num_elements], src, nr_bytes); + fqw = & me->fifo[me->fifo_num_elements]; + memcpy((void*) fqw->data, src, nr_bytes); + sim_read(CPU_STATE(cpu), + (SIM_ADDR) (me->pke_number == 0 ? DMA_CHANNEL0_SRCADDR : DMA_CHANNEL1_SRCADDR), + (void*) & fqw->source_address, + sizeof(address_word)); + sim_read(CPU_STATE(cpu), + (SIM_ADDR) (me->pke_number == 0 ? DMA_CHANNEL0_PKTFLAG : DMA_CHANNEL1_PKTFLAG), + (void*) & fqw->dma_tag_present, + sizeof(unsigned_4)); + /* XXX: check RC */ + + me->fifo_num_elements++; + + /* set FQC to "1" as FIFO is now not empty */ + PKE_REG_MASK_SET(me, STAT, FQC, 1); /* okay */ return nr_bytes; } /* NOTREACHED */ + return 0; } -/* Issue & swallow one PKE opcode if possible */ +/* Issue & swallow next PKE opcode if possible/available */ void pke_issue(struct pke_device* me) { + struct fifo_quadword* fqw; + unsigned_4 fw; + unsigned_4 cmd, intr, num; + unsigned_4 imm; + int next_pps_state; /* PPS after this instruction issue attempt */ + /* 1 -- test go / no-go for PKE execution */ + + /* check for stall/halt control bits */ + /* XXX: What is the PEW bit for? */ + if(PKE_REG_MASK_GET(me, STAT, PSS) || + PKE_REG_MASK_GET(me, STAT, PFS) || + /* maskable stall controls: ER0, ER1, PIS */ + (PKE_REG_MASK_GET(me, STAT, ER0) && !PKE_REG_MASK_GET(me, ERR, ME0)) || + (PKE_REG_MASK_GET(me, STAT, ER1) && !PKE_REG_MASK_GET(me, ERR, ME1)) || + (PKE_REG_MASK_GET(me, STAT, PIS) && !PKE_REG_MASK_GET(me, ERR, MII))) + { + /* XXX */ + } + /* XXX: handle PSS by *skipping* instruction? 
*/ + + /* confirm availability of new quadword of PKE instructions */ + if(me->fifo_num_elements <= me->fifo_pc) + return; + + + /* 2 -- fetch PKE instruction */ + + /* "fetch" instruction quadword */ + fqw = & me->fifo[me->fifo_pc]; + + /* skip over DMA tags, if present */ + if((fqw->dma_tag_present != 0) && (me->qw_pc < 2)) + { + ASSERT(me->qw_pc == 0); + /* XXX: check validity of DMA tag; if bad, set ER0 flag */ + me->qw_pc = 2; + } + + /* "fetch" instruction word */ + fw = fqw->data[me->qw_pc]; + + /* store it in PKECODE register */ + me->regs[PKE_REG_CODE][0] = fw; + + + /* 3 -- decode PKE instruction */ + + /* PKE instruction format: [intr 0:0][pke-command 6:0][num 7:0][immediate 15:0], + so op-code is in top byte. */ + intr = BIT_MASK_GET(fw, PKE_OPCODE_I_B, PKE_OPCODE_I_E); + cmd = BIT_MASK_GET(fw, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E); + num = BIT_MASK_GET(fw, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E); + imm = BIT_MASK_GET(fw, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E); + + if(intr) + { + /* set INT flag in STAT register */ + PKE_REG_MASK_SET(me, STAT, INT, 1); + /* XXX: send interrupt to R5900? 
*/ + } + + /* decoding */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE); + next_pps_state = PKE_REG_STAT_PPS_IDLE; /* assume instruction completes */ + + /* decode */ + if(IS_PKE_CMD(cmd, PKENOP)) + { + /* no work required, yey */ + pke_pc_advance(me, 1); + } + else if(IS_PKE_CMD(cmd, STCYCL)) + { + /* copy immediate value into CYCLE reg */ + me->regs[PKE_REG_CYCLE][0] = imm; + pke_pc_advance(me, 1); + } + else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET)) + { + /* copy 10 bits to OFFSET field */ + PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9)); + /* clear DBF bit */ + PKE_REG_MASK_SET(me, DBF, DF, 0); + /* clear other DBF bit */ + PKE_REG_MASK_SET(me, STAT, DBF, 0); + /* set TOPS = BASE */ + PKE_REG_MASK_SET(me, TOPS, TOPS, + PKE_REG_MASK_GET(me, BASE, BASE)); + pke_pc_advance(me, 1); + } + else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE)) + { + /* copy 10 bits to BASE field */ + PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9)); + /* clear DBF bit */ + PKE_REG_MASK_SET(me, DBF, DF, 0); + /* clear other DBF bit */ + PKE_REG_MASK_SET(me, STAT, DBF, 0); + /* set TOPS = BASE */ + PKE_REG_MASK_SET(me, TOPS, TOPS, + PKE_REG_MASK_GET(me, BASE, BASE)); + pke_pc_advance(me, 1); + } + else if(IS_PKE_CMD(cmd, ITOP)) + { + /* copy 10 bits to ITOPS field */ + PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9)); + pke_pc_advance(me, 1); + } + else if(IS_PKE_CMD(cmd, STMOD)) + { + /* copy 2 bits to MODE register */ + PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 2)); + pke_pc_advance(me, 1); + } + else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3)) /* MSKPATH3 */ + { + /* XXX: what to do with this? DMA control register? 
*/ + pke_pc_advance(me, 1); + } + else if(IS_PKE_CMD(cmd, PKEMARK)) + { + /* copy 16 bits to MARK register */ + PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15)); + /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */ + PKE_REG_MASK_SET(me, STAT, MRK, 1); + pke_pc_advance(me, 1); + } + else if(IS_PKE_CMD(cmd, FLUSHE)) + { + /* read VU status word */ + unsigned_4 vu_stat; + sim_read(NULL, + (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), + (void*) & vu_stat, + sizeof(unsigned_4)); + /* XXX: check RC */ + + /* check if VBS bit is clear, i.e., VU is idle */ + if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) + { + /* VU idle */ + /* advance PC */ + pke_pc_advance(me, 1); + } + else + { + /* VU busy */ + next_pps_state = PKE_REG_STAT_PPS_WAIT; + /* retry this instruction next clock */ + } + } + else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH)) + { + /* read VU status word */ + unsigned_4 vu_stat; + sim_read(NULL, + (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), + (void*) & vu_stat, + sizeof(unsigned_4)); + /* XXX: check RC */ + + /* check if VGW bit is clear, i.e., PATH1 is idle */ + /* simulator design implies PATH2 is always "idle" */ + if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 && + BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 && + 1 /* PATH2 always idle */) + { + /* VU idle */ + /* PATH1 idle */ + /* PATH2 idle */ + /* advance PC */ + pke_pc_advance(me, 1); + } + else + { + /* GPUIF busy */ + /* retry this instruction next clock */ + } + } + else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA)) + { + /* read VU status word */ + unsigned_4 vu_stat; + sim_read(NULL, + (SIM_ADDR) (me->pke_number == 0 ? 
VPE0_STAT : VPE1_STAT), + (void*) & vu_stat, + sizeof(unsigned_4)); + /* XXX: check RC */ + + /* check if VGW bit is clear, i.e., PATH1 is idle */ + /* simulator design implies PATH2 is always "idle" */ + /* XXX: simulator design implies PATH3 is always "idle" */ + if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 && + BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 && + 1 /* PATH2 always idle */ && + 1 /* PATH3 always idle */) + { + /* VU idle */ + /* PATH1 idle */ + /* PATH2 idle */ + /* PATH3 idle */ + /* advance PC */ + pke_pc_advance(me, 1); + } + else + { + /* GPUIF busy */ + /* retry this instruction next clock */ + } + } + else if(IS_PKE_CMD(cmd, PKEMSCAL)) + { + /* read VU status word */ + unsigned_4 vu_stat; + sim_read(NULL, + (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), + (void*) & vu_stat, + sizeof(unsigned_4)); + /* XXX: check RC */ + + /* check if VBS bit is clear, i.e., VU is idle */ + if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) + { + /* VU idle */ + unsigned_4 vu_pc; + + /* perform PKE1-unique processing for microprogram calls */ + if(me->pke_number == 1) + { + /* flip DBF */ + PKE_REG_MASK_SET(me, DBF, DF, + PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1); + PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF)); + /* compute new TOPS */ + PKE_REG_MASK_SET(me, TOPS, TOPS, + (PKE_REG_MASK_GET(me, BASE, BASE) + + (PKE_REG_MASK_GET(me, DBF, DF) * + PKE_REG_MASK_GET(me, OFST, OFFSET)))); + /* compute new ITOP and TOP */ + PKE_REG_MASK_SET(me, ITOP, ITOP, + PKE_REG_MASK_GET(me, ITOPS, ITOPS)); + PKE_REG_MASK_SET(me, TOP, TOP, + PKE_REG_MASK_GET(me, TOPS, TOPS)); + } + + /* compute new PC */ + vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */ + /* write new PC; callback function gets VU running */ + sim_write(NULL, + (SIM_ADDR) (me->pke_number == 0 ? 
VU0_PC_START : VU1_PC_START), + (void*) & vu_pc, + sizeof(unsigned_4)); + /* advance PC */ + pke_pc_advance(me, 1); + } + else + { + /* VU busy */ + next_pps_state = PKE_REG_STAT_PPS_WAIT; + /* retry this instruction next clock */ + } + } + else if(IS_PKE_CMD(cmd, PKEMSCNT)) + { + /* read VU status word */ + unsigned_4 vu_stat; + sim_read(NULL, + (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), + (void*) & vu_stat, + sizeof(unsigned_4)); + /* XXX: check RC */ + + /* check if VBS bit is clear, i.e., VU is idle */ + if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) + { + /* VU idle */ + unsigned_4 vu_pc; + + /* flip DBF etc. for PKE1 */ + if(me->pke_number == 1) + { + PKE_REG_MASK_SET(me, DBF, DF, + PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1); + PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF)); + PKE_REG_MASK_SET(me, TOPS, TOPS, + (PKE_REG_MASK_GET(me, BASE, BASE) + + (PKE_REG_MASK_GET(me, DBF, DF) * + PKE_REG_MASK_GET(me, OFST, OFFSET)))); + PKE_REG_MASK_SET(me, ITOP, ITOP, + PKE_REG_MASK_GET(me, ITOPS, ITOPS)); + PKE_REG_MASK_SET(me, TOP, TOP, + PKE_REG_MASK_GET(me, TOPS, TOPS)); + } + + /* read old PC */ + sim_read(NULL, + (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), + (void*) & vu_pc, + sizeof(unsigned_4)); + /* rewrite its PC; callback function gets VU running */ + sim_write(NULL, + (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START), + (void*) & vu_pc, + sizeof(unsigned_4)); + /* advance PC */ + pke_pc_advance(me, 1); + } + else + { + /* VU busy */ + next_pps_state = PKE_REG_STAT_PPS_WAIT; + /* retry this instruction next clock */ + } + } + else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF)) + { + /* read VU status word */ + unsigned_4 vu_stat; + sim_read(NULL, + (SIM_ADDR) (me->pke_number == 0 ? 
VPE0_STAT : VPE1_STAT), + (void*) & vu_stat, + sizeof(unsigned_4)); + /* XXX: check RC */ + + /* check if VGW bit is clear, i.e., PATH1 is idle */ + /* simulator design implies PATH2 is always "idle" */ + if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 && + BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 && + 1 /* PATH2 always idle */) + { + /* VU idle */ + /* PATH1 idle */ + /* PATH2 idle */ + unsigned_4 vu_pc; + + /* flip DBF etc. for PKE1 */ + if(me->pke_number == 1) + { + PKE_REG_MASK_SET(me, DBF, DF, + PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1); + PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF)); + PKE_REG_MASK_SET(me, TOPS, TOPS, + (PKE_REG_MASK_GET(me, BASE, BASE) + + (PKE_REG_MASK_GET(me, DBF, DF) * + PKE_REG_MASK_GET(me, OFST, OFFSET)))); + PKE_REG_MASK_SET(me, ITOP, ITOP, + PKE_REG_MASK_GET(me, ITOPS, ITOPS)); + PKE_REG_MASK_SET(me, TOP, TOP, + PKE_REG_MASK_GET(me, TOPS, TOPS)); + } + + /* compute new PC */ + vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */ + /* write new PC; callback function gets VU running */ + sim_write(NULL, + (SIM_ADDR) (me->pke_number == 0 ? 
VU0_PC_START : VU1_PC_START), + (void*) & vu_pc, + sizeof(unsigned_4)); + /* advance PC */ + pke_pc_advance(me, 1); + } + else + { + /* VU busy */ + next_pps_state = PKE_REG_STAT_PPS_WAIT; + /* retry this instruction next clock */ + } + } + else if(IS_PKE_CMD(cmd, STMASK)) + { + /* check that FIFO has one more word for STMASK operand */ + unsigned_4* mask; + + mask = pke_pc_operand(me, 1); + if(mask != NULL) + { + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + /* fill the register */ + PKE_REG_MASK_SET(me, MASK, MASK, *mask); + /* advance PC */ + pke_pc_advance(me, 2); + } + else + { + /* need to wait for another word */ + next_pps_state = PKE_REG_STAT_PPS_WAIT; + /* retry this instruction next clock */ + } + } + else if(IS_PKE_CMD(cmd, STROW)) + { + /* check that FIFO has four more words for STROW operand */ + unsigned_4* last_op; + + last_op = pke_pc_operand(me, 4); + if(last_op != NULL) + { + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* copy ROW registers: must all exist if 4th operand exists */ + me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1); + me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2); + me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3); + me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4); + + /* advance PC */ + pke_pc_advance(me, 5); + } + else + { + /* need to wait for another word */ + next_pps_state = PKE_REG_STAT_PPS_WAIT; + /* retry this instruction next clock */ + } + } + else if(IS_PKE_CMD(cmd, STCOL)) + { + /* check that FIFO has four more words for STCOL operand */ + unsigned_4* last_op; + + last_op = pke_pc_operand(me, 4); + if(last_op != NULL) + { + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* copy COL registers: must all exist if 4th operand exists */ + me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1); + me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2); + me->regs[PKE_REG_C2][0] = * 
pke_pc_operand(me, 3); + me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4); + + /* advance PC */ + pke_pc_advance(me, 5); + } + else + { + /* need to wait for another word */ + next_pps_state = PKE_REG_STAT_PPS_WAIT; + /* retry this instruction next clock */ + } + } + else if(IS_PKE_CMD(cmd, MPG)) + { + unsigned_4* last_mpg_word; + + /* map zero to max+1 */ + if(num==0) num=0x100; + + /* check that FIFO has a few more words for MPG operand */ + last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */ + if(last_mpg_word != NULL) + { + /* perform implied FLUSHE */ + /* read VU status word */ + unsigned_4 vu_stat; + sim_read(NULL, + (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT), + (void*) & vu_stat, + sizeof(unsigned_4)); + /* XXX: check RC */ + + /* check if VBS bit is clear, i.e., VU is idle */ + if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0) + { + /* VU idle */ + int i; + + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* transfer VU instructions, one word per iteration */ + for(i=0; ipke_number == 0) ? + VU0_MEM0_WINDOW_START : VU0_MEM0_WINDOW_START; + vu_addr = vu_addr_base + (imm*2) + i; + + /* VU*_MEM0_TRACK : source-addr tracking table */ + vutrack_addr_base = (me->pke_number == 0) ? 
+ VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START; + vutrack_addr = vu_addr_base + (imm*2) + i; + + /* write data into VU memory */ + pke_track_write(me, operand, sizeof(unsigned_4), + vu_addr, fq->source_address); + + /* write srcaddr into VU srcaddr tracking table */ + sim_write(NULL, + (SIM_ADDR) vutrack_addr, + (void*) & fq->source_address, + sizeof(unsigned_4)); + /* XXX: check RC */ + } /* VU xfer loop */ + + /* advance PC */ + pke_pc_advance(me, 1 + num*2); + } + else + { + /* VU busy */ + next_pps_state = PKE_REG_STAT_PPS_WAIT; + /* retry this instruction next clock */ + } + } /* if FIFO full enough */ + else + { + /* need to wait for another word */ + next_pps_state = PKE_REG_STAT_PPS_WAIT; + /* retry this instruction next clock */ + } + } + else if(IS_PKE_CMD(cmd, DIRECT) || IS_PKE_CMD(cmd, DIRECTHL)) /* treat identically */ + { + /* check that FIFO has a few more words for DIRECT operand */ + unsigned_4* last_direct_word; + + /* map zero to max+1 */ + if(imm==0) imm=0x10000; + + last_direct_word = pke_pc_operand(me, imm*4); /* num: number of 128-bit words */ + if(last_direct_word != NULL) + { + /* VU idle */ + int i; + quadword fifo_data; + + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* transfer GPUIF quadwords, one word per iteration */ + for(i=0; isource_address); + /* XXX: check RC */ + } /* write collected quadword */ + + } /* GPUIF xfer loop */ + + /* advance PC */ + pke_pc_advance(me, 1 + imm*4); + } /* if FIFO full enough */ + else + { + /* need to wait for another word */ + next_pps_state = PKE_REG_STAT_PPS_WAIT; + /* retry this instruction next clock */ + } + } + else if(IS_PKE_CMD(cmd, UNPACK)) /* warning: monster complexity */ + { + short vn = BIT_MASK_GET(cmd, 2, 3); + short vl = BIT_MASK_GET(cmd, 0, 1); + short vnvl = BIT_MASK_GET(cmd, 0, 3); + int m = BIT_MASK_GET(cmd, 4, 4); + short cl = PKE_REG_MASK_GET(me, CYCLE, CL); + short wl = PKE_REG_MASK_GET(me, CYCLE, WL); + int n, num_operands; + 
unsigned_4* last_operand_word; + + /* map zero to max+1 */ + if(num==0) num=0x100; + + /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */ + if(wl <= cl) + n = num; + else + n = cl * (num/wl) + PKE_LIMIT(num % wl, cl); + num_operands = (((sizeof(unsigned_4) >> vl) * (vn+1) * n)/sizeof(unsigned_4)); + + /* confirm that FIFO has enough words in it */ + last_operand_word = pke_pc_operand(me, num_operands); + if(last_operand_word != NULL) + { + address_word vu_addr_base; + int operand_num, vector_num; + + /* "transferring" operand */ + PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER); + + /* XXX: don't check whether VU is idle?? */ + + if(me->pke_number == 0) + vu_addr_base = VU0_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9); + else + { + vu_addr_base = VU1_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9); + if(BIT_MASK_GET(imm, 15, 15)) /* fetch R flag from imm word */ + vu_addr_base += PKE_REG_MASK_GET(me, TOPS, TOPS); + } + + /* XXX: vu_addr overflow check */ + + /* transfer given number of vectors */ + operand_num = 1; /* word index into instruction stream: 1..num_operands */ + vector_num = 0; /* vector number being processed: 0..num-1 */ + while(operand_num <= num_operands) + { + quadword vu_old_data; + quadword vu_new_data; + quadword unpacked_data; + address_word vu_addr; + struct fifo_quadword* fq; + int i; + + /* XXX: set NUM */ + + /* compute VU destination address, as bytes in R5900 memory */ + if(cl >= wl) + { + /* map zero to max+1 */ + if(wl == 0) wl = 0x0100; + vu_addr = vu_addr_base + 16*(cl*(vector_num/wl) + (vector_num%wl)); + } + else + vu_addr = vu_addr_base + 16*vector_num; + + /* read old VU data word at address */ + sim_read(NULL, (SIM_ADDR) vu_addr, (void*) & vu_old_data, sizeof(vu_old_data)); + + /* Let sourceaddr track the first operand */ + fq = pke_pc_fifo(me, operand_num); + + /* For cyclic unpack, next operand quadword may come from instruction stream + or be zero. 
*/ + if((cl < wl) && ((vector_num % wl) >= cl)) /* wl != 0, set above */ + { + /* clear operand - used only in a "indeterminate" state */ + for(i = 0; i < 4; i++) + unpacked_data[i] = 0; + } + else + { + /* compute unpacked words from instruction stream */ + switch(vnvl) + { + case PKE_UNPACK_S_32: + case PKE_UNPACK_V2_32: + case PKE_UNPACK_V3_32: + case PKE_UNPACK_V4_32: + /* copy (vn+1) 32-bit values */ + for(i = 0; i < vn+1; i++) + { + unsigned_4* operand = pke_pc_operand(me, operand_num); + unpacked_data[i] = *operand; + operand_num ++; + } + break; + + case PKE_UNPACK_S_16: + case PKE_UNPACK_V2_16: + case PKE_UNPACK_V3_16: + case PKE_UNPACK_V4_16: + /* copy (vn+1) 16-bit values, packed two-per-word */ + for(i=0; i vn) + masked_value = & zero; /* XXX: what to put here? */ + else + masked_value = & unpacked_data[i]; + break; + + case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */ + masked_value = & me->regs[PKE_REG_R0 + i][0]; + break; + + case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */ + masked_value = & me->regs[PKE_REG_C0 + PKE_LIMIT(vector_num,3)][0]; + break; + + case PKE_MASKREG_NOTHING: + /* "write inhibit" by re-copying old data */ + masked_value = & vu_old_data[i]; + break; + + default: + ASSERT(0); + /* no other cases possible */ + } + + /* copy masked value for column */ + memcpy(& vu_new_data[i], masked_value, sizeof(unsigned_4)); + } /* loop over columns */ + } + else + { + /* no mask - just copy over entire unpacked quadword */ + memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data)); + } + + /* process STMOD register for accumulation operations */ + switch(PKE_REG_MASK_GET(me, MODE, MDE)) + { + case PKE_MODE_ADDROW: /* add row registers to output data */ + for(i=0; i<4; i++) + /* exploit R0..R3 contiguity */ + vu_new_data[i] += me->regs[PKE_REG_R0 + i][0]; + break; + + case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */ + for(i=0; i<4; i++) + { + /* exploit R0..R3 contiguity */ + vu_new_data[i] += 
me->regs[PKE_REG_R0 + i][0]; + me->regs[PKE_REG_R0 + i][0] = vu_new_data[i]; + } + break; + + case PKE_MODE_INPUT: /* pass data through */ + default: + ; + } + + /* write replacement word */ + pke_track_write(me, vu_new_data, sizeof(vu_new_data), + (SIM_ADDR) vu_addr, fq->source_address); + + /* next vector please */ + vector_num ++; + } /* vector transfer loop */ + } /* PKE FIFO full enough */ + else + { + /* need to wait for another word */ + next_pps_state = PKE_REG_STAT_PPS_WAIT; + /* retry this instruction next clock */ + } + } + /* ... */ + else + { + /* set ER1 flag in STAT register */ + PKE_REG_MASK_SET(me, STAT, ER1, 1); + /* advance over faulty word */ + pke_pc_advance(me, 1); + } + + /* PKE is now idle or waiting */ + PKE_REG_MASK_SET(me, STAT, PPS, next_pps_state); +}
+
+
+
+
+
+
+/* advance the PC (fifo_pc = quadword index, qw_pc = word index) by num_words (> 0, asserted); STAT/FQC is only cleared here, once the FIFO is used up */
+
+void
+pke_pc_advance(struct pke_device* me, int num_words)
+{
+  ASSERT(num_words > 0);
+
+  me->qw_pc += num_words;
+  /* handle overflow: every 4 words carry into the next FIFO quadword */
+  while(me->qw_pc >= 4)
+    {
+      me->qw_pc -= 4;
+      me->fifo_pc ++;
+    }
+
+  /* clear FQC if FIFO is now empty, i.e. the PC sits just past the last occupied quadword */
+  if(me->fifo_num_elements == me->fifo_pc)
+    {
+      PKE_REG_MASK_SET(me, STAT, FQC, 0);
+    }
 }
+
+/* Return pointer to the word `word_num' words (>= 1, asserted) past the current PC; the PC itself is not moved.
+   If the FIFO does not yet hold the quadword containing that word, return NULL. */
+
+unsigned_4*
+pke_pc_operand(struct pke_device* me, int word_num)
+{
+  int new_qw_pc, new_fifo_pc;
+  unsigned_4* operand;
+
+  ASSERT(word_num > 0);
+
+  new_fifo_pc = me->fifo_pc;
+  new_qw_pc = me->qw_pc + word_num; /* plain assignment: the local holds no earlier value to add to */
+
+  /* handle overflow: every 4 words carry into the quadword index */
+  while(new_qw_pc >= 4)
+    {
+      new_qw_pc -= 4;
+      new_fifo_pc ++;
+    }
+
+  /* not enough elements: the computed quadword index (not the current PC) must lie inside the occupied FIFO */
+  if(me->fifo_num_elements <= new_fifo_pc)
+    operand = NULL;
+  else
+    operand = & me->fifo[new_fifo_pc].data[new_qw_pc];
+
+  return operand;
+}
+
+
+
+/* Return pointer to FIFO quadword containing given operand# in FIFO. + `word_num' starts at 1. If FIFO is not full enough, return 0. 
*/ + +struct fifo_quadword* +pke_pc_fifo(struct pke_device* me, int word_num) +{ + int new_qw_pc, new_fifo_pc; + struct fifo_quadword* operand; + + ASSERT(word_num > 0); + + new_fifo_pc = me->fifo_pc; + new_qw_pc += me->qw_pc + word_num; + + /* handle overflow */ + while(new_qw_pc >= 4) + { + new_qw_pc -= 4; + new_fifo_pc ++; + } + + /* not enough elements */ + if(me->fifo_num_elements == me->fifo_pc) + operand = NULL; + else + operand = & me->fifo[new_fifo_pc]; + + return operand; +} + + + +/* Write a bunch of bytes into simulator memory. Store the given source address into the + PKE sourceaddr tracking word. */ +int +pke_track_write(struct pke_device* me, const void* src, int len, + address_word dest, unsigned_4 sourceaddr) +{ + int rc; + unsigned_4 no_sourceaddr = 0; + + /* write srcaddr into PKE srcaddr tracking */ + sim_write(NULL, + (SIM_ADDR) (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR, + (void*) & sourceaddr, + sizeof(unsigned_4)); + + /* write bytes into simulator */ + rc = sim_write(NULL, + (SIM_ADDR) dest, + (void*) src, + len); + + /* clear srcaddr from PKE srcaddr tracking */ + sim_write(NULL, + (SIM_ADDR) (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR, + (void*) & no_sourceaddr, + sizeof(unsigned_4)); + + return rc; +} diff --git a/sim/mips/sky-pke.h b/sim/mips/sky-pke.h index d15b821218..e4d0cca01d 100644 --- a/sim/mips/sky-pke.h +++ b/sim/mips/sky-pke.h @@ -7,6 +7,11 @@ #include "sky-device.h" +/* Debugguing PKE? 
*/ + +#define PKE_DEBUG /* also enables the stand-in GPUIF/VU definitions guarded by #ifdef PKE_DEBUG further down */ + + /* External functions */ void pke0_attach(SIM_DESC sd); @@ -17,7 +22,7 @@ void pke1_issue(); /* Quadword data type */ -typedef unsigned int quadword[4]; +typedef unsigned_4 quadword[4]; /* truncate address to quadword */ #define ADDR_TRUNC_QW(addr) ((addr) & ~0x0f) @@ -29,8 +34,8 @@ typedef unsigned int quadword[4]; #define PKE0_REGISTER_WINDOW_START 0x10000800 #define PKE1_REGISTER_WINDOW_START 0x10000A00 -#define PKE0_FIFO_START 0x10008000 -#define PKE1_FIFO_START 0x10008010 +#define PKE0_FIFO_ADDR 0x10008000 +#define PKE1_FIFO_ADDR 0x10008010 /* Quadword indices of PKE registers. Actual registers sit at bottom @@ -51,11 +56,11 @@ typedef unsigned int quadword[4]; #define PKE_REG_ITOP 0x0d #define PKE_REG_TOP 0x0e /* pke1 only */ #define PKE_REG_DBF 0x0f /* pke1 only */ -#define PKE_REG_R0 0x10 +#define PKE_REG_R0 0x10 /* R0 .. R3 must be contiguous: the unpack code indexes them as PKE_REG_R0 + i */ #define PKE_REG_R1 0x11 #define PKE_REG_R2 0x12 #define PKE_REG_R3 0x13 -#define PKE_REG_C0 0x14 +#define PKE_REG_C0 0x14 /* C0 .. 
C3 must be contiguous: the unpack code indexes them as PKE_REG_C0 + n */ #define PKE_REG_C1 0x15 #define PKE_REG_C2 0x16 #define PKE_REG_C3 0x17 @@ -64,11 +69,273 @@ typedef unsigned int quadword[4]; #define PKE_REGISTER_WINDOW_SIZE (sizeof(quadword) * PKE_NUM_REGS) + /* virtual addresses for source-addr tracking */ #define PKE0_SRCADDR 0x20000020 #define PKE1_SRCADDR 0x20000024 +/* PKE commands: each command has a _MASK/_BITS pair, compared against a command word by IS_PKE_CMD() */ + +#define PKE_CMD_PKENOP_MASK 0x7F +#define PKE_CMD_PKENOP_BITS 0x00 +#define PKE_CMD_STCYCL_MASK 0x7F +#define PKE_CMD_STCYCL_BITS 0x01 +#define PKE_CMD_OFFSET_MASK 0x7F +#define PKE_CMD_OFFSET_BITS 0x02 +#define PKE_CMD_BASE_MASK 0x7F +#define PKE_CMD_BASE_BITS 0x03 +#define PKE_CMD_ITOP_MASK 0x7F +#define PKE_CMD_ITOP_BITS 0x04 +#define PKE_CMD_STMOD_MASK 0x7F +#define PKE_CMD_STMOD_BITS 0x05 +#define PKE_CMD_MSKPATH3_MASK 0x7F +#define PKE_CMD_MSKPATH3_BITS 0x06 +#define PKE_CMD_PKEMARK_MASK 0x7F +#define PKE_CMD_PKEMARK_BITS 0x07 +#define PKE_CMD_FLUSHE_MASK 0x7F +#define PKE_CMD_FLUSHE_BITS 0x10 +#define PKE_CMD_FLUSH_MASK 0x7F +#define PKE_CMD_FLUSH_BITS 0x11 +#define PKE_CMD_FLUSHA_MASK 0x7F +#define PKE_CMD_FLUSHA_BITS 0x13 +#define PKE_CMD_PKEMSCAL_MASK 0x7F /* CAL == "call" */ +#define PKE_CMD_PKEMSCAL_BITS 0x14 +#define PKE_CMD_PKEMSCNT_MASK 0x7F /* CNT == "continue" */ +#define PKE_CMD_PKEMSCNT_BITS 0x17 +#define PKE_CMD_PKEMSCALF_MASK 0x7F /* CALF == "call after flush" */ +#define PKE_CMD_PKEMSCALF_BITS 0x15 +#define PKE_CMD_STMASK_MASK 0x7F +#define PKE_CMD_STMASK_BITS 0x20 +#define PKE_CMD_STROW_MASK 0x7F +#define PKE_CMD_STROW_BITS 0x30 +#define PKE_CMD_STCOL_MASK 0x7F +#define PKE_CMD_STCOL_BITS 0x31 +#define PKE_CMD_MPG_MASK 0x7F +#define PKE_CMD_MPG_BITS 0x4A +#define PKE_CMD_DIRECT_MASK 0x7F +#define PKE_CMD_DIRECT_BITS 0x50 +#define PKE_CMD_DIRECTHL_MASK 0x7F +#define PKE_CMD_DIRECTHL_BITS 0x51 +#define PKE_CMD_UNPACK_MASK 0x60 /* unlike the others, only bits 5 and 6 are compared */ +#define PKE_CMD_UNPACK_BITS 0x60 + +/* test given word for particular PKE command bit pattern: true when (word & <cmd>_MASK) equals <cmd>_BITS */ +#define IS_PKE_CMD(word,cmd) (((word) & PKE_CMD_##cmd##_MASK) == 
PKE_CMD_##cmd##_BITS) + + +/* register bitmasks: bit numbers for end (_E, highest bit) and beginning (_B, lowest bit) of fields */ + +/* PKE opcode */ +#define PKE_OPCODE_I_E 31 +#define PKE_OPCODE_I_B 31 +#define PKE_OPCODE_CMD_E 30 +#define PKE_OPCODE_CMD_B 24 +#define PKE_OPCODE_NUM_E 23 +#define PKE_OPCODE_NUM_B 16 +#define PKE_OPCODE_IMM_E 15 +#define PKE_OPCODE_IMM_B 0 + +/* STAT register */ +#define PKE_REG_STAT_FQC_E 28 +#define PKE_REG_STAT_FQC_B 24 +#define PKE_REG_STAT_FDR_E 23 +#define PKE_REG_STAT_FDR_B 23 +#define PKE_REG_STAT_ER1_E 13 +#define PKE_REG_STAT_ER1_B 13 +#define PKE_REG_STAT_ER0_E 12 +#define PKE_REG_STAT_ER0_B 12 +#define PKE_REG_STAT_INT_E 11 +#define PKE_REG_STAT_INT_B 11 +#define PKE_REG_STAT_PIS_E 10 +#define PKE_REG_STAT_PIS_B 10 +#define PKE_REG_STAT_PFS_E 9 +#define PKE_REG_STAT_PFS_B 9 +#define PKE_REG_STAT_PSS_E 8 +#define PKE_REG_STAT_PSS_B 8 +#define PKE_REG_STAT_DBF_E 7 +#define PKE_REG_STAT_DBF_B 7 +#define PKE_REG_STAT_MRK_E 6 +#define PKE_REG_STAT_MRK_B 6 +#define PKE_REG_STAT_PGW_E 3 +#define PKE_REG_STAT_PGW_B 3 +#define PKE_REG_STAT_PEW_E 2 +#define PKE_REG_STAT_PEW_B 2 +#define PKE_REG_STAT_PPS_E 1 +#define PKE_REG_STAT_PPS_B 0 + +#define PKE_REG_STAT_PPS_IDLE 0x00 /* values stored in the STAT PPS field */ +#define PKE_REG_STAT_PPS_WAIT 0x01 +#define PKE_REG_STAT_PPS_DECODE 0x02 +#define PKE_REG_STAT_PPS_XFER 0x03 + +/* DBF register */ +#define PKE_REG_DBF_DF_E 0 +#define PKE_REG_DBF_DF_B 0 + +/* OFST register */ +#define PKE_REG_OFST_OFFSET_E 9 +#define PKE_REG_OFST_OFFSET_B 0 + +/* TOPS register */ +#define PKE_REG_TOPS_TOPS_E 9 +#define PKE_REG_TOPS_TOPS_B 0 + +/* BASE register */ +#define PKE_REG_BASE_BASE_E 9 +#define PKE_REG_BASE_BASE_B 0 + +/* ITOPS register */ +#define PKE_REG_ITOPS_ITOPS_E 9 +#define PKE_REG_ITOPS_ITOPS_B 0 + +/* MODE register */ +#define PKE_REG_MODE_MDE_E 1 +#define PKE_REG_MODE_MDE_B 0 + +/* MARK register */ +#define PKE_REG_MARK_MARK_E 15 +#define PKE_REG_MARK_MARK_B 0 + +/* ITOP register */ +#define PKE_REG_ITOP_ITOP_E 9 +#define PKE_REG_ITOP_ITOP_B 0 + +/* TOP 
register */ +#define PKE_REG_TOP_TOP_E 9 +#define PKE_REG_TOP_TOP_B 0 + +/* MASK register */ +#define PKE_REG_MASK_MASK_E 31 +#define PKE_REG_MASK_MASK_B 0 + +/* CYCLE register */ +#define PKE_REG_CYCLE_WL_E 15 +#define PKE_REG_CYCLE_WL_B 8 +#define PKE_REG_CYCLE_CL_E 7 +#define PKE_REG_CYCLE_CL_B 0 + +/* ERR register */ +#define PKE_REG_ERR_ME1_E 2 +#define PKE_REG_ERR_ME1_B 2 +#define PKE_REG_ERR_ME0_E 1 +#define PKE_REG_ERR_ME0_B 1 +#define PKE_REG_ERR_MII_E 0 +#define PKE_REG_ERR_MII_B 0 + + +/* source-addr for words written to VU/GPUIF ports; repeats, with the same values, the PKE0_SRCADDR/PKE1_SRCADDR definitions earlier in this header */ +#define PKE0_SRCADDR 0x20000020 /* from 1998-01-22 e-mail plans */ +#define PKE1_SRCADDR 0x20000024 /* from 1998-01-22 e-mail plans */ + + +/* UNPACK opcodes: PKE_UNPACK places vn in bits 3..2 and vl in bits 1..0 */ +#define PKE_UNPACK(vn,vl) ((vn) << 2 | (vl)) +#define PKE_UNPACK_S_32 PKE_UNPACK(0, 0) +#define PKE_UNPACK_S_16 PKE_UNPACK(0, 1) +#define PKE_UNPACK_S_8 PKE_UNPACK(0, 2) +#define PKE_UNPACK_V2_32 PKE_UNPACK(1, 0) +#define PKE_UNPACK_V2_16 PKE_UNPACK(1, 1) +#define PKE_UNPACK_V2_8 PKE_UNPACK(1, 2) +#define PKE_UNPACK_V3_32 PKE_UNPACK(2, 0) +#define PKE_UNPACK_V3_16 PKE_UNPACK(2, 1) +#define PKE_UNPACK_V3_8 PKE_UNPACK(2, 2) +#define PKE_UNPACK_V4_32 PKE_UNPACK(3, 0) +#define PKE_UNPACK_V4_16 PKE_UNPACK(3, 1) +#define PKE_UNPACK_V4_8 PKE_UNPACK(3, 2) +#define PKE_UNPACK_V4_5 PKE_UNPACK(3, 3) + + +/* MASK register sub-field definitions */ +#define PKE_MASKREG_INPUT 0 +#define PKE_MASKREG_ROW 1 +#define PKE_MASKREG_COLUMN 2 +#define PKE_MASKREG_NOTHING 3 + + +/* STMOD register field definitions: values of the MODE register's MDE field */ +#define PKE_MODE_INPUT 0 +#define PKE_MODE_ADDROW 1 +#define PKE_MODE_ACCROW 2 + + +/* extract a MASK register sub-field for row [0..3] and column [0..3] */ +/* MASK register is laid out as 2-bit values in this r-c order, highest bits first; m<r><c> starts at bit 8*r + 2*c */ +/* m33 m32 m31 m30 m23 m22 m21 m20 m13 m12 m11 m10 m03 m02 m01 m00 */ +#define PKE_MASKREG_GET(me,row,col) \ +((((me)->regs[PKE_REG_MASK][0]) >> (8*(row) + 2*(col))) & 0x03) + + +/* and now a few definitions that rightfully belong elsewhere */ 
+#ifdef PKE_DEBUG
+
+/* GPUIF addresses */
+#define GPUIF_PATH3_FIFO_ADDR 0x10008020 /* data from CORE */
+#define GPUIF_PATH1_FIFO_ADDR 0x10008030 /* data from VU1 */
+#define GPUIF_PATH2_FIFO_ADDR 0x10008040 /* data from PKE1 */
+
+/* VU STAT register */
+#define VU_REG_STAT_VGW_E 4
+#define VU_REG_STAT_VGW_B 4
+#define VU_REG_STAT_VBS_E 0
+#define VU_REG_STAT_VBS_B 0
+
+/* VU PC pseudo-registers */ /* omitted from 1998-01-22 e-mail plans */
+#define VU0_PC_START 0x20025000
+#define VU1_PC_START 0x20026000
+
+/* VU source-addr tracking tables */ /* changed from 1998-01-22 e-mail plans */
+#define VU0_MEM0_SRCADDR_START 0x21000000
+#define VU0_MEM1_SRCADDR_START 0x21004000
+#define VU1_MEM0_SRCADDR_START 0x21008000
+#define VU1_MEM1_SRCADDR_START 0x2100C000
+
+#endif /* PKE_DEBUG */
+
+
+/* operations */
+/* unsigned 32-bit mask covering bits 0..width inclusive; the shift is split in two so that width == 31 never shifts by 32 */
+#define BIT_MASK(width) ((((unsigned_4)1 << (width)) << 1) - 1)
+/* e.g.: BIT_MASK(4) = 00011111 */
+
+/* mask between given bit numbers, both inclusive: begin is the lowest bit of the field, end the highest (MSB) */
+#define BIT_MASK_BTW(begin,end) (BIT_MASK(end) & ~((((unsigned_4)1) << (begin)) - 1))
+/* e.g.: BIT_MASK_BTW(4,11) = 0000111111110000 */
+
+/* set bitfield value: bits begin..end of lvalue are replaced by the low bits of value; lvalue is expanded twice */
+#define BIT_MASK_SET(lvalue,begin,end,value) \
+do { \
+  (lvalue) &= ~BIT_MASK_BTW(begin,end); \
+  (lvalue) |= ((((unsigned_4)(value)) << (begin)) & BIT_MASK_BTW(begin,end)); \
+} while(0)
+
+/* get bitfield value: bits begin..end of rvalue, shifted down so that bit `begin' lands at bit 0 */
+#define BIT_MASK_GET(rvalue,begin,end) \
+  (((rvalue) & BIT_MASK_BTW(begin,end)) >> (begin))
+/* e.g., BIT_MASK_GET(0000111100001111, 2, 8) = 0000000001000011 */
+
+/* get bitfield value, sign-extended to given bit number; after the shift in BIT_MASK_GET the field's sign bit is bit (end-begin) */
+#define BIT_MASK_GET_SX(rvalue,begin,end,sx) \
+  (BIT_MASK_GET(rvalue,begin,end) | ((BIT_MASK_GET(rvalue,begin,end) & BIT_MASK_BTW((end)-(begin),(end)-(begin))) ? BIT_MASK_BTW((end)-(begin),sx) : 0))
+/* e.g., BIT_MASK_GET_SX(0000111100001111, 2, 8, 15) = 1111111111000011 */
+
+
+/* These ugly macro hacks allow succinct bitfield accesses: `reg' and `flag' are pasted into PKE_REG_<reg> and PKE_REG_<reg>_<flag>_B/_E */
+/* set a bitfield in a register by "name"; only word 0 of the register quadword is touched */
+#define PKE_REG_MASK_SET(me,reg,flag,value) \
+  BIT_MASK_SET(((me)->regs[PKE_REG_##reg][0]), \
+               PKE_REG_##reg##_##flag##_B, PKE_REG_##reg##_##flag##_E, \
+               (value))
+
+/* get a bitfield from a register by "name"; only word 0 of the register quadword is read */
+#define PKE_REG_MASK_GET(me,reg,flag) \
+  BIT_MASK_GET(((me)->regs[PKE_REG_##reg][0]), \
+               PKE_REG_##reg##_##flag##_B, PKE_REG_##reg##_##flag##_E)
+
+
+#define PKE_LIMIT(value,max) ((value) > (max) ? (max) : (value)) /* clamp value from above; arguments may be evaluated twice */
+
+
 /* One row in the FIFO */ struct fifo_quadword { @@ -76,6 +343,8 @@ struct fifo_quadword quadword data; /* source main memory address (or 0: unknown) */ address_word source_address; + /* DMA tag present in lower 64 bits */ + unsigned_4 dma_tag_present; }; @@ -89,9 +358,6 @@ struct pke_device int pke_number; int flags; - address_word register_memory_addr; - address_word fifo_memory_addr; - /* quadword registers */ quadword regs[PKE_NUM_REGS]; @@ -100,10 +366,11 @@ struct pke_device int fifo_num_elements; /* no. of quadwords occupied in FIFO */ int fifo_buffer_size; /* no. of quadwords of space in FIFO */ FILE* fifo_trace_file; /* or 0 for no trace */ + /* XXX: assumes FIFOs grow indefinitely */ - /* index into FIFO of current instruction */ - int program_counter; - + /* PC */ + int fifo_pc; /* 0 .. (fifo_num_elements-1): quadword index of next instruction */ + int qw_pc; /* 0 .. 3: word index of next instruction */ }; @@ -113,5 +380,4 @@ struct pke_device /* none at present */ - #endif /* H_PKE_H */