From 1fec9927c1d4aaa673e1195138dec23afafa86b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Quei=C3=9Fner?=
Date: Fri, 1 Jul 2016 09:06:55 +0200
Subject: [PATCH] Splits disassembler from expdump, adds disassembler options.
 Adds better assembler interface, adds option for outputting the generated
 listings.

---
Review notes (this section is ignored by git-am and is not part of the
commit message):

 * libvm/as/as.c, option -l: added the missing `break`. Without it the
   case fell through into the -o case, which reopened the same path with
   "wb" as the binary output. The as.c hunk header, the offsets of the two
   following as.c hunks and the diffstat were adjusted for the extra line.
 * libvm/as/as.c: "Could not open %s.\n" was printed without an argument;
   optarg is now passed. The unused yytext argument of
   fprintf(listing, "\t", ...) was dropped.
 * `&current` was restored in three places where the text had been
   mis-encoded.
 * The system headers of the two new files were reconstructed from what
   the code uses (abort, memcmp, bool, FILE) - TODO confirm against the
   repository. The two bare `#include` context lines in libvm/expdump.c
   lost their header names as well and could not be reconstructed.
 * Line breaks, blank context lines and indentation were rebuilt from the
   hunk line counts; verify the whitespace against the tree before
   applying. The as.c blob id in the `index` line predates these edits.

 libvm/Makefile       |   2 +-
 libvm/as/Makefile    |   2 +-
 libvm/as/as.c        | 101 +++++++++++++++++++++++++----
 libvm/disassembler.c | 145 +++++++++++++++++++++++++++++++++++++++++++
 libvm/disassembler.h |  22 +++++++
 libvm/expdump.c      | 137 +---------------------------------------
 6 files changed, 259 insertions(+), 150 deletions(-)
 create mode 100644 libvm/disassembler.c
 create mode 100644 libvm/disassembler.h

diff --git a/libvm/Makefile b/libvm/Makefile
index af2c758..c5d4071 100644
--- a/libvm/Makefile
+++ b/libvm/Makefile
@@ -6,7 +6,7 @@ all: explink expdump emulator as
 explink: explink.c
 	$(CC) -g -o bin/$@ $^
 
-expdump: expdump.c mnemonics.c
+expdump: expdump.c mnemonics.c disassembler.c
 	$(CC) -g -o bin/$@ $^
 
 emulator: emulator.c vm.c
diff --git a/libvm/as/Makefile b/libvm/as/Makefile
index 781d0a8..19cee5f 100644
--- a/libvm/as/Makefile
+++ b/libvm/as/Makefile
@@ -5,7 +5,7 @@
 
 all: as
 
-as: as.c tokens.c ../mnemonics.c
+as: as.c tokens.c ../mnemonics.c ../disassembler.c
 	gcc -g -o ../bin/$@ $^
 
 tokens.c: tokens.y
diff --git a/libvm/as/as.c b/libvm/as/as.c
index 0bfc9c7..d4c8e63 100644
--- a/libvm/as/as.c
+++ b/libvm/as/as.c
@@ -11,6 +11,7 @@
 #include "tokens.h"
 #include "../vm.h"
 #include "../mnemonics.h"
+#include "../disassembler.h"
 
 struct llist
 {
@@ -30,11 +31,14 @@ void list_insert(llist_t **list, const char *name, uint32_t value)
 	*list = item;
 }
 
+int listFound = 0;
 uint32_t list_find(llist_t **list, const char *name)
 {
+	listFound = 0;
 	for(llist_t *it = *list; it != NULL; it = it->next)
 	{
 		if(strcmp(it->name, name) == 0) {
+			listFound = 1;
 			return it->value;
 		}
 	}
@@ -53,6 +57,8 @@ void list_free(llist_t **list)
 
 
 FILE *output;
+FILE *listing = NULL;
+
 int listSymbols = 0;
 
 int entryPoint = 0;
@@ -105,6 +111,11 @@ void assemble()
 
 			list_insert(&labels, yytext, entryPoint);
 
+			if(listing)
+			{
+				fprintf(listing, "%s:\n", yytext);
+			}
+
 			tok = yylex();
 		}
 		if(tok != TOK_NEWLINE)
@@ -127,7 +138,24 @@ void assemble()
 					exit(1);
 				}
 				else if(strcmp(mnemonics[i].name, yytext) == 0) {
-					current = mnemonics[i].instr;
+					// Copy instruction here, but account for
+					// already applied modifiers by
+					// only copying when values still zero.
+
+					#define COPY_IF(prop) \
+						if(!current.prop) \
+							current.prop = mnemonics[i].instr.prop;
+					COPY_IF(execZ );
+					COPY_IF(execN);
+					COPY_IF(input0);
+					COPY_IF(input1);
+					COPY_IF(command);
+					COPY_IF(cmdinfo);
+					COPY_IF(flags);
+					COPY_IF(output);
+					COPY_IF(argument);
+					#undef COPY_IF
+					// current = mnemonics[i].instr;
 					break;
 				}
 			}
@@ -136,6 +164,7 @@ void assemble()
 
 			apply_modifiers(&current);
 
+			int reqPatch = 0;
 			if(tok != TOK_NEWLINE)
 			{
 				switch(tok)
@@ -151,9 +180,16 @@ void assemble()
 						break;
 					case TOK_REFERENCE:
 					{
-						// insert patch here for deferred argument modification
 						// (yytext + 1) removes the leading @
-						list_insert(&patches, yytext + 1, entryPoint);
+						// check if we already had a label with this name
+						uint32_t target = list_find(&labels, yytext + 1);
+						if(listFound)
+							current.argument = target;
+						else {
+							// insert patch here for deferred argument modification
+							list_insert(&patches, yytext + 1, entryPoint);
+							reqPatch = 1;
+						}
 						break;
 					}
 					default:
@@ -172,6 +208,13 @@ void assemble()
 			// write command:
 			fwrite(&current, sizeof(instruction_t), 1, output);
 
+			if(listing)
+			{
+				if(reqPatch) fprintf(listing, "\t; Requires patch:\n");
+				fprintf(listing, "\t");
+				disassemble(&current, 1, entryPoint, listing);
+			}
+
 			// Increase command index by one
 			entryPoint += 1;
 		}
@@ -180,10 +223,14 @@ void assemble()
 
 int main(int argc, char **argv)
 {
-	output = stdout;
+	// configure disassembler:
+	disasmOptions.outputAddresses = false;
+	output = NULL;
+	listing = NULL;
+
 
 	int c;
-	while ((c = getopt(argc, argv, "o:e:s")) != -1)
+	while ((c = getopt(argc, argv, "o:e:sl:L")) != -1)
 	{
 		switch (c)
 		{
@@ -193,17 +240,33 @@ int main(int argc, char **argv)
 			case 's':
 				listSymbols = 1;
 				break;
+			case 'L':
+				listing = stdout;
+				break;
+			case 'l':
+			{
+				if(listing != NULL) {
+					fprintf(stderr, "-L or -l can only be used mutual exclusive and only once.\n");
+					exit(1);
+				}
+				listing = fopen(optarg, "w");
+				if(listing == NULL) {
+					fprintf(stderr, "Could not open %s.\n", optarg);
+					exit(1);
+				}
+				break;
+			}
 			case 'o':
-			{
-				FILE *f = fopen(optarg, "wb");
-				if(f == NULL) {
-					fprintf(stderr, "%f not found.\n", optarg);
-					abort();
+			{
+				if(output != NULL) {
+					fprintf(stderr, "-o can be used only once.\n");
+					exit(1);
 				}
-				if(output != stdout) {
-					fclose(output);
+				output = fopen(optarg, "wb");
+				if(output == NULL) {
+					fprintf(stderr, "%s not found.\n", optarg);
+					exit(1);
 				}
-				output = f;
 				break;
 			}
 			case '?':
@@ -218,6 +281,13 @@ int main(int argc, char **argv)
 				abort();
 		}
 	}
+
+	if(output == NULL)
+	{
+		fprintf(stderr, "An output file must be given by -o fileName\n");
+		exit(1);
+	}
+
 	for (int index = optind; index < argc; index++)
 	{
 		FILE *f = fopen(argv[index], "r");
@@ -245,6 +315,11 @@ int main(int argc, char **argv)
 	for(llist_t *it = patches; it != NULL; it = it->next)
 	{
 		uint32_t target = list_find(&labels, it->name);
+		if(listFound == 0)
+		{
+			fprintf(stderr, "Could not find label %s.\n", it->name);
+			exit(1);
+		}
 
 		// Seek to the target address
 		fseek(output, sizeof(instruction_t) * it->value + 4, SEEK_SET);
diff --git a/libvm/disassembler.c b/libvm/disassembler.c
new file mode 100644
index 0000000..b2fe33b
--- /dev/null
+++ b/libvm/disassembler.c
@@ -0,0 +1,145 @@
+#include "disassembler.h"
+#include "mnemonics.h"
+#include <stdlib.h>
+#include <string.h>
+
+struct disassembler_options disasmOptions =
+{
+	false,
+	true
+};
+
+static const char *commandStrings[] =
+{
+	// VM_CMD_COPY 0
+	"copy",
+	// VM_CMD_STORE 1
+	"store",
+	// VM_CMD_LOAD 2
+	"load",
+	// VM_CMD_GET 3
+	"get",
+	// VM_CMD_SET 4
+	"set",
+	// VM_CMD_BPGET 5
+	"bpget",
+	// VM_CMD_BPSET 6
+	"bpset",
+	// VM_CMD_CPGET 7
+	"cpget",
+	// VM_CMD_MATH 8
+	"math",
+	// VM_CMD_SPGET 9
+	"spget",
+	// VM_CMD_SPSET 10
+	"spset",
+	// VM_CMD_SYSCALL 11
+	"syscall",
+	// VM_CMD_HWIO 12
+	"hwio",
+};
+
+void disassemble(instruction_t *list, uint32_t count, uint32_t base, FILE *f)
+{
+	bool v = disasmOptions.verbose;
+	for (int i = 0; i < count; i++)
+	{
+		instruction_t instr = list[i];
+		if(disasmOptions.outputAddresses)
+			fprintf(f, "%8X: ", base + i);
+
+		const mnemonic_t *knownInstruction = NULL;
+
+		for (int j = 0; mnemonics[j].name != NULL; j++)
+		{
+			if (memcmp(&instr, &mnemonics[j].instr, sizeof(instruction_t) - sizeof(uint32_t)) == 0) {
+				knownInstruction = &mnemonics[j];
+				break;
+			}
+		}
+
+		if (knownInstruction != NULL)
+		{
+			fprintf(f, "%s", knownInstruction->name);
+			if (instr.argument != 0 || instr.input0 == VM_INPUT_ARG)
+			{
+				if(instr.output == VM_OUTPUT_JUMP || instr.output == VM_OUTPUT_JUMPR)
+					fprintf(f, " 0x%X", instr.argument);
+				else
+					fprintf(f, " %d", instr.argument);
+			}
+			fprintf(f, "\n");
+			continue;
+		}
+
+		switch (instr.execN)
+		{
+			case VM_EXEC_0: fprintf(f, "[ex(n)=0] "); break;
+			case VM_EXEC_1: fprintf(f, "[ex(n)=1] "); break;
+			case VM_EXEC_X: if (v) fprintf(f, "[ex(n)=x] "); break;
+			default: fprintf(stderr, "Invalid code @%d\n", base + i); abort();
+		}
+
+		switch (instr.execZ)
+		{
+			case VM_EXEC_0: fprintf(f, "[ex(z)=0] "); break;
+			case VM_EXEC_1: fprintf(f, "[ex(z)=1] "); break;
+			case VM_EXEC_X: if (v) fprintf(f, "[ex(z)=x] "); break;
+			default: fprintf(stderr, "Invalid code @%d\n", base + i); abort();
+		}
+
+		switch (instr.input0)
+		{
+			case VM_INPUT_ZERO: if (v) fprintf(f, "[i0:zero] "); break;
+			case VM_INPUT_POP: fprintf(f, "[i0:pop] "); break;
+			case VM_INPUT_PEEK: fprintf(f, "[i0:peek] "); break;
+			case VM_INPUT_ARG: fprintf(f, "[i0:arg] "); break;
+			default: fprintf(stderr, "Invalid code @%d\n", base + i); abort();
+		}
+
+		switch (instr.input1)
+		{
+			case VM_INPUT_ZERO: if (v) fprintf(f, "[i1:zero] "); break;
+			case VM_INPUT_POP: fprintf(f, "[i1:pop] "); break;
+			// case VM_INPUT_PEEK: fprintf(f, "[i1:peek] "); break;
+			// case VM_INPUT_ARG: fprintf(f, "[i1:arg] "); break;
+			default: fprintf(stderr, "Invalid code @%d\n", base + i); abort();
+		}
+
+		if (instr.command <= 12)
+			fprintf(f, "%s", commandStrings[instr.command]);
+		else
+			fprintf(f, "undefined [cmd:%d]", instr.command);
+
+		if (instr.cmdinfo != 0)
+		{
+			fprintf(f, " [ci:%d]", instr.cmdinfo);
+		}
+
+		if (instr.argument != 0 || instr.input0 == VM_INPUT_ARG)
+		{
+			if (instr.output == VM_OUTPUT_JUMP || instr.output == VM_OUTPUT_JUMPR)
+				fprintf(f, " 0x%X", instr.argument);
+			else
+				fprintf(f, " %d", instr.argument);
+		}
+
+		switch (instr.flags)
+		{
+			case VM_FLAG_NO: if (v) fprintf(f, " [f:no]"); break;
+			case VM_FLAG_YES: fprintf(f, " [f:yes]"); break;
+			default: fprintf(stderr, "Invalid code @%d\n", base + i); abort();
+		}
+
+		switch (instr.output)
+		{
+			case VM_OUTPUT_DISCARD: fprintf(f, " [r:discard]"); break;
+			case VM_OUTPUT_JUMP: fprintf(f, " [r:jump]"); break;
+			case VM_OUTPUT_JUMPR: fprintf(f, " [r:jumpr]"); break;
+			case VM_OUTPUT_PUSH: fprintf(f, " [r:push]"); break;
+			default: fprintf(stderr, "Invalid code @%d\n", base + i); abort();
+		}
+
+		fprintf(f, "\n");
+	}
+}
\ No newline at end of file
diff --git a/libvm/disassembler.h b/libvm/disassembler.h
new file mode 100644
index 0000000..83df39b
--- /dev/null
+++ b/libvm/disassembler.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include "vm.h"
+#include <stdbool.h>
+#include <stdio.h>
+
+struct disassembler_options
+{
+	bool verbose;
+	bool outputAddresses;
+};
+
+// should the disassembler be more verbose?
+extern struct disassembler_options disasmOptions;
+
+// Disassembles a given list of instructions
+void disassemble(
+	instruction_t *list, // Array with
+	uint32_t count,      // count instructions.
+	uint32_t base,       // The base instruction offset, "entry point"
+	FILE *f              // The file where the disassembly should be printed.
+);
\ No newline at end of file
diff --git a/libvm/expdump.c b/libvm/expdump.c
index 8321ad4..ae63e82 100644
--- a/libvm/expdump.c
+++ b/libvm/expdump.c
@@ -7,6 +7,7 @@
 
 #include "exp.h"
 #include "vm.h"
+#include "disassembler.h"
 
 #include
 #include
@@ -22,140 +23,6 @@
 
 #define DEBUG_VAL(x) fprintf(stderr, #x " = %d\n", x)
 
-#include "mnemonics.h"
-
-const char *commandStrings[] =
-{
-	// VM_CMD_COPY 0
-	"copy",
-	// VM_CMD_STORE 1
-	"store",
-	// VM_CMD_LOAD 2
-	"load",
-	// VM_CMD_GET 3
-	"get",
-	// VM_CMD_SET 4
-	"set",
-	// VM_CMD_BPGET 5
-	"bpget",
-	// VM_CMD_BPSET 6
-	"bpset",
-	// VM_CMD_CPGET 7
-	"cpget",
-	// VM_CMD_MATH 8
-	"math",
-	// VM_CMD_SPGET 9
-	"spget",
-	// VM_CMD_SPSET 10
-	"spset",
-	// VM_CMD_SYSCALL 11
-	"syscall",
-	// VM_CMD_HWIO 12
-	"hwio",
-};
-
-int disassembleVerbose = 0;
-
-void disassemble(instruction_t *list, uint32_t count, uint32_t base, FILE *f)
-{
-	int v = disassembleVerbose;
-	for (int i = 0; i < count; i++)
-	{
-		instruction_t instr = list[i];
-
-		fprintf(f, "%8X: ", base + i);
-
-		const mnemonic_t *knownInstruction = NULL;
-
-		for (int j = 0; mnemonics[j].name != NULL; j++)
-		{
-			if (memcmp(&instr, &mnemonics[j].instr, sizeof(instruction_t) - sizeof(uint32_t)) == 0) {
-				knownInstruction = &mnemonics[j];
-				break;
-			}
-		}
-
-		if (knownInstruction != NULL)
-		{
-			fprintf(f, "%s", knownInstruction->name);
-			if (instr.argument != 0 || instr.input0 == VM_INPUT_ARG)
-			{
-				if(instr.output == VM_OUTPUT_JUMP || instr.output == VM_OUTPUT_JUMPR)
-					fprintf(f, " 0x%X", instr.argument);
-				else
-					fprintf(f, " %d", instr.argument);
-			}
-			fprintf(f, "\n");
-			continue;
-		}
-
-		switch (instr.execN)
-		{
-			case VM_EXEC_0: fprintf(f, "[ex(n)=0] "); break;
-			case VM_EXEC_1: fprintf(f, "[ex(n)=1] "); break;
-			case VM_EXEC_X: if (v) fprintf(f, "[ex(n)=x] "); break;
-		}
-
-		switch (instr.execZ)
-		{
-			case VM_EXEC_0: fprintf(f, "[ex(z)=0] "); break;
-			case VM_EXEC_1: fprintf(f, "[ex(z)=1] "); break;
-			case VM_EXEC_X: if (v) fprintf(f, "[ex(z)=x] "); break;
-		}
-
-		switch (instr.input0)
-		{
-			case VM_INPUT_ZERO: if (v) fprintf(f, "[i0:zero] "); break;
-			case VM_INPUT_POP: fprintf(f, "[i0:pop] "); break;
-			case VM_INPUT_PEEK: fprintf(f, "[i0:peek] "); break;
-			case VM_INPUT_ARG: fprintf(f, "[i0:arg] "); break;
-		}
-
-		switch (instr.input1)
-		{
-			case VM_INPUT_ZERO: if (v) fprintf(f, "[i1:zero] "); break;
-			case VM_INPUT_POP: fprintf(f, "[i1:pop] "); break;
-			// case VM_INPUT_PEEK: fprintf(f, "[i1:peek] "); break;
-			// case VM_INPUT_ARG: fprintf(f, "[i1:arg] "); break;
-		}
-
-		if (instr.command <= 12)
-			fprintf(f, "%s", commandStrings[instr.command]);
-		else
-			fprintf(f, "undefined [cmd:%d]", instr.command);
-
-		if (instr.cmdinfo != 0)
-		{
-			fprintf(f, " [ci:%d]", instr.cmdinfo);
-		}
-
-		if (instr.argument != 0 || instr.input0 == VM_INPUT_ARG)
-		{
-			if (instr.output == VM_OUTPUT_JUMP || instr.output == VM_OUTPUT_JUMPR)
-				fprintf(f, " 0x%X", instr.argument);
-			else
-				fprintf(f, " %d", instr.argument);
-		}
-
-		switch (instr.flags)
-		{
-			case VM_FLAG_NO: if (v) fprintf(f, " [f:no]"); break;
-			case VM_FLAG_YES: fprintf(f, " [f:yes]"); break;
-		}
-
-		switch (instr.output)
-		{
-			case VM_OUTPUT_DISCARD: if (v) fprintf(f, " [r:discard]"); break;
-			case VM_OUTPUT_JUMP: fprintf(f, " [r:jump]"); break;
-			case VM_OUTPUT_JUMPR: fprintf(f, " [r:jumpr]"); break;
-			case VM_OUTPUT_PUSH: fprintf(f, " [r:push]"); break;
-		}
-
-		fprintf(f, "\n");
-	}
-}
-
-
 int main(int argc, char **argv)
 {
 	opterr = 0;
@@ -173,7 +40,7 @@ int main(int argc, char **argv)
 			case 'H': headers = 1; break;
 			case 's': dumpSections = 1; break;
 			case 'm': dumpMetas = 1; break;
-			case 'D': disassembleVerbose = 1;
+			case 'D': disasmOptions.verbose = true;
 			case 'd': disassembleSections = 1; break;
 			case '?':
 				if (optopt == 'o' || optopt == 'c' || optopt == 'd')