From 7b5d44aa072d0bd3509fe51f49786de184bd4ccc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Quei=C3=9Fner?= Date: Sun, 22 May 2016 17:17:30 +0200 Subject: [PATCH] Adds more detailed documentation. --- prototypes/supervm-asm/assembler.cs | 43 +++++++--- prototypes/supervm-asm/testcode.asm | 9 +- prototypes/supervm/supervm.md | 125 ++++++++++++++++++++++++++-- prototypes/supervm/vm.h | 2 +- 4 files changed, 156 insertions(+), 23 deletions(-) diff --git a/prototypes/supervm-asm/assembler.cs b/prototypes/supervm-asm/assembler.cs index 6852172..7df30c8 100644 --- a/prototypes/supervm-asm/assembler.cs +++ b/prototypes/supervm-asm/assembler.cs @@ -10,8 +10,6 @@ namespace supervm_asm { class Program { - - static void Main(string[] args) { if(args.Contains("-gen-code")) @@ -24,17 +22,19 @@ namespace supervm_asm foreach(var file in args.Where(a => !a.StartsWith("-") && Path.GetExtension(a) == ".asm")) { var output = Path.ChangeExtension(file, ".bin"); - var code = Assembler.Assemble(File.ReadAllText(file)); + var assembly = Assembler.Assemble(File.ReadAllText(file)); - Console.WriteLine("{0}:", output); - for (int i = 0; i < code.Length; i++) + var code = assembly.Code; + + Console.WriteLine("{0}*{1}:", output, code.Count); + for (int i = 0; i < code.Count; i++) { - Console.Write("; {0:X3} ", i); - PrintInstruction(code[i]); + Console.Write("; {0:D3} ", i); + PrintInstruction(code[i], assembly.Annotation[i]); } using(var fs = File.Open(output, FileMode.Create, FileAccess.Write)) { - for(int i = 0; i < code.Length; i++) + for(int i = 0; i < code.Count; i++) { var bits = BitConverter.GetBytes(code[i]); if(BitConverter.IsLittleEndian == false) @@ -47,7 +47,7 @@ namespace supervm_asm } } - static void PrintInstruction(ulong instr) + static void PrintInstruction(ulong instr, string comment) { var str = Convert.ToString((long)instr, 2).PadLeft(64, '0'); @@ -71,23 +71,39 @@ namespace supervm_asm Console.Write("{0} ", str.Substring(portion.Start, portion.Length)); } 
Console.ForegroundColor = fg; - Console.WriteLine(); + Console.WriteLine(" {0}", comment); } } + public class VMAssembly + { + private readonly ulong[] code; + private readonly string[] origins; + + public VMAssembly(ulong[] code, string[] origins) + { + this.code = code; + this.origins = origins; + } + + public IReadOnlyList Code => this.code; + public IReadOnlyList Annotation => this.origins; + } + public static class Assembler { static Regex annotationMatcher = new Regex(@"\[\s*(.*?)\s*\]", RegexOptions.Compiled); static Regex labelMatcher = new Regex(@"^(\w+):\s*(.*)\s*$", RegexOptions.Compiled); static Regex instructionMatcher = new Regex(@"(\w+)(?:\s+([@-]?\w+|'.'))?", RegexOptions.Compiled); - public static ulong[] Assemble(string src) + public static VMAssembly Assemble(string src) { var lines = src.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries); var patches = new Dictionary(); var labels = new Dictionary(); var code = new List(); + var source = new List(); for (int i = 0; i < lines.Length; i++) { var line = lines[i].Trim(); @@ -96,6 +112,8 @@ namespace supervm_asm if (idx >= 0) line = line.Substring(0, idx); } + + var uncommented = line; { // Process labels var match = labelMatcher.Match(line); @@ -256,6 +274,7 @@ namespace supervm_asm encoded |= ((ulong)argument << 32); code.Add(encoded); + source.Add(uncommented); } } @@ -270,7 +289,7 @@ namespace supervm_asm } } - return code.ToArray(); + return new VMAssembly(code.ToArray(), source.ToArray()); } diff --git a/prototypes/supervm-asm/testcode.asm b/prototypes/supervm-asm/testcode.asm index 3c4f324..d6540dd 100644 --- a/prototypes/supervm-asm/testcode.asm +++ b/prototypes/supervm-asm/testcode.asm @@ -17,11 +17,13 @@ ; void print_str(char *string); print_str: - spget ; enter function by - bpset ; saving the parents base pointer + bpget ; enter function by + spget ; saving the parents base pointer + bpset ; and storing the current stack pointer + ; char *ptr = string; - get -1 ; get 
argument 0 into our local variable '#1' + get -2 ; get argument 0 into our local variable '#1' ; while(*ptr) { print_str_loop: @@ -45,4 +47,5 @@ print_str_end_loop: ; return bpget ; leave function spset ; by restoring parent base pointer + bpset jmpi ; and jumping back. diff --git a/prototypes/supervm/supervm.md b/prototypes/supervm/supervm.md index 46618a5..e4bd4f9 100644 --- a/prototypes/supervm/supervm.md +++ b/prototypes/supervm/supervm.md @@ -12,16 +12,61 @@ It is targeted at uses who program SuperVM with the native assembly language, system programmers who want to include the virtual machine in their system or create their own SuperVM implementation. -## The Stack +## Concepts + +SuperVM is a virtual machine that emulates a 32 bit stack machine. Instead of utilizing +registers, operations take their operands from the stack and push their results to +it. + +An instruction is split into two parts: +The instruction configuration and the command. The command defines what operation should +be performed (memory access, calculation, ...), whereas the configuration defines the +behaviour of the instruction (stack/flag-modifications). + +## Memory Areas +The virtual machine has three separate memory areas. Each area serves a specific +purpose and should not overlap the others. + +### Code Memory +The code memory contains an immutable block of code that is instruction indexable. +Each instruction is 64 bit wide. + +### Stack Memory The virtual machine utilizes a stack to provide operands to instructions. This stack stores temporary values the program is working with. +Each entry on the stack is a 32 bit value that is mostly interpreted as +a pointer, an index or an unsigned or signed integer. It is also possible +to store a 32 bit IEEE floating point number on the stack. + +The size of the stack is defined by the implementation, but it should contain at +least 1024 entries. 
This allows a fair recursive depth of 128 recursions with an average +of 6 local variables per function call. + +### Data Memory +SuperVM also provides a memory model that allows storing persistent data that is +accessed by different parts of the code. +The data memory is byte accessible and can be written or read. + +It is implementation defined how the memory is managed and accessed. It can be a +sparse memory with different sections, it could utilize a software-implemented paging +process or just be a flat chunk of memory. + +As most programs require a minimum of global variables, the data memory should be +at least 16kB large. + +Every pointer that accesses data memory (e.g. via `store` and `load`) contains the +address of a byte in memory, starting with zero. + ## Registers and Flags The SuperVM virtual machine is a stack machine, but has also some control registers that can be set with special instructions. The registers mainly control stack access or control flow. +Each register has a size of 32 bits. The only exception is the flag register, which +contains a single bit per flag. + | Mnemonic | Register | Function | |----------|---------------|-------------------------------------------------| | SP | Stack Pointer | Stores the current 'top' position of the stack. | @@ -29,18 +74,26 @@ control stack access or control flow. | CP | Code Pointer | Stores the instruction which is executed next. | | FG | Flag Register | Stores the state of the flags. | +Stack, Base and Code Pointer store indexes instead of actual memory addresses. +This prevents the VM from executing invalid instructions, as the code pointer +always points to the start of an instruction. + +Unlike on most current CPUs, the stack and base pointer grow upwards: +each push increments the stack pointer by one, each pop decrements it. + +All registers are initialized to zero. + ### Stack Pointer The stack pointer points to the top of the stack. 
Each `push` operation increases the stack pointer by one, each `pop` operation reduces it by one. -### Base Pointer +### Base Pointer and Function Calls The base pointer is a pointer that can be set to access the stack relative to it. -The indended use is to create a stack frame with the base pointer by pushing the -previous base pointer to the stack and setting the new base pointer to the current -stack pointer. +This relative access is done by the commands `get` and `set`. -Returning a function with this mechanism is moving the stack pointer to the current -base pointer, then popping the previous base pointer from the stack. +The base pointer is designed to create stack frames for functions with local variables +as it is not possible to access local variables on the stack with only push and pop +operations. ### Code Pointer The code pointer contains the instruction which is executed next. Modifying the @@ -74,6 +127,11 @@ The execution mode checks whether the instruction will be execution or not. The depends on the state of the flags. An `X` means "Don't care", a `0` means the flag must be cleared and a `1` means the flag must be set. +| State | Binary Representation | +| X | 0b00 | +| 0 | 0b10 | +| 1 | 0b11 | + An instruction is only executed when all conditions are met. | Flag | Range | @@ -255,3 +313,56 @@ Each mnemonic declares a specific configuration of an instruction. | shr | no | pop | pop | math | 14 | push | no | TO BE SPECIFIED | | syscall | yes | zero | zero | syscall | 0 | discard | no | Mnemonic for a generic syscall. | | hwio | yes | zero | zero | hwio | 0 | discard | no | Mnemonic for a generic hwio. | + +## Function Calls + +The following chapter defines the SuperVM calling convention. It is required that all +functions conform to this convention. + +To call a function, it is required that the return address is pushed to the stack. +After this, a jump is taken to the function address. 
+ + call: + push @returnPoint ; Pushing returnPoint as the return address + jmp @function ; Jumps to the function + returnPoint: + +SuperVM provides the instruction `cpget`, which by default pushes the address of the +second next instruction, which resembles the code above. This behaviour allows position +independent code: + + call: + cpget ; pushes implicit returnPoint + jmp @function ; Calls function + +Functions can now return by calling `ret` when the return address is on top of the stack. +A simple function that does a system call may look like this: + + function: + syscall + ret + +As most functions utilize local variables, a stack frame is required. +Creating this stack frame is done by pushing the current base pointer, then +setting the base pointer to the current stack pointer. + + enter: + bpget ; Save current base pointer + spget ; Get current stack pointer + bpset ; Set new base pointer + +Returning from a function with this mechanism is done by setting the stack pointer to the current +base pointer, then popping the previous base pointer from the stack. + + leave: + bpget ; Get current base pointer + spset ; Restore stack saved at the beginning + bpset ; Restore previous base pointer + ret ; and jumping back. + +This mechanism leaves the base pointer of the calling function intact and also provides +a new base pointer for the current function. + +## TODO + +- 64 Bit arithmetic instructions \ No newline at end of file diff --git a/prototypes/supervm/vm.h b/prototypes/supervm/vm.h index 1208c38..c97fb1d 100644 --- a/prototypes/supervm/vm.h +++ b/prototypes/supervm/vm.h @@ -8,7 +8,7 @@ extern "C" { #endif #if !defined(VM_STACKSIZE) -#define VM_STACKSIZE 64 +#define VM_STACKSIZE 512 #endif // Binary Encoding : (enabled, value)