Adds more detailed documentation.

This commit is contained in:
Felix Queißner 2016-05-22 17:17:30 +02:00
parent 9130573358
commit 7b5d44aa07
4 changed files with 156 additions and 23 deletions

View file

@ -10,8 +10,6 @@ namespace supervm_asm
{ {
class Program class Program
{ {
static void Main(string[] args) static void Main(string[] args)
{ {
if(args.Contains("-gen-code")) if(args.Contains("-gen-code"))
@ -24,17 +22,19 @@ namespace supervm_asm
foreach(var file in args.Where(a => !a.StartsWith("-") && Path.GetExtension(a) == ".asm")) foreach(var file in args.Where(a => !a.StartsWith("-") && Path.GetExtension(a) == ".asm"))
{ {
var output = Path.ChangeExtension(file, ".bin"); var output = Path.ChangeExtension(file, ".bin");
var code = Assembler.Assemble(File.ReadAllText(file)); var assembly = Assembler.Assemble(File.ReadAllText(file));
Console.WriteLine("{0}:", output); var code = assembly.Code;
for (int i = 0; i < code.Length; i++)
Console.WriteLine("{0}*{1}:", output, code.Count);
for (int i = 0; i < code.Count; i++)
{ {
Console.Write("; {0:X3} ", i); Console.Write("; {0:D3} ", i);
PrintInstruction(code[i]); PrintInstruction(code[i], assembly.Annotation[i]);
} }
using(var fs = File.Open(output, FileMode.Create, FileAccess.Write)) using(var fs = File.Open(output, FileMode.Create, FileAccess.Write))
{ {
for(int i = 0; i < code.Length; i++) for(int i = 0; i < code.Count; i++)
{ {
var bits = BitConverter.GetBytes(code[i]); var bits = BitConverter.GetBytes(code[i]);
if(BitConverter.IsLittleEndian == false) if(BitConverter.IsLittleEndian == false)
@ -47,7 +47,7 @@ namespace supervm_asm
} }
} }
static void PrintInstruction(ulong instr) static void PrintInstruction(ulong instr, string comment)
{ {
var str = Convert.ToString((long)instr, 2).PadLeft(64, '0'); var str = Convert.ToString((long)instr, 2).PadLeft(64, '0');
@ -71,23 +71,39 @@ namespace supervm_asm
Console.Write("{0} ", str.Substring(portion.Start, portion.Length)); Console.Write("{0} ", str.Substring(portion.Start, portion.Length));
} }
Console.ForegroundColor = fg; Console.ForegroundColor = fg;
Console.WriteLine(); Console.WriteLine(" {0}", comment);
} }
} }
/// <summary>
/// Result of an assembler run: the encoded instructions together with one
/// annotation string per instruction.
/// </summary>
public class VMAssembly
{
    private readonly ulong[] code;
    private readonly string[] origins;

    /// <summary>
    /// Creates a new assembly from encoded instructions and their annotations.
    /// The arrays are stored as passed in; they are not copied.
    /// </summary>
    /// <param name="code">The encoded instructions.</param>
    /// <param name="origins">
    /// One annotation per instruction; <c>origins[i]</c> belongs to <c>code[i]</c>.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="code"/> or <paramref name="origins"/> is null.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The two arrays do not have the same length.
    /// </exception>
    public VMAssembly(ulong[] code, string[] origins)
    {
        if (code == null)
        {
            throw new ArgumentNullException(nameof(code));
        }
        if (origins == null)
        {
            throw new ArgumentNullException(nameof(origins));
        }
        // Consumers index Annotation with instruction indices, so both arrays
        // must line up; fail here instead of with an index error later on.
        if (code.Length != origins.Length)
        {
            throw new ArgumentException(
                "Every instruction requires exactly one annotation.",
                nameof(origins));
        }

        this.code = code;
        this.origins = origins;
    }

    /// <summary>Read-only view of the encoded instructions.</summary>
    public IReadOnlyList<ulong> Code => this.code;

    /// <summary>
    /// Read-only view of the annotations; index <c>i</c> annotates <c>Code[i]</c>.
    /// </summary>
    public IReadOnlyList<string> Annotation => this.origins;
}
public static class Assembler public static class Assembler
{ {
static Regex annotationMatcher = new Regex(@"\[\s*(.*?)\s*\]", RegexOptions.Compiled); static Regex annotationMatcher = new Regex(@"\[\s*(.*?)\s*\]", RegexOptions.Compiled);
static Regex labelMatcher = new Regex(@"^(\w+):\s*(.*)\s*$", RegexOptions.Compiled); static Regex labelMatcher = new Regex(@"^(\w+):\s*(.*)\s*$", RegexOptions.Compiled);
static Regex instructionMatcher = new Regex(@"(\w+)(?:\s+([@-]?\w+|'.'))?", RegexOptions.Compiled); static Regex instructionMatcher = new Regex(@"(\w+)(?:\s+([@-]?\w+|'.'))?", RegexOptions.Compiled);
public static ulong[] Assemble(string src) public static VMAssembly Assemble(string src)
{ {
var lines = src.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries); var lines = src.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
var patches = new Dictionary<int, string>(); var patches = new Dictionary<int, string>();
var labels = new Dictionary<string, int>(); var labels = new Dictionary<string, int>();
var code = new List<ulong>(); var code = new List<ulong>();
var source = new List<string>();
for (int i = 0; i < lines.Length; i++) for (int i = 0; i < lines.Length; i++)
{ {
var line = lines[i].Trim(); var line = lines[i].Trim();
@ -97,6 +113,8 @@ namespace supervm_asm
line = line.Substring(0, idx); line = line.Substring(0, idx);
} }
var uncommented = line;
{ // Process labels { // Process labels
var match = labelMatcher.Match(line); var match = labelMatcher.Match(line);
if (match.Success) if (match.Success)
@ -256,6 +274,7 @@ namespace supervm_asm
encoded |= ((ulong)argument << 32); encoded |= ((ulong)argument << 32);
code.Add(encoded); code.Add(encoded);
source.Add(uncommented);
} }
} }
@ -270,7 +289,7 @@ namespace supervm_asm
} }
} }
return code.ToArray(); return new VMAssembly(code.ToArray(), source.ToArray());
} }

View file

@ -17,11 +17,13 @@
; void print_str(char *string); ; void print_str(char *string);
print_str: print_str:
spget ; enter function by bpget ; enter function by
bpset ; saving the parents base pointer spget ; saving the parents base pointer
bpset ; and storing the current stack pointer
; char *ptr = string; ; char *ptr = string;
get -1 ; get argument 0 into our local variable '#1' get -2 ; get argument 0 into our local variable '#1'
; while(*ptr) { ; while(*ptr) {
print_str_loop: print_str_loop:
@ -45,4 +47,5 @@ print_str_end_loop:
; return ; return
bpget ; leave function bpget ; leave function
spset ; by restoring parent base pointer spset ; by restoring parent base pointer
bpset
jmpi ; and jumping back. jmpi ; and jumping back.

View file

@ -12,16 +12,61 @@ It is targeted at uses who program SuperVM with the native assembly language,
system programmers who want to include the virtual machine in their system or system programmers who want to include the virtual machine in their system or
create their own SuperVM implementation. create their own SuperVM implementation.
## The Stack ## Concepts
SuperVM is a virtual machine that emulates a 32 bit stack machine. Instead of utilizing
registers, operations take their operands from the stack and push their results to
it.
An instruction is split into two parts:
The instruction configuration and the command. The command defines what operation should
be performed (memory access, calculation, ...), whereas the configuration defines the
behaviour of the instruction (stack/flag-modifications).
## Memory Areas
The virtual machine has three separated memory areas. Each area serves a specific
purpose and should not overlap the others.
### Code Memory
The code memory contains an immutable block of code that is instruction indexable.
Each instruction is 64 bit wide.
### Stack Memory
The virtual machine utilizes a stack to provide operands to instructions. The virtual machine utilizes a stack to provide operands to instructions.
This stack stores temporary values the program is working with. This stack stores temporary values the program is working with.
Each entry on the stack is a 32 bit value that is mostly interpreted as
a pointer, an index or an unsigned or signed integer. It is also possible
to store a 32bit IEEE floating point number on the stack.
The size of the stack is defined by the implementation, but it should contain at
least 1024 entries. This allows a fair recursive depth of 128 recursions with an average
of 6 local variables per function call.
### Data Memory
SuperVM also provides a memory model that allows storing persistent data that is
accessed by different parts of the code.
The data memory is byte accessible and can be written or read.
It is implementation defined how the memory is managed and accessible. It can be a
sparse memory with different sections, it could utilize a software-implemented paging
process or just be a flat chunk of memory.
As most programs require a minimum of global variables, the data memory should be
at least 16kB large.
Every pointer that accesses data memory (e.g. via `store` and `load`) contains the
address of a byte in memory, starting with zero.
## Registers and Flags ## Registers and Flags
The SuperVM virtual machine is a stack machine, but has also some control The SuperVM virtual machine is a stack machine, but has also some control
registers that can be set with special instructions. The registers mainly registers that can be set with special instructions. The registers mainly
control stack access or control flow. control stack access or control flow.
Each register has a size of 32 bits. The only exception is the flag register which
contains a single bit per flag.
| Mnemonic | Register | Function | | Mnemonic | Register | Function |
|----------|---------------|-------------------------------------------------| |----------|---------------|-------------------------------------------------|
| SP | Stack Pointer | Stores the current 'top' position of the stack. | | SP | Stack Pointer | Stores the current 'top' position of the stack. |
@ -29,18 +74,26 @@ control stack access or control flow.
| CP | Code Pointer | Stores the instruction which is executed next. | | CP | Code Pointer | Stores the instruction which is executed next. |
| FG | Flag Register | Stores the state of the flags. | | FG | Flag Register | Stores the state of the flags. |
Stack, Base and Code Pointer store indexes instead of actual memory addresses.
This prevents the VM from executing invalid instructions as the code pointer
always points to the start of an instruction.
Unlike on most current CPUs, the stack and base pointer grow upwards:
each push increments the stack pointer by one, each pop decrements it.
All registers are initialized to zero.
### Stack Pointer ### Stack Pointer
The stack pointer points to the top of the stack. Each `push` operation increases The stack pointer points to the top of the stack. Each `push` operation increases
the stack pointer by one, each `pop` operation reduces it by one. the stack pointer by one, each `pop` operation reduces it by one.
### Base Pointer ### Base Pointer and Function Calls
The base pointer is a pointer that can be set to access the stack relative to it. The base pointer is a pointer that can be set to access the stack relative to it.
The indended use is to create a stack frame with the base pointer by pushing the This relative access is done by the commands `get` and `set`.
previous base pointer to the stack and setting the new base pointer to the current
stack pointer.
Returning a function with this mechanism is moving the stack pointer to the current The base pointer is designed to create stack frames for functions with local variables
base pointer, then popping the previous base pointer from the stack. as it is not possible to access local variables on the stack with only push and pop
operations.
### Code Pointer ### Code Pointer
The code pointer contains the instruction which is executed next. Modifying the The code pointer contains the instruction which is executed next. Modifying the
@ -74,6 +127,11 @@ The execution mode checks whether the instruction will be execution or not. The
depends on the state of the flags. An `X` means "Don't care", a `0` means the flag must be depends on the state of the flags. An `X` means "Don't care", a `0` means the flag must be
cleared and a `1` means the flag must be set. cleared and a `1` means the flag must be set.
| State | Binary Representation |
|-------|-----------------------|
| X | 0b00 |
| 0 | 0b10 |
| 1 | 0b11 |
An instruction is only executed when all conditions are met. An instruction is only executed when all conditions are met.
| Flag | Range | | Flag | Range |
@ -255,3 +313,56 @@ Each mnemonic declares a specific configuration of an instruction.
| shr | no | pop | pop | math | 14 | push | no | TO BE SPECIFIED | | shr | no | pop | pop | math | 14 | push | no | TO BE SPECIFIED |
| syscall | yes | zero | zero | syscall | 0 | discard | no | Mnemonic for a generic syscall. | | syscall | yes | zero | zero | syscall | 0 | discard | no | Mnemonic for a generic syscall. |
| hwio | yes | zero | zero | hwio | 0 | discard | no | Mnemonic for a generic hwio. | | hwio | yes | zero | zero | hwio | 0 | discard | no | Mnemonic for a generic hwio. |
## Function Calls
The following chapter defines the SuperVM calling convention. It is required that all
functions conform to this convention.
To call a function, it is required that the return address is pushed to the stack.
After this, a jump is taken to the function address.
call:
push @returnPoint ; Pushing returnPoint as the return address
jmp @function ; Jumps to the function
returnPoint:
SuperVM provides the instruction `cpget` which pushes by default the address of the
second next instruction which resembles the code above. This behaviour allows position
independent code:
call:
cpget ; pushes implicit returnPoint
jmp @function ; Calls function
Functions can now return by calling `ret` when the return address is on top of the stack.
A simple function that does a system call may look like this:
function:
syscall
ret
As most functions utilize local variables, a stack frame is required.
Creating this stack frame is done by pushing the current base pointer, then
setting the base pointer to the current stack pointer.
enter:
bpget ; Save current base pointer
spget ; Get current stack pointer
bpset ; Set new base pointer
Returning from a function with this mechanism is done by setting the stack pointer to the current
base pointer, then popping the previous base pointer from the stack.
leave:
bpget ; Get current base pointer
spset ; Restore stack saved at the beginning
bpset ; Restore previous base pointer
ret ; and jumping back.
This mechanism leaves the base pointer of the calling function intact and also provides
a new base pointer for the current function.
## TODO
- 64 Bit arithmetic instructions

View file

@ -8,7 +8,7 @@ extern "C" {
#endif #endif
#if !defined(VM_STACKSIZE) #if !defined(VM_STACKSIZE)
#define VM_STACKSIZE 64 #define VM_STACKSIZE 512
#endif #endif
// Binary Encoding : (enabled, value) // Binary Encoding : (enabled, value)