Adds more detailed documentation.
This commit is contained in:
parent
9130573358
commit
7b5d44aa07
4 changed files with 156 additions and 23 deletions
|
@ -10,8 +10,6 @@ namespace supervm_asm
|
||||||
{
|
{
|
||||||
class Program
|
class Program
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
static void Main(string[] args)
|
static void Main(string[] args)
|
||||||
{
|
{
|
||||||
if(args.Contains("-gen-code"))
|
if(args.Contains("-gen-code"))
|
||||||
|
@ -24,17 +22,19 @@ namespace supervm_asm
|
||||||
foreach(var file in args.Where(a => !a.StartsWith("-") && Path.GetExtension(a) == ".asm"))
|
foreach(var file in args.Where(a => !a.StartsWith("-") && Path.GetExtension(a) == ".asm"))
|
||||||
{
|
{
|
||||||
var output = Path.ChangeExtension(file, ".bin");
|
var output = Path.ChangeExtension(file, ".bin");
|
||||||
var code = Assembler.Assemble(File.ReadAllText(file));
|
var assembly = Assembler.Assemble(File.ReadAllText(file));
|
||||||
|
|
||||||
Console.WriteLine("{0}:", output);
|
var code = assembly.Code;
|
||||||
for (int i = 0; i < code.Length; i++)
|
|
||||||
|
Console.WriteLine("{0}*{1}:", output, code.Count);
|
||||||
|
for (int i = 0; i < code.Count; i++)
|
||||||
{
|
{
|
||||||
Console.Write("; {0:X3} ", i);
|
Console.Write("; {0:D3} ", i);
|
||||||
PrintInstruction(code[i]);
|
PrintInstruction(code[i], assembly.Annotation[i]);
|
||||||
}
|
}
|
||||||
using(var fs = File.Open(output, FileMode.Create, FileAccess.Write))
|
using(var fs = File.Open(output, FileMode.Create, FileAccess.Write))
|
||||||
{
|
{
|
||||||
for(int i = 0; i < code.Length; i++)
|
for(int i = 0; i < code.Count; i++)
|
||||||
{
|
{
|
||||||
var bits = BitConverter.GetBytes(code[i]);
|
var bits = BitConverter.GetBytes(code[i]);
|
||||||
if(BitConverter.IsLittleEndian == false)
|
if(BitConverter.IsLittleEndian == false)
|
||||||
|
@ -47,7 +47,7 @@ namespace supervm_asm
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void PrintInstruction(ulong instr)
|
static void PrintInstruction(ulong instr, string comment)
|
||||||
{
|
{
|
||||||
var str = Convert.ToString((long)instr, 2).PadLeft(64, '0');
|
var str = Convert.ToString((long)instr, 2).PadLeft(64, '0');
|
||||||
|
|
||||||
|
@ -71,23 +71,39 @@ namespace supervm_asm
|
||||||
Console.Write("{0} ", str.Substring(portion.Start, portion.Length));
|
Console.Write("{0} ", str.Substring(portion.Start, portion.Length));
|
||||||
}
|
}
|
||||||
Console.ForegroundColor = fg;
|
Console.ForegroundColor = fg;
|
||||||
Console.WriteLine();
|
Console.WriteLine(" {0}", comment);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public class VMAssembly
|
||||||
|
{
|
||||||
|
private readonly ulong[] code;
|
||||||
|
private readonly string[] origins;
|
||||||
|
|
||||||
|
public VMAssembly(ulong[] code, string[] origins)
|
||||||
|
{
|
||||||
|
this.code = code;
|
||||||
|
this.origins = origins;
|
||||||
|
}
|
||||||
|
|
||||||
|
public IReadOnlyList<ulong> Code => this.code;
|
||||||
|
public IReadOnlyList<string> Annotation => this.origins;
|
||||||
|
}
|
||||||
|
|
||||||
public static class Assembler
|
public static class Assembler
|
||||||
{
|
{
|
||||||
static Regex annotationMatcher = new Regex(@"\[\s*(.*?)\s*\]", RegexOptions.Compiled);
|
static Regex annotationMatcher = new Regex(@"\[\s*(.*?)\s*\]", RegexOptions.Compiled);
|
||||||
static Regex labelMatcher = new Regex(@"^(\w+):\s*(.*)\s*$", RegexOptions.Compiled);
|
static Regex labelMatcher = new Regex(@"^(\w+):\s*(.*)\s*$", RegexOptions.Compiled);
|
||||||
static Regex instructionMatcher = new Regex(@"(\w+)(?:\s+([@-]?\w+|'.'))?", RegexOptions.Compiled);
|
static Regex instructionMatcher = new Regex(@"(\w+)(?:\s+([@-]?\w+|'.'))?", RegexOptions.Compiled);
|
||||||
|
|
||||||
public static ulong[] Assemble(string src)
|
public static VMAssembly Assemble(string src)
|
||||||
{
|
{
|
||||||
var lines = src.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
|
var lines = src.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
|
||||||
var patches = new Dictionary<int, string>();
|
var patches = new Dictionary<int, string>();
|
||||||
var labels = new Dictionary<string, int>();
|
var labels = new Dictionary<string, int>();
|
||||||
|
|
||||||
var code = new List<ulong>();
|
var code = new List<ulong>();
|
||||||
|
var source = new List<string>();
|
||||||
for (int i = 0; i < lines.Length; i++)
|
for (int i = 0; i < lines.Length; i++)
|
||||||
{
|
{
|
||||||
var line = lines[i].Trim();
|
var line = lines[i].Trim();
|
||||||
|
@ -96,6 +112,8 @@ namespace supervm_asm
|
||||||
if (idx >= 0)
|
if (idx >= 0)
|
||||||
line = line.Substring(0, idx);
|
line = line.Substring(0, idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var uncommented = line;
|
||||||
|
|
||||||
{ // Process labels
|
{ // Process labels
|
||||||
var match = labelMatcher.Match(line);
|
var match = labelMatcher.Match(line);
|
||||||
|
@ -256,6 +274,7 @@ namespace supervm_asm
|
||||||
encoded |= ((ulong)argument << 32);
|
encoded |= ((ulong)argument << 32);
|
||||||
|
|
||||||
code.Add(encoded);
|
code.Add(encoded);
|
||||||
|
source.Add(uncommented);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -270,7 +289,7 @@ namespace supervm_asm
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return code.ToArray();
|
return new VMAssembly(code.ToArray(), source.ToArray());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -17,11 +17,13 @@
|
||||||
|
|
||||||
; void print_str(char *string);
|
; void print_str(char *string);
|
||||||
print_str:
|
print_str:
|
||||||
spget ; enter function by
|
bpget ; enter function by
|
||||||
bpset ; saving the parents base pointer
|
spget ; saving the parents base pointer
|
||||||
|
bpset ; and storing the current stack pointer
|
||||||
|
|
||||||
|
|
||||||
; char *ptr = string;
|
; char *ptr = string;
|
||||||
get -1 ; get argument 0 into our local variable '#1'
|
get -2 ; get argument 0 into our local variable '#1'
|
||||||
|
|
||||||
; while(*ptr) {
|
; while(*ptr) {
|
||||||
print_str_loop:
|
print_str_loop:
|
||||||
|
@ -45,4 +47,5 @@ print_str_end_loop:
|
||||||
; return
|
; return
|
||||||
bpget ; leave function
|
bpget ; leave function
|
||||||
spset ; by restoring parent base pointer
|
spset ; by restoring parent base pointer
|
||||||
|
bpset
|
||||||
jmpi ; and jumping back.
|
jmpi ; and jumping back.
|
||||||
|
|
|
@ -12,16 +12,61 @@ It is targeted at uses who program SuperVM with the native assembly language,
|
||||||
system programmers who want to include the virtual machine in their system or
|
system programmers who want to include the virtual machine in their system or
|
||||||
create their own SuperVM implementation.
|
create their own SuperVM implementation.
|
||||||
|
|
||||||
## The Stack
|
## Concepts
|
||||||
|
|
||||||
|
SuperVM is a virtual machine that emulates a 32 bit stack machine. Instead of utilizing
|
||||||
|
registers, operations take their operands from the stack and push their results to
|
||||||
|
it.
|
||||||
|
|
||||||
|
An instruction is split into two parts:
|
||||||
|
The instruction configuration and the command. The command defines what operation should
|
||||||
|
be performed (memory access, calculation, ...), whereas the configuration defines the
|
||||||
|
behaviour of the instruction (stack/flag-modifications).
|
||||||
|
|
||||||
|
## Memory Areas
|
||||||
|
The virtual machine has three separated memory areas. Each area serves a specific
|
||||||
|
purpose and should not overlap the others.
|
||||||
|
|
||||||
|
### Code Memory
|
||||||
|
The code memory contains an immutable block of code that is instruction indexable.
|
||||||
|
Each instruction is 64 bit wide.
|
||||||
|
|
||||||
|
### Stack Memory
|
||||||
The virtual machine utilizes a stack to provide operands to instructions.
|
The virtual machine utilizes a stack to provide operands to instructions.
|
||||||
This stack stores temporary values the program is working with.
|
This stack stores temporary values the program is working with.
|
||||||
|
|
||||||
|
Each entry on the stack is a 32 bit value that is mostly interpreted as
|
||||||
|
a pointer, an index or an unsigned or signed integer. It is also possible
|
||||||
|
to store a 32bit IEEE floating point number on the stack.
|
||||||
|
|
||||||
|
The size of the stack is defined by the implementation, but it should contain at
|
||||||
|
least 1024 entries. This allows a fair recursive depth of 128 recursions with an average
|
||||||
|
of 6 local variables per function call.
|
||||||
|
|
||||||
|
### Data Memory
|
||||||
|
SuperVM also provides a memory model that allows storing persistent data that is
|
||||||
|
accessed by different parts of the code.
|
||||||
|
The data memory is byte accessible and can be written or read.
|
||||||
|
|
||||||
|
It is implementation defined how the memory is managed and accessible. It can be a
|
||||||
|
sparse memory with different sections, it could utilize a software-implemented paging
|
||||||
|
process or just be a flat chunk of memory.
|
||||||
|
|
||||||
|
As most programs require a minimum of global variables, the data memory should be
|
||||||
|
at least 16kB large.
|
||||||
|
|
||||||
|
Every pointer that accesses data memory (e.g. via `store` and `load`) contains the
|
||||||
|
address of a byte in memory, starting with zero.
|
||||||
|
|
||||||
## Registers and Flags
|
## Registers and Flags
|
||||||
|
|
||||||
The SuperVM virtual machine is a stack machine, but has also some control
|
The SuperVM virtual machine is a stack machine, but has also some control
|
||||||
registers that can be set with special instructions. The registers mainly
|
registers that can be set with special instructions. The registers mainly
|
||||||
control stack access or control flow.
|
control stack access or control flow.
|
||||||
|
|
||||||
|
Each register has a size of 32 bits. The only exception is the flag register, which
|
||||||
|
contains a single bit per flag.
|
||||||
|
|
||||||
| Mnemonic | Register | Function |
|
| Mnemonic | Register | Function |
|
||||||
|----------|---------------|-------------------------------------------------|
|
|----------|---------------|-------------------------------------------------|
|
||||||
| SP | Stack Pointer | Stores the current 'top' position of the stack. |
|
| SP | Stack Pointer | Stores the current 'top' position of the stack. |
|
||||||
|
@ -29,18 +74,26 @@ control stack access or control flow.
|
||||||
| CP | Code Pointer | Stores the instruction which is executed next. |
|
| CP | Code Pointer | Stores the instruction which is executed next. |
|
||||||
| FG | Flag Register | Stores the state of the flags. |
|
| FG | Flag Register | Stores the state of the flags. |
|
||||||
|
|
||||||
|
Stack, Base and Code Pointer store indexes instead of actual memory addresses.
|
||||||
|
This prevents the VM from executing invalid instructions, as the code pointer
|
||||||
|
always points to the start of an instruction.
|
||||||
|
|
||||||
|
Unlike on most current CPUs, the stack and base pointer grow upwards:
|
||||||
|
each push increments the stack pointer by one, each pop decrements it.
|
||||||
|
|
||||||
|
All registers start initialized with a zero.
|
||||||
|
|
||||||
### Stack Pointer
|
### Stack Pointer
|
||||||
The stack pointer points to the top of the stack. Each `push` operation increases
|
The stack pointer points to the top of the stack. Each `push` operation increases
|
||||||
the stack pointer by one, each `pop` operation reduces it by one.
|
the stack pointer by one, each `pop` operation reduces it by one.
|
||||||
|
|
||||||
### Base Pointer
|
### Base Pointer and Function Calls
|
||||||
The base pointer is a pointer that can be set to access the stack relative to it.
|
The base pointer is a pointer that can be set to access the stack relative to it.
|
||||||
The indended use is to create a stack frame with the base pointer by pushing the
|
This relative access is done by the commands `get` and `set`.
|
||||||
previous base pointer to the stack and setting the new base pointer to the current
|
|
||||||
stack pointer.
|
|
||||||
|
|
||||||
Returning a function with this mechanism is moving the stack pointer to the current
|
The base pointer is designed to create stack frames for functions with local variables
|
||||||
base pointer, then popping the previous base pointer from the stack.
|
as it is not possible to access local variables on the stack with only push and pop
|
||||||
|
operations.
|
||||||
|
|
||||||
### Code Pointer
|
### Code Pointer
|
||||||
The code pointer contains the instruction which is executed next. Modifying the
|
The code pointer contains the instruction which is executed next. Modifying the
|
||||||
|
@ -74,6 +127,11 @@ The execution mode checks whether the instruction will be execution or not. The
|
||||||
depends on the state of the flags. An `X` means "Don't care", a `0` means the flag must be
|
depends on the state of the flags. An `X` means "Don't care", a `0` means the flag must be
|
||||||
cleared and a `1` means the flag must be set.
|
cleared and a `1` means the flag must be set.
|
||||||
|
|
||||||
|
| State | Binary Representation |
|
||||||
|
| X | 0b00 |
|
||||||
|
| 0 | 0b10 |
|
||||||
|
| 1 | 0b11 |
|
||||||
|
|
||||||
An instruction is only executed when all conditions are met.
|
An instruction is only executed when all conditions are met.
|
||||||
|
|
||||||
| Flag | Range |
|
| Flag | Range |
|
||||||
|
@ -255,3 +313,56 @@ Each mnemonic declares a specific configuration of an instruction.
|
||||||
| shr | no | pop | pop | math | 14 | push | no | TO BE SPECIFIED |
|
| shr | no | pop | pop | math | 14 | push | no | TO BE SPECIFIED |
|
||||||
| syscall | yes | zero | zero | syscall | 0 | discard | no | Mnemonic for a generic syscall. |
|
| syscall | yes | zero | zero | syscall | 0 | discard | no | Mnemonic for a generic syscall. |
|
||||||
| hwio | yes | zero | zero | hwio | 0 | discard | no | Mnemonic for a generic hwio. |
|
| hwio | yes | zero | zero | hwio | 0 | discard | no | Mnemonic for a generic hwio. |
|
||||||
|
|
||||||
|
## Function Calls
|
||||||
|
|
||||||
|
The following chapter defines the SuperVM calling convention. It is required that all
|
||||||
|
functions conform to this convention.
|
||||||
|
|
||||||
|
To call a function, it is required that the return address is pushed to the stack.
|
||||||
|
After this, a jump is taken to the function address.
|
||||||
|
|
||||||
|
call:
|
||||||
|
push @returnPoint ; Pushing returnPoint as the return address
|
||||||
|
jmp @function ; Jumps to the function
|
||||||
|
returnPoint:
|
||||||
|
|
||||||
|
SuperVM provides the instruction `cpget` which pushes by default the address of the
|
||||||
|
second next instruction which resembles the code above. This behaviour allows position
|
||||||
|
independent code:
|
||||||
|
|
||||||
|
call:
|
||||||
|
cpget ; pushes implicit returnPoint
|
||||||
|
jmp @function ; Calls function
|
||||||
|
|
||||||
|
Functions can now return by calling `ret` when the return address is on top of the stack.
|
||||||
|
A simple function that does a system call may look like this:
|
||||||
|
|
||||||
|
function:
|
||||||
|
syscall
|
||||||
|
ret
|
||||||
|
|
||||||
|
As most functions utilize local variables, a stack frame is required.
|
||||||
|
Creating this stack frame is done by pushing the current base pointer, then
|
||||||
|
setting the base pointer to the current stack pointer.
|
||||||
|
|
||||||
|
enter:
|
||||||
|
bpget ; Save current base pointer
|
||||||
|
spget ; Get current stack pointer
|
||||||
|
bpset ; Set new base pointer
|
||||||
|
|
||||||
|
Returning from a function with this mechanism is done by setting the stack pointer to the current
|
||||||
|
base pointer, then popping the previous base pointer from the stack.
|
||||||
|
|
||||||
|
leave:
|
||||||
|
bpget ; Get current base pointer
|
||||||
|
spset ; Restore stack saved at the beginning
|
||||||
|
bpset ; Restore previous base pointer
|
||||||
|
ret ; and jumping back.
|
||||||
|
|
||||||
|
This mechanism leaves the base pointer of the calling function intact and also provides
|
||||||
|
a new base pointer for the current function.
|
||||||
|
|
||||||
|
## TODO
|
||||||
|
|
||||||
|
- 64 Bit arithmetic instructions
|
|
@ -8,7 +8,7 @@ extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(VM_STACKSIZE)
|
#if !defined(VM_STACKSIZE)
|
||||||
#define VM_STACKSIZE 64
|
#define VM_STACKSIZE 512
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Binary Encoding : (enabled, value)
|
// Binary Encoding : (enabled, value)
|
||||||
|
|
Loading…
Reference in a new issue