Hardcore coding session. The parser is complete, the syntax tree model is "working", and execution is quite possible, although there is no code->model translation yet.

This commit is contained in:
Felix Queissner 2015-08-09 21:43:47 +02:00
parent ad1a74fb85
commit cf148e411e
10 changed files with 718 additions and 0 deletions

6
.gitignore vendored
View file

@ -35,3 +35,9 @@ kernel
# Debug files
*.dSYM/
trainOS.pro.user
trainscript/tsvm
*.tab.cpp
*.tab.h
*.l.h
lex.yy.cpp

48
trainOS.pro Normal file
View file

@ -0,0 +1,48 @@
TEMPLATE = app
CONFIG += console
CONFIG -= app_bundle
CONFIG -= qt
SOURCES += \
src/console.c \
src/init.c \
src/interrupts.c \
src/malloc.c \
src/pmm.c \
src/stdlib.c \
src/vmm.c \
trainscript/tsvm.cpp \
trainscript/main.cpp
HEADERS += \
src/console.h \
src/cpustate.h \
src/interrupts.h \
src/intr_stubs.h \
src/io.h \
src/kernel.h \
src/multiboot.h \
src/pmm.h \
src/stdlib.h \
src/varargs.h \
src/vmm.h \
trainscript/common.h \
trainscript/tsvm.hpp
DISTFILES += \
asm/intr_common_handler.S \
asm/multiboot.S \
asm/start.S \
trainscript.md \
Makefile \
trainscript/trainscript.l \
trainscript/file01.ts \
trainscript/Makefile \
trainscript/trainscript.y
QMAKE_INCDIR =
QMAKE_CFLAGS = -m32 -Dnullptr=0 -std=c11 -Wall -fno-stack-protector -ffreestanding
QMAKE_LINK = ld
QMAKE_LFLAGS = -g -melf_i386 -Tkernel.ld

25
trainscript/Makefile Normal file
View file

@ -0,0 +1,25 @@
# Build rules for the standalone trainscript VM executable `tsvm`.
#
# flex and bison generate C files which are renamed to .cpp so that they are
# compiled as C++ together with the hand-written sources.
LEX=flex
YACC=bison

OBJECTS=lex.yy.o trainscript.tab.o tsvm.o main.o

# `all` does not name a file; the real product is `tsvm`. Keeping the link
# step in its own rule avoids relinking when nothing has changed.
.PHONY: all
all: tsvm

tsvm: $(OBJECTS)
	g++ -o tsvm $(OBJECTS)

main.o: main.cpp tsvm.hpp common.h
	g++ -std=c++11 -c main.cpp -o main.o

# trainscript.tab.h and trainscript.l.h are by-products of the two generator
# rules at the bottom. Every object that includes them therefore depends on
# the generated .cpp files, which forces the generators to run first (this
# matters for parallel builds and after editing the .l/.y files).
tsvm.o: tsvm.cpp tsvm.hpp common.h trainscript.tab.cpp lex.yy.cpp
	g++ -std=c++11 -c tsvm.cpp -o tsvm.o

lex.yy.o: lex.yy.cpp common.h trainscript.tab.cpp
	g++ -c lex.yy.cpp -o lex.yy.o

trainscript.tab.o: trainscript.tab.cpp common.h lex.yy.cpp
	g++ -c trainscript.tab.cpp -o trainscript.tab.o

# Generates lex.yy.c plus the scanner header trainscript.l.h.
lex.yy.cpp: trainscript.l
	$(LEX) --header-file=trainscript.l.h trainscript.l
	mv lex.yy.c lex.yy.cpp

# Generates trainscript.tab.c plus the token header trainscript.tab.h (-d).
trainscript.tab.cpp: trainscript.y
	$(YACC) -d trainscript.y
	mv trainscript.tab.c trainscript.tab.cpp

54
trainscript/common.h Normal file
View file

@ -0,0 +1,54 @@
#pragma once
#include <stddef.h>
#include <inttypes.h>
// Hooks that customise the flex-generated scanner.

// ECHO is defined as an empty statement, so any code that expands ECHO
// produces no output.
#define ECHO do { } while(0)
// Error callback; declared here so both the scanner and the parser can call it.
void yyerror(void *scanner, const char *s);
// The scanner's "extra" pointer carries the ParserData of the current parse.
#define YY_EXTRA_TYPE ParserData*
// Input hook: instead of reading from a FILE, hand the scanner one character
// per call from yyextra->buffer. Once yyextra->index has reached
// yyextra->length, YY_NULL is reported as the result (end of input).
#define YY_INPUT(buf,result,max_size) \
{ \
if(yyextra->index >= yyextra->length) \
result = YY_NULL; \
else { \
buf[0] = yyextra->buffer[yyextra->index++]; \
result = 1; \
} \
}
namespace trainscript {
class Module;
}
// Per-parse state handed to the scanner (as its "extra" data) and to the
// parser (as its parse parameter).
typedef struct
{
// Source text being parsed; read character by character through YY_INPUT.
char *buffer;
// Offset of the next character of `buffer` that YY_INPUT will hand out.
size_t index;
// Number of valid characters in `buffer`.
size_t length;
// Module associated with this parse.
trainscript::Module *module;
// Opaque handle of the reentrant flex scanner.
void *scanner;
} ParserData;
// Base type identifiers used by the parser.
// The numeric values are reused by trainscript::TypeID in tsvm.hpp, so the
// two enumerations must be kept in sync.
typedef enum
{
tidUNKNOWN = 0,
tidVOID = 1,
tidINT = 2,
tidREAL = 3,
tidTEXT = 4,
} typeid_t;
// Parsed type: a base type plus a pointer depth.
// Stored in the parser's semantic-value union, so it has to stay a plain
// aggregate without constructors.
typedef struct
{
// Base type.
typeid_t type;
// Pointer depth; 0 for a plain value, incremented once per PTR(...) wrapper
// by the grammar's typeName rule.
int pointer;
} type_t;
// Result of parsing `VAR name : type`.
// Stored in the parser's semantic-value union, so it has to stay a plain
// aggregate without constructors.
typedef struct
{
// Declared type of the variable.
type_t type;
// Variable name; the lexer produces identifier text with strdup.
// NOTE(review): nothing visible frees this string — confirm who owns it.
char *name;
} VariableDeclaration;

11
trainscript/file01.ts Normal file
View file

@ -0,0 +1,11 @@
# Trainscript Test File
VAR global : INT;
PUB main() c : INT | a : INT, b : INT
10 a;
helper() b;
a + b c;
PRI helper() result : INT
20 result;

75
trainscript/main.cpp Normal file
View file

@ -0,0 +1,75 @@
#include "common.h"
#include "tsvm.hpp"
#include <string>
#include <fstream>
#include <vector>
using namespace std;
using namespace trainscript;
/**
 * Reads the complete contents of a file into `buffer`.
 *
 * The file is opened in binary mode, its size is determined by seeking to
 * the end, and the size is printed to stdout before the data is read.
 *
 * @param name   Path of the file to read.
 * @param buffer Receives the file contents; resized to the file size.
 * @return true when the whole file was read, false when the file could not
 *         be opened, its size could not be determined, or reading failed.
 */
bool filedata(std::string name, std::vector<char> &buffer)
{
    std::ifstream file(name.c_str(), std::ios::binary);
    if(!file.is_open())
        return false;

    file.seekg(0, std::ios::end);
    const std::streamoff len = file.tellg();
    file.seekg(0, std::ios::beg);
    if(len < 0)
        return false;

    // The stream size type is wider than int on most platforms, so it must
    // not be printed with %d; cast to a type with a matching conversion.
    printf("size: %lld\n", static_cast<long long>(len));

    buffer.resize(static_cast<std::size_t>(len));
    // An empty file is valid and needs no read at all.
    if(len > 0 && !file.read(buffer.data(), len))
        return false;
    return true;
}
int main(int argc, char** argv)
{
if(argc < 2) {
printf("tsvm <file>\n");
return 1;
}
vector<char> buffer;
if(filedata(argv[1], buffer) == false) {
printf("File not found\n");
return 2;
}
Module *module = VM::load(buffer.data(), buffer.size());
if(module == nullptr) {
printf("Could not compile file.\n");
return 3;
}
// This should be replaced by parsing.... :P
{
Block *block = new Block(module);
block->instructions.push_back(new DebugInstruction(module, "hello world!"));
block->instructions.push_back(new DebugVariableInstruction(module, "x"));
Method *m = new Method(module, block);
m->arguments.push_back({"x", Variable(TypeID::Int)});
module->methods.insert({"main", m});
}
Method *scriptMain = module->method("main");
if(scriptMain== nullptr) {
printf("'main' method not found.\n");
return 4;
}
scriptMain->invoke({ Variable(15) });
return 0;
}

47
trainscript/trainscript.l Normal file
View file

@ -0,0 +1,47 @@
%{
#include <stdlib.h>
#include "common.h"
#include "trainscript.tab.h"
%}
%option noyywrap
%option never-interactive
%option reentrant
%option bison-bridge
%option yylineno
%%
\#[^\n]* ; // Eat all the comments!
[ ]+ ; // Eat all the whitespace!
\t { return TAB; }
\; { return SEMICOLON; }
\: { return COLON; }
\, { return COMMA; }
\| { return PIPE; }
\( { return LBRACKET; }
\) { return RBRACKET; }
\+ { return PLUS; }
\- { return MINUS; }
\* { return MULTIPLY; }
\/ { return DIVIDE; }
\% { return MODULO; }
\-\>|→ { return RARROW; }
\<\-|← { return LARROW; }
VAR { return KW_VAR; }
PUB { return KW_PUB; }
PRI { return KW_PRI; }
PTR { return KW_PTR; }
VOID { return KW_VOID; }
INT { return KW_INT; }
REAL { return KW_REAL; }
TEXT { return KW_TEXT; }
[0-9]+ { yylval->ival = atoi(yytext); return INT; }
[a-zA-Z0-9]+ { yylval->text = strdup(yytext); return IDENTIFIER; }
. { yyerror(NULL, "illegal token"); }
%%
/*
[0-9]+ { yylval.ival = atoi(yytext); return INT; }
[a-zA-Z0-9]+ { yylval.sval = strdup(yytext); return STRING; }
[0-9]+\.[0-9]+ { yylval->fval = atof(yytext); return FLOAT; }
*/

161
trainscript/trainscript.y Normal file
View file

@ -0,0 +1,161 @@
%{
#include <stdio.h>
#include <stdlib.h>
#include "common.h"
typedef union YYSTYPE YYSTYPE;
// stuff from flex that bison needs to know about:
int yylex (YYSTYPE * yylval_param , void *yyscanner);
void yyerror(void *scanner, const char *s);
#define scanner context->scanner
%}
%pure-parser
%lex-param {void * scanner}
%parse-param {ParserData * context}
%union {
float fval;
int ival;
char *text;
int indentation;
type_t type;
VariableDeclaration declaration;
}
%token TAB
%token TYPENAME
%token SEMICOLON
%token COLON
%token COMMA
%token PIPE
%token PLUS
%token MINUS
%token MULTIPLY
%token DIVIDE
%token MODULO
%token LBRACKET
%token RBRACKET
%token RARROW
%token LARROW
%token <fval> FLOAT
%token <ival> INT
%token <text> IDENTIFIER
%token KW_PUB
%token KW_PRI
%token KW_VAR
%token KW_PTR
%token KW_VOID
%token KW_INT
%token KW_REAL
%token KW_TEXT
%type <ival> expression
%type <type> typeName
%type <declaration> variableDeclaration
%type <indentation> indentation
%start input
%left PLUS MINUS MULTIPLY DIVIDE MODULO
%%
input:
%empty
| input variableDeclaration SEMICOLON { printf("decl %s as %d^%d\n", $2.name, $2.type.type, $2.type.pointer); }
| input method { printf("method declaration.\n"); }
;
method:
methodDeclaration body
;
body:
%empty
| body indentation assignment SEMICOLON { printf("Indent: %d\n", $2); }
;
methodDeclaration:
KW_PUB IDENTIFIER LBRACKET argumentList RBRACKET methodLocals
| KW_PRI IDENTIFIER LBRACKET argumentList RBRACKET methodLocals
| KW_PUB IDENTIFIER LBRACKET argumentList RBRACKET RARROW argument methodLocals
| KW_PRI IDENTIFIER LBRACKET argumentList RBRACKET RARROW argument methodLocals
;
methodLocals:
%empty
| PIPE methodLocalList
;
methodLocalList:
argument
| methodLocalList COMMA argument
argumentList:
%empty
| argument
| argumentList COMMA argument
;
argument:
IDENTIFIER COLON typeName
;
assignment:
expression RARROW IDENTIFIER { printf("%s := %d\n", $3, $1); }
/*
 * expression: integer expression that is folded to a value while parsing.
 *
 * Variable accesses and calls are only traced and evaluate to the
 * placeholder value 1. The identifier text comes from strdup in the lexer
 * and is released here because nothing else keeps it.
 * Division and modulo by zero are reported as a parse error instead of
 * executing the undefined integer operation.
 */
expression:
INT { $$ = $1; }
| IDENTIFIER { printf("[access %s]", $1); free($1); $$ = 1; }
| IDENTIFIER LBRACKET expressionList RBRACKET { printf("[call %s]", $1); free($1); $$ = 1; }
| LBRACKET expression RBRACKET { $$ = $2; }
| expression PLUS expression { $$ = $1 + $3; }
| expression MINUS expression { $$ = $1 - $3; }
| expression MULTIPLY expression { $$ = $1 * $3; }
| expression DIVIDE expression {
    if($3 == 0) { yyerror(context, "division by zero"); YYERROR; }
    $$ = $1 / $3;
  }
| expression MODULO expression {
    if($3 == 0) { yyerror(context, "modulo by zero"); YYERROR; }
    $$ = $1 % $3;
  }
;
expressionList:
%empty
| expression
| expressionList COMMA expression
;
variableDeclaration:
KW_VAR IDENTIFIER COLON typeName { $$.name = $2; $$.type = $4; }
;
typeName:
KW_VOID { $$.type = tidVOID; $$.pointer = 0; }
| KW_INT { $$.type = tidINT; $$.pointer = 0; }
| KW_REAL { $$.type = tidREAL; $$.pointer = 0; }
| KW_TEXT { $$.type = tidTEXT; $$.pointer = 0; }
| KW_PTR LBRACKET typeName RBRACKET { $$ = $3; $$.pointer++; }
;
indentation:
TAB { $$ = 1; }
| indentation TAB { $$ = $1 + 1; }
;
%%
#undef scanner
#include "trainscript.l.h"
/*
 * Error callback used by the scanner and the parser.
 * Reporting is currently switched off, so both arguments are ignored.
 */
void yyerror(void *scanner, const char *s)
{
    /* disabled: printf("Error: %s\n", s); */
    (void)scanner;
    (void)s;
}

60
trainscript/tsvm.cpp Normal file
View file

@ -0,0 +1,60 @@
#include <stdlib.h>
#include <string.h>
#include "common.h"
#include "tsvm.hpp"
#include "trainscript.tab.h"
#include "trainscript.l.h"
namespace trainscript
{
/**
 * Parses trainscript source text and returns the module created for it.
 *
 * @param buffer Source text; does not have to be NUL-terminated.
 * @param length Number of bytes available at `buffer`.
 * @return A module allocated with `new` that is handed to the caller, or
 *         nullptr when the scanner could not be set up or parsing failed.
 */
Module *VM::load(const void *buffer, size_t length)
{
    // ParserData wants a mutable character buffer, so the parser works on a
    // private copy. The vector releases it on every exit path.
    const char *bytes = static_cast<const char*>(buffer);
    std::vector<char> storage(bytes, bytes + length);

    ParserData data;
    data.buffer = storage.data();
    data.index = 0;
    data.length = storage.size();
    data.module = nullptr;
    data.scanner = nullptr;

    // yylex_init_extra reports failure with a non-zero result; in that case
    // there is no scanner to parse with or to destroy.
    if(yylex_init_extra(&data, &data.scanner) != 0) {
        return nullptr;
    }

    Module *module = new Module();
    data.module = module;

    bool valid = yyparse(&data) == 0;
    yylex_destroy(data.scanner);

    if(!valid) {
        delete module;
        return nullptr;
    }
    return module;
}
/**
 * Convenience overload that parses a NUL-terminated source string.
 *
 * @param text Source text; its length is determined with strlen.
 * @return The result of the buffer/length overload, or nullptr when `text`
 *         is a null pointer.
 */
Module *VM::load(const char *text)
{
    // strlen on a null pointer is undefined behaviour, so reject it here.
    if(text == nullptr) {
        return nullptr;
    }
    return VM::load(static_cast<const void*>(text), strlen(text));
}
// Creates a module whose variable and method tables are empty.
Module::Module() = default;
// Destroys every method and variable object registered in the module.
Module::~Module()
{
    for(auto it = methods.begin(); it != methods.end(); ++it) {
        delete it->second;
    }
    for(auto it = variables.begin(); it != variables.end(); ++it) {
        delete it->second;
    }
}
}

231
trainscript/tsvm.hpp Normal file
View file

@ -0,0 +1,231 @@
#pragma once
#include <cstdio>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>
#include "common.h"
namespace trainscript
{
using Int = int32_t;
using Real = double;
using Void = void;
// Text value: a character pointer together with a length.
// It is a member of the anonymous union inside Variable, so it must remain
// a plain aggregate without constructors or a destructor.
// NOTE(review): ownership and NUL-termination of `data` are not established
// by the visible code — confirm before relying on either.
struct Text
{
size_t length;
char *data;
};
// Scoped counterpart of the parser's typeid_t enumeration from common.h.
// Each enumerator takes its value from the matching tid* constant.
enum class TypeID
{
Unknown = tidUNKNOWN,
Void = tidVOID,
Int = tidINT,
Real = tidREAL,
Text = tidTEXT
};
/**
 * Static type of a value: a base type plus a pointer depth.
 */
struct Type
{
    TypeID id;    // base type
    int pointer;  // pointer depth, 0 for a plain value

    Type() : id(TypeID::Unknown), pointer(0) { }
    // Left implicit so a bare TypeID can still be used where a Type is expected.
    Type(TypeID id) : id(id), pointer(0) { }
    Type(TypeID id, int pointer) : id(id), pointer(pointer) { }

    /// Returns the type of a pointer to this type.
    Type reference() const {
        return { id, pointer + 1 };
    }

    /// Returns the pointed-to type.
    /// @throws std::logic_error (a std::exception) if this is not a pointer type.
    Type dereference() const {
        if(pointer <= 0) {
            throw std::logic_error("cannot dereference a non-pointer type");
        }
        return { id, pointer - 1 };
    }

    /// A type is usable for values unless it is unknown or plain (non-pointer) void.
    bool usable() const {
        if(this->id == TypeID::Unknown) {
            return false;
        }
        return (this->id != TypeID::Void) || (this->pointer > 0);
    }

    bool operator ==(const Type &other) const {
        return (this->id == other.id) &&
               (this->pointer == other.pointer);
    }

    // Defined through operator== so the two can never disagree.
    bool operator !=(const Type &other) const {
        return !(*this == other);
    }
};
struct Variable
{
Type type;
union {
Int integer;
Real real;
Text text;
};
Variable() : type(TypeID::Unknown), integer(0) { }
explicit Variable(Type type) : type(type), integer(0) { }
explicit Variable(TypeID type) : type(type), integer(0) { }
explicit Variable(Int integer) : type(TypeID::Int), integer(integer) { }
explicit Variable(Real real) : type(TypeID::Real), real(real) { }
};
class Module;
using LocalContext = std::map<std::string, Variable&>;
/**
 * Abstract base of everything that can be executed inside a method body.
 */
class Instruction
{
public:
    Instruction (Module *module) : module(module) { }

    virtual ~Instruction() = default;

    /// Runs the instruction against the variables visible in `context`.
    virtual void execute(LocalContext &context) const = 0;

private:
    Module *module;  // module passed at construction
};
class Block :
public Instruction
{
public:
std::vector<Instruction*> instructions;
Block(Module *module) : Instruction(module) { }
~Block() {
for(auto *instr : instructions) delete instr;
}
void execute(LocalContext &context) const override {
for(auto *instr : instructions) {
instr->execute(context);
}
}
};
class DebugInstruction :
public Instruction
{
public:
std::string message;
DebugInstruction(Module *module, std::string msg) : Instruction(module), message(msg) { }
void execute(LocalContext &context) const override {
printf("debug: %s\n", message.c_str());
}
};
class DebugVariableInstruction :
public Instruction
{
public:
std::string variable;
DebugVariableInstruction(Module *module, std::string variable) : Instruction(module), variable(variable) { }
void execute(LocalContext &context) const override {
if(context.count(variable) > 0) {
auto &var = context.at(variable);
switch(var.type.id) {
case TypeID::Int:
printf("%s := %d\n", variable.c_str(), var.integer);
break;
case TypeID::Real:
printf("%s := %f\n", variable.c_str(), var.real);
break;
default:
printf("%s has unknown type.\n", variable.c_str());
break;
}
} else {
printf("variable %s not found.\n", variable.c_str());
}
}
};
/**
 * A callable script method: a body block plus the declarations of its
 * arguments, locals and (optional) return variable.
 */
class Method
{
public:
    Module *module;
    Block *block;
    std::vector<std::pair<std::string, Variable>> arguments;
    std::map<std::string, Variable> locals;
    std::pair<std::string, Variable> returnValue;

    Method(Module *module, Block *block) : module(module), block(block)
    {
    }

    /**
     * Executes the method body.
     *
     * @param arguments Actual arguments; count and types must match the
     *                  declared arguments.
     * @return The return variable after execution, or a default-constructed
     *         Variable when the arguments do not match the declaration.
     */
    Variable invoke(std::vector<Variable> arguments)
    {
        // The context stores references, so everything inserted below has
        // to stay alive until the body has finished executing.
        LocalContext context;
        if(this->returnValue.second.type.usable()) {
            context.insert({ this->returnValue.first, this->returnValue.second });
        }

        // Message text is German for "complaint: count".
        if(arguments.size() != this->arguments.size()) {
            printf("MECKER anzahl!\n");
            return Variable();
        }
        for(size_t i = 0; i < this->arguments.size(); i++) {
            // Message text is German for "complaint: argument type".
            if(this->arguments[i].second.type != arguments[i].type) {
                printf("MECKER argtyp!\n");
                return Variable();
            }
            context.insert({this->arguments[i].first, arguments[i] });
        }

        // Each invocation works on its own copy of the declared locals. The
        // copy lives in this named map (not in a per-iteration temporary),
        // so the references stored in the context stay valid during execute.
        std::map<std::string, Variable> frameLocals = this->locals;
        for(auto &local : frameLocals) {
            context.insert({ local.first, local.second });
        }

        // A method without a body simply does nothing.
        if(this->block != nullptr) {
            this->block->execute(context);
        }
        return this->returnValue.second;
    }
};
/**
 * A loaded script: named global variables and named methods.
 * The destructor deletes every object stored in the two tables.
 * NOTE(review): the class stays copyable although it deletes the stored
 * pointers; copying an instance would lead to a double delete.
 */
class Module
{
public:
    std::map<std::string, Variable*> variables;
    std::map<std::string, Method*> methods;
public:
    Module();
    ~Module();

    /// Returns the method registered under `name`, or nullptr if there is none.
    Method *method(const char *name)
    {
        // find() instead of operator[]: a failed lookup must not insert a
        // null entry into the table.
        auto it = this->methods.find(name);
        return (it != this->methods.end()) ? it->second : nullptr;
    }

    /// Returns the variable registered under `name`, or nullptr if there is none.
    Variable *variable(const char *name)
    {
        auto it = this->variables.find(name);
        return (it != this->variables.end()) ? it->second : nullptr;
    }
};
// Entry points for turning trainscript source text into a Module.
class VM
{
public:
// Parses `length` bytes starting at `buffer`. Returns the new module, or
// nullptr when parsing fails.
static Module *load(const void *buffer, size_t length);
// Overload for source text whose length is determined with strlen.
static Module *load(const char *text);
};
}