##LLVM的IR语言 首先来简单介绍一下llvm使用的IR(intermediate representation)语言。它有许多类似汇编的指令但又有函数的语法,而且提供了一个强大的类型系统。
下面是一段简单的IR代码:

    ; ModuleID = 'test'
    define void @main() {
    entry:
      %a = call i64 @add(i64 1, i64 2)
      %b = alloca i64
      store i64 %a, i64* %b
      ret void
    }
    define i64 @add(i64, i64) {
    entry:
      %2 = alloca i64
      store i64 %0, i64* %2
      %3 = alloca i64
      store i64 %1, i64* %3
      %4 = load i64* %2
      %5 = load i64* %3
      %6 = add i64 %4, %5
      ret i64 %6
    }

- 分号`;`后面的是注释。
- `%a`、`%1`等以`%`开头的可以看成是局部的寄存器或局部变量,全局的以`@`开头。
- `call`指令是调用一个函数,`%a = call i64 @add(i64 1, i64 2)`的意思是调用返回值是64位整型的add函数,传入的参数是64位的1和64位的2,结果保存到`%a`上,所以`%a`的类型就是64位整型。
- `%b = alloca i64`的意思是申请栈上的空间存放64位整型,地址保存到`%b`上,所以`%b`的类型是64位整型的指针(`i64*`)。
- `store i64 %a, i64* %b`就是把`%a`的值保存到`%b`指向的内存上。
- `load`与`store`相对应:`load`把地址上的数据取出来,`store`把数据存到地址上。

IR的类型很严格,而且每条指令的参数前面都要先声明类型,看起来十分啰嗦。使用llc和lli可以编译和运行IR程序。llvm有很多工具可以优化IR代码,所以我们希望把自己的语言翻译成IR再用llvm来优化。把自己的语言翻译成IR似乎不比直接翻译成汇编简单,幸运的是llvm已经提供了大量api来生成IR。
##几个基本的LLVM API
- LLVMContext: 线程上下文,通过getGlobalContext获得
- Module: 模块,可以看成是变量、函数和类型的集合,一般对应一个文件
- Function: 函数
- Type: 数据类型如i64、double、i64*、double*等
- Value: 可以看成是个变量如上面的%a、%1等
- BasicBlock: 基本块,如上面的entry,在IR里类型是label,可以看成是指令的集合,但必须以return、br等跳转类指令结束
- IRBuilder: 是一个辅助类,提供便捷的api插入指令

##设计一个简单的语言
我深受java的影响,所以这里也采用类C的语法。首先我要实现两个最基本的功能:变量和函数的相关语法。变量的类型有long和double两种。函数的语法也类似于java,可以先使用后定义,不能嵌套,提供多值返回。运算表达式和注释都类似于C,语句结束用分号。就像下面的例子:
    long global_a=1,global_b=2;
    double global_c=3.14;
    /*main function is the entry of the program*/
    void main(){
        global_a = global_a + 1;
        global_c = global_c + global_b;
        printL(global_a);println(); //print 2
        printD(global_c);println(); //print 5.14
        long x = 112/3;
        printL(x);println(); //print 37
        double y;
        [x,y] = test1(); //x=100 y=3.333
        [,y] = test2(); //y=3.333
        printL(x);println();
        printD(y);println();
    }
    //return two values
    long,double test1(){
        return 100,3.333;
    }
    //as test1, but another syntax
    [long a,double b] test2(){
        a = 100,b=3.333;
        return;
    }

##用flex和bison做语法分析 lex和yacc的实现,不解释。。。
##变量的处理
函数外声明的是全局变量,函数内声明的都是局部变量。用`Value *var = new GlobalVariable(module,type,false,GlobalValue::ExternalLinkage,initial);`来创建一个全局的变量,用`Value *var = irBuilder.CreateAlloca(type);`来创建一个局部变量。无论是全局的还是局部的,var的类型都是指针型的,所以要用load和store来进行取值和赋值,例如`Value *val = irBuilder.CreateLoad(var)`和`irBuilder.CreateStore(val,var)`。
##函数的处理 下面的几步可以创建一个函数:
vectorargTypes;argTypes.push_back(builder.getInt64Ty());ArrayRef argTypesRef(argTypes);FunctionType *funcType = FunctionType::get(builder.getVoidTy(),argTypesRef,false);Function *func = Function::Create(funcType,Function::ExternalLinkage,"funcA",&module);
函数体就是BasicBlock的集合,可以用BasicBlock *bb = BasicBlock::Create(context,"label1",func)
在函数体的最后创建一个BasicBlock。把IRBuilder的插入点设置成相应BasicBlock,后面用builder创建的指令都会追加到这个BasicBlock里了。例如:
builder.SetInsertPoint(bb);builder.Create......
llvm提供的函数都是单值返回的,这里使用Struct类型进行封装来实现多值返回。返回多值的函数,对应llvm里返回Struct类型的函数,`return a,b;`时就进行组装,`[a,b] = func();`时就进行拆解。
vectortypes;ArrayRef typesRef(types);StructType *structType = StructType::create(context,typesRef);
创建Struct和获取单个元素:
// Allocate stack storage for a value of structType, take the address of its
// i-th element with CreateStructGEP, then read that element (aVal) and write
// a value to it (bVal). `structType`, `i` and `bVal` come from the surrounding context.
Value *structVar = builder.CreateAlloca(structType);Value *elementPtr = builder.CreateStructGEP(structVar,i);Value *aVal = builder.CreateLoad(elementPtr);builder.CreateStore(bVal,elementPtr);
##完整的代码 ####token.l
%{
#include "ast.hpp"
#include "parser.hpp"

/*
 * Scanner for the toy language.
 *
 * Every rule that consumes text on the current line calls saveLocation() so
 * that yylloc describes the token just matched; rules that consume a newline
 * reset the column counter instead.
 *
 * Block comments are handled with the exclusive start condition CMNT: it is
 * entered on the opening delimiter and left on the closing one. The three
 * rules that run inside a comment must carry the <CMNT> prefix, otherwise
 * they are not active in that state and the comment can never be closed.
 */

/* Called at end of input; a non-zero result tells the scanner there is no
   further input. The function must return a value because it is declared int. */
extern "C" int yywrap(){ return 1; }

/* 1-based column of the next character to be scanned on the current line. */
int charno = 1;

/* Record the span of the current match in yylloc and advance the column. */
void saveLocation(){
    yylloc.first_line = yylineno;
    yylloc.first_column = charno;
    yylloc.last_line = yylineno;
    charno += yyleng;
    yylloc.last_column = charno-1;
}
%}

%option yylineno
%x CMNT

%%
[\t ]*                      {saveLocation();}
\n                          {charno = 1;}
\/\/.*\n                    {charno = 1;}
"/*"                        {saveLocation(); BEGIN CMNT;}
<CMNT>.                     {saveLocation();}
<CMNT>\n                    {charno = 1;}
<CMNT>"*/"                  {saveLocation(); BEGIN INITIAL;}
return                      {saveLocation(); return RETURN;}
[a-zA-Z_][a-zA-Z0-9_]*      {saveLocation();return IDENT;}
[0-9]+\.[0-9]+              {saveLocation();return DOUBLE;}
[0-9]+                      {saveLocation();return LONG;}
[=\(\)\+\-\*\/;\{\},\[\]]   {saveLocation();return yytext[0];}
.                           {saveLocation(); return LEX_ERROR;}
####parser.y
%{#include#include "ast.hpp"#include "parser.hpp"extern int yylex();extern int yylineno,charno,yyleng;extern FILE *yyin;extern char *yytext;void yyerror(const char *msg){ cout< <<":"<<(charno-yyleng)<<": error: "< < first_line = firstLoc->first_line; loc->first_column = firstLoc->first_column; loc->last_line = lastLoc->last_line; loc->last_column = lastLoc->last_column; if(node != NULL){ node->firstLine = loc->first_line; node->firstColumn = loc->first_column; node->lastLine = loc->last_line; node->lastColumn = loc->last_column; }}void setLocation(Node *node,YYLTYPE *loc){ loc->first_line = yylineno; loc->first_column = charno; loc->last_line = yylineno; loc->last_column = charno-1; if(node != NULL){ node->firstLine = loc->first_line; node->firstColumn = loc->first_line; node->lastLine = loc->last_line; node->lastColumn = loc->last_column; }}%}%error-verbose%debug%union{ int token; Ident *ident; Program *program; Statement *stmt; Expression *expr; VarInit *varInit; SimpleVarDecl *spvarDecl; SimpleStmtList *spstmtList; CallExpr *callExpr; GlobalStatement *globalStmt; vector *identList; vector *stmtList; vector *exprList; vector *varInitList; vector *spvarDeclList; vector *globalStmtList;}%token IDENT RETURN LEX_ERROR DOUBLE LONG %type program%type ident%type stmt simple_stmt var_decl var_assi return_stmt%type expr%type var_init%type simple_var_decl%type simple_stmt_list%type call_expr%type global_stmt func_decl global_var_decl%type stmt_list%type ident_list ident_list_allow_null%type expr_list%type var_init_list%type simple_var_decl_list%type global_stmt_list%left '+' '-'%left '*' '/'%nonassoc UMINUS%start program%%program: global_stmt_list {program=new Program(*$1);$$=program;setLocation($$,&@$,&@1,&@1);} ;global_stmt_list: global_stmt {$$=new vector ();$$->push_back($1);setLocation(NULL,&@$,&@1,&@1);} |global_stmt_list global_stmt {$1->push_back($2);$$=$1;setLocation(NULL,&@$,&@1,&@2);} ;global_stmt: global_var_decl ';' {$$=$1;setLocation($$,&@$,&@1,&@1);} 
|func_decl {$$=$1;setLocation($$,&@$,&@1,&@1);} ;global_var_decl: ident var_init_list {$$=new GlobalVarDecl(*$1,*$2);setLocation($$,&@$,&@1,&@2);} ;ident: IDENT {$$=new Ident(*(new string(yytext,yyleng)));setLocation($$,&@$,&@1,&@1);} ;var_init_list: var_init {$$=new vector ();$$->push_back($1);setLocation(NULL,&@$,&@1,&@1);} |var_init_list ',' var_init {$1->push_back($3);$$=$1;setLocation(NULL,&@$,&@1,&@3);} ;var_init: ident {$$=new VarInit(*$1,NULL);setLocation($$,&@$,&@1,&@1);} |ident '=' expr {$$=new VarInit(*$1,$3);setLocation($$,&@$,&@1,&@3);} ;func_decl: ident ident '(' simple_var_decl_list ')' '{' stmt_list '}' {vector *types = new vector ();types->push_back($1); $$=new FuncDecl(*types,*$2,*$4,*$7);setLocation($$,&@$,&@1,&@8);} |ident_list ident '(' simple_var_decl_list ')' '{' stmt_list '}' {$$=new FuncDecl(*$1,*$2,*$4,*$7);setLocation($$,&@$,&@1,&@8);} |'[' simple_var_decl_list ']' ident '(' simple_var_decl_list ')' '{' stmt_list '}' {$$=new FuncDecl2(*$2,*$4,*$6,*$9);setLocation($$,&@$,&@1,&@10);} ;simple_var_decl_list: /*blank*/ {$$=new vector ();setLocation(NULL,&@$);} |simple_var_decl {$$=new vector ();$$->push_back($1);setLocation(NULL,&@$,&@1,&@1);} |simple_var_decl_list ',' simple_var_decl {$1->push_back($3);$$=$1;setLocation(NULL,&@$,&@1,&@3);} ;simple_var_decl: ident ident {$$=new SimpleVarDecl(*$1,*$2);setLocation($$,&@$,&@1,&@2);} ;ident_list: ident {$$=new vector ();$$->push_back($1);setLocation(NULL,&@$,&@1,&@1);} |ident_list ',' ident {$1->push_back($3);$$=$1;setLocation(NULL,&@$,&@1,&@3);} ;stmt_list: /*blank*/ {$$=new vector ();setLocation(NULL,&@$);} |stmt {$$=new vector ();$$->push_back($1);setLocation(NULL,&@$,&@1,&@1);} |stmt_list stmt {$1->push_back($2);$$=$1;setLocation(NULL,&@$,&@1,&@2);} ;stmt: ';' {$$=new NullStmt();setLocation($$,&@$,&@1,&@1);} |var_decl ';' {$$=$1;setLocation($$,&@$,&@1,&@2);} |return_stmt ';' {$$=$1;setLocation($$,&@$,&@1,&@2);} |simple_stmt_list ';' {$$=$1;setLocation($$,&@$,&@1,&@2);} ;var_decl: ident 
var_init_list {$$=new VarDecl(*$1,*$2);setLocation($$,&@$,&@1,&@2);} ;return_stmt: RETURN expr_list {$$=new ReturnStmt(*$2);setLocation($$,&@$,&@1,&@2);} ;simple_stmt_list: simple_stmt {$$=new SimpleStmtList();$$->add($1);setLocation($$,&@$,&@1,&@1);} |simple_stmt_list ',' simple_stmt {$1->add($3);$$=$1;setLocation($$,&@$,&@1,&@3);} ;simple_stmt: var_assi {$$=$1;setLocation($$,&@$,&@1,&@1);} |expr {$$=new ExprStmt(*$1);setLocation($$,&@$,&@1,&@1);} ;var_assi: ident '=' expr {$$=new VarAssi(*$1,*$3);setLocation($$,&@$,&@1,&@3);} |'[' ident_list_allow_null ']' '=' call_expr {$$=new MultiVarAssi(*$2,*$5);setLocation($$,&@$,&@1,&@5);} ;ident_list_allow_null: /*blank*/ {$$=new vector ();$$->push_back(NULL);setLocation(NULL,&@$);} |ident {$$=new vector ();$$->push_back($1);setLocation(NULL,&@$,&@1,&@1);} |ident_list_allow_null ',' ident {$1->push_back($3);$$=$1;setLocation(NULL,&@$,&@1,&@3);} |ident_list_allow_null ',' {$1->push_back(NULL);$$=$1;setLocation(NULL,&@$,&@1,&@2);} ;expr_list: /*blank*/ {$$=new vector ();setLocation(NULL,&@$);} |expr {$$=new vector ();$$->push_back($1);setLocation(NULL,&@$,&@1,&@1);} |expr_list ',' expr {$1->push_back($3);$$=$1;setLocation(NULL,&@$,&@1,&@3);} ;expr: expr '+' expr {$$=new BinaryExpr(*$1,'+',*$3);setLocation($$,&@$,&@1,&@3);} |expr '-' expr {$$=new BinaryExpr(*$1,'-',*$3);setLocation($$,&@$,&@1,&@3);} |expr '*' expr {$$=new BinaryExpr(*$1,'*',*$3);setLocation($$,&@$,&@1,&@3);} |expr '/' expr {$$=new BinaryExpr(*$1,'/',*$3);setLocation($$,&@$,&@1,&@3);} |'(' expr ')' {$$=$2;setLocation($$,&@$,&@1,&@3);} |'-' expr %prec UMINUS {$$=new PrefixExpr('-',*$2);setLocation($$,&@$,&@1,&@2);} |ident {$$=new IdentExpr(*$1);setLocation($$,&@$,&@1,&@1);} |LONG {$$=new Long(new string(yytext,yyleng));setLocation($$,&@$,&@1,&@1);} |DOUBLE {$$=new Double(new string(yytext,yyleng));setLocation($$,&@$,&@1,&@1);} |call_expr {$$=$1;setLocation($$,&@$,&@1,&@1);} ;call_expr: ident '(' expr_list ')' {$$=new 
CallExpr(*$1,*$3);setLocation($$,&@$,&@1,&@4);} ;
####ast.hpp
#ifndef AST_HPP
#define AST_HPP
#include <iostream>
#include <string>
#include <vector>
#include <map>
#include <llvm/Value.h>
#include <llvm/Module.h>
#include <llvm/LLVMContext.h>
#include <llvm/Type.h>
#include <llvm/DerivedTypes.h>
#include <llvm/Function.h>
#include <llvm/BasicBlock.h>
#include <llvm/Argument.h>
#include <llvm/Instructions.h>
#include <llvm/IRBuilder.h>
using namespace std;using namespace llvm;class AstFunction;class AstContext;class Node;class Program;class SimpleVarDecl;class VarInit;class Ident;class GlobalStatement;class GlobalVarDecl;class FuncDecl;class FuncDecl2;class Statement;class VarDecl;class VarAssi;class MultiVarAssi;class ExprStmt;class SimpleStmtList;class NullStmt;class ReturnStmt;class Expression;class BinaryExpr;class PrefixExpr;class IdentExpr;class CallExpr;class Long;class Double;extern LLVMContext &context;extern IRBuilder<> builder;extern Module module;extern Function *startFunc;extern string errorMsg;extern Program *program;extern Value* createCast(Value *value,Type *type);extern Constant* getInitial(Type *type);extern void throwError(Node *node);extern void throwWarning(Node *node,string msg);extern string getOperatorName(int op);extern string getTypeName(Type *type);class AstFunction{public: string name; Function *llvmFunction; Type *returnType; bool isReturnSingle; bool isReturnVoid; vectorreturnTypes; vector argTypes; int style; vector returnVars; AstFunction(string name,Function *llvmFunction,vector &returnTypes,vector &argTypes,int style=1) :name(name),llvmFunction(llvmFunction),returnTypes(returnTypes),argTypes(argTypes),style(style){ isReturnSingle = (returnTypes.size() == 1); isReturnVoid = (returnTypes.size() == 0); returnType = llvmFunction->getReturnType(); }};class AstContext{ AstContext *parent; map typeTable; map functionTable; map varTable;public: AstFunction *currentFunc; AstContext(AstContext *parent=NULL):parent(parent){ if(parent != NULL){ currentFunc = parent->currentFunc; }else{ currentFunc = NULL; } } Type* getType(string name); AstFunction* getFunction(string name); Value* getVar(string name); bool addFunction(string name,AstFunction *astFunction); bool addVar(string name,Value *var); bool addType(string name,Type *type);};class Node{public: int firstLine; int firstColumn; int lastLine; int lastColumn;};class Program : public Node{public: vector &stmts; 
Program(vector &stmts):stmts(stmts){} void codeGen(AstContext &astContext);};class Ident : public Node{public: string &str; Ident(string &str):str(str){} operator string(){return str;}};class VarInit : public Node{public: Ident &varName; Expression *expr; VarInit(Ident &varName,Expression *expr):varName(varName),expr(expr){}};class SimpleVarDecl : public Node{public: Ident &typeName; Ident &varName; SimpleVarDecl(Ident &typeName,Ident &varName) :typeName(typeName),varName(varName){}};class GlobalStatement : public Node{public: virtual void declGen(AstContext &astContext)=0; virtual void codeGen(AstContext &astContext)=0; virtual bool isFuncDecl()=0;};class GlobalVarDecl : public GlobalStatement{public: Ident &typeName; vector &varInitList; GlobalVarDecl(Ident &typeName,vector &varInitList) :typeName(typeName),varInitList(varInitList){} void codeGen(AstContext &astContext); void declGen(AstContext &astContext); bool isFuncDecl(){return false;}};class FuncDecl : public GlobalStatement{public: vector &retTypeNameList; Ident &funcName; vector &argDeclList; vector &stmtList; FuncDecl(vector &retTypeNameList,Ident &funcName, vector &argDeclList,vector &stmtList) :retTypeNameList(retTypeNameList),funcName(funcName), argDeclList(argDeclList),stmtList(stmtList){} void declGen(AstContext &astContext); void codeGen(AstContext &astContext); bool isFuncDecl(){return true;};};class FuncDecl2 : public GlobalStatement{public: vector &retDeclList; Ident &funcName; vector &argDeclList; vector &stmts; FuncDecl2(vector &retDeclList,Ident &funcName, vector &argDeclList,vector &stmts) :retDeclList(retDeclList),funcName(funcName), argDeclList(argDeclList),stmts(stmts){} void declGen(AstContext &astContext); void codeGen(AstContext &astContext); bool isFuncDecl(){return true;}};class Statement : public Node{public: virtual void codeGen(AstContext &astContext)=0;};class VarDecl : public Statement{public: Ident &typeName; vector &varInitList; VarDecl(Ident &typeName,vector &varInitList) 
:typeName(typeName),varInitList(varInitList){} void codeGen(AstContext &astContext);};class VarAssi : public Statement{public: Ident &varName; Expression &expr; VarAssi(Ident &varName,Expression &expr):varName(varName),expr(expr){} void codeGen(AstContext &astContext);};class MultiVarAssi : public Statement{public: vector &varNameList; CallExpr &callExpr; MultiVarAssi(vector &varNameList,CallExpr &callExpr) :varNameList(varNameList),callExpr(callExpr){} void codeGen(AstContext &astContext);};class SimpleStmtList : public Statement{public: vector stmtList; SimpleStmtList(){} void add(Statement *stmt); void codeGen(AstContext &astContext);};class ExprStmt : public Statement{public: Expression &expr; ExprStmt(Expression &expr):expr(expr){} void codeGen(AstContext &astContext);};class NullStmt : public Statement{public: NullStmt(){} void codeGen(AstContext &astContext){}};class ReturnStmt : public Statement{public: vector &exprList; ReturnStmt(vector &exprList):exprList(exprList){} void codeGen(AstContext &astContext);};class Expression : public Node{public: virtual Value* codeGen(AstContext &astContext)=0;};class BinaryExpr : public Expression{public: Expression &lexpr; Expression &rexpr; int op; BinaryExpr(Expression &lexpr,int op,Expression &rexpr) :lexpr(lexpr),rexpr(rexpr),op(op){} Value* codeGen(AstContext &astContext);};class PrefixExpr : public Expression{public: int op; Expression &expr; PrefixExpr(int op,Expression &expr):op(op),expr(expr){} Value* codeGen(AstContext &astContext);};class IdentExpr : public Expression{public: Ident &ident; IdentExpr(Ident &ident):ident(ident){} Value* codeGen(AstContext &astContext); operator string();};class CallExpr : public Expression{public: Ident &funcName; vector &exprList; CallExpr(Ident &funcName,vector &exprList) :funcName(funcName),exprList(exprList){} Value* codeGen(AstContext &astContext); vector multiCodeGen(AstContext &astContext);};class Long : public Expression{public: string *valStr; Long(string 
*valStr):valStr(valStr){} Value* codeGen(AstContext &astContext);};class Double : public Expression{public: string *valStr; Double(string *valStr):valStr(valStr){} Value* codeGen(AstContext &astContext);};#endif // AST_HPP
####ast.cpp
#include "ast.hpp"void throwError(Node *node){ cout<firstLine<<":"< firstColumn<<": error: "< < firstLine<<":"< firstColumn<<": warning: "< < isDoubleTy()){ return "double"; }else if(type->isIntegerTy(64)){ return "long"; }if(type->isVoidTy()){ return "void"; }else{ return "unknow"; }}Value* createCast(Value *value,Type *type){ Type *valType = value->getType(); if(valType == type){ return value; }else if(type->isDoubleTy() && valType->isDoubleTy()){ return value; }else if(type->isIntegerTy(64) && valType->isIntegerTy(64)){ return value; }else if(type->isDoubleTy() && valType->isIntegerTy(64)){ return builder.CreateSIToFP(value,type); }else if(type->isIntegerTy(64) && valType->isDoubleTy()){ return builder.CreateFPToSI(value,type); }else{ errorMsg = "no viable conversion from '"+getTypeName(valType) +"' to '"+getTypeName(type)+"'"; return NULL; }}Constant* getInitial(Type *type){ if(type->isDoubleTy()){ return ConstantFP::get(builder.getDoubleTy(),0); }else if(type->isIntegerTy(64)){ return builder.getInt64(0); }else{ errorMsg = "no initializer for '"+getTypeName(type)+"'"; return NULL; }}Type* AstContext::getType(string name){ Type *type = typeTable[name]; if(type == NULL && parent != NULL){ type = parent->getType(name); } if(type == NULL){ if(name == "void"){ errorMsg = "variable has incomplete type 'void'"; }else{ errorMsg = "undeclared type '"+name+"'"; } } return type;}AstFunction* AstContext::getFunction(string name) throw(string){ AstFunction *function = functionTable[name]; if(function == NULL && parent != NULL){ return parent->getFunction(name); } if(function == NULL){ errorMsg = "undeclared function '"+name+"'"; } return function;}Value* AstContext::getVar(string name){ Value *var = varTable[name]; if(var == NULL && parent != NULL){ return parent->getVar(name); } if(var == NULL){ errorMsg = "undeclared identifier '"+name+"'"; } return var;}bool AstContext::addFunction(string name, AstFunction *function){ if(functionTable[name] != NULL){ errorMsg = 
"redefine function named '"+name+"'"; return false; } functionTable[name] = function; return true;}bool AstContext::addVar(string name, Value *value){ if(varTable[name] != NULL){ errorMsg = "redefine variable named '"+name+"'"; return false; } varTable[name] = value; return true;}bool AstContext::addType(string name, Type *type){ if(typeTable[name] != NULL){ errorMsg = "redefine type named '"+name+"'"; return false; } typeTable[name] = type; return true;}void Program::codeGen(AstContext &astContext){ for(unsigned i=0; i declGen(astContext); } //create init func FunctionType *initFuncType = FunctionType::get(builder.getVoidTy(),false); Function *initFunc = Function::Create(initFuncType,Function::ExternalLinkage,"main",&module); builder.SetInsertPoint(BasicBlock::Create(context,"entry",initFunc)); for(unsigned i=0;i isFuncDecl()){ stmt->codeGen(astContext); } } AstFunction *mainFunc = astContext.getFunction("main"); if(mainFunc == NULL){ cout< < llvmFunction); builder.CreateRetVoid(); } startFunc = initFunc; for(unsigned i = 0; i < stmts.size(); i++){ GlobalStatement *stmt = stmts[i]; if(stmt->isFuncDecl()){ stmt->codeGen(astContext); } }}void GlobalVarDecl::declGen(AstContext &astContext){ Type *type = astContext.getType(typeName); if(type == NULL){ throwError(&typeName); } Constant *initial = getInitial(type); if(initial == NULL){ throwError(this); } for(unsigned i = 0; i < varInitList.size(); i++){ VarInit *varInit = varInitList[i]; Value *var = new GlobalVariable(module,type,false,GlobalValue::ExternalLinkage,initial); astContext.addVar(varInit->varName,var); }}void GlobalVarDecl::codeGen(AstContext &astContext){ Type *type = astContext.getType(typeName); if(type == NULL){ throwError(&typeName); } for(unsigned i = 0; i < varInitList.size(); i++){ VarInit *varInit = varInitList[i]; if(varInit->expr != NULL){ Value *var = astContext.getVar(varInit->varName); Value *v = varInit->expr->codeGen(astContext); v = createCast(v,type); if(v == NULL){ 
throwError(varInit->expr); } builder.CreateStore(v,var); } }}void FuncDecl::declGen(AstContext &astContext){ vector returnTypes; if(retTypeNameList.size() > 1 || retTypeNameList[0]->str != "void"){ for(unsigned i = 0; i < retTypeNameList.size(); i++){ Type *type = astContext.getType(*retTypeNameList[i]); if(type == NULL){ throwError(retTypeNameList[i]); } returnTypes.push_back(type); } } Type *returnType = NULL; if(returnTypes.size() == 0){ returnType = builder.getVoidTy(); }else if(returnTypes.size() == 1){ returnType = returnTypes[0]; }else{ ArrayRef typesArray(returnTypes); returnType = StructType::create(context,typesArray); } vector argTypes; for(unsigned i = 0; i < argDeclList.size(); i++){ SimpleVarDecl *argDecl = argDeclList[i]; Type *type = astContext.getType(argDecl->typeName); if(type == NULL){ throwError(&argDecl->typeName); } argTypes.push_back(type); } FunctionType *functionType = NULL; if(argTypes.size() == 0){ functionType = FunctionType::get(returnType,false); }else{ ArrayRef argTypeArrayRef(argTypes); functionType = FunctionType::get(returnType,argTypeArrayRef,false); } Function *function = Function::Create(functionType,Function::ExternalLinkage,funcName.str+"_sp",&module); AstFunction *astFunction = new AstFunction(funcName,function,returnTypes,argTypes); if(!astContext.addFunction(funcName,astFunction)){ throwError(&funcName); }}void FuncDecl::codeGen(AstContext &astContext){ AstFunction *astFunction = astContext.getFunction(funcName); Function* function = astFunction->llvmFunction; vector &argTypes = astFunction->argTypes; vector &returnTypes = astFunction->returnTypes; AstContext newContext(&astContext); builder.SetInsertPoint(BasicBlock::Create(context,"entry",function)); unsigned i = 0; for(Function::arg_iterator ai = function->arg_begin();ai != function->arg_end(); ai++,i++){ SimpleVarDecl *argDecl = argDeclList[i]; Value *alloc = builder.CreateAlloca(argTypes[i]); builder.CreateStore(ai,alloc); 
if(!newContext.addVar(argDecl->varName,alloc)){ throwError(&argDecl->varName); } } newContext.currentFunc = astFunction; for(i = 0; i < stmtList.size(); i++){ stmtList[i]->codeGen(newContext); } if(astFunction->isReturnVoid){ builder.CreateRetVoid(); }else if(astFunction->isReturnSingle){ Value *retVal = getInitial(astFunction->returnType); if(retVal == NULL){ throwError(retTypeNameList[0]); } builder.CreateRet(retVal); }else{ Value *alloc = builder.CreateAlloca(astFunction->returnType); for(i = 0; i < returnTypes.size(); i++){ Value *element = builder.CreateStructGEP(alloc,i); Value *elemVal = getInitial(returnTypes[i]); if(elemVal == NULL){ throwError(retTypeNameList[i]); } builder.CreateStore(elemVal,element); } builder.CreateRet(builder.CreateLoad(alloc)); }}void FuncDecl2::declGen(AstContext &astContext){ vector returnTypes; for(unsigned i = 0; i < retDeclList.size(); i++){ SimpleVarDecl *retDecl = retDeclList[i]; Type *type = astContext.getType(retDecl->typeName); if(type == NULL){ throwError(&retDecl->typeName); } returnTypes.push_back(type); } Type *returnType = NULL; if(returnTypes.size() == 0){ returnType = builder.getVoidTy(); }else if(returnTypes.size() == 1){ returnType = returnTypes[0]; }else{ ArrayRef typesArray(returnTypes); returnType = StructType::create(context,typesArray); } vector argTypes; for(unsigned i = 0; i < argDeclList.size(); i++){ SimpleVarDecl *argDecl = argDeclList[i]; Type *type = astContext.getType(argDecl->typeName); if(type == NULL){ throwError(&argDecl->typeName); } argTypes.push_back(type); } FunctionType *functionType = NULL; if(argTypes.size() == 0){ functionType = FunctionType::get(returnType,false); }else{ ArrayRef argTypeArrayRef(argTypes); functionType = FunctionType::get(returnType,argTypeArrayRef,false); } Function *function = Function::Create(functionType,Function::ExternalLinkage,funcName.str+"_sp",&module); AstFunction *astFunction = new AstFunction(funcName,function,returnTypes,argTypes,2); 
if(!astContext.addFunction(funcName,astFunction)){ throwError(&funcName); }}void FuncDecl2::codeGen(AstContext &astContext){ AstFunction *astFunction = astContext.getFunction(funcName); Function* function = astFunction->llvmFunction; vector &argTypes = astFunction->argTypes; vector &retTypes = astFunction->returnTypes; AstContext newContext(&astContext); builder.SetInsertPoint(BasicBlock::Create(context,"entry",function)); unsigned i = 0; for(Function::arg_iterator ai = function->arg_begin();ai != function->arg_end(); ai++,i++){ SimpleVarDecl *argDecl = argDeclList[i]; Value *alloc = builder.CreateAlloca(argTypes[i]); builder.CreateStore(ai,alloc); if(!newContext.addVar(argDecl->varName,alloc)){ throwError(&argDecl->varName); } } vector retVarList; for(i = 0; i < retDeclList.size(); i++){ SimpleVarDecl *retDecl = retDeclList[i]; Value *alloc = builder.CreateAlloca(retTypes[i]); Value *initial = getInitial(retTypes[i]); if(initial == NULL){ throwError(&retDecl->typeName); } if(!newContext.addVar(retDecl->varName,alloc)){ throwError(&retDecl->varName); } retVarList.push_back(alloc); } astFunction->returnVars = retVarList; newContext.currentFunc = astFunction; for(i = 0; i < stmts.size(); i++){ stmts[i]->codeGen(newContext); } if(astFunction->isReturnVoid){ builder.CreateRetVoid(); }else if(astFunction->isReturnSingle){ builder.CreateRet(builder.CreateLoad(retVarList[0])); }else{ Value *alloc = builder.CreateAlloca(astFunction->returnType); for(i = 0; i < retVarList.size(); i++){ Value *element = builder.CreateStructGEP(alloc,i); builder.CreateStore(builder.CreateLoad(retVarList[i]),element); } builder.CreateRet(builder.CreateLoad(alloc)); }}void VarDecl::codeGen(AstContext &astContext){ Type *type = astContext.getType(typeName); if(type == NULL){ throwError(&typeName); } for(unsigned i = 0; i < varInitList.size(); i++){ VarInit *varInit = varInitList[i]; Value *var = NULL; Value *v = NULL; if(varInit->expr != NULL){ v = varInit->expr->codeGen(astContext); v = 
createCast(v,type); if(v == NULL){ throwError(varInit->expr); } }else{ v = getInitial(type); if(v == NULL){ throwError(&typeName); } } var = builder.CreateAlloca(type); builder.CreateStore(v,var); if(!astContext.addVar(varInit->varName,var)){ throwError(&varInit->varName); } }}void VarAssi::codeGen(AstContext &astContext){ Value *var = astContext.getVar(varName); if(var == NULL){ throwError(&varName); } Value *value = expr.codeGen(astContext); PointerType *pt = static_cast (var->getType()); value = createCast(value,pt->getElementType()); if(value == NULL){ throwError(&expr); } builder.CreateStore(value,var);}void MultiVarAssi::codeGen(AstContext &astContext){ vector vars; for(unsigned i=0; i < varNameList.size(); i++){ Ident *varName = varNameList[i]; if(varName == NULL){ vars.push_back(NULL); }else{ Value *var = astContext.getVar(*varName); if(var == NULL){ throwError(varName); } vars.push_back(var); } } vector values = callExpr.multiCodeGen(astContext); if(values.size() < vars.size()){ errorMsg = "too few values returned from function '"+callExpr.funcName.str+"'"; throwError(&callExpr); } for(unsigned i=0; i < vars.size(); i++){ if(vars[i] == NULL){ continue; } Value *v = values[i]; PointerType *pt = static_cast (vars[i]->getType()); v = createCast(v,pt->getElementType()); if(v == NULL){ throwError(&callExpr); } builder.CreateStore(v,vars[i]); }}void SimpleStmtList::codeGen(AstContext &astContext){ for(unsigned i = 0; i < stmtList.size(); i++){ stmtList[i]->codeGen(astContext); }}void SimpleStmtList::add(Statement *stmt){ stmtList.push_back(stmt);}void ExprStmt::codeGen(AstContext &astContext){ expr.codeGen(astContext);}void ReturnStmt::codeGen(AstContext &astContext){ AstFunction *currentFunc = astContext.currentFunc; if(currentFunc->style == 1){ vector &returnTypes = currentFunc->returnTypes; if(exprList.size() < returnTypes.size()){ errorMsg = "too few values to return in function '"+currentFunc->name+"'"; throwError(this); }else if(exprList.size() > 
returnTypes.size()){ errorMsg = "too many values to return in function '"+currentFunc->name+"'"; throwError(this); } vector exprListValues; for(unsigned i=0; i < exprList.size(); i++){ Expression *expr = exprList[i]; exprListValues.push_back(expr->codeGen(astContext)); } if(returnTypes.size() == 0){ builder.CreateRetVoid(); }else if(returnTypes.size() == 1){ Value *v = createCast(exprListValues[0],returnTypes[0]); if(v == NULL){ throwError(exprList[0]); } builder.CreateRet(v); }else{ Value *alloc = builder.CreateAlloca(currentFunc->returnType); for(unsigned i=0; i < returnTypes.size(); i++){ Value *element = builder.CreateStructGEP(alloc,i); Value *v = createCast(exprListValues[i],returnTypes[i]); if(v == NULL){ throwError(exprList[i]); } builder.CreateStore(v,element); } builder.CreateRet(builder.CreateLoad(alloc)); } }else{ if(exprList.size() > 0){ errorMsg = "needn't declare any expression behind 'return' in style 2 function"; throwError(exprList[0]); } if(currentFunc->isReturnVoid){ builder.CreateRetVoid(); }else if(currentFunc->isReturnSingle){ Value *v = builder.CreateLoad(currentFunc->returnVars[0]); builder.CreateRet(v); }else{ Value *alloc = builder.CreateAlloca(currentFunc->returnType); for(unsigned i = 0; i < currentFunc->returnVars.size(); i++){ Value *element = builder.CreateStructGEP(alloc,i); Value *v = builder.CreateLoad(currentFunc->returnVars[i]); builder.CreateStore(v,element); } builder.CreateRet(builder.CreateLoad(alloc)); } } BasicBlock *anonyBB = BasicBlock::Create(context,"after_return",currentFunc->llvmFunction); builder.SetInsertPoint(anonyBB);}Value* BinaryExpr::codeGen(AstContext &astContext){ Value *lv = lexpr.codeGen(astContext); Value *rv = rexpr.codeGen(astContext); if( (lv->getType()->isDoubleTy() || lv->getType()->isIntegerTy(64)) && (lv->getType()->isDoubleTy() || lv->getType()->isIntegerTy(64)) ){ if(lv->getType()->isDoubleTy()){ rv = createCast(rv,lv->getType()); if(rv == NULL){ throwError(&rexpr); } }else{ lv = 
createCast(lv,rv->getType()); if(lv == NULL){ throwError(&lexpr); } } if(lv->getType()->isDoubleTy()){ switch(op){ case '+': return builder.CreateFAdd(lv,rv); case '-': return builder.CreateFSub(lv,rv); case '*': return builder.CreateFMul(lv,rv); case '/': return builder.CreateFDiv(lv,rv); default: ; } }else{ switch(op){ case '+': return builder.CreateAdd(lv,rv); case '-': return builder.CreateSub(lv,rv); case '*': return builder.CreateMul(lv,rv); case '/': return builder.CreateSDiv(lv,rv); default: ; } } } errorMsg = "invalid operands to binary expression ("+getTypeName(lv->getType())+ " "+getOperatorName(op)+" "+getTypeName(rv->getType())+")"; throwError(this);}Value* PrefixExpr::codeGen(AstContext &astContext){ Value *val = expr.codeGen(astContext); if(op == '-'){ if(val->getType()->isDoubleTy()){ return builder.CreateFNeg(val); }else if(val->getType()->isIntegerTy(64)){ return builder.CreateNeg(val); } } errorMsg = "invalid argument type '"+getTypeName(val->getType())+ "' to unary '"+getOperatorName(op)+"'expression"; throwError(this);}Value* IdentExpr::codeGen(AstContext &astContext){ Value *var = astContext.getVar(ident); if(var == NULL){ throwError(this); } return builder.CreateLoad(var);}vector CallExpr::multiCodeGen(AstContext &astContext){ AstFunction *myfunc = astContext.getFunction(funcName); if(myfunc == NULL){ throwError(this); } vector &argTypes = myfunc->argTypes; vector exprListValues; for(unsigned i=0; i < exprList.size(); i++){ Expression *expr = exprList[i]; exprListValues.push_back(expr->codeGen(astContext)); } if(exprListValues.size() < argTypes.size()){ errorMsg = "too few arguments to function '"+funcName.str+"''"; throwError(this); }else if(exprListValues.size() > argTypes.size()){ cout<<"too many arguments to function '"< <<"'"< llvmFunction); }else{ vector argValues; for(unsigned i=0; i < argTypes.size(); i++){ Value *v = createCast(exprListValues[i],argTypes[i]); if(v == NULL){ throwError(exprList[i]); } argValues.push_back(v); } 
ArrayRef args(argValues); callResult = builder.CreateCall(myfunc->llvmFunction,args); } vector resultValues; vector &resultTypes = myfunc->returnTypes; if(myfunc->isReturnVoid){ resultValues.push_back(callResult); }else if(myfunc->isReturnSingle){ resultValues.push_back(callResult); }else{ Value *alloc = builder.CreateAlloca(myfunc->returnType); builder.CreateStore(callResult,alloc); for(unsigned i=0; i < resultTypes.size(); i++){ Value *element = builder.CreateStructGEP(alloc,i); resultValues.push_back(builder.CreateLoad(element)); } } return resultValues;}Value* CallExpr::codeGen(AstContext &astContext){ vector resultValues = multiCodeGen(astContext); return resultValues[0];}Value* Long::codeGen(AstContext &astContext){ return builder.getInt64(atol(valStr->c_str()));}Value* Double::codeGen(AstContext &astContext){ return ConstantFP::get(builder.getDoubleTy(),atof(valStr->c_str()));}
####main.cpp
#include#include #include #include #include #include #include #include #include #include #include #include #include #include #include "ast.hpp"#include "parser.hpp"extern int yyparse();extern void createSystemFunctions(AstContext &astContext);extern FILE *yyin, *yyout;LLVMContext &context = getGlobalContext();Module module("test",context);IRBuilder<> builder(context);Function *startFunc = NULL;string errorMsg;Program *program = NULL;int main(int argc,char **argv){ bool runJit = false; bool irOutput = false; bool asmOutput = false; bool objOutput = false; TargetMachine::CodeGenFileType outputFileType = TargetMachine::CGFT_Null; char *outputFileName = NULL; int option; while((option = getopt(argc,argv,"o:scS")) != -1){ switch(option){ case 'o': if(outputFileName != NULL){ cout<<"warning: ignoring '-o "< <<"' because '-o " < <<"' has set before"< >"< codeGen(astContext); //module.dump();cout< createTargetMachine(triple.getTriple(),MCPU,"",targetOptions); string opFileName; if(outputFileName == NULL){ if(inputFileName == NULL){ if(asmOutput){ opFileName = "temp.s"; }else{ opFileName = "temp.o"; } }else{ if(asmOutput){ opFileName = string(basename(inputFileName)) + ".s"; }else{ opFileName = string(basename(inputFileName)) + ".o"; } } }else{ opFileName = outputFileName; } string errorMsg2; tool_output_file *outputFile = new tool_output_file(opFileName.c_str(),errorMsg2); if(!errorMsg2.empty()){ cout< < os()); targetMachine->addPassesToEmitFile(passManager,fos,outputFileType); passManager.run(module); outputFile->keep(); } if(runJit){ string errStr; ExecutionEngine *execEngine = EngineBuilder(&module).setErrorStr(&errStr).setEngineKind(EngineKind::JIT).create(); if(execEngine == NULL){ cout<<"Could not create ExecutionEngine: "< < argValues; execEngine->runFunction(startFunc,argValues); } return 0;}void createSystemFunctions(AstContext &astContext){ //insert printf func decl vector printfFuncArgTypes; printfFuncArgTypes.push_back(builder.getInt8PtrTy()); ArrayRef 
printfFuncArgTypesRef(printfFuncArgTypes); FunctionType *printfFuncType = FunctionType::get(builder.getInt32Ty(),printfFuncArgTypesRef,true); Constant *printfFunc = module.getOrInsertFunction("printf",printfFuncType); vector emptyTypes; //create print long func vector printfLongFuncArgTypes; printfLongFuncArgTypes.push_back(builder.getInt64Ty()); ArrayRef printfLongFuncArgTypesRef(printfLongFuncArgTypes); FunctionType *printfLongFuncType = FunctionType::get(builder.getVoidTy(),printfLongFuncArgTypesRef,false); Function *printfLongFunc = Function::Create(printfLongFuncType,Function::ExternalLinkage,"printL",&module); builder.SetInsertPoint(BasicBlock::Create(context,"entry",printfLongFunc)); Value *longFormat = builder.CreateGlobalStringPtr("%ld"); builder.CreateCall2(printfFunc,longFormat,printfLongFunc->arg_begin()); builder.CreateRetVoid(); AstFunction *printfL = new AstFunction("printL",printfLongFunc,emptyTypes,printfLongFuncArgTypes); //create print double func vector printfDoubleFuncArgTypes; printfDoubleFuncArgTypes.push_back(builder.getDoubleTy()); ArrayRef printfDoubleFuncArgTypesRef(printfDoubleFuncArgTypes); FunctionType *printfDoubleFuncType = FunctionType::get(builder.getVoidTy(),printfDoubleFuncArgTypesRef,false); Function *printfDoubleFunc = Function::Create(printfDoubleFuncType,Function::ExternalLinkage,"printD",&module); builder.SetInsertPoint(BasicBlock::Create(context,"entry",printfDoubleFunc)); Value *doubleFormat = builder.CreateGlobalStringPtr("%lf"); builder.CreateCall2(printfFunc,doubleFormat,printfDoubleFunc->arg_begin()); builder.CreateRetVoid(); AstFunction *printfD = new AstFunction("printD",printfDoubleFunc,emptyTypes,printfDoubleFuncArgTypes); //create println func FunctionType *printlnFuncType = FunctionType::get(builder.getVoidTy(),false); Function *printlnFunc = Function::Create(printlnFuncType,Function::ExternalLinkage,"println",&module); builder.SetInsertPoint(BasicBlock::Create(context,"entry",printlnFunc)); Value *lnFormat = 
builder.CreateGlobalStringPtr("\n"); builder.CreateCall(printfFunc,lnFormat); builder.CreateRetVoid(); AstFunction *println = new AstFunction("println",printlnFunc,emptyTypes,emptyTypes); //astContext.addFunction("printf",cast (printfFunc)); astContext.addFunction("printL",printfL); astContext.addFunction("printD",printfD); astContext.addFunction("println",println);}
相关的命令
bison -d -o parser.cpp parser.y
flex -o token.cpp token.l
g++ -o toy main.cpp ast.cpp parser.cpp token.cpp -Iinclude -I. `llvm-config --cxxflags --ldflags --libs core jit native all-targets asmparser`
以上代码使用的 LLVM 版本是 3.2。