Control Flow
Before we start, we need to prepare compile function for something like expression and statement that not our target.
type Expr interface{ isExpr() Expr }
type EConstant interface {
Expr
isEConstant() EConstant
}
type EVoid struct{ EConstant }
type EBool struct {
EConstant
V bool
}
type EI32 struct {
EConstant
V int64
}
type EVariable struct {
Expr
Name string
}
type EAdd struct {
Expr
Lhs, Rhs Expr
}
type ELessThan struct {
Expr
Lhs, Rhs Expr
}
And compile functions:
func compileConstant(e EConstant) constant.Constant {
switch e := e.(type) {
case *EI32:
return constant.NewInt(types.I32, e.V)
case *EBool:
// we have no boolean in LLVM IR
if e.V {
return constant.NewInt(types.I1, 1)
} else {
return constant.NewInt(types.I1, 0)
}
case *EVoid:
return nil
}
panic("unknown expression")
}
func (ctx *Context) compileExpr(e Expr) value.Value {
switch e := e.(type) {
case *EVariable:
return ctx.lookupVariable(e.Name)
case *EAdd:
l, r := ctx.compileExpr(e.Lhs), ctx.compileExpr(e.Rhs)
return ctx.NewAdd(l, r)
case *ELessThan:
l, r := ctx.compileExpr(e.Lhs), ctx.compileExpr(e.Rhs)
return ctx.NewICmp(enum.IPredSLT, l, r)
case EConstant:
return compileConstant(e)
}
panic("unimplemented expression")
}
EVariable
would need context to remember variable's value. Here is the related definition of Context
:
type Context struct {
*ir.Block
parent *Context
vars map[string]value.Value
}
func NewContext(b *ir.Block) *Context {
return &Context{
Block: b,
parent: nil,
vars: make(map[string]value.Value),
}
}
func (c *Context) NewContext(b *ir.Block) *Context {
ctx := NewContext(b)
ctx.parent = c
return ctx
}
func (c Context) lookupVariable(name string) value.Value {
if v, ok := c.vars[name]; ok {
return v
} else if c.parent != nil {
return c.parent.lookupVariable(name)
} else {
fmt.Printf("variable: `%s`\n", name)
panic("no such variable")
}
}
Finally, we would have some simple statement as placeholder:
type Stmt interface{ isStmt() Stmt }
type SDefine struct {
Stmt
Name string
Typ types.Type
Expr Expr
}
type SRet struct {
Stmt
Val Expr
}
Then compile:
func (ctx *Context) compileStmt(stmt Stmt) {
if ctx.Parent != nil {
return
}
f := ctx.Parent
switch s := stmt.(type) {
case *SDefine:
v := ctx.NewAlloca(s.Typ)
ctx.NewStore(ctx.compileExpr(s.Expr), v)
ctx.vars[s.Name] = v
case *SRet:
ctx.NewRet(ctx.compileExpr(s.Val))
}
}
If
Since we can let:
if condition {
// A
} else if condition {
// B
} else {
// C
}
became:
if condition {
// A
} else {
if condition {
// B
} else {
// C
}
}
We don't have to convert any else-if pattern. Therefore, our If
looks like this:
type SIf struct {
Stmt
Cond Expr
Then Stmt
Else Stmt
}
Then we can get transformers to generate control flow if. Using conditional jump to generate if statement:
func (ctx *Context) compileStmt(stmt Stmt) {
switch s := stmt.(type) {
case *SIf:
thenCtx := ctx.NewContext(f.NewBlock("if.then"))
thenCtx.compileStmt(s.Then)
elseB := f.NewBlock("if.else")
ctx.NewContext(elseB).compileStmt(s.Else)
ctx.NewCondBr(ctx.compileExpr(s.Cond), thenCtx.Block, elseB)
if !thenCtx.HasTerminator() {
leaveB := f.NewBlock("leave.if")
thenCtx.NewBr(leaveB)
}
}
}
When generating if, the most important thing is leave block, when if-then block complete, a jump to skip else block required since there has no block in high-level language liked concept in LLVM IR. At the end of a basic-block can be a return and since return would terminate a function, jump after return is a dead code, so we have to check we have to generate leave block or not. Here is a small example as usage:
f := ir.NewFunc("foo", types.Void)
bb := f.NewBlock("")
ctx.compileStmt(&SIf{
Cond: &EBool{V: true},
Then: &SRet{Val: &EVoid{}},
Else: &SRet{Val: &EVoid{}},
})
Finally, we get:
define void @foo() {
0:
br i1 true, label %if.then, label %if.else
if.then:
ret void
if.else:
ret void
}
We didn't support else-if directly at here, then we need to know how to handle this via parsing. First, we handle a sequence of if
(
<expr>
)
<block>
. Ok, we can fill AST with Cond
and Then
, now we should get a token else
, then we expect a <block>
or if
. When we get a <block>
this is a obviously can be use as Else
, else a if
we keep parsing and use it as Else
statement since if
for sure is a statement. Of course, with this method, generated IR would have some useless label and jump, but flow analyzing should optimize them later, so it's fine.
Switch
LLVM has switch instruction, hence, we can use it directly.
type SSwitch struct {
Stmt
Target Expr
CaseList []struct {
EConstant // LLVM IR only takes constant, if you want advanced switch semantic, then you can't directly use this approach
Stmt
}
DefaultCase Stmt
}
func (ctx *Context) compileStmt(stmt Stmt) {
switch s := stmt.(type) {
case *SSwitch:
cases := []*ir.Case{}
for _, ca := range s.CaseList {
caseB := f.NewBlock("switch.case")
ctx.NewContext(caseB).compileStmt(ca.Stmt)
cases = append(cases, ir.NewCase(compileConstant(ca.EConstant), caseB))
}
defaultB := f.NewBlock("switch.default")
ctx.NewContext(defaultB).compileStmt(s.DefaultCase)
ctx.NewSwitch(ctx.compileExpr(s.Target), defaultB, cases...)
}
}
For every case, we generate a block, then we can jump to target. Then we put statements into case blocks. Finally, we generate switch for the input block. Notice that, switch instruction of LLVM won't generate break
automatically, you can use the same trick in the previous section If to generate auto leave block for each case(Go semantic), or record leave block and introduces break statement(C semantic). Now let's test it:
f := ir.NewFunc("foo", types.Void)
ctx := NewContext(f.NewBlock(""))
ctx.compileStmt(&SSwitch{
Target: &EBool{V: true},
CaseList: []struct {
EConstant
Stmt
}{
{EConstant: &EBool{V: true}, Stmt: &SRet{Val: &EVoid{}}},
},
DefaultCase: &SRet{Val: &EVoid{}},
})
And output:
define void @foo() {
0:
switch i1 true, label %switch.default [
i1 true, label %switch.case
]
switch.case:
ret void
switch.default:
ret void
}
The switch statement in this section is quite naive, for advanced semantic like pattern matching with extraction or where clause, you would need to do more.
Loop
Break
Break statement needs to extend Context
, with a new field called leaveBlock
:
type Context struct {
// ...
leaveBlock *ir.Block
}
func NewContext(b *ir.Block) *Context {
return &Context{
// ...
leaveBlock: nil,
}
}
Then it's just a jump:
func (ctx *Context) compileStmt(stmt Stmt) {
switch s := stmt.(type) {
case *SBreak:
ctx.NewBr(ctx.leaveBlock)
}
}
Remember to update leave block information(and remove it when needed), and continue can be done in the same way.
Do While
Do while is the simplest loop structure since it's code structure almost same to the IR structure. Here we go:
type SDoWhile struct {
Stmt
Cond Expr
Block Stmt
}
func (ctx *Context) compileStmt(stmt Stmt) {
switch s := stmt.(type) {
case *SDoWhile:
doCtx := ctx.NewContext(f.NewBlock("do.while.body"))
ctx.NewBr(doCtx.Block)
leaveB := f.NewBlock("leave.do.while")
doCtx.leaveBlock = leaveB
doCtx.compileStmt(s.Block)
doCtx.NewCondBr(doCtx.compileExpr(s.Cond), doCtx.Block, leaveB)
}
}
Can see that, we jump to do-while body directly. Then we have a leave block, in the end of the do-while body we jump out to leave block or body again depends on condition. Let's test it:
f := ir.NewFunc("foo", types.Void)
ctx := NewContext(f.NewBlock(""))
ctx.compileStmt(&SDoWhile{
Cond: &EBool{V: true},
Block: &SDefine{
Stmt: nil,
Name: "foo",
Typ: types.I32,
Expr: &EI32{V: 1},
},
})
f.Blocks[len(f.Blocks)-1].NewRet(nil)
And output:
define void @foo() {
0:
br label %do.while.body
do.while.body:
%1 = alloca i32
store i32 1, i32* %1
br i1 true, label %do.while.body, label %leave.do.while
leave.do.while:
ret void
}
For Loop
For-loop would be an interesting case, at here, I only present a for-loop that can only have one initialize variable to reduce complexity, therefore, we have a AST like this:
type SForLoop struct {
Stmt
InitName string
InitExpr Expr
Step Expr
Cond Expr
Block Stmt
}
For example, for (x=0; x=x+1; x<10) {}
break down to:
SForLoop {
InitName: `x`
InitExpr: `0`
Step: `x + 1`
Cond: `x < 10`
Block: `{}`
}
At first view, people might think for-loop is as easy as do-while, but in SSA form, reuse variable in a loop need a new instruction: phi.
func (ctx *Context) compileStmt(stmt Stmt) {
switch s := stmt.(type) {
case *SForLoop:
loopCtx := ctx.NewContext(f.NewBlock("for.loop.body"))
ctx.NewBr(loopCtx.Block)
firstAppear := loopCtx.NewPhi(ir.NewIncoming(loopCtx.compileExpr(s.InitExpr), ctx.Block))
loopCtx.vars[s.InitName] = firstAppear
step := loopCtx.compileExpr(s.Step)
firstAppear.Incs = append(firstAppear.Incs, ir.NewIncoming(step, loopCtx.Block))
loopCtx.vars[s.InitName] = step
leaveB := f.NewBlock("leave.for.loop")
loopCtx.leaveBlock = leaveB
loopCtx.compileStmt(s.Block)
loopCtx.NewCondBr(loopCtx.compileExpr(s.Cond), loopCtx.Block, leaveB)
}
}
- Create a loop body context
- jump from the previous block
- Put phi into loop body
- Phi would have two incoming, first is
InitExpr
, the second one isStep
result. - compile step
- compile the conditional branch, jump to loop body or leave block
Testing:
f := ir.NewFunc("foo", types.Void)
ctx := NewContext(f.NewBlock(""))
ctx.compileStmt(&SForLoop{
InitName: "x",
InitExpr: &EI32{V: 0},
Step: &EAdd{Lhs: &EVariable{Name: "x"}, Rhs: &EI32{V: 1}},
Cond: &ELessThan{Lhs: &EVariable{Name: "x"}, Rhs: &EI32{V: 10}},
Block: &SDefine{Name: "foo", Typ: types.I32, Expr: &EI32{V: 2}},
})
f.Blocks[len(f.Blocks)-1].NewRet(nil)
The test generates:
define void @foo() {
0:
br label %for.loop.body
for.loop.body:
%1 = phi i32 [ 0, %0 ], [ %2, %for.loop.body ]
%2 = add i32 %1, 1
%3 = alloca i32
store i32 2, i32* %3
%4 = icmp slt i32 %2, 10
br i1 %4, label %for.loop.body, label %leave.for.loop
leave.for.loop:
ret void
}
In fact, you can also avoid phi, you can make a try as practice.
While
The last kind of loop we want to present is while loop.
type SWhile struct {
Stmt
Cond Expr
Block Stmt
}
It looks just like do while, but have different semantic, it might not execute it's body. Here is the implementation.
func (ctx *Context) compileStmt(stmt Stmt) {
switch s := stmt.(type) {
case *SWhile:
condCtx := ctx.NewContext(f.NewBlock("while.loop.cond"))
ctx.NewBr(condCtx.Block)
loopCtx := ctx.NewContext(f.NewBlock("while.loop.body"))
leaveB := f.NewBlock("leave.do.while")
condCtx.NewCondBr(condCtx.compileExpr(s.Cond), loopCtx.Block, leaveB)
condCtx.leaveBlock = leaveB
loopCtx.leaveBlock = leaveB
loopCtx.compileStmt(s.Block)
loopCtx.NewBr(condCtx.Block)
}
}
We would need two blocks since br
is a terminator, then the logic is simple:
while.loop.cond
would jump towhile.loop.body
orleave.do.while
by conditionwhile.loop.body
always back towhile.loop.cond
.
Finally, test:
f := ir.NewFunc("foo", types.Void)
ctx := NewContext(f.NewBlock(""))
ctx.compileStmt(&SWhile{
Cond: &EBool{V: true},
Block: &SDefine{
Name: "x",
Typ: types.I32,
Expr: &EI32{V: 0},
},
})
f.Blocks[len(f.Blocks)-1].NewRet(nil)
and output:
define void @foo() {
0:
br label %while.loop.cond
while.loop.cond:
br i1 true, label %while.loop.body, label %leave.do.while
while.loop.body:
%1 = alloca i32
store i32 0, i32* %1
br label %while.loop.cond
leave.do.while:
ret void
}