Add goroutines and function pointers

7 years ago · 0168bf7797
10 changed files with 796 additions and 59 deletions
--- a/10
+++ b/10
@ -10,6 +10,7 @@ LLVM := $(shell go env GOPATH)/src/github.com/aykevl/llvm/bindings/go/llvm/workd
 LINK = $(LLVM)llvm-link
 LLC = $(LLVM)llc
 LLAS = $(LLVM)llvm-as
+OPT = $(LLVM)opt

 CFLAGS = -Wall -Werror -Os -g -fno-exceptions -flto -ffunction-sections -fdata-sections $(LLFLAGS)

@ -72,10 +73,15 @@ build/tgo: *.go
 	@mkdir -p build
 	go build -o build/tgo -i .

-# Build textual IR with the Go compiler.
-build/%.o: src/examples/% src/examples/%/*.go build/tgo src/runtime/*.go build/runtime-$(TARGET)-combined.bc
+# Build IR with the Go compiler.
+build/%.bc: src/examples/% src/examples/%/*.go build/tgo src/runtime/*.go build/runtime-$(TARGET)-combined.bc
 	./build/tgo $(TGOFLAGS) -printir -runtime build/runtime-$(TARGET)-combined.bc -o $@ $(subst src/,,$<)

+# Compile and optimize bitcode file.
+# The -coro-* flags ask opt to run LLVM's coroutine passes around the -O1
+# pipeline. Note that opt writes its result over its own input ($<), so the
+# .bc prerequisite is rewritten in place before llc turns it into an object.
+build/%.o: build/%.bc
+	$(OPT) -coro-early -coro-split -coro-elide -O1 -coro-cleanup -o $< $<
+	$(LLC) -filetype=obj -o $@ $<
+
 # Compile C sources for the runtime.
 build/%.bc: src/runtime/%.c src/runtime/*.h
 	@mkdir -p build
--- a/analysis.go
+++ b/analysis.go
@ -0,0 +1,161 @@
+
+package main
+
+import (
+	"golang.org/x/tools/go/ssa"
+)
+
+// Analysis holds analysis results over a whole program.
+type Analysis struct {
+	// Per-function results, keyed by SSA function. Filled in by addFunction.
+	functions      map[*ssa.Function]*FuncMeta
+	// Set by AnalyseGoCalls when a go statement starts a blocking function.
+	needsScheduler bool
+	// Every go statement seen while scanning function bodies in addFunction.
+	goCalls        []*ssa.Go
+}
+
+// FuncMeta holds some analysis results of a single function.
+type FuncMeta struct {
+	// The function these results describe.
+	f                 *ssa.Function
+	// True when the function calls runtime.Sleep directly; also set for every
+	// transitive caller of such a function by AnalyseBlockingRecursive.
+	blocking          bool
+	parents           []*ssa.Function // calculated by AnalyseCallgraph
+	// Functions called statically from this function (collected by addFunction).
+	children          []*ssa.Function
+}
+
+// NewAnalysis creates an empty Analysis whose function table is ready to be
+// filled through AddPackage.
+func NewAnalysis() *Analysis {
+	a := new(Analysis)
+	a.functions = make(map[*ssa.Function]*FuncMeta)
+	return a
+}
+
+// AddPackage registers all functions of pkg with the analyzer, to be analyzed
+// later: every package-level function, and for every named type the methods
+// in the method set of that type.
+//
+// NOTE(review): only the method set of the type itself is visited, not that of
+// the pointer to it, so methods with pointer receivers appear not to be
+// registered here - confirm whether that is intended.
+func (a *Analysis) AddPackage(pkg *ssa.Package) {
+	for _, member := range pkg.Members {
+		switch member := member.(type) {
+		case *ssa.Function:
+			a.addFunction(member)
+		case *ssa.Type:
+			ms := pkg.Prog.MethodSets.MethodSet(member.Type())
+			for i := 0; i < ms.Len(); i++ {
+				fn := pkg.Prog.MethodValue(ms.At(i))
+				if fn == nil {
+					// MethodValue returns nil for abstract (interface)
+					// methods: there is no body to analyse.
+					continue
+				}
+				a.addFunction(fn)
+			}
+		}
+	}
+}
+
+// addFunction analyzes the given function quickly without any recursion into
+// callees, and adds it to the list of functions in the analyzer.
+//
+// It records every statically called function as a child, marks the function
+// as blocking when it calls runtime.Sleep directly, and collects all go
+// statements in a.goCalls. Anonymous functions defined inside f are added as
+// well. A nil f is ignored.
+func (a *Analysis) addFunction(f *ssa.Function) {
+	if f == nil {
+		// Nothing to analyse (for example an abstract method).
+		return
+	}
+
+	// Remember which function this metadata belongs to.
+	fm := &FuncMeta{f: f}
+	for _, block := range f.Blocks {
+		for _, instr := range block.Instrs {
+			switch instr := instr.(type) {
+			case *ssa.Call:
+				// Only direct calls to a known function are tracked; calls
+				// through other values (builtins, function values, ...) are
+				// skipped.
+				callee, ok := instr.Call.Value.(*ssa.Function)
+				if !ok {
+					continue
+				}
+				if getFunctionName(callee, false) == "runtime.Sleep" {
+					fm.blocking = true
+				}
+				fm.children = append(fm.children, callee)
+			case *ssa.Go:
+				a.goCalls = append(a.goCalls, instr)
+			}
+		}
+	}
+	a.functions[f] = fm
+
+	for _, child := range f.AnonFuncs {
+		a.addFunction(child)
+	}
+}
+
+// AnalyseCallgraph fills in the parents (callers) of all functions, using the
+// children recorded by addFunction.
+//
+// All packages need to be added before this pass can run, or it will produce
+// incorrect results. Children that were never added are reported and skipped.
+func (a *Analysis) AnalyseCallgraph() {
+	for f, fm := range a.functions {
+		for _, child := range fm.children {
+			childRes, ok := a.functions[child]
+			if !ok {
+				// child.String() is used instead of child.Pkg because Pkg is
+				// nil for synthetic functions such as wrappers. println ends
+				// the message with a newline so reports do not run together.
+				println("child not found: " + child.String() + ", function: " + f.Name())
+				continue
+			}
+			childRes.parents = append(childRes.parents, f)
+		}
+	}
+}
+
+// Analyse which functions are recursively blocking.
+//
+// Depends on AnalyseCallgraph.
+func (a *Analysis) AnalyseBlockingRecursive() {
+	worklist := make([]*FuncMeta, 0)
+
+	// Fill worklist with directly blocking functions.
+	for _, fm := range a.functions {
+		if fm.blocking {
+			worklist = append(worklist, fm)
+		}
+	}
+
+	// Keep reducing this worklist by marking a function as recursively blocking
+	// from the worklist and pushing all its parents that are non-blocking.
+	// This is somewhat similar to a worklist in a mark-sweep garbage collector.
+	// The work items are then grey objects.
+	for len(worklist) != 0 {
+		// Pick the topmost.
+		fm := worklist[len(worklist)-1]
+		worklist = worklist[:len(worklist)-1]
+		for _, parent := range fm.parents {
+			parentfm := a.functions[parent]
+			if !parentfm.blocking {
+				parentfm.blocking = true
+				worklist = append(worklist, parentfm)
+			}
+		}
+	}
+}
+
+// AnalyseGoCalls decides whether the program needs a scheduler. That is the
+// case only when some go statement starts a blocking function; a go statement
+// on a non-blocking function can be turned into a regular function call.
+//
+// Depends on AnalyseBlockingRecursive.
+func (a *Analysis) AnalyseGoCalls() {
+	for _, goInstr := range a.goCalls {
+		if !a.isBlocking(goInstr.Call.Value) {
+			continue
+		}
+		a.needsScheduler = true
+	}
+}
+
+// NeedsScheduler reports whether the analysed program needs a scheduler, as
+// determined by AnalyseGoCalls.
+//
+// Depends on AnalyseGoCalls.
+func (a *Analysis) NeedsScheduler() bool {
+	return a.needsScheduler
+}
+
+// IsBlocking reports whether the given function blocks. When no scheduler is
+// needed nothing is treated as blocking. Builtins are also accepted for
+// convenience; they are always non-blocking.
+//
+// Depends on AnalyseBlockingRecursive.
+func (a *Analysis) IsBlocking(f ssa.Value) bool {
+	// The lookup is only performed when a scheduler is in use.
+	return a.needsScheduler && a.isBlocking(f)
+}
+
+// isBlocking reports whether f was marked blocking, regardless of whether a
+// scheduler is needed. Builtins are never blocking. Functions that were never
+// added to the analysis have no metadata and are reported as non-blocking
+// instead of causing a nil pointer dereference. Any other kind of value
+// panics.
+func (a *Analysis) isBlocking(f ssa.Value) bool {
+	switch f := f.(type) {
+	case *ssa.Builtin:
+		return false
+	case *ssa.Function:
+		fm, ok := a.functions[f]
+		return ok && fm.blocking
+	default:
+		panic("Analysis.IsBlocking on unknown type")
+	}
+}
--- a/src/examples/blinky/blinky.go
+++ b/src/examples/blinky/blinky.go
@ -7,15 +7,34 @@ import (
 )

+// main starts led1 with a go statement and then runs led2 itself.
 func main() {
-	led := machine.GPIO{17} // LED 1 on the PCA10040
+	go led1()
+	led2()
+}
+
+// led1 configures the pin machine.LED as an output and then loops forever:
+// it prints a marker, drives the pin low or high, and sleeps for
+// runtime.Millisecond * 1000 after each change.
+func led1() {
+	led := machine.GPIO{machine.LED}
+	led.Configure(machine.GPIOConfig{Mode: machine.GPIO_OUTPUT})
+	for {
+		println("+")
+		led.Low()
+		runtime.Sleep(runtime.Millisecond * 1000)
+
+		println("-")
+		led.High()
+		runtime.Sleep(runtime.Millisecond * 1000)
+	}
+}
+
+// led2 does the same as led1 for the pin machine.LED2, with indented markers
+// and a sleep of runtime.Millisecond * 420 after each change.
+func led2() {
+	led := machine.GPIO{machine.LED2}
 	led.Configure(machine.GPIOConfig{Mode: machine.GPIO_OUTPUT})
 	for {
-		println("LED on")
-		led.Set(false)
-		runtime.Sleep(runtime.Millisecond * 500)
+		println("  +")
+		led.Low()
+		runtime.Sleep(runtime.Millisecond * 420)

-		println("LED off")
-		led.Set(true)
-		runtime.Sleep(runtime.Millisecond * 500)
+		println("  -")
+		led.High()
+		runtime.Sleep(runtime.Millisecond * 420)
 	}
 }
--- a/src/examples/hello/hello.go
+++ b/src/examples/hello/hello.go
@ -25,6 +25,16 @@ func main() {
 	printItf(5)
 	printItf(byte('x'))
 	printItf("foo")
+
+	runFunc(hello) // must be indirect to avoid obvious inlining
+}
+
+// runFunc calls f. It exists so that the example performs a call through a
+// function value.
+func runFunc(f func()) {
+	f()
+}
+
+// hello prints a fixed message; main passes it to runFunc as a function value.
+func hello() {
+	println("hello from function pointer!")
+}

 func strlen(s string) int {
--- a/src/runtime/runtime.go
+++ b/src/runtime/runtime.go
@ -6,6 +6,13 @@ const Compiler = "tgo"
 // The bitness of the CPU (e.g. 8, 32, 64). Set by the compiler as a constant.
 var TargetBits uint8

+// Sleep pauses for the duration d by calling the target-specific sleep
+// function.
+func Sleep(d Duration) {
+	// This function is treated specially by the compiler: when goroutines are
+	// used, it is transformed into a llvm.coro.suspend() call.
+	// When goroutines are not used this function behaves as normal.
+	sleep(d)
+}
+
 func _panic(message interface{}) {
 	printstring("panic: ")
 	printitf(message)
--- a/src/runtime/runtime.ll
+++ b/src/runtime/runtime.ll
@ -2,9 +2,26 @@ source_filename = "runtime/runtime.ll"

 declare void @runtime.initAll()
 declare void @main.main()
+declare i8* @main.main$async(i8*)
+declare void @runtime.scheduler(i8*)
+
+; Set to true by the compiler when the program needs the scheduler, i.e. when a 'go' statement starts a blocking function.
+@.has_scheduler = private unnamed_addr constant i1 false

+; Program entry point: runs all initializers, then either starts main.main as
+; a coroutine and hands it to the scheduler, or calls main.main directly,
+; depending on the @.has_scheduler constant.
 define i32 @main() {
 	call void @runtime.initAll()
+	%has_scheduler = load i1, i1* @.has_scheduler
+	; This branch will be optimized away. Only one of the targets will remain.
+	br i1 %has_scheduler, label %with_scheduler, label %without_scheduler
+
+with_scheduler:
+	; Initialize main and run the scheduler.
+	%main = call i8* @main.main$async(i8* null)
+	call void @runtime.scheduler(i8* %main)
+	ret i32 0
+
+without_scheduler:
+	; No scheduler is necessary. Call main directly.
 	call void @main.main()
 	ret i32 0
 }
--- a/src/runtime/runtime_nrf.go
+++ b/src/runtime/runtime_nrf.go
@ -45,8 +45,32 @@ func putchar(c byte) {
 	nrf.UART0.EVENTS_TXDRDY = 0
 }

-func Sleep(d Duration) {
-	C.rtc_sleep(C.uint32_t(d / 32)) // TODO: not accurate (must be d / 30.5175...)
+func sleep(d Duration) {
+	ticks64 := d / 32
+	for ticks64 != 0 {
+		monotime() // update timestamp
+		ticks := uint32(ticks64) & 0x7fffff // 23 bits (to be on the safe side)
+		C.rtc_sleep(C.uint32_t(ticks)) // TODO: not accurate (must be d / 30.5175...)
+		ticks64 -= Duration(ticks)
+	}
+}
+
+var (
+	timestamp      uint64 // microseconds since boottime
+	rtcLastCounter uint32 // 24 bits ticks
+)
+
+// Monotonically increasing number of microseconds since start.
+//
+// Each call adds the ticks elapsed since the previous call (times 32) to the
+// running timestamp and returns it.
+//
+// Note: very long pauses between measurements (more than 8 minutes) may
+// overflow the counter, leading to incorrect results. This might be fixed by
+// handling the overflow event.
+func monotime() uint64 {
+	rtcCounter := uint32(nrf.RTC0.COUNTER)
+	// The counter holds 24 bits of ticks. Masking the wrapping uint32
+	// difference to 24 bits gives the elapsed ticks even when the counter
+	// rolled over since the last call; a modulo by 0xffffff does not.
+	offset := (rtcCounter - rtcLastCounter) & 0xffffff // change since last measurement
+	rtcLastCounter = rtcCounter
+	timestamp += uint64(offset) * 32 // TODO: not precise
+	return timestamp
 }

 func abort() {
--- a/src/runtime/runtime_unix.go
+++ b/src/runtime/runtime_unix.go
@ -10,6 +10,7 @@ import (
 // #include <stdio.h>
 // #include <stdlib.h>
 // #include <unistd.h>
+// #include <time.h>
 import "C"

 const Microsecond = 1
@ -18,10 +19,20 @@ func putchar(c byte) {
 	C.putchar(C.int(c))
 }

+// sleep suspends execution for d by passing it to usleep.
-func Sleep(d Duration) {
+func sleep(d Duration) {
 	C.usleep(C.useconds_t(d))
 }

+// monotime returns the value of CLOCK_MONOTONIC converted to microseconds.
+//
+// TODO: use nanoseconds?
+// TODO: noescape
+func monotime() uint64 {
+	var now C.struct_timespec
+	C.clock_gettime(C.CLOCK_MONOTONIC, &now)
+	seconds := uint64(now.tv_sec)
+	nanoseconds := uint64(now.tv_nsec)
+	return seconds*1000*1000 + nanoseconds/1000
+}
+
 func abort() {
 	C.abort()
 }
@ -35,5 +46,5 @@ func alloc(size uintptr) unsafe.Pointer {
 }

+// free is currently a no-op on this target: the call to C.free is commented
+// out, so memory passed here is not released.
 func free(ptr unsafe.Pointer) {
-	C.free(ptr)
+	//C.free(ptr) // TODO
 }
--- a/src/runtime/scheduler.go
+++ b/src/runtime/scheduler.go
@ -0,0 +1,249 @@
+
+package runtime
+
+// This file implements the Go scheduler using coroutines.
+// A goroutine contains a whole stack. A coroutine is just a single function.
+// How do we use coroutines for goroutines, then?
+//   * Every function that contains a blocking call (like sleep) is marked
+//     blocking, and all its parents (callers) are marked blocking as well
+//     transitively until the root (main.main or a go statement).
+//   * A blocking function that calls a non-blocking function is called as
+//     usual.
+//   * A blocking function that calls a blocking function passes its own
+//     coroutine handle as a parameter to the subroutine and will make sure its
+//     own coroutine is removed from the scheduler. When the subroutine returns,
+//     it will re-insert the parent into the scheduler.
+// Note that a goroutine is generally called a 'task' for brevity and because
+// that's the more common term among RTOSes. But a goroutine and a task are
+// basically the same thing. Although, the code often uses the word 'task' to
+// refer to both a coroutine and a goroutine, as most of the scheduler isn't
+// aware of the difference.
+//
+// For more background on coroutines in LLVM:
+// https://llvm.org/docs/Coroutines.html
+
+import (
+	"unsafe"
+)
+
+// State/promise of a task. Internally represented as:
+//
+//     {i8 state, i32 data, i8* next}
+type taskState struct {
+	// One of the TASK_STATE_* constants.
+	state uint8
+	// Sleep duration as stored by sleepTask. While the task is in the sleep
+	// queue, addSleepTask adjusts it to be relative to the preceding entry.
+	data  uint32
+	// Next task in the run queue or sleep queue this task is linked into.
+	next  taskInstance
+}
+
+// Pointer to a task. Wrap unsafe.Pointer to provide some sort of type safety.
+type taskInstance unsafe.Pointer
+
+// Various states a task can be in. Not always updated (especially
+// TASK_STATE_RUNNABLE).
+const (
+	TASK_STATE_RUNNABLE = iota
+	TASK_STATE_SLEEP // set by sleepTask; the duration is in taskState.data
+	TASK_STATE_CALL // waiting for a sub-coroutine
+)
+
+// Queues used by the scheduler. Both queues are singly linked lists chained
+// through taskState.next.
+//
+// TODO: runqueueFront can be removed by making the run queue a circular linked
+// list. The runqueueBack will simply refer to the front in the 'next' pointer.
+var (
+	runqueueFront      taskInstance // popTask takes tasks from here
+	runqueueBack       taskInstance // pushTask appends tasks here
+	sleepQueue         taskInstance // first sleeping task
+	sleepQueueBaseTime uint64 // monotime() value the first sleep queue entry is relative to
+)
+
+// The declarations below have no Go body. The compiler maps runtime functions
+// whose name starts with _llvm_ to the LLVM intrinsic of the same name, with
+// underscores replaced by dots.
+
+// Translated to void @llvm.coro.resume(i8*).
+func _llvm_coro_resume(taskInstance)
+
+// Translated to void @llvm.coro.destroy(i8*).
+func _llvm_coro_destroy(taskInstance)
+
+// Translated to i1 @llvm.coro.done(i8*).
+func _llvm_coro_done(taskInstance) bool
+
+// Translated to i8* @llvm.coro.promise(i8*, i32, i1).
+func _llvm_coro_promise(taskInstance, int32, bool) unsafe.Pointer
+
+// taskPromise returns the taskState stored in the promise of task t, as
+// located by llvm.coro.promise.
+func taskPromise(t taskInstance) *taskState {
+	raw := _llvm_coro_promise(t, 4, false)
+	return (*taskState)(raw)
+}
+
+// Simple logging, for debugging. Does nothing unless the println below is
+// uncommented.
+func scheduleLog(msg string) {
+	//println(msg)
+}
+
+// Simple logging with a task pointer, for debugging. Does nothing unless the
+// println below is uncommented.
+func scheduleLogTask(msg string, t taskInstance) {
+	//println(msg, t)
+}
+
+// sleepTask marks the calling task as sleeping and stores how long it wants
+// to sleep in its promise.
+//
+// This is a compiler intrinsic.
+func sleepTask(caller taskInstance, duration Duration) {
+	state := taskPromise(caller)
+	state.data = uint32(duration) // TODO: longer durations
+	state.state = TASK_STATE_SLEEP
+}
+
+// waitForAsyncCall marks the calling task as waiting for an async call. The
+// parent goroutine is thereby kept out of the runqueue; it is meant to be
+// rescheduled by the callee.
+//
+// This is a compiler intrinsic.
+func waitForAsyncCall(caller taskInstance) {
+	taskPromise(caller).state = TASK_STATE_CALL
+}
+
+// scheduleTask puts a task on the run queue or the sleep queue, depending on
+// the state stored in its promise. A nil task is ignored.
+//
+// This is a compiler intrinsic.
+func scheduleTask(t taskInstance) {
+	if t == nil {
+		return
+	}
+	scheduleLogTask("  schedule task:", t)
+
+	// Decide what to do with this task: run it again soon, let it sleep for
+	// a while, or leave it alone.
+	promise := taskPromise(t)
+	switch {
+	case promise.state == TASK_STATE_CALL:
+		// Calling an async task: the subroutine will re-activate the parent.
+		return
+	case promise.state == TASK_STATE_SLEEP && promise.data != 0:
+		addSleepTask(t)
+	default:
+		pushTask(t)
+	}
+}
+
+// pushTask appends a task to the back of the run queue. A task that is
+// already done is destroyed instead of being queued.
+func pushTask(t taskInstance) {
+	if _llvm_coro_done(t) {
+		scheduleLogTask("  destroy task:", t)
+		_llvm_coro_destroy(t)
+		return
+	}
+	if runqueueBack != nil {
+		// Link the task behind the current last task.
+		taskPromise(runqueueBack).next = t
+		runqueueBack = t
+		return
+	}
+	// Empty runqueue: the task is both front and back.
+	runqueueFront = t
+	runqueueBack = t
+}
+
+// popTask removes and returns the task at the front of the run queue, or
+// returns nil when the run queue is empty.
+func popTask() taskInstance {
+	front := runqueueFront
+	if front == nil {
+		return nil
+	}
+	scheduleLogTask("    popTask:", front)
+
+	// Unlink the task and advance the queue.
+	promise := taskPromise(front)
+	next := promise.next
+	promise.next = nil
+	runqueueFront = next
+	if next == nil {
+		// That was the last task: the queue is empty now.
+		runqueueBack = nil
+	}
+	return front
+}
+
+// Add this task to the sleep queue, assuming its state is set to sleeping.
+//
+// The sleep queue is a linked list ordered by wake-up time. The data field of
+// the first entry is relative to sleepQueueBaseTime; the data field of every
+// following entry is relative to the entry before it. On entry, the data
+// field of t holds the time to sleep counted from now.
+func addSleepTask(t taskInstance) {
+	now := monotime()
+	promise := taskPromise(t)
+	if sleepQueue == nil {
+		scheduleLog("  -> sleep new queue")
+		// Create new linked list for the sleep queue.
+		promise.next = nil
+		sleepQueue = t
+		sleepQueueBaseTime = now
+		return
+	}
+
+	// Make sure promise.data is relative to the queue time base instead of
+	// relative to now.
+	promise.data += uint32(now - sleepQueueBaseTime)
+
+	// Insert at front of sleep queue.
+	if promise.data < taskPromise(sleepQueue).data {
+		scheduleLog("  -> sleep at start")
+		taskPromise(sleepQueue).data -= promise.data
+		promise.next = sleepQueue
+		sleepQueue = t
+		return
+	}
+
+	// Add to sleep queue (in the middle or at the end).
+	queueIndex := sleepQueue
+	for {
+		current := taskPromise(queueIndex)
+		// From here on promise.data is relative to the current entry.
+		promise.data -= current.data
+		if current.next == nil {
+			scheduleLog("  -> sleep at end")
+			promise.next = nil
+			current.next = t
+			return
+		}
+		// Compare against the delay of the NEXT entry: the task belongs
+		// between current and next only when it wakes up before next does.
+		next := taskPromise(current.next)
+		if next.data > promise.data {
+			scheduleLog("  -> sleep in middle")
+			next.data -= promise.data
+			promise.next = current.next
+			current.next = t
+			return
+		}
+		queueIndex = current.next
+	}
+}
+
+// Run the scheduler until all tasks have finished.
+// It takes an initial task (main.main) to bootstrap.
+//
+// Every iteration moves at most one expired task from the sleep queue to the
+// run queue, then resumes the task at the front of the run queue and
+// schedules it again according to its state. When the run queue is empty but
+// tasks are still sleeping, it sleeps until the first of them is due.
+func scheduler(main taskInstance) {
+	// Initial task.
+	scheduleTask(main)
+
+	// Main scheduler loop.
+	for {
+		scheduleLog("\n  schedule")
+		now := monotime()
+
+		// Add tasks that are done sleeping to the end of the runqueue so they
+		// will be executed soon.
+		if sleepQueue != nil && now - sleepQueueBaseTime >= uint64(taskPromise(sleepQueue).data) {
+			scheduleLog("  run <- sleep")
+			t := sleepQueue
+			promise := taskPromise(t)
+			// The next entry is relative to this one, so advance the base
+			// time by the delay of the entry being removed.
+			sleepQueueBaseTime += uint64(promise.data)
+			sleepQueue = promise.next
+			promise.next = nil
+			pushTask(t)
+		}
+
+		scheduleLog("  <- popTask")
+		t := popTask()
+		if t == nil {
+			if sleepQueue == nil {
+				// No more tasks to execute.
+				// It would be nice if we could detect deadlocks here, because
+				// there might still be functions waiting on each other in a
+				// deadlock.
+				scheduleLog("  no tasks left!")
+				return
+			}
+			scheduleLog("  sleeping...")
+			// The first sleeping task is not due yet (checked above), so this
+			// subtraction does not wrap around.
+			timeLeft := uint64(taskPromise(sleepQueue).data) - (now - sleepQueueBaseTime)
+			sleep(Duration(timeLeft))
+			continue
+		}
+
+		// Run the given task.
+		scheduleLogTask("  run:", t)
+		_llvm_coro_resume(t)
+
+		// Add the just resumed task to the run queue or the sleep queue.
+		scheduleTask(t)
+	}
+}
--- a/tgo.go
+++ b/tgo.go
@ -41,19 +41,33 @@ type Compiler struct {
 	stringType      llvm.Type
 	interfaceType   llvm.Type
 	typeassertType  llvm.Type
+	taskDataType    llvm.Type
 	allocFunc       llvm.Value
 	freeFunc        llvm.Value
+	coroIdFunc      llvm.Value
+	coroSizeFunc    llvm.Value
+	coroBeginFunc   llvm.Value
+	coroSuspendFunc llvm.Value
+	coroEndFunc     llvm.Value
+	coroFreeFunc    llvm.Value
 	itfTypeNumbers  map[types.Type]uint64
 	itfTypes        []types.Type
 	initFuncs       []llvm.Value
+	analysis        *Analysis
 }

+// Frame holds the state used while compiling a single function.
 type Frame struct {
-	llvmFn llvm.Value
-	params map[*ssa.Parameter]int   // arguments to the function
-	locals map[ssa.Value]llvm.Value // local variables
-	blocks map[*ssa.BasicBlock]llvm.BasicBlock
-	phis   []Phi
+	// The SSA function being compiled.
+	fn           *ssa.Function
+	llvmFn       llvm.Value
+	params       map[*ssa.Parameter]int   // arguments to the function
+	locals       map[ssa.Value]llvm.Value // local variables
+	blocks       map[*ssa.BasicBlock]llvm.BasicBlock
+	phis         []Phi
+	// Whether the analysis reports this function as blocking. The fields
+	// below are only set up for blocking functions.
+	blocking     bool
+	// Alloca of the task data type, passed to llvm.coro.id.
+	taskState    llvm.Value
+	// Result of llvm.coro.begin.
+	taskHandle   llvm.Value
+	// The "task.cleanup" block.
+	cleanupBlock llvm.BasicBlock
+	// The "task.suspend" block.
+	suspendBlock llvm.BasicBlock
 }

 func pkgPrefix(pkg *ssa.Package) string {
@ -72,6 +86,7 @@ func NewCompiler(pkgName, triple string) (*Compiler, error) {
 	c := &Compiler{
 		triple:         triple,
 		itfTypeNumbers: make(map[types.Type]uint64),
+		analysis:       NewAnalysis(),
 	}

 	target, err := llvm.GetTargetFromTriple(triple)
@ -100,12 +115,33 @@ func NewCompiler(pkgName, triple string) (*Compiler, error) {
 	// Go typeassert result: tuple of (ptr, bool)
 	c.typeassertType = llvm.StructType([]llvm.Type{c.i8ptrType, llvm.Int1Type()}, false)

+	// Goroutine / task data: {i8 state, i32 data, i8* next}
+	c.taskDataType = llvm.StructType([]llvm.Type{llvm.Int8Type(), llvm.Int32Type(), c.i8ptrType}, false)
+
 	allocType := llvm.FunctionType(c.i8ptrType, []llvm.Type{c.uintptrType}, false)
 	c.allocFunc = llvm.AddFunction(c.mod, "runtime.alloc", allocType)

 	freeType := llvm.FunctionType(llvm.VoidType(), []llvm.Type{c.i8ptrType}, false)
 	c.freeFunc = llvm.AddFunction(c.mod, "runtime.free", freeType)

+	coroIdType := llvm.FunctionType(c.ctx.TokenType(), []llvm.Type{llvm.Int32Type(), c.i8ptrType, c.i8ptrType, c.i8ptrType}, false)
+	c.coroIdFunc = llvm.AddFunction(c.mod, "llvm.coro.id", coroIdType)
+
+	coroSizeType := llvm.FunctionType(llvm.Int32Type(), nil, false)
+	c.coroSizeFunc = llvm.AddFunction(c.mod, "llvm.coro.size.i32", coroSizeType)
+
+	coroBeginType := llvm.FunctionType(c.i8ptrType, []llvm.Type{c.ctx.TokenType(), c.i8ptrType}, false)
+	c.coroBeginFunc = llvm.AddFunction(c.mod, "llvm.coro.begin", coroBeginType)
+
+	coroSuspendType := llvm.FunctionType(llvm.Int8Type(), []llvm.Type{c.ctx.TokenType(), llvm.Int1Type()}, false)
+	c.coroSuspendFunc = llvm.AddFunction(c.mod, "llvm.coro.suspend", coroSuspendType)
+
+	coroEndType := llvm.FunctionType(llvm.Int1Type(), []llvm.Type{c.i8ptrType, llvm.Int1Type()}, false)
+	c.coroEndFunc = llvm.AddFunction(c.mod, "llvm.coro.end", coroEndType)
+
+	coroFreeType := llvm.FunctionType(c.i8ptrType, []llvm.Type{c.ctx.TokenType(), c.i8ptrType}, false)
+	c.coroFreeFunc = llvm.AddFunction(c.mod, "llvm.coro.free", coroFreeType)
+
 	return c, nil
 }

@ -188,6 +224,13 @@ func (c *Compiler) Parse(mainPath string, buildTags []string) error {
 		}
 	}

+	for _, pkg := range packageList {
+		c.analysis.AddPackage(pkg)
+	}
+	c.analysis.AnalyseCallgraph()         // set up callgraph
+	c.analysis.AnalyseBlockingRecursive() // make all parents of blocking calls blocking (transitively)
+	c.analysis.AnalyseGoCalls()           // check whether we need a scheduler
+
 	// Transform each package into LLVM IR.
 	for _, pkg := range packageList {
 		err := c.parsePackage(program, pkg)
@ -214,7 +257,19 @@ func (c *Compiler) Parse(mainPath string, buildTags []string) error {
 	// Set functions referenced in runtime.ll to internal linkage, to improve
 	// optimization (hopefully).
 	main := c.mod.NamedFunction("main.main")
-	main.SetLinkage(llvm.PrivateLinkage)
+	if !main.IsDeclaration() {
+		main.SetLinkage(llvm.PrivateLinkage)
+	}
+	mainAsync := c.mod.NamedFunction("main.main$async")
+	if !mainAsync.IsDeclaration() {
+		mainAsync.SetLinkage(llvm.PrivateLinkage)
+	}
+	c.mod.NamedFunction("runtime.scheduler").SetLinkage(llvm.PrivateLinkage)
+
+	if c.analysis.NeedsScheduler() {
+		// Enable the scheduler.
+		c.mod.NamedGlobal(".has_scheduler").SetInitializer(llvm.ConstInt(llvm.Int1Type(), 1, false))
+	}

 	return nil
 }
@ -260,6 +315,32 @@ func (c *Compiler) getLLVMType(goType types.Type) (llvm.Type, error) {
 			return llvm.Type{}, err
 		}
 		return llvm.PointerType(ptrTo, 0), nil
+	case *types.Signature: // function pointer
+		// return value
+		var err error
+		var returnType llvm.Type
+		if typ.Results().Len() == 0 {
+			returnType = llvm.VoidType()
+		} else if typ.Results().Len() == 1 {
+			returnType, err = c.getLLVMType(typ.Results().At(0).Type())
+			if err != nil {
+				return llvm.Type{}, err
+			}
+		} else {
+			return llvm.Type{}, errors.New("todo: multiple return values in function pointer")
+		}
+		// param values
+		var paramTypes []llvm.Type
+		params := typ.Params()
+		for i := 0; i < params.Len(); i++ {
+			subType, err := c.getLLVMType(params.At(i).Type())
+			if err != nil {
+				return llvm.Type{}, err
+			}
+			paramTypes = append(paramTypes, subType)
+		}
+		// make a function pointer of it
+		return llvm.PointerType(llvm.FunctionType(returnType, paramTypes, false), 0), nil
 	case *types.Struct:
 		members := make([]llvm.Type, typ.NumFields())
 		for i := 0; i < typ.NumFields(); i++ {
@ -327,18 +408,22 @@ func (c *Compiler) isPointer(typ types.Type) bool {
 	}
 }

-func getFunctionName(fn *ssa.Function) string {
+func getFunctionName(fn *ssa.Function, blocking bool) string {
+	suffix := ""
+	if blocking {
+		suffix = "$async"
+	}
 	if fn.Signature.Recv() != nil {
 		// Method on a defined type.
 		typeName := fn.Params[0].Type().(*types.Named).Obj().Name()
-		return pkgPrefix(fn.Pkg) + "." + typeName + "." + fn.Name()
+		return pkgPrefix(fn.Pkg) + "." + typeName + "." + fn.Name() + suffix
 	} else {
 		// Bare function.
 		if strings.HasPrefix(fn.Name(), "_Cfunc_") {
 			// Name CGo functions directly.
 			return fn.Name()[len("_Cfunc_"):]
 		} else {
-			name := pkgPrefix(fn.Pkg) + "." + fn.Name()
+			name := pkgPrefix(fn.Pkg) + "." + fn.Name() + suffix
 			if fn.Pkg.Pkg.Path() == "runtime" && strings.HasPrefix(fn.Name(), "_llvm_") {
 				// Special case for LLVM intrinsics in the runtime.
 				name = "llvm." + strings.Replace(fn.Name()[len("_llvm_"):], "_", ".", -1)
@ -488,13 +573,20 @@ func (c *Compiler) parseFuncDecl(f *ssa.Function) (*Frame, error) {
 	f.WriteTo(os.Stdout)

 	frame := &Frame{
-		params: make(map[*ssa.Parameter]int),
-		locals: make(map[ssa.Value]llvm.Value),
-		blocks: make(map[*ssa.BasicBlock]llvm.BasicBlock),
+		fn:       f,
+		params:   make(map[*ssa.Parameter]int),
+		locals:   make(map[ssa.Value]llvm.Value),
+		blocks:   make(map[*ssa.BasicBlock]llvm.BasicBlock),
+		blocking: c.analysis.IsBlocking(f),
 	}

 	var retType llvm.Type
-	if f.Signature.Results() == nil {
+	if frame.blocking {
+		if f.Signature.Results() != nil {
+			return nil, errors.New("todo: return values in blocking function")
+		}
+		retType = c.i8ptrType
+	} else if f.Signature.Results() == nil {
 		retType = llvm.VoidType()
 	} else if f.Signature.Results().Len() == 1 {
 		var err error
@ -507,6 +599,9 @@ func (c *Compiler) parseFuncDecl(f *ssa.Function) (*Frame, error) {
 	}

 	var paramTypes []llvm.Type
+	if frame.blocking {
+		paramTypes = append(paramTypes, c.i8ptrType) // parent coroutine
+	}
 	for i, param := range f.Params {
 		paramType, err := c.getLLVMType(param.Type())
 		if err != nil {
@ -518,7 +613,7 @@ func (c *Compiler) parseFuncDecl(f *ssa.Function) (*Frame, error) {

 	fnType := llvm.FunctionType(retType, paramTypes, false)

-	name := getFunctionName(f)
+	name := getFunctionName(f, frame.blocking)
 	frame.llvmFn = c.mod.NamedFunction(name)
 	if frame.llvmFn.IsNil() {
 		frame.llvmFn = llvm.AddFunction(c.mod, name, fnType)
@ -624,6 +719,10 @@ func (c *Compiler) parseFunc(frame *Frame, f *ssa.Function) error {
 		llvmBlock := c.ctx.AddBasicBlock(frame.llvmFn, block.Comment)
 		frame.blocks[block] = llvmBlock
 	}
+	if frame.blocking {
+		frame.cleanupBlock = c.ctx.AddBasicBlock(frame.llvmFn, "task.cleanup")
+		frame.suspendBlock = c.ctx.AddBasicBlock(frame.llvmFn, "task.suspend")
+	}

 	// Load function parameters
 	for _, param := range f.Params {
@ -631,7 +730,41 @@ func (c *Compiler) parseFunc(frame *Frame, f *ssa.Function) error {
 		frame.locals[param] = llvmParam
 	}

-	// Fill those blocks with instructions.
+	if frame.blocking {
+		// Coroutine initialization.
+		c.builder.SetInsertPointAtEnd(frame.blocks[f.Blocks[0]])
+		frame.taskState = c.builder.CreateAlloca(c.taskDataType, "task.state")
+		stateI8 := c.builder.CreateBitCast(frame.taskState, c.i8ptrType, "task.state.i8")
+		id := c.builder.CreateCall(c.coroIdFunc, []llvm.Value{
+			llvm.ConstInt(llvm.Int32Type(), 0, false),
+			stateI8,
+			llvm.ConstNull(c.i8ptrType),
+			llvm.ConstNull(c.i8ptrType),
+		}, "task.token")
+		size := c.builder.CreateCall(c.coroSizeFunc, nil, "task.size")
+		if c.targetData.TypeAllocSize(size.Type()) > c.targetData.TypeAllocSize(c.uintptrType) {
+			size = c.builder.CreateTrunc(size, c.uintptrType, "task.size.uintptr")
+		} else if c.targetData.TypeAllocSize(size.Type()) < c.targetData.TypeAllocSize(c.uintptrType) {
+			size = c.builder.CreateZExt(size, c.uintptrType, "task.size.uintptr")
+		}
+		data := c.builder.CreateCall(c.allocFunc, []llvm.Value{size}, "task.data")
+		frame.taskHandle = c.builder.CreateCall(c.coroBeginFunc, []llvm.Value{id, data}, "task.handle")
+
+		// Coroutine cleanup. Free resources associated with this coroutine.
+		c.builder.SetInsertPointAtEnd(frame.cleanupBlock)
+		mem := c.builder.CreateCall(c.coroFreeFunc, []llvm.Value{id, frame.taskHandle}, "task.data.free")
+		c.builder.CreateCall(c.freeFunc, []llvm.Value{mem}, "")
+		// re-insert parent coroutine
+		c.builder.CreateCall(c.mod.NamedFunction("runtime.scheduleTask"), []llvm.Value{frame.llvmFn.FirstParam()}, "")
+		c.builder.CreateBr(frame.suspendBlock)
+
+		// Coroutine suspend. A call to llvm.coro.suspend() will branch here.
+		c.builder.SetInsertPointAtEnd(frame.suspendBlock)
+		c.builder.CreateCall(c.coroEndFunc, []llvm.Value{frame.taskHandle, llvm.ConstInt(llvm.Int1Type(), 0, false)}, "unused")
+		c.builder.CreateRet(frame.taskHandle)
+	}
+
+	// Fill blocks with instructions.
 	for _, block := range f.DomPreorder() {
 		c.builder.SetInsertPointAtEnd(frame.blocks[block])
 		for _, instr := range block.Instrs {
@ -664,6 +797,27 @@ func (c *Compiler) parseInstr(frame *Frame, instr ssa.Instruction) error {
 		value, err := c.parseExpr(frame, instr)
 		frame.locals[instr] = value
 		return err
+	case *ssa.Go:
+		if instr.Common().Method != nil {
+			return errors.New("todo: go on method receiver")
+		}
+
+		// Execute non-blocking calls (including builtins) directly.
+		// parentHandle param is ignored.
+		if !c.analysis.IsBlocking(instr.Common().Value) {
+			_, err := c.parseCall(frame, instr.Common(), llvm.Value{})
+			return err // probably nil
+		}
+
+		// Start this goroutine.
+		// parentHandle is nil, as the goroutine has no parent frame (it's a new
+		// stack).
+		handle, err := c.parseCall(frame, instr.Common(), llvm.Value{})
+		if err != nil {
+			return err
+		}
+		c.builder.CreateCall(c.mod.NamedFunction("runtime.scheduleTask"), []llvm.Value{handle}, "")
+		return nil
 	case *ssa.If:
 		cond, err := c.parseExpr(frame, instr.Cond)
 		if err != nil {
@ -687,18 +841,32 @@ func (c *Compiler) parseInstr(frame *Frame, instr ssa.Instruction) error {
 		c.builder.CreateUnreachable()
 		return nil
 	case *ssa.Return:
-		if len(instr.Results) == 0 {
-			c.builder.CreateRetVoid()
-			return nil
-		} else if len(instr.Results) == 1 {
-			val, err := c.parseExpr(frame, instr.Results[0])
-			if err != nil {
-				return err
+		if frame.blocking {
+			if len(instr.Results) != 0 {
+				return errors.New("todo: return values from blocking function")
 			}
-			c.builder.CreateRet(val)
+			// Final suspend.
+			continuePoint := c.builder.CreateCall(c.coroSuspendFunc, []llvm.Value{
+				llvm.ConstNull(c.ctx.TokenType()),
+				llvm.ConstInt(llvm.Int1Type(), 1, false), // final=true
+			}, "")
+			sw := c.builder.CreateSwitch(continuePoint, frame.suspendBlock, 2)
+			sw.AddCase(llvm.ConstInt(llvm.Int8Type(), 1, false), frame.cleanupBlock)
 			return nil
 		} else {
-			return errors.New("todo: return value")
+			if len(instr.Results) == 0 {
+				c.builder.CreateRetVoid()
+				return nil
+			} else if len(instr.Results) == 1 {
+				val, err := c.parseExpr(frame, instr.Results[0])
+				if err != nil {
+					return err
+				}
+				c.builder.CreateRet(val)
+				return nil
+			} else {
+				return errors.New("todo: return value")
+			}
 		}
 	case *ssa.Store:
 		llvmAddr, err := c.parseExpr(frame, instr.Addr)
@ -797,16 +965,17 @@ func (c *Compiler) parseBuiltin(frame *Frame, args []ssa.Value, callName string)
 	}
 }

-func (c *Compiler) parseFunctionCall(frame *Frame, call *ssa.CallCommon, fn *ssa.Function) (llvm.Value, error) {
-	fmt.Printf("    function: %s\n", fn)
-
-	name := getFunctionName(fn)
-	target := c.mod.NamedFunction(name)
-	if target.IsNil() {
-		return llvm.Value{}, errors.New("undefined function: " + name)
-	}
-
+func (c *Compiler) parseFunctionCall(frame *Frame, call *ssa.CallCommon, llvmFn llvm.Value, blocking bool, parentHandle llvm.Value) (llvm.Value, error) {
 	var params []llvm.Value
+	if blocking {
+		if parentHandle.IsNil() {
+			// Started from 'go' statement.
+			params = append(params, llvm.ConstNull(c.i8ptrType))
+		} else {
+			// Blocking function calls another blocking function.
+			params = append(params, parentHandle)
+		}
+	}
 	for _, param := range call.Args {
 		val, err := c.parseExpr(frame, param)
 		if err != nil {
@ -815,19 +984,75 @@ func (c *Compiler) parseFunctionCall(frame *Frame, call *ssa.CallCommon, fn *ssa
 		params = append(params, val)
 	}

-	return c.builder.CreateCall(target, params, ""), nil
-}
+	if frame.blocking && llvmFn.Name() == "runtime.Sleep" {
+		// Set task state to TASK_STATE_SLEEP and set the duration.
+		c.builder.CreateCall(c.mod.NamedFunction("runtime.sleepTask"), []llvm.Value{frame.taskHandle, params[0]}, "")
+
+		// Yield to scheduler.
+		continuePoint := c.builder.CreateCall(c.coroSuspendFunc, []llvm.Value{
+			llvm.ConstNull(c.ctx.TokenType()),
+			llvm.ConstInt(llvm.Int1Type(), 0, false),
+		}, "")
+		wakeup := c.ctx.InsertBasicBlock(llvm.NextBasicBlock(c.builder.GetInsertBlock()), "task.wakeup")
+		sw := c.builder.CreateSwitch(continuePoint, frame.suspendBlock, 2)
+		sw.AddCase(llvm.ConstInt(llvm.Int8Type(), 0, false), wakeup)
+		sw.AddCase(llvm.ConstInt(llvm.Int8Type(), 1, false), frame.cleanupBlock)
+		c.builder.SetInsertPointAtEnd(wakeup)
+
+		return llvm.Value{}, nil
+	}

-func (c *Compiler) parseCall(frame *Frame, instr *ssa.Call) (llvm.Value, error) {
-	fmt.Printf("    call: %s\n", instr)
+	result := c.builder.CreateCall(llvmFn, params, "")
+	if blocking && !parentHandle.IsNil() {
+		// Calling a blocking function as a regular function call.
+		// This is done by passing the current coroutine as a parameter to the
+		// new coroutine and dropping the current coroutine from the scheduler
+		// (with the TASK_STATE_CALL state). When the subroutine is finished, it
+		// will reactivate the parent (this frame) in its destroy function.
+
+		c.builder.CreateCall(c.mod.NamedFunction("runtime.scheduleTask"), []llvm.Value{result}, "")
+
+		// Set task state to TASK_STATE_CALL.
+		c.builder.CreateCall(c.mod.NamedFunction("runtime.waitForAsyncCall"), []llvm.Value{frame.taskHandle}, "")
+
+		// Yield to the scheduler.
+		continuePoint := c.builder.CreateCall(c.coroSuspendFunc, []llvm.Value{
+			llvm.ConstNull(c.ctx.TokenType()),
+			llvm.ConstInt(llvm.Int1Type(), 0, false),
+		}, "")
+		resume := c.ctx.InsertBasicBlock(llvm.NextBasicBlock(c.builder.GetInsertBlock()), "task.callComplete")
+		sw := c.builder.CreateSwitch(continuePoint, frame.suspendBlock, 2)
+		sw.AddCase(llvm.ConstInt(llvm.Int8Type(), 0, false), resume)
+		sw.AddCase(llvm.ConstInt(llvm.Int8Type(), 1, false), frame.cleanupBlock)
+		c.builder.SetInsertPointAtEnd(resume)
+	}
+	return result, nil
+}

-	switch call := instr.Common().Value.(type) {
+func (c *Compiler) parseCall(frame *Frame, instr *ssa.CallCommon, parentHandle llvm.Value) (llvm.Value, error) {
+	switch call := instr.Value.(type) { // dispatch on the kind of callee value
 	case *ssa.Builtin:
-		return c.parseBuiltin(frame, instr.Common().Args, call.Name())
+		return c.parseBuiltin(frame, instr.Args, call.Name())
 	case *ssa.Function:
-		return c.parseFunctionCall(frame, instr.Common(), call)
-	default:
-		return llvm.Value{}, errors.New("todo: unknown call type: " + fmt.Sprintf("%#v", call))
+		targetBlocks := false // look up the non-blocking name first
+		name := getFunctionName(call, targetBlocks)
+		llvmFn := c.mod.NamedFunction(name)
+		if llvmFn.IsNil() { // not in the module under that name: retry with the blocking variant's name
+			targetBlocks = true
+			nameAsync := getFunctionName(call, targetBlocks)
+			llvmFn = c.mod.NamedFunction(nameAsync)
+			if llvmFn.IsNil() {
+				return llvm.Value{}, errors.New("undefined function: " + name) // neither name is defined; the plain name is reported
+			}
+		}
+		return c.parseFunctionCall(frame, instr, llvmFn, targetBlocks, parentHandle)
+	default: // any other callee value is treated as a function pointer
+		value, err := c.parseExpr(frame, instr.Value)
+		if err != nil {
+			return llvm.Value{}, err
+		}
+		// TODO: blocking function pointers (needs analysis); until then the call is always emitted with blocking=false.
+		return c.parseFunctionCall(frame, instr, value, false, parentHandle)
 	}
 }

@ -866,7 +1091,9 @@ func (c *Compiler) parseExpr(frame *Frame, expr ssa.Value) (llvm.Value, error) {
 	case *ssa.BinOp:
 		return c.parseBinOp(frame, expr)
 	case *ssa.Call:
-		return c.parseCall(frame, expr)
+		// Passing the current task here to the subroutine. It is only used when
+		// the subroutine is blocking.
+		return c.parseCall(frame, expr.Common(), frame.taskHandle)
 	case *ssa.ChangeType:
 		return c.parseConvert(frame, expr.Type(), expr.X)
 	case *ssa.Const:
@ -890,6 +1117,8 @@ func (c *Compiler) parseExpr(frame *Frame, expr ssa.Value) (llvm.Value, error) {
 			llvm.ConstInt(llvm.Int32Type(), uint64(expr.Field), false),
 		}
 		return c.builder.CreateGEP(val, indices, ""), nil
+	case *ssa.Function:
+		return c.mod.NamedFunction(getFunctionName(expr, false)), nil
 	case *ssa.Global:
 		fullName := getGlobalName(expr)
 		value := c.mod.NamedGlobal(fullName)
@ -1274,11 +1503,15 @@ func (c *Compiler) LinkModule(mod llvm.Module) error {

 func (c *Compiler) ApplyFunctionSections() {
 	// Put every function in a separate section. This makes it possible for the
-	// linker to remove dead code (--gc-sections).
+	// linker to remove dead code (like -ffunction-sections with --gc-sections).
 	llvmFn := c.mod.FirstFunction()
 	for !llvmFn.IsNil() {
 		if !llvmFn.IsDeclaration() {
-			llvmFn.SetSection(".text." + llvmFn.Name())
+			name := llvmFn.Name()
+			if strings.HasSuffix(name, "$async") {
+				name = name[:len(name)-len("$async")]
+			}
+			llvmFn.SetSection(".text." + name)
 		}
 		llvmFn = llvm.NextFunction(llvmFn)
 	}
@ -1367,7 +1600,7 @@ func Compile(pkgName, runtimePath, outpath, target string, printIR bool) error {
 	if err := c.Verify(); err != nil {
 		return err
 	}
-	c.Optimize(2, 1) // -O2 -Os
+	//c.Optimize(2, 1) // -O2 -Os
 	if err := c.Verify(); err != nil {
 		return err
 	}