all: refactor heap-to-stack transform into the transform package

Also add unit tests. This is the first of several transformation (optimization/lowering) passes that I'd like to move to the new transform package. This separates the compiler from the optimizer. Also, it finally adds unit tests for the compiler, not just end-to-end compilation tests. This should improve robustness and should make it easier to change these transformation passes in the future. While the heap-to-stack transform is relatively simple, other passes are much more complex. Adding unit tests not only helps robustness over time, but also doubles as documentation as to what these transformation passes do exactly.
5 years ago · d905476231
13 changed files with 406 additions and 100 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -65,7 +65,7 @@ commands:
            - go-cache-v2-{{ checksum "go.mod" }}
      - llvm-source-linux
      - run: go install .
-      - run: go test -v
+      - run: go test -v ./transform .
      - run: make gen-device -j4
      - run: make smoketest RISCV=0
      - save_cache:
--- a/Makefile
+++ b/Makefile
@ -86,7 +86,7 @@ build/tinygo:
 	CGO_CPPFLAGS="$(CGO_CPPFLAGS)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" go build -o build/tinygo -tags byollvm .

 test:
-	CGO_CPPFLAGS="$(CGO_CPPFLAGS)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" go test -v -tags byollvm .
+	CGO_CPPFLAGS="$(CGO_CPPFLAGS)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" go test -v -tags byollvm ./transform .

 tinygo-test:
 	cd tests/tinygotest && tinygo test
--- a/compiler/optimizer.go
+++ b/compiler/optimizer.go
@ -3,6 +3,7 @@ package compiler
 import (
 	"errors"

+	"github.com/tinygo-org/tinygo/transform"
 	"tinygo.org/x/go-llvm"
 )

@ -45,7 +46,7 @@ func (c *Compiler) Optimize(optLevel, sizeLevel int, inlinerThreshold uint) erro
 		// Run Go-specific optimization passes.
 		c.OptimizeMaps()
 		c.OptimizeStringToBytes()
-		c.OptimizeAllocs()
+		transform.OptimizeAllocs(c.mod)
 		c.LowerInterfaces()
 		c.LowerFuncValues()

@ -55,7 +56,7 @@ func (c *Compiler) Optimize(optLevel, sizeLevel int, inlinerThreshold uint) erro
 		goPasses.Run(c.mod)

 		// Run TinyGo-specific interprocedural optimizations.
-		c.OptimizeAllocs()
+		transform.OptimizeAllocs(c.mod)
 		c.OptimizeStringToBytes()

 		// Lower runtime.isnil calls to regular nil comparisons.
@ -245,101 +246,6 @@ func (c *Compiler) OptimizeStringToBytes() {
 	}
 }

-// Basic escape analysis: translate runtime.alloc calls into alloca
-// instructions.
-func (c *Compiler) OptimizeAllocs() {
-	allocator := c.mod.NamedFunction("runtime.alloc")
-	if allocator.IsNil() {
-		// nothing to optimize
-		return
-	}
-
-	heapallocs := getUses(allocator)
-	for _, heapalloc := range heapallocs {
-		nilValue := llvm.Value{}
-		if heapalloc.Operand(0).IsAConstant() == nilValue {
-			// Do not allocate variable length arrays on the stack.
-			continue
-		}
-		size := heapalloc.Operand(0).ZExtValue()
-		if size > 256 {
-			// The maximum value for a stack allocation.
-			// TODO: tune this, this is just a random value.
-			continue
-		}
-
-		// In general the pattern is:
-		//     %0 = call i8* @runtime.alloc(i32 %size)
-		//     %1 = bitcast i8* %0 to type*
-		//     (use %1 only)
-		// But the bitcast might sometimes be dropped when allocating an *i8.
-		// The 'bitcast' variable below is thus usually a bitcast of the
-		// heapalloc but not always.
-		bitcast := heapalloc // instruction that creates the value
-		if uses := getUses(heapalloc); len(uses) == 1 && uses[0].IsABitCastInst() != nilValue {
-			// getting only bitcast use
-			bitcast = uses[0]
-		}
-		if !c.doesEscape(bitcast) {
-			// Insert alloca in the entry block. Do it here so that mem2reg can
-			// promote it to a SSA value.
-			fn := bitcast.InstructionParent().Parent()
-			c.builder.SetInsertPointBefore(fn.EntryBasicBlock().FirstInstruction())
-			alignment := c.targetData.ABITypeAlignment(c.i8ptrType)
-			sizeInWords := (size + uint64(alignment) - 1) / uint64(alignment)
-			allocaType := llvm.ArrayType(c.ctx.IntType(alignment*8), int(sizeInWords))
-			alloca := c.builder.CreateAlloca(allocaType, "stackalloc.alloca")
-			zero := llvm.ConstNull(alloca.Type().ElementType())
-			c.builder.CreateStore(zero, alloca)
-			stackalloc := c.builder.CreateBitCast(alloca, bitcast.Type(), "stackalloc")
-			bitcast.ReplaceAllUsesWith(stackalloc)
-			if heapalloc != bitcast {
-				bitcast.EraseFromParentAsInstruction()
-			}
-			heapalloc.EraseFromParentAsInstruction()
-		}
-	}
-}
-
-// Very basic escape analysis.
-func (c *Compiler) doesEscape(value llvm.Value) bool {
-	uses := getUses(value)
-	for _, use := range uses {
-		nilValue := llvm.Value{}
-		if use.IsAGetElementPtrInst() != nilValue {
-			if c.doesEscape(use) {
-				return true
-			}
-		} else if use.IsABitCastInst() != nilValue {
-			// A bitcast escapes if the casted-to value escapes.
-			if c.doesEscape(use) {
-				return true
-			}
-		} else if use.IsALoadInst() != nilValue {
-			// Load does not escape.
-		} else if use.IsAStoreInst() != nilValue {
-			// Store only escapes when the value is stored to, not when the
-			// value is stored into another value.
-			if use.Operand(0) == value {
-				return true
-			}
-		} else if use.IsACallInst() != nilValue {
-			if !c.hasFlag(use, value, "nocapture") {
-				return true
-			}
-		} else if use.IsAICmpInst() != nilValue {
-			// Comparing pointers don't let the pointer escape.
-			// This is often a compiler-inserted nil check.
-		} else {
-			// Unknown instruction, might escape.
-			return true
-		}
-	}
-
-	// does not escape
-	return false
-}
-
 // Check whether the given value (which is of pointer type) is never stored to.
 func (c *Compiler) isReadOnly(value llvm.Value) bool {
 	uses := getUses(value)
--- a/go.mod
+++ b/go.mod
@ -6,5 +6,5 @@ require (
 	github.com/blakesmith/ar v0.0.0-20150311145944-8bd4349a67f2
 	github.com/marcinbor85/gohex v0.0.0-20180128172054-7a43cd876e46
 	golang.org/x/tools v0.0.0-20190227180812-8dcc6e70cdef
-	tinygo.org/x/go-llvm v0.0.0-20190224120431-7707ae5d1261
+	tinygo.org/x/go-llvm v0.0.0-20190818154551-95bc4ffe1add
 )
--- a/go.sum
+++ b/go.sum
@ -12,3 +12,5 @@ golang.org/x/tools v0.0.0-20190227180812-8dcc6e70cdef/go.mod h1:9Yl7xja0Znq3iFh3
 google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
 tinygo.org/x/go-llvm v0.0.0-20190224120431-7707ae5d1261 h1:rJS2Hga39YAnm7DE4qrPm6Dr/67EOojL0XPzvbEeBiw=
 tinygo.org/x/go-llvm v0.0.0-20190224120431-7707ae5d1261/go.mod h1:fv1F0BSNpxMfCL0zF3M4OPFbgYHnhtB6ST0HvUtu/LE=
+tinygo.org/x/go-llvm v0.0.0-20190818154551-95bc4ffe1add h1:dFjMH1sLhYADg8UQm7DB56B7e+TfvAmWmEZLhyv3r/w=
+tinygo.org/x/go-llvm v0.0.0-20190818154551-95bc4ffe1add/go.mod h1:fv1F0BSNpxMfCL0zF3M4OPFbgYHnhtB6ST0HvUtu/LE=
--- a/transform/allocs.go
+++ b/transform/allocs.go
@ -0,0 +1,124 @@
+package transform
+
+// This file implements an escape analysis pass. It looks for calls to
+// runtime.alloc and replaces these calls with a stack allocation if the
+// allocated value does not escape. It uses the LLVM nocapture flag for
+// interprocedural escape analysis.
+
+import (
+	"tinygo.org/x/go-llvm"
+)
+
+// maxStackAlloc is the maximum size of an object that will be allocated on the
+// stack. Bigger objects have increased risk of stack overflows and thus will
+// always be heap allocated.
+//
+// TODO: tune this, this is just a random value.
+const maxStackAlloc = 256
+
+// OptimizeAllocs tries to replace heap allocations with stack allocations
+// whenever possible. It relies on the LLVM 'nocapture' flag for interprocedural
+// escape analysis, and within a function looks whether an allocation can escape
+// to the heap.
+//
+// Only calls to runtime.alloc with a constant size of at most maxStackAlloc
+// bytes are considered. Such a call is rewritten when mayEscape reports that
+// the resulting pointer does not escape: the call (and its single bitcast use,
+// if any) is replaced by a zeroed alloca inserted at the start of the entry
+// block of the containing function.
+func OptimizeAllocs(mod llvm.Module) {
+	allocator := mod.NamedFunction("runtime.alloc")
+	if allocator.IsNil() {
+		// nothing to optimize
+		return
+	}
+
+	// The target data and the builder wrap natively allocated LLVM objects.
+	// Release them explicitly when the pass is done, otherwise every run of
+	// this pass leaks them.
+	ctx := mod.Context()
+	targetData := llvm.NewTargetData(mod.DataLayout())
+	defer targetData.Dispose()
+	i8ptrType := llvm.PointerType(ctx.Int8Type(), 0)
+	builder := ctx.NewBuilder()
+	defer builder.Dispose()
+
+	for _, heapalloc := range getUses(allocator) {
+		if heapalloc.Operand(0).IsAConstant().IsNil() {
+			// Do not allocate variable length arrays on the stack.
+			continue
+		}
+
+		size := heapalloc.Operand(0).ZExtValue()
+		if size > maxStackAlloc {
+			// The maximum size for a stack allocation.
+			continue
+		}
+
+		// In general the pattern is:
+		//     %0 = call i8* @runtime.alloc(i32 %size)
+		//     %1 = bitcast i8* %0 to type*
+		//     (use %1 only)
+		// But the bitcast might sometimes be dropped when allocating an *i8.
+		// The 'bitcast' variable below is thus usually a bitcast of the
+		// heapalloc but not always.
+		bitcast := heapalloc // instruction that creates the value
+		if uses := getUses(heapalloc); len(uses) == 1 && !uses[0].IsABitCastInst().IsNil() {
+			// getting only bitcast use
+			bitcast = uses[0]
+		}
+
+		if mayEscape(bitcast) {
+			continue
+		}
+		// The pointer value does not escape.
+
+		// Insert alloca in the entry block. Do it here so that mem2reg can
+		// promote it to a SSA value.
+		fn := bitcast.InstructionParent().Parent()
+		builder.SetInsertPointBefore(fn.EntryBasicBlock().FirstInstruction())
+
+		// Model the object as an array of pointer-aligned integers, rounding
+		// the requested size up to a whole number of such words.
+		alignment := targetData.ABITypeAlignment(i8ptrType)
+		sizeInWords := (size + uint64(alignment) - 1) / uint64(alignment)
+		allocaType := llvm.ArrayType(ctx.IntType(alignment*8), int(sizeInWords))
+		alloca := builder.CreateAlloca(allocaType, "stackalloc.alloca")
+
+		// Zero the stack slot before it is used.
+		zero := llvm.ConstNull(alloca.Type().ElementType())
+		builder.CreateStore(zero, alloca)
+
+		// Swap the heap pointer for the stack pointer and remove the now
+		// unused heap allocation (users first, then the call itself).
+		stackalloc := builder.CreateBitCast(alloca, bitcast.Type(), "stackalloc")
+		bitcast.ReplaceAllUsesWith(stackalloc)
+		if heapalloc != bitcast {
+			bitcast.EraseFromParentAsInstruction()
+		}
+		heapalloc.EraseFromParentAsInstruction()
+	}
+}
+
+// mayEscape reports whether the pointer produced by value might escape. A
+// result of true means "possibly escapes"; false means none of the (transitive)
+// uses lets the pointer escape. The value must be an instruction, and all of
+// its users must be instructions as well.
+func mayEscape(value llvm.Value) bool {
+	for _, user := range getUses(value) {
+		if user.IsAInstruction().IsNil() {
+			panic("expected instruction use")
+		}
+		switch user.InstructionOpcode() {
+		case llvm.GetElementPtr, llvm.BitCast:
+			// These produce a pointer derived from value, so the result
+			// escapes exactly when the derived pointer escapes.
+			if mayEscape(user) {
+				return true
+			}
+		case llvm.Load, llvm.ICmp:
+			// Loading through the pointer does not let it escape, and
+			// neither does comparing it (which is often a
+			// compiler-inserted nil check).
+		case llvm.Store:
+			// Operand 0 of a store is the value being written. When that
+			// is the pointer itself, the pointer is saved to memory and
+			// may escape. Storing something *through* the pointer is fine.
+			if user.Operand(0) == value {
+				return true
+			}
+		case llvm.Call:
+			// Passing the pointer to a call is only safe when every
+			// parameter receiving it is marked nocapture.
+			if !hasFlag(user, value, "nocapture") {
+				return true
+			}
+		default:
+			// Unknown instruction, might escape.
+			return true
+		}
+	}
+
+	// Every use was inspected and none lets the pointer value escape.
+	return false
+}
--- a/transform/allocs_test.go
+++ b/transform/allocs_test.go
@ -0,0 +1,10 @@
+package transform
+
+import (
+	"testing"
+)
+
+// TestAllocs runs the OptimizeAllocs pass on testdata/allocs.ll and checks
+// the resulting IR against testdata/allocs.out.ll (see testTransform).
+func TestAllocs(t *testing.T) {
+	t.Parallel()
+	testTransform(t, "testdata/allocs", OptimizeAllocs)
+}
--- a/transform/llvm.go
+++ b/transform/llvm.go
@ -0,0 +1,17 @@
+package transform
+
+import (
+	"tinygo.org/x/go-llvm"
+)
+
+// getUses collects the users of value: every value (in practice, every
+// instruction) that has value as one of its operands. A user appears once per
+// operand slot in which it references value. The result is nil when value has
+// no uses.
+func getUses(value llvm.Value) []llvm.Value {
+	var users []llvm.Value
+	for u := value.FirstUse(); !u.IsNil(); u = u.NextUse() {
+		users = append(users, u.User())
+	}
+	return users
+}
--- a/transform/testdata/allocs.ll
+++ b/transform/testdata/allocs.ll
@ -0,0 +1,61 @@
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7m-none-eabi"
+
+; The allocator whose calls the heap-to-stack transform tries to remove.
+declare nonnull i8* @runtime.alloc(i32)
+
+; Test allocating a single int (i32) that should be allocated on the stack.
+define void @testInt() {
+  %1 = call i8* @runtime.alloc(i32 4)
+  %2 = bitcast i8* %1 to i32*
+  store i32 5, i32* %2
+  ret void
+}
+
+; Test allocating an array of 3 i16 values that should be allocated on the
+; stack.
+define i16 @testArray() {
+  %1 = call i8* @runtime.alloc(i32 6)
+  %2 = bitcast i8* %1 to i16*
+  %3 = getelementptr i16, i16* %2, i32 1
+  store i16 5, i16* %3
+  %4 = getelementptr i16, i16* %2, i32 2
+  %5 = load i16, i16* %4
+  ret i16 %5
+}
+
+; Call a function that will let the pointer escape, so the heap-to-stack
+; transform shouldn't be applied.
+define void @testEscapingCall() {
+  %1 = call i8* @runtime.alloc(i32 4)
+  %2 = bitcast i8* %1 to i32*
+  %3 = call i32* @escapeIntPtr(i32* %2)
+  ret void
+}
+
+; Pass the same pointer twice to a function of which only the first parameter
+; is nocapture. The pointer can still escape through the second parameter, so
+; the heap-to-stack transform shouldn't be applied.
+define void @testEscapingCall2() {
+  %1 = call i8* @runtime.alloc(i32 4)
+  %2 = bitcast i8* %1 to i32*
+  %3 = call i32* @escapeIntPtrSometimes(i32* %2, i32* %2)
+  ret void
+}
+
+; Call a function that doesn't let the pointer escape.
+define void @testNonEscapingCall() {
+  %1 = call i8* @runtime.alloc(i32 4)
+  %2 = bitcast i8* %1 to i32*
+  %3 = call i32* @noescapeIntPtr(i32* %2)
+  ret void
+}
+
+; Return the allocated value, which lets it escape.
+define i32* @testEscapingReturn() {
+  %1 = call i8* @runtime.alloc(i32 4)
+  %2 = bitcast i8* %1 to i32*
+  ret i32* %2
+}
+
+; The parameter is not marked nocapture, so the pointer may escape.
+declare i32* @escapeIntPtr(i32*)
+
+; The parameter is marked nocapture, so the pointer does not escape.
+declare i32* @noescapeIntPtr(i32* nocapture)
+
+; Only the first parameter is marked nocapture.
+declare i32* @escapeIntPtrSometimes(i32* nocapture, i32*)
--- a/transform/testdata/allocs.out.ll
+++ b/transform/testdata/allocs.out.ll
@ -0,0 +1,57 @@
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7m-none-eabi"
+
+declare nonnull i8* @runtime.alloc(i32)
+
+; The 4-byte allocation becomes a zeroed [1 x i32] alloca.
+define void @testInt() {
+  %stackalloc.alloca = alloca [1 x i32]
+  store [1 x i32] zeroinitializer, [1 x i32]* %stackalloc.alloca
+  %stackalloc = bitcast [1 x i32]* %stackalloc.alloca to i32*
+  store i32 5, i32* %stackalloc
+  ret void
+}
+
+; The 6-byte allocation is rounded up to two 32-bit words.
+define i16 @testArray() {
+  %stackalloc.alloca = alloca [2 x i32]
+  store [2 x i32] zeroinitializer, [2 x i32]* %stackalloc.alloca
+  %stackalloc = bitcast [2 x i32]* %stackalloc.alloca to i16*
+  %1 = getelementptr i16, i16* %stackalloc, i32 1
+  store i16 5, i16* %1
+  %2 = getelementptr i16, i16* %stackalloc, i32 2
+  %3 = load i16, i16* %2
+  ret i16 %3
+}
+
+; Unchanged: the pointer is passed to a parameter without nocapture.
+define void @testEscapingCall() {
+  %1 = call i8* @runtime.alloc(i32 4)
+  %2 = bitcast i8* %1 to i32*
+  %3 = call i32* @escapeIntPtr(i32* %2)
+  ret void
+}
+
+; Unchanged: the second parameter receiving the pointer is not nocapture.
+define void @testEscapingCall2() {
+  %1 = call i8* @runtime.alloc(i32 4)
+  %2 = bitcast i8* %1 to i32*
+  %3 = call i32* @escapeIntPtrSometimes(i32* %2, i32* %2)
+  ret void
+}
+
+; The pointer is only passed to a nocapture parameter, so it moves to the
+; stack.
+define void @testNonEscapingCall() {
+  %stackalloc.alloca = alloca [1 x i32]
+  store [1 x i32] zeroinitializer, [1 x i32]* %stackalloc.alloca
+  %stackalloc = bitcast [1 x i32]* %stackalloc.alloca to i32*
+  %1 = call i32* @noescapeIntPtr(i32* %stackalloc)
+  ret void
+}
+
+; Unchanged: the pointer is returned from the function.
+define i32* @testEscapingReturn() {
+  %1 = call i8* @runtime.alloc(i32 4)
+  %2 = bitcast i8* %1 to i32*
+  ret i32* %2
+}
+
+declare i32* @escapeIntPtr(i32*)
+
+declare i32* @noescapeIntPtr(i32* nocapture)
+
+declare i32* @escapeIntPtrSometimes(i32* nocapture, i32*)
--- a/transform/transform.go
+++ b/transform/transform.go
@ -0,0 +1,13 @@
+// Package transform contains transformation passes for the TinyGo compiler.
+// These transformation passes may be optimization passes or lowering passes.
+//
+// Optimization passes transform the IR in such a way that they increase the
+// performance of the generated code and/or help the LLVM optimizer better do
+// its job by simplifying the IR. This usually means that certain
+// TinyGo-specific runtime calls are removed or replaced with something simpler
+// if that is a valid operation.
+//
+// Lowering passes are usually required to run. One example is the interface
+// lowering pass, which replaces stub runtime calls to get an interface method
+// with the method implementation (either a direct call or a thunk).
+package transform
--- a/transform/transform_test.go
+++ b/transform/transform_test.go
@ -0,0 +1,82 @@
+package transform
+
+// This file defines some helper functions for testing transforms.
+
+import (
+	"io/ioutil"
+	"os"
+	"strings"
+	"testing"
+
+	"tinygo.org/x/go-llvm"
+)
+
+// testTransform runs a transformation pass on an input file (pathPrefix+".ll")
+// and checks whether it matches the expected output (pathPrefix+".out.ll"). The
+// output is compared with a fuzzy match that ignores some irrelevant lines such
+// as empty lines.
+//
+// A missing or unparsable input or output file aborts the test with t.Fatalf;
+// a mismatch between actual and expected IR is reported with t.Errorf together
+// with the IR the transform produced.
+func testTransform(t *testing.T, pathPrefix string, transform func(mod llvm.Module)) {
+	// Report failures at the call site of the individual transform test.
+	t.Helper()
+
+	// Read the input IR.
+	ctx := llvm.NewContext()
+	// Release the native LLVM context when the test is done. This is
+	// deferred first so that it runs after the module below is disposed.
+	defer ctx.Dispose()
+	buf, err := llvm.NewMemoryBufferFromFile(pathPrefix + ".ll")
+	os.Stat(pathPrefix + ".ll") // make sure this file is tracked by `go test` caching
+	if err != nil {
+		t.Fatalf("could not read file %s: %v", pathPrefix+".ll", err)
+	}
+	// NOTE(review): buf is intentionally not disposed here; the LLVM IR
+	// parser is expected to take ownership of it - confirm against the
+	// go-llvm/LLVM-C documentation.
+	mod, err := ctx.ParseIR(buf)
+	if err != nil {
+		t.Fatalf("could not load module:\n%v", err)
+	}
+	defer mod.Dispose()
+
+	// Perform the transform.
+	transform(mod)
+
+	// Read the expected output IR.
+	out, err := ioutil.ReadFile(pathPrefix + ".out.ll")
+	if err != nil {
+		t.Fatalf("could not read output file %s: %v", pathPrefix+".out.ll", err)
+	}
+
+	// See whether the transform output matches with the expected output IR.
+	expected := string(out)
+	actual := mod.String()
+	if !fuzzyEqualIR(expected, actual) {
+		t.Errorf("output does not match expected output:\n%s", actual)
+	}
+}
+
+// fuzzyEqualIR reports whether two LLVM IR strings are roughly equal. Both
+// inputs are split into lines, lines that filterIrrelevantIRLines considers
+// irrelevant are dropped, and the remaining lines must match exactly and in
+// the same order.
+func fuzzyEqualIR(s1, s2 string) bool {
+	left := filterIrrelevantIRLines(strings.Split(s1, "\n"))
+	right := filterIrrelevantIRLines(strings.Split(s2, "\n"))
+	if len(left) != len(right) {
+		return false
+	}
+	for idx := range left {
+		if left[idx] != right[idx] {
+			return false
+		}
+	}
+	return true
+}
+
+// filterIrrelevantIRLines removes lines from the input slice of strings that
+// are not relevant in comparing IR. The following lines are stripped out:
+//
+//   - empty and whitespace-only lines,
+//   - comment lines (first non-blank character is ';'),
+//   - the "source_filename = " line.
+//
+// A trailing carriage return is removed from every line, so that files checked
+// out with CRLF line endings compare equal to LLVM's "\n"-separated output.
+// Leading indentation of the lines that are kept is preserved. The result is
+// nil if no line is kept.
+func filterIrrelevantIRLines(lines []string) []string {
+	var out []string
+	for _, line := range lines {
+		// Splitting a CRLF file on "\n" leaves a '\r' at the end of each
+		// line; without removing it no line would be considered empty and
+		// no line would match its LF counterpart.
+		line = strings.TrimRight(line, "\r")
+		trimmed := strings.TrimSpace(line)
+		if trimmed == "" || trimmed[0] == ';' {
+			continue
+		}
+		if strings.HasPrefix(line, "source_filename = ") {
+			continue
+		}
+		out = append(out, line)
+	}
+	return out
+}
--- a/transform/util.go
+++ b/transform/util.go
@ -0,0 +1,34 @@
+package transform
+
+// This file contains utilities used across transforms.
+
+import (
+	"tinygo.org/x/go-llvm"
+)
+
+// hasFlag checks whether all uses of this param as parameter to the call have
+// the given flag. In most cases, there will only be one use but a function
+// could take the same parameter twice, in which case both must have the flag.
+// A flag can be any enum flag, like "readonly".
+//
+// It returns false when the callee is not a direct function (for example a
+// function pointer), and also when param is passed in an argument position
+// that has no declared parameter (such as a variadic argument), because no
+// parameter attribute can vouch for such a use.
+func hasFlag(call, param llvm.Value, kind string) bool {
+	fn := call.CalledValue()
+	if fn.IsAFunction().IsNil() {
+		// This is not a function but something else, like a function pointer.
+		return false
+	}
+	kindID := llvm.AttributeKindID(kind)
+	numParams := fn.ParamsCount()
+	// The callee is the last operand of a call instruction; all operands
+	// before it are passed along with the call. Look at all of them and not
+	// just the first numParams, otherwise a use in a variadic position would
+	// go unnoticed and be treated as if it had the flag.
+	numArgs := call.OperandsCount() - 1
+	for i := 0; i < numArgs; i++ {
+		if call.Operand(i) != param {
+			// This is not the parameter we're checking.
+			continue
+		}
+		if i >= numParams {
+			// There is no declared parameter (and thus no parameter
+			// attribute) for this position.
+			return false
+		}
+		index := i + 1 // param attributes start at 1
+		attr := fn.GetEnumAttributeAtIndex(index, kindID)
+		if attr.IsNil() {
+			// At least one parameter doesn't have the flag (there may be
+			// multiple).
+			return false
+		}
+	}
+	return true
+}