From f0bb3c092d0c6b4aa639bb3d6e47e2234f329662 Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Sun, 17 Nov 2019 20:37:45 +0100 Subject: [PATCH] compiler: move GC passes to the transform package --- compiler/gc.go | 357 --------------------- compiler/optimizer.go | 4 +- transform/gc.go | 398 ++++++++++++++++++++++++ transform/gc_test.go | 21 ++ transform/llvm.go | 24 ++ transform/testdata/gc-globals.ll | 29 ++ transform/testdata/gc-globals.out.ll | 27 ++ transform/testdata/gc-stackslots.ll | 52 ++++ transform/testdata/gc-stackslots.out.ll | 58 ++++ 9 files changed, 611 insertions(+), 359 deletions(-) create mode 100644 transform/gc.go create mode 100644 transform/gc_test.go create mode 100644 transform/testdata/gc-globals.ll create mode 100644 transform/testdata/gc-globals.out.ll create mode 100644 transform/testdata/gc-stackslots.ll create mode 100644 transform/testdata/gc-stackslots.out.ll diff --git a/compiler/gc.go b/compiler/gc.go index fd82525e..0992c01d 100644 --- a/compiler/gc.go +++ b/compiler/gc.go @@ -5,7 +5,6 @@ package compiler import ( "go/token" - "math/big" "golang.org/x/tools/go/ssa" "tinygo.org/x/go-llvm" @@ -105,359 +104,3 @@ func typeHasPointers(t llvm.Type) bool { return false } } - -// makeGCStackSlots converts all calls to runtime.trackPointer to explicit -// stores to stack slots that are scannable by the GC. -func (c *Compiler) makeGCStackSlots() bool { - // Check whether there are allocations at all. - alloc := c.mod.NamedFunction("runtime.alloc") - if alloc.IsNil() { - // Nothing to. Make sure all remaining bits and pieces for stack - // chains are neutralized. 
- for _, call := range getUses(c.mod.NamedFunction("runtime.trackPointer")) { - call.EraseFromParentAsInstruction() - } - stackChainStart := c.mod.NamedGlobal("runtime.stackChainStart") - if !stackChainStart.IsNil() { - stackChainStart.SetInitializer(llvm.ConstNull(stackChainStart.Type().ElementType())) - stackChainStart.SetGlobalConstant(true) - } - return false - } - - trackPointer := c.mod.NamedFunction("runtime.trackPointer") - if trackPointer.IsNil() || trackPointer.FirstUse().IsNil() { - return false // nothing to do - } - - // Look at *all* functions to see whether they are free of function pointer - // calls. - // This takes less than 5ms for ~100kB of WebAssembly but would perhaps be - // faster when written in C++ (to avoid the CGo overhead). - funcsWithFPCall := map[llvm.Value]struct{}{} - n := 0 - for fn := c.mod.FirstFunction(); !fn.IsNil(); fn = llvm.NextFunction(fn) { - n++ - if _, ok := funcsWithFPCall[fn]; ok { - continue // already found - } - done := false - for bb := fn.FirstBasicBlock(); !bb.IsNil() && !done; bb = llvm.NextBasicBlock(bb) { - for call := bb.FirstInstruction(); !call.IsNil() && !done; call = llvm.NextInstruction(call) { - if call.IsACallInst().IsNil() { - continue // only looking at calls - } - called := call.CalledValue() - if !called.IsAFunction().IsNil() { - continue // only looking for function pointers - } - funcsWithFPCall[fn] = struct{}{} - markParentFunctions(funcsWithFPCall, fn) - done = true - } - } - } - - // Determine which functions need stack objects. Many leaf functions don't - // need it: it only causes overhead for them. - // Actually, in one test it was only able to eliminate stack object from 12% - // of functions that had a call to runtime.trackPointer (8 out of 68 - // functions), so this optimization is not as big as it may seem. 
- allocatingFunctions := map[llvm.Value]struct{}{} // set of allocating functions - - // Work from runtime.alloc and trace all parents to check which functions do - // a heap allocation (and thus which functions do not). - markParentFunctions(allocatingFunctions, alloc) - - // Also trace all functions that call a function pointer. - for fn := range funcsWithFPCall { - // Assume that functions that call a function pointer do a heap - // allocation as a conservative guess because the called function might - // do a heap allocation. - allocatingFunctions[fn] = struct{}{} - markParentFunctions(allocatingFunctions, fn) - } - - // Collect some variables used below in the loop. - stackChainStart := c.mod.NamedGlobal("runtime.stackChainStart") - if stackChainStart.IsNil() { - // This may be reached in a weird scenario where we call runtime.alloc but the garbage collector is unreachable. - // This can be accomplished by allocating 0 bytes. - // There is no point in tracking anything. - for _, use := range getUses(trackPointer) { - use.EraseFromParentAsInstruction() - } - return false - } - stackChainStartType := stackChainStart.Type().ElementType() - stackChainStart.SetInitializer(llvm.ConstNull(stackChainStartType)) - - // Iterate until runtime.trackPointer has no uses left. - for use := trackPointer.FirstUse(); !use.IsNil(); use = trackPointer.FirstUse() { - // Pick the first use of runtime.trackPointer. - call := use.User() - if call.IsACallInst().IsNil() { - panic("expected runtime.trackPointer use to be a call") - } - - // Pick the parent function. - fn := call.InstructionParent().Parent() - - if _, ok := allocatingFunctions[fn]; !ok { - // This function nor any of the functions it calls (recursively) - // allocate anything from the heap, so it will not trigger a garbage - // collection cycle. Thus, it does not need to track local pointer - // values. 
- // This is a useful optimization but not as big as you might guess, - // as described above (it avoids stack objects for ~12% of - // functions). - call.EraseFromParentAsInstruction() - continue - } - - // Find all calls to runtime.trackPointer in this function. - var calls []llvm.Value - var returns []llvm.Value - for bb := fn.FirstBasicBlock(); !bb.IsNil(); bb = llvm.NextBasicBlock(bb) { - for inst := bb.FirstInstruction(); !inst.IsNil(); inst = llvm.NextInstruction(inst) { - switch inst.InstructionOpcode() { - case llvm.Call: - if inst.CalledValue() == trackPointer { - calls = append(calls, inst) - } - case llvm.Ret: - returns = append(returns, inst) - } - } - } - - // Determine what to do with each call. - var allocas, pointers []llvm.Value - for _, call := range calls { - ptr := call.Operand(0) - call.EraseFromParentAsInstruction() - if ptr.IsAInstruction().IsNil() { - continue - } - - // Some trivial optimizations. - if ptr.IsAInstruction().IsNil() { - continue - } - switch ptr.InstructionOpcode() { - case llvm.PHI, llvm.GetElementPtr: - // These values do not create new values: the values already - // existed locally in this function so must have been tracked - // already. - continue - case llvm.ExtractValue, llvm.BitCast: - // These instructions do not create new values, but their - // original value may not be tracked. So keep tracking them for - // now. - // With more analysis, it should be possible to optimize a - // significant chunk of these away. - case llvm.Call, llvm.Load, llvm.IntToPtr: - // These create new values so must be stored locally. But - // perhaps some of these can be fused when they actually refer - // to the same value. - default: - // Ambiguous. These instructions are uncommon, but perhaps could - // be optimized if needed. 
- } - - if !ptr.IsAAllocaInst().IsNil() { - if typeHasPointers(ptr.Type().ElementType()) { - allocas = append(allocas, ptr) - } - } else { - pointers = append(pointers, ptr) - } - } - - if len(allocas) == 0 && len(pointers) == 0 { - // This function does not need to keep track of stack pointers. - continue - } - - // Determine the type of the required stack slot. - fields := []llvm.Type{ - stackChainStartType, // Pointer to parent frame. - c.uintptrType, // Number of elements in this frame. - } - for _, alloca := range allocas { - fields = append(fields, alloca.Type().ElementType()) - } - for _, ptr := range pointers { - fields = append(fields, ptr.Type()) - } - stackObjectType := c.ctx.StructType(fields, false) - - // Create the stack object at the function entry. - c.builder.SetInsertPointBefore(fn.EntryBasicBlock().FirstInstruction()) - stackObject := c.builder.CreateAlloca(stackObjectType, "gc.stackobject") - initialStackObject := llvm.ConstNull(stackObjectType) - numSlots := (c.targetData.TypeAllocSize(stackObjectType) - c.targetData.TypeAllocSize(c.i8ptrType)*2) / uint64(c.targetData.ABITypeAlignment(c.uintptrType)) - numSlotsValue := llvm.ConstInt(c.uintptrType, numSlots, false) - initialStackObject = llvm.ConstInsertValue(initialStackObject, numSlotsValue, []uint32{1}) - c.builder.CreateStore(initialStackObject, stackObject) - - // Update stack start. - parent := c.builder.CreateLoad(stackChainStart, "") - gep := c.builder.CreateGEP(stackObject, []llvm.Value{ - llvm.ConstInt(c.ctx.Int32Type(), 0, false), - llvm.ConstInt(c.ctx.Int32Type(), 0, false), - }, "") - c.builder.CreateStore(parent, gep) - stackObjectCast := c.builder.CreateBitCast(stackObject, stackChainStartType, "") - c.builder.CreateStore(stackObjectCast, stackChainStart) - - // Replace all independent allocas with GEPs in the stack object. 
- for i, alloca := range allocas { - gep := c.builder.CreateGEP(stackObject, []llvm.Value{ - llvm.ConstInt(c.ctx.Int32Type(), 0, false), - llvm.ConstInt(c.ctx.Int32Type(), uint64(2+i), false), - }, "") - alloca.ReplaceAllUsesWith(gep) - alloca.EraseFromParentAsInstruction() - } - - // Do a store to the stack object after each new pointer that is created. - for i, ptr := range pointers { - c.builder.SetInsertPointBefore(llvm.NextInstruction(ptr)) - gep := c.builder.CreateGEP(stackObject, []llvm.Value{ - llvm.ConstInt(c.ctx.Int32Type(), 0, false), - llvm.ConstInt(c.ctx.Int32Type(), uint64(2+len(allocas)+i), false), - }, "") - c.builder.CreateStore(ptr, gep) - } - - // Make sure this stack object is popped from the linked list of stack - // objects at return. - for _, ret := range returns { - c.builder.SetInsertPointBefore(ret) - c.builder.CreateStore(parent, stackChainStart) - } - } - - return true -} - -func (c *Compiler) addGlobalsBitmap() bool { - if c.mod.NamedGlobal("runtime.trackedGlobalsStart").IsNil() { - return false // nothing to do: no GC in use - } - - var trackedGlobals []llvm.Value - var trackedGlobalTypes []llvm.Type - for global := c.mod.FirstGlobal(); !global.IsNil(); global = llvm.NextGlobal(global) { - if global.IsDeclaration() { - continue - } - typ := global.Type().ElementType() - ptrs := c.getPointerBitmap(typ, global.Name()) - if ptrs.BitLen() == 0 { - continue - } - trackedGlobals = append(trackedGlobals, global) - trackedGlobalTypes = append(trackedGlobalTypes, typ) - } - - globalsBundleType := c.ctx.StructType(trackedGlobalTypes, false) - globalsBundle := llvm.AddGlobal(c.mod, globalsBundleType, "tinygo.trackedGlobals") - globalsBundle.SetLinkage(llvm.InternalLinkage) - globalsBundle.SetUnnamedAddr(true) - initializer := llvm.Undef(globalsBundleType) - for i, global := range trackedGlobals { - initializer = llvm.ConstInsertValue(initializer, global.Initializer(), []uint32{uint32(i)}) - gep := llvm.ConstGEP(globalsBundle, []llvm.Value{ - 
llvm.ConstInt(c.ctx.Int32Type(), 0, false), - llvm.ConstInt(c.ctx.Int32Type(), uint64(i), false), - }) - global.ReplaceAllUsesWith(gep) - global.EraseFromParentAsGlobal() - } - globalsBundle.SetInitializer(initializer) - - trackedGlobalsStart := llvm.ConstPtrToInt(globalsBundle, c.uintptrType) - c.mod.NamedGlobal("runtime.trackedGlobalsStart").SetInitializer(trackedGlobalsStart) - - alignment := c.targetData.PrefTypeAlignment(c.i8ptrType) - trackedGlobalsLength := llvm.ConstInt(c.uintptrType, c.targetData.TypeAllocSize(globalsBundleType)/uint64(alignment), false) - c.mod.NamedGlobal("runtime.trackedGlobalsLength").SetInitializer(trackedGlobalsLength) - - bitmapBytes := c.getPointerBitmap(globalsBundleType, "globals bundle").Bytes() - bitmapValues := make([]llvm.Value, len(bitmapBytes)) - for i, b := range bitmapBytes { - bitmapValues[len(bitmapBytes)-i-1] = llvm.ConstInt(c.ctx.Int8Type(), uint64(b), false) - } - bitmapArray := llvm.ConstArray(c.ctx.Int8Type(), bitmapValues) - bitmapNew := llvm.AddGlobal(c.mod, bitmapArray.Type(), "runtime.trackedGlobalsBitmap.tmp") - bitmapOld := c.mod.NamedGlobal("runtime.trackedGlobalsBitmap") - bitmapOld.ReplaceAllUsesWith(llvm.ConstBitCast(bitmapNew, bitmapOld.Type())) - bitmapNew.SetInitializer(bitmapArray) - bitmapNew.SetName("runtime.trackedGlobalsBitmap") - - return true // the IR was changed -} - -func (c *Compiler) getPointerBitmap(typ llvm.Type, name string) *big.Int { - alignment := c.targetData.PrefTypeAlignment(c.i8ptrType) - switch typ.TypeKind() { - case llvm.IntegerTypeKind, llvm.FloatTypeKind, llvm.DoubleTypeKind: - return big.NewInt(0) - case llvm.PointerTypeKind: - return big.NewInt(1) - case llvm.StructTypeKind: - ptrs := big.NewInt(0) - for i, subtyp := range typ.StructElementTypes() { - subptrs := c.getPointerBitmap(subtyp, name) - if subptrs.BitLen() == 0 { - continue - } - offset := c.targetData.ElementOffset(typ, i) - if offset%uint64(alignment) != 0 { - panic("precise GC: global contains unaligned 
pointer: " + name) - } - subptrs.Lsh(subptrs, uint(offset)/uint(alignment)) - ptrs.Or(ptrs, subptrs) - } - return ptrs - case llvm.ArrayTypeKind: - subtyp := typ.ElementType() - subptrs := c.getPointerBitmap(subtyp, name) - ptrs := big.NewInt(0) - if subptrs.BitLen() == 0 { - return ptrs - } - elementSize := c.targetData.TypeAllocSize(subtyp) - for i := 0; i < typ.ArrayLength(); i++ { - ptrs.Lsh(ptrs, uint(elementSize)/uint(alignment)) - ptrs.Or(ptrs, subptrs) - } - return ptrs - default: - panic("unknown type kind of global: " + name) - } -} - -// markParentFunctions traverses all parent function calls (recursively) and -// adds them to the set of marked functions. It only considers function calls: -// any other uses of such a function is ignored. -func markParentFunctions(marked map[llvm.Value]struct{}, fn llvm.Value) { - worklist := []llvm.Value{fn} - for len(worklist) != 0 { - fn := worklist[len(worklist)-1] - worklist = worklist[:len(worklist)-1] - for _, use := range getUses(fn) { - if use.IsACallInst().IsNil() || use.CalledValue() != fn { - // Not the parent function. 
- continue - } - parent := use.InstructionParent().Parent() - if _, ok := marked[parent]; !ok { - marked[parent] = struct{}{} - worklist = append(worklist, parent) - } - } - } -} diff --git a/compiler/optimizer.go b/compiler/optimizer.go index 46855010..601c6eef 100644 --- a/compiler/optimizer.go +++ b/compiler/optimizer.go @@ -137,8 +137,8 @@ func (c *Compiler) Optimize(optLevel, sizeLevel int, inlinerThreshold uint) erro builder.Populate(modPasses) modPasses.Run(c.mod) - hasGCPass := c.addGlobalsBitmap() - hasGCPass = c.makeGCStackSlots() || hasGCPass + hasGCPass := transform.AddGlobalsBitmap(c.mod) + hasGCPass = transform.MakeGCStackSlots(c.mod) || hasGCPass if hasGCPass { if err := c.Verify(); err != nil { return errors.New("GC pass caused a verification failure") diff --git a/transform/gc.go b/transform/gc.go new file mode 100644 index 00000000..ee32ed0c --- /dev/null +++ b/transform/gc.go @@ -0,0 +1,398 @@ +package transform + +import ( + "math/big" + + "tinygo.org/x/go-llvm" +) + +// MakeGCStackSlots converts all calls to runtime.trackPointer to explicit +// stores to stack slots that are scannable by the GC. +func MakeGCStackSlots(mod llvm.Module) bool { + // Check whether there are allocations at all. + alloc := mod.NamedFunction("runtime.alloc") + if alloc.IsNil() { + // Nothing to do. Make sure all remaining bits and pieces for stack + // chains are neutralized. 
+ for _, call := range getUses(mod.NamedFunction("runtime.trackPointer")) { + call.EraseFromParentAsInstruction() + } + stackChainStart := mod.NamedGlobal("runtime.stackChainStart") + if !stackChainStart.IsNil() { + stackChainStart.SetInitializer(llvm.ConstNull(stackChainStart.Type().ElementType())) + stackChainStart.SetGlobalConstant(true) + } + return false + } + + trackPointer := mod.NamedFunction("runtime.trackPointer") + if trackPointer.IsNil() || trackPointer.FirstUse().IsNil() { + return false // nothing to do + } + + ctx := mod.Context() + builder := ctx.NewBuilder() + targetData := llvm.NewTargetData(mod.DataLayout()) + uintptrType := ctx.IntType(targetData.PointerSize() * 8) + + // Look at *all* functions to see whether they are free of function pointer + // calls. + // This takes less than 5ms for ~100kB of WebAssembly but would perhaps be + // faster when written in C++ (to avoid the CGo overhead). + funcsWithFPCall := map[llvm.Value]struct{}{} + n := 0 + for fn := mod.FirstFunction(); !fn.IsNil(); fn = llvm.NextFunction(fn) { + n++ + if _, ok := funcsWithFPCall[fn]; ok { + continue // already found + } + done := false + for bb := fn.FirstBasicBlock(); !bb.IsNil() && !done; bb = llvm.NextBasicBlock(bb) { + for call := bb.FirstInstruction(); !call.IsNil() && !done; call = llvm.NextInstruction(call) { + if call.IsACallInst().IsNil() { + continue // only looking at calls + } + called := call.CalledValue() + if !called.IsAFunction().IsNil() { + continue // only looking for function pointers + } + funcsWithFPCall[fn] = struct{}{} + markParentFunctions(funcsWithFPCall, fn) + done = true + } + } + } + + // Determine which functions need stack objects. Many leaf functions don't + // need it: it only causes overhead for them. + // Actually, in one test it was only able to eliminate stack object from 12% + // of functions that had a call to runtime.trackPointer (8 out of 68 + // functions), so this optimization is not as big as it may seem. 
+ allocatingFunctions := map[llvm.Value]struct{}{} // set of allocating functions + + // Work from runtime.alloc and trace all parents to check which functions do + // a heap allocation (and thus which functions do not). + markParentFunctions(allocatingFunctions, alloc) + + // Also trace all functions that call a function pointer. + for fn := range funcsWithFPCall { + // Assume that functions that call a function pointer do a heap + // allocation as a conservative guess because the called function might + // do a heap allocation. + allocatingFunctions[fn] = struct{}{} + markParentFunctions(allocatingFunctions, fn) + } + + // Collect some variables used below in the loop. + stackChainStart := mod.NamedGlobal("runtime.stackChainStart") + if stackChainStart.IsNil() { + // This may be reached in a weird scenario where we call runtime.alloc but the garbage collector is unreachable. + // This can be accomplished by allocating 0 bytes. + // There is no point in tracking anything. + for _, use := range getUses(trackPointer) { + use.EraseFromParentAsInstruction() + } + return false + } + stackChainStartType := stackChainStart.Type().ElementType() + stackChainStart.SetInitializer(llvm.ConstNull(stackChainStartType)) + + // Iterate until runtime.trackPointer has no uses left. + for use := trackPointer.FirstUse(); !use.IsNil(); use = trackPointer.FirstUse() { + // Pick the first use of runtime.trackPointer. + call := use.User() + if call.IsACallInst().IsNil() { + panic("expected runtime.trackPointer use to be a call") + } + + // Pick the parent function. + fn := call.InstructionParent().Parent() + + if _, ok := allocatingFunctions[fn]; !ok { + // Neither this function nor any of the functions it calls (recursively) + // allocate anything from the heap, so it will not trigger a garbage + // collection cycle. Thus, it does not need to track local pointer + // values. 
+ // This is a useful optimization but not as big as you might guess, + // as described above (it avoids stack objects for ~12% of + // functions). + call.EraseFromParentAsInstruction() + continue + } + + // Find all calls to runtime.trackPointer in this function. + var calls []llvm.Value + var returns []llvm.Value + for bb := fn.FirstBasicBlock(); !bb.IsNil(); bb = llvm.NextBasicBlock(bb) { + for inst := bb.FirstInstruction(); !inst.IsNil(); inst = llvm.NextInstruction(inst) { + switch inst.InstructionOpcode() { + case llvm.Call: + if inst.CalledValue() == trackPointer { + calls = append(calls, inst) + } + case llvm.Ret: + returns = append(returns, inst) + } + } + } + + // Determine what to do with each call. + var allocas, pointers []llvm.Value + for _, call := range calls { + ptr := call.Operand(0) + call.EraseFromParentAsInstruction() + if ptr.IsAInstruction().IsNil() { + continue + } + + // Some trivial optimizations. + if ptr.IsAInstruction().IsNil() { + continue + } + switch ptr.InstructionOpcode() { + case llvm.PHI, llvm.GetElementPtr: + // These values do not create new values: the values already + // existed locally in this function so must have been tracked + // already. + continue + case llvm.ExtractValue, llvm.BitCast: + // These instructions do not create new values, but their + // original value may not be tracked. So keep tracking them for + // now. + // With more analysis, it should be possible to optimize a + // significant chunk of these away. + case llvm.Call, llvm.Load, llvm.IntToPtr: + // These create new values so must be stored locally. But + // perhaps some of these can be fused when they actually refer + // to the same value. + default: + // Ambiguous. These instructions are uncommon, but perhaps could + // be optimized if needed. 
+ } + + if !ptr.IsAAllocaInst().IsNil() { + if typeHasPointers(ptr.Type().ElementType()) { + allocas = append(allocas, ptr) + } + } else { + pointers = append(pointers, ptr) + } + } + + if len(allocas) == 0 && len(pointers) == 0 { + // This function does not need to keep track of stack pointers. + continue + } + + // Determine the type of the required stack slot. + fields := []llvm.Type{ + stackChainStartType, // Pointer to parent frame. + uintptrType, // Number of elements in this frame. + } + for _, alloca := range allocas { + fields = append(fields, alloca.Type().ElementType()) + } + for _, ptr := range pointers { + fields = append(fields, ptr.Type()) + } + stackObjectType := ctx.StructType(fields, false) + + // Create the stack object at the function entry. + builder.SetInsertPointBefore(fn.EntryBasicBlock().FirstInstruction()) + stackObject := builder.CreateAlloca(stackObjectType, "gc.stackobject") + initialStackObject := llvm.ConstNull(stackObjectType) + numSlots := (targetData.TypeAllocSize(stackObjectType) - uint64(targetData.PointerSize())*2) / uint64(targetData.ABITypeAlignment(uintptrType)) + numSlotsValue := llvm.ConstInt(uintptrType, numSlots, false) + initialStackObject = llvm.ConstInsertValue(initialStackObject, numSlotsValue, []uint32{1}) + builder.CreateStore(initialStackObject, stackObject) + + // Update stack start. + parent := builder.CreateLoad(stackChainStart, "") + gep := builder.CreateGEP(stackObject, []llvm.Value{ + llvm.ConstInt(ctx.Int32Type(), 0, false), + llvm.ConstInt(ctx.Int32Type(), 0, false), + }, "") + builder.CreateStore(parent, gep) + stackObjectCast := builder.CreateBitCast(stackObject, stackChainStartType, "") + builder.CreateStore(stackObjectCast, stackChainStart) + + // Replace all independent allocas with GEPs in the stack object. 
+ for i, alloca := range allocas { + gep := builder.CreateGEP(stackObject, []llvm.Value{ + llvm.ConstInt(ctx.Int32Type(), 0, false), + llvm.ConstInt(ctx.Int32Type(), uint64(2+i), false), + }, "") + alloca.ReplaceAllUsesWith(gep) + alloca.EraseFromParentAsInstruction() + } + + // Do a store to the stack object after each new pointer that is created. + for i, ptr := range pointers { + builder.SetInsertPointBefore(llvm.NextInstruction(ptr)) + gep := builder.CreateGEP(stackObject, []llvm.Value{ + llvm.ConstInt(ctx.Int32Type(), 0, false), + llvm.ConstInt(ctx.Int32Type(), uint64(2+len(allocas)+i), false), + }, "") + builder.CreateStore(ptr, gep) + } + + // Make sure this stack object is popped from the linked list of stack + // objects at return. + for _, ret := range returns { + builder.SetInsertPointBefore(ret) + builder.CreateStore(parent, stackChainStart) + } + } + + return true +} + +// AddGlobalsBitmap performs a few related functions. It is needed for scanning +// globals on platforms where the .data/.bss section is not easily accessible by +// the GC, and thus all globals that contain pointers must be made reachable by +// the GC in some other way. +// +// First, it scans all globals, and bundles all globals that contain a pointer +// into one large global (updating all uses in the process). Then it creates a +// bitmap (bit vector) to locate all the pointers in this large global. This +// bitmap allows the GC to know in advance where exactly all the pointers live +// in the large globals bundle, to avoid false positives. +func AddGlobalsBitmap(mod llvm.Module) bool { + if mod.NamedGlobal("runtime.trackedGlobalsStart").IsNil() { + return false // nothing to do: no GC in use + } + + ctx := mod.Context() + targetData := llvm.NewTargetData(mod.DataLayout()) + uintptrType := ctx.IntType(targetData.PointerSize() * 8) + + // Collect all globals that contain pointers (and thus must be scanned by + // the GC). 
+ var trackedGlobals []llvm.Value + var trackedGlobalTypes []llvm.Type + for global := mod.FirstGlobal(); !global.IsNil(); global = llvm.NextGlobal(global) { + if global.IsDeclaration() { + continue + } + typ := global.Type().ElementType() + ptrs := getPointerBitmap(targetData, typ, global.Name()) + if ptrs.BitLen() == 0 { + continue + } + trackedGlobals = append(trackedGlobals, global) + trackedGlobalTypes = append(trackedGlobalTypes, typ) + } + + // Make a new global that bundles all existing globals, and remove the + // existing globals. All uses of the previous independent globals are + // replaced with a GEP into the new globals bundle. + globalsBundleType := ctx.StructType(trackedGlobalTypes, false) + globalsBundle := llvm.AddGlobal(mod, globalsBundleType, "tinygo.trackedGlobals") + globalsBundle.SetLinkage(llvm.InternalLinkage) + globalsBundle.SetUnnamedAddr(true) + initializer := llvm.Undef(globalsBundleType) + for i, global := range trackedGlobals { + initializer = llvm.ConstInsertValue(initializer, global.Initializer(), []uint32{uint32(i)}) + gep := llvm.ConstGEP(globalsBundle, []llvm.Value{ + llvm.ConstInt(ctx.Int32Type(), 0, false), + llvm.ConstInt(ctx.Int32Type(), uint64(i), false), + }) + global.ReplaceAllUsesWith(gep) + global.EraseFromParentAsGlobal() + } + globalsBundle.SetInitializer(initializer) + + // Update trackedGlobalsStart, which points to the globals bundle. + trackedGlobalsStart := llvm.ConstPtrToInt(globalsBundle, uintptrType) + mod.NamedGlobal("runtime.trackedGlobalsStart").SetInitializer(trackedGlobalsStart) + + // Update trackedGlobalsLength, which contains the length (in words) of the + // globals bundle. 
+ alignment := targetData.PrefTypeAlignment(llvm.PointerType(ctx.Int8Type(), 0)) + trackedGlobalsLength := llvm.ConstInt(uintptrType, targetData.TypeAllocSize(globalsBundleType)/uint64(alignment), false) + mod.NamedGlobal("runtime.trackedGlobalsLength").SetInitializer(trackedGlobalsLength) + + // Create a bitmap (a new global) that stores for each word in the globals + // bundle whether it contains a pointer. This allows globals to be scanned + // precisely: no non-pointers will be considered pointers if the bit pattern + // looks like one. + // This code assumes that pointers are self-aligned. For example, that a + // 32-bit (4-byte) pointer is also aligned to 4 bytes. + bitmapBytes := getPointerBitmap(targetData, globalsBundleType, "globals bundle").Bytes() + bitmapValues := make([]llvm.Value, len(bitmapBytes)) + for i, b := range bitmapBytes { + bitmapValues[len(bitmapBytes)-i-1] = llvm.ConstInt(ctx.Int8Type(), uint64(b), false) + } + bitmapArray := llvm.ConstArray(ctx.Int8Type(), bitmapValues) + bitmapNew := llvm.AddGlobal(mod, bitmapArray.Type(), "runtime.trackedGlobalsBitmap.tmp") + bitmapOld := mod.NamedGlobal("runtime.trackedGlobalsBitmap") + bitmapOld.ReplaceAllUsesWith(llvm.ConstBitCast(bitmapNew, bitmapOld.Type())) + bitmapNew.SetInitializer(bitmapArray) + bitmapNew.SetName("runtime.trackedGlobalsBitmap") + + return true // the IR was changed +} + +// getPointerBitmap scans the given LLVM type for pointers and sets bits in a +// bigint at the word offset that contains a pointer. This scan is recursive. 
+func getPointerBitmap(targetData llvm.TargetData, typ llvm.Type, name string) *big.Int { + alignment := targetData.PrefTypeAlignment(llvm.PointerType(typ.Context().Int8Type(), 0)) + switch typ.TypeKind() { + case llvm.IntegerTypeKind, llvm.FloatTypeKind, llvm.DoubleTypeKind: + return big.NewInt(0) + case llvm.PointerTypeKind: + return big.NewInt(1) + case llvm.StructTypeKind: + ptrs := big.NewInt(0) + for i, subtyp := range typ.StructElementTypes() { + subptrs := getPointerBitmap(targetData, subtyp, name) + if subptrs.BitLen() == 0 { + continue + } + offset := targetData.ElementOffset(typ, i) + if offset%uint64(alignment) != 0 { + panic("precise GC: global contains unaligned pointer: " + name) + } + subptrs.Lsh(subptrs, uint(offset)/uint(alignment)) + ptrs.Or(ptrs, subptrs) + } + return ptrs + case llvm.ArrayTypeKind: + subtyp := typ.ElementType() + subptrs := getPointerBitmap(targetData, subtyp, name) + ptrs := big.NewInt(0) + if subptrs.BitLen() == 0 { + return ptrs + } + elementSize := targetData.TypeAllocSize(subtyp) + for i := 0; i < typ.ArrayLength(); i++ { + ptrs.Lsh(ptrs, uint(elementSize)/uint(alignment)) + ptrs.Or(ptrs, subptrs) + } + return ptrs + default: + panic("unknown type kind of global: " + name) + } +} + +// markParentFunctions traverses all parent function calls (recursively) and +// adds them to the set of marked functions. It only considers function calls: +// any other uses of such a function are ignored. +func markParentFunctions(marked map[llvm.Value]struct{}, fn llvm.Value) { + worklist := []llvm.Value{fn} + for len(worklist) != 0 { + fn := worklist[len(worklist)-1] + worklist = worklist[:len(worklist)-1] + for _, use := range getUses(fn) { + if use.IsACallInst().IsNil() || use.CalledValue() != fn { + // Not the parent function. 
+ continue + } + parent := use.InstructionParent().Parent() + if _, ok := marked[parent]; !ok { + marked[parent] = struct{}{} + worklist = append(worklist, parent) + } + } + } +} diff --git a/transform/gc_test.go b/transform/gc_test.go new file mode 100644 index 00000000..64962f0a --- /dev/null +++ b/transform/gc_test.go @@ -0,0 +1,21 @@ +package transform + +import ( + "testing" + + "tinygo.org/x/go-llvm" +) + +func TestAddGlobalsBitmap(t *testing.T) { + t.Parallel() + testTransform(t, "testdata/gc-globals", func(mod llvm.Module) { + AddGlobalsBitmap(mod) + }) +} + +func TestMakeGCStackSlots(t *testing.T) { + t.Parallel() + testTransform(t, "testdata/gc-stackslots", func(mod llvm.Module) { + MakeGCStackSlots(mod) + }) +} diff --git a/transform/llvm.go b/transform/llvm.go index f41d2f9c..86dd897e 100644 --- a/transform/llvm.go +++ b/transform/llvm.go @@ -65,3 +65,27 @@ func replaceGlobalIntWithArray(mod llvm.Module, name string, buf interface{}) ll global.SetName(name) return global } + +// typeHasPointers returns whether this type is a pointer or contains pointers. +// If the type is an aggregate type, it will check whether there is a pointer +// inside. 
+func typeHasPointers(t llvm.Type) bool { + switch t.TypeKind() { + case llvm.PointerTypeKind: + return true + case llvm.StructTypeKind: + for _, subType := range t.StructElementTypes() { + if typeHasPointers(subType) { + return true + } + } + return false + case llvm.ArrayTypeKind: + if typeHasPointers(t.ElementType()) { + return true + } + return false + default: + return false + } +} diff --git a/transform/testdata/gc-globals.ll b/transform/testdata/gc-globals.ll new file mode 100644 index 00000000..a2d2b1f4 --- /dev/null +++ b/transform/testdata/gc-globals.ll @@ -0,0 +1,29 @@ +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown-wasm" + +%runtime._string = type { i8*, i32 } +%runtime._interface = type { i32, i8* } + +@globalInt = constant i32 5 +@globalString = constant %runtime._string zeroinitializer +@globalInterface = constant %runtime._interface zeroinitializer +@runtime.trackedGlobalsLength = external global i32 +@runtime.trackedGlobalsBitmap = external global [0 x i8] +@runtime.trackedGlobalsStart = external global i32 + +define void @main() { + %1 = load i32, i32* @globalInt + %2 = load %runtime._string, %runtime._string* @globalString + %3 = load %runtime._interface, %runtime._interface* @globalInterface + ret void +} + +define void @runtime.markGlobals() { + ; Very small subset of what runtime.markGlobals would really do. + ; Just enough to make sure the transformation is correct. 
+ %1 = load i32, i32* @runtime.trackedGlobalsStart + %2 = load i32, i32* @runtime.trackedGlobalsLength + %3 = getelementptr inbounds [0 x i8], [0 x i8]* @runtime.trackedGlobalsBitmap, i32 0, i32 0 + %4 = load i8, i8* %3 + ret void +} diff --git a/transform/testdata/gc-globals.out.ll b/transform/testdata/gc-globals.out.ll new file mode 100644 index 00000000..1dd10096 --- /dev/null +++ b/transform/testdata/gc-globals.out.ll @@ -0,0 +1,27 @@ +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown-wasm" + +%runtime._string = type { i8*, i32 } +%runtime._interface = type { i32, i8* } + +@globalInt = constant i32 5 +@runtime.trackedGlobalsLength = global i32 4 +@runtime.trackedGlobalsBitmap = external global [0 x i8] +@runtime.trackedGlobalsStart = global i32 ptrtoint ({ %runtime._string, %runtime._interface }* @tinygo.trackedGlobals to i32) +@tinygo.trackedGlobals = internal unnamed_addr global { %runtime._string, %runtime._interface } zeroinitializer +@runtime.trackedGlobalsBitmap.1 = global [1 x i8] c"\09" + +define void @main() { + %1 = load i32, i32* @globalInt + %2 = load %runtime._string, %runtime._string* getelementptr inbounds ({ %runtime._string, %runtime._interface }, { %runtime._string, %runtime._interface }* @tinygo.trackedGlobals, i32 0, i32 0) + %3 = load %runtime._interface, %runtime._interface* getelementptr inbounds ({ %runtime._string, %runtime._interface }, { %runtime._string, %runtime._interface }* @tinygo.trackedGlobals, i32 0, i32 1) + ret void +} + +define void @runtime.markGlobals() { + %1 = load i32, i32* @runtime.trackedGlobalsStart + %2 = load i32, i32* @runtime.trackedGlobalsLength + %3 = getelementptr inbounds [0 x i8], [0 x i8]* bitcast ([1 x i8]* @runtime.trackedGlobalsBitmap.1 to [0 x i8]*), i32 0, i32 0 + %4 = load i8, i8* %3 + ret void +} diff --git a/transform/testdata/gc-stackslots.ll b/transform/testdata/gc-stackslots.ll new file mode 100644 index 00000000..5e62f8c5 --- /dev/null +++ 
b/transform/testdata/gc-stackslots.ll @@ -0,0 +1,52 @@ +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown-wasm" + +%runtime.stackChainObject = type { %runtime.stackChainObject*, i32 } + +@runtime.stackChainStart = external global %runtime.stackChainObject* +@someGlobal = global i8 3 + +declare void @runtime.trackPointer(i8* nocapture readonly) + +declare noalias nonnull i8* @runtime.alloc(i32) + +; Generic function that returns a pointer (that must be tracked). +define i8* @getPointer() { + ret i8* @someGlobal +} + +define i8* @needsStackSlots() { + ; Tracked pointer. Although, in this case the value is immediately returned + ; so tracking it is not really necessary. + %ptr = call i8* @runtime.alloc(i32 4) + call void @runtime.trackPointer(i8* %ptr) + ret i8* %ptr +} + +; Check some edge cases of pointer tracking. +define i8* @needsStackSlots2() { + ; Only one stack slot should be created for this (but at the moment, one is + ; created for each call to runtime.trackPointer). + %ptr1 = call i8* @getPointer() + call void @runtime.trackPointer(i8* %ptr1) + call void @runtime.trackPointer(i8* %ptr1) + call void @runtime.trackPointer(i8* %ptr1) + + ; Create a pointer that does not need to be tracked (but is tracked). + %ptr2 = getelementptr i8, i8* @someGlobal, i32 0 + call void @runtime.trackPointer(i8* %ptr2) + + ; Here is finally the point where an allocation happens. + %unused = call i8* @runtime.alloc(i32 4) + call void @runtime.trackPointer(i8* %unused) + + ret i8* %ptr1 +} + +; Return a pointer from a caller. Because it doesn't allocate, no stack objects +; need to be created. 
+define i8* @noAllocatingFunction() { + %ptr = call i8* @getPointer() + call void @runtime.trackPointer(i8* %ptr) + ret i8* %ptr +} diff --git a/transform/testdata/gc-stackslots.out.ll b/transform/testdata/gc-stackslots.out.ll new file mode 100644 index 00000000..aa0aa35a --- /dev/null +++ b/transform/testdata/gc-stackslots.out.ll @@ -0,0 +1,58 @@ +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown-wasm" + +%runtime.stackChainObject = type { %runtime.stackChainObject*, i32 } + +@runtime.stackChainStart = global %runtime.stackChainObject* null +@someGlobal = global i8 3 + +declare void @runtime.trackPointer(i8* nocapture readonly) + +declare noalias nonnull i8* @runtime.alloc(i32) + +define i8* @getPointer() { + ret i8* @someGlobal +} + +define i8* @needsStackSlots() { + %gc.stackobject = alloca { %runtime.stackChainObject*, i32, i8* } + store { %runtime.stackChainObject*, i32, i8* } { %runtime.stackChainObject* null, i32 1, i8* null }, { %runtime.stackChainObject*, i32, i8* }* %gc.stackobject + %1 = load %runtime.stackChainObject*, %runtime.stackChainObject** @runtime.stackChainStart + %2 = getelementptr { %runtime.stackChainObject*, i32, i8* }, { %runtime.stackChainObject*, i32, i8* }* %gc.stackobject, i32 0, i32 0 + store %runtime.stackChainObject* %1, %runtime.stackChainObject** %2 + %3 = bitcast { %runtime.stackChainObject*, i32, i8* }* %gc.stackobject to %runtime.stackChainObject* + store %runtime.stackChainObject* %3, %runtime.stackChainObject** @runtime.stackChainStart + %ptr = call i8* @runtime.alloc(i32 4) + %4 = getelementptr { %runtime.stackChainObject*, i32, i8* }, { %runtime.stackChainObject*, i32, i8* }* %gc.stackobject, i32 0, i32 2 + store i8* %ptr, i8** %4 + store %runtime.stackChainObject* %1, %runtime.stackChainObject** @runtime.stackChainStart + ret i8* %ptr +} + +define i8* @needsStackSlots2() { + %gc.stackobject = alloca { %runtime.stackChainObject*, i32, i8*, i8*, i8*, i8* } + store { 
%runtime.stackChainObject*, i32, i8*, i8*, i8*, i8* } { %runtime.stackChainObject* null, i32 4, i8* null, i8* null, i8* null, i8* null }, { %runtime.stackChainObject*, i32, i8*, i8*, i8*, i8* }* %gc.stackobject + %1 = load %runtime.stackChainObject*, %runtime.stackChainObject** @runtime.stackChainStart + %2 = getelementptr { %runtime.stackChainObject*, i32, i8*, i8*, i8*, i8* }, { %runtime.stackChainObject*, i32, i8*, i8*, i8*, i8* }* %gc.stackobject, i32 0, i32 0 + store %runtime.stackChainObject* %1, %runtime.stackChainObject** %2 + %3 = bitcast { %runtime.stackChainObject*, i32, i8*, i8*, i8*, i8* }* %gc.stackobject to %runtime.stackChainObject* + store %runtime.stackChainObject* %3, %runtime.stackChainObject** @runtime.stackChainStart + %ptr1 = call i8* @getPointer() + %4 = getelementptr { %runtime.stackChainObject*, i32, i8*, i8*, i8*, i8* }, { %runtime.stackChainObject*, i32, i8*, i8*, i8*, i8* }* %gc.stackobject, i32 0, i32 4 + store i8* %ptr1, i8** %4 + %5 = getelementptr { %runtime.stackChainObject*, i32, i8*, i8*, i8*, i8* }, { %runtime.stackChainObject*, i32, i8*, i8*, i8*, i8* }* %gc.stackobject, i32 0, i32 3 + store i8* %ptr1, i8** %5 + %6 = getelementptr { %runtime.stackChainObject*, i32, i8*, i8*, i8*, i8* }, { %runtime.stackChainObject*, i32, i8*, i8*, i8*, i8* }* %gc.stackobject, i32 0, i32 2 + store i8* %ptr1, i8** %6 + %ptr2 = getelementptr i8, i8* @someGlobal, i32 0 + %unused = call i8* @runtime.alloc(i32 4) + %7 = getelementptr { %runtime.stackChainObject*, i32, i8*, i8*, i8*, i8* }, { %runtime.stackChainObject*, i32, i8*, i8*, i8*, i8* }* %gc.stackobject, i32 0, i32 5 + store i8* %unused, i8** %7 + store %runtime.stackChainObject* %1, %runtime.stackChainObject** @runtime.stackChainStart + ret i8* %ptr1 +} + +define i8* @noAllocatingFunction() { + %ptr = call i8* @getPointer() + ret i8* %ptr +}