tinygo/transform/gc.go


								package transform


								import (

									"tinygo.org/x/go-llvm"

								)


								// MakeGCStackSlots converts all calls to runtime.trackPointer to explicit

								// stores to stack slots that are scannable by the GC.

								func MakeGCStackSlots(mod llvm.Module) bool {

									// Check whether there are allocations at all.

									alloc := mod.NamedFunction("runtime.alloc")

									if alloc.IsNil() {

										// Nothing to. Make sure all remaining bits and pieces for stack

										// chains are neutralized.

										for _, call := range getUses(mod.NamedFunction("runtime.trackPointer")) {

											call.EraseFromParentAsInstruction()

										}

										stackChainStart := mod.NamedGlobal("runtime.stackChainStart")

										if !stackChainStart.IsNil() {

											stackChainStart.SetLinkage(llvm.InternalLinkage)

											stackChainStart.SetInitializer(llvm.ConstNull(stackChainStart.Type().ElementType()))

											stackChainStart.SetGlobalConstant(true)

										}

										return false

									}


									trackPointer := mod.NamedFunction("runtime.trackPointer")

									if trackPointer.IsNil() || trackPointer.FirstUse().IsNil() {

										return false // nothing to do

									}


									ctx := mod.Context()

									builder := ctx.NewBuilder()

									defer builder.Dispose()

									targetData := llvm.NewTargetData(mod.DataLayout())

									defer targetData.Dispose()

									uintptrType := ctx.IntType(targetData.PointerSize() * 8)


									// Look at *all* functions to see whether they are free of function pointer

									// calls.

									// This takes less than 5ms for ~100kB of WebAssembly but would perhaps be

									// faster when written in C++ (to avoid the CGo overhead).

									funcsWithFPCall := map[llvm.Value]struct{}{}

									n := 0

									for fn := mod.FirstFunction(); !fn.IsNil(); fn = llvm.NextFunction(fn) {

										n++

										if _, ok := funcsWithFPCall[fn]; ok {

											continue // already found

										}

										done := false

										for bb := fn.FirstBasicBlock(); !bb.IsNil() && !done; bb = llvm.NextBasicBlock(bb) {

											for call := bb.FirstInstruction(); !call.IsNil() && !done; call = llvm.NextInstruction(call) {

												if call.IsACallInst().IsNil() {

													continue // only looking at calls

												}

												called := call.CalledValue()

												if !called.IsAFunction().IsNil() {

													continue // only looking for function pointers

												}

												funcsWithFPCall[fn] = struct{}{}

												markParentFunctions(funcsWithFPCall, fn)

												done = true

											}

										}

									}


									// Determine which functions need stack objects. Many leaf functions don't

									// need it: it only causes overhead for them.

									// Actually, in one test it was only able to eliminate stack object from 12%

									// of functions that had a call to runtime.trackPointer (8 out of 68

									// functions), so this optimization is not as big as it may seem.

									allocatingFunctions := map[llvm.Value]struct{}{} // set of allocating functions


									// Work from runtime.alloc and trace all parents to check which functions do

									// a heap allocation (and thus which functions do not).

									markParentFunctions(allocatingFunctions, alloc)


									// Also trace all functions that call a function pointer.

									for fn := range funcsWithFPCall {

										// Assume that functions that call a function pointer do a heap

										// allocation as a conservative guess because the called function might

										// do a heap allocation.

										allocatingFunctions[fn] = struct{}{}

										markParentFunctions(allocatingFunctions, fn)

									}


									// Collect some variables used below in the loop.

									stackChainStart := mod.NamedGlobal("runtime.stackChainStart")

									if stackChainStart.IsNil() {

										// This may be reached in a weird scenario where we call runtime.alloc but the garbage collector is unreachable.

										// This can be accomplished by allocating 0 bytes.

										// There is no point in tracking anything.

										for _, use := range getUses(trackPointer) {

											use.EraseFromParentAsInstruction()

										}

										return false

									}

									stackChainStart.SetLinkage(llvm.InternalLinkage)

									stackChainStartType := stackChainStart.Type().ElementType()

									stackChainStart.SetInitializer(llvm.ConstNull(stackChainStartType))


									// Iterate until runtime.trackPointer has no uses left.

									for use := trackPointer.FirstUse(); !use.IsNil(); use = trackPointer.FirstUse() {

										// Pick the first use of runtime.trackPointer.

										call := use.User()

										if call.IsACallInst().IsNil() {

											panic("expected runtime.trackPointer use to be a call")

										}


										// Pick the parent function.

										fn := call.InstructionParent().Parent()


										if _, ok := allocatingFunctions[fn]; !ok {

											// This function nor any of the functions it calls (recursively)

											// allocate anything from the heap, so it will not trigger a garbage

											// collection cycle. Thus, it does not need to track local pointer

											// values.

											// This is a useful optimization but not as big as you might guess,

											// as described above (it avoids stack objects for ~12% of

											// functions).

											call.EraseFromParentAsInstruction()

											continue

										}


										// Find all calls to runtime.trackPointer in this function.

										var calls []llvm.Value

										var returns []llvm.Value

										for bb := fn.FirstBasicBlock(); !bb.IsNil(); bb = llvm.NextBasicBlock(bb) {

											for inst := bb.FirstInstruction(); !inst.IsNil(); inst = llvm.NextInstruction(inst) {

												switch inst.InstructionOpcode() {

												case llvm.Call:

													if inst.CalledValue() == trackPointer {

														calls = append(calls, inst)

													}

												case llvm.Ret:

													returns = append(returns, inst)

												}

											}

										}


										// Determine what to do with each call.

										var allocas, pointers []llvm.Value

										for _, call := range calls {

											ptr := call.Operand(0)

											call.EraseFromParentAsInstruction()

											if ptr.IsAInstruction().IsNil() {

												continue

											}


											// Some trivial optimizations.

											if ptr.IsAInstruction().IsNil() {

												continue

											}

											switch ptr.InstructionOpcode() {

											case llvm.GetElementPtr:

												// Check for all zero offsets.

												// Sometimes LLVM rewrites bitcasts to zero-index GEPs, and we still need to track the GEP.

												n := ptr.OperandsCount()

												var hasOffset bool

												for i := 1; i < n; i++ {

													offset := ptr.Operand(i)

													if offset.IsAConstantInt().IsNil() || offset.ZExtValue() != 0 {

														hasOffset = true

														break

													}

												}


												if hasOffset {

													// These values do not create new values: the values already

													// existed locally in this function so must have been tracked

													// already.

													continue

												}

											case llvm.PHI:

												// While the value may have already been tracked, it may be overwritten in a loop.

												// Therefore, a second copy must be created to ensure that it is tracked over the entirety of its lifetime.

											case llvm.ExtractValue, llvm.BitCast:

												// These instructions do not create new values, but their

												// original value may not be tracked. So keep tracking them for

												// now.

												// With more analysis, it should be possible to optimize a

												// significant chunk of these away.

											case llvm.Call, llvm.Load, llvm.IntToPtr:

												// These create new values so must be stored locally. But

												// perhaps some of these can be fused when they actually refer

												// to the same value.

											default:

												// Ambiguous. These instructions are uncommon, but perhaps could

												// be optimized if needed.

											}


											if !ptr.IsAAllocaInst().IsNil() {

												if typeHasPointers(ptr.Type().ElementType()) {

													allocas = append(allocas, ptr)

												}

											} else {

												pointers = append(pointers, ptr)

											}

										}


										if len(allocas) == 0 && len(pointers) == 0 {

											// This function does not need to keep track of stack pointers.

											continue

										}


										// Determine the type of the required stack slot.

										fields := []llvm.Type{

											stackChainStartType, // Pointer to parent frame.

											uintptrType,         // Number of elements in this frame.

										}

										for _, alloca := range allocas {

											fields = append(fields, alloca.Type().ElementType())

										}

										for _, ptr := range pointers {

											fields = append(fields, ptr.Type())

										}

										stackObjectType := ctx.StructType(fields, false)


										// Create the stack object at the function entry.

										builder.SetInsertPointBefore(fn.EntryBasicBlock().FirstInstruction())

										stackObject := builder.CreateAlloca(stackObjectType, "gc.stackobject")

										initialStackObject := llvm.ConstNull(stackObjectType)

										numSlots := (targetData.TypeAllocSize(stackObjectType) - uint64(targetData.PointerSize())*2) / uint64(targetData.ABITypeAlignment(uintptrType))

										numSlotsValue := llvm.ConstInt(uintptrType, numSlots, false)

										initialStackObject = llvm.ConstInsertValue(initialStackObject, numSlotsValue, []uint32{1})

										builder.CreateStore(initialStackObject, stackObject)


										// Update stack start.

										parent := builder.CreateLoad(stackChainStart, "")

										gep := builder.CreateGEP(stackObject, []llvm.Value{

											llvm.ConstInt(ctx.Int32Type(), 0, false),

											llvm.ConstInt(ctx.Int32Type(), 0, false),

										}, "")

										builder.CreateStore(parent, gep)

										stackObjectCast := builder.CreateBitCast(stackObject, stackChainStartType, "")

										builder.CreateStore(stackObjectCast, stackChainStart)


										// Replace all independent allocas with GEPs in the stack object.

										for i, alloca := range allocas {

											gep := builder.CreateGEP(stackObject, []llvm.Value{

												llvm.ConstInt(ctx.Int32Type(), 0, false),

												llvm.ConstInt(ctx.Int32Type(), uint64(2+i), false),

											}, "")

											alloca.ReplaceAllUsesWith(gep)

											alloca.EraseFromParentAsInstruction()

										}


										// Do a store to the stack object after each new pointer that is created.

										pointerStores := make(map[llvm.Value]struct{})

										for i, ptr := range pointers {

											// Insert the store after the pointer value is created.

											insertionPoint := llvm.NextInstruction(ptr)

											for !insertionPoint.IsAPHINode().IsNil() {

												// PHI nodes are required to be at the start of the block.

												// Insert after the last PHI node.

												insertionPoint = llvm.NextInstruction(insertionPoint)

											}

											builder.SetInsertPointBefore(insertionPoint)


											// Extract a pointer to the appropriate section of the stack object.

											gep := builder.CreateGEP(stackObject, []llvm.Value{

												llvm.ConstInt(ctx.Int32Type(), 0, false),

												llvm.ConstInt(ctx.Int32Type(), uint64(2+len(allocas)+i), false),

											}, "")


											// Store the pointer into the stack slot.

											store := builder.CreateStore(ptr, gep)

											pointerStores[store] = struct{}{}

										}


										// Make sure this stack object is popped from the linked list of stack

										// objects at return.

										for _, ret := range returns {

											// Check for any tail calls at this return.

											prev := llvm.PrevInstruction(ret)

											if !prev.IsNil() && !prev.IsABitCastInst().IsNil() {

												// A bitcast can appear before a tail call, so skip backwards more.

												prev = llvm.PrevInstruction(prev)

											}

											if !prev.IsNil() && !prev.IsACallInst().IsNil() {

												// This is no longer a tail call.

												prev.SetTailCall(false)

											}

											builder.SetInsertPointBefore(ret)

											builder.CreateStore(parent, stackChainStart)

										}

									}


									return true

								}


								// markParentFunctions traverses all parent function calls (recursively) and

								// adds them to the set of marked functions. It only considers function calls:

								// any other uses of such a function is ignored.

								func markParentFunctions(marked map[llvm.Value]struct{}, fn llvm.Value) {

									worklist := []llvm.Value{fn}

									for len(worklist) != 0 {

										fn := worklist[len(worklist)-1]

										worklist = worklist[:len(worklist)-1]

										for _, use := range getUses(fn) {

											if use.IsACallInst().IsNil() || use.CalledValue() != fn {

												// Not the parent function.

												continue

											}

											parent := use.InstructionParent().Parent()

											if _, ok := marked[parent]; !ok {

												marked[parent] = struct{}{}

												worklist = append(worklist, parent)

											}

										}

									}

								}