Browse Source

interp: use object layout information for LLVM types

This commit uses the memory layout information for heap allocations
(added in the previous commit) to determine LLVM types, instead of
guessing the types from the content. This fixes a bug in which
recursive data structures (such as doubly linked lists) caused a
compiler stack overflow due to infinite recursion.

Not all heap allocations have a memory layout yet, but this can be
incrementally fixed in the future. So far, this commit should fix
(almost?) all cases of this stack overflow issue.
pull/2204/head
Ayke van Laethem 3 years ago
committed by Ron Evans
parent
commit
1869efe954
  1. 1
      interp/interp_test.go
  2. 10
      interp/interpreter.go
  3. 148
      interp/memory.go
  4. 53
      interp/testdata/alloc.ll
  5. 25
      interp/testdata/alloc.out.ll
  6. 45
      testdata/init.go

1
interp/interp_test.go

@ -17,6 +17,7 @@ func TestInterp(t *testing.T) {
"consteval",
"interface",
"revert",
"alloc",
} {
name := name // make tc local to this closure
t.Run(name, func(t *testing.T) {

10
interp/interpreter.go

@ -234,11 +234,15 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
// Get the requested memory size to be allocated.
size := operands[1].Uint()
// Get the object layout, if it is available.
llvmLayoutType := r.getLLVMTypeFromLayout(operands[2])
// Create the object.
alloc := object{
globalName: r.pkgName + "$alloc",
buffer: newRawValue(uint32(size)),
size: uint32(size),
globalName: r.pkgName + "$alloc",
llvmLayoutType: llvmLayoutType,
buffer: newRawValue(uint32(size)),
size: uint32(size),
}
index := len(r.objects)
r.objects = append(r.objects, alloc)

148
interp/memory.go

@ -18,6 +18,7 @@ import (
"encoding/binary"
"errors"
"math"
"math/big"
"strconv"
"strings"
@ -27,17 +28,20 @@ import (
// An object is a memory buffer that may be an already existing global or a
// global created with runtime.alloc or the alloca instruction. If llvmGlobal is
// set, that's the global for this object, otherwise it needs to be created (if
// it is still reachable when the package initializer returns).
// it is still reachable when the package initializer returns). The
// llvmLayoutType is not necessarily a complete type: it may need to be
// repeated (for example, for a slice value).
//
// Objects are copied in a memory view when they are stored to, to provide the
// ability to roll back interpreting a function.
type object struct {
llvmGlobal llvm.Value
llvmType llvm.Type // must match llvmGlobal.Type() if both are set, may be unset if llvmGlobal is set
globalName string // name, if not yet created (not guaranteed to be the final name)
buffer value // buffer with value as given by interp, nil if external
size uint32 // must match buffer.len(), if available
marked uint8 // 0 means unmarked, 1 means external read, 2 means external write
llvmGlobal llvm.Value
llvmType llvm.Type // must match llvmGlobal.Type() if both are set, may be unset if llvmGlobal is set
llvmLayoutType llvm.Type // LLVM type based on runtime.alloc layout parameter, if available
globalName string // name, if not yet created (not guaranteed to be the final name)
buffer value // buffer with value as given by interp, nil if external
size uint32 // must match buffer.len(), if available
marked uint8 // 0 means unmarked, 1 means external read, 2 means external write
}
// clone() returns a cloned version of this object, for when an object needs to
@ -541,7 +545,7 @@ func (v pointerValue) toLLVMValue(llvmType llvm.Type, mem *memoryView) (llvm.Val
// runtime.alloc.
// First allocate a new global for this object.
obj := mem.get(v.index())
if obj.llvmType.IsNil() {
if obj.llvmType.IsNil() && obj.llvmLayoutType.IsNil() {
// Create an initializer without knowing the global type.
// This is probably the result of a runtime.alloc call.
initializer, err := obj.buffer.asRawValue(mem.r).rawLLVMValue(mem)
@ -555,7 +559,23 @@ func (v pointerValue) toLLVMValue(llvmType llvm.Type, mem *memoryView) (llvm.Val
obj.llvmGlobal = llvmValue
mem.put(v.index(), obj)
} else {
globalType := obj.llvmType.ElementType()
// The global type is known, or at least its structure.
var globalType llvm.Type
if !obj.llvmType.IsNil() {
// The exact type is known.
globalType = obj.llvmType.ElementType()
} else { // !obj.llvmLayoutType.IsNil()
// The exact type isn't known, but the object layout is known.
globalType = obj.llvmLayoutType
// The layout may not span the full size of the global because
// of repetition. One example would be make([]string, 5) which
// would be 10 words in size but the layout would only be two
// words (for the string type).
typeSize := mem.r.targetData.TypeAllocSize(globalType)
if typeSize != uint64(obj.size) {
globalType = llvm.ArrayType(globalType, int(uint64(obj.size)/typeSize))
}
}
if checks && mem.r.targetData.TypeAllocSize(globalType) != uint64(obj.size) {
panic("size of the globalType isn't the same as the object size")
}
@ -574,6 +594,11 @@ func (v pointerValue) toLLVMValue(llvmType llvm.Type, mem *memoryView) (llvm.Val
return llvm.Value{}, errors.New("interp: allocated value does not match allocated type")
}
llvmValue.SetInitializer(initializer)
if obj.llvmType.IsNil() {
// The exact type isn't known (only the layout), so use the
// alignment that would normally be expected from runtime.alloc.
llvmValue.SetAlignment(mem.r.maxAlign)
}
}
// It should be included in r.globals because otherwise markExternal
@ -1155,3 +1180,108 @@ func (r *runner) getValue(llvmValue llvm.Value) value {
panic("unknown value")
}
}
// readObjectLayout reads the object layout as it is stored by the compiler. It
// returns the object size in words and a bitmap in which bit i is set when
// word i of the object holds a pointer. A nil bitmap (with size 0) means that
// no layout information is available.
//
// The layout value takes one of three forms:
//   - the integer 0 (a nil pointer): the layout is unknown;
//   - an integer with the least significant bit set: the size and the bitmap
//     are packed into the integer itself;
//   - a pointer to a global: the global starts with the size (one
//     pointer-sized integer), followed by the bytes of the bitmap.
//
// It panics when the layout value is malformed or cannot be read.
func (r *runner) readObjectLayout(layoutValue value) (uint64, *big.Int) {
	pointerSize := layoutValue.len(r)
	if checks && uint64(pointerSize) != r.targetData.TypeAllocSize(r.i8ptrType) {
		panic("inconsistent pointer size")
	}

	// The object layout can be stored in a global variable, directly as an
	// integer value, or can be nil.
	ptr, err := layoutValue.asPointer(r)
	if errors.Is(err, errIntegerAsPointer) {
		// It's an integer, which means it's a small object or unknown.
		layout := layoutValue.Uint()
		if layout == 0 {
			// Nil pointer, which means the layout is unknown.
			return 0, nil
		}
		if layout%2 != 1 {
			// Sanity check: the least significant bit must be set. This is how
			// the runtime can separate pointers from integers.
			panic("unexpected layout")
		}

		// Determine format of bitfields in the integer. The width of the size
		// field depends on the pointer width of the target.
		pointerBits := uint64(pointerSize * 8)
		var sizeFieldBits uint64
		switch pointerBits {
		case 16:
			sizeFieldBits = 4
		case 32:
			sizeFieldBits = 5
		case 64:
			sizeFieldBits = 6
		default:
			panic("unknown pointer size")
		}

		// Extract fields: bit 0 is the marker bit, the next sizeFieldBits bits
		// are the object size in words, all remaining bits are the bitmap.
		objectSizeWords := (layout >> 1) & (1<<sizeFieldBits - 1)
		bitmap := new(big.Int).SetUint64(layout >> (1 + sizeFieldBits))
		return objectSizeWords, bitmap
	}
	if err != nil {
		// Any other error means ptr does not refer to a valid object. Stop
		// here instead of reading the layout from an unrelated object.
		panic("interp: could not read object layout: " + err.Error())
	}

	// Read the object size in words and the bitmap from the global.
	buf := r.objects[ptr.index()].buffer.(rawValue)
	objectSizeWords := rawValue{buf: buf.buf[:r.pointerSize]}.Uint()
	rawByteValues := buf.buf[r.pointerSize:]
	rawBytes := make([]byte, len(rawByteValues))
	for i, v := range rawByteValues {
		if uint64(byte(v)) != v {
			panic("found pointer in data array?") // sanity check
		}
		rawBytes[i] = byte(v)
	}
	// big.Int.SetBytes interprets the bytes as a big-endian unsigned integer.
	bitmap := new(big.Int).SetBytes(rawBytes)
	return objectSizeWords, bitmap
}
// getLLVMTypeFromLayout builds the 'layout type' for an allocation from its
// object layout. The layout type only approximates the real type: every
// pointer sits at the right offset, but the allocated object may consist of
// several repetitions of it (for example the backing buffer of a slice).
//
// The zero llvm.Type is returned when no layout is available, and i8 is
// returned for an object that contains no pointers at all.
func (r *runner) getLLVMTypeFromLayout(layoutValue value) llvm.Type {
	numWords, ptrBitmap := r.readObjectLayout(layoutValue)
	switch {
	case ptrBitmap == nil:
		// No information available.
		return llvm.Type{}
	case ptrBitmap.BitLen() == 0:
		// Pointer-free object: hand back a plain byte type. Objects without
		// pointers may have a lower alignment, so they must not be turned
		// into word-sized fields.
		return r.mod.Context().Int8Type()
	}

	// Walk over the words of the object and emit one field per pointer or
	// per non-pointer word.
	ptrSize := layoutValue.len(r)
	wordAlign := r.targetData.PrefTypeAlignment(r.i8ptrType)
	var members []llvm.Type
	for word := 0; word < int(numWords); {
		if ptrBitmap.Bit(word) == 0 {
			// Non-pointer word: an integer as wide as the pointer alignment.
			members = append(members, r.mod.Context().IntType(wordAlign*8))
			word++
			continue
		}
		// Pointer field: skip as many words as a pointer occupies.
		members = append(members, r.i8ptrType)
		word += int(ptrSize / uint32(wordAlign))
	}

	// A single field is used directly, anything else becomes an unpacked
	// struct.
	var layoutType llvm.Type
	switch len(members) {
	case 1:
		layoutType = members[0]
	default:
		layoutType = r.mod.Context().StructType(members, false)
	}

	wantBytes := numWords * uint64(wordAlign)
	if checks && r.targetData.TypeAllocSize(layoutType) != wantBytes {
		panic("unexpected size") // sanity check
	}
	return layoutType
}

53
interp/testdata/alloc.ll

@ -0,0 +1,53 @@
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32--wasi"
@"runtime/gc.layout:62-2000000000000001" = linkonce_odr unnamed_addr constant { i32, [8 x i8] } { i32 62, [8 x i8] c" \00\00\00\00\00\00\01" }
@pointerFree12 = global i8* null
@pointerFree7 = global i8* null
@pointerFree3 = global i8* null
@pointerFree0 = global i8* null
@layout1 = global i8* null
@layout2 = global i8* null
@layout3 = global i8* null
@layout4 = global i8* null
@bigobj1 = global i8* null
declare i8* @runtime.alloc(i32, i8*) unnamed_addr
; runtime.initAll does nothing but call @main.init, which performs the
; allocations exercised by this test.
define void @runtime.initAll() unnamed_addr {
call void @main.init()
ret void
}
; main.init allocates objects with a variety of layouts and stores each
; resulting pointer in a global.
;
; Every runtime.alloc call passes the allocation size in bytes followed by the
; object layout. A layout given as an integer (inttoptr) has bit 0 set, holds
; the object size in words in the next 5 bits (32-bit pointers) and the pointer
; bitmap in the remaining bits. Layout 3 therefore means: one word, no
; pointers.
define internal void @main.init() unnamed_addr {
; Object that's word-aligned.
%pointerFree12 = call i8* @runtime.alloc(i32 12, i8* inttoptr (i32 3 to i8*))
store i8* %pointerFree12, i8** @pointerFree12
; Object larger than a word but not word-aligned.
%pointerFree7 = call i8* @runtime.alloc(i32 7, i8* inttoptr (i32 3 to i8*))
store i8* %pointerFree7, i8** @pointerFree7
; Object smaller than a word (and of course not word-aligned).
%pointerFree3 = call i8* @runtime.alloc(i32 3, i8* inttoptr (i32 3 to i8*))
store i8* %pointerFree3, i8** @pointerFree3
; Zero-sized object.
%pointerFree0 = call i8* @runtime.alloc(i32 0, i8* inttoptr (i32 3 to i8*))
store i8* %pointerFree0, i8** @pointerFree0
; Object made out of 3 pointers.
; Layout 67: one word that is a pointer, repeated to fill the 12 bytes.
%layout1 = call i8* @runtime.alloc(i32 12, i8* inttoptr (i32 67 to i8*))
store i8* %layout1, i8** @layout1
; Array (or slice) of 5 slices.
; Layout 71: three words of which only the first is a pointer, repeated to
; fill the 60 bytes.
%layout2 = call i8* @runtime.alloc(i32 60, i8* inttoptr (i32 71 to i8*))
store i8* %layout2, i8** @layout2
; Oddly shaped object, using all bits in the layout integer.
; Layout 2467830261 (0x931819F5): 26 words (104 bytes) with a 26-bit bitmap.
%layout3 = call i8* @runtime.alloc(i32 104, i8* inttoptr (i32 2467830261 to i8*))
store i8* %layout3, i8** @layout3
; ...repeated.
; 312 bytes is three times the 104-byte layout.
%layout4 = call i8* @runtime.alloc(i32 312, i8* inttoptr (i32 2467830261 to i8*))
store i8* %layout4, i8** @layout4
; Large object that needs to be stored in a separate global.
; The global holds the size in words (62, i.e. 248 bytes) followed by the
; bytes of the bitmap.
%bigobj1 = call i8* @runtime.alloc(i32 248, i8* bitcast ({ i32, [8 x i8] }* @"runtime/gc.layout:62-2000000000000001" to i8*))
store i8* %bigobj1, i8** @bigobj1
ret void
}

25
interp/testdata/alloc.out.ll

@ -0,0 +1,25 @@
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32--wasi"
@pointerFree12 = local_unnamed_addr global i8* getelementptr inbounds ([12 x i8], [12 x i8]* @"main$alloc", i32 0, i32 0)
@pointerFree7 = local_unnamed_addr global i8* getelementptr inbounds ([7 x i8], [7 x i8]* @"main$alloc.1", i32 0, i32 0)
@pointerFree3 = local_unnamed_addr global i8* getelementptr inbounds ([3 x i8], [3 x i8]* @"main$alloc.2", i32 0, i32 0)
@pointerFree0 = local_unnamed_addr global i8* getelementptr inbounds ([0 x i8], [0 x i8]* @"main$alloc.3", i32 0, i32 0)
@layout1 = local_unnamed_addr global i8* bitcast ([3 x i8*]* @"main$alloc.4" to i8*)
@layout2 = local_unnamed_addr global i8* bitcast ([5 x { i8*, i32, i32 }]* @"main$alloc.5" to i8*)
@layout3 = local_unnamed_addr global i8* bitcast ({ i8*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i8*, i8*, i32, i32, i8*, i32, i32, i8* }* @"main$alloc.6" to i8*)
@layout4 = local_unnamed_addr global i8* bitcast ([3 x { i8*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i8*, i8*, i32, i32, i8*, i32, i32, i8* }]* @"main$alloc.7" to i8*)
@bigobj1 = local_unnamed_addr global i8* bitcast ({ i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* }* @"main$alloc.8" to i8*)
@"main$alloc" = internal global [12 x i8] zeroinitializer, align 4
@"main$alloc.1" = internal global [7 x i8] zeroinitializer, align 4
@"main$alloc.2" = internal global [3 x i8] zeroinitializer, align 4
@"main$alloc.3" = internal global [0 x i8] zeroinitializer, align 4
@"main$alloc.4" = internal global [3 x i8*] zeroinitializer, align 4
@"main$alloc.5" = internal global [5 x { i8*, i32, i32 }] zeroinitializer, align 4
@"main$alloc.6" = internal global { i8*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i8*, i8*, i32, i32, i8*, i32, i32, i8* } zeroinitializer, align 4
@"main$alloc.7" = internal global [3 x { i8*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i8*, i8*, i32, i32, i8*, i32, i32, i8* }] zeroinitializer, align 4
@"main$alloc.8" = internal global { i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* } zeroinitializer, align 4
define void @runtime.initAll() unnamed_addr {
ret void
}

45
testdata/init.go

@ -44,8 +44,53 @@ var (
uint8SliceDst []uint8
intSliceSrc = []int16{5, 123, 1024}
intSliceDst []int16
someList *linkedList
someBigList *bigLinkedList
)
// linkedList is a node of a doubly linked list. Because each node points to
// both of its neighbours, the type is recursive.
type linkedList struct {
	prev, next *linkedList
	v          int // arbitrary value (don't care)
}
// init builds a doubly linked list of four nodes (values -1, 0, 1 and 2) and
// leaves someList pointing at the last node.
func init() {
	// First node, without neighbours.
	someList = &linkedList{v: -1}

	// Append three more nodes. Each new node points back to the previous
	// tail, and the previous tail points forward to the new node.
	for n := 0; n != 3; n++ {
		tail := someList
		node := &linkedList{prev: tail, v: n}
		someList = node
		tail.next = node
	}
}
// bigLinkedList is a doubly linked list node that additionally carries an
// array of 100 pointers, which makes it much larger than linkedList.
type bigLinkedList struct {
	prev, next *bigLinkedList
	v          int
	buf        [100]*int
}
// init builds a doubly linked list of four bigLinkedList nodes (values -1, 0,
// 1 and 2) and leaves someBigList pointing at the last node.
func init() {
	// First node, without neighbours.
	someBigList = &bigLinkedList{v: -1}

	// Append three more nodes. Neighbouring nodes refer to each other through
	// prev and next, which creates reference cycles between the objects.
	for n := 0; n != 3; n++ {
		tail := someBigList
		node := &bigLinkedList{prev: tail, v: n}
		someBigList = node
		tail.next = node
	}
}
func init() {
uint8SliceDst = make([]uint8, len(uint8SliceSrc))
copy(uint8SliceDst, uint8SliceSrc)

Loading…
Cancel
Save