From e6caa3fe9ef0a68779953b7fc9c3cfce3c8158ca Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Tue, 25 Jun 2024 20:14:05 +0200 Subject: [PATCH] transform: fix incorrect alignment of heap-to-stack transform It assumed the maximum alignment was equal to sizeof(void*), which is definitely not the case. So this only worked more or less by accident previously. It now uses the alignment as specified by the frontend, or else `unsafe.Alignof(complex128)` which is typically the maximum alignment of a given platform (though this shouldn't really happen in practice: the optimizer should keep the 'align' attribute in place). --- go.mod | 2 +- go.sum | 4 ++-- transform/allocs.go | 31 ++++++++++++++----------------- transform/testdata/allocs.ll | 31 +++++++++++++++++++++++-------- transform/testdata/allocs.out.ll | 25 ++++++++++++++++++++++--- 5 files changed, 62 insertions(+), 31 deletions(-) diff --git a/go.mod b/go.mod index d17f62e3..b054e77b 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( golang.org/x/sys v0.21.0 golang.org/x/tools v0.22.1-0.20240621165957-db513b091504 gopkg.in/yaml.v2 v2.4.0 - tinygo.org/x/go-llvm v0.0.0-20240518103902-697964f2a9dc + tinygo.org/x/go-llvm v0.0.0-20240627184919-3b50c76783a8 ) require ( diff --git a/go.sum b/go.sum index c093ee07..8d6dc516 100644 --- a/go.sum +++ b/go.sum @@ -106,5 +106,5 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -tinygo.org/x/go-llvm v0.0.0-20240518103902-697964f2a9dc h1:TCzibFa4oLu+njEP3fnRUmZ+QQeb8BjtOwctgcjzL0k= -tinygo.org/x/go-llvm v0.0.0-20240518103902-697964f2a9dc/go.mod h1:GFbusT2VTA4I+l4j80b17KFK+6whv69Wtny5U+T8RR0= +tinygo.org/x/go-llvm v0.0.0-20240627184919-3b50c76783a8 h1:bLsZXRUBavt++CJlMN7sppNziqu3LyamESLhFJcpqFQ= +tinygo.org/x/go-llvm v0.0.0-20240627184919-3b50c76783a8/go.mod h1:GFbusT2VTA4I+l4j80b17KFK+6whv69Wtny5U+T8RR0= diff --git a/transform/allocs.go b/transform/allocs.go index 67ca1ea4..870faa5b 100644 --- a/transform/allocs.go +++ b/transform/allocs.go @@ -29,10 +29,14 @@ func OptimizeAllocs(mod llvm.Module, printAllocs *regexp.Regexp, maxStackAlloc u targetData := llvm.NewTargetData(mod.DataLayout()) defer targetData.Dispose() - ptrType := llvm.PointerType(mod.Context().Int8Type(), 0) - builder := mod.Context().NewBuilder() + ctx := mod.Context() + builder := ctx.NewBuilder() defer builder.Dispose() + // Determine the maximum alignment on this platform. + complex128Type := ctx.StructType([]llvm.Type{ctx.DoubleType(), ctx.DoubleType()}, false) + maxAlign := int64(targetData.ABITypeAlignment(complex128Type)) + for _, heapalloc := range getUses(allocator) { logAllocs := printAllocs != nil && printAllocs.MatchString(heapalloc.InstructionParent().Parent().Name()) if heapalloc.Operand(0).IsAConstantInt().IsNil() { @@ -90,21 +94,14 @@ func OptimizeAllocs(mod llvm.Module, printAllocs *regexp.Regexp, maxStackAlloc u } // The pointer value does not escape. - // Determine the appropriate alignment of the alloca. The size of the - // allocation gives us a hint what the alignment should be. - var alignment int - if size%2 != 0 { - alignment = 1 - } else if size%4 != 0 { - alignment = 2 - } else if size%8 != 0 { - alignment = 4 - } else { - alignment = 8 - } - if pointerAlignment := targetData.ABITypeAlignment(ptrType); pointerAlignment < alignment { - // Use min(alignment, alignof(void*)) as the alignment. - alignment = pointerAlignment + // Determine the appropriate alignment of the alloca. + attr := heapalloc.GetCallSiteEnumAttribute(0, llvm.AttributeKindID("align")) + alignment := int(maxAlign) + if !attr.IsNil() { + // 'align' return value attribute is set, so use it. + // This is basically always the case, but to be sure we'll default + // to maxAlign if it isn't. + alignment = int(attr.GetEnumValue()) } // Insert alloca in the entry block. Do it here so that mem2reg can diff --git a/transform/testdata/allocs.ll b/transform/testdata/allocs.ll index 1c2fdd5a..4f6960ef 100644 --- a/transform/testdata/allocs.ll +++ b/transform/testdata/allocs.ll @@ -7,7 +7,7 @@ declare nonnull ptr @runtime.alloc(i32, ptr) ; Test allocating a single int (i32) that should be allocated on the stack. define void @testInt() { - %alloc = call ptr @runtime.alloc(i32 4, ptr null) + %alloc = call align 4 ptr @runtime.alloc(i32 4, ptr null) store i32 5, ptr %alloc ret void } @@ -15,7 +15,7 @@ define void @testInt() { ; Test allocating an array of 3 i16 values that should be allocated on the ; stack. define i16 @testArray() { - %alloc = call ptr @runtime.alloc(i32 6, ptr null) + %alloc = call align 2 ptr @runtime.alloc(i32 6, ptr null) %alloc.1 = getelementptr i16, ptr %alloc, i32 1 store i16 5, ptr %alloc.1 %alloc.2 = getelementptr i16, ptr %alloc, i32 2 @@ -23,30 +23,45 @@ define i16 @testArray() { ret i16 %val } +; Test allocating objects with an unknown alignment. +define void @testUnknownAlign() { + %alloc32 = call ptr @runtime.alloc(i32 32, ptr null) + store i8 5, ptr %alloc32 + %alloc24 = call ptr @runtime.alloc(i32 24, ptr null) + store i16 5, ptr %alloc24 + %alloc12 = call ptr @runtime.alloc(i32 12, ptr null) + store i16 5, ptr %alloc12 + %alloc6 = call ptr @runtime.alloc(i32 6, ptr null) + store i16 5, ptr %alloc6 + %alloc3 = call ptr @runtime.alloc(i32 3, ptr null) + store i16 5, ptr %alloc3 + ret void +} + ; Call a function that will let the pointer escape, so the heap-to-stack ; transform shouldn't be applied. define void @testEscapingCall() { - %alloc = call ptr @runtime.alloc(i32 4, ptr null) + %alloc = call align 4 ptr @runtime.alloc(i32 4, ptr null) %val = call ptr @escapeIntPtr(ptr %alloc) ret void } define void @testEscapingCall2() { - %alloc = call ptr @runtime.alloc(i32 4, ptr null) + %alloc = call align 4 ptr @runtime.alloc(i32 4, ptr null) %val = call ptr @escapeIntPtrSometimes(ptr %alloc, ptr %alloc) ret void } ; Call a function that doesn't let the pointer escape. define void @testNonEscapingCall() { - %alloc = call ptr @runtime.alloc(i32 4, ptr null) + %alloc = call align 4 ptr @runtime.alloc(i32 4, ptr null) %val = call ptr @noescapeIntPtr(ptr %alloc) ret void } ; Return the allocated value, which lets it escape. define ptr @testEscapingReturn() { - %alloc = call ptr @runtime.alloc(i32 4, ptr null) + %alloc = call align 4 ptr @runtime.alloc(i32 4, ptr null) ret ptr %alloc } @@ -55,7 +70,7 @@ define void @testNonEscapingLoop() { entry: br label %loop loop: - %alloc = call ptr @runtime.alloc(i32 4, ptr null) + %alloc = call align 4 ptr @runtime.alloc(i32 4, ptr null) %ptr = call ptr @noescapeIntPtr(ptr %alloc) %result = icmp eq ptr null, %ptr br i1 %result, label %loop, label %end @@ -65,7 +80,7 @@ end: ; Test a zero-sized allocation. define void @testZeroSizedAlloc() { - %alloc = call ptr @runtime.alloc(i32 0, ptr null) + %alloc = call align 1 ptr @runtime.alloc(i32 0, ptr null) %ptr = call ptr @noescapeIntPtr(ptr %alloc) ret void } diff --git a/transform/testdata/allocs.out.ll b/transform/testdata/allocs.out.ll index b3c8bf8f..e4a5e4f7 100644 --- a/transform/testdata/allocs.out.ll +++ b/transform/testdata/allocs.out.ll @@ -22,14 +22,33 @@ define i16 @testArray() { ret i16 %val } +define void @testUnknownAlign() { + %stackalloc4 = alloca [32 x i8], align 8 + %stackalloc3 = alloca [24 x i8], align 8 + %stackalloc2 = alloca [12 x i8], align 8 + %stackalloc1 = alloca [6 x i8], align 8 + %stackalloc = alloca [3 x i8], align 8 + store [32 x i8] zeroinitializer, ptr %stackalloc4, align 8 + store i8 5, ptr %stackalloc4, align 1 + store [24 x i8] zeroinitializer, ptr %stackalloc3, align 8 + store i16 5, ptr %stackalloc3, align 2 + store [12 x i8] zeroinitializer, ptr %stackalloc2, align 8 + store i16 5, ptr %stackalloc2, align 2 + store [6 x i8] zeroinitializer, ptr %stackalloc1, align 8 + store i16 5, ptr %stackalloc1, align 2 + store [3 x i8] zeroinitializer, ptr %stackalloc, align 8 + store i16 5, ptr %stackalloc, align 2 + ret void +} + define void @testEscapingCall() { - %alloc = call ptr @runtime.alloc(i32 4, ptr null) + %alloc = call align 4 ptr @runtime.alloc(i32 4, ptr null) %val = call ptr @escapeIntPtr(ptr %alloc) ret void } define void @testEscapingCall2() { - %alloc = call ptr @runtime.alloc(i32 4, ptr null) + %alloc = call align 4 ptr @runtime.alloc(i32 4, ptr null) %val = call ptr @escapeIntPtrSometimes(ptr %alloc, ptr %alloc) ret void } @@ -42,7 +61,7 @@ define void @testNonEscapingCall() { } define ptr @testEscapingReturn() { - %alloc = call ptr @runtime.alloc(i32 4, ptr null) + %alloc = call align 4 ptr @runtime.alloc(i32 4, ptr null) ret ptr %alloc }