From ef8c1a187d7758d09cdde3a63e7c5f3ab675b9db Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Wed, 8 Dec 2021 22:09:49 +0100 Subject: [PATCH] transform: allocate the correct amount of bytes in an alloca When I wrote the code originally, I didn't know about SetAlignment, so I hacked a way around it by allocating [...]uintptr types. However, this allocates a few too many bytes in some cases. This commit changes this to only allocate the space that we actually need. The code size effect is mixed, but generally positive. The combined average is reduced by 0.27%, with more programs decreasing in size than increasing in size. --- transform/allocs.go | 25 +++++++++++++++++++++---- transform/testdata/allocs.out.ll | 24 ++++++++++++------------ 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/transform/allocs.go b/transform/allocs.go index 8bc81783..ef5467bc 100644 --- a/transform/allocs.go +++ b/transform/allocs.go @@ -95,19 +95,36 @@ func OptimizeAllocs(mod llvm.Module, printAllocs *regexp.Regexp, logger func(tok } // The pointer value does not escape. + // Determine the appropriate alignment of the alloca. The size of the + // allocation gives us a hint what the alignment should be. + var alignment int + if size%2 != 0 { + alignment = 1 + } else if size%4 != 0 { + alignment = 2 + } else if size%8 != 0 { + alignment = 4 + } else { + alignment = 8 + } + if pointerAlignment := targetData.ABITypeAlignment(i8ptrType); pointerAlignment < alignment { + // Use min(alignment, alignof(void*)) as the alignment. + alignment = pointerAlignment + } + // Insert alloca in the entry block. Do it here so that mem2reg can // promote it to a SSA value. 
fn := bitcast.InstructionParent().Parent() builder.SetInsertPointBefore(fn.EntryBasicBlock().FirstInstruction()) - alignment := targetData.ABITypeAlignment(i8ptrType) - sizeInWords := (size + uint64(alignment) - 1) / uint64(alignment) - allocaType := llvm.ArrayType(mod.Context().IntType(alignment*8), int(sizeInWords)) + allocaType := llvm.ArrayType(mod.Context().Int8Type(), int(size)) alloca := builder.CreateAlloca(allocaType, "stackalloc.alloca") + alloca.SetAlignment(alignment) // Zero the allocation inside the block where the value was originally allocated. zero := llvm.ConstNull(alloca.Type().ElementType()) builder.SetInsertPointBefore(bitcast) - builder.CreateStore(zero, alloca) + store := builder.CreateStore(zero, alloca) + store.SetAlignment(alignment) // Replace heap alloc bitcast with stack alloc bitcast. stackalloc := builder.CreateBitCast(alloca, bitcast.Type(), "stackalloc") diff --git a/transform/testdata/allocs.out.ll b/transform/testdata/allocs.out.ll index ec7c4c59..48f9b768 100644 --- a/transform/testdata/allocs.out.ll +++ b/transform/testdata/allocs.out.ll @@ -6,17 +6,17 @@ target triple = "armv7m-none-eabi" declare nonnull i8* @runtime.alloc(i32, i8*) define void @testInt() { - %stackalloc.alloca = alloca [1 x i32], align 4 - store [1 x i32] zeroinitializer, [1 x i32]* %stackalloc.alloca, align 4 - %stackalloc = bitcast [1 x i32]* %stackalloc.alloca to i32* + %stackalloc.alloca = alloca [4 x i8], align 4 + store [4 x i8] zeroinitializer, [4 x i8]* %stackalloc.alloca, align 4 + %stackalloc = bitcast [4 x i8]* %stackalloc.alloca to i32* store i32 5, i32* %stackalloc, align 4 ret void } define i16 @testArray() { - %stackalloc.alloca = alloca [2 x i32], align 4 - store [2 x i32] zeroinitializer, [2 x i32]* %stackalloc.alloca, align 4 - %stackalloc = bitcast [2 x i32]* %stackalloc.alloca to i16* + %stackalloc.alloca = alloca [6 x i8], align 2 + store [6 x i8] zeroinitializer, [6 x i8]* %stackalloc.alloca, align 2 + %stackalloc = bitcast [6 x i8]* 
%stackalloc.alloca to i16* %1 = getelementptr i16, i16* %stackalloc, i32 1 store i16 5, i16* %1, align 2 %2 = getelementptr i16, i16* %stackalloc, i32 2 @@ -39,9 +39,9 @@ define void @testEscapingCall2() { } define void @testNonEscapingCall() { - %stackalloc.alloca = alloca [1 x i32], align 4 - store [1 x i32] zeroinitializer, [1 x i32]* %stackalloc.alloca, align 4 - %stackalloc = bitcast [1 x i32]* %stackalloc.alloca to i32* + %stackalloc.alloca = alloca [4 x i8], align 4 + store [4 x i8] zeroinitializer, [4 x i8]* %stackalloc.alloca, align 4 + %stackalloc = bitcast [4 x i8]* %stackalloc.alloca to i32* %1 = call i32* @noescapeIntPtr(i32* %stackalloc) ret void } @@ -54,12 +54,12 @@ define i32* @testEscapingReturn() { define void @testNonEscapingLoop() { entry: - %stackalloc.alloca = alloca [1 x i32], align 4 + %stackalloc.alloca = alloca [4 x i8], align 4 br label %loop loop: ; preds = %loop, %entry - store [1 x i32] zeroinitializer, [1 x i32]* %stackalloc.alloca, align 4 - %stackalloc = bitcast [1 x i32]* %stackalloc.alloca to i32* + store [4 x i8] zeroinitializer, [4 x i8]* %stackalloc.alloca, align 4 + %stackalloc = bitcast [4 x i8]* %stackalloc.alloca to i32* %0 = call i32* @noescapeIntPtr(i32* %stackalloc) %1 = icmp eq i32* null, %0 br i1 %1, label %loop, label %end