diff --git a/transform/allocs.go b/transform/allocs.go index 8bc81783..ef5467bc 100644 --- a/transform/allocs.go +++ b/transform/allocs.go @@ -95,19 +95,36 @@ func OptimizeAllocs(mod llvm.Module, printAllocs *regexp.Regexp, logger func(tok } // The pointer value does not escape. + // Determine the appropriate alignment of the alloca. The size of the + // allocation gives us a hint what the alignment should be. + var alignment int + if size%2 != 0 { + alignment = 1 + } else if size%4 != 0 { + alignment = 2 + } else if size%8 != 0 { + alignment = 4 + } else { + alignment = 8 + } + if pointerAlignment := targetData.ABITypeAlignment(i8ptrType); pointerAlignment < alignment { + // Use min(alignment, alignof(void*)) as the alignment. + alignment = pointerAlignment + } + // Insert alloca in the entry block. Do it here so that mem2reg can // promote it to a SSA value. fn := bitcast.InstructionParent().Parent() builder.SetInsertPointBefore(fn.EntryBasicBlock().FirstInstruction()) - alignment := targetData.ABITypeAlignment(i8ptrType) - sizeInWords := (size + uint64(alignment) - 1) / uint64(alignment) - allocaType := llvm.ArrayType(mod.Context().IntType(alignment*8), int(sizeInWords)) + allocaType := llvm.ArrayType(mod.Context().Int8Type(), int(size)) alloca := builder.CreateAlloca(allocaType, "stackalloc.alloca") + alloca.SetAlignment(alignment) // Zero the allocation inside the block where the value was originally allocated. zero := llvm.ConstNull(alloca.Type().ElementType()) builder.SetInsertPointBefore(bitcast) - builder.CreateStore(zero, alloca) + store := builder.CreateStore(zero, alloca) + store.SetAlignment(alignment) // Replace heap alloc bitcast with stack alloc bitcast. stackalloc := builder.CreateBitCast(alloca, bitcast.Type(), "stackalloc") diff --git a/transform/testdata/allocs.out.ll b/transform/testdata/allocs.out.ll index ec7c4c59..48f9b768 100644 --- a/transform/testdata/allocs.out.ll +++ b/transform/testdata/allocs.out.ll @@ -6,17 +6,17 @@ target triple = "armv7m-none-eabi" declare nonnull i8* @runtime.alloc(i32, i8*) define void @testInt() { - %stackalloc.alloca = alloca [1 x i32], align 4 - store [1 x i32] zeroinitializer, [1 x i32]* %stackalloc.alloca, align 4 - %stackalloc = bitcast [1 x i32]* %stackalloc.alloca to i32* + %stackalloc.alloca = alloca [4 x i8], align 4 + store [4 x i8] zeroinitializer, [4 x i8]* %stackalloc.alloca, align 4 + %stackalloc = bitcast [4 x i8]* %stackalloc.alloca to i32* store i32 5, i32* %stackalloc, align 4 ret void } define i16 @testArray() { - %stackalloc.alloca = alloca [2 x i32], align 4 - store [2 x i32] zeroinitializer, [2 x i32]* %stackalloc.alloca, align 4 - %stackalloc = bitcast [2 x i32]* %stackalloc.alloca to i16* + %stackalloc.alloca = alloca [6 x i8], align 2 + store [6 x i8] zeroinitializer, [6 x i8]* %stackalloc.alloca, align 2 + %stackalloc = bitcast [6 x i8]* %stackalloc.alloca to i16* %1 = getelementptr i16, i16* %stackalloc, i32 1 store i16 5, i16* %1, align 2 %2 = getelementptr i16, i16* %stackalloc, i32 2 @@ -39,9 +39,9 @@ define void @testEscapingCall2() { } define void @testNonEscapingCall() { - %stackalloc.alloca = alloca [1 x i32], align 4 - store [1 x i32] zeroinitializer, [1 x i32]* %stackalloc.alloca, align 4 - %stackalloc = bitcast [1 x i32]* %stackalloc.alloca to i32* + %stackalloc.alloca = alloca [4 x i8], align 4 + store [4 x i8] zeroinitializer, [4 x i8]* %stackalloc.alloca, align 4 + %stackalloc = bitcast [4 x i8]* %stackalloc.alloca to i32* %1 = call i32* @noescapeIntPtr(i32* %stackalloc) ret void } @@ -54,12 +54,12 @@ define i32* @testEscapingReturn() { define void @testNonEscapingLoop() { entry: - %stackalloc.alloca = alloca [1 x i32], align 4 + %stackalloc.alloca = alloca [4 x i8], align 4 br label %loop loop: ; preds = %loop, %entry - store [1 x i32] zeroinitializer, [1 x i32]* %stackalloc.alloca, align 4 - %stackalloc = bitcast [1 x i32]* %stackalloc.alloca to i32* + store [4 x i8] zeroinitializer, [4 x i8]* %stackalloc.alloca, align 4 + %stackalloc = bitcast [4 x i8]* %stackalloc.alloca to i32* %0 = call i32* @noescapeIntPtr(i32* %stackalloc) %1 = icmp eq i32* null, %0 br i1 %1, label %loop, label %end