Browse Source

transform: allocate the correct amount of bytes in an alloca

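When I originally wrote this code, I didn't know about SetAlignment, so I
worked around it by allocating arrays of pointer-sized integers
([...]uintptr). However, that rounds the allocation up to a whole number of
pointer-sized words, which allocates a few too many bytes in some cases (for
example, a 6-byte allocation took 8 bytes on a 32-bit target).
This commit changes the transform to allocate only the space that is
actually needed, as a byte array with an explicit alignment.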

The effect on code size is mixed but generally positive: the combined
average size is reduced by 0.27%, and more programs shrink than grow.
pull/2198/head
Ayke van Laethem 3 years ago
committed by Ron Evans
parent
commit
ef8c1a187d
  1. 25
      transform/allocs.go
  2. 24
      transform/testdata/allocs.out.ll

25
transform/allocs.go

@ -95,19 +95,36 @@ func OptimizeAllocs(mod llvm.Module, printAllocs *regexp.Regexp, logger func(tok
} }
// The pointer value does not escape. // The pointer value does not escape.
// Determine the appropriate alignment of the alloca. The size of the
// allocation gives us a hint what the alignment should be.
var alignment int
if size%2 != 0 {
alignment = 1
} else if size%4 != 0 {
alignment = 2
} else if size%8 != 0 {
alignment = 4
} else {
alignment = 8
}
if pointerAlignment := targetData.ABITypeAlignment(i8ptrType); pointerAlignment < alignment {
// Use min(alignment, alignof(void*)) as the alignment.
alignment = pointerAlignment
}
// Insert alloca in the entry block. Do it here so that mem2reg can // Insert alloca in the entry block. Do it here so that mem2reg can
// promote it to a SSA value. // promote it to a SSA value.
fn := bitcast.InstructionParent().Parent() fn := bitcast.InstructionParent().Parent()
builder.SetInsertPointBefore(fn.EntryBasicBlock().FirstInstruction()) builder.SetInsertPointBefore(fn.EntryBasicBlock().FirstInstruction())
alignment := targetData.ABITypeAlignment(i8ptrType) allocaType := llvm.ArrayType(mod.Context().Int8Type(), int(size))
sizeInWords := (size + uint64(alignment) - 1) / uint64(alignment)
allocaType := llvm.ArrayType(mod.Context().IntType(alignment*8), int(sizeInWords))
alloca := builder.CreateAlloca(allocaType, "stackalloc.alloca") alloca := builder.CreateAlloca(allocaType, "stackalloc.alloca")
alloca.SetAlignment(alignment)
// Zero the allocation inside the block where the value was originally allocated. // Zero the allocation inside the block where the value was originally allocated.
zero := llvm.ConstNull(alloca.Type().ElementType()) zero := llvm.ConstNull(alloca.Type().ElementType())
builder.SetInsertPointBefore(bitcast) builder.SetInsertPointBefore(bitcast)
builder.CreateStore(zero, alloca) store := builder.CreateStore(zero, alloca)
store.SetAlignment(alignment)
// Replace heap alloc bitcast with stack alloc bitcast. // Replace heap alloc bitcast with stack alloc bitcast.
stackalloc := builder.CreateBitCast(alloca, bitcast.Type(), "stackalloc") stackalloc := builder.CreateBitCast(alloca, bitcast.Type(), "stackalloc")

24
transform/testdata/allocs.out.ll

@ -6,17 +6,17 @@ target triple = "armv7m-none-eabi"
declare nonnull i8* @runtime.alloc(i32, i8*) declare nonnull i8* @runtime.alloc(i32, i8*)
define void @testInt() { define void @testInt() {
%stackalloc.alloca = alloca [1 x i32], align 4 %stackalloc.alloca = alloca [4 x i8], align 4
store [1 x i32] zeroinitializer, [1 x i32]* %stackalloc.alloca, align 4 store [4 x i8] zeroinitializer, [4 x i8]* %stackalloc.alloca, align 4
%stackalloc = bitcast [1 x i32]* %stackalloc.alloca to i32* %stackalloc = bitcast [4 x i8]* %stackalloc.alloca to i32*
store i32 5, i32* %stackalloc, align 4 store i32 5, i32* %stackalloc, align 4
ret void ret void
} }
define i16 @testArray() { define i16 @testArray() {
%stackalloc.alloca = alloca [2 x i32], align 4 %stackalloc.alloca = alloca [6 x i8], align 2
store [2 x i32] zeroinitializer, [2 x i32]* %stackalloc.alloca, align 4 store [6 x i8] zeroinitializer, [6 x i8]* %stackalloc.alloca, align 2
%stackalloc = bitcast [2 x i32]* %stackalloc.alloca to i16* %stackalloc = bitcast [6 x i8]* %stackalloc.alloca to i16*
%1 = getelementptr i16, i16* %stackalloc, i32 1 %1 = getelementptr i16, i16* %stackalloc, i32 1
store i16 5, i16* %1, align 2 store i16 5, i16* %1, align 2
%2 = getelementptr i16, i16* %stackalloc, i32 2 %2 = getelementptr i16, i16* %stackalloc, i32 2
@ -39,9 +39,9 @@ define void @testEscapingCall2() {
} }
define void @testNonEscapingCall() { define void @testNonEscapingCall() {
%stackalloc.alloca = alloca [1 x i32], align 4 %stackalloc.alloca = alloca [4 x i8], align 4
store [1 x i32] zeroinitializer, [1 x i32]* %stackalloc.alloca, align 4 store [4 x i8] zeroinitializer, [4 x i8]* %stackalloc.alloca, align 4
%stackalloc = bitcast [1 x i32]* %stackalloc.alloca to i32* %stackalloc = bitcast [4 x i8]* %stackalloc.alloca to i32*
%1 = call i32* @noescapeIntPtr(i32* %stackalloc) %1 = call i32* @noescapeIntPtr(i32* %stackalloc)
ret void ret void
} }
@ -54,12 +54,12 @@ define i32* @testEscapingReturn() {
define void @testNonEscapingLoop() { define void @testNonEscapingLoop() {
entry: entry:
%stackalloc.alloca = alloca [1 x i32], align 4 %stackalloc.alloca = alloca [4 x i8], align 4
br label %loop br label %loop
loop: ; preds = %loop, %entry loop: ; preds = %loop, %entry
store [1 x i32] zeroinitializer, [1 x i32]* %stackalloc.alloca, align 4 store [4 x i8] zeroinitializer, [4 x i8]* %stackalloc.alloca, align 4
%stackalloc = bitcast [1 x i32]* %stackalloc.alloca to i32* %stackalloc = bitcast [4 x i8]* %stackalloc.alloca to i32*
%0 = call i32* @noescapeIntPtr(i32* %stackalloc) %0 = call i32* @noescapeIntPtr(i32* %stackalloc)
%1 = icmp eq i32* null, %0 %1 = icmp eq i32* null, %0
br i1 %1, label %loop, label %end br i1 %1, label %loop, label %end

Loading…
Cancel
Save