tinygo/builder/sizes.go

package builder

import (
	"bytes"
	"debug/dwarf"
	"debug/elf"
	"debug/macho"
	"debug/pe"
	"encoding/binary"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/aykevl/go-wasm"
	"github.com/tinygo-org/tinygo/goenv"
)

// Set to true to print extra debug logs.
const sizesDebug = false

// programSize contains size statistics per package of a compiled program.
type programSize struct {
	Packages map[string]packageSize
	Code     uint64
	ROData   uint64
	Data     uint64
	BSS      uint64
}

// sortedPackageNames returns the list of package names (ProgramSize.Packages)
// sorted alphabetically.
func (ps *programSize) sortedPackageNames() []string {
	names := make([]string, 0, len(ps.Packages))
	for name := range ps.Packages {
		names = append(names, name)
	}
	sort.Strings(names)
	return names
}

// Flash usage in regular microcontrollers.
func (ps *programSize) Flash() uint64 {
	return ps.Code + ps.ROData + ps.Data
}

// Static RAM usage in regular microcontrollers.
func (ps *programSize) RAM() uint64 {
	return ps.Data + ps.BSS
}

// packageSize contains the size of a package, calculated from the linked object
// file.
type packageSize struct {
	Code   uint64
	ROData uint64
	Data   uint64
	BSS    uint64
}

// Flash usage in regular microcontrollers.
func (ps *packageSize) Flash() uint64 {
	return ps.Code + ps.ROData + ps.Data
}

// Static RAM usage in regular microcontrollers.
func (ps *packageSize) RAM() uint64 {
	return ps.Data + ps.BSS
}

// A mapping of a single chunk of code or data to a file path.
type addressLine struct {
	Address    uint64
	Length     uint64 // length of this chunk
	Align      uint64 // (maximum) alignment of this line
	File       string // file path as stored in DWARF
	IsVariable bool   // true if this is a variable (or constant), false if it is code
}

// Sections defined in the input file. This struct defines them in a
// filetype-agnostic way but roughly follow the ELF types (.text, .data, .bss,
// etc).
type memorySection struct {
	Type    memoryType
	Address uint64
	Size    uint64
	Align   uint64
}

type memoryType int

const (
	memoryCode memoryType = iota + 1
	memoryData
	memoryROData
	memoryBSS
	memoryStack
)

func (t memoryType) String() string {
	return [...]string{
		0:            "-",
		memoryCode:   "code",
		memoryData:   "data",
		memoryROData: "rodata",
		memoryBSS:    "bss",
		memoryStack:  "stack",
	}[t]
}

// Regular expressions to match particular symbol names. These are not stored as
// DWARF variables because they have no mapping to source code global variables.
var (
	// Various globals that aren't a variable but nonetheless need to be stored
	// somewhere:
	//   alloc:  heap allocations during init interpretation
	//   pack:   data created when storing a constant in an interface for example
	//   string: buffer behind strings
	packageSymbolRegexp = regexp.MustCompile(`\$(alloc|pack|string)(\.[0-9]+)?$`)
)

// readProgramSizeFromDWARF reads the source location for each line of code and
// each variable in the program, as far as this is stored in the DWARF debug
// information.
func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset, codeAlignment uint64, skipTombstone bool) ([]addressLine, error) {
	r := data.Reader()
	var lines []*dwarf.LineFile
	var addresses []addressLine
	for {
		e, err := r.Next()
		if err != nil {
			return nil, err
		}
		if e == nil {
			break
		}
		switch e.Tag {
		case dwarf.TagCompileUnit:
			// Found a compile unit.
			// We can read the .debug_line section using it, which contains a
			// mapping for most instructions to their file/line/column - even
			// for inlined functions!
			lr, err := data.LineReader(e)
			if err != nil {
				return nil, err
			}
			lines = lr.Files()
			var lineEntry = dwarf.LineEntry{
				EndSequence: true,
			}

			// Line tables are organized as sequences of line entries until an
			// end sequence. A single line table can contain multiple such
			// sequences. The last line entry is an EndSequence to indicate the
			// end.
			for {
				// Read the next .debug_line entry.
				prevLineEntry := lineEntry
				err := lr.Next(&lineEntry)
				if err != nil {
					if err == io.EOF {
						break
					}
					return nil, err
				}

				if prevLineEntry.EndSequence && lineEntry.Address == 0 && skipTombstone {
					// Tombstone value. This symbol has been removed, for
					// example by the --gc-sections linker flag. It is still
					// here in the debug information because the linker can't
					// just remove this reference.
					// Read until the next EndSequence so that this sequence is
					// skipped.
					// For more details, see (among others):
					// https://reviews.llvm.org/D84825
					// The value 0 can however really occur in object files,
					// that typically start at address 0. So don't skip
					// tombstone values in object files (like when parsing MachO
					// files).
					for {
						err := lr.Next(&lineEntry)
						if err != nil {
							return nil, err
						}
						if lineEntry.EndSequence {
							break
						}
					}
				}

				if !prevLineEntry.EndSequence {
					// The chunk describes the code from prevLineEntry to
					// lineEntry.
					line := addressLine{
						Address: prevLineEntry.Address + codeOffset,
						Length:  lineEntry.Address - prevLineEntry.Address,
						Align:   codeAlignment,
						File:    prevLineEntry.File.Name,
					}
					if line.Length != 0 {
						addresses = append(addresses, line)
					}
				}
			}
		case dwarf.TagVariable:
			// Global variable (or constant). Most of these are not actually
			// stored in the binary, because they have been optimized out. Only
			// the ones with a location are still present.
			r.SkipChildren()

			file := e.AttrField(dwarf.AttrDeclFile)
			location := e.AttrField(dwarf.AttrLocation)
			globalType := e.AttrField(dwarf.AttrType)
			if file == nil || location == nil || globalType == nil {
				// Doesn't contain the requested information.
				continue
			}

			// Try to parse the location. While this could in theory be a very
			// complex expression, usually it's just a DW_OP_addr opcode
			// followed by an address.
			addr, err := readDWARFConstant(r.AddressSize(), location.Val.([]uint8))
			if err != nil {
				continue // ignore the error, we don't know what to do with it
			}

			// Parse the type of the global variable, which (importantly)
			// contains the variable size. We're not interested in the type,
			// only in the size.
			typ, err := data.Type(globalType.Val.(dwarf.Offset))
			if err != nil {
				return nil, err
			}

			// Read alignment, if it's stored as part of the debug information.
			var alignment uint64
			if attr := e.AttrField(dwarf.AttrAlignment); attr != nil {
				alignment = uint64(attr.Val.(int64))
			}

			addresses = append(addresses, addressLine{
				Address:    addr,
				Length:     uint64(typ.Size()),
				Align:      alignment,
				File:       lines[file.Val.(int64)].Name,
				IsVariable: true,
			})
		default:
			r.SkipChildren()
		}
	}
	return addresses, nil
}

// Parse a DWARF constant. For addresses, this is usually a very simple
// expression.
func readDWARFConstant(addressSize int, bytecode []byte) (uint64, error) {
	var addr uint64
	for len(bytecode) != 0 {
		op := bytecode[0]
		bytecode = bytecode[1:]
		switch op {
		case 0x03: // DW_OP_addr
			switch addressSize {
			case 2:
				addr = uint64(binary.LittleEndian.Uint16(bytecode))
			case 4:
				addr = uint64(binary.LittleEndian.Uint32(bytecode))
			case 8:
				addr = binary.LittleEndian.Uint64(bytecode)
			default:
				panic("unexpected address size")
			}
			bytecode = bytecode[addressSize:]
		case 0x23: // DW_OP_plus_uconst
			offset, n := readULEB128(bytecode)
			addr += offset
			bytecode = bytecode[n:]
		default:
			return 0, fmt.Errorf("unknown DWARF opcode: 0x%x", op)
		}
	}
	return addr, nil
}

// Source: https://en.wikipedia.org/wiki/LEB128#Decode_unsigned_integer
func readULEB128(buf []byte) (result uint64, n int) {
	var shift uint8
	for {
		b := buf[n]
		n++
		result |= uint64(b&0x7f) << shift
		if b&0x80 == 0 {
			break
		}
		shift += 7
	}
	return
}

// Read a MachO object file and return a line table.
// Also return an index from symbol name to start address in the line table.
func readMachOSymbolAddresses(path string) (map[string]int, []addressLine, error) {
	// Some constants from mach-o/nlist.h
	// See: https://opensource.apple.com/source/xnu/xnu-7195.141.2/EXTERNAL_HEADERS/mach-o/nlist.h.auto.html
	const (
		N_STAB = 0xe0
		N_TYPE = 0x0e // bitmask for N_TYPE field
		N_SECT = 0xe  // one of the possible type in the N_TYPE field
	)

	// Read DWARF from the given object file.
	file, err := macho.Open(path)
	if err != nil {
		return nil, nil, err
	}
	defer file.Close()
	dwarf, err := file.DWARF()
	if err != nil {
		return nil, nil, err
	}
	lines, err := readProgramSizeFromDWARF(dwarf, 0, 0, false)
	if err != nil {
		return nil, nil, err
	}

	// Make a map from start addresses to indices in the line table (because the
	// line table is a slice, not a map).
	addressToLine := make(map[uint64]int, len(lines))
	for i, line := range lines {
		if _, ok := addressToLine[line.Address]; ok {
			addressToLine[line.Address] = -1
			continue
		}
		addressToLine[line.Address] = i
	}

	// Make a map that for each symbol gives the start index in the line table.
	addresses := make(map[string]int, len(addressToLine))
	for _, symbol := range file.Symtab.Syms {
		if symbol.Type&N_STAB != 0 {
			continue // STABS entry, ignore
		}
		if symbol.Type&0x0e != N_SECT {
			continue // undefined symbol
		}
		if index, ok := addressToLine[symbol.Value]; ok && index >= 0 {
			if _, ok := addresses[symbol.Name]; ok {
				// There is a duplicate. Mark it as unavailable.
				addresses[symbol.Name] = -1
				continue
			}
			addresses[symbol.Name] = index
		}
	}

	return addresses, lines, nil
}

// loadProgramSize calculate a program/data size breakdown of each package for a
// given ELF file.
// If the file doesn't contain DWARF debug information, the returned program
// size will still have valid summaries but won't have complete size information
// per package.
func loadProgramSize(path string, packagePathMap map[string]string) (*programSize, error) {
	// Open the binary file.
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// This stores all chunks of addresses found in the binary.
	var addresses []addressLine

	// Load the binary file, which could be in a number of file formats.
	var sections []memorySection
	if file, err := elf.NewFile(f); err == nil {
		var codeAlignment uint64
		switch file.Machine {
		case elf.EM_ARM:
			codeAlignment = 4 // usually 2, but can be 4
		}
		// Read DWARF information. The error is intentionally ignored.
		data, _ := file.DWARF()
		if data != nil {
			addresses, err = readProgramSizeFromDWARF(data, 0, codeAlignment, true)
			if err != nil {
				// However, _do_ report an error here. Something must have gone
				// wrong while trying to parse DWARF data.
				return nil, err
			}
		}

		// Read the ELF symbols for some more chunks of location information.
		// Some globals (such as strings) aren't stored in the DWARF debug
		// information and therefore need to be obtained in a different way.
		allSymbols, err := file.Symbols()
		if err != nil {
			return nil, err
		}
		for _, symbol := range allSymbols {
			symType := elf.ST_TYPE(symbol.Info)
			if symbol.Size == 0 {
				continue
			}
			if symType != elf.STT_FUNC && symType != elf.STT_OBJECT && symType != elf.STT_NOTYPE {
				continue
			}
			if symbol.Section >= elf.SHN_LORESERVE {
				// Not a regular section, so skip it.
				// One example is elf.SHN_ABS, which is used for symbols
				// declared with an absolute value such as the memset function
				// on the ESP32 which is defined in the mask ROM.
				continue
			}
			section := file.Sections[symbol.Section]
			if section.Flags&elf.SHF_ALLOC == 0 {
				continue
			}
			if packageSymbolRegexp.MatchString(symbol.Name) || symbol.Name == "__isr_vector" {
				addresses = append(addresses, addressLine{
					Address:    symbol.Value,
					Length:     symbol.Size,
					File:       symbol.Name,
					IsVariable: true,
				})
			}
		}

		// Load allocated sections.
		for _, section := range file.Sections {
			if section.Flags&elf.SHF_ALLOC == 0 {
				continue
			}
			if section.Type == elf.SHT_NOBITS {
				if section.Name == ".stack" {
					// TinyGo emits stack sections on microcontroller using the
					// ".stack" name.
					// This is a bit ugly, but I don't think there is a way to
					// mark the stack section in a linker script.
					sections = append(sections, memorySection{
						Address: section.Addr,
						Size:    section.Size,
						Align:   section.Addralign,
						Type:    memoryStack,
					})
				} else {
					// Regular .bss section.
					sections = append(sections, memorySection{
						Address: section.Addr,
						Size:    section.Size,
						Align:   section.Addralign,
						Type:    memoryBSS,
					})
				}
			} else if section.Type == elf.SHT_PROGBITS && section.Flags&elf.SHF_EXECINSTR != 0 {
				// .text
				sections = append(sections, memorySection{
					Address: section.Addr,
					Size:    section.Size,
					Align:   section.Addralign,
					Type:    memoryCode,
				})
			} else if section.Type == elf.SHT_PROGBITS && section.Flags&elf.SHF_WRITE != 0 {
				// .data
				sections = append(sections, memorySection{
					Address: section.Addr,
					Size:    section.Size,
					Align:   section.Addralign,
					Type:    memoryData,
				})
			} else if section.Type == elf.SHT_PROGBITS {
				// .rodata
				sections = append(sections, memorySection{
					Address: section.Addr,
					Size:    section.Size,
					Align:   section.Addralign,
					Type:    memoryROData,
				})
			}
		}
	} else if file, err := macho.NewFile(f); err == nil {
		// Read segments, for use while reading through sections.
		segments := map[string]*macho.Segment{}
		for _, load := range file.Loads {
			switch load := load.(type) {
			case *macho.Segment:
				segments[load.Name] = load
			}
		}

		// Read MachO sections.
		for _, section := range file.Sections {
			sectionType := section.Flags & 0xff
			sectionFlags := section.Flags >> 8
			segment := segments[section.Seg]
			// For the constants used here, see:
			// https://github.com/llvm/llvm-project/blob/release/14.x/llvm/include/llvm/BinaryFormat/MachO.h
			if sectionFlags&0x800000 != 0 { // S_ATTR_PURE_INSTRUCTIONS
				// Section containing only instructions.
				sections = append(sections, memorySection{
					Address: section.Addr,
					Size:    uint64(section.Size),
					Align:   uint64(section.Align),
					Type:    memoryCode,
				})
			} else if sectionType == 1 { // S_ZEROFILL
				// Section filled with zeroes on demand.
				sections = append(sections, memorySection{
					Address: section.Addr,
					Size:    uint64(section.Size),
					Align:   uint64(section.Align),
					Type:    memoryBSS,
				})
			} else if segment.Maxprot&0b011 == 0b001 { // --r (read-only data)
				// Protection doesn't allow writes, so mark this section read-only.
				sections = append(sections, memorySection{
					Address: section.Addr,
					Size:    uint64(section.Size),
					Align:   uint64(section.Align),
					Type:    memoryROData,
				})
			} else {
				// The rest is assumed to be regular data.
				sections = append(sections, memorySection{
					Address: section.Addr,
					Size:    uint64(section.Size),
					Align:   uint64(section.Align),
					Type:    memoryData,
				})
			}
		}

		// Read DWARF information.
		// The data isn't stored directly in the binary as in most executable
		// formats. Instead, it is left in the object files that were used as a
		// basis for linking. The executable does however contain STABS debug
		// information that points to the source object file and is used by
		// debuggers.
		// For more information:
		// http://wiki.dwarfstd.org/index.php?title=Apple%27s_%22Lazy%22_DWARF_Scheme
		var objSymbolNames map[string]int
		var objAddresses []addressLine
		var previousSymbol macho.Symbol
		for _, symbol := range file.Symtab.Syms {
			// STABS constants, from mach-o/stab.h:
			// https://opensource.apple.com/source/xnu/xnu-7195.141.2/EXTERNAL_HEADERS/mach-o/stab.h.auto.html
			const (
				N_GSYM  = 0x20
				N_FUN   = 0x24
				N_STSYM = 0x26
				N_SO    = 0x64
				N_OSO   = 0x66
			)
			if symbol.Type == N_OSO {
				// Found an object file. Now try to parse it.
				objSymbolNames, objAddresses, err = readMachOSymbolAddresses(symbol.Name)
				if err != nil && sizesDebug {
					// Errors are normally ignored. If there is an error, it's
					// simply treated as that the DWARF is not available.
					fmt.Fprintf(os.Stderr, "could not read DWARF from file %s: %s\n", symbol.Name, err)
				}
			} else if symbol.Type == N_FUN {
				// Found a function.
				// The way this is encoded is a bit weird. MachO symbols don't
				// have a length. What I've found is that the length is encoded
				// by first having a N_FUN symbol as usual, and then having a
				// symbol with a zero-length name that has the value not set to
				// the address of the symbol but to the length. So in order to
				// get both the address and the length, we look for a symbol
				// with a name followed by a symbol without a name.
				if symbol.Name == "" && previousSymbol.Type == N_FUN && previousSymbol.Name != "" {
					// Functions are encoded as many small chunks in the line
					// table (one or a few instructions per source line). But
					// the symbol length covers the whole symbols, over many
					// lines and possibly including inlined functions. So we
					// continue to iterate through the objAddresses slice until
					// we've found all the source lines that are part of this
					// symbol.
					address := previousSymbol.Value
					length := symbol.Value
					if index, ok := objSymbolNames[previousSymbol.Name]; ok && index >= 0 {
						for length > 0 {
							line := objAddresses[index]
							line.Address = address
							if line.Length > length {
								// Line extends beyond the end of te symbol?
								// Weird, shouldn't happen.
								break
							}
							addresses = append(addresses, line)
							index++
							length -= line.Length
							address += line.Length
						}
					}
				}
			} else if symbol.Type == N_GSYM || symbol.Type == N_STSYM {
				// Global variables.
				if index, ok := objSymbolNames[symbol.Name]; ok {
					address := objAddresses[index]
					address.Address = symbol.Value
					addresses = append(addresses, address)
				}
			}
			previousSymbol = symbol
		}
	} else if file, err := pe.NewFile(f); err == nil {
		// Read DWARF information. The error is intentionally ignored.
		data, _ := file.DWARF()
		if data != nil {
			addresses, err = readProgramSizeFromDWARF(data, 0, 0, true)
			if err != nil {
				// However, _do_ report an error here. Something must have gone
				// wrong while trying to parse DWARF data.
				return nil, err
			}
		}

		// Read COFF sections.
		optionalHeader := file.OptionalHeader.(*pe.OptionalHeader64)
		for _, section := range file.Sections {
			// For more information:
			// https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_section_header
			const (
				IMAGE_SCN_CNT_CODE             = 0x00000020
				IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040
				IMAGE_SCN_MEM_DISCARDABLE      = 0x02000000
				IMAGE_SCN_MEM_READ             = 0x40000000
				IMAGE_SCN_MEM_WRITE            = 0x80000000
			)
			if section.Characteristics&IMAGE_SCN_MEM_DISCARDABLE != 0 {
				// Debug sections, etc.
				continue
			}
			address := uint64(section.VirtualAddress) + optionalHeader.ImageBase
			if section.Characteristics&IMAGE_SCN_CNT_CODE != 0 {
				// .text
				sections = append(sections, memorySection{
					Address: address,
					Size:    uint64(section.VirtualSize),
					Type:    memoryCode,
				})
			} else if section.Characteristics&IMAGE_SCN_CNT_INITIALIZED_DATA != 0 {
				if section.Characteristics&IMAGE_SCN_MEM_WRITE != 0 {
					// .data
					sections = append(sections, memorySection{
						Address: address,
						Size:    uint64(section.Size),
						Type:    memoryData,
					})
					if section.Size < section.VirtualSize {
						// Equivalent of a .bss section.
						// Note: because of how the PE/COFF format is
						// structured, not all zero-initialized data is marked
						// as such. A portion may be at the end of the .data
						// section and is thus marked as initialized data.
						sections = append(sections, memorySection{
							Address: address + uint64(section.Size),
							Size:    uint64(section.VirtualSize) - uint64(section.Size),
							Type:    memoryBSS,
						})
					}
				} else if section.Characteristics&IMAGE_SCN_MEM_READ != 0 {
					// .rdata, .buildid, .pdata
					sections = append(sections, memorySection{
						Address: address,
						Size:    uint64(section.VirtualSize),
						Type:    memoryROData,
					})
				}
			}
		}
	} else if file, err := wasm.Parse(f); err == nil {
		// File is in WebAssembly format.

		// Put code at a very high address, so that it won't conflict with the
		// data in the memory section.
		const codeOffset = 0x8000_0000_0000_0000

		// Read DWARF information. The error is intentionally ignored.
		data, _ := file.DWARF()
		if data != nil {
			addresses, err = readProgramSizeFromDWARF(data, codeOffset, 0, true)
			if err != nil {
				// However, _do_ report an error here. Something must have gone
				// wrong while trying to parse DWARF data.
				return nil, err
			}
		}

		var linearMemorySize uint64
		for _, section := range file.Sections {
			switch section := section.(type) {
			case *wasm.SectionCode:
				sections = append(sections, memorySection{
					Address: codeOffset,
					Size:    uint64(section.Size()),
					Type:    memoryCode,
				})
			case *wasm.SectionMemory:
				// This value is used when processing *wasm.SectionData (which
				// always comes after *wasm.SectionMemory).
				linearMemorySize = uint64(section.Entries[0].Limits.Initial) * 64 * 1024
			case *wasm.SectionData:
				// Data sections contain initial values for linear memory.
				// First load the list of data sections, and sort them by
				// address for easier processing.
				var dataSections []memorySection
				for _, entry := range section.Entries {
					address, err := wasm.Eval(bytes.NewBuffer(entry.Offset))
					if err != nil {
						return nil, fmt.Errorf("could not parse data section address: %w", err)
					}
					dataSections = append(dataSections, memorySection{
						Address: uint64(address[0].(int32)),
						Size:    uint64(len(entry.Data)),
						Type:    memoryData,
					})
				}
				sort.Slice(dataSections, func(i, j int) bool {
					return dataSections[i].Address < dataSections[j].Address
				})

				// And now add all data sections for linear memory.
				// Parts that are in the slice of data sections are added as
				// memoryData, and parts that are not are added as memoryBSS.
				addr := uint64(0)
				for _, section := range dataSections {
					if addr < section.Address {
						sections = append(sections, memorySection{
							Address: addr,
							Size:    section.Address - addr,
							Type:    memoryBSS,
						})
					}
					if addr > section.Address {
						// This might be allowed, I'm not sure.
						// It certainly doesn't make a lot of sense.
						return nil, fmt.Errorf("overlapping data section")
					}
					// addr == section.Address
					sections = append(sections, section)
					addr = section.Address + section.Size
				}
				if addr < linearMemorySize {
					sections = append(sections, memorySection{
						Address: addr,
						Size:    linearMemorySize - addr,
						Type:    memoryBSS,
					})
				}
			}
		}
	} else {
		return nil, fmt.Errorf("could not parse file: %w", err)
	}

	// Sort the slice of address chunks by address, so that we can iterate
	// through it to calculate section sizes.
	sort.Slice(addresses, func(i, j int) bool {
		if addresses[i].Address == addresses[j].Address {
			// Very rarely, there might be duplicate addresses.
			// If that happens, sort the largest chunks first.
			return addresses[i].Length > addresses[j].Length
		}
		return addresses[i].Address < addresses[j].Address
	})

	// Now finally determine the binary/RAM size usage per package by going
	// through each allocated section.
	sizes := make(map[string]packageSize)
	for _, section := range sections {
		switch section.Type {
		case memoryCode:
			readSection(section, addresses, func(path string, size uint64, isVariable bool) {
				field := sizes[path]
				if isVariable {
					field.ROData += size
				} else {
					field.Code += size
				}
				sizes[path] = field
			}, packagePathMap)
		case memoryROData:
			readSection(section, addresses, func(path string, size uint64, isVariable bool) {
				field := sizes[path]
				field.ROData += size
				sizes[path] = field
			}, packagePathMap)
		case memoryData:
			readSection(section, addresses, func(path string, size uint64, isVariable bool) {
				field := sizes[path]
				field.Data += size
				sizes[path] = field
			}, packagePathMap)
		case memoryBSS:
			readSection(section, addresses, func(path string, size uint64, isVariable bool) {
				field := sizes[path]
				field.BSS += size
				sizes[path] = field
			}, packagePathMap)
		case memoryStack:
			// We store the C stack as a pseudo-package.
			sizes["C stack"] = packageSize{
				BSS: section.Size,
			}
		}
	}

	// ...and summarize the results.
	program := &programSize{
		Packages: sizes,
	}
	for _, pkg := range sizes {
		program.Code += pkg.Code
		program.ROData += pkg.ROData
		program.Data += pkg.Data
		program.BSS += pkg.BSS
	}
	return program, nil
}

// readSection determines for each byte in this section to which package it
// belongs. It reports this usage through the addSize callback.
func readSection(section memorySection, addresses []addressLine, addSize func(string, uint64, bool), packagePathMap map[string]string) {
	// The addr variable tracks at which address we are while going through this
	// section. We start at the beginning.
	addr := section.Address
	sectionEnd := section.Address + section.Size
	if sizesDebug {
		fmt.Printf("%08x..%08x %5d: %s\n", addr, sectionEnd, section.Size, section.Type)
	}
	for _, line := range addresses {
		if line.Address < section.Address || line.Address+line.Length > sectionEnd {
			// Check that this line is entirely within the section.
			// Don't bother dealing with line entries that cross sections (that
			// seems rather unlikely anyway).
			continue
		}
		if addr < line.Address {
			// There is a gap: there is a space between the current and the
			// previous line entry.
			// Check whether this is caused by alignment requirements.
			addrAligned := (addr + line.Align - 1) &^ (line.Align - 1)
			if line.Align > 1 && addrAligned >= line.Address {
				// It is, assume that's what causes the gap.
				addSize("(padding)", line.Address-addr, true)
			} else {
				addSize("(unknown)", line.Address-addr, false)
				if sizesDebug {
					fmt.Printf("%08x..%08x %5d:  unknown (gap), alignment=%d\n", addr, line.Address, line.Address-addr, line.Align)
				}
			}
			addr = line.Address
		}
		if addr > line.Address+line.Length {
			// The current line is already covered by a previous line entry.
			// Simply skip it.
			continue
		}
		// At this point, addr falls within the current line (probably at the
		// start).
		length := line.Length
		if addr > line.Address {
			// There is some overlap: the previous line entry already covered
			// part of this line entry. So reduce the length to add to the
			// remaining bit of the line entry.
			length = line.Length - (addr - line.Address)
		}
		// Finally, mark this chunk of memory as used by the given package.
		addSize(findPackagePath(line.File, packagePathMap), length, line.IsVariable)
		addr = line.Address + line.Length
	}
	if addr < sectionEnd {
		// There is a gap at the end of the section.
		addrAligned := (addr + section.Align - 1) &^ (section.Align - 1)
		if section.Align > 1 && addrAligned >= sectionEnd {
			// The gap is caused by the section alignment.
			// For example, if a .rodata section ends with a non-aligned string.
			addSize("(padding)", sectionEnd-addr, true)
		} else {
			addSize("(unknown)", sectionEnd-addr, false)
			if sizesDebug {
				fmt.Printf("%08x..%08x %5d:  unknown (end), alignment=%d\n", addr, sectionEnd, sectionEnd-addr, section.Align)
			}
		}
	}
}

// findPackagePath returns the Go package (or a pseudo package) for the given
// path. It uses some heuristics, for example for some C libraries.
func findPackagePath(path string, packagePathMap map[string]string) string {
	// Check whether this path is part of one of the compiled packages.
	packagePath, ok := packagePathMap[filepath.Dir(path)]
	if !ok {
		if strings.HasPrefix(path, filepath.Join(goenv.Get("TINYGOROOT"), "lib")) {
			// Emit C libraries (in the lib subdirectory of TinyGo) as a single
			// package, with a "C" prefix. For example: "C compiler-rt" for the
			// compiler runtime library from LLVM.
			packagePath = "C " + strings.Split(strings.TrimPrefix(path, filepath.Join(goenv.Get("TINYGOROOT"), "lib")), string(os.PathSeparator))[1]
		} else if strings.HasPrefix(path, filepath.Join(goenv.Get("TINYGOROOT"), "llvm-project")) {
			packagePath = "C compiler-rt"
		} else if packageSymbolRegexp.MatchString(path) {
			// Parse symbol names like main$alloc or runtime$string.
			packagePath = path[:strings.LastIndex(path, "$")]
		} else if path == "__isr_vector" {
			packagePath = "C interrupt vector"
		} else if path == "<Go type>" {
			packagePath = "Go types"
		} else if path == "<Go interface assert>" {
			// Interface type assert, generated by the interface lowering pass.
			packagePath = "Go interface assert"
		} else if path == "<Go interface method>" {
			// Interface method wrapper (switch over all concrete types),
			// generated by the interface lowering pass.
			packagePath = "Go interface method"
		} else if path == "<stdin>" {
			// This can happen when the source code (in Go) doesn't have a
			// source file and uses "-" as the location. Somewhere this is
			// converted to "<stdin>".
			// Convert this back to the "-" string. Eventually, this should be
			// fixed in the compiler.
			packagePath = "-"
		} else {
			// This is some other path. Not sure what it is, so just emit its directory.
			packagePath = filepath.Dir(path) // fallback
		}
	}
	return packagePath
}