tinygo/builder/sizes.go


								package builder


								import (

									"bytes"

									"debug/dwarf"

									"debug/elf"

									"encoding/binary"

									"fmt"

									"io"

									"os"

									"path/filepath"

									"regexp"

									"sort"

									"strings"


									"github.com/aykevl/go-wasm"

									"github.com/tinygo-org/tinygo/goenv"

								)


								// Set to true to print extra debug logs.

								const sizesDebug = false


								// programSize contains size statistics per package of a compiled program.

								type programSize struct {

									Packages map[string]packageSize

									Code     uint64

									ROData   uint64

									Data     uint64

									BSS      uint64

								}


								// sortedPackageNames returns the list of package names (ProgramSize.Packages)

								// sorted alphabetically.

								func (ps *programSize) sortedPackageNames() []string {

									names := make([]string, 0, len(ps.Packages))

									for name := range ps.Packages {

										names = append(names, name)

									}

									sort.Strings(names)

									return names

								}


								// Flash usage in regular microcontrollers.

								func (ps *programSize) Flash() uint64 {

									return ps.Code + ps.ROData + ps.Data

								}


								// Static RAM usage in regular microcontrollers.

								func (ps *programSize) RAM() uint64 {

									return ps.Data + ps.BSS

								}


								// packageSize contains the size of a package, calculated from the linked object

								// file.

								type packageSize struct {

									Code   uint64

									ROData uint64

									Data   uint64

									BSS    uint64

								}


								// Flash usage in regular microcontrollers.

								func (ps *packageSize) Flash() uint64 {

									return ps.Code + ps.ROData + ps.Data

								}


								// Static RAM usage in regular microcontrollers.

								func (ps *packageSize) RAM() uint64 {

									return ps.Data + ps.BSS

								}


								// A mapping of a single chunk of code or data to a file path.

								type addressLine struct {

									Address    uint64

									Length     uint64 // length of this chunk

									File       string // file path as stored in DWARF

									IsVariable bool   // true if this is a variable (or constant), false if it is code

								}


								// Sections defined in the input file. This struct defines them in a

								// filetype-agnostic way but roughly follow the ELF types (.text, .data, .bss,

								// etc).

								type memorySection struct {

									Type    memoryType

									Address uint64

									Size    uint64

								}


								type memoryType int


								const (

									memoryCode memoryType = iota + 1

									memoryData

									memoryROData

									memoryBSS

									memoryStack

								)


								// Regular expressions to match particular symbol names. These are not stored as

								// DWARF variables because they have no mapping to source code global variables.

								var (

									// Various globals that aren't a variable but nonetheless need to be stored

									// somewhere:

									//   alloc:  heap allocations during init interpretation

									//   pack:   data created when storing a constant in an interface for example

									//   string: buffer behind strings

									packageSymbolRegexp = regexp.MustCompile(`\$(alloc|pack|string)(\.[0-9]+)?$`)


									// Reflect sidetables. Created by the reflect lowering pass.

									// See src/reflect/sidetables.go.

									reflectDataRegexp = regexp.MustCompile(`^reflect\.[a-zA-Z]+Sidetable$`)

								)


								// readProgramSizeFromDWARF reads the source location for each line of code and

								// each variable in the program, as far as this is stored in the DWARF debug

								// information.

								func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset uint64) ([]addressLine, error) {

									r := data.Reader()

									var lines []*dwarf.LineFile

									var addresses []addressLine

									for {

										e, err := r.Next()

										if err != nil {

											return nil, err

										}

										if e == nil {

											break

										}

										switch e.Tag {

										case dwarf.TagCompileUnit:

											// Found a compile unit.

											// We can read the .debug_line section using it, which contains a

											// mapping for most instructions to their file/line/column - even

											// for inlined functions!

											lr, err := data.LineReader(e)

											if err != nil {

												return nil, err

											}

											lines = lr.Files()

											var lineEntry = dwarf.LineEntry{

												EndSequence: true,

											}


											// Line tables are organized as sequences of line entries until an

											// end sequence. A single line table can contain multiple such

											// sequences. The last line entry is an EndSequence to indicate the

											// end.

											for {

												// Read the next .debug_line entry.

												prevLineEntry := lineEntry

												err := lr.Next(&lineEntry)

												if err != nil {

													if err == io.EOF {

														break

													}

													return nil, err

												}


												if prevLineEntry.EndSequence && lineEntry.Address == 0 {

													// Tombstone value. This symbol has been removed, for

													// example by the --gc-sections linker flag. It is still

													// here in the debug information because the linker can't

													// just remove this reference.

													// Read until the next EndSequence so that this sequence is

													// skipped.

													// For more details, see (among others):

													// https://reviews.llvm.org/D84825

													for {

														err := lr.Next(&lineEntry)

														if err != nil {

															return nil, err

														}

														if lineEntry.EndSequence {

															break

														}

													}

												}


												if !prevLineEntry.EndSequence {

													// The chunk describes the code from prevLineEntry to

													// lineEntry.

													line := addressLine{

														Address: prevLineEntry.Address + codeOffset,

														Length:  lineEntry.Address - prevLineEntry.Address,

														File:    prevLineEntry.File.Name,

													}

													if line.Length != 0 {

														addresses = append(addresses, line)

													}

												}

											}

										case dwarf.TagVariable:

											// Global variable (or constant). Most of these are not actually

											// stored in the binary, because they have been optimized out. Only

											// the ones with a location are still present.

											r.SkipChildren()


											file := e.AttrField(dwarf.AttrDeclFile)

											location := e.AttrField(dwarf.AttrLocation)

											globalType := e.AttrField(dwarf.AttrType)

											if file == nil || location == nil || globalType == nil {

												// Doesn't contain the requested information.

												continue

											}


											// Try to parse the location. While this could in theory be a very

											// complex expression, usually it's just a DW_OP_addr opcode

											// followed by an address.

											locationCode := location.Val.([]uint8)

											if locationCode[0] != 3 { // DW_OP_addr

												continue

											}

											var addr uint64

											switch len(locationCode) {

											case 1 + 2:

												addr = uint64(binary.LittleEndian.Uint16(locationCode[1:]))

											case 1 + 4:

												addr = uint64(binary.LittleEndian.Uint32(locationCode[1:]))

											case 1 + 8:

												addr = binary.LittleEndian.Uint64(locationCode[1:])

											default:

												continue // unknown address

											}


											// Parse the type of the global variable, which (importantly)

											// contains the variable size. We're not interested in the type,

											// only in the size.

											typ, err := data.Type(globalType.Val.(dwarf.Offset))

											if err != nil {

												return nil, err

											}


											addresses = append(addresses, addressLine{

												Address:    addr,

												Length:     uint64(typ.Size()),

												File:       lines[file.Val.(int64)].Name,

												IsVariable: true,

											})

										default:

											r.SkipChildren()

										}

									}

									return addresses, nil

								}


								// loadProgramSize calculate a program/data size breakdown of each package for a

								// given ELF file.

								// If the file doesn't contain DWARF debug information, the returned program

								// size will still have valid summaries but won't have complete size information

								// per package.

								func loadProgramSize(path string, packagePathMap map[string]string) (*programSize, error) {

									// Open the binary file.

									f, err := os.Open(path)

									if err != nil {

										return nil, err

									}

									defer f.Close()


									// This stores all chunks of addresses found in the binary.

									var addresses []addressLine


									// Load the binary file, which could be in a number of file formats.

									var sections []memorySection

									if file, err := elf.NewFile(f); err == nil {

										// Read DWARF information. The error is intentionally ignored.

										data, _ := file.DWARF()

										if data != nil {

											addresses, err = readProgramSizeFromDWARF(data, 0)

											if err != nil {

												// However, _do_ report an error here. Something must have gone

												// wrong while trying to parse DWARF data.

												return nil, err

											}

										}


										// Read the ELF symbols for some more chunks of location information.

										// Some globals (such as strings) aren't stored in the DWARF debug

										// information and therefore need to be obtained in a different way.

										allSymbols, err := file.Symbols()

										if err != nil {

											return nil, err

										}

										for _, symbol := range allSymbols {

											symType := elf.ST_TYPE(symbol.Info)

											if symbol.Size == 0 {

												continue

											}

											if symType != elf.STT_FUNC && symType != elf.STT_OBJECT && symType != elf.STT_NOTYPE {

												continue

											}

											if symbol.Section >= elf.SHN_LORESERVE {

												// Not a regular section, so skip it.

												// One example is elf.SHN_ABS, which is used for symbols

												// declared with an absolute value such as the memset function

												// on the ESP32 which is defined in the mask ROM.

												continue

											}

											section := file.Sections[symbol.Section]

											if section.Flags&elf.SHF_ALLOC == 0 {

												continue

											}

											if packageSymbolRegexp.MatchString(symbol.Name) || reflectDataRegexp.MatchString(symbol.Name) {

												addresses = append(addresses, addressLine{

													Address:    symbol.Value,

													Length:     symbol.Size,

													File:       symbol.Name,

													IsVariable: true,

												})

											}

										}


										// Load allocated sections.

										for _, section := range file.Sections {

											if section.Flags&elf.SHF_ALLOC == 0 {

												continue

											}

											if section.Type == elf.SHT_NOBITS {

												if section.Name == ".stack" {

													// TinyGo emits stack sections on microcontroller using the

													// ".stack" name.

													// This is a bit ugly, but I don't think there is a way to

													// mark the stack section in a linker script.

													sections = append(sections, memorySection{

														Address: section.Addr,

														Size:    section.Size,

														Type:    memoryStack,

													})

												} else {

													// Regular .bss section.

													sections = append(sections, memorySection{

														Address: section.Addr,

														Size:    section.Size,

														Type:    memoryBSS,

													})

												}

											} else if section.Type == elf.SHT_PROGBITS && section.Flags&elf.SHF_EXECINSTR != 0 {

												// .text

												sections = append(sections, memorySection{

													Address: section.Addr,

													Size:    section.Size,

													Type:    memoryCode,

												})

											} else if section.Type == elf.SHT_PROGBITS && section.Flags&elf.SHF_WRITE != 0 {

												// .data

												sections = append(sections, memorySection{

													Address: section.Addr,

													Size:    section.Size,

													Type:    memoryData,

												})

											} else if section.Type == elf.SHT_PROGBITS {

												// .rodata

												sections = append(sections, memorySection{

													Address: section.Addr,

													Size:    section.Size,

													Type:    memoryROData,

												})

											}

										}

									} else if file, err := wasm.Parse(f); err == nil {

										// File is in WebAssembly format.


										// Put code at a very high address, so that it won't conflict with the

										// data in the memory section.

										const codeOffset = 0x8000_0000_0000_0000


										// Read DWARF information. The error is intentionally ignored.

										data, err := file.DWARF()

										if data != nil {

											addresses, err = readProgramSizeFromDWARF(data, codeOffset)

											if err != nil {

												// However, _do_ report an error here. Something must have gone

												// wrong while trying to parse DWARF data.

												return nil, err

											}

										}


										var linearMemorySize uint64

										for _, section := range file.Sections {

											switch section := section.(type) {

											case *wasm.SectionCode:

												sections = append(sections, memorySection{

													Address: codeOffset,

													Size:    uint64(section.Size()),

													Type:    memoryCode,

												})

											case *wasm.SectionMemory:

												// This value is used when processing *wasm.SectionData (which

												// always comes after *wasm.SectionMemory).

												linearMemorySize = uint64(section.Entries[0].Limits.Initial) * 64 * 1024

											case *wasm.SectionData:

												// Data sections contain initial values for linear memory.

												// First load the list of data sections, and sort them by

												// address for easier processing.

												var dataSections []memorySection

												for _, entry := range section.Entries {

													address, err := wasm.Eval(bytes.NewBuffer(entry.Offset))

													if err != nil {

														return nil, fmt.Errorf("could not parse data section address: %w", err)

													}

													dataSections = append(dataSections, memorySection{

														Address: uint64(address[0].(int32)),

														Size:    uint64(len(entry.Data)),

														Type:    memoryData,

													})

												}

												sort.Slice(dataSections, func(i, j int) bool {

													return dataSections[i].Address < dataSections[j].Address

												})


												// And now add all data sections for linear memory.

												// Parts that are in the slice of data sections are added as

												// memoryData, and parts that are not are added as memoryBSS.

												addr := uint64(0)

												for _, section := range dataSections {

													if addr < section.Address {

														sections = append(sections, memorySection{

															Address: addr,

															Size:    section.Address - addr,

															Type:    memoryBSS,

														})

													}

													if addr > section.Address {

														// This might be allowed, I'm not sure.

														// It certainly doesn't make a lot of sense.

														return nil, fmt.Errorf("overlapping data section")

													}

													// addr == section.Address

													sections = append(sections, section)

													addr = section.Address + section.Size

												}

												if addr < linearMemorySize {

													sections = append(sections, memorySection{

														Address: addr,

														Size:    linearMemorySize - addr,

														Type:    memoryBSS,

													})

												}

											}

										}

									} else {

										return nil, fmt.Errorf("could not parse file: %w", err)

									}


									// Sort the slice of address chunks by address, so that we can iterate

									// through it to calculate section sizes.

									sort.Slice(addresses, func(i, j int) bool {

										if addresses[i].Address == addresses[j].Address {

											// Very rarely, there might be duplicate addresses.

											// If that happens, sort the largest chunks first.

											return addresses[i].Length > addresses[j].Length

										}

										return addresses[i].Address < addresses[j].Address

									})


									// Now finally determine the binary/RAM size usage per package by going

									// through each allocated section.

									sizes := make(map[string]packageSize)

									for _, section := range sections {

										switch section.Type {

										case memoryCode:

											readSection(section, addresses, func(path string, size uint64, isVariable bool) {

												field := sizes[path]

												if isVariable {

													field.ROData += size

												} else {

													field.Code += size

												}

												sizes[path] = field

											}, packagePathMap)

										case memoryROData:

											readSection(section, addresses, func(path string, size uint64, isVariable bool) {

												field := sizes[path]

												field.ROData += size

												sizes[path] = field

											}, packagePathMap)

										case memoryData:

											readSection(section, addresses, func(path string, size uint64, isVariable bool) {

												field := sizes[path]

												field.Data += size

												sizes[path] = field

											}, packagePathMap)

										case memoryBSS:

											readSection(section, addresses, func(path string, size uint64, isVariable bool) {

												field := sizes[path]

												field.BSS += size

												sizes[path] = field

											}, packagePathMap)

										case memoryStack:

											// We store the C stack as a pseudo-package.

											sizes["C stack"] = packageSize{

												BSS: section.Size,

											}

										}

									}


									// ...and summarize the results.

									program := &programSize{

										Packages: sizes,

									}

									for _, pkg := range sizes {

										program.Code += pkg.Code

										program.ROData += pkg.ROData

										program.Data += pkg.Data

										program.BSS += pkg.BSS

									}

									return program, nil

								}


								// readSection determines for each byte in this section to which package it

								// belongs. It reports this usage through the addSize callback.

								func readSection(section memorySection, addresses []addressLine, addSize func(string, uint64, bool), packagePathMap map[string]string) {

									// The addr variable tracks at which address we are while going through this

									// section. We start at the beginning.

									addr := section.Address

									sectionEnd := section.Address + section.Size

									for _, line := range addresses {

										if line.Address < section.Address || line.Address+line.Length >= sectionEnd {

											// Check that this line is entirely within the section.

											// Don't bother dealing with line entries that cross sections (that

											// seems rather unlikely anyway).

											continue

										}

										if addr < line.Address {

											// There is a gap: there is a space between the current and the

											// previous line entry.

											addSize("(unknown)", line.Address-addr, false)

											if sizesDebug {

												fmt.Printf("%08x..%08x %4d: unknown (gap)\n", addr, line.Address, line.Address-addr)

											}

										}

										if addr > line.Address+line.Length {

											// The current line is already covered by a previous line entry.

											// Simply skip it.

											continue

										}

										// At this point, addr falls within the current line (probably at the

										// start).

										length := line.Length

										if addr > line.Address {

											// There is some overlap: the previous line entry already covered

											// part of this line entry. So reduce the length to add to the

											// remaining bit of the line entry.

											length = line.Length - (addr - line.Address)

										}

										// Finally, mark this chunk of memory as used by the given package.

										addSize(findPackagePath(line.File, packagePathMap), length, line.IsVariable)

										addr = line.Address + line.Length

									}

									if addr < sectionEnd {

										// There is a gap at the end of the section.

										addSize("(unknown)", sectionEnd-addr, false)

										if sizesDebug {

											fmt.Printf("%08x..%08x %4d: unknown (end)\n", addr, sectionEnd, sectionEnd-addr)

										}

									}

								}


								// findPackagePath returns the Go package (or a pseudo package) for the given

								// path. It uses some heuristics, for example for some C libraries.

								func findPackagePath(path string, packagePathMap map[string]string) string {

									// Check whether this path is part of one of the compiled packages.

									packagePath, ok := packagePathMap[filepath.Dir(path)]

									if !ok {

										if strings.HasPrefix(path, filepath.Join(goenv.Get("TINYGOROOT"), "lib")) {

											// Emit C libraries (in the lib subdirectory of TinyGo) as a single

											// package, with a "C" prefix. For example: "C compiler-rt" for the

											// compiler runtime library from LLVM.

											packagePath = "C " + strings.Split(strings.TrimPrefix(path, filepath.Join(goenv.Get("TINYGOROOT"), "lib")), string(os.PathSeparator))[1]

										} else if packageSymbolRegexp.MatchString(path) {

											// Parse symbol names like main$alloc or runtime$string.

											packagePath = path[:strings.LastIndex(path, "$")]

										} else if reflectDataRegexp.MatchString(path) {

											// Parse symbol names like reflect.structTypesSidetable.

											packagePath = "Go reflect data"

										} else if path == "<Go interface assert>" {

											// Interface type assert, generated by the interface lowering pass.

											packagePath = "Go interface assert"

										} else if path == "<Go interface method>" {

											// Interface method wrapper (switch over all concrete types),

											// generated by the interface lowering pass.

											packagePath = "Go interface method"

										} else if path == "<stdin>" {

											// This can happen when the source code (in Go) doesn't have a

											// source file and uses "-" as the location. Somewhere this is

											// converted to "<stdin>".

											// Convert this back to the "-" string. Eventually, this should be

											// fixed in the compiler.

											packagePath = "-"

										} else {

											// This is some other path. Not sure what it is, so just emit its directory.

											packagePath = filepath.Dir(path) // fallback

										}

									}

									return packagePath

								}