You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

558 lines
21 KiB

package transform
// This file has some compiler support for run-time reflection using the reflect
// package. In particular, it encodes type information in type codes in such a
// way that the reflect package can decode the type from this information.
// Where needed, it also adds some side tables for looking up more information
// about a type, when that information cannot be stored directly in the type
// code.
//
// Go has 26 different type kinds.
//
// Type kinds are subdivided in basic types (see the list of basicTypes below)
// that are mostly numeric literals and non-basic (or "complex") types that are
// more difficult to encode. These non-basic types come in two forms:
// * Prefix types (pointer, slice, interface, channel): these just add
// something to an existing type. For example, a pointer like *int just adds
// the fact that it's a pointer to an existing type (int).
// These are encoded efficiently by adding a prefix to a type code.
// * Types with multiple fields (struct, array, func, map). All of these have
// multiple fields contained within. Most obviously structs can contain many
// types as fields. Also arrays contain not just the element type but also
// the length parameter which can be any arbitrary number and thus may not
// fit in a type code.
// These types are encoded using side tables.
//
// This distinction is also important for how named types are encoded. At the
// moment, named basic type just get a unique number assigned while named
// non-basic types have their underlying type stored in a sidetable.
import (
"encoding/binary"
"go/ast"
"math/big"
"sort"
"strings"
"tinygo.org/x/go-llvm"
)
// A list of basic types and their numbers. This list should be kept in sync
// with the list of Kind constants of type.go in the reflect package.
var basicTypes = map[string]int64{
"bool": 1,
"int": 2,
"int8": 3,
"int16": 4,
"int32": 5,
"int64": 6,
"uint": 7,
"uint8": 8,
"uint16": 9,
"uint32": 10,
"uint64": 11,
"uintptr": 12,
"float32": 13,
"float64": 14,
"complex64": 15,
"complex128": 16,
"string": 17,
"unsafe.Pointer": 18,
}
// A list of non-basic types. Adding 19 to this number will give the Kind as
// used in src/reflect/types.go, and it must be kept in sync with that list.
var nonBasicTypes = map[string]int64{
"chan": 0,
"interface": 1,
"pointer": 2,
"slice": 3,
"array": 4,
"func": 5,
"map": 6,
"struct": 7,
}
// typeCodeAssignmentState keeps some global state around for type code
// assignments, used to assign one unique type code to each Go type.
type typeCodeAssignmentState struct {
// An integer that's incremented each time it's used to give unique IDs to
// type codes that are not yet fully supported otherwise by the reflect
// package (or are simply unused in the compiled program).
fallbackIndex int
// This is the length of an uintptr. Only used occasionally to know whether
// a given number can be encoded as a varint.
uintptrLen int
// Map of named types to their type code. It is important that named types
// get unique IDs for each type.
namedBasicTypes map[string]int
namedNonBasicTypes map[string]int
// Map of array types to their type code.
arrayTypes map[string]int
arrayTypesSidetable []byte
needsArrayTypesSidetable bool
// Map of struct types to their type code.
structTypes map[string]int
structTypesSidetable []byte
needsStructNamesSidetable bool
// Map of struct names and tags to their name string.
structNames map[string]int
structNamesSidetable []byte
needsStructTypesSidetable bool
// This byte array is stored in reflect.namedNonBasicTypesSidetable and is
// used at runtime to get details about a named non-basic type.
// Entries are varints (see makeVarint below and readVarint in
// reflect/sidetables.go for the encoding): one varint per entry. The
// integers in namedNonBasicTypes are indices into this array. Because these
// are varints, most type codes are really small (just one byte).
//
// Note that this byte buffer is not created when it is not needed
// (reflect.namedNonBasicTypesSidetable has no uses), see
// needsNamedTypesSidetable.
namedNonBasicTypesSidetable []uint64
// This indicates whether namedNonBasicTypesSidetable needs to be created at
// all. If it is false, namedNonBasicTypesSidetable will contain simple
// monotonically increasing numbers.
needsNamedNonBasicTypesSidetable bool
}
// LowerReflect is used to assign a type code to each type in the program
// that is ever stored in an interface. It tries to use the smallest possible
// numbers to make the code that works with interfaces as small as possible.
func LowerReflect(mod llvm.Module) {
// if reflect were not used, we could skip generating the sidetable
// this does not help in practice, and is difficult to do correctly
// Obtain slice of all types in the program.
type typeInfo struct {
typecode llvm.Value
name string
numUses int
}
var types []*typeInfo
for global := mod.FirstGlobal(); !global.IsNil(); global = llvm.NextGlobal(global) {
if strings.HasPrefix(global.Name(), "reflect/types.type:") {
types = append(types, &typeInfo{
typecode: global,
name: global.Name(),
numUses: len(getUses(global)),
})
}
}
// Sort the slice in a way that often used types are assigned a type code
// first.
sort.Slice(types, func(i, j int) bool {
if types[i].numUses != types[j].numUses {
return types[i].numUses < types[j].numUses
}
// It would make more sense to compare the name in the other direction,
// but for some reason that increases binary size. Could be a fluke, but
// could also have some good reason (and possibly hint at a small
// optimization).
return types[i].name > types[j].name
})
// Assign typecodes the way the reflect package expects.
targetData := llvm.NewTargetData(mod.DataLayout())
defer targetData.Dispose()
uintptrType := mod.Context().IntType(targetData.PointerSize() * 8)
state := typeCodeAssignmentState{
fallbackIndex: 1,
uintptrLen: targetData.PointerSize() * 8,
namedBasicTypes: make(map[string]int),
namedNonBasicTypes: make(map[string]int),
arrayTypes: make(map[string]int),
structTypes: make(map[string]int),
structNames: make(map[string]int),
needsNamedNonBasicTypesSidetable: len(getUses(mod.NamedGlobal("reflect.namedNonBasicTypesSidetable"))) != 0,
needsStructTypesSidetable: len(getUses(mod.NamedGlobal("reflect.structTypesSidetable"))) != 0,
needsStructNamesSidetable: len(getUses(mod.NamedGlobal("reflect.structNamesSidetable"))) != 0,
needsArrayTypesSidetable: len(getUses(mod.NamedGlobal("reflect.arrayTypesSidetable"))) != 0,
}
for _, t := range types {
num := state.getTypeCodeNum(t.typecode)
if num.BitLen() > state.uintptrLen || !num.IsUint64() {
// TODO: support this in some way, using a side table for example.
// That's less efficient but better than not working at all.
// Particularly important on systems with 16-bit pointers (e.g.
// AVR).
panic("compiler: could not store type code number inside interface type code")
}
// Replace each use of the type code global with the constant type code.
for _, use := range getUses(t.typecode) {
if use.IsAConstantExpr().IsNil() {
continue
}
typecode := llvm.ConstInt(uintptrType, num.Uint64(), false)
switch use.Opcode() {
case llvm.PtrToInt:
// Already of the correct type.
case llvm.BitCast:
// Could happen when stored in an interface (which is of type
// i8*).
typecode = llvm.ConstIntToPtr(typecode, use.Type())
default:
panic("unexpected constant expression")
}
use.ReplaceAllUsesWith(typecode)
}
}
// Only create this sidetable when it is necessary.
if state.needsNamedNonBasicTypesSidetable {
global := replaceGlobalIntWithArray(mod, "reflect.namedNonBasicTypesSidetable", state.namedNonBasicTypesSidetable)
global.SetLinkage(llvm.InternalLinkage)
global.SetUnnamedAddr(true)
global.SetGlobalConstant(true)
}
if state.needsArrayTypesSidetable {
global := replaceGlobalIntWithArray(mod, "reflect.arrayTypesSidetable", state.arrayTypesSidetable)
global.SetLinkage(llvm.InternalLinkage)
global.SetUnnamedAddr(true)
global.SetGlobalConstant(true)
}
if state.needsStructTypesSidetable {
global := replaceGlobalIntWithArray(mod, "reflect.structTypesSidetable", state.structTypesSidetable)
global.SetLinkage(llvm.InternalLinkage)
global.SetUnnamedAddr(true)
global.SetGlobalConstant(true)
}
if state.needsStructNamesSidetable {
global := replaceGlobalIntWithArray(mod, "reflect.structNamesSidetable", state.structNamesSidetable)
global.SetLinkage(llvm.InternalLinkage)
global.SetUnnamedAddr(true)
global.SetGlobalConstant(true)
}
// Remove most objects created for interface and reflect lowering.
// They would normally be removed anyway in later passes, but not always.
// It also cleans up the IR for testing.
for _, typ := range types {
initializer := typ.typecode.Initializer()
references := llvm.ConstExtractValue(initializer, []uint32{0})
typ.typecode.SetInitializer(llvm.ConstNull(initializer.Type()))
if strings.HasPrefix(typ.name, "reflect/types.type:struct:") {
// Structs have a 'references' field that is not a typecode but
// a pointer to a runtime.structField array and therefore a
// bitcast. This global should be erased separately, otherwise
// typecode objects cannot be erased.
structFields := references.Operand(0)
structFields.EraseFromParentAsGlobal()
}
}
}
// getTypeCodeNum returns the typecode for a given type as expected by the
// reflect package. Also see getTypeCodeName, which serializes types to a string
// based on a types.Type value for this function.
func (state *typeCodeAssignmentState) getTypeCodeNum(typecode llvm.Value) *big.Int {
// Note: see src/reflect/type.go for bit allocations.
class, value := getClassAndValueFromTypeCode(typecode)
name := ""
if class == "named" {
name = value
typecode = llvm.ConstExtractValue(typecode.Initializer(), []uint32{0})
class, value = getClassAndValueFromTypeCode(typecode)
}
if class == "basic" {
// Basic types follow the following bit pattern:
// ...xxxxx0
// where xxxxx is allocated for the 18 possible basic types and all the
// upper bits are used to indicate the named type.
num, ok := basicTypes[value]
if !ok {
panic("invalid basic type: " + value)
}
if name != "" {
// This type is named, set the upper bits to the name ID.
num |= int64(state.getBasicNamedTypeNum(name)) << 5
}
return big.NewInt(num << 1)
} else {
// Non-baisc types use the following bit pattern:
// ...nxxx1
// where xxx indicates the non-basic type. The upper bits contain
// whatever the type contains. Types that wrap a single other type
// (channel, interface, pointer, slice) just contain the bits of the
// wrapped type. Other types (like struct) need more fields and thus
// cannot be encoded as a simple prefix.
var classNumber int64
if n, ok := nonBasicTypes[class]; ok {
classNumber = n
} else {
panic("unknown type kind: " + class)
}
var num *big.Int
lowBits := (classNumber << 1) + 1 // the 5 low bits of the typecode
if name == "" {
num = state.getNonBasicTypeCode(class, typecode)
} else {
// We must return a named type here. But first check whether it
// has already been defined.
if index, ok := state.namedNonBasicTypes[name]; ok {
num := big.NewInt(int64(index))
num.Lsh(num, 5).Or(num, big.NewInt((classNumber<<1)+1+(1<<4)))
return num
}
lowBits |= 1 << 4 // set the 'n' bit (see above)
if !state.needsNamedNonBasicTypesSidetable {
// Use simple small integers in this case, to make these numbers
// smaller.
index := len(state.namedNonBasicTypes) + 1
state.namedNonBasicTypes[name] = index
num = big.NewInt(int64(index))
} else {
// We need to store full type information.
// First allocate a number in the named non-basic type
// sidetable.
index := len(state.namedNonBasicTypesSidetable)
state.namedNonBasicTypesSidetable = append(state.namedNonBasicTypesSidetable, 0)
state.namedNonBasicTypes[name] = index
// Get the typecode of the underlying type (which could be the
// element type in the case of pointers, for example).
num = state.getNonBasicTypeCode(class, typecode)
if num.BitLen() > state.uintptrLen || !num.IsUint64() {
panic("cannot store value in sidetable")
}
// Now update the side table with the number we just
// determined. We need this multi-step approach to avoid stack
// overflow due to adding types recursively in the case of
// linked lists (a pointer which points to a struct that
// contains that same pointer).
state.namedNonBasicTypesSidetable[index] = num.Uint64()
num = big.NewInt(int64(index))
}
}
// Concatenate the 'num' and 'lowBits' bitstrings.
num.Lsh(num, 5).Or(num, big.NewInt(lowBits))
return num
}
}
// getNonBasicTypeCode is used by getTypeCodeNum. It returns the upper bits of
// the type code used there in the type code.
func (state *typeCodeAssignmentState) getNonBasicTypeCode(class string, typecode llvm.Value) *big.Int {
switch class {
case "chan", "pointer", "slice":
// Prefix-style type kinds. The upper bits contain the element type.
sub := llvm.ConstExtractValue(typecode.Initializer(), []uint32{0})
return state.getTypeCodeNum(sub)
case "array":
// An array is basically a pair of (typecode, length) stored in a
// sidetable.
return big.NewInt(int64(state.getArrayTypeNum(typecode)))
case "struct":
// More complicated type kind. The upper bits contain the index to the
// struct type in the struct types sidetable.
return big.NewInt(int64(state.getStructTypeNum(typecode)))
default:
// Type has not yet been implemented, so fall back by using a unique
// number.
num := big.NewInt(int64(state.fallbackIndex))
state.fallbackIndex++
return num
}
}
// getClassAndValueFromTypeCode takes a typecode (a llvm.Value of type
// runtime.typecodeID), looks at the name, and extracts the typecode class and
// value from it. For example, for a typecode with the following name:
//
// reflect/types.type:pointer:named:reflect.ValueError
//
// It extracts:
//
// class = "pointer"
// value = "named:reflect.ValueError"
func getClassAndValueFromTypeCode(typecode llvm.Value) (class, value string) {
typecodeName := typecode.Name()
const prefix = "reflect/types.type:"
if !strings.HasPrefix(typecodeName, prefix) {
panic("unexpected typecode name: " + typecodeName)
}
id := typecodeName[len(prefix):]
class = id[:strings.IndexByte(id, ':')]
value = id[len(class)+1:]
return
}
// getBasicNamedTypeNum returns an appropriate (unique) number for the given
// named type. If the name already has a number that number is returned, else a
// new number is returned. The number is always non-zero.
func (state *typeCodeAssignmentState) getBasicNamedTypeNum(name string) int {
if num, ok := state.namedBasicTypes[name]; ok {
return num
}
num := len(state.namedBasicTypes) + 1
state.namedBasicTypes[name] = num
return num
}
// getArrayTypeNum returns the array type number, which is an index into the
// reflect.arrayTypesSidetable or a unique number for this type if this table is
// not used.
func (state *typeCodeAssignmentState) getArrayTypeNum(typecode llvm.Value) int {
name := typecode.Name()
if num, ok := state.arrayTypes[name]; ok {
// This array type already has an entry in the sidetable. Don't store
// it twice.
return num
}
if !state.needsArrayTypesSidetable {
// We don't need array sidetables, so we can just assign monotonically
// increasing numbers to each array type.
num := len(state.arrayTypes)
state.arrayTypes[name] = num
return num
}
elemTypeCode := llvm.ConstExtractValue(typecode.Initializer(), []uint32{0})
elemTypeNum := state.getTypeCodeNum(elemTypeCode)
if elemTypeNum.BitLen() > state.uintptrLen || !elemTypeNum.IsUint64() {
// TODO: make this a regular error
panic("array element type has a type code that is too big")
}
// The array side table is a sequence of {element type, array length}.
arrayLength := llvm.ConstExtractValue(typecode.Initializer(), []uint32{1}).ZExtValue()
buf := makeVarint(elemTypeNum.Uint64())
buf = append(buf, makeVarint(arrayLength)...)
index := len(state.arrayTypesSidetable)
state.arrayTypes[name] = index
state.arrayTypesSidetable = append(state.arrayTypesSidetable, buf...)
return index
}
// getStructTypeNum returns the struct type number, which is an index into
// reflect.structTypesSidetable or an unique number for every struct if this
// sidetable is not needed in the to-be-compiled program.
func (state *typeCodeAssignmentState) getStructTypeNum(typecode llvm.Value) int {
name := typecode.Name()
if num, ok := state.structTypes[name]; ok {
// This struct already has an assigned type code.
return num
}
if !state.needsStructTypesSidetable {
// We don't need struct sidetables, so we can just assign monotonically
// increasing numbers to each struct type.
num := len(state.structTypes)
state.structTypes[name] = num
return num
}
// Get the fields this struct type contains.
// The struct number will be the start index of
structTypeGlobal := llvm.ConstExtractValue(typecode.Initializer(), []uint32{0}).Operand(0).Initializer()
numFields := structTypeGlobal.Type().ArrayLength()
// The first data that is stored in the struct sidetable is the number of
// fields this struct contains. This is usually just a single byte because
// most structs don't contain that many fields, but make it a varint just
// to be sure.
buf := makeVarint(uint64(numFields))
// Iterate over every field in the struct.
// Every field is stored sequentially in the struct sidetable. Fields can
// be retrieved from this list of fields at runtime by iterating over all
// of them until the right field has been found.
// Perhaps adding some index would speed things up, but it would also make
// the sidetable bigger.
for i := 0; i < numFields; i++ {
// Collect some information about this field.
field := llvm.ConstExtractValue(structTypeGlobal, []uint32{uint32(i)})
nameGlobal := llvm.ConstExtractValue(field, []uint32{1})
if nameGlobal == llvm.ConstPointerNull(nameGlobal.Type()) {
panic("compiler: no name for this struct field")
}
fieldNameBytes := getGlobalBytes(nameGlobal.Operand(0))
fieldNameNumber := state.getStructNameNumber(fieldNameBytes)
// See whether this struct field has an associated tag, and if so,
// store that tag in the tags sidetable.
tagGlobal := llvm.ConstExtractValue(field, []uint32{2})
hasTag := false
tagNumber := 0
if tagGlobal != llvm.ConstPointerNull(tagGlobal.Type()) {
hasTag = true
tagBytes := getGlobalBytes(tagGlobal.Operand(0))
tagNumber = state.getStructNameNumber(tagBytes)
}
// The 'embedded' or 'anonymous' flag for this field.
embedded := llvm.ConstExtractValue(field, []uint32{3}).ZExtValue() != 0
// The first byte in the struct types sidetable is a flags byte with
// two bits in it.
flagsByte := byte(0)
if embedded {
flagsByte |= 1
}
if hasTag {
flagsByte |= 2
}
if ast.IsExported(string(fieldNameBytes)) {
flagsByte |= 4
}
buf = append(buf, flagsByte)
// Get the type number and add it to the buffer.
// All fields have a type, so include it directly here.
typeNum := state.getTypeCodeNum(llvm.ConstExtractValue(field, []uint32{0}))
if typeNum.BitLen() > state.uintptrLen || !typeNum.IsUint64() {
// TODO: make this a regular error
panic("struct field has a type code that is too big")
}
buf = append(buf, makeVarint(typeNum.Uint64())...)
// Add the name.
buf = append(buf, makeVarint(uint64(fieldNameNumber))...)
// Add the tag, if there is one.
if hasTag {
buf = append(buf, makeVarint(uint64(tagNumber))...)
}
}
num := len(state.structTypesSidetable)
state.structTypes[name] = num
state.structTypesSidetable = append(state.structTypesSidetable, buf...)
return num
}
// getStructNameNumber stores this string (name or tag) onto the struct names
// sidetable. The format is a varint of the length of the struct, followed by
// the raw bytes of the name. Multiple identical strings are stored under the
// same name for space efficiency.
func (state *typeCodeAssignmentState) getStructNameNumber(nameBytes []byte) int {
name := string(nameBytes)
if n, ok := state.structNames[name]; ok {
// This name was used before, re-use it now (for space efficiency).
return n
}
// This name is not yet in the names sidetable. Add it now.
n := len(state.structNamesSidetable)
state.structNames[name] = n
state.structNamesSidetable = append(state.structNamesSidetable, makeVarint(uint64(len(nameBytes)))...)
state.structNamesSidetable = append(state.structNamesSidetable, nameBytes...)
return n
}
// makeVarint is a small helper function that returns the bytes of the number in
// varint encoding.
func makeVarint(n uint64) []byte {
buf := make([]byte, binary.MaxVarintLen64)
return buf[:binary.PutUvarint(buf, n)]
}