Browse Source

cgo: only include the symbols that are necessary (recursively)

Only try to convert to their Go equivalents those C symbols that are
actually referenced by the Go code as C.<somesymbol>. This avoids
having to support all possible C types, which is difficult because of
oddities like `typedef void` or `__builtin_va_list` -- especially
`__builtin_va_list`, which varies between targets.
pull/312/head
Ayke van Laethem 6 years ago
committed by Ron Evans
parent
commit
b1ed8a46b7
  1. 58
      loader/cgo.go
  2. 112
      loader/libclang.go
  3. 6
      testdata/cgo/main.h

58
loader/cgo.go

@ -23,6 +23,7 @@ type fileInfo struct {
typedefs map[string]*typedefInfo
elaboratedTypes map[string]ast.Expr
importCPos token.Pos
missingSymbols map[string]struct{}
}
// functionInfo stores some information about a Cgo function found by libclang
@ -60,7 +61,22 @@ var cgoAliases = map[string]string{
"C.uint32_t": "uint32",
"C.uint64_t": "uint64",
"C.uintptr_t": "uintptr",
"C.__builtin_va_list": "uintptr", // dummy value until fully implemented
}
// cgoBuiltinAliases is the set of C type names that need special handling:
// Go code refers to them as C.<name>, but on the C side they only exist under
// the name "_Cgo_<name>".
var cgoBuiltinAliases = map[string]struct{}{
	"char":      {},
	"schar":     {},
	"uchar":     {},
	"short":     {},
	"ushort":    {},
	"int":       {},
	"uint":      {},
	"long":      {},
	"ulong":     {},
	"longlong":  {},
	"ulonglong": {},
}
// cgoTypes lists some C types with ambiguous sizes that must be retrieved
@ -91,6 +107,13 @@ func (p *Package) processCgo(filename string, f *ast.File, cflags []string) []er
globals: map[string]*globalInfo{},
typedefs: map[string]*typedefInfo{},
elaboratedTypes: map[string]ast.Expr{},
missingSymbols: map[string]struct{}{},
}
// Find all C.* symbols.
f = astutil.Apply(f, info.findMissingCGoNames, nil).(*ast.File)
for name := range cgoBuiltinAliases {
info.missingSymbols["_Cgo_"+name] = struct{}{}
}
// Find `import "C"` statements in the file.
@ -222,6 +245,9 @@ func (info *fileInfo) addFuncDecls() {
// // ...
// )
func (info *fileInfo) addFuncPtrDecls() {
if len(info.functions) == 0 {
return
}
gen := &ast.GenDecl{
TokPos: info.importCPos,
Tok: token.VAR,
@ -270,6 +296,9 @@ func (info *fileInfo) addFuncPtrDecls() {
// // ...
// )
func (info *fileInfo) addVarDecls() {
if len(info.globals) == 0 {
return
}
gen := &ast.GenDecl{
TokPos: info.importCPos,
Tok: token.VAR,
@ -346,6 +375,9 @@ func (info *fileInfo) addTypeAliases() {
}
func (info *fileInfo) addTypedefs() {
if len(info.typedefs) == 0 {
return
}
gen := &ast.GenDecl{
TokPos: info.importCPos,
Tok: token.TYPE,
@ -394,6 +426,9 @@ func (info *fileInfo) addTypedefs() {
// See also:
// https://en.cppreference.com/w/cpp/language/elaborated_type_specifier
func (info *fileInfo) addElaboratedTypes() {
if len(info.elaboratedTypes) == 0 {
return
}
gen := &ast.GenDecl{
TokPos: info.importCPos,
Tok: token.TYPE,
@ -424,6 +459,27 @@ func (info *fileInfo) addElaboratedTypes() {
info.Decls = append(info.Decls, gen)
}
// findMissingCGoNames is a visitor callback for astutil.Apply that records
// every C.<name> selector it encounters in info.missingSymbols. Names listed
// in cgoBuiltinAliases are stored with a "_Cgo_" prefix. Only the symbols
// collected here are later extracted from the parsed C AST and converted to
// their Go equivalent.
//
// It always returns true, so the traversal is never cut short.
func (info *fileInfo) findMissingCGoNames(cursor *astutil.Cursor) bool {
	selector, isSelector := cursor.Node().(*ast.SelectorExpr)
	if !isSelector {
		return true
	}

	// Only selectors of the form C.<name> are of interest.
	pkg, isIdent := selector.X.(*ast.Ident)
	if !isIdent || pkg.Name != "C" {
		return true
	}

	symbol := selector.Sel.Name
	if _, builtin := cgoBuiltinAliases[symbol]; builtin {
		// Builtin aliases are looked up under their prefixed name.
		symbol = "_Cgo_" + symbol
	}
	info.missingSymbols[symbol] = struct{}{}
	return true
}
// walker replaces all "C".<something> expressions to literal "C.<something>"
// expressions. Such expressions are impossible to write in Go (a dot cannot be
// used in the middle of a name) so in practice all C identifiers live in a

112
loader/libclang.go

@ -168,6 +168,9 @@ func tinygo_clang_globals_visitor(c, parent C.GoCXCursor, client_data C.CXClient
switch kind {
case C.CXCursor_FunctionDecl:
name := getString(C.tinygo_clang_getCursorSpelling(c))
if _, required := info.missingSymbols[name]; !required {
return C.CXChildVisit_Continue
}
cursorType := C.tinygo_clang_getCursorType(c)
if C.clang_isFunctionTypeVariadic(cursorType) != 0 {
return C.CXChildVisit_Continue // not supported
@ -199,58 +202,23 @@ func tinygo_clang_globals_visitor(c, parent C.GoCXCursor, client_data C.CXClient
}
case C.CXCursor_StructDecl:
typ := C.tinygo_clang_getCursorType(c)
name := getString(C.tinygo_clang_getCursorSpelling(c))
if _, required := info.missingSymbols["struct_"+name]; !required {
return C.CXChildVisit_Continue
}
info.makeASTType(typ, pos)
case C.CXCursor_TypedefDecl:
typedefType := C.tinygo_clang_getCursorType(c)
name := getString(C.clang_getTypedefName(typedefType))
underlyingType := C.tinygo_clang_getTypedefDeclUnderlyingType(c)
expr := info.makeASTType(underlyingType, pos)
if strings.HasPrefix(name, "_Cgo_") {
expr := expr.(*ast.Ident)
typeSize := C.clang_Type_getSizeOf(underlyingType)
switch expr.Name {
case "C.char":
if typeSize != 1 {
// This happens for some very special purpose architectures
// (DSPs etc.) that are not currently targeted.
// https://www.embecosm.com/2017/04/18/non-8-bit-char-support-in-clang-and-llvm/
panic("unknown char width")
}
switch underlyingType.kind {
case C.CXType_Char_S:
expr.Name = "int8"
case C.CXType_Char_U:
expr.Name = "uint8"
}
case "C.schar", "C.short", "C.int", "C.long", "C.longlong":
switch typeSize {
case 1:
expr.Name = "int8"
case 2:
expr.Name = "int16"
case 4:
expr.Name = "int32"
case 8:
expr.Name = "int64"
}
case "C.uchar", "C.ushort", "C.uint", "C.ulong", "C.ulonglong":
switch typeSize {
case 1:
expr.Name = "uint8"
case 2:
expr.Name = "uint16"
case 4:
expr.Name = "uint32"
case 8:
expr.Name = "uint64"
}
}
}
info.typedefs[name] = &typedefInfo{
typeExpr: expr,
if _, required := info.missingSymbols[name]; !required {
return C.CXChildVisit_Continue
}
info.makeASTType(typedefType, pos)
case C.CXCursor_VarDecl:
name := getString(C.tinygo_clang_getCursorSpelling(c))
if _, required := info.missingSymbols[name]; !required {
return C.CXChildVisit_Continue
}
cursorType := C.tinygo_clang_getCursorType(c)
info.globals[name] = &globalInfo{
typeExpr: info.makeASTType(cursorType, pos),
@ -394,10 +362,60 @@ func (info *fileInfo) makeASTType(typ C.CXType, pos token.Pos) ast.Expr {
},
}
case C.CXType_Typedef:
typedefName := getString(C.clang_getTypedefName(typ))
name := getString(C.clang_getTypedefName(typ))
if _, ok := info.typedefs[name]; !ok {
info.typedefs[name] = nil // don't recurse
c := C.tinygo_clang_getTypeDeclaration(typ)
underlyingType := C.tinygo_clang_getTypedefDeclUnderlyingType(c)
expr := info.makeASTType(underlyingType, pos)
if strings.HasPrefix(name, "_Cgo_") {
expr := expr.(*ast.Ident)
typeSize := C.clang_Type_getSizeOf(underlyingType)
switch expr.Name {
case "C.char":
if typeSize != 1 {
// This happens for some very special purpose architectures
// (DSPs etc.) that are not currently targeted.
// https://www.embecosm.com/2017/04/18/non-8-bit-char-support-in-clang-and-llvm/
panic("unknown char width")
}
switch underlyingType.kind {
case C.CXType_Char_S:
expr.Name = "int8"
case C.CXType_Char_U:
expr.Name = "uint8"
}
case "C.schar", "C.short", "C.int", "C.long", "C.longlong":
switch typeSize {
case 1:
expr.Name = "int8"
case 2:
expr.Name = "int16"
case 4:
expr.Name = "int32"
case 8:
expr.Name = "int64"
}
case "C.uchar", "C.ushort", "C.uint", "C.ulong", "C.ulonglong":
switch typeSize {
case 1:
expr.Name = "uint8"
case 2:
expr.Name = "uint16"
case 4:
expr.Name = "uint32"
case 8:
expr.Name = "uint64"
}
}
}
info.typedefs[name] = &typedefInfo{
typeExpr: expr,
}
}
return &ast.Ident{
NamePos: pos,
Name: "C." + typedefName,
Name: "C." + name,
}
case C.CXType_Elaborated:
underlying := C.clang_Type_getNamedType(typ)

6
testdata/cgo/main.h

@ -2,12 +2,17 @@
#include <stdint.h>
typedef short myint;
typedef short unusedTypedef;
int add(int a, int b);
int unusedFunction(void);
typedef int (*binop_t) (int, int);
int doCallback(int a, int b, binop_t cb);
typedef int * intPointer;
void store(int value, int *ptr);
// this signature should not be included by CGo
void unusedFunction2(int x, __builtin_va_list args);
typedef struct collection {
short s;
long l;
@ -37,6 +42,7 @@ void unionSetData(short f0, short f1, short f2);
// test globals and datatypes
extern int global;
extern int unusedGlobal;
extern bool globalBool;
extern bool globalBool2;
extern float globalFloat;

Loading…
Cancel
Save