package cgo
// This file implements a parser of a subset of the C language, just enough to
// parse common #define statements to Go constant expressions.
import (
"fmt"
"go/ast"
"go/scanner"
"go/token"
"strings"
)
// parseConst parses the given string as a C constant expression and returns
// the equivalent Go AST expression. On failure it returns a *scanner.Error
// positioned (via fset) at the offending token.
func parseConst(pos token.Pos, fset *token.FileSet, value string) (ast.Expr, *scanner.Error) {
	t := newTokenizer(pos, fset, value)
	expr, err := parseConstExpr(t)
	if err != nil {
		// Return the inner parse error directly. Checking for EOF first would
		// mask a precise message (e.g. "unexpected token INT, expected )")
		// with the generic one below.
		return nil, err
	}
	if t.token != token.EOF {
		// A valid expression was parsed but trailing tokens remain.
		return nil, &scanner.Error{
			Pos: t.fset.Position(t.pos),
			Msg: "unexpected token " + t.token.String(),
		}
	}
	return expr, nil
}
// parseConstExpr parses a stream of C tokens to a Go expression.
// It accepts parenthesized expressions, basic literals (integer, float,
// string, character) and identifiers; identifiers are emitted with a "C."
// prefix so they resolve in the cgo namespace.
func parseConstExpr(t *tokenizer) (ast.Expr, *scanner.Error) {
	switch tok := t.token; tok {
	case token.LPAREN:
		lparenPos := t.pos
		t.Next()
		inner, err := parseConstExpr(t)
		if err != nil {
			return nil, err
		}
		if t.token != token.RPAREN {
			return nil, unexpectedToken(t, token.RPAREN)
		}
		paren := &ast.ParenExpr{
			Lparen: lparenPos,
			X:      inner,
			Rparen: t.pos,
		}
		t.Next()
		return paren, nil
	case token.INT, token.FLOAT, token.STRING, token.CHAR:
		lit := &ast.BasicLit{
			ValuePos: t.pos,
			Kind:     tok,
			Value:    t.value,
		}
		t.Next()
		return lit, nil
	case token.IDENT:
		ident := &ast.Ident{
			NamePos: t.pos,
			Name:    "C." + t.value,
		}
		t.Next()
		return ident, nil
	case token.EOF:
		return nil, &scanner.Error{
			Pos: t.fset.Position(t.pos),
			Msg: "empty constant",
		}
	}
	// Anything else (ILLEGAL, operators we don't support, ...) is an error.
	return nil, &scanner.Error{
		Pos: t.fset.Position(t.pos),
		Msg: fmt.Sprintf("unexpected token %s", t.token),
	}
}
// unexpectedToken returns an error of the form "unexpected token FOO, expected
// BAR", positioned at the tokenizer's current token.
func unexpectedToken(t *tokenizer, expected token.Token) *scanner.Error {
	msg := fmt.Sprintf("unexpected token %s, expected %s", t.token, expected)
	return &scanner.Error{
		Pos: t.fset.Position(t.pos),
		Msg: msg,
	}
}
// tokenizer reads C source code and converts it to Go tokens.
type tokenizer struct {
pos token.Pos
fset *token.FileSet
token token.Token
value string
buf string
}
// newTokenizer initializes a new tokenizer, positioned at the first token in
// the string.
func newTokenizer(start token.Pos, fset *token.FileSet, buf string) *tokenizer {
t := &tokenizer{
pos: start,
fset: fset,
buf: buf,
token: token.ILLEGAL,
}
t.Next() // Parse the first token.
return t
}
// Next consumes the next token in the stream. There is no return value, read
// the next token from the pos, token and value properties.
func (t *tokenizer) Next() {
t.pos += token.Pos(len(t.value))
for {
if len(t.buf) == 0 {
t.token = token.EOF
return
}
c := t.buf[0]
switch {
case c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v':
// Skip whitespace.
// Based on this source, not sure whether it represents C whitespace:
// https://en.cppreference.com/w/cpp/string/byte/isspace
t.pos++
t.buf = t.buf[1:]
case c == '(' || c == ')':
// Single-character tokens.
switch c {
case '(':
t.token = token.LPAREN
case ')':
t.token = token.RPAREN
}
t.value = t.buf[:1]
t.buf = t.buf[1:]
return
case c >= '0' && c <= '9':
// Numeric constant (int, float, etc.).
// Find the last non-numeric character.
tokenLen := len(t.buf)
hasDot := false
for i, c := range t.buf {
if c == '.' {
hasDot = true
}
if c >= '0' && c <= '9' || c == '.' || c == '_' || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' {
tokenLen = i + 1
} else {
break
}
}
t.value = t.buf[:tokenLen]
t.buf = t.buf[tokenLen:]
if hasDot {
// Integer constants are more complicated than this but this is
// a close approximation.
// https://en.cppreference.com/w/cpp/language/integer_literal
t.token = token.FLOAT
t.value = strings.TrimRight(t.value, "f")
} else {
t.token = token.INT
t.value = strings.TrimRight(t.value, "uUlL")
}
return
case c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_':
// Identifier. Find all remaining tokens that are part of this
// identifier.
tokenLen := len(t.buf)
for i, c := range t.buf {
if c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_' {
tokenLen = i + 1
} else {
break
}
}
t.value = t.buf[:tokenLen]
t.buf = t.buf[tokenLen:]
t.token = token.IDENT
return
case c == '"':
// String constant. Find the first '"' character that is not
// preceded by a backslash.
escape := false
tokenLen := len(t.buf)
for i, c := range t.buf {
if i != 0 && c == '"' && !escape {
tokenLen = i + 1
break
}
if !escape {
escape = c == '\\'
}
}
t.token = token.STRING
t.value = t.buf[:tokenLen]
t.buf = t.buf[tokenLen:]
return
case c == '\'':
// Char (rune) constant. Find the first '\'' character that is not
// preceded by a backslash.
escape := false
tokenLen := len(t.buf)
for i, c := range t.buf {
if i != 0 && c == '\'' && !escape {
tokenLen = i + 1
break
}
if !escape {
escape = c == '\\'
}
}
t.token = token.CHAR
t.value = t.buf[:tokenLen]
t.buf = t.buf[tokenLen:]
return
default:
t.token = token.ILLEGAL
return
}
}
}