package cgo

// This file implements a parser of a subset of the C language, just enough to
// parse common #define statements to Go constant expressions.
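//
// For example, given a definition such as
//
//	#define FOO (10 + 2)
//
// the text `(10 + 2)` can be passed to parseConst, which returns the
// equivalent Go AST expression. C identifiers are rewritten into the "C."
// namespace (see parseIdent below).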

import (
	"fmt"
	"go/ast"
	"go/scanner"
	"go/token"
	"strings"
)
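
// prefixParseFns and precedences together drive a small top-down
// operator-precedence (Pratt-style) parser: prefixParseFns maps a token type
// to the function that parses an expression starting with that token, and
// precedences gives each binary operator its binding strength.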
var (
	prefixParseFns map[token.Token]func(*tokenizer) (ast.Expr, *scanner.Error)

	precedences = map[token.Token]int{
		token.ADD: precedenceAdd,
		token.SUB: precedenceAdd,
		token.MUL: precedenceMul,
		token.QUO: precedenceMul,
		token.REM: precedenceMul,
	}
)
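
// Operator precedence levels, from loosest to tightest binding.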
const (
	precedenceLowest = iota + 1
	precedenceAdd
	precedenceMul
	precedencePrefix
)

func init() {
	// This must be done in an init function to avoid an initialization cycle:
	// prefixParseFns refers to parseParenExpr, which itself (via
	// parseConstExpr) refers back to prefixParseFns.
	prefixParseFns = map[token.Token]func(*tokenizer) (ast.Expr, *scanner.Error){
		token.IDENT:  parseIdent,
		token.INT:    parseBasicLit,
		token.FLOAT:  parseBasicLit,
		token.STRING: parseBasicLit,
		token.CHAR:   parseBasicLit,
		token.LPAREN: parseParenExpr,
	}
}

// parseConst parses the given string as a C constant.
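// For example, the string `(10 + 2) * 3` is parsed into the equivalent Go
// expression `(10 + 2) * 3`, and `FOO + 1` into `C.FOO + 1`.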
func parseConst(pos token.Pos, fset *token.FileSet, value string) (ast.Expr, *scanner.Error) {
	t := newTokenizer(pos, fset, value)
	expr, err := parseConstExpr(t, precedenceLowest)
	t.Next()
	if t.curToken != token.EOF {
		return nil, &scanner.Error{
			Pos: t.fset.Position(t.curPos),
			Msg: "unexpected token " + t.curToken.String() + ", expected end of expression",
		}
	}
	return expr, err
}

// parseConstExpr parses a stream of C tokens to a Go expression.
func parseConstExpr(t *tokenizer, precedence int) (ast.Expr, *scanner.Error) {
	if t.curToken == token.EOF {
		return nil, &scanner.Error{
			Pos: t.fset.Position(t.curPos),
			Msg: "empty constant",
		}
	}
	prefix := prefixParseFns[t.curToken]
	if prefix == nil {
		return nil, &scanner.Error{
			Pos: t.fset.Position(t.curPos),
			Msg: fmt.Sprintf("unexpected token %s", t.curToken),
		}
	}
	leftExpr, err := prefix(t)
	for t.peekToken != token.EOF && precedence < precedences[t.peekToken] {
		switch t.peekToken {
		case token.ADD, token.SUB, token.MUL, token.QUO, token.REM:
			t.Next()
			leftExpr, err = parseBinaryExpr(t, leftExpr)
		}
	}
	return leftExpr, err
}
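
// parseIdent parses a C identifier and maps it into the "C." namespace of the
// resulting Go AST.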
func parseIdent(t *tokenizer) (ast.Expr, *scanner.Error) {
	return &ast.Ident{
		NamePos: t.curPos,
		Name:    "C." + t.curValue,
	}, nil
}
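
// parseBasicLit parses a basic literal (integer, float, string, or character
// constant) into the equivalent Go literal, reusing the token kind and value
// produced by the tokenizer.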
func parseBasicLit(t *tokenizer) (ast.Expr, *scanner.Error) {
	return &ast.BasicLit{
		ValuePos: t.curPos,
		Kind:     t.curToken,
		Value:    t.curValue,
	}, nil
}
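
// parseParenExpr parses a parenthesized expression: the opening parenthesis,
// an inner expression at the lowest precedence level, and the closing
// parenthesis.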
func parseParenExpr(t *tokenizer) (ast.Expr, *scanner.Error) {
	lparen := t.curPos
	t.Next()
	x, err := parseConstExpr(t, precedenceLowest)
	if err != nil {
		return nil, err
	}
	t.Next()
	if t.curToken != token.RPAREN {
		return nil, unexpectedToken(t, token.RPAREN)
	}
	expr := &ast.ParenExpr{
		Lparen: lparen,
		X:      x,
		Rparen: t.curPos,
	}
	return expr, nil
}
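
// parseBinaryExpr parses the right-hand side of a binary expression. It is
// called with the left operand already parsed and the current token positioned
// at the operator.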
func parseBinaryExpr(t *tokenizer, left ast.Expr) (ast.Expr, *scanner.Error) {
	expression := &ast.BinaryExpr{
		X:     left,
		Op:    t.curToken,
		OpPos: t.curPos,
	}
	precedence := precedences[t.curToken]
	t.Next()
	right, err := parseConstExpr(t, precedence)
	expression.Y = right
	return expression, err
}

// unexpectedToken returns an error of the form "unexpected token FOO, expected
// BAR".
func unexpectedToken(t *tokenizer, expected token.Token) *scanner.Error {
	return &scanner.Error{
		Pos: t.fset.Position(t.curPos),
		Msg: fmt.Sprintf("unexpected token %s, expected %s", t.curToken, expected),
	}
}

// tokenizer reads C source code and converts it to Go tokens.
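// It tracks both the current token and one token of lookahead (the "peek"
// fields), which parseConstExpr uses to decide whether to continue parsing a
// binary expression.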
type tokenizer struct {
	curPos, peekPos     token.Pos
	fset                *token.FileSet
	curToken, peekToken token.Token
	curValue, peekValue string
	buf                 string
}

// newTokenizer initializes a new tokenizer, positioned at the first token in
// the string.
func newTokenizer(start token.Pos, fset *token.FileSet, buf string) *tokenizer {
	t := &tokenizer{
		peekPos:   start,
		fset:      fset,
		buf:       buf,
		peekToken: token.ILLEGAL,
	}
	// Parse the first two tokens (cur and peek).
	t.Next()
	t.Next()
	return t
}

// Next consumes the next token in the stream. There is no return value; the
// caller should read the new current token from the curPos, curToken, and
// curValue fields.
func (t *tokenizer) Next() {
	// The previous peek is now the current token.
	t.curPos = t.peekPos
	t.curToken = t.peekToken
	t.curValue = t.peekValue

	// Parse the next peek token.
	t.peekPos += token.Pos(len(t.curValue))
	for {
		if len(t.buf) == 0 {
			t.peekToken = token.EOF
			return
		}
		c := t.buf[0]
		switch {
		case c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v':
			// Skip whitespace. This character set is based on the following
			// reference, which describes isspace for C++ but should match C:
			// https://en.cppreference.com/w/cpp/string/byte/isspace
			t.peekPos++
			t.buf = t.buf[1:]
		case c == '(' || c == ')' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%':
			// Single-character tokens.
			// TODO: ++ (increment) and -- (decrement) operators.
			switch c {
			case '(':
				t.peekToken = token.LPAREN
			case ')':
				t.peekToken = token.RPAREN
			case '+':
				t.peekToken = token.ADD
			case '-':
				t.peekToken = token.SUB
			case '*':
				t.peekToken = token.MUL
			case '/':
				t.peekToken = token.QUO
			case '%':
				t.peekToken = token.REM
			}
			t.peekValue = t.buf[:1]
			t.buf = t.buf[1:]
			return
		case c >= '0' && c <= '9':
			// Numeric constant (int, float, etc.).
			// Scan to the first character that cannot be part of the literal.
			tokenLen := len(t.buf)
			hasDot := false
			for i, c := range t.buf {
				if c == '.' {
					hasDot = true
				}
				if c >= '0' && c <= '9' || c == '.' || c == '_' || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' {
					tokenLen = i + 1
				} else {
					break
				}
			}
			t.peekValue = t.buf[:tokenLen]
			t.buf = t.buf[tokenLen:]
			if hasDot {
				// Floating point constant: drop the 'f' suffix, which Go does
				// not accept.
				t.peekToken = token.FLOAT
				t.peekValue = strings.TrimRight(t.peekValue, "f")
			} else {
				// Integer constant: drop any unsigned/long suffixes. Real C
				// integer literals are more complicated than this, but this
				// is a close approximation.
				// https://en.cppreference.com/w/cpp/language/integer_literal
				t.peekToken = token.INT
				t.peekValue = strings.TrimRight(t.peekValue, "uUlL")
			}
			return
		case c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_':
			// Identifier. Consume all remaining characters that can be part
			// of the identifier.
			tokenLen := len(t.buf)
			for i, c := range t.buf {
				if c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_' {
					tokenLen = i + 1
				} else {
					break
				}
			}
			t.peekValue = t.buf[:tokenLen]
			t.buf = t.buf[tokenLen:]
			t.peekToken = token.IDENT
			return
case c == '"':
// String constant. Find the first '"' character that is not
// preceded by a backslash.
escape := false
tokenLen := len(t.buf)
for i, c := range t.buf {
if i != 0 && c == '"' && !escape {
tokenLen = i + 1
break
}
if !escape {
escape = c == '\\'
}
}
t.peekToken = token.STRING
t.peekValue = t.buf[:tokenLen]
t.buf = t.buf[tokenLen:]
return
		case c == '\'':
			// Char (rune) constant. Find the first '\'' character that is not
			// preceded by a backslash.
			escape := false
			tokenLen := len(t.buf)
			for i, c := range t.buf {
				if i != 0 && c == '\'' && !escape {
					tokenLen = i + 1
					break
				}
				// As with strings, an escape applies only to the next
				// character.
				if escape {
					escape = false
				} else {
					escape = c == '\\'
				}
			}
			t.peekToken = token.CHAR
			t.peekValue = t.buf[:tokenLen]
			t.buf = t.buf[tokenLen:]
			return
		default:
			t.peekToken = token.ILLEGAL
			return
		}
	}
}
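
// A minimal usage sketch (hypothetical driver code, not part of this file):
//
//	fset := token.NewFileSet()
//	f := fset.AddFile("defines.h", -1, 1000)
//	expr, err := parseConst(f.Pos(0), fset, "(10 + 2) * 3")
//	// On success, err is nil and expr holds the Go AST for `(10 + 2) * 3`.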