package cgo

// This file implements a parser of a subset of the C language, just enough to
// parse common #define statements to Go constant expressions.

import (
	"fmt"
	"go/ast"
	"go/scanner"
	"go/token"
	"strings"
)

// parseConst parses the given string as a C constant.
func parseConst(pos token.Pos, fset *token.FileSet, value string) (ast.Expr, *scanner.Error) {
	t := newTokenizer(pos, fset, value)
	expr, err := parseConstExpr(t)
	if err != nil {
		// Return the inner parse error directly instead of masking it with a
		// generic "unexpected token" error below.
		return nil, err
	}
	if t.token != token.EOF {
		return nil, &scanner.Error{
			Pos: t.fset.Position(t.pos),
			Msg: "unexpected token " + t.token.String(),
		}
	}
	return expr, nil
}

// parseConstExpr parses a stream of C tokens to a Go expression.
func parseConstExpr(t *tokenizer) (ast.Expr, *scanner.Error) {
	switch t.token {
	case token.LPAREN:
		lparen := t.pos
		t.Next()
		x, err := parseConstExpr(t)
		if err != nil {
			return nil, err
		}
		if t.token != token.RPAREN {
			return nil, unexpectedToken(t, token.RPAREN)
		}
		expr := &ast.ParenExpr{
			Lparen: lparen,
			X:      x,
			Rparen: t.pos,
		}
		t.Next()
		return expr, nil
	case token.INT, token.FLOAT, token.STRING, token.CHAR:
		expr := &ast.BasicLit{
			ValuePos: t.pos,
			Kind:     t.token,
			Value:    t.value,
		}
		t.Next()
		return expr, nil
	case token.IDENT:
		expr := &ast.Ident{
			NamePos: t.pos,
			Name:    "C." + t.value,
		}
		t.Next()
		return expr, nil
	case token.EOF:
		return nil, &scanner.Error{
			Pos: t.fset.Position(t.pos),
			Msg: "empty constant",
		}
	default:
		return nil, &scanner.Error{
			Pos: t.fset.Position(t.pos),
			Msg: fmt.Sprintf("unexpected token %s", t.token),
		}
	}
}

// unexpectedToken returns an error of the form "unexpected token FOO, expected
// BAR".
func unexpectedToken(t *tokenizer, expected token.Token) *scanner.Error {
	return &scanner.Error{
		Pos: t.fset.Position(t.pos),
		Msg: fmt.Sprintf("unexpected token %s, expected %s", t.token, expected),
	}
}

// tokenizer reads C source code and converts it to Go tokens.
type tokenizer struct {
	pos   token.Pos
	fset  *token.FileSet
	token token.Token
	value string
	buf   string
}

// newTokenizer initializes a new tokenizer, positioned at the first token in
// the string.
func newTokenizer(start token.Pos, fset *token.FileSet, buf string) *tokenizer {
	t := &tokenizer{
		pos:   start,
		fset:  fset,
		buf:   buf,
		token: token.ILLEGAL,
	}
	t.Next() // Parse the first token.
	return t
}
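// exampleTokenize is an illustrative sketch, not part of the original parser:
// it shows how a caller would drive the tokenizer by hand, reading one token
// per Next call until EOF. The file name "macros.h" and the file size passed
// to AddFile are assumptions made for this example only.
func exampleTokenize() {
	fset := token.NewFileSet()
	f := fset.AddFile("macros.h", -1, 64)
	t := newTokenizer(f.Pos(0), fset, "(FOO)")
	for t.token != token.EOF {
		// For "(FOO)" this prints the LPAREN, IDENT and RPAREN tokens in
		// turn, together with their raw string values.
		fmt.Printf("%-6s %q\n", t.token, t.value)
		t.Next()
	}
}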
// Next consumes the next token in the stream. There is no return value; read
// the next token from the pos, token and value properties.
func (t *tokenizer) Next() {
	t.pos += token.Pos(len(t.value))
	for {
		if len(t.buf) == 0 {
			t.token = token.EOF
			return
		}
		c := t.buf[0]
		switch {
		case c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v':
			// Skip whitespace.
			// Based on this source, not sure whether it represents C whitespace:
			// https://en.cppreference.com/w/cpp/string/byte/isspace
			t.pos++
			t.buf = t.buf[1:]
		case c == '(' || c == ')':
			// Single-character tokens.
			switch c {
			case '(':
				t.token = token.LPAREN
			case ')':
				t.token = token.RPAREN
			}
			t.value = t.buf[:1]
			t.buf = t.buf[1:]
			return
		case c >= '0' && c <= '9':
			// Numeric constant (int, float, etc.).
			// Find the last non-numeric character.
			tokenLen := len(t.buf)
			hasDot := false
			for i, c := range t.buf {
				if c == '.' {
					hasDot = true
				}
				if c >= '0' && c <= '9' || c == '.' || c == '_' || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' {
					tokenLen = i + 1
				} else {
					break
				}
			}
			t.value = t.buf[:tokenLen]
			t.buf = t.buf[tokenLen:]
			if hasDot {
				// Floating point constant.
				t.token = token.FLOAT
				t.value = strings.TrimRight(t.value, "f")
			} else {
				// Integer constants are more complicated than this but this is
				// a close approximation.
				// https://en.cppreference.com/w/cpp/language/integer_literal
				t.token = token.INT
				t.value = strings.TrimRight(t.value, "uUlL")
			}
			return
		case c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_':
			// Identifier. Find all remaining characters that are part of this
			// identifier.
			tokenLen := len(t.buf)
			for i, c := range t.buf {
				if c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_' {
					tokenLen = i + 1
				} else {
					break
				}
			}
			t.value = t.buf[:tokenLen]
			t.buf = t.buf[tokenLen:]
			t.token = token.IDENT
			return
		case c == '"':
			// String constant. Find the first '"' character that is not
			// preceded by a backslash.
			escape := false
			tokenLen := len(t.buf)
			for i, c := range t.buf {
				if i != 0 && c == '"' && !escape {
					tokenLen = i + 1
					break
				}
				if escape {
					// The previous character was a backslash: this character
					// is escaped and does not itself escape the next one.
					escape = false
				} else {
					escape = c == '\\'
				}
			}
			t.token = token.STRING
			t.value = t.buf[:tokenLen]
			t.buf = t.buf[tokenLen:]
			return
		case c == '\'':
			// Char (rune) constant. Find the first '\'' character that is not
			// preceded by a backslash.
			escape := false
			tokenLen := len(t.buf)
			for i, c := range t.buf {
				if i != 0 && c == '\'' && !escape {
					tokenLen = i + 1
					break
				}
				if escape {
					escape = false
				} else {
					escape = c == '\\'
				}
			}
			t.token = token.CHAR
			t.value = t.buf[:tokenLen]
			t.buf = t.buf[tokenLen:]
			return
		default:
			t.token = token.ILLEGAL
			return
		}
	}
}
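// exampleParseConst is an illustrative sketch, not part of the original file:
// it runs parseConst end-to-end on a typical #define value and inspects the
// resulting Go AST. The file name "macros.h" and the file size passed to
// AddFile are assumptions for this example; in the real cgo package the
// positions come from the preprocessed C source.
func exampleParseConst() {
	fset := token.NewFileSet()
	f := fset.AddFile("macros.h", -1, 64)
	expr, err := parseConst(f.Pos(0), fset, "(0x40uL)")
	if err != nil {
		fmt.Println("parse error:", err)
		return
	}
	// For "(0x40uL)" the parser returns an *ast.ParenExpr whose X field is an
	// *ast.BasicLit with Kind token.INT and Value "0x40" (the "uL" suffix is
	// stripped by the tokenizer).
	paren := expr.(*ast.ParenExpr)
	lit := paren.X.(*ast.BasicLit)
	fmt.Println(lit.Kind, lit.Value) // prints: INT 0x40
}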