
ISLE: lexer simplifications (#9108)

* ISLE: reduce allocations when lexing integers

Instead of collecting digits into a temporary `Vec<u8>`, use a slice of
the original underlying `buf`, and only allocate a temporary `String` if
the literal contains an `_`.

Copyright (c) 2024, Arm Limited.

Signed-off-by: Karl Meakin <karl.meakin@arm.com>
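
A freestanding sketch of the idea, assuming a `Cow`-returning helper (the `digits` name is illustrative; in the PR the logic is inline in the lexer's integer path):

    use std::borrow::Cow;

    /// Strip `_` digit separators, allocating only when one is present.
    fn digits(raw: &str) -> Cow<'_, str> {
        if raw.contains('_') {
            Cow::Owned(raw.replace('_', "")) // rare path: allocate a cleaned copy
        } else {
            Cow::Borrowed(raw) // common path: reuse the original slice
        }
    }

    fn main() {
        assert!(matches!(digits("1234"), Cow::Borrowed(_)));
        assert_eq!(digits("1_234"), "1234"); // Cow<str> compares against &str
    }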

* ISLE: don't use the `vec![]` macro in lexer tests

`Vec` can be compared directly against arrays and slices via its
`PartialEq` impls, so the expected values don't need a `vec![]` allocation.

Copyright (c) 2024, Arm Limited.

Signed-off-by: Karl Meakin <karlwfmeakin@gmail.com>
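
A minimal demonstration of the `PartialEq` impls this relies on:

    fn main() {
        let toks = vec![1, 2, 3];
        assert_eq!(toks, [1, 2, 3]);      // Vec<T>: PartialEq<[U; N]>
        assert_eq!(toks, &[1, 2, 3][..]); // ...and PartialEq<&[U]>
    }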

* ISLE: create `Files`

Centralize all file related arenas in `Files` struct.

Copyright (c) 2024, Arm Limited.

Signed-off-by: Karl Meakin <karl.meakin@arm.com>
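
A small usage sketch, assuming this PR's version of the crate (the file name and contents are made up; the API calls mirror the diff below):

    use cranelift_isle::files::Files;
    use std::sync::Arc;

    fn main() {
        // Build all per-file arenas once, then share them behind an Arc,
        // as the updated fuzz target does.
        let files = Arc::new(Files::from_names_and_contents([(
            "input.isle".to_string(),
            "(type u32 (primitive u32))\n".to_string(),
        )]));
        assert_eq!(files.file_name(0), Some("input.isle"));
        assert_eq!(files.file_line_map(0).unwrap().line(0), 0);
    }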

* ISLE: don't track line/col in `Pos`

They are already tracked in `Files`, so there is no need to track them in
`Pos` as well. This lets us simplify the implementation of
`Lexer::advance_pos` a bit.

Copyright (c) 2024, Arm Limited.

Signed-off-by: Karl Meakin <karl.meakin@arm.com>
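
For reference, a self-contained sketch of the on-demand lookup that replaces the eager line/col bookkeeping (it mirrors `LineMap` in `files.rs` below):

    /// Byte offsets just past each '\n', as in the PR's `LineMap`.
    struct LineMap {
        line_ends: Vec<usize>,
    }

    impl LineMap {
        fn from_str(text: &str) -> Self {
            Self {
                line_ends: text.match_indices('\n').map(|(i, _)| i + 1).collect(),
            }
        }

        /// 0-based line containing byte offset `pos`: a binary search,
        /// so `Pos` itself only needs `file` and `offset`.
        fn line(&self, pos: usize) -> usize {
            self.line_ends.partition_point(|&end| end <= pos)
        }
    }

    fn main() {
        let map = LineMap::from_str("line 0\nline 1\nline 2\n");
        assert_eq!(map.line(0), 0);  // first byte of the file
        assert_eq!(map.line(7), 1);  // first byte after the first '\n'
        assert_eq!(map.line(20), 2); // last byte of the file
    }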

* ISLE: don't pass `Files` into every pass

`Files` was being threaded through a lot of passes where it wasn't
needed. It is only needed for reporting errors in `compile.rs` and for
reporting line numbers when printing in `codegen.rs`.

Copyright (c) 2024, Arm Limited.

Signed-off-by: Karl Meakin <karl.meakin@arm.com>

* ISLE: store `&str` in `Lexer`

Store the text being lexed as `&str`, rather than `&[u8]`, so that
substrings don't need to be rechecked for UTF-8 validity when lexing
identifiers or integers.

Copyright (c) 2024, Arm Limited.

Signed-off-by: Karl Meakin <karl.meakin@arm.com>
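
A minimal illustration of the cost this removes (the source string is made up):

    fn main() {
        let src: &str = "(decl lower (Inst) InstOutput)";

        // With &[u8] storage, every substring needed an explicit UTF-8 re-check:
        let bytes: &[u8] = src.as_bytes();
        let sym_old = std::str::from_utf8(&bytes[1..5]).expect("should be UTF-8");

        // With &str storage, slicing already yields &str; no re-validation pass:
        let sym_new = &src[1..5];

        assert_eq!(sym_old, "decl");
        assert_eq!(sym_new, "decl");
    }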

* ISLE: add `peek_byte` helper for lexer

Copyright (c) 2024, Arm Limited.

Signed-off-by: Karl Meakin <karl.meakin@arm.com>
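
For reference, a freestanding equivalent of the helper (the real one is a method on `Lexer`, shown in the diff below): a bounds-checked, non-consuming byte lookup.

    fn peek_byte(src: &str, offset: usize) -> Option<u8> {
        src.as_bytes().get(offset).copied()
    }

    fn main() {
        assert_eq!(peek_byte("(a)", 0), Some(b'('));
        assert_eq!(peek_byte("(a)", 3), None); // past the end: no panic
    }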

* ISLE: tests for lexing integers

Copyright (c) 2024, Arm Limited.

Signed-off-by: Karl Meakin <karl.meakin@arm.com>

* ISLE: don't parse integers twice

Instead of trying to parse an integer as an `i128`, and then as a
`u128` if that fails, parse it only as a `u128` and then check for
`i128::MIN`.

Copyright (c) 2024, Arm Limited.

Signed-off-by: Karl Meakin <karl.meakin@arm.com>
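
A standalone sketch of the single-parse scheme (the `parse_int` signature is illustrative; in the PR this logic is inline in the lexer):

    fn parse_int(digits: &str, neg: bool, radix: u32) -> Result<i128, String> {
        // One parse covers both the signed range (-2^127..2^127) and the
        // unsigned range (0..2^128); the bits are stored in an i128 token.
        let num = u128::from_str_radix(digits, radix).map_err(|e| e.to_string())?;
        match (neg, num) {
            // Magnitude 2^127: `num as i128` wraps to i128::MIN, whose
            // negation would overflow, so reject it (as the old two-parse
            // code also did).
            (true, 0x8000_0000_0000_0000_0000_0000_0000_0000) => {
                Err("integer literal cannot fit in i128".to_string())
            }
            (true, _) => Ok(-(num as i128)),
            (false, _) => Ok(num as i128),
        }
    }

    fn main() {
        assert_eq!(parse_int("23", false, 10), Ok(23));
        assert_eq!(parse_int("ff", true, 16), Ok(-255));
        // The all-ones u128 reads back as -1, matching the new lexer test.
        assert_eq!(parse_int(&"f".repeat(32), false, 16), Ok(-1));
    }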

---------

Signed-off-by: Karl Meakin <karl.meakin@arm.com>
Signed-off-by: Karl Meakin <karlwfmeakin@gmail.com>
Karl Meakin, committed via GitHub
commit e0a907a94a
11 changed files (lines changed):

   14  cranelift/isle/fuzz/fuzz_targets/compile.rs
    9  cranelift/isle/isle/src/ast.rs
   16  cranelift/isle/isle/src/codegen.rs
   59  cranelift/isle/isle/src/compile.rs
   35  cranelift/isle/isle/src/error.rs
  133  cranelift/isle/isle/src/files.rs
  292  cranelift/isle/isle/src/lexer.rs
    1  cranelift/isle/isle/src/lib.rs
   15  cranelift/isle/isle/src/overlap.rs
   26  cranelift/isle/isle/src/parser.rs
   71  cranelift/isle/isle/src/sema.rs

14
cranelift/isle/fuzz/fuzz_targets/compile.rs

@@ -1,11 +1,14 @@
#![no_main]
use std::sync::Arc;
use cranelift_isle::files::Files;
use libfuzzer_sys::fuzz_target;
fuzz_target!(|s: &str| {
fuzz_target!(|src: &str| {
let _ = env_logger::try_init();
let lexer = cranelift_isle::lexer::Lexer::from_str(s, "fuzz-input.isle");
let lexer = cranelift_isle::lexer::Lexer::new(0, src);
log::debug!("lexer = {:?}", lexer);
let lexer = match lexer {
Ok(l) => l,
@@ -19,7 +22,12 @@ fuzz_target!(|s: &str| {
Err(_) => return,
};
let code = cranelift_isle::compile::compile(&defs, &Default::default());
let files = Arc::new(Files::from_names_and_contents([(
"fuzz-input.isle".to_string(),
src.to_string(),
)]));
let code = cranelift_isle::compile::compile(files, &defs, &Default::default());
log::debug!("code = {:?}", code);
let code = match code {
Ok(c) => c,

9
cranelift/isle/isle/src/ast.rs

@@ -4,15 +4,6 @@
use crate::lexer::Pos;
use crate::log;
use std::sync::Arc;
/// The parsed form of an ISLE file.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Defs {
pub defs: Vec<Def>,
pub filenames: Vec<Arc<str>>,
pub file_texts: Vec<Arc<str>>,
}
/// One toplevel form in an ISLE file.
#[derive(Clone, PartialEq, Eq, Debug)]

16
cranelift/isle/isle/src/codegen.rs

@@ -1,11 +1,13 @@
//! Generate Rust code from a series of Sequences.
use crate::files::Files;
use crate::sema::{ExternalSig, ReturnKind, Sym, Term, TermEnv, TermId, Type, TypeEnv, TypeId};
use crate::serialize::{Block, ControlFlow, EvalStep, MatchArm};
use crate::stablemapset::StableSet;
use crate::trie_again::{Binding, BindingId, Constraint, RuleSet};
use std::fmt::Write;
use std::slice::Iter;
use std::sync::Arc;
/// Options for code generation.
#[derive(Clone, Debug, Default)]
@@ -17,16 +19,18 @@ pub struct CodegenOptions {
/// Emit Rust source code for the given type and term environments.
pub fn codegen(
files: Arc<Files>,
typeenv: &TypeEnv,
termenv: &TermEnv,
terms: &[(TermId, RuleSet)],
options: &CodegenOptions,
) -> String {
Codegen::compile(typeenv, termenv, terms).generate_rust(options)
Codegen::compile(files, typeenv, termenv, terms).generate_rust(options)
}
#[derive(Clone, Debug)]
struct Codegen<'a> {
files: Arc<Files>,
typeenv: &'a TypeEnv,
termenv: &'a TermEnv,
terms: &'a [(TermId, RuleSet)],
@@ -91,11 +95,13 @@ impl<'a, W: Write> BodyContext<'a, W> {
impl<'a> Codegen<'a> {
fn compile(
files: Arc<Files>,
typeenv: &'a TypeEnv,
termenv: &'a TermEnv,
terms: &'a [(TermId, RuleSet)],
) -> Codegen<'a> {
Codegen {
files,
typeenv,
termenv,
terms,
@@ -121,7 +127,7 @@ impl<'a> Codegen<'a> {
"// Generated automatically from the instruction-selection DSL code in:",
)
.unwrap();
for file in &self.typeenv.filenames {
for file in &self.files.file_names {
writeln!(code, "// - {file}").unwrap();
}
@@ -335,7 +341,7 @@ impl<L: Length, C> Length for ContextIterWrapper<L, C> {{
code,
"\n/// Internal type {}: defined at {}.",
name,
pos.pretty_print_line(&self.typeenv.filenames[..])
pos.pretty_print_line(&self.files)
)
.unwrap();
@@ -454,7 +460,7 @@ impl<L: Length, C> Length for ContextIterWrapper<L, C> {{
term_name,
termdata
.decl_pos
.pretty_print_line(&self.typeenv.filenames[..])
.pretty_print_line(&self.files)
),
}
};
@@ -640,7 +646,7 @@ impl<L: Length, C> Length for ContextIterWrapper<L, C> {{
ctx.out,
"{}// Rule at {}.",
&ctx.indent,
pos.pretty_print_line(&self.typeenv.filenames)
pos.pretty_print_line(&self.files)
)?;
write!(ctx.out, "{}", &ctx.indent)?;
match ret_kind {

59
cranelift/isle/isle/src/compile.rs

@@ -1,16 +1,34 @@
//! Compilation process, from AST to Sema to Sequences of Insts.
use std::path::Path;
use std::sync::Arc;
use crate::error::Errors;
use crate::{ast, codegen, sema};
use crate::files::Files;
use crate::{ast, codegen, overlap, sema};
/// Compile the given AST definitions into Rust source code.
pub fn compile(defs: &ast::Defs, options: &codegen::CodegenOptions) -> Result<String, Errors> {
let mut typeenv = sema::TypeEnv::from_ast(defs)?;
let termenv = sema::TermEnv::from_ast(&mut typeenv, defs)?;
let terms = crate::overlap::check(&typeenv, &termenv)?;
Ok(codegen::codegen(&typeenv, &termenv, &terms, options))
pub fn compile(
files: Arc<Files>,
defs: &[ast::Def],
options: &codegen::CodegenOptions,
) -> Result<String, Errors> {
let mut type_env = match sema::TypeEnv::from_ast(defs) {
Ok(type_env) => type_env,
Err(errs) => return Err(Errors::new(errs, files)),
};
let term_env = match sema::TermEnv::from_ast(&mut type_env, defs) {
Ok(term_env) => term_env,
Err(errs) => return Err(Errors::new(errs, files)),
};
let terms = match overlap::check(&term_env) {
Ok(terms) => terms,
Err(errs) => return Err(Errors::new(errs, files)),
};
Ok(codegen::codegen(
files, &type_env, &term_env, &terms, options,
))
}
/// Compile the given files into Rust source code.
@@ -18,7 +36,30 @@ pub fn from_files<P: AsRef<Path>>(
inputs: impl IntoIterator<Item = P>,
options: &codegen::CodegenOptions,
) -> Result<String, Errors> {
let lexer = crate::lexer::Lexer::from_files(inputs)?;
let defs = crate::parser::parse(lexer)?;
compile(&defs, options)
let files = match Files::from_paths(inputs) {
Ok(files) => files,
Err((path, err)) => {
return Err(Errors::from_io(
err,
format!("cannot read file {}", path.display()),
))
}
};
let files = Arc::new(files);
let mut defs = Vec::new();
for (file, src) in files.file_texts.iter().enumerate() {
let lexer = match crate::lexer::Lexer::new(file, src) {
Ok(lexer) => lexer,
Err(err) => return Err(Errors::new(vec![err], files)),
};
match crate::parser::parse(lexer) {
Ok(mut ds) => defs.append(&mut ds),
Err(err) => return Err(Errors::new(vec![err], files)),
}
}
compile(files, &defs, options)
}

35
cranelift/isle/isle/src/error.rs

@@ -2,14 +2,13 @@
use std::sync::Arc;
use crate::lexer::Pos;
use crate::{files::Files, lexer::Pos};
/// A collection of errors from attempting to compile some ISLE source files.
pub struct Errors {
/// The individual errors.
pub errors: Vec<Error>,
pub(crate) filenames: Vec<Arc<str>>,
pub(crate) file_texts: Vec<Arc<str>>,
pub(crate) files: Arc<Files>,
}
impl std::fmt::Debug for Errors {
@@ -139,6 +138,11 @@ pub enum Error {
}
impl Errors {
/// Create new Errors
pub fn new(errors: Vec<Error>, files: Arc<Files>) -> Self {
Self { errors, files }
}
/// Create `isle::Errors` from the given I/O error and context.
pub fn from_io(error: std::io::Error, context: impl Into<String>) -> Self {
Errors {
@@ -146,8 +150,7 @@ impl Errors {
error,
context: context.into(),
}],
filenames: Vec::new(),
file_texts: Vec::new(),
files: Arc::new(Files::default()),
}
}
@@ -161,7 +164,12 @@ impl Errors {
let w = termcolor::BufferWriter::stderr(termcolor::ColorChoice::Auto);
let mut b = w.buffer();
let mut files = codespan_reporting::files::SimpleFiles::new();
for (name, source) in self.filenames.iter().zip(self.file_texts.iter()) {
for (name, source) in self
.files
.file_names
.iter()
.zip(self.files.file_texts.iter())
{
files.add(name, source);
}
for diagnostic in diagnostics {
@@ -179,21 +187,16 @@ impl Errors {
f: &mut std::fmt::Formatter,
diagnostics: Vec<Diagnostic<usize>>,
) -> std::fmt::Result {
let line_ends: Vec<Vec<_>> = self
.file_texts
.iter()
.map(|text| text.match_indices('\n').map(|(i, _)| i + 1).collect())
.collect();
let pos = |file_id: usize, offset| {
let ends = &line_ends[file_id];
let line0 = ends.partition_point(|&end| end <= offset);
let text = &self.file_texts[file_id];
let ends = self.files.file_line_map(file_id).unwrap();
let line0 = ends.line(offset);
let text = &self.files.file_texts[file_id];
let start = line0.checked_sub(1).map_or(0, |prev| ends[prev]);
let end = ends.get(line0).copied().unwrap_or(text.len());
let col = offset - start + 1;
format!(
"{}:{}:{}: {}",
self.filenames[file_id],
self.files.file_names[file_id],
line0 + 1,
col,
&text[start..end]
@@ -243,8 +246,6 @@ impl Span {
to: Pos {
file: pos.file,
offset: pos.offset + 1,
line: pos.line,
col: pos.col + 1,
},
}
}

133
cranelift/isle/isle/src/files.rs

@@ -0,0 +1,133 @@
#![allow(missing_docs)]
use std::ops::Index;
use std::path::{Path, PathBuf};
#[derive(Default, Clone, PartialEq, Eq, Debug)]
pub struct Files {
/// Arena of filenames from the input source.
///
/// Indexed via `Pos::file`.
pub file_names: Vec<String>,
/// Arena of file source texts.
///
/// Indexed via `Pos::file`.
pub file_texts: Vec<String>,
/// Arena of file line maps.
///
/// Indexed via `Pos::file`.
pub file_line_maps: Vec<LineMap>,
}
#[derive(Default, Clone, PartialEq, Eq, Debug)]
pub struct LineMap {
/// Mapping from line number to starting byte position.
line_ends: Vec<usize>,
}
impl Index<usize> for LineMap {
type Output = usize;
fn index(&self, index: usize) -> &Self::Output {
&self.line_ends[index]
}
}
impl LineMap {
pub fn from_str(text: &str) -> Self {
let line_ends = text.match_indices('\n').map(|(i, _)| i + 1).collect();
Self { line_ends }
}
/// Get the line on which `pos` occurs
pub fn line(&self, pos: usize) -> usize {
self.line_ends.partition_point(|&end| end <= pos)
}
/// Get the starting byte position of `line`.
pub fn get(&self, line: usize) -> Option<&usize> {
self.line_ends.get(line)
}
}
impl Files {
pub fn from_paths<P: AsRef<Path>>(
paths: impl IntoIterator<Item = P>,
) -> Result<Self, (PathBuf, std::io::Error)> {
let mut file_names = Vec::new();
let mut file_texts = Vec::new();
let mut file_line_maps = Vec::new();
for path in paths {
let path = path.as_ref();
let contents =
std::fs::read_to_string(path).map_err(|err| (path.to_path_buf(), err))?;
let name = path.display().to_string();
file_line_maps.push(LineMap::from_str(&contents));
file_names.push(name);
file_texts.push(contents);
}
Ok(Self {
file_names,
file_texts,
file_line_maps,
})
}
pub fn from_names_and_contents(files: impl IntoIterator<Item = (String, String)>) -> Self {
let mut file_names = Vec::new();
let mut file_texts = Vec::new();
let mut file_line_maps = Vec::new();
for (name, contents) in files {
file_line_maps.push(LineMap::from_str(&contents));
file_names.push(name);
file_texts.push(contents);
}
Self {
file_names,
file_texts,
file_line_maps,
}
}
pub fn file_name(&self, file: usize) -> Option<&str> {
self.file_names.get(file).map(|x| x.as_str())
}
pub fn file_text(&self, file: usize) -> Option<&str> {
self.file_texts.get(file).map(|x| x.as_str())
}
pub fn file_line_map(&self, file: usize) -> Option<&LineMap> {
self.file_line_maps.get(file)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn line_map() {
let line_map = LineMap::from_str("");
assert_eq!(line_map.line_ends, &[]);
assert_eq!(line_map.line(0), 0);
assert_eq!(line_map.line(100), 0);
let line_map = LineMap::from_str("line 0");
assert_eq!(line_map.line_ends, &[]);
assert_eq!(line_map.line(0), 0);
assert_eq!(line_map.line(100), 0);
let line_map = LineMap::from_str("line 0\nline 1");
assert_eq!(line_map.line_ends, &[7]);
assert_eq!(line_map.line(0), 0);
assert_eq!(line_map.line(100), 1);
}
}

292
cranelift/isle/isle/src/lexer.rs

@@ -1,29 +1,18 @@
//! Lexer for the ISLE language.
use crate::error::{Error, Errors, Span};
use std::borrow::Cow;
use std::path::{Path, PathBuf};
use std::sync::Arc;
type Result<T> = std::result::Result<T, Errors>;
use crate::error::{Error, Span};
use crate::files::Files;
type Result<T> = std::result::Result<T, Error>;
/// The lexer.
///
/// Breaks source text up into a sequence of tokens (with source positions).
#[derive(Clone, Debug)]
pub struct Lexer<'a> {
/// Arena of filenames from the input source.
///
/// Indexed via `Pos::file`.
pub filenames: Vec<Arc<str>>,
/// Arena of file source texts.
///
/// Indexed via `Pos::file`.
pub file_texts: Vec<Arc<str>>,
file_starts: Vec<usize>,
buf: Cow<'a, [u8]>,
pub struct Lexer<'src> {
src: &'src str,
pos: Pos,
lookahead: Option<(Pos, Token)>,
}
@@ -38,16 +27,21 @@ pub struct Pos {
pub file: usize,
/// This source position's byte offset in the file.
pub offset: usize,
/// This source position's line number in the file.
pub line: usize,
/// This source position's column number in the file.
pub col: usize,
}
impl Pos {
/// Create a new `Pos`.
pub fn new(file: usize, offset: usize) -> Self {
Self { file, offset }
}
/// Print this source position as `file.isle line 12`.
pub fn pretty_print_line(&self, filenames: &[Arc<str>]) -> String {
format!("{} line {}", filenames[self.file], self.line)
pub fn pretty_print_line(&self, files: &Files) -> String {
format!(
"{} line {}",
files.file_name(self.file).unwrap(),
files.file_line_map(self.file).unwrap().line(self.offset)
)
}
}
@@ -66,69 +60,12 @@ pub enum Token {
At,
}
impl<'a> Lexer<'a> {
/// Create a new lexer for the given source contents and filename.
pub fn from_str(s: &'a str, filename: &'a str) -> Result<Lexer<'a>> {
let mut l = Lexer {
filenames: vec![filename.into()],
file_texts: vec![s.into()],
file_starts: vec![0],
buf: Cow::Borrowed(s.as_bytes()),
pos: Pos {
file: 0,
offset: 0,
line: 1,
col: 0,
},
lookahead: None,
};
l.reload()?;
Ok(l)
}
/// Create a new lexer from the given files.
pub fn from_files<P>(file_paths: impl IntoIterator<Item = P>) -> Result<Lexer<'a>>
where
P: AsRef<Path>,
{
let mut files = vec![];
for f in file_paths.into_iter() {
let f = f.as_ref().to_path_buf();
let s = std::fs::read_to_string(f.as_path())
.map_err(|e| Errors::from_io(e, format!("failed to read file: {}", f.display())))?;
files.push((f, s));
}
Self::from_file_contents(files)
}
/// Create a new lexer from the given files and contents.
pub fn from_file_contents(files: Vec<(PathBuf, String)>) -> Result<Lexer<'a>> {
let mut filenames = Vec::<Arc<str>>::new();
let mut file_texts = Vec::<Arc<str>>::new();
for (f, content) in files.iter() {
filenames.push(f.display().to_string().into());
file_texts.push(content.as_str().into());
}
assert!(!filenames.is_empty());
let mut file_starts = vec![];
let mut buf = String::new();
for text in &file_texts {
file_starts.push(buf.len());
buf += text;
buf += "\n";
}
impl<'src> Lexer<'src> {
/// Create a new lexer for the given source contents
pub fn new(file: usize, src: &'src str) -> Result<Lexer<'src>> {
let mut l = Lexer {
filenames,
file_texts,
buf: Cow::Owned(buf.into_bytes()),
file_starts,
pos: Pos {
file: 0,
offset: 0,
line: 1,
col: 0,
},
src,
pos: Pos::new(file, 0),
lookahead: None,
};
l.reload()?;
@@ -137,39 +74,17 @@ impl<'a> Lexer<'a> {
/// Get the lexer's current source position.
pub fn pos(&self) -> Pos {
Pos {
file: self.pos.file,
offset: self.pos.offset - self.file_starts[self.pos.file],
line: self.pos.line,
col: self.pos.col,
}
self.pos
}
fn advance_pos(&mut self) {
self.pos.col += 1;
if self.buf[self.pos.offset] == b'\n' {
self.pos.line += 1;
self.pos.col = 0;
}
self.pos.offset += 1;
if self.pos.file + 1 < self.file_starts.len() {
let next_start = self.file_starts[self.pos.file + 1];
if self.pos.offset >= next_start {
assert!(self.pos.offset == next_start);
self.pos.file += 1;
self.pos.line = 1;
}
}
}
fn error(&self, pos: Pos, msg: impl Into<String>) -> Errors {
Errors {
errors: vec![Error::ParseError {
msg: msg.into(),
span: Span::new_single(pos),
}],
filenames: self.filenames.clone(),
file_texts: self.file_texts.clone(),
fn error(&self, pos: Pos, msg: impl Into<String>) -> Error {
Error::ParseError {
msg: msg.into(),
span: Span::new_single(pos),
}
}
@@ -190,26 +105,26 @@ impl<'a> Lexer<'a> {
}
// Skip any whitespace and any comments.
while self.pos.offset < self.buf.len() {
if self.buf[self.pos.offset].is_ascii_whitespace() {
self.advance_pos();
continue;
}
if self.buf[self.pos.offset] == b';' {
while self.pos.offset < self.buf.len() && self.buf[self.pos.offset] != b'\n' {
self.advance_pos();
while let Some(c) = self.peek_byte() {
match c {
c if c.is_ascii_whitespace() => self.advance_pos(),
b';' => {
while let Some(c) = self.peek_byte() {
match c {
b'\n' => break,
_ => self.advance_pos(),
}
}
}
continue;
_ => break,
}
break;
}
if self.pos.offset == self.buf.len() {
let Some(c) = self.peek_byte() else {
return Ok(None);
}
};
let char_pos = self.pos();
match self.buf[self.pos.offset] {
match c {
b'(' => {
self.advance_pos();
Ok(Some((char_pos, Token::LParen)))
@@ -225,44 +140,43 @@ impl<'a> Lexer<'a> {
c if is_sym_first_char(c) => {
let start = self.pos.offset;
let start_pos = self.pos();
while self.pos.offset < self.buf.len()
&& is_sym_other_char(self.buf[self.pos.offset])
{
self.advance_pos();
while let Some(c) = self.peek_byte() {
match c {
c if is_sym_other_char(c) => self.advance_pos(),
_ => break,
}
}
let end = self.pos.offset;
let s = std::str::from_utf8(&self.buf[start..end])
.expect("Only ASCII characters, should be UTF-8");
let s = &self.src[start..end];
debug_assert!(!s.is_empty());
Ok(Some((start_pos, Token::Symbol(s.to_string()))))
}
c @ (b'0'..=b'9' | b'-') => {
let start_pos = self.pos();
let neg = if c == b'-' {
let mut neg = false;
if c == b'-' {
self.advance_pos();
true
} else {
false
};
neg = true;
}
let mut radix = 10;
// Check for prefixed literals.
match (
self.buf.get(self.pos.offset),
self.buf.get(self.pos.offset + 1),
self.src.as_bytes().get(self.pos.offset),
self.src.as_bytes().get(self.pos.offset + 1),
) {
(Some(b'0'), Some(b'x')) | (Some(b'0'), Some(b'X')) => {
(Some(b'0'), Some(b'x' | b'X')) => {
self.advance_pos();
self.advance_pos();
radix = 16;
}
(Some(b'0'), Some(b'o')) => {
(Some(b'0'), Some(b'o' | b'O')) => {
self.advance_pos();
self.advance_pos();
radix = 8;
}
(Some(b'0'), Some(b'b')) => {
(Some(b'0'), Some(b'b' | b'B')) => {
self.advance_pos();
self.advance_pos();
radix = 2;
@@ -273,32 +187,37 @@ impl<'a> Lexer<'a> {
// Find the range in the buffer for this integer literal. We'll
// pass this range to `i64::from_str_radix` to do the actual
// string-to-integer conversion.
let mut s = vec![];
while self.pos.offset < self.buf.len()
&& ((radix <= 10 && self.buf[self.pos.offset].is_ascii_digit())
|| (radix == 16 && self.buf[self.pos.offset].is_ascii_hexdigit())
|| self.buf[self.pos.offset] == b'_')
{
if self.buf[self.pos.offset] != b'_' {
s.push(self.buf[self.pos.offset]);
let start = self.pos.offset;
while let Some(c) = self.peek_byte() {
match c {
b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_' => self.advance_pos(),
_ => break,
}
self.advance_pos();
}
let s_utf8 = std::str::from_utf8(&s[..]).unwrap();
let end = self.pos.offset;
let s = &self.src[start..end];
let s = if s.contains('_') {
Cow::Owned(s.replace('_', ""))
} else {
Cow::Borrowed(s)
};
// Support either signed range (-2^127..2^127) or
// unsigned range (0..2^128).
let num = i128::from_str_radix(s_utf8, radix)
.or_else(|_| u128::from_str_radix(s_utf8, radix).map(|val| val as i128))
.map_err(|e| self.error(start_pos, e.to_string()))?;
let tok = if neg {
Token::Int(num.checked_neg().ok_or_else(|| {
self.error(start_pos, "integer literal cannot fit in i128")
})?)
} else {
Token::Int(num)
let num = match u128::from_str_radix(&s, radix) {
Ok(num) => num,
Err(err) => return Err(self.error(start_pos, err.to_string())),
};
let num = match (neg, num) {
(true, 0x80000000000000000000000000000000) => {
return Err(self.error(start_pos, "integer literal cannot fit in i128"))
}
(true, _) => -(num as i128),
(false, _) => num as i128,
};
let tok = Token::Int(num);
Ok(Some((start_pos, tok)))
}
c => Err(self.error(self.pos, format!("Unexpected character '{c}'"))),
@@ -313,7 +232,7 @@ impl<'a> Lexer<'a> {
}
fn reload(&mut self) -> Result<()> {
if self.lookahead.is_none() && self.pos.offset < self.buf.len() {
if self.lookahead.is_none() && self.pos.offset < self.src.len() {
self.lookahead = self.next_token()?;
}
Ok(())
@@ -328,6 +247,10 @@ impl<'a> Lexer<'a> {
pub fn eof(&self) -> bool {
self.lookahead.is_none()
}
fn peek_byte(&self) -> Option<u8> {
self.src.as_bytes().get(self.pos.offset).copied()
}
}
impl Token {
@@ -346,9 +269,10 @@ impl Token {
mod test {
use super::*;
fn lex(s: &str, file: &str) -> Vec<Token> {
#[track_caller]
fn lex(src: &str) -> Vec<Token> {
let mut toks = vec![];
let mut lexer = Lexer::from_str(s, file).unwrap();
let mut lexer = Lexer::new(0, src).unwrap();
while let Some((_, tok)) = lexer.next().unwrap() {
toks.push(tok);
}
@@ -358,11 +282,8 @@ mod test {
#[test]
fn lexer_basic() {
assert_eq!(
lex(
";; comment\n; another\r\n \t(one two three 23 -568 )\n",
"lexer_basic"
),
vec![
lex(";; comment\n; another\r\n \t(one two three 23 -568 )\n"),
[
Token::LParen,
Token::Symbol("one".to_string()),
Token::Symbol("two".to_string()),
@@ -376,22 +297,19 @@ mod test {
#[test]
fn ends_with_sym() {
assert_eq!(
lex("asdf", "ends_with_sym"),
vec![Token::Symbol("asdf".to_string()),]
);
assert_eq!(lex("asdf"), [Token::Symbol("asdf".to_string())]);
}
#[test]
fn ends_with_num() {
assert_eq!(lex("23", "ends_with_num"), vec![Token::Int(23)],);
assert_eq!(lex("23"), [Token::Int(23)]);
}
#[test]
fn weird_syms() {
assert_eq!(
lex("(+ [] => !! _test!;comment\n)", "weird_syms"),
vec![
lex("(+ [] => !! _test!;comment\n)"),
[
Token::LParen,
Token::Symbol("+".to_string()),
Token::Symbol("[]".to_string()),
@@ -402,4 +320,24 @@
]
);
}
#[test]
fn integers() {
assert_eq!(
lex("0 1 -1"),
[Token::Int(0), Token::Int(1), Token::Int(-1)]
);
assert_eq!(
lex("340_282_366_920_938_463_463_374_607_431_768_211_455"),
[Token::Int(-1)]
);
assert_eq!(
lex("170_141_183_460_469_231_731_687_303_715_884_105_727"),
[Token::Int(i128::MAX)]
);
assert!(Lexer::new(0, "-170_141_183_460_469_231_731_687_303_715_884_105_728").is_err())
}
}

1
cranelift/isle/isle/src/lib.rs

@@ -23,6 +23,7 @@ pub mod codegen;
pub mod compile;
pub mod disjointsets;
pub mod error;
pub mod files;
pub mod lexer;
mod log;
pub mod overlap;

15
cranelift/isle/isle/src/overlap.rs

@@ -3,27 +3,20 @@
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use crate::error::{self, Error, Span};
use crate::error::{Error, Span};
use crate::lexer::Pos;
use crate::sema::{TermEnv, TermId, TermKind, TypeEnv};
use crate::sema::{TermEnv, TermId, TermKind};
use crate::trie_again;
/// Check for overlap.
pub fn check(
tyenv: &TypeEnv,
termenv: &TermEnv,
) -> Result<Vec<(TermId, trie_again::RuleSet)>, error::Errors> {
pub fn check(termenv: &TermEnv) -> Result<Vec<(TermId, trie_again::RuleSet)>, Vec<Error>> {
let (terms, mut errors) = trie_again::build(termenv);
errors.append(&mut check_overlaps(&terms, termenv).report());
if errors.is_empty() {
Ok(terms)
} else {
Err(error::Errors {
errors,
filenames: tyenv.filenames.clone(),
file_texts: tyenv.file_texts.clone(),
})
Err(errors)
}
}

26
cranelift/isle/isle/src/parser.rs

@@ -1,13 +1,13 @@
//! Parser for ISLE language.
use crate::ast::*;
use crate::error::{Error, Errors, Span};
use crate::error::{Error, Span};
use crate::lexer::{Lexer, Pos, Token};
type Result<T> = std::result::Result<T, Errors>;
type Result<T> = std::result::Result<T, Error>;
/// Parse the top-level ISLE definitions and return their AST.
pub fn parse(lexer: Lexer) -> Result<Defs> {
pub fn parse(lexer: Lexer) -> Result<Vec<Def>> {
let parser = Parser::new(lexer);
parser.parse_defs()
}
@@ -34,14 +34,10 @@ impl<'a> Parser<'a> {
Parser { lexer }
}
fn error(&self, pos: Pos, msg: String) -> Errors {
Errors {
errors: vec![Error::ParseError {
msg,
span: Span::new_single(pos),
}],
filenames: self.lexer.filenames.clone(),
file_texts: self.lexer.file_texts.clone(),
fn error(&self, pos: Pos, msg: String) -> Error {
Error::ParseError {
msg,
span: Span::new_single(pos),
}
}
@@ -136,16 +132,12 @@
}
}
fn parse_defs(mut self) -> Result<Defs> {
fn parse_defs(mut self) -> Result<Vec<Def>> {
let mut defs = vec![];
while !self.lexer.eof() {
defs.push(self.parse_def()?);
}
Ok(Defs {
defs,
filenames: self.lexer.filenames,
file_texts: self.lexer.file_texts,
})
Ok(defs)
}
fn parse_def(&mut self) -> Result<Def> {

71
cranelift/isle/isle/src/sema.rs

@@ -22,7 +22,6 @@ use std::collections::hash_map::Entry;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::HashMap;
use std::sync::Arc;
declare_id!(
/// The id of an interned symbol.
@@ -58,16 +57,6 @@
/// Keeps track of which symbols and rules have which types.
#[derive(Debug)]
pub struct TypeEnv {
/// Arena of input ISLE source filenames.
///
/// We refer to these indirectly through the `Pos::file` indices.
pub filenames: Vec<Arc<str>>,
/// Arena of input ISLE source contents.
///
/// We refer to these indirectly through the `Pos::file` indices.
pub file_texts: Vec<Arc<str>>,
/// Arena of interned symbol names.
///
/// Referred to indirectly via `Sym` indices.
@@ -912,10 +901,8 @@ macro_rules! unwrap_or_continue {
impl TypeEnv {
/// Construct the type environment from the AST.
pub fn from_ast(defs: &ast::Defs) -> Result<TypeEnv, Errors> {
pub fn from_ast(defs: &[ast::Def]) -> Result<TypeEnv, Vec<Error>> {
let mut tyenv = TypeEnv {
filenames: defs.filenames.clone(),
file_texts: defs.file_texts.clone(),
syms: vec![],
sym_map: StableMap::new(),
types: vec![],
@@ -926,7 +913,7 @@
// Traverse defs, assigning type IDs to type names. We'll fill
// in types on a second pass.
for def in &defs.defs {
for def in defs {
match def {
&ast::Def::Type(ref td) => {
let tid = TypeId(tyenv.type_map.len());
@@ -954,7 +941,7 @@
// Now lower AST nodes to type definitions, raising errors
// where typenames of fields are undefined or field names are
// duplicated.
for def in &defs.defs {
for def in defs {
match def {
&ast::Def::Type(ref td) => {
let tid = tyenv.types.len();
@@ -967,7 +954,7 @@
}
// Now collect types for extern constants.
for def in &defs.defs {
for def in defs {
if let &ast::Def::Extern(ast::Extern::Const {
ref name,
ref ty,
@@ -991,15 +978,11 @@ impl TypeEnv {
Ok(tyenv)
}
fn return_errors(&mut self) -> Result<(), Errors> {
fn return_errors(&mut self) -> Result<(), Vec<Error>> {
if self.errors.is_empty() {
Ok(())
} else {
Err(Errors {
errors: std::mem::take(&mut self.errors),
filenames: self.filenames.clone(),
file_texts: self.file_texts.clone(),
})
Err(std::mem::take(&mut self.errors))
}
}
@@ -1169,7 +1152,7 @@ impl Bindings {
impl TermEnv {
/// Construct the term environment from the AST and the type environment.
pub fn from_ast(tyenv: &mut TypeEnv, defs: &ast::Defs) -> Result<TermEnv, Errors> {
pub fn from_ast(tyenv: &mut TypeEnv, defs: &[ast::Def]) -> Result<TermEnv, Vec<Error>> {
let mut env = TermEnv {
terms: vec![],
term_map: StableMap::new(),
@@ -1196,13 +1179,13 @@ impl TermEnv {
Ok(env)
}
fn collect_pragmas(&mut self, _: &ast::Defs) {
fn collect_pragmas(&mut self, _: &[ast::Def]) {
// currently, no pragmas are defined, but the infrastructure is useful to keep around
return;
}
fn collect_term_sigs(&mut self, tyenv: &mut TypeEnv, defs: &ast::Defs) {
for def in &defs.defs {
fn collect_term_sigs(&mut self, tyenv: &mut TypeEnv, defs: &[ast::Def]) {
for def in defs {
match def {
&ast::Def::Decl(ref decl) => {
let name = tyenv.intern_mut(&decl.term);
@@ -1315,8 +1298,8 @@
}
}
fn collect_constructors(&mut self, tyenv: &mut TypeEnv, defs: &ast::Defs) {
for def in &defs.defs {
fn collect_constructors(&mut self, tyenv: &mut TypeEnv, defs: &[ast::Def]) {
for def in defs {
log!("collect_constructors from def: {:?}", def);
match def {
&ast::Def::Rule(ref rule) => {
@@ -1378,10 +1361,10 @@
}
}
fn collect_extractor_templates(&mut self, tyenv: &mut TypeEnv, defs: &ast::Defs) {
fn collect_extractor_templates(&mut self, tyenv: &mut TypeEnv, defs: &[ast::Def]) {
let mut extractor_call_graph = BTreeMap::new();
for def in &defs.defs {
for def in defs {
if let &ast::Def::Extractor(ref ext) = def {
let term = match self.get_term_by_name(tyenv, &ext.term) {
Some(x) => x,
@@ -1502,8 +1485,8 @@
}
}
fn collect_converters(&mut self, tyenv: &mut TypeEnv, defs: &ast::Defs) {
for def in &defs.defs {
fn collect_converters(&mut self, tyenv: &mut TypeEnv, defs: &[ast::Def]) {
for def in defs {
match def {
&ast::Def::Converter(ast::Converter {
ref term,
@@ -1565,8 +1548,8 @@
}
}
fn collect_externs(&mut self, tyenv: &mut TypeEnv, defs: &ast::Defs) {
for def in &defs.defs {
fn collect_externs(&mut self, tyenv: &mut TypeEnv, defs: &[ast::Def]) {
for def in defs {
match def {
&ast::Def::Extern(ast::Extern::Constructor {
ref term,
@@ -1688,8 +1671,8 @@
}
}
fn collect_rules(&mut self, tyenv: &mut TypeEnv, defs: &ast::Defs) {
for def in &defs.defs {
fn collect_rules(&mut self, tyenv: &mut TypeEnv, defs: &[ast::Def]) {
for def in defs {
match def {
&ast::Def::Rule(ref rule) => {
let pos = rule.pos;
@@ -1781,8 +1764,8 @@
}
}
fn check_for_undefined_decls(&self, tyenv: &mut TypeEnv, defs: &ast::Defs) {
for def in &defs.defs {
fn check_for_undefined_decls(&self, tyenv: &mut TypeEnv, defs: &[ast::Def]) {
for def in defs {
if let ast::Def::Decl(decl) = def {
let term = self.get_term_by_name(tyenv, &decl.term).unwrap();
let term = &self.terms[term.index()];
@@ -1799,8 +1782,8 @@
}
}
fn check_for_expr_terms_without_constructors(&self, tyenv: &mut TypeEnv, defs: &ast::Defs) {
for def in &defs.defs {
fn check_for_expr_terms_without_constructors(&self, tyenv: &mut TypeEnv, defs: &[ast::Def]) {
for def in defs {
if let ast::Def::Rule(rule) = def {
rule.expr.terms(&mut |pos, ident| {
let term = match self.get_term_by_name(tyenv, ident) {
@@ -2410,7 +2393,7 @@ mod test {
(type u32 (primitive u32))
(type A extern (enum (B (f1 u32) (f2 u32)) (C (f1 u32))))
";
let ast = parse(Lexer::from_str(text, "file.isle").unwrap()).expect("should parse");
let ast = parse(Lexer::new(0, text).unwrap()).expect("should parse");
let tyenv = TypeEnv::from_ast(&ast).expect("should not have type-definition errors");
let sym_a = tyenv
@@ -2448,8 +2431,6 @@
Pos {
file: 0,
offset: 19,
line: 2,
col: 18,
},
),
Type::Enum {
@@ -2489,8 +2470,6 @@
pos: Pos {
file: 0,
offset: 58,
line: 3,
col: 18,
},
},
];
