Browse Source
* ISLE: reduce allocations when lexing integers Instead of creating a temporary `Vec<u8>`, use a slice of the original underlying `buf`, and only allocate a temporary `String` if it contains an `_`. Copyright (c) 2024, Arm Limited. Signed-off-by: Karl Meakin <karl.meakin@arm.com> * ISLE: don't use `vec![]` macro in lexer tests `Vec` can be compared against arrays, since both deref to slices. Copyright (c) 2024, Arm Limited. Signed-off-by: Karl Meakin <karlwfmeakin@gmail.com> * ISLE: create `Files` Centralize all file-related arenas in `Files` struct. Copyright (c) 2024, Arm Limited. Signed-off-by: Karl Meakin <karl.meakin@arm.com> * ISLE: don't track line/col in `Pos` They are already tracked in `Files`, so no need to track them in `Pos` as well. This lets us simplify the implementation of `Lexer::advance_pos` a bit. Copyright (c) 2024, Arm Limited. Signed-off-by: Karl Meakin <karl.meakin@arm.com> * ISLE: don't pass `Files` into every pass `Files` was being threaded through a lot of passes where it wasn't needed. It is only needed for reporting errors in `compile.rs` and for reporting line numbers when printing in `codegen.rs`. Copyright (c) 2024, Arm Limited. Signed-off-by: Karl Meakin <karl.meakin@arm.com> * ISLE: store `&str` in `Lexer` Store the text being lexed as `&str`, rather than `&[u8]`, so that substrings don't need to be rechecked for UTF-8 validity when lexing identifiers or integers. Copyright (c) 2024, Arm Limited. Signed-off-by: Karl Meakin <karl.meakin@arm.com> * ISLE: add `peek_byte` helper for lexer Copyright (c) 2024, Arm Limited. Signed-off-by: Karl Meakin <karl.meakin@arm.com> * ISLE: tests for lexing integers Copyright (c) 2024, Arm Limited. Signed-off-by: Karl Meakin <karl.meakin@arm.com> * ISLE: don't parse integers twice Instead of trying to parse an integer as an `i128`, and then as a `u128` if that fails, parse it only as a `u128` and then check for `i128::MIN`. Copyright (c) 2024, Arm Limited. 
Signed-off-by: Karl Meakin <karl.meakin@arm.com> --------- Signed-off-by: Karl Meakin <karl.meakin@arm.com> Signed-off-by: Karl Meakin <karlwfmeakin@gmail.com>pull/9085/head
Karl Meakin
3 months ago
committed by
GitHub
11 changed files with 377 additions and 294 deletions
@ -0,0 +1,133 @@ |
|||
#![allow(missing_docs)] |
|||
|
|||
use std::ops::Index; |
|||
use std::path::{Path, PathBuf}; |
|||
|
|||
#[derive(Default, Clone, PartialEq, Eq, Debug)] |
|||
pub struct Files { |
|||
/// Arena of filenames from the input source.
|
|||
///
|
|||
/// Indexed via `Pos::file`.
|
|||
pub file_names: Vec<String>, |
|||
|
|||
/// Arena of file source texts.
|
|||
///
|
|||
/// Indexed via `Pos::file`.
|
|||
pub file_texts: Vec<String>, |
|||
|
|||
/// Arena of file line maps.
|
|||
///
|
|||
/// Indexed via `Pos::file`.
|
|||
pub file_line_maps: Vec<LineMap>, |
|||
} |
|||
|
|||
/// Index of the newlines in one source text, used to map byte offsets to
/// line numbers.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct LineMap {
    /// Byte offset just past each `\n` in the text, in increasing order.
    ///
    /// Entry `i` is where line `i` ends and, equivalently, where line
    /// `i + 1` starts.
    line_ends: Vec<usize>,
}
|||
|
|||
impl Index<usize> for LineMap { |
|||
type Output = usize; |
|||
|
|||
fn index(&self, index: usize) -> &Self::Output { |
|||
&self.line_ends[index] |
|||
} |
|||
} |
|||
|
|||
impl LineMap { |
|||
pub fn from_str(text: &str) -> Self { |
|||
let line_ends = text.match_indices('\n').map(|(i, _)| i + 1).collect(); |
|||
Self { line_ends } |
|||
} |
|||
|
|||
/// Get the line on which `pos` occurs
|
|||
pub fn line(&self, pos: usize) -> usize { |
|||
self.line_ends.partition_point(|&end| end <= pos) |
|||
} |
|||
|
|||
/// Get the starting byte position of `line`.
|
|||
pub fn get(&self, line: usize) -> Option<&usize> { |
|||
self.line_ends.get(line) |
|||
} |
|||
} |
|||
|
|||
impl Files { |
|||
pub fn from_paths<P: AsRef<Path>>( |
|||
paths: impl IntoIterator<Item = P>, |
|||
) -> Result<Self, (PathBuf, std::io::Error)> { |
|||
let mut file_names = Vec::new(); |
|||
let mut file_texts = Vec::new(); |
|||
let mut file_line_maps = Vec::new(); |
|||
|
|||
for path in paths { |
|||
let path = path.as_ref(); |
|||
let contents = |
|||
std::fs::read_to_string(path).map_err(|err| (path.to_path_buf(), err))?; |
|||
let name = path.display().to_string(); |
|||
|
|||
file_line_maps.push(LineMap::from_str(&contents)); |
|||
file_names.push(name); |
|||
file_texts.push(contents); |
|||
} |
|||
|
|||
Ok(Self { |
|||
file_names, |
|||
file_texts, |
|||
file_line_maps, |
|||
}) |
|||
} |
|||
|
|||
pub fn from_names_and_contents(files: impl IntoIterator<Item = (String, String)>) -> Self { |
|||
let mut file_names = Vec::new(); |
|||
let mut file_texts = Vec::new(); |
|||
let mut file_line_maps = Vec::new(); |
|||
|
|||
for (name, contents) in files { |
|||
file_line_maps.push(LineMap::from_str(&contents)); |
|||
file_names.push(name); |
|||
file_texts.push(contents); |
|||
} |
|||
|
|||
Self { |
|||
file_names, |
|||
file_texts, |
|||
file_line_maps, |
|||
} |
|||
} |
|||
|
|||
pub fn file_name(&self, file: usize) -> Option<&str> { |
|||
self.file_names.get(file).map(|x| x.as_str()) |
|||
} |
|||
|
|||
pub fn file_text(&self, file: usize) -> Option<&str> { |
|||
self.file_texts.get(file).map(|x| x.as_str()) |
|||
} |
|||
|
|||
pub fn file_line_map(&self, file: usize) -> Option<&LineMap> { |
|||
self.file_line_maps.get(file) |
|||
} |
|||
} |
|||
|
|||
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the recorded line ends and the offset-to-line lookup for texts
    /// with and without newlines.
    #[test]
    fn line_map() {
        // Without any newline there are no line ends, and every offset is on
        // line 0.
        for text in ["", "line 0"] {
            let map = LineMap::from_str(text);
            assert!(map.line_ends.is_empty());
            assert_eq!(map.line(0), 0);
            assert_eq!(map.line(100), 0);
        }

        // A single newline at byte 6 records one line end at byte 7; offsets
        // past it land on line 1.
        let map = LineMap::from_str("line 0\nline 1");
        assert_eq!(map.line_ends, [7]);
        assert_eq!(map.line(0), 0);
        assert_eq!(map.line(100), 1);
    }
}
Loading…
Reference in new issue