//! Lexer for a small shell-like language: splits input into words,
//! redirects, keywords, operators, and punctuation using `nom`.
use std::str::FromStr;

use nom::{
    branch::alt,
    bytes::complete::{is_a, is_not, tag},
    character::complete::{alphanumeric1, char, space0},
    combinator::{map, recognize, value, verify},
    multi::{fold_many1, many0, many1_count},
    sequence::{delimited, pair, preceded, separated_pair},
    IResult,
};
/// One piece of a [`Word`], as produced by the lexer.
#[derive(Debug, Clone, PartialEq)]
pub enum Fragment<'a> {
    /// Unquoted text, borrowed verbatim from the input.
    Literal(&'a str),
    /// Text that appeared inside single or double quotes.
    QuotedLiteral(&'a str),
    /// A variable reference; holds the bare name (no `$` or braces).
    Variable(&'a str),
}
/// A word: one or more adjacent fragments with no whitespace between
/// them (e.g. `a"b c"$d` lexes as a single word of three fragments).
#[derive(Debug, Clone, PartialEq)]
pub struct Word<'a>(pub Vec<Fragment<'a>>);
/// Pull-based stream of tokens lexed lazily from an input string,
/// with one token of lookahead.
#[derive(Debug)]
pub struct TokenStream<'a> {
    // Unlexed remainder of the input.
    input: &'a str,
    // One-token lookahead slot used by `peek`.
    buffer: Option<Token<'a>>,
}
/// An I/O redirection token.
#[derive(Debug, Clone, PartialEq)]
pub enum Redirect<'a> {
    /// Output redirection (`>…`, `out>…`, `err>…`, …).
    Output(OutputRedirect<'a>),
    /// Input redirection (`<…`); holds the source word.
    Input(Word<'a>),
}
/// Which output stream(s) a redirect captures, plus the target word.
#[derive(Debug, Clone, PartialEq)]
pub enum OutputRedirect<'a> {
    /// Standard error only (`err>…` / `e>…`).
    Err(Word<'a>),
    /// Standard output only (`>…`, `out>…` / `o>…`).
    Out(Word<'a>),
    /// Both streams (`oe>…` / `eo>…`).
    Both(Word<'a>),
}
/// Reserved words; a bare literal word matching one of these is lexed
/// as a keyword token instead of a word (see `lex_maybe_keyword`).
#[derive(Debug, Clone, PartialEq)]
pub enum Keyword {
    If,
    While,
    For,
    Match,
    Let,
}
/// Operator tokens.
///
/// NOTE(review): `Gt` and `Lt` are declared but `lex_operator` never
/// produces them — `>` and `<` currently lex as redirects. Confirm
/// whether these variants are reserved for future use.
#[derive(Debug, Clone, PartialEq)]
pub enum Operator {
    /// `==`
    Eq,
    /// `>` (not currently produced by the lexer)
    Gt,
    /// `<` (not currently produced by the lexer)
    Lt,
    /// `|`
    Or,
    /// `=`
    Assign,
}
/// Single-character grouping punctuation.
#[derive(Debug, Clone, PartialEq)]
pub enum Punctuation {
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
}
/// Any token the lexer can produce.
#[derive(Debug, Clone, PartialEq)]
pub enum Token<'a> {
    Word(Word<'a>),
    Redirect(Redirect<'a>),
    Keyword(Keyword),
    Punctuation(Punctuation),
    Operator(Operator),
}
/// Error type returned by the lexer: a `nom` error borrowing the
/// offending slice of the input.
type NomError<'a> = nom::Err<nom::error::Error<&'a str>>;
impl<'a> TokenStream<'a> {
|
|
pub fn new(input: &'a str) -> Self {
|
|
Self {
|
|
input,
|
|
buffer: None,
|
|
}
|
|
}
|
|
|
|
pub fn is_eof(&self) -> bool {
|
|
self.input.is_empty() && self.buffer.is_none()
|
|
}
|
|
|
|
fn read(&mut self) -> Result<Option<Token<'a>>, NomError<'a>> {
|
|
if self.input.is_empty() {
|
|
self.buffer = None;
|
|
Ok(None)
|
|
} else {
|
|
let (rest, token) = lex_token(self.input)?;
|
|
self.input = rest;
|
|
self.buffer = Some(token.clone());
|
|
Ok(Some(token))
|
|
}
|
|
}
|
|
|
|
pub fn next(&mut self) -> Result<Option<Token<'a>>, NomError<'a>> {
|
|
let token = self.peek()?;
|
|
self.read()?;
|
|
Ok(token)
|
|
}
|
|
pub fn peek(&mut self) -> Result<Option<Token<'a>>, NomError<'a>> {
|
|
if let Some(buffer) = self.buffer.clone() {
|
|
return Ok(Some(buffer));
|
|
}
|
|
self.read()
|
|
}
|
|
}
|
|
|
|
impl Word<'_> {
|
|
pub fn as_literal(&self) -> Option<&str> {
|
|
if self.0.len() != 1 {
|
|
return None;
|
|
}
|
|
let Fragment::Literal(lit) = self.0[0] else {
|
|
return None;
|
|
};
|
|
Some(lit)
|
|
}
|
|
}
|
|
|
|
impl FromStr for Keyword {
|
|
type Err = ();
|
|
|
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
|
match s {
|
|
"if" => Ok(Self::If),
|
|
"while" => Ok(Self::While),
|
|
"for" => Ok(Self::For),
|
|
"match" => Ok(Self::Match),
|
|
"let" => Ok(Self::Let),
|
|
_ => Err(()),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Lexes a variable identifier: one or more alphanumerics, `-`, or `_`.
fn lex_identifier(i: &str) -> IResult<&str, &str> {
    recognize(many1_count(alt((alphanumeric1, is_a("-_")))))(i)
}
/// Lexes a bare (unquoted) word such as a path: one or more
/// alphanumerics or any of `.` `/` `-` `_` `:`.
fn lex_filename(i: &str) -> IResult<&str, &str> {
    recognize(many1_count(alt((alphanumeric1, is_a("./-_:")))))(i)
}
/// Lexes a braced variable reference, yielding only the bare name.
fn lex_braced_var(i: &str) -> IResult<&str, &str> {
    // ${ABCD}
    delimited(tag("${"), lex_identifier, char('}'))(i)
}
/// Lexes an unbraced variable reference, yielding only the bare name.
fn lex_unbraced_var(i: &str) -> IResult<&str, &str> {
    // $ABCD
    preceded(char('$'), lex_identifier)(i)
}
/// Lexes either variable form (`${name}` or `$name`), yielding the
/// name without sigil or braces. The braced form is tried first; both
/// start with `$`, so trying `$name` first could never match `${…}`.
fn lex_var(i: &str) -> IResult<&str, &str> {
    alt((lex_braced_var, lex_unbraced_var))(i)
}
/// Lexes one run of plain text inside double quotes: anything up to a
/// backslash, a closing `"`, or the `$` that starts a variable.
fn lex_dquoted_literal(i: &str) -> IResult<&str, &str> {
    let is_not_var_slash_quote = is_not("\\\"$");
    // NOTE(review): `is_not` already errors on a zero-length match, so
    // this non-empty `verify` looks redundant — confirm before removing.
    verify(is_not_var_slash_quote, |s: &str| !s.is_empty())(i)
}
/// Lexes a double-quoted section into interleaved variable and
/// quoted-literal fragments. The quotes themselves are dropped, and
/// `""` yields an empty fragment list. Backslash escapes are not
/// handled: a `\` inside the quotes makes the parse fail.
fn lex_dquoted(i: &str) -> IResult<&str, Vec<Fragment>> {
    delimited(
        char('"'),
        many0(alt((
            map(lex_var, Fragment::Variable),
            map(lex_dquoted_literal, Fragment::QuotedLiteral),
        ))),
        char('"'),
    )(i)
}
/// Lexes the body of a single-quoted string: everything up to a
/// backslash or closing quote. `recognize(many0(…))` permits an empty
/// body, so `''` is a valid (empty) quoted literal.
fn lex_squoted_text(i: &str) -> IResult<&str, &str> {
    let is_not_slash_quote = is_not("\\'");
    recognize(many0(is_not_slash_quote))(i)
}
/// Lexes a single-quoted string, yielding the body without the quotes.
fn lex_squoted(i: &str) -> IResult<&str, &str> {
    delimited(char('\''), lex_squoted_text, char('\''))(i)
}
/// Lexes one unquoted fragment: a variable reference or a bare literal.
fn lex_unquoted_fragment(i: &str) -> IResult<&str, Fragment> {
    alt((
        // Variables first: `$` is not in `lex_filename`'s character set,
        // so a `$name` must be claimed here.
        map(lex_var, Fragment::Variable),
        map(lex_filename, Fragment::Literal),
    ))(i)
}
fn lex_word(i: &str) -> IResult<&str, Word> {
|
|
fold_many1(
|
|
alt((
|
|
lex_dquoted,
|
|
map(lex_squoted, |s| vec![Fragment::QuotedLiteral(s)]),
|
|
map(lex_unquoted_fragment, |s| vec![s]),
|
|
)),
|
|
|| Word(vec![]),
|
|
|mut acc, items| {
|
|
acc.0.extend(items);
|
|
acc
|
|
},
|
|
)(i)
|
|
}
|
|
|
|
fn lex_explicit_output_redirect(i: &str) -> IResult<&str, OutputRedirect> {
|
|
// out>abcdef
|
|
// err>abcdef
|
|
// out+err>abcdef
|
|
// oe>abcdef
|
|
// eo>abcdef
|
|
|
|
#[derive(Debug, Clone)]
|
|
enum Source {
|
|
Out,
|
|
Err,
|
|
Both,
|
|
}
|
|
|
|
map(
|
|
separated_pair(
|
|
alt((
|
|
value(Source::Out, tag("out")),
|
|
value(Source::Err, tag("err")),
|
|
value(Source::Both, tag("oe")),
|
|
value(Source::Both, tag("eo")),
|
|
value(Source::Out, char('o')),
|
|
value(Source::Err, char('e')),
|
|
)),
|
|
char('>'),
|
|
lex_word,
|
|
),
|
|
|(source, path)| match source {
|
|
Source::Out => OutputRedirect::Out(path),
|
|
Source::Err => OutputRedirect::Err(path),
|
|
Source::Both => OutputRedirect::Both(path),
|
|
},
|
|
)(i)
|
|
}
|
|
/// Lexes a bare output redirect (`>abcdef`, `> abcdef`); a plain `>`
/// targets standard output only.
fn lex_implicit_output_redirect(i: &str) -> IResult<&str, OutputRedirect> {
    // >abcdef
    map(
        preceded(pair(char('>'), space0), lex_word),
        OutputRedirect::Out,
    )(i)
}
/// Lexes either output-redirect form. The two cannot conflict: the
/// implicit form starts with `>`, the explicit form with a stream name.
fn lex_output_redirect(i: &str) -> IResult<&str, OutputRedirect> {
    alt((lex_implicit_output_redirect, lex_explicit_output_redirect))(i)
}
/// Lexes an input redirect (`<abcdef`, `< abcdef`), yielding the
/// source word.
fn lex_input_redirect(i: &str) -> IResult<&str, Word> {
    // <abcdef
    preceded(pair(char('<'), space0), lex_word)(i)
}
/// Lexes any redirect token: input (`<…`) or output (`…>…`).
fn lex_redirect(i: &str) -> IResult<&str, Redirect> {
    alt((
        map(lex_input_redirect, Redirect::Input),
        map(lex_output_redirect, Redirect::Output),
    ))(i)
}
fn lex_maybe_keyword(i: &str) -> IResult<&str, Token> {
|
|
// TODO this will recognize quoted text as a keyword
|
|
map(lex_word, |word| {
|
|
if let Some(kw) = word.as_literal().and_then(|s| Keyword::from_str(s).ok()) {
|
|
return Token::Keyword(kw);
|
|
}
|
|
Token::Word(word)
|
|
})(i)
|
|
}
|
|
|
|
/// Lexes a single brace/paren/bracket character into its
/// [`Punctuation`] token.
fn lex_punctuation(i: &str) -> IResult<&str, Punctuation> {
    alt((
        value(Punctuation::LBrace, char('{')),
        value(Punctuation::RBrace, char('}')),
        value(Punctuation::LParen, char('(')),
        value(Punctuation::RParen, char(')')),
        value(Punctuation::LBracket, char('[')),
        value(Punctuation::RBracket, char(']')),
    ))(i)
}
/// Lexes an operator token. `==` is tried before `=` so equality is
/// not truncated to an assignment. `>`/`<` are absent here: those
/// characters are claimed by the redirect parsers in `lex_token`.
fn lex_operator(i: &str) -> IResult<&str, Operator> {
    alt((
        value(Operator::Eq, tag("==")),
        value(Operator::Assign, char('=')),
        value(Operator::Or, char('|')),
    ))(i)
}
/// Lexes one token, skipping leading spaces/tabs.
///
/// Branch order sets precedence where prefixes overlap: redirects are
/// tried before the catch-all word parser so e.g. `>file` and `out>x`
/// become redirect tokens rather than words.
pub fn lex_token(i: &str) -> IResult<&str, Token> {
    preceded(
        space0,
        alt((
            map(lex_punctuation, Token::Punctuation),
            map(lex_redirect, Token::Redirect),
            map(lex_operator, Token::Operator),
            lex_maybe_keyword,
        )),
    )(i)
}
/// Lexes as many tokens as possible. `many0` stops (without error) at
/// the first position no token matches, returning the unconsumed rest.
pub fn lex_tokens(i: &str) -> IResult<&str, Vec<Token>> {
    many0(lex_token)(i)
}
#[cfg(test)]
mod tests {
    use std::fmt;

    use nom::IResult;

    use super::{
        lex_filename, lex_tokens, Fragment, Keyword, Operator, OutputRedirect, Redirect, Token,
        Word,
    };

    /// Table-driven test helper: runs `parser` over each
    /// `(input, expected_output, expected_rest)` case, printing a
    /// diagnostic and panicking on the first mismatch.
    #[track_caller]
    fn run_tests<
        'a,
        T: PartialEq + fmt::Debug,
        I: IntoIterator<Item = (&'a str, T, &'a str)>,
        F: Fn(&'a str) -> IResult<&'a str, T>,
    >(
        it: I,
        parser: F,
    ) {
        // Report failures against the calling test function, not this
        // helper (enabled by #[track_caller]).
        let location = std::panic::Location::caller();

        for (i, (input, expect, expect_rest)) in it.into_iter().enumerate() {
            let (rest, output) = match parser(input) {
                Ok(ok) => ok,
                Err(error) => {
                    eprintln!("Test #{i} in {location:?} failed:");
                    eprintln!("* Input: {input:?}");
                    eprintln!("* Parser returned error: {error}");
                    panic!();
                }
            };

            // Check leftover input before the output: a wrong split
            // usually explains a wrong output as well.
            if rest != expect_rest {
                eprintln!("Test #{i} in {location:?} failed:");
                eprintln!("* Input: {input:?}");
                if expect_rest.is_empty() {
                    eprintln!("* Unexpected trailing characters: {rest:?}");
                } else {
                    eprintln!("* Expected trailing characters: {expect_rest:?}");
                    eprintln!("* Actual trailing characters: {rest:?}");
                }
                panic!();
            }

            if output != expect {
                eprintln!("Test #{i} in {location:?} failed:");
                eprintln!("* Input: {input:?}");
                eprintln!("* Expected output: {expect:?}");
                eprintln!("* Actual output: {output:?}");
                panic!();
            }
        }
    }

    #[test]
    fn test_lex_filename() {
        run_tests(
            [
                // Stops at '>' (not in the filename character set).
                ("./abc123_a-a/file>other", "./abc123_a-a/file", ">other"),
                // Stops at the first space.
                ("/a/b/c d e f g", "/a/b/c", " d e f g"),
            ],
            lex_filename,
        )
    }

    #[test]
    fn test_lex_tokens() {
        run_tests(
            [
                // Keyword, then a multi-fragment word (bare + quoted +
                // variables), then a trailing bare word.
                (
                    " if /a/b/c\" $a b c\"$d efg",
                    vec![
                        Token::Keyword(Keyword::If),
                        Token::Word(Word(vec![
                            Fragment::Literal("/a/b/c"),
                            Fragment::Literal(" "),
                            Fragment::Variable("a"),
                            Fragment::Literal(" b c"),
                            Fragment::Variable("d"),
                        ])),
                        Token::Word(Word(vec![Fragment::Literal("efg")])),
                    ],
                    "",
                ),
                // Implicit output redirect whose target mixes a
                // variable and a quoted literal.
                (
                    "\t>$d\"filename\"",
                    vec![Token::Redirect(Redirect::Output(OutputRedirect::Out(
                        Word(vec![Fragment::Variable("d"), Fragment::Literal("filename")]),
                    )))],
                    "",
                ),
                // Pipe operator followed by a word.
                (
                    "| abc",
                    vec![
                        Token::Operator(Operator::Or),
                        Token::Word(Word(vec![Fragment::Literal("abc")])),
                    ],
                    "",
                ),
            ],
            lex_tokens,
        )
    }
}