shell: rework shell parsing, if/while/for/&&/||

This commit is contained in:
Mark Poliakov 2025-03-12 22:11:54 +02:00
parent 4798240473
commit 608912804f
27 changed files with 2184 additions and 600 deletions

View File

@ -177,7 +177,6 @@ impl Metadata {
pub fn set_mtime_now(&mut self) {
let t = real_time();
log::info!("set_mtime_now = {t:?}");
self.mtime = t;
self.atime = t;
}

1
userspace/Cargo.lock generated
View File

@ -2486,6 +2486,7 @@ version = "0.1.0"
dependencies = [
"clap",
"cross",
"libc",
"log",
"logsink",
"nom",

View File

@ -0,0 +1,46 @@
use std::{ffi::CString, io, mem::MaybeUninit, os::fd::RawFd, path::Path};
use crate::fs::FileModeUpdate;
/// Applies a `FileModeUpdate` to the permission bits of `path`, resolved
/// relative to the directory fd `at` (or the current directory when `at`
/// is `None`).
///
/// The existing mode is read with `fstatat` and the new mode written back
/// with `fchmodat`; symlinks are not followed in either call. Returns the
/// last OS error if either syscall fails.
pub fn update_file_mode<P: AsRef<Path>>(
    at: Option<RawFd>,
    path: P,
    mode: FileModeUpdate,
) -> io::Result<()> {
    let dirfd = at.unwrap_or(libc::AT_FDCWD);
    let c_path = CString::new(path.as_ref().as_os_str().as_encoded_bytes()).unwrap();

    // Fetch the current mode so the non-permission bits (file type) survive.
    let mut stat = MaybeUninit::<libc::stat>::uninit();
    let rc = unsafe {
        libc::fstatat(
            dirfd,
            c_path.as_ptr(),
            stat.as_mut_ptr(),
            libc::AT_SYMLINK_NOFOLLOW,
        )
    };
    if rc != 0 {
        return Err(io::Error::last_os_error());
    }
    // SAFETY: fstatat returned 0, so `stat` has been fully initialized.
    let stat = unsafe { stat.assume_init() };

    let new_mode = match mode {
        // Replace the 0o777 permission bits wholesale, keeping everything else.
        FileModeUpdate::Set(value) => {
            (stat.st_mode & !0o777) | ((value.bits() as libc::mode_t) & 0o777)
        }
        // Clear first, then set, so a bit named in both masks ends up set.
        FileModeUpdate::Modify { set, clear } => {
            let cleared = stat.st_mode & !((clear.bits() as libc::mode_t) & 0o777);
            cleared | ((set.bits() as libc::mode_t) & 0o777)
        }
    };

    let rc =
        unsafe { libc::fchmodat(dirfd, c_path.as_ptr(), new_mode, libc::AT_SYMLINK_NOFOLLOW) };
    if rc != 0 {
        return Err(io::Error::last_os_error());
    }
    Ok(())
}

View File

@ -1,9 +1,11 @@
pub mod fs;
pub mod mem;
pub mod pid;
pub mod pipe;
pub mod poll;
pub mod socket;
pub mod term;
pub mod time;
pub mod timer;
use std::{ffi::c_int, io, sync::Mutex};

View File

@ -17,9 +17,9 @@ pub struct LocalPacketSocketImpl {
ephemeral: Option<TempDir>,
}
#[derive(Debug)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct BorrowedAddressImpl<'a>(&'a Path);
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct OwnedAddressImpl(PathBuf);
impl<'a> sys::BorrowedAddress<'a> for BorrowedAddressImpl<'a> {

View File

@ -0,0 +1,12 @@
use std::{io, ptr::null};
/// Sets the system wall clock via `settimeofday`.
///
/// `nanoseconds` is truncated to microsecond precision, since `timeval`
/// cannot represent anything finer. Returns the last OS error on failure.
pub fn set_real_time(seconds: u64, nanoseconds: u32) -> io::Result<()> {
    let micros = nanoseconds / 1000;
    let tv = libc::timeval {
        tv_sec: seconds as _,
        tv_usec: micros as _,
    };
    let rc = unsafe { libc::settimeofday(&raw const tv, null()) };
    if rc == 0 {
        Ok(())
    } else {
        Err(io::Error::last_os_error())
    }
}

View File

@ -1,15 +1,22 @@
#![feature(yggdrasil_os, rustc_private)]
#![cfg_attr(
any(rust_analyzer, target_os = "yggdrasil"),
feature(yggdrasil_os, rustc_private)
)]
#[cfg(rust_analyzer)]
pub use yggdrasil_rt as rt;
#[cfg(rust_analyzer)]
pub use yggdrasil_abi as abi;
pub use abi_lib;
#[cfg(rust_analyzer)]
pub use abi_serde;
#[cfg(rust_analyzer)]
pub use abi_lib;
pub use yggdrasil_abi as abi;
#[cfg(rust_analyzer)]
pub use yggdrasil_rt as rt;
#[cfg(any(rust_analyzer, target_os = "yggdrasil"))]
pub use std::os::yggdrasil::rt;
#[cfg(any(rust_analyzer, target_os = "yggdrasil"))]
pub use std::os::yggdrasil::rt::abi;
pub use std::os::yggdrasil::rt::abi::abi_serde;
#[cfg(any(rust_analyzer, target_os = "yggdrasil"))]
pub use std::os::yggdrasil::rt::abi::abi_lib;
#[cfg(any(rust_analyzer, target_os = "yggdrasil"))]
pub use std::os::yggdrasil::rt::abi::abi_serde;

View File

@ -86,6 +86,10 @@ path = "src/touch.rs"
name = "rm"
path = "src/rm.rs"
[[bin]]
name = "env"
path = "src/env.rs"
[[bin]]
name = "cat"
path = "src/cat.rs"

View File

@ -0,0 +1,5 @@
/// `env` utility: prints every variable of the current process environment,
/// one `KEY=VALUE` pair per line.
fn main() {
    std::env::vars().for_each(|(key, value)| println!("{key}={value}"));
}

View File

@ -16,8 +16,12 @@ nom = "7.1.3"
[target.'cfg(target_os = "yggdrasil")'.dependencies]
runtime.workspace = true
[target.'cfg(unix)'.dependencies]
libc = "*"
[dev-dependencies]
runtime.workspace = true
libc = "*"
[lints]
workspace = true

View File

@ -0,0 +1,13 @@
#!/bin/sh
# Runs every tests/*.sh twice -- once under /bin/sh as the reference and
# once under the freshly built shell -- and reports OK/FAIL per script.
execute="cargo run --release --quiet -- "
for name in tests/*.sh; do
	expect=$(/bin/sh "$name")
	actual=$($execute "$name" || echo "Failed")
	printf "\x1B[1m%s\x1B[0m: " "$name"
	# POSIX test(1) uses `=` for string comparison; `==` is a bashism that
	# fails under strict /bin/sh implementations (e.g. dash). Quote both
	# operands so empty/whitespace output compares correctly.
	if [ "$actual" = "$expect" ]; then
		printf "\x1B[1;32mOK\x1B[0m\n"
	else
		printf "\x1B[1;31mFAIL\x1B[0m\n"
	fi
done

View File

@ -1,10 +1,10 @@
use std::{collections::BTreeMap, env, io::Write, process::ExitCode, sync::RwLock};
use std::{collections::BTreeMap, env, io::Write, path::Path, process::ExitCode, sync::RwLock};
use crate::exec::{InheritStderr, InheritStdout, Input, Outcome, Output};
pub type Builtin = fn(Io, Vec<String>, Envs) -> Outcome;
pub type Builtin = fn(Args, Vec<String>, Envs) -> Outcome;
pub struct Io {
pub struct Args {
pub stdin: Input,
pub stdout: Output<InheritStdout>,
pub stderr: Output<InheritStderr>,
@ -28,7 +28,73 @@ pub fn register(name: &str, function: Builtin) {
BUILTINS.write().unwrap().insert(name.into(), function);
}
fn b_echo(mut io: Io, args: Vec<String>, _env: Envs) -> Outcome {
/// `test` builtin: evaluates a POSIX `test(1)`-style expression and maps
/// the boolean result to an `Outcome` (`ok` for true, `err` for false).
///
/// Supported forms:
/// * `STRING`                     -- true if non-empty
/// * `-d/-e/-f/-L/-n/-z/-s ARG`   -- path / string unary tests
/// * `X -nt/-ot Y`                -- mtime comparison (false on stat error)
/// * `X -eq/-ne/-gt/-lt/-ge/-le Y` -- signed-integer comparison
///
/// Operators that need OS facilities not yet available here remain `todo!()`.
pub fn b_test(_io: Args, args: Vec<String>, _env: Envs) -> Outcome {
    // Compares two operands as signed integers; a non-numeric operand makes
    // the test false (POSIX treats it as an error; we stay permissive).
    fn int_cmp(x: &str, y: &str, f: fn(i64, i64) -> bool) -> bool {
        match (x.trim().parse::<i64>(), y.trim().parse::<i64>()) {
            (Ok(a), Ok(b)) => f(a, b),
            _ => false,
        }
    }
    // Returns the modification times of both paths, if both are readable.
    fn mtimes(x: &str, y: &str) -> Option<(std::time::SystemTime, std::time::SystemTime)> {
        let t0 = Path::new(x).metadata().and_then(|m| m.modified()).ok()?;
        let t1 = Path::new(y).metadata().and_then(|m| m.modified()).ok()?;
        Some((t0, t1))
    }
    if args.is_empty() {
        return Outcome::err();
    }
    let res = match &args[..] {
        // Single operand: true iff the string is non-empty.
        [x] => !x.is_empty(),
        [op, x] => match op.as_str() {
            "-b" => todo!(),
            "-c" => todo!(),
            "-d" => Path::new(x).is_dir(),
            "-e" => Path::new(x).exists(),
            "-f" => Path::new(x).is_file(),
            "-g" => todo!(),
            "-h" => todo!(),
            "-L" => Path::new(x).is_symlink(),
            // -n: string has non-zero length; -z: string is empty.
            "-n" => !x.is_empty(),
            "-z" => x.is_empty(),
            // -s: file exists and has a size greater than zero.
            "-s" => Path::new(x).metadata().map(|m| m.len() > 0).unwrap_or(false),
            "-p" => todo!(),
            "-r" => todo!(),
            "-S" => todo!(),
            "-t" => todo!(),
            "-u" => todo!(),
            "-w" => todo!(),
            "-x" => todo!(),
            _ => false,
        },
        [x, op, y] => match op.as_str() {
            // -nt / -ot: compare mtimes; false if either file can't be stat'd.
            "-nt" => mtimes(x, y).is_some_and(|(t0, t1)| t0 > t1),
            "-ot" => mtimes(x, y).is_some_and(|(t0, t1)| t0 < t1),
            "-ef" => todo!(),
            // Integer comparisons per POSIX test(1).
            "-eq" => int_cmp(x, y, |a, b| a == b),
            "-ne" => int_cmp(x, y, |a, b| a != b),
            "-gt" => int_cmp(x, y, |a, b| a > b),
            "-lt" => int_cmp(x, y, |a, b| a < b),
            "-ge" => int_cmp(x, y, |a, b| a >= b),
            "-le" => int_cmp(x, y, |a, b| a <= b),
            _ => false,
        },
        _ => todo!(),
    };
    if res {
        Outcome::ok()
    } else {
        Outcome::err()
    }
}
fn b_echo(mut io: Args, args: Vec<String>, _env: Envs) -> Outcome {
for (i, arg) in args.iter().enumerate() {
if i != 0 {
write!(io.stdout, " ").ok();
@ -39,7 +105,7 @@ fn b_echo(mut io: Io, args: Vec<String>, _env: Envs) -> Outcome {
Outcome::ok()
}
fn b_cd(mut io: Io, args: Vec<String>, _env: Envs) -> Outcome {
fn b_cd(mut io: Args, args: Vec<String>, _env: Envs) -> Outcome {
if args.len() != 1 {
writeln!(io.stderr, "`cd` requires one argument").ok();
return Outcome::err();
@ -54,7 +120,7 @@ fn b_cd(mut io: Io, args: Vec<String>, _env: Envs) -> Outcome {
}
}
fn b_pwd(mut io: Io, _args: Vec<String>, _env: Envs) -> Outcome {
fn b_pwd(mut io: Args, _args: Vec<String>, _env: Envs) -> Outcome {
match env::current_dir() {
Ok(path) => {
writeln!(io.stdout, "{}", path.display()).ok();
@ -67,7 +133,7 @@ fn b_pwd(mut io: Io, _args: Vec<String>, _env: Envs) -> Outcome {
}
}
fn b_exit(mut io: Io, args: Vec<String>, _env: Envs) -> Outcome {
fn b_exit(mut io: Args, args: Vec<String>, _env: Envs) -> Outcome {
let code = match args.len() {
0 => ExitCode::SUCCESS,
1 => todo!(),
@ -80,7 +146,7 @@ fn b_exit(mut io: Io, args: Vec<String>, _env: Envs) -> Outcome {
Outcome::ExitShell(code)
}
fn b_export(_io: Io, _args: Vec<String>, env: Envs) -> Outcome {
fn b_export(_io: Args, _args: Vec<String>, env: Envs) -> Outcome {
for (key, value) in env.0 {
env::set_var(key, value);
}

View File

@ -0,0 +1,182 @@
use std::{collections::HashMap, process::Command};
use crate::syntax::lex::{DoubleQuotedSegment, WordSegment, WordToken};
/// One scope of shell variables; scopes are stacked by `Environment`
/// (e.g. a fresh scope per `for` loop iteration variable).
#[derive(Default, Debug, PartialEq)]
pub struct LocalEnvironment {
    // A variable's value is a list of words (the result of field splitting).
    variables: HashMap<String, Vec<String>>,
}

/// The shell's variable and alias state: a stack of local scopes plus a
/// flat alias table. With an empty stack, writes fall through to the real
/// process environment (see `insert_current_literal`).
#[derive(Default, Debug, PartialEq)]
pub struct Environment {
    // Innermost scope is last; lookups walk from the top of the stack.
    local_stack: Vec<LocalEnvironment>,
    aliases: HashMap<String, String>,
}

/// Accumulates expanded word fragments while performing word splitting.
#[derive(Default, Debug, PartialEq)]
pub struct Expander {
    words: Vec<String>,
}
impl LocalEnvironment {
    /// Looks up a variable in this scope only.
    fn lookup(&self, name: &str) -> Option<&Vec<String>> {
        self.variables.get(name)
    }

    /// Binds `name` to a single-word value in this scope.
    fn insert_literal(&mut self, name: impl Into<String>, value: impl Into<String>) {
        let words = vec![value.into()];
        self.variables.insert(name.into(), words);
    }

    /// Exports every variable of this scope into `command`'s environment,
    /// joining multi-word values with single spaces.
    fn to_command(&self, command: &mut Command) {
        for (key, value) in &self.variables {
            let joined = value.join(" ");
            command.env(key, joined);
        }
    }
}
impl Environment {
    /// Pushes a fresh, empty variable scope (used e.g. by `for` loops).
    pub fn push_environment(&mut self) {
        self.local_stack.push(LocalEnvironment {
            variables: HashMap::new(),
        });
    }
    /// Discards the innermost scope; a no-op when the stack is empty.
    pub fn pop_environment(&mut self) {
        self.local_stack.pop();
    }
    /// The innermost scope, if any.
    pub fn current(&self) -> Option<&LocalEnvironment> {
        self.local_stack.last()
    }
    /// Mutable access to the innermost scope, if any.
    pub fn current_mut(&mut self) -> Option<&mut LocalEnvironment> {
        self.local_stack.last_mut()
    }
    /// Iterates scopes innermost-first.
    fn env_stack(&self) -> impl Iterator<Item = &LocalEnvironment> {
        self.local_stack.iter().rev()
    }
    /// Resolves `name` against the scope stack; the innermost binding wins.
    pub fn lookup_current(&self, name: &str) -> Option<&Vec<String>> {
        self.env_stack().find_map(|e| e.lookup(name))
    }
    /// Looks up an alias definition by name.
    pub fn lookup_alias(&self, name: &str) -> Option<&String> {
        self.aliases.get(name)
    }
    /// Binds `name = value` in the innermost scope; with no scope on the
    /// stack, the write goes to the real process environment instead.
    pub fn insert_current_literal(&mut self, name: impl Into<String>, value: impl Into<String>) {
        if let Some(current) = self.current_mut() {
            current.insert_literal(name, value);
        } else {
            std::env::set_var(name.into(), value.into());
        }
    }
    /// Defines (or replaces) an alias.
    pub fn insert_alias(&mut self, name: impl Into<String>, value: impl Into<String>) {
        self.aliases.insert(name.into(), value.into());
    }
    /// Exports every scope's variables into `command`, outermost scope
    /// first, so inner scopes overwrite outer ones.
    pub fn to_command(&self, command: &mut Command) {
        for env in self.local_stack.iter() {
            env.to_command(command);
        }
    }
    /// Expands one lexed word into zero or more shell words (variable
    /// substitution, quoting, escapes, field splitting).
    pub fn expand(&self, word: &WordToken) -> Vec<String> {
        let mut this = Expander {
            words: vec!["".into()],
        };
        for segment in word.0.iter() {
            this.expand_segment(segment, self);
        }
        this.build()
    }
}
impl Expander {
    /// The word currently being built; creates one if the list is empty.
    fn last_mut(&mut self) -> &mut String {
        if self.words.is_empty() {
            self.words.push(String::new());
        }
        let len = self.words.len();
        &mut self.words[len - 1]
    }
    /// Appends text to the current word without starting a new one.
    fn write_to_last(&mut self, value: &str) {
        self.last_mut().push_str(value);
    }
    /// Appends a single character to the current word.
    fn push_char(&mut self, ch: char) {
        self.last_mut().push(ch);
    }
    /// Terminates the current word and starts an empty one.
    fn finish_word(&mut self) {
        self.words.push(String::new());
    }
    /// Writes an expanded variable value. When `quoted`, each stored word
    /// is emitted intact; otherwise every word is additionally re-split on
    /// whitespace (field splitting).
    pub fn write(&mut self, value: &[String], quoted: bool) {
        let word_count = value.len();
        for (i, words) in value.iter().enumerate() {
            if quoted {
                self.write_to_last(words);
                if i != word_count - 1 {
                    self.finish_word();
                }
            } else {
                let subword_count = words.split_whitespace().count();
                for (j, word) in words.split_whitespace().enumerate() {
                    self.write_to_last(word);
                    // Don't finish after the very last subword of the last
                    // value, so later segments can keep appending to it.
                    if i != word_count - 1 || j != subword_count - 1 {
                        self.finish_word();
                    }
                }
            }
        }
    }
    /// Expands one segment inside double quotes: no field splitting, but
    /// variable substitution and escapes still apply.
    fn expand_quoted(&mut self, segment: &DoubleQuotedSegment, env: &Environment) {
        match segment {
            &DoubleQuotedSegment::Literal(text) => self.write_to_last(text),
            // Shell-local variables shadow process-environment variables.
            &DoubleQuotedSegment::Var(name) if let Some(value) = env.lookup_current(name) => {
                self.write(value, true);
            }
            &DoubleQuotedSegment::Var(name) if let Ok(value) = std::env::var(name) => {
                self.write(&[value], true);
            }
            // Undefined variables expand to nothing.
            &DoubleQuotedSegment::Var(_) => (),
            DoubleQuotedSegment::Escape(escape) if let Some(ch) = escape.to_char() => {
                self.push_char(ch)
            }
            DoubleQuotedSegment::Escape(_) => todo!(),
        }
    }
    /// Expands one top-level word segment, with field splitting applied to
    /// unquoted variable substitutions.
    pub fn expand_segment(&mut self, segment: &WordSegment, env: &Environment) {
        match segment {
            &WordSegment::Literal(text) | &WordSegment::SingleQuote(text) => {
                self.write_to_last(text)
            }
            &WordSegment::Var(name) if let Some(value) = env.lookup_current(name) => {
                self.write(value, false);
            }
            &WordSegment::Var(name) if let Ok(value) = std::env::var(name) => {
                self.write(&[value], false);
            }
            &WordSegment::Var(_) => (),
            WordSegment::Escape(escape) if let Some(ch) = escape.to_char() => {
                self.push_char(ch);
            }
            WordSegment::Escape(_) => todo!(),
            WordSegment::DoubleQuote(quoted) => {
                for seg in quoted {
                    self.expand_quoted(seg, env);
                }
            }
        }
    }
    /// Consumes the expander, yielding the accumulated words.
    pub fn build(self) -> Vec<String> {
        self.words
    }
}

View File

@ -0,0 +1,224 @@
use std::{io::BufReader, marker::PhantomData, process::ExitCode};
use crate::{
builtin::{self, Envs},
error::Error,
exec::{exec_pipeline, wait_for_pipeline, Execution, InheritStdout, Input, Outcome, Output},
syntax::parse::{BinaryOperator, ConditionalExpression, Expression},
};
use super::{env::Environment, ExpandedPipeline, ExpandedPipelineElement};
/// Spawns every element of an expanded pipeline, wiring adjacent elements
/// together with anonymous pipes, then waits for the whole pipeline.
///
/// Returns the pipeline's outcome plus an optional shell-exit request
/// (e.g. from the `exit` builtin).
pub fn evaluate_pipeline(
    pipeline: &ExpandedPipeline,
    env: &mut Environment,
) -> Result<(Outcome, Option<ExitCode>), Error> {
    let mut executions = vec![];
    let mut stdins = vec![];
    let mut stdouts = vec![];
    // The pipeline's outer ends are inherited from the shell itself.
    let pipeline_stdin = Input::Inherit;
    let pipeline_stdout = Output::<InheritStdout>::Inherit(PhantomData);
    stdins.push(pipeline_stdin);
    // One pipe per inner boundary: element k writes into element k+1.
    for _ in 1..pipeline.elements.len() {
        let (read, write) = std::pipe::pipe().unwrap();
        stdins.push(Input::Pipe(BufReader::new(read)));
        stdouts.push(Output::Pipe(write));
    }
    stdouts.push(pipeline_stdout);
    assert_eq!(stdins.len(), stdouts.len());
    assert_eq!(pipeline.elements.len(), stdouts.len());
    let io = Iterator::zip(stdins.drain(..), stdouts.drain(..));
    for (command, (stdin, stdout)) in pipeline.elements.iter().zip(io) {
        // First word is the program, the rest are its arguments.
        // NOTE(review): unwrap assumes callers filtered out empty elements
        // (evaluate() does) -- confirm no other caller passes empty words.
        let (program, arguments) = command.words.split_first().unwrap();
        // let stderr = stderr.try_clone()?;
        let envs = command
            .envs
            .iter()
            .map(|(a, b)| (a.clone(), b.clone()))
            .collect();
        let execution = Execution {
            program: program.to_owned(),
            arguments: arguments.to_vec(),
            envs,
            stdin,
            stdout,
            // stderr is always inherited for now (see commented clone above).
            stderr: Output::Inherit(PhantomData),
        };
        executions.push(execution);
    }
    let handles = exec_pipeline(executions.into_iter(), env)?;
    let (status, exit) = wait_for_pipeline(handles)?;
    Ok((status, exit))
}
/// Evaluates the body of a conditional expression.
///
/// NOTE(review): leading `!` negations are stripped but never applied to
/// the result, so `! a = b` evaluates the same as `a = b` -- looks like a
/// bug; confirm intent upstream.
fn evaluate_conditional_body(
    mut body: &Expression,
    env: &mut Environment,
) -> (Outcome, Option<ExitCode>) {
    while let Expression::Not(inner) = body {
        body = &*inner;
    }
    let status = match body {
        // String (in)equality between two plain words, compared after
        // expansion (word lists are compared element-wise).
        Expression::Binary(BinaryOperator::Equal, lhs, rhs)
            if let (Some(lhs), Some(rhs)) = (lhs.as_word(), rhs.as_word()) =>
        {
            let lhs = env.expand(lhs);
            let rhs = env.expand(rhs);
            Some(lhs == rhs)
        }
        Expression::Binary(BinaryOperator::NotEqual, lhs, rhs)
            if let (Some(lhs), Some(rhs)) = (lhs.as_word(), rhs.as_word()) =>
        {
            let lhs = env.expand(lhs);
            let rhs = env.expand(rhs);
            Some(lhs != rhs)
        }
        // A single command inside the conditional is delegated to the
        // `test` builtin with inherited stdio and an empty env.
        Expression::Pipeline(pipeline)
            if let Some(element) = pipeline
                .as_single_command()
                .map(|element| ExpandedPipelineElement::from_syntax(env, element)) =>
        {
            let io = builtin::Args {
                stdin: Input::Inherit,
                stdout: Output::Inherit(PhantomData),
                stderr: Output::Inherit(PhantomData),
            };
            return (builtin::b_test(io, element.words, Envs::from(vec![])), None);
        }
        _ => None,
    };
    match status {
        Some(true) => (Outcome::ok(), None),
        Some(false) => (Outcome::err(), None),
        // Unrecognized conditional forms report an error and fail.
        None => {
            eprintln!("test: invalid expression: {:?}", body);
            (Outcome::err(), None)
        }
    }
}
pub fn evaluate_conditional(
conditional: &ConditionalExpression,
env: &mut Environment,
) -> (Outcome, Option<ExitCode>) {
evaluate_conditional_body(&*conditional.body, env)
}
/// Evaluates a single expression-tree node, dispatching on its kind.
///
/// Returns `(outcome, exit)`, where `exit` is `Some` when a builtin
/// requested the whole shell to terminate.
pub fn evaluate(expression: &Expression, env: &mut Environment) -> (Outcome, Option<ExitCode>) {
    match expression {
        Expression::Pipeline(pipeline) => {
            let mut expanded = ExpandedPipeline::from_syntax(env, pipeline);
            // Elements consisting solely of KEY=VALUE assignments are
            // removed from the pipeline; their assignments become shell
            // variables instead.
            let mut setenvs = vec![];
            expanded.elements.retain_mut(|e| {
                if e.words.is_empty() {
                    setenvs.extend(e.envs.drain(..));
                }
                !e.words.is_empty()
            });
            for (key, value) in setenvs {
                env.insert_current_literal(key, value);
            }
            // A pure-assignment line succeeds without running anything.
            if expanded.elements.is_empty() {
                return (Outcome::ok(), None);
            }
            match evaluate_pipeline(&expanded, env) {
                Ok(res) => res,
                Err(error) => {
                    eprintln!("{error}");
                    (Outcome::err(), None)
                }
            }
        }
        Expression::Conditional(cond) => evaluate_conditional(cond, env),
        // TODO redirects
        Expression::If(if_expression) => {
            let (condition, exit) = evaluate(&if_expression.condition, env);
            if exit.is_some() {
                return (condition, exit);
            }
            if condition.is_success() {
                // Execute true branch
                evaluate_list(&if_expression.if_true, env)
            } else if let Some(if_false) = if_expression.if_false.as_ref() {
                evaluate_list(if_false, env)
            } else {
                // `if` with no taken branch still succeeds.
                (Outcome::ok(), None)
            }
        }
        // TODO redirects
        Expression::While(while_expression) => loop {
            // NOTE(review): syntax-tree field is spelled "condtion" -- typo
            // to fix at its definition, not here.
            let (condition, exit) = evaluate(&while_expression.condtion, env);
            if exit.is_some() || !condition.is_success() {
                return (Outcome::ok(), exit);
            }
            let (_, exit) = evaluate_list(&while_expression.body, env);
            if exit.is_some() {
                return (Outcome::ok(), exit);
            }
        },
        // TODO redirects
        Expression::For(for_expression) => {
            // Expand the iteration list once, then bind the loop variable
            // in a fresh scope for the duration of the loop.
            let list = ExpandedPipelineElement::from_syntax(env, &*for_expression.list);
            env.push_environment();
            for word in list.words {
                env.insert_current_literal(for_expression.variable, word);
                let (_, exit) = evaluate_list(&for_expression.body, env);
                if exit.is_some() {
                    env.pop_environment();
                    return (Outcome::ok(), exit);
                }
            }
            env.pop_environment();
            (Outcome::ok(), None)
        }
        // `&&`: evaluate rhs only if lhs succeeded.
        Expression::Binary(BinaryOperator::And, lhs, rhs) => {
            let (lhs, exit) = evaluate(&*lhs, env);
            if exit.is_some() {
                return (lhs, exit);
            }
            if lhs.is_success() {
                evaluate(&*rhs, env)
            } else {
                (lhs, exit)
            }
        }
        // `||`: evaluate rhs only if lhs failed.
        Expression::Binary(BinaryOperator::Or, lhs, rhs) => {
            let (lhs, exit) = evaluate(&*lhs, env);
            if exit.is_some() {
                return (lhs, exit);
            }
            if !lhs.is_success() {
                evaluate(&*rhs, env)
            } else {
                (lhs, exit)
            }
        }
        _ => todo!(),
    }
}
/// Evaluates a sequence of expressions in order, returning the outcome of
/// the last one evaluated. A shell-exit request from any expression is
/// propagated immediately, together with that expression's outcome.
pub fn evaluate_list<'a, I: IntoIterator<Item = &'a Expression<'a>>>(
    expressions: I,
    env: &mut Environment,
) -> (Outcome, Option<ExitCode>) {
    let mut last = Outcome::ok();
    for expr in expressions {
        let (outcome, exit) = evaluate(expr, env);
        match exit {
            Some(_) => return (outcome, exit),
            None => last = outcome,
        }
    }
    (last, None)
}

View File

@ -0,0 +1,53 @@
use env::Environment;
use crate::syntax::parse::{PipelineElement, PipelineExpression};
pub mod env;
pub mod eval;
/// One command of a pipeline after word expansion: its argv words plus any
/// `KEY=VALUE` environment assignments that were peeled off the word list.
pub struct ExpandedPipelineElement {
    pub words: Vec<String>,
    pub envs: Vec<(String, String)>,
}

/// A full pipeline (`a | b | c`) after word expansion of every element.
pub struct ExpandedPipeline {
    pub elements: Vec<ExpandedPipelineElement>,
}
impl ExpandedPipelineElement {
    /// Expands a syntactic pipeline element into concrete words, peeling
    /// `KEY=VALUE` assignment words off into `envs`.
    pub fn from_syntax(env: &Environment, element: &PipelineElement) -> Self {
        // TODO redirects
        // `flat_map` instead of `map(..).flatten()` (clippy::map_flatten):
        // each word may expand to several words after field splitting.
        let mut words: Vec<_> = element
            .words
            .iter()
            .flat_map(|word| env.expand(word))
            .collect();
        let mut envs = vec![];
        // A word shaped like `KEY=VALUE` (no space around `=`) is treated
        // as an environment assignment rather than an argument.
        // NOTE(review): this matches such words anywhere in the command,
        // not only before the program name -- confirm that's intended.
        words.retain(|word| {
            if let Some((key, value)) = word.split_once('=')
                && !key.ends_with(' ')
                && !value.starts_with(' ')
            {
                envs.push((key.to_owned(), value.to_owned()));
                false
            } else {
                true
            }
        });
        Self { words, envs }
    }
}
impl ExpandedPipeline {
    /// Expands every element of a syntactic pipeline, preserving order.
    pub fn from_syntax(env: &Environment, pipeline: &PipelineExpression) -> Self {
        let mut elements = Vec::with_capacity(pipeline.elements.len());
        for element in pipeline.elements.iter() {
            elements.push(ExpandedPipelineElement::from_syntax(env, element));
        }
        Self { elements }
    }
}

View File

@ -0,0 +1,9 @@
use std::io;
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("{0}")]
Io(#[from] io::Error),
#[error("")]
InvalidUsage,
}

View File

@ -1,24 +1,26 @@
use std::{
fmt,
fs::File,
io::{self, stdin, BufRead, BufReader, Stderr, Stdout, Write},
io::{self, BufRead, BufReader, Stderr, Stdout, Write},
marker::PhantomData,
os::{
fd::{FromRawFd, IntoRawFd},
yggdrasil::{
io::terminal::set_terminal_group,
process::{create_process_group, CommandExt},
},
},
os::fd::{FromRawFd, IntoRawFd},
pipe::{PipeReader, PipeWriter},
process::{self, Child, ExitCode, ExitStatus, Stdio},
thread::{self, JoinHandle},
};
use runtime::rt::process::ProcessGroupId;
#[cfg(any(rust_analyzer, target_os = "yggdrasil"))]
use std::os::yggdrasil::{
io::terminal::set_terminal_group,
process::{create_process_group, CommandExt},
};
use crate::{builtin, env::Command, Error};
#[cfg(any(rust_analyzer, unix))]
use std::os::unix::process::CommandExt;
use crate::{builtin, command::env::Environment, error::Error};
#[derive(Debug)]
pub enum Outcome {
Process(ExitStatus),
Builtin(ExitCode),
@ -158,18 +160,22 @@ impl Outcome {
}
// TODO move pipelines into process groups
fn spawn_command(execution: Execution, pgid: ProcessGroupId) -> Result<Child, Error> {
#[cfg(any(rust_analyzer, target_os = "yggdrasil"))]
fn spawn_command(
execution: Execution,
pgid: runtime::rt::process::ProcessGroupId,
env: &Environment,
) -> Result<Child, Error> {
let mut command = process::Command::new(execution.program);
command
.args(execution.arguments)
.stdin(execution.stdin)
.stdout(execution.stdout)
.stderr(execution.stderr);
#[cfg(any(target_os = "yggdrasil", rust_analyzer))]
command.process_group(pgid);
.stderr(execution.stderr)
.process_group(pgid);
env.to_command(&mut command);
for (key, value) in execution.envs {
command.env(key, value);
}
@ -179,13 +185,46 @@ fn spawn_command(execution: Execution, pgid: ProcessGroupId) -> Result<Child, Er
Ok(child)
}
pub fn exec_pipeline<I: IntoIterator<Item = Execution>>(pipeline: I) -> Result<Vec<Handle>, Error> {
#[cfg(any(rust_analyzer, unix))]
fn spawn_command(
execution: Execution,
pgid: &mut Option<i32>,
env: &Environment,
) -> Result<Child, Error> {
let mut command = process::Command::new(execution.program);
command
.args(execution.arguments)
.process_group((*pgid).unwrap_or(0))
.stdin(execution.stdin)
.stdout(execution.stdout)
.stderr(execution.stderr);
env.to_command(&mut command);
for (key, value) in execution.envs {
command.env(key, value);
}
let child = command.spawn()?;
if pgid.is_none() {
*pgid = Some(child.id() as i32);
}
Ok(child)
}
#[cfg(any(unix, rust_analyzer))]
pub fn exec_pipeline<I: IntoIterator<Item = Execution>>(
pipeline: I,
env: &Environment,
) -> Result<Vec<Handle>, Error> {
let mut handles = vec![];
#[cfg(any(target_os = "yggdrasil", rust_analyzer))]
let pgid = create_process_group();
let mut pgid = None;
for element in pipeline.into_iter() {
let handle = if let Some(builtin) = builtin::get(&element.program) {
let io = builtin::Io {
let io = builtin::Args {
stdin: element.stdin,
stdout: element.stdout,
stderr: element.stderr,
@ -195,14 +234,48 @@ pub fn exec_pipeline<I: IntoIterator<Item = Execution>>(pipeline: I) -> Result<V
builtin(io, element.arguments, element.envs.into())
}))
} else {
let child = spawn_command(element, pgid)?;
let child = spawn_command(element, &mut pgid, env)?;
Handle::Process(child)
};
handles.push(handle);
}
// unsafe {
// libc::tcsetpgrp(stdout().as_raw_fd(), pgid.unwrap());
// // set_terminal_group(&stdin(), pgid)?;
// }
Ok(handles)
}
#[cfg(any(target_os = "yggdrasil", rust_analyzer))]
pub fn exec_pipeline<I: IntoIterator<Item = Execution>>(
pipeline: I,
env: &Environment,
) -> Result<Vec<Handle>, Error> {
use std::io::stdin;
let mut handles = vec![];
let pgid = create_process_group();
for element in pipeline.into_iter() {
let handle = if let Some(builtin) = builtin::get(&element.program) {
let io = builtin::Args {
stdin: element.stdin,
stdout: element.stdout,
stderr: element.stderr,
};
Handle::Thread(thread::spawn(move || {
builtin(io, element.arguments, element.envs.into())
}))
} else {
let child = spawn_command(element, pgid, env)?;
Handle::Process(child)
};
handles.push(handle);
}
#[cfg(any(target_os = "yggdrasil", rust_analyzer))]
unsafe {
set_terminal_group(&stdin(), pgid)?;
}
@ -243,6 +316,12 @@ pub fn wait_for_pipeline(handles: Vec<Handle>) -> Result<(Outcome, Option<ExitCo
}
}
#[cfg(any(unix, rust_analyzer))]
unsafe {
libc::tcsetpgrp(io::stdout().as_raw_fd(), libc::getpgrp());
}
// set_terminal_group(&stdin(), pgid)?;
let error = match error {
Some(exit) => exit,
None => Outcome::ok(),
@ -250,63 +329,3 @@ pub fn wait_for_pipeline(handles: Vec<Handle>) -> Result<(Outcome, Option<ExitCo
Ok((error, exit))
}
pub fn eval(command: Command) -> Result<(Outcome, Option<ExitCode>), Error> {
// Set up pipeline I/O
let mut stdins = vec![];
let mut stdouts = vec![];
let stdin = match command.redirects.stdin.as_ref() {
Some(path) => {
let file = File::open(path)?;
Input::File(BufReader::new(file))
}
None => Input::Inherit,
};
let stdout = match command.redirects.stdout.as_ref() {
Some(path) => Output::File(File::create(path)?),
None => Output::Inherit(PhantomData),
};
let stderr = match command.redirects.stderr.as_ref() {
Some(path) => Output::File(File::create(path)?),
None => Output::Inherit(PhantomData),
};
stdins.push(stdin);
for _ in 1..command.pipeline.len() {
let (read, write) = std::pipe::pipe()?;
stdins.push(Input::Pipe(BufReader::new(read)));
stdouts.push(Output::Pipe(write));
}
stdouts.push(stdout);
assert_eq!(stdins.len(), stdouts.len());
assert_eq!(stdins.len(), command.pipeline.len());
let io = Iterator::zip(stdins.drain(..), stdouts.drain(..));
let mut pipeline = vec![];
for (command, (stdin, stdout)) in command.pipeline.iter().zip(io) {
let (program, arguments) = command.words.split_first().unwrap();
let stderr = stderr.try_clone()?;
let envs = command
.envs
.iter()
.map(|(a, b)| (a.clone(), b.clone().unwrap_or_default()))
.collect();
let execution = Execution {
program: program.into(),
arguments: arguments.to_vec(),
envs,
stdin,
stdout,
stderr,
};
pipeline.push(execution);
}
let handles = exec_pipeline(pipeline)?;
let (status, exit) = wait_for_pipeline(handles)?;
Ok((status, exit))
}

View File

@ -3,37 +3,31 @@
iter_chain,
anonymous_pipe,
trait_alias,
exitcode_exit_method
exitcode_exit_method,
let_chains
)]
#![cfg_attr(target_os = "yggdrasil", feature(yggdrasil_os, rustc_private))]
#![allow(clippy::new_without_default, clippy::should_implement_trait)]
use std::{
fs::File,
io::{self, stdin, stdout, BufRead, BufReader, Write},
io::{stdin, stdout, BufRead, BufReader, Write},
path::Path,
process::ExitCode,
};
use clap::Parser;
use env::{Env, Expand};
use command::env::Environment;
use error::Error;
use exec::Outcome;
use syntax::parse::parse_interactive;
pub mod builtin;
pub mod env;
pub mod command;
pub mod error;
pub mod exec;
pub mod readline;
pub mod syntax;
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("{0}")]
Io(#[from] io::Error),
#[error("Invalid usage")]
InvalidUsage,
}
#[derive(Debug, Parser)]
pub struct ShellArgs {
#[arg(short)]
@ -50,7 +44,7 @@ pub enum ShellInput {
}
impl ShellInput {
pub fn read_line(&mut self, line: &mut String) -> Result<usize, Error> {
pub fn read_line(&mut self, line: &mut String, continuation: bool) -> Result<usize, Error> {
match self {
Self::File(file) => Ok(file.read_line(line)?),
Self::Interactive => {
@ -58,12 +52,16 @@ impl ShellInput {
let mut stdin = stdin();
readline::readline(&mut stdin, &mut stdout, line, |stdout| {
let cwd = std::env::current_dir();
let cwd = match cwd {
Ok(cwd) => format!("{}", cwd.display()),
Err(_) => "???".into(),
let prompt = if !continuation {
let cwd = std::env::current_dir();
let cwd = match cwd {
Ok(cwd) => format!("{}", cwd.display()),
Err(_) => "???".into(),
};
format!("{cwd} $ ")
} else {
"> ".to_owned()
};
let prompt = format!("{cwd} $ ");
stdout.write_all(prompt.as_bytes()).ok();
})
}
@ -71,75 +69,80 @@ impl ShellInput {
}
}
fn run_single(env: &Env, command: &str) -> Outcome {
let line = command.trim();
fn run_single(_env: &Environment, _command: &str) -> Outcome {
todo!()
// let line = command.trim();
let command = match parse_interactive(line) {
Ok(c) => c,
Err(e) => {
eprintln!("Syntax error: {e}");
return Outcome::err();
}
};
let command = match command.expand(env) {
Ok(c) => c,
Err(e) => {
eprintln!("{e}");
return Outcome::err();
}
};
let (outcome, exit) = match exec::eval(command) {
Ok(res) => res,
Err(error) => {
eprintln!("{error}");
return Outcome::err();
}
};
// let command = todo!();
// let command = match parse_interactive(line) {
// Ok(c) => c,
// Err(e) => {
// eprintln!("Syntax error: {e}");
// return Outcome::err();
// }
// };
// let command = match command.expand(env) {
// Ok(c) => c,
// Err(e) => {
// eprintln!("{e}");
// return Outcome::err();
// }
// };
// let (outcome, exit) = match exec::eval(command) {
// Ok(res) => res,
// Err(error) => {
// eprintln!("{error}");
// return Outcome::err();
// }
// };
if let Some(exit) = exit {
exit.exit_process();
}
// if let Some(exit) = exit {
// exit.exit_process();
// }
outcome
// outcome
}
fn run(mut input: ShellInput, env: &Env) -> Result<(), Error> {
fn run(mut input: ShellInput, env: &mut Environment) -> Result<(), Error> {
let mut command_text = String::new();
let mut line = String::new();
loop {
line.clear();
let len = input.read_line(&mut line)?;
let len = input.read_line(&mut line, !command_text.is_empty())?;
if len == 0 {
break Ok(());
}
let line = line.trim();
if line.is_empty() || line.starts_with('#') {
let trimmed = line.trim();
if trimmed.starts_with('#') || trimmed.is_empty() {
continue;
}
let command = match parse_interactive(line) {
Ok(c) => c,
Err(e) => {
eprintln!("Syntax error: {e}");
command_text.push_str(&line.trim_matches([' ', '\t']));
let expr = match syntax::parse::parse_toplevel(&command_text) {
Ok(("" | "\n", expr)) => expr,
Ok((rest, _)) => {
eprintln!("Trailing characters: {rest:?}");
command_text.clear();
continue;
}
Err(syntax::parse::Error::Incomplete) => {
continue;
}
Err(syntax::parse::Error::Lex(e)) if e.is_incomplete() => {
continue;
}
};
let command = match command.expand(env) {
Ok(c) => c,
Err(e) => {
eprintln!("{e}");
command_text.clear();
continue;
}
};
let (outcome, exit) = match exec::eval(command) {
Ok(res) => res,
Err(error) => {
eprintln!("{error}");
continue;
}
};
let (outcome, exit) = command::eval::evaluate(&expr, env);
command_text.clear();
if !outcome.is_success() {
eprintln!("{outcome:?}");
}
if let Some(exit) = exit {
exit.exit_process();
@ -175,7 +178,7 @@ fn find_script<P: AsRef<Path>>(arg: &P) -> &Path {
arg.as_ref()
}
fn run_wrapper(args: ShellArgs, env: &mut Env) -> Result<(), Error> {
fn run_wrapper(args: ShellArgs, env: &mut Environment) -> Result<(), Error> {
let shell_name = std::env::args().next().unwrap();
match (args.command, args.script) {
(Some(_), Some(_)) => {
@ -191,32 +194,38 @@ fn run_wrapper(args: ShellArgs, env: &mut Env) -> Result<(), Error> {
},
(None, Some(script)) => {
let script_path = find_script(&script);
let script_path_str = script_path.to_str().unwrap();
env.put_var("0", script_path_str.into());
// let script_path_str = script_path.to_str().unwrap();
// env.put_var("0", script_path_str.into());
let script = BufReader::new(File::open(script_path)?);
run(ShellInput::File(script), env)
}
(None, None) => {
env.put_var("0", shell_name.into());
// env.put_var("0", shell_name.into());
run(ShellInput::Interactive, env)
}
}
}
fn main() -> ExitCode {
#[cfg(any(rust_analyzer, unix))]
unsafe {
libc::setpgid(0, 0);
libc::signal(libc::SIGTTOU, libc::SIG_IGN);
}
const PROFILE_PATH: &str = "/etc/profile";
logsink::setup_logging(false);
let args = ShellArgs::parse();
let mut env = Env::new();
let mut env = Environment::default();
env.setup_builtin(&args.args);
// env.setup_builtin(&args.args);
builtin::register_default();
if args.login {
if let Ok(profile_script) = File::open(PROFILE_PATH) {
let profile_script = BufReader::new(profile_script);
match run(ShellInput::File(profile_script), &env) {
match run(ShellInput::File(profile_script), &mut env) {
Ok(()) => (),
Err(error) => {
eprintln!("{PROFILE_PATH}: {error}");

View File

@ -90,14 +90,18 @@ fn readline_inner(
stdout.write_all(bytes.as_bytes()).ok();
stdout.flush().ok();
buffer.push(ch);
pos += 1;
if ch != '\r' {
buffer.push(ch);
pos += 1;
}
}
_ => (),
}
if ch == '\n' || ch == '\r' {
if ch == '\r' {
buffer.push('\n');
pos += 1;
stdout.write_all(b"\n").ok();
stdout.flush().ok();
}

View File

@ -1,410 +1,627 @@
use std::str::FromStr;
use std::fmt;
use nom::{
branch::alt,
bytes::complete::{is_a, is_not, tag},
character::complete::{alphanumeric1, char, space0},
bytes::{
complete::{self, tag},
streaming,
},
character::complete::{anychar, digit0, digit1, hex_digit1, oct_digit1},
combinator::{map, recognize, value, verify},
multi::{fold_many1, many0, many1_count},
sequence::{delimited, pair, preceded, separated_pair},
multi::{many0, many1},
sequence::{delimited, preceded, terminated, tuple},
IResult,
};
#[derive(Debug, Clone, PartialEq)]
pub enum Fragment<'a> {
#[derive(Debug, PartialEq)]
pub enum WordSegment<'a> {
Literal(&'a str),
QuotedLiteral(&'a str),
Variable(&'a str),
}
#[derive(Debug, Clone, PartialEq)]
pub struct Word<'a>(pub Vec<Fragment<'a>>);
#[derive(Debug)]
pub struct TokenStream<'a> {
input: &'a str,
buffer: Option<Token<'a>>,
Var(&'a str),
SingleQuote(&'a str),
DoubleQuote(Vec<DoubleQuotedSegment<'a>>),
Escape(Escape<'a>),
}
#[derive(Debug, Clone, PartialEq)]
pub enum Redirect<'a> {
Output(OutputRedirect<'a>),
Input(Word<'a>),
}
#[derive(Debug, Clone, PartialEq)]
pub enum OutputRedirect<'a> {
Err(Word<'a>),
Out(Word<'a>),
Both(Word<'a>),
#[derive(Debug, PartialEq)]
pub enum DoubleQuotedSegment<'a> {
Literal(&'a str),
Var(&'a str),
Escape(Escape<'a>),
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, PartialEq)]
pub struct WordToken<'a>(pub Vec<WordSegment<'a>>);
#[derive(Debug, PartialEq, Clone)]
pub enum Escape<'a> {
Nul,
Letter(char),
Octal(&'a str),
Hex(&'a str),
}
#[derive(Debug, PartialEq)]
pub enum Keyword {
If,
While,
Then,
Else,
Fi,
For,
Match,
Let,
While,
Do,
Done,
Case,
Esac,
In,
}
#[derive(Debug, Clone, PartialEq)]
pub enum Operator {
Eq,
Gt,
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum Punct {
Lt,
Gt,
Ltlt,
Gtgt,
NotEqual,
DoubleSemicolon,
Ampersand,
And,
Pipe,
Or,
Assign,
}
#[derive(Debug, Clone, PartialEq)]
pub enum Punctuation {
Equal,
LBrace,
RBrace,
LParen,
RParen,
LBracket,
RBracket,
LParen,
RParen,
Not,
Semicolon,
Newline,
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, PartialEq)]
pub enum Token<'a> {
Word(Word<'a>),
Redirect(Redirect<'a>),
Word(WordToken<'a>),
Keyword(Keyword),
Punctuation(Punctuation),
Operator(Operator),
Punct(Punct),
Redirect(Redirect<'a>),
}
type NomError<'a> = nom::Err<nom::error::Error<&'a str>>;
impl<'a> TokenStream<'a> {
pub fn new(input: &'a str) -> Self {
Self {
input,
buffer: None,
}
}
pub fn is_eof(&self) -> bool {
self.input.is_empty() && self.buffer.is_none()
}
fn read(&mut self) -> Result<Option<Token<'a>>, NomError<'a>> {
if self.input.is_empty() {
self.buffer = None;
Ok(None)
} else {
let (rest, token) = lex_token(self.input)?;
self.input = rest;
self.buffer = Some(token.clone());
Ok(Some(token))
}
}
pub fn next(&mut self) -> Result<Option<Token<'a>>, NomError<'a>> {
let token = self.peek()?;
self.read()?;
Ok(token)
}
pub fn peek(&mut self) -> Result<Option<Token<'a>>, NomError<'a>> {
if let Some(buffer) = self.buffer.clone() {
return Ok(Some(buffer));
}
self.read()
}
/// Where an output redirect writes: a filename word, or an already-open
/// file descriptor given by digits after `&` (e.g. `>&2`).
#[derive(Debug, PartialEq)]
pub enum OutputRedirectTarget<'a> {
    Filename(WordToken<'a>),
    Descriptor(&'a str),
}
impl Word<'_> {
pub fn as_literal(&self) -> Option<&str> {
if self.0.len() != 1 {
return None;
}
let Fragment::Literal(lit) = self.0[0] else {
return None;
};
Some(lit)
}
/// A lexed output redirect such as `>file`, `2>>file` or `>&2`.
#[derive(Debug, PartialEq)]
pub struct OutputRedirect<'a> {
    // Digits preceding `>` (the `2` in `2>file`); None when no digits were given.
    pub source: Option<&'a str>,
    pub target: OutputRedirectTarget<'a>,
    // True when the redirect was written `>>` rather than `>`.
    pub append: bool,
}
impl FromStr for Keyword {
type Err = ();
/// An input redirect: `<file`, or a heredoc introduced by `<<TERMINATOR`.
#[derive(Debug, PartialEq)]
pub enum InputRedirect<'a> {
    Filename(WordToken<'a>),
    // Only the terminator identifier is captured here; heredoc bodies are
    // not handled yet.
    Heredoc(&'a str),
}
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"if" => Ok(Self::If),
"while" => Ok(Self::While),
"for" => Ok(Self::For),
"match" => Ok(Self::Match),
"let" => Ok(Self::Let),
_ => Err(()),
}
}
// TODO heredoc
/// Either direction of an I/O redirection attached to a command.
#[derive(Debug, PartialEq)]
pub enum Redirect<'a> {
    // <xxx
    Input(InputRedirect<'a>),
    // 123>[>][&]...
    Output(OutputRedirect<'a>),
}
/// Lexes a backslash escape: `\0NNN` (octal), `\xNN`/`\XNN` (hex), a bare
/// `\0` (NUL), or `\c` for any other single character.
///
/// Alternative order matters: octal/hex are tried before the bare-NUL and
/// catch-all `anychar` branches, so `\033` parses as `Octal("33")`, not NUL.
fn lex_escape(i: &str) -> IResult<&str, Escape> {
    preceded(
        tag("\\"),
        alt((
            // \0NNN — octal digits after the leading 0
            map(preceded(tag("0"), oct_digit1), Escape::Octal),
            // \xNN / \XNN — hex digits
            map(preceded(tag("x"), hex_digit1), Escape::Hex),
            map(preceded(tag("X"), hex_digit1), Escape::Hex),
            // \0 with no digits following
            value(Escape::Nul, tag("0")),
            // any other escaped character (e.g. \n, \t, \<space>)
            map(anychar, Escape::Letter),
        )),
    )(i)
}
/// Lexes a run of plain (unquoted, unescaped) word characters.
fn lex_literal_word_segment(i: &str) -> IResult<&str, &str> {
    // Anything that is not one of \ $ ' " ` < > [ ] { } ! & ;,
    // SPACE, TAB or NEWLINE
    const EXCLUDE: &str = "\\$'\"` \t\n<>[]{}!&;";
    verify(streaming::is_not(EXCLUDE), |s: &str| !s.is_empty())(i)
}
/// Lexes a single-quoted string; everything up to the next `'` is taken
/// verbatim (no escapes, no interpolation).
fn lex_single_quote_word_segment(i: &str) -> IResult<&str, &str> {
    delimited(tag("'"), recognize(many0(streaming::is_not("'"))), tag("'"))(i)
}
fn lex_identifier(i: &str) -> IResult<&str, &str> {
recognize(many1_count(alt((alphanumeric1, is_a("-_@?*")))))(i)
}
fn lex_filename(i: &str) -> IResult<&str, &str> {
recognize(many1_count(alt((alphanumeric1, is_a("./-_:+,")))))(i)
// Can be anything besides &, ', `, ", ;, :, \, SPACE, TAB or NEWLINE
const EXCLUDE0: &str = "\\&'\"`; \t\n<>[]{}()";
recognize(streaming::is_not(EXCLUDE0))(i)
}
fn lex_braced_var(i: &str) -> IResult<&str, &str> {
// ${ABCD}
delimited(tag("${"), lex_identifier, char('}'))(i)
}
fn lex_unbraced_var(i: &str) -> IResult<&str, &str> {
// $ABCD
preceded(char('$'), lex_identifier)(i)
}
fn lex_var(i: &str) -> IResult<&str, &str> {
alt((lex_braced_var, lex_unbraced_var))(i)
fn lex_braced_var_segment(i: &str) -> IResult<&str, &str> {
delimited(tag("{"), lex_identifier, tag("}"))(i)
}
fn lex_dquoted_literal(i: &str) -> IResult<&str, &str> {
let is_not_var_slash_quote = is_not("\\\"$");
verify(is_not_var_slash_quote, |s: &str| !s.is_empty())(i)
}
fn lex_dquoted(i: &str) -> IResult<&str, Vec<Fragment>> {
delimited(
char('"'),
many0(alt((
map(lex_var, Fragment::Variable),
map(lex_dquoted_literal, Fragment::QuotedLiteral),
))),
char('"'),
)(i)
/// Lexes `$NAME` or `${NAME}` and yields just the variable name.
fn lex_var_segment(i: &str) -> IResult<&str, &str> {
    preceded(tag("$"), alt((lex_braced_var_segment, lex_identifier)))(i)
}
fn lex_squoted_text(i: &str) -> IResult<&str, &str> {
let is_not_slash_quote = is_not("\\'");
recognize(many0(is_not_slash_quote))(i)
}
fn lex_squoted(i: &str) -> IResult<&str, &str> {
delimited(char('\''), lex_squoted_text, char('\''))(i)
fn lex_double_quoted_literal_segment(i: &str) -> IResult<&str, &str> {
const EXCLUDE: &str = "\\$`\"";
recognize(streaming::is_not(EXCLUDE))(i)
}
fn lex_unquoted_fragment(i: &str) -> IResult<&str, Fragment> {
fn lex_double_quoted_segment(i: &str) -> IResult<&str, DoubleQuotedSegment> {
alt((
map(lex_var, Fragment::Variable),
map(lex_filename, Fragment::Literal),
))(i)
}
fn lex_word(i: &str) -> IResult<&str, Word> {
fold_many1(
alt((
lex_dquoted,
map(lex_squoted, |s| vec![Fragment::QuotedLiteral(s)]),
map(lex_unquoted_fragment, |s| vec![s]),
)),
|| Word(vec![]),
|mut acc, items| {
acc.0.extend(items);
acc
},
)(i)
}
fn lex_explicit_output_redirect(i: &str) -> IResult<&str, OutputRedirect> {
// out>abcdef
// err>abcdef
// out+err>abcdef
// oe>abcdef
// eo>abcdef
#[derive(Debug, Clone)]
enum Source {
Out,
Err,
Both,
}
map(
separated_pair(
alt((
value(Source::Out, tag("out")),
value(Source::Err, tag("err")),
value(Source::Both, tag("oe")),
value(Source::Both, tag("eo")),
value(Source::Out, char('o')),
value(Source::Err, char('e')),
)),
char('>'),
lex_word,
map(lex_escape, DoubleQuotedSegment::Escape),
map(lex_var_segment, DoubleQuotedSegment::Var),
map(
lex_double_quoted_literal_segment,
DoubleQuotedSegment::Literal,
),
|(source, path)| match source {
Source::Out => OutputRedirect::Out(path),
Source::Err => OutputRedirect::Err(path),
Source::Both => OutputRedirect::Both(path),
},
)(i)
}
fn lex_implicit_output_redirect(i: &str) -> IResult<&str, OutputRedirect> {
// >abcdef
map(
preceded(pair(char('>'), space0), lex_word),
OutputRedirect::Out,
)(i)
}
fn lex_output_redirect(i: &str) -> IResult<&str, OutputRedirect> {
alt((lex_implicit_output_redirect, lex_explicit_output_redirect))(i)
}
fn lex_input_redirect(i: &str) -> IResult<&str, Word> {
// <abcdef
preceded(pair(char('<'), space0), lex_word)(i)
}
fn lex_redirect(i: &str) -> IResult<&str, Redirect> {
alt((
map(lex_input_redirect, Redirect::Input),
map(lex_output_redirect, Redirect::Output),
))(i)
}
fn lex_maybe_keyword(i: &str) -> IResult<&str, Token> {
// TODO this will recognize quoted text as a keyword
/// Lexes a double-quoted string into its interpolated segments
/// (literals, `$var` references, and backslash escapes).
fn lex_double_quote_word_segment(i: &str) -> IResult<&str, Vec<DoubleQuotedSegment>> {
    delimited(tag("\""), many0(lex_double_quoted_segment), tag("\""))(i)
}
/// Lexes one word segment. Alternative order matters: escapes and `$var`
/// come before quoted forms, and the bare-literal parser is the catch-all.
fn lex_word_segment(i: &str) -> IResult<&str, WordSegment> {
    alt((
        map(lex_escape, WordSegment::Escape),
        map(lex_var_segment, WordSegment::Var),
        map(lex_double_quote_word_segment, WordSegment::DoubleQuote),
        map(lex_single_quote_word_segment, WordSegment::SingleQuote),
        map(lex_literal_word_segment, WordSegment::Literal),
    ))(i)
}
/// Lexes a word as one or more adjacent segments, consuming trailing
/// spaces/tabs so the next token starts at a non-blank character.
fn lex_word(i: &str) -> IResult<&str, WordToken> {
    map(terminated(many1(lex_word_segment), whitespace0), WordToken)(i)
}
fn lex_word_or_keyword(i: &str) -> IResult<&str, Token> {
map(lex_word, |word| {
if let Some(kw) = word.as_literal().and_then(|s| Keyword::from_str(s).ok()) {
return Token::Keyword(kw);
if let Some(kw) = word.as_keyword() {
Token::Keyword(kw)
} else {
Token::Word(word)
}
Token::Word(word)
})(i)
}
fn lex_punctuation(i: &str) -> IResult<&str, Punctuation> {
fn lex_punct(i: &str) -> IResult<&str, Punct> {
alt((
value(Punctuation::LBrace, char('{')),
value(Punctuation::RBrace, char('}')),
value(Punctuation::LParen, char('(')),
value(Punctuation::RParen, char(')')),
value(Punctuation::LBracket, char('[')),
value(Punctuation::RBracket, char(']')),
value(Punct::Ltlt, tag("<<")),
value(Punct::Gtgt, tag(">>")),
value(Punct::And, tag("&&")),
value(Punct::Or, tag("||")),
value(Punct::DoubleSemicolon, tag(";;")),
value(Punct::NotEqual, tag("!=")),
value(Punct::Lt, tag("<")),
value(Punct::Gt, tag(">")),
value(Punct::Pipe, tag("|")),
value(Punct::Ampersand, tag("&")),
value(Punct::LParen, tag("(")),
value(Punct::RParen, tag(")")),
value(Punct::LBrace, tag("{")),
value(Punct::RBrace, tag("}")),
value(Punct::LBracket, tag("[")),
value(Punct::RBracket, tag("]")),
value(Punct::Equal, tag("=")),
value(Punct::Not, tag("!")),
value(Punct::Semicolon, tag(";")),
value(Punct::Newline, tag("\n")),
))(i)
}
fn lex_operator(i: &str) -> IResult<&str, Operator> {
fn lex_input_redirect(i: &str) -> IResult<&str, InputRedirect> {
alt((
value(Operator::Eq, tag("==")),
value(Operator::Assign, char('=')),
value(Operator::Or, char('|')),
map(preceded(tag("<<"), lex_identifier), InputRedirect::Heredoc),
map(preceded(tag("<"), lex_word), InputRedirect::Filename),
))(i)
}
/// Lexes the target of an output redirect: `&N` (descriptor) is tried first,
/// otherwise the target is a filename word.
fn lex_output_redirect_target(i: &str) -> IResult<&str, OutputRedirectTarget> {
    alt((
        map(preceded(tag("&"), digit1), OutputRedirectTarget::Descriptor),
        map(lex_word, OutputRedirectTarget::Filename),
    ))(i)
}
/// Lexes an output redirect of the form `[DIGITS]>[>]TARGET`,
/// e.g. `>file`, `2>>log`, `>&2`.
fn lex_output_redirect(i: &str) -> IResult<&str, OutputRedirect> {
    map(
        tuple((
            // Optional source descriptor digits before `>`
            digit0,
            // `>>` must be tried before `>`
            alt((tag(">>"), tag(">"))),
            lex_output_redirect_target,
        )),
        |(source, token, target)| {
            // digit0 matches the empty string; normalize that to None
            let source = (!source.is_empty()).then_some(source);
            let append = token == ">>";
            OutputRedirect {
                source,
                target,
                append,
            }
        },
    )(i)
}
/// Lexes either redirect direction; output (`>`-based) is tried before
/// input (`<`-based).
fn lex_redirect(i: &str) -> IResult<&str, Redirect> {
    alt((
        map(lex_output_redirect, Redirect::Output),
        map(lex_input_redirect, Redirect::Input),
    ))(i)
}
/// Consumes zero or more spaces/tabs. Newlines are NOT consumed here —
/// they are significant and lexed as `Punct::Newline`.
fn whitespace0(i: &str) -> IResult<&str, &str> {
    recognize(many0(complete::is_a(" \t")))(i)
}
pub fn lex_token(i: &str) -> IResult<&str, Token> {
preceded(
space0,
whitespace0,
alt((
map(lex_punctuation, Token::Punctuation),
map(lex_redirect, Token::Redirect),
map(lex_operator, Token::Operator),
lex_maybe_keyword,
map(lex_punct, Token::Punct),
lex_word_or_keyword,
)),
)(i)
}
pub fn lex_tokens(i: &str) -> IResult<&str, Vec<Token>> {
many0(lex_token)(i)
/// Lexes the whole input into a token list, stopping at the first
/// unrecognizable piece of input with the lexer's error.
pub fn lex_tokens(mut i: &str) -> IResult<&str, Vec<Token>> {
    let mut out = Vec::new();
    loop {
        if i.is_empty() {
            break;
        }
        let (rest, token) = lex_token(i)?;
        out.push(token);
        i = rest;
    }
    Ok((i, out))
}
impl WordToken<'_> {
    /// Returns the shell keyword this word spells, if it consists of exactly
    /// one unquoted literal segment matching a reserved word. Quoting any
    /// part of the word (e.g. `if''`) defeats keyword recognition.
    pub fn as_keyword(&self) -> Option<Keyword> {
        let [WordSegment::Literal(lit)] = self.0[..] else {
            return None;
        };
        match lit {
            "if" => Some(Keyword::If),
            "then" => Some(Keyword::Then),
            "else" => Some(Keyword::Else),
            "fi" => Some(Keyword::Fi),
            "case" => Some(Keyword::Case),
            "esac" => Some(Keyword::Esac),
            "for" => Some(Keyword::For),
            "while" => Some(Keyword::While),
            "do" => Some(Keyword::Do),
            "done" => Some(Keyword::Done),
            "in" => Some(Keyword::In),
            _ => None,
        }
    }

    /// True when the word's final segment is a literal ending in `ch`.
    pub fn ends_with(&self, ch: char) -> bool {
        matches!(self.0.last(), Some(WordSegment::Literal(lit)) if lit.ends_with(ch))
    }
}
impl Keyword {
    /// Converts the keyword back into the literal word it was lexed from,
    /// so keywords can be treated as ordinary words where needed.
    pub fn to_word(&self) -> WordToken<'static> {
        WordToken(vec![WordSegment::Literal(match self {
            Self::If => "if",
            Self::Then => "then",
            Self::Else => "else",
            Self::Fi => "fi",
            Self::For => "for",
            Self::While => "while",
            Self::Do => "do",
            Self::Done => "done",
            Self::Case => "case",
            Self::Esac => "esac",
            Self::In => "in",
        })])
    }
}
impl Escape<'_> {
pub fn to_char(&self) -> Option<char> {
match self {
Self::Nul => Some('\0'),
&Self::Hex(x)
if let Some(ch) = u32::from_str_radix(x, 16).ok().and_then(char::from_u32) =>
{
Some(ch)
}
&Self::Octal(x)
if let Some(ch) = u32::from_str_radix(x, 8).ok().and_then(char::from_u32) =>
{
Some(ch)
}
&Self::Letter('n') => Some('\n'),
&Self::Letter('t') => Some('\t'),
&Self::Letter('r') => Some('\r'),
&Self::Letter(' ') => Some(' '),
_ => None,
}
}
}
impl fmt::Display for WordToken<'_> {
    // Re-renders the word by concatenating all of its segments in order.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.0.iter().try_for_each(|segment| write!(f, "{segment}"))
    }
}
impl fmt::Display for DoubleQuotedSegment<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Literal text is re-escaped Rust-style for display
            Self::Literal(text) => write!(f, "{}", text.escape_default()),
            Self::Var(var) => write!(f, "${{{var}}}"),
            Self::Escape(esc) => write!(f, "{esc}"),
        }
    }
}
impl fmt::Display for Escape<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Every escape renders as a backslash followed by its payload.
        match self {
            Self::Nul => write!(f, "\\0"),
            Self::Hex(val) => write!(f, "\\x{val}"),
            Self::Octal(val) => write!(f, "\\0{val}"),
            Self::Letter(l) => write!(f, "\\{l}"),
        }
    }
}
impl fmt::Display for WordSegment<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Literal(text) => write!(f, "{text}"),
            Self::Var(var) => write!(f, "${{{var}}}"),
            Self::Escape(escape) => write!(f, "{escape}"),
            Self::SingleQuote(quote) => write!(f, "'{quote}'"),
            Self::DoubleQuote(segments) => {
                // Re-wrap the inner segments in double quotes
                f.write_str("\"")?;
                for segment in segments {
                    write!(f, "{segment}")?;
                }
                f.write_str("\"")
            }
        }
    }
}
impl fmt::Display for Keyword {
    // Render through the keyword's canonical word form.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.to_word())
    }
}
impl fmt::Display for Punct {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Map each punctuator back to its source spelling (Newline is shown
        // escaped so diagnostics stay on one line).
        f.write_str(match self {
            Self::Lt => "<",
            Self::Gt => ">",
            Self::Ltlt => "<<",
            Self::Gtgt => ">>",
            Self::LBrace => "{",
            Self::RBrace => "}",
            Self::LParen => "(",
            Self::RParen => ")",
            Self::LBracket => "[",
            Self::RBracket => "]",
            Self::Not => "!",
            Self::Equal => "=",
            Self::NotEqual => "!=",
            Self::Newline => "\\n",
            Self::Ampersand => "&",
            Self::And => "&&",
            Self::Or => "||",
            Self::Pipe => "|",
            Self::Semicolon => ";",
            Self::DoubleSemicolon => ";;",
        })
    }
}
impl fmt::Display for InputRedirect<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Fixed: previously printed a literal "<(unknown)" placeholder
            // while leaving the bound `filename` unused; render the actual
            // filename word (WordToken implements Display).
            Self::Filename(filename) => write!(f, "<{filename}"),
            Self::Heredoc(terminator) => write!(f, "<<{terminator}"),
        }
    }
}
impl fmt::Display for OutputRedirectTarget<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Fixed: previously printed a literal "(unknown)" placeholder
            // while leaving the bound `filename` unused; render the actual
            // filename word (WordToken implements Display).
            Self::Filename(filename) => write!(f, "{filename}"),
            Self::Descriptor(descriptor) => write!(f, "&{descriptor}"),
        }
    }
}
impl fmt::Display for OutputRedirect<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Optional source descriptor digits first, then `>`/`>>` + target.
        if let Some(source) = self.source {
            f.write_str(source)?;
        }
        if self.append {
            write!(f, ">>{}", self.target)
        } else {
            write!(f, ">{}", self.target)
        }
    }
}
impl fmt::Display for Redirect<'_> {
    // Delegate to the inner redirect's Display implementation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Input(input) => fmt::Display::fmt(input, f),
            Self::Output(output) => fmt::Display::fmt(output, f),
        }
    }
}
impl fmt::Display for Token<'_> {
    // Delegate to whichever token payload this is.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Word(word) => write!(f, "{word}"),
            Self::Keyword(kw) => write!(f, "{kw}"),
            Self::Punct(punct) => write!(f, "{punct}"),
            Self::Redirect(redirect) => write!(f, "{redirect}"),
        }
    }
}
#[cfg(test)]
mod tests {
use std::fmt;
use nom::IResult;
use super::{
lex_filename, lex_tokens, Fragment, Keyword, Operator, OutputRedirect, Redirect, Token,
Word,
use crate::syntax::lex::{
lex_double_quote_word_segment, lex_literal_word_segment, lex_single_quote_word_segment,
lex_token, lex_word, DoubleQuotedSegment, Escape, Keyword, OutputRedirect,
OutputRedirectTarget, Punct, Redirect, Token, WordSegment, WordToken,
};
#[track_caller]
fn run_tests<
'a,
T: PartialEq + fmt::Debug,
I: IntoIterator<Item = (&'a str, T, &'a str)>,
F: Fn(&'a str) -> IResult<&'a str, T>,
>(
it: I,
parser: F,
) {
let location = std::panic::Location::caller();
use super::lex_tokens;
for (i, (input, expect, expect_rest)) in it.into_iter().enumerate() {
let (rest, output) = match parser(input) {
Ok(ok) => ok,
Err(error) => {
eprintln!("Test #{i} in {location:?} failed:");
eprintln!("* Input: {input:?}");
eprintln!("* Parser returned error: {error}");
panic!();
}
};
if rest != expect_rest {
eprintln!("Test #{i} in {location:?} failed:");
eprintln!("* Input: {input:?}");
if expect_rest.is_empty() {
eprintln!("* Unexpected trailing characters: {rest:?}");
} else {
eprintln!("* Expected trailing characters: {expect_rest:?}");
eprintln!("* Actual trailing characters: {rest:?}");
}
panic!();
}
if output != expect {
eprintln!("Test #{i} in {location:?} failed:");
eprintln!("* Input: {input:?}");
eprintln!("* Expected output: {expect:?}");
eprintln!("* Actual output: {output:?}");
panic!();
}
}
#[test]
fn test_lex_complete() {
let x = r#"if a; then
echo b
echo c
fi
"#;
let (r, o) = lex_tokens(x).unwrap();
assert_eq!(r, "");
assert_eq!(
o,
vec![
Token::Keyword(Keyword::If),
Token::Word(WordToken(vec![WordSegment::Literal("a")])),
Token::Punct(Punct::Semicolon),
Token::Keyword(Keyword::Then),
Token::Punct(Punct::Newline),
Token::Word(WordToken(vec![WordSegment::Literal("echo")])),
Token::Word(WordToken(vec![WordSegment::Literal("b")])),
Token::Punct(Punct::Newline),
Token::Word(WordToken(vec![WordSegment::Literal("echo")])),
Token::Word(WordToken(vec![WordSegment::Literal("c")])),
Token::Punct(Punct::Newline),
Token::Keyword(Keyword::Fi),
Token::Punct(Punct::Newline),
]
);
}
#[test]
fn test_lex_filename() {
run_tests(
[
("./abc123_a-a/file>other", "./abc123_a-a/file", ">other"),
("/a/b/c d e f g", "/a/b/c", " d e f g"),
],
lex_filename,
)
fn test_lex_literal_word_segment() {
let x = "a b c";
assert_eq!(lex_literal_word_segment(x).unwrap().1, "a");
let x = "a$b b c";
assert_eq!(lex_literal_word_segment(x).unwrap().1, "a");
let x = r#"a"b" b c"#;
assert_eq!(lex_literal_word_segment(x).unwrap().1, "a");
let x = "a1#.,/@`xyz`w";
assert_eq!(lex_literal_word_segment(x).unwrap().1, "a1#.,/@");
let x = r#"a\ b"#;
assert_eq!(lex_literal_word_segment(x).unwrap().1, "a");
}
#[test]
fn test_lex_tokens() {
run_tests(
[
(
" if /a/b/c\" $a b c\"$d efg",
vec![
Token::Keyword(Keyword::If),
Token::Word(Word(vec![
Fragment::Literal("/a/b/c"),
Fragment::Literal(" "),
Fragment::Variable("a"),
Fragment::Literal(" b c"),
Fragment::Variable("d"),
])),
Token::Word(Word(vec![Fragment::Literal("efg")])),
],
"",
),
(
"\t>$d\"filename\"",
vec![Token::Redirect(Redirect::Output(OutputRedirect::Out(
Word(vec![Fragment::Variable("d"), Fragment::Literal("filename")]),
)))],
"",
),
(
"| abc",
vec![
Token::Operator(Operator::Or),
Token::Word(Word(vec![Fragment::Literal("abc")])),
],
"",
),
],
lex_tokens,
)
fn test_lex_single_quote_word_segment() {
let x = r"'a'";
assert_eq!(lex_single_quote_word_segment(x).unwrap().1, "a");
let x = r"'\''";
assert_eq!(lex_single_quote_word_segment(x).unwrap(), ("'", r"\"));
let x = "''''";
assert_eq!(lex_single_quote_word_segment(x).unwrap(), ("''", ""));
}
#[test]
fn test_lex_double_quote_word_segment() {
let x = r#""a\$ b \"$c:x\ $d\033""#;
assert_eq!(
lex_double_quote_word_segment(x).unwrap().1,
vec![
DoubleQuotedSegment::Literal("a"),
DoubleQuotedSegment::Escape(Escape::Letter('$')),
DoubleQuotedSegment::Literal(" b "),
DoubleQuotedSegment::Escape(Escape::Letter('"')),
DoubleQuotedSegment::Var("c:x"),
DoubleQuotedSegment::Escape(Escape::Letter(' ')),
DoubleQuotedSegment::Var("d"),
DoubleQuotedSegment::Escape(Escape::Octal("33")),
]
);
}
#[test]
fn test_lex_word() {
let x = r#"a1_/-.'q $a\'"q $?\n\"2"c\ d e"#;
assert_eq!(
lex_word(x).unwrap().1,
WordToken(vec![
WordSegment::Literal("a1_/-."),
WordSegment::SingleQuote("q $a\\"),
WordSegment::DoubleQuote(vec![
DoubleQuotedSegment::Literal("q "),
DoubleQuotedSegment::Var("?"),
DoubleQuotedSegment::Escape(Escape::Letter('n')),
DoubleQuotedSegment::Escape(Escape::Letter('"')),
DoubleQuotedSegment::Literal("2"),
]),
WordSegment::Literal("c"),
WordSegment::Escape(Escape::Letter(' ')),
WordSegment::Literal("d"),
])
);
}
#[test]
fn test_lex_token() {
let x = r#"if condition; then ..."#;
assert_eq!(lex_token(x).unwrap().1, Token::Keyword(Keyword::If));
let x = r#"if'' condition; then ..."#;
assert_eq!(
lex_token(x).unwrap().1,
Token::Word(WordToken(vec![
WordSegment::Literal("if"),
WordSegment::SingleQuote("")
]))
);
let x = "1>>a\n";
assert_eq!(
lex_token(x).unwrap().1,
Token::Redirect(Redirect::Output(OutputRedirect {
source: Some("1"),
target: OutputRedirectTarget::Filename(WordToken(vec![WordSegment::Literal("a")])),
append: true
}))
);
let x = ">&2\n";
assert_eq!(
lex_token(x).unwrap().1,
Token::Redirect(Redirect::Output(OutputRedirect {
source: None,
target: OutputRedirectTarget::Descriptor("2"),
append: false
}))
);
}
}

View File

@ -1,137 +1,767 @@
use crate::syntax::lex::{Operator, Redirect, Token, TokenStream};
use crate::syntax::lex::{Punct, WordSegment};
use super::lex::{OutputRedirect, Word};
use super::lex::{self, InputRedirect, Keyword, Redirect, Token, WordToken};
#[derive(Debug, thiserror::Error)]
pub enum Error {
pub enum Error<'a> {
#[error("{0}")]
Lex(nom::Err<nom::error::Error<String>>),
#[error("Unexpected token `{0}`")]
UnexpectedToken(String),
#[error("Empty command")]
EmptyPipelineCommand,
#[error("Unexpected EOF")]
Incomplete,
#[error("Expected {expected}, got {got}")]
Expected { expected: Token<'a>, got: Token<'a> },
#[error("Unexpected {0}")]
Unexpected(Token<'a>),
#[error("Invalid case pattern: {0}")]
InvalidCasePattern(WordToken<'a>),
}
#[derive(Debug, PartialEq, Clone)]
pub struct ICommand<'a> {
pub pipeline: Vec<IPipelineElement<'a>>,
pub redirects: IRedirects<'a>,
/// One command in a pipeline: its argument words plus the redirects attached
/// directly to it.
#[derive(Debug, PartialEq)]
pub struct PipelineElement<'a> {
    pub words: Vec<WordToken<'a>>,
    pub redirects: Vec<Redirect<'a>>,
}
#[derive(Debug, PartialEq, Clone)]
pub struct IRedirects<'a> {
pub stdin: Option<Word<'a>>,
pub stdout: Option<Word<'a>>,
pub stderr: Option<Word<'a>>,
/// A `|`-connected sequence of commands.
#[derive(Debug, PartialEq)]
pub struct PipelineExpression<'a> {
    pub elements: Vec<PipelineElement<'a>>,
}
#[derive(Debug, PartialEq, Clone)]
pub struct IPipelineElement<'a> {
pub envs: Vec<(Word<'a>, Option<Word<'a>>)>,
pub words: Vec<Word<'a>>,
/// Operator of a binary expression (`Expression::Binary`).
#[derive(Debug, PartialEq)]
pub enum BinaryOperator {
    And,
    Or,
    Equal,
    NotEqual,
}
pub fn parse_pipeline_element<'a>(ts: &mut TokenStream<'a>) -> Result<IPipelineElement<'a>, Error> {
/// `if CONDITION; then IF_TRUE [else IF_FALSE] fi`, with optional trailing
/// redirects applying to the whole construct.
#[derive(Debug, PartialEq)]
pub struct IfExpression<'a> {
    pub condition: Box<Expression<'a>>,
    pub if_true: Vec<Expression<'a>>,
    pub if_false: Option<Vec<Expression<'a>>>,
    pub redirects: Vec<Redirect<'a>>,
}
/// `while CONDITION; do BODY done`, with optional trailing redirects.
#[derive(Debug, PartialEq)]
pub struct WhileExpression<'a> {
    // NOTE(review): field name is a typo for `condition`; renaming it would
    // touch every constructor/consumer, so it is only flagged here.
    pub condtion: Box<Expression<'a>>,
    pub body: Vec<Expression<'a>>,
    pub redirects: Vec<Redirect<'a>>,
}
/// `for VARIABLE in LIST; do BODY done`, with optional trailing redirects.
#[derive(Debug, PartialEq)]
pub struct ForExpression<'a> {
    pub variable: &'a str,
    // The iterated list, parsed like a pipeline element (a word list).
    pub list: Box<PipelineElement<'a>>,
    pub body: Vec<Expression<'a>>,
    pub redirects: Vec<Redirect<'a>>,
}
/// One `PATTERN) BODY ;;` branch of a `case` expression. The pattern word
/// includes its trailing `)`.
#[derive(Debug, PartialEq)]
pub struct CaseBranch<'a> {
    pub pattern: WordToken<'a>,
    pub body: Vec<Expression<'a>>,
}
/// `case EXPRESSION in BRANCHES... esac`, with optional trailing redirects.
#[derive(Debug, PartialEq)]
pub struct CaseExpression<'a> {
    pub expression: WordToken<'a>,
    pub branches: Vec<CaseBranch<'a>>,
    pub redirects: Vec<Redirect<'a>>,
}
/// A `[ ... ]` test expression wrapping its inner expression.
#[derive(Debug, PartialEq)]
pub struct ConditionalExpression<'a> {
    pub body: Box<Expression<'a>>,
}
/// A brace-delimited group of expressions, with optional trailing redirects.
#[derive(Debug, PartialEq)]
pub struct GroupExpression<'a> {
    pub body: Vec<Expression<'a>>,
    pub redirects: Vec<Redirect<'a>>,
}
/// One node of the parsed shell syntax tree.
#[derive(Debug, PartialEq)]
pub enum Expression<'a> {
    Pipeline(PipelineExpression<'a>),
    // lhs OP rhs, e.g. `a && b` / `a || b`
    Binary(BinaryOperator, Box<Expression<'a>>, Box<Expression<'a>>),
    If(IfExpression<'a>),
    While(WhileExpression<'a>),
    For(ForExpression<'a>),
    Case(CaseExpression<'a>),
    // `[ ... ]` test
    Conditional(ConditionalExpression<'a>),
    // Logical negation of the inner expression
    Not(Box<Expression<'a>>),
    // `{ ... }` group
    Group(GroupExpression<'a>),
}
impl From<nom::Err<nom::error::Error<&str>>> for Error<'_> {
    // Lexer errors borrow the input string; convert them to owned so the
    // parser error can outlive the parsed text.
    fn from(value: nom::Err<nom::error::Error<&str>>) -> Self {
        Self::Lex(value.map_input(|input| input.to_owned()))
    }
}
fn parse_pipeline_element(mut input: &str) -> Result<(&str, PipelineElement), Error> {
let mut words = vec![];
let mut envs = vec![];
while let Some(token) = ts.peek()? {
let Token::Word(word) = token else {
let mut redirects = vec![];
loop {
if input.is_empty() {
break;
};
ts.next()?;
if let Some(Token::Operator(Operator::Assign)) = ts.peek()? {
ts.next()?;
let value = if let Some(Token::Word(word)) = ts.peek()? {
ts.next()?;
Some(word)
} else {
None
};
}
let (rest, token) = lex::lex_token(input)?;
match token {
Token::Word(word) => words.push(word),
Token::Keyword(keyword) => words.push(keyword.to_word()),
Token::Redirect(Redirect::Input(InputRedirect::Heredoc(_))) => todo!("Heredoc"),
Token::Redirect(redirect) => redirects.push(redirect),
_ => {
if words.is_empty() {
return Err(Error::Unexpected(token));
}
break;
}
}
input = rest;
}
assert!(!words.is_empty());
Ok((input, PipelineElement { words, redirects }))
}
envs.push((word, value));
} else {
words.push(word);
fn parse_pipeline(mut input: &str) -> Result<(&str, PipelineExpression), Error> {
let mut elements = vec![];
let mut redirects = vec![];
loop {
if input.is_empty() {
break;
}
input = skip_newline(input)?;
// Pipeline starts with an element
let (rest, element) = parse_pipeline_element(input)?;
elements.push(element);
input = rest;
if input.is_empty() {
break;
}
let (rest, token) = lex::lex_token(input)?;
match token {
Token::Punct(Punct::Pipe) => {
input = rest;
}
Token::Punct(Punct::Newline) => break,
Token::Punct(Punct::Semicolon) => {
input = rest;
break;
}
_ => break,
}
}
Ok(IPipelineElement { words, envs })
// Redirects follow the pipeline
loop {
if input.is_empty() {
break;
}
let (rest, token) = lex::lex_token(input)?;
if let Token::Redirect(redirect) = token {
input = rest;
redirects.push(redirect);
} else {
break;
}
}
Ok((input, PipelineExpression { elements }))
}
pub fn parse_pipeline<'a>(ts: &mut TokenStream<'a>) -> Result<Vec<IPipelineElement<'a>>, Error> {
fn parse_until<'i, T, P: Fn(&'i str) -> Result<(&'i str, T), Error<'i>>, Q: Fn(&Token) -> bool>(
mut input: &'i str,
parser: P,
predicate: Q,
) -> Result<(&'i str, Vec<T>), Error<'i>> {
let mut elements = vec![];
let mut expect_command = false;
while !ts.is_eof() {
let element = parse_pipeline_element(ts)?;
let is_empty = element.words.is_empty();
if !is_empty {
expect_command = false;
elements.push(element);
} else {
return Err(Error::EmptyPipelineCommand);
loop {
if input.is_empty() {
break;
}
input = skip_newline(input)?;
let (_, token) = lex::lex_token(input)?;
if predicate(&token) {
break;
}
// maybe followed by eof, redirect or pipe
let token = ts.peek()?;
match token {
Some(Token::Operator(Operator::Or)) => {
expect_command = true;
ts.next()?;
let (rest, element) = parser(input)?;
input = rest;
elements.push(element);
}
Ok((input, elements))
}
/// Parses the remainder of an `if` expression (the `if` token has already
/// been consumed): `CONDITION [;|\n]... then BODY [else BODY] fi [redirects]`.
fn parse_if(input: &str) -> Result<(&str, Expression), Error> {
    let (mut input, condition) = parse_expression(input)?;
    // Consume separators (newlines/semicolons) until the `then` keyword.
    loop {
        input = skip_newline(input)?;
        let (rest, then_or_semicolon) = lex::lex_token(input)?;
        match then_or_semicolon {
            Token::Punct(Punct::Semicolon) => {
                input = rest;
            }
            Token::Keyword(Keyword::Then) => {
                input = rest;
                break;
            }
            _ => {
                return Err(Error::Expected {
                    expected: Token::Keyword(Keyword::Then),
                    got: then_or_semicolon,
                })
            }
        }
    }
    // Parse until else/fi
    let (input, if_true) = parse_until(input, parse_expression, |t| {
        matches!(t, Token::Keyword(Keyword::Fi | Keyword::Else))
    })?;
    // else/fi
    let (input, else_or_fi) = lex::lex_token(input)?;
    let (input, if_false) = match else_or_fi {
        Token::Keyword(Keyword::Fi) => (input, None),
        Token::Keyword(Keyword::Else) => {
            let (input, if_false) = parse_until(input, parse_expression, |t| {
                matches!(t, Token::Keyword(Keyword::Fi))
            })?;
            // Skip fi
            let (input, _) = lex::lex_token(input)?;
            (input, Some(if_false))
        }
        // parse_until above only stops on Fi or Else, so no other token can
        // reach this point.
        _ => unreachable!(),
    };
    // Trailing redirects apply to the whole if-construct.
    let (input, redirects) = parse_redirects(input)?;
    Ok((
        input,
        Expression::If(IfExpression {
            condition: Box::new(condition),
            if_true,
            if_false,
            redirects,
        }),
    ))
}
/// Parses the remainder of a `while` expression (the `while` token has
/// already been consumed): `CONDITION [;|\n]... do BODY done [redirects]`.
fn parse_while(input: &str) -> Result<(&str, Expression), Error> {
    let (mut input, condition) = parse_expression(input)?;
    // Consume separators (newlines/semicolons) until the `do` keyword.
    loop {
        input = skip_newline(input)?;
        let (rest, do_or_semicolon) = lex::lex_token(input)?;
        match do_or_semicolon {
            Token::Punct(Punct::Semicolon) => {
                input = rest;
            }
            Token::Keyword(Keyword::Do) => {
                input = rest;
                break;
            }
            _ => {
                return Err(Error::Expected {
                    expected: Token::Keyword(Keyword::Do),
                    got: do_or_semicolon,
                })
            }
        }
    }
    let (input, body) = parse_until(input, parse_expression, |t| {
        matches!(t, Token::Keyword(Keyword::Done))
    })?;
    // Skip done
    let (input, _) = lex::lex_token(input)?;
    let (input, redirects) = parse_redirects(input)?;
    Ok((
        input,
        Expression::While(WhileExpression {
            // NOTE: `condtion` is a pre-existing typo in the struct field.
            condtion: Box::new(condition),
            body,
            redirects,
        }),
    ))
}
fn parse_for(input: &str) -> Result<(&str, Expression), Error> {
// xxx in ...; do ... done
let (input, variable) = lex::lex_token(input)?;
let Token::Word(variable_word) = &variable else {
todo!();
};
let [WordSegment::Literal(variable)] = variable_word.0[..] else {
todo!()
};
let (input, kw_in) = lex::lex_token(input)?;
if kw_in != Token::Keyword(Keyword::In) {
todo!();
}
let (mut input, list) = parse_pipeline_element(input)?;
loop {
input = skip_newline(input)?;
let (rest, do_or_semicolon) = lex::lex_token(input)?;
match do_or_semicolon {
Token::Punct(Punct::Semicolon) => {
input = rest;
}
Token::Keyword(Keyword::Do) => {
input = rest;
break;
}
_ => {
return Err(Error::Expected {
expected: Token::Keyword(Keyword::Do),
got: do_or_semicolon,
})
}
}
}
let (input, body) = parse_until(input, parse_expression, |t| {
matches!(t, Token::Keyword(Keyword::Done))
})?;
// Skip done
let (input, _) = lex::lex_token(input)?;
let (input, redirects) = parse_redirects(input)?;
Ok((
input,
Expression::For(ForExpression {
variable,
list: Box::new(list),
body,
redirects,
}),
))
}
fn parse_conditional(input: &str) -> Result<(&str, Expression), Error> {
let (input, body) = parse_expression(input)?;
let (input, rbracket) = lex::lex_token(input)?;
if rbracket != Token::Punct(Punct::RBracket) {
return Err(Error::Expected {
expected: Token::Punct(Punct::RBracket),
got: rbracket,
});
}
Ok((
input,
Expression::Conditional(ConditionalExpression {
body: Box::new(body),
}),
))
}
/// Collects any redirect tokens immediately following a construct; stops at
/// the first non-redirect token (which is left unconsumed) or end of input.
fn parse_redirects(mut input: &str) -> Result<(&str, Vec<Redirect>), Error> {
    let mut collected = Vec::new();
    while !input.is_empty() {
        let (rest, token) = lex::lex_token(input)?;
        let Token::Redirect(redirect) = token else {
            break;
        };
        collected.push(redirect);
        input = rest;
    }
    Ok((input, collected))
}
/// Parses the interior of a `{ ... }` group, requires the closing brace, and
/// attaches any trailing redirects to the group.
fn parse_group(input: &str) -> Result<(&str, Expression), Error> {
    let (input, body) = parse_until(input, parse_expression, |t| {
        matches!(t, Token::Punct(Punct::RBrace))
    })?;
    let (input, closing) = lex::lex_token(input)?;
    match closing {
        Token::Punct(Punct::RBrace) => {}
        got => {
            return Err(Error::Expected {
                expected: Token::Punct(Punct::RBrace),
                got,
            })
        }
    }
    let (input, redirects) = parse_redirects(input)?;
    Ok((
        input,
        Expression::Group(GroupExpression { body, redirects }),
    ))
}
/// Consumes any run of newline tokens, returning the input positioned at the
/// first non-newline token (which is left unconsumed).
fn skip_newline(mut input: &str) -> Result<&str, Error> {
    while let (rest, Token::Punct(Punct::Newline)) = lex::lex_token(input)? {
        input = rest;
    }
    Ok(input)
}
/// Parses a single `PATTERN) body... ;;` branch of a `case` statement.
fn parse_case_branch(input: &str) -> Result<(&str, CaseBranch), Error> {
    let (input, token) = lex::lex_token(input)?;
    let pattern = match token {
        Token::Word(word) => word,
        got => {
            return Err(Error::Expected {
                expected: Token::Word(WordToken(vec![WordSegment::Literal("PATTERN)")])),
                got,
            })
        }
    };
    // The lexer keeps the `)` attached to the pattern word; it must be there.
    if !pattern.ends_with(')') {
        return Err(Error::InvalidCasePattern(pattern));
    }
    let (input, body) = parse_until(input, parse_expression, |t| {
        matches!(t, Token::Punct(Punct::DoubleSemicolon))
    })?;
    // Skip the terminating `;;`.
    let (input, _) = lex::lex_token(input)?;
    Ok((input, CaseBranch { pattern, body }))
}
/// Parses a `case EXPR in PATTERN) ... ;; ... esac [redirects]` statement;
/// the `case` keyword has already been consumed.
fn parse_case(input: &str) -> Result<(&str, Expression), Error> {
    // The scrutinee must be a single word.
    let (input, token) = lex::lex_token(input)?;
    let Token::Word(expression) = token else {
        return Err(Error::Expected {
            expected: Token::Word(WordToken(vec![WordSegment::Literal("expression")])),
            got: token,
        });
    };
    // Mandatory `in` keyword.
    let (input, kw_in) = lex::lex_token(input)?;
    match kw_in {
        Token::Keyword(Keyword::In) => {}
        got => {
            return Err(Error::Expected {
                expected: Token::Keyword(Keyword::In),
                got,
            })
        }
    }
    // Branches run until the matching `esac`.
    let (input, branches) = parse_until(input, parse_case_branch, |t| {
        matches!(t, Token::Keyword(Keyword::Esac))
    })?;
    // Skip esac
    let (input, _) = lex::lex_token(input)?;
    let (input, redirects) = parse_redirects(input)?;
    Ok((
        input,
        Expression::Case(CaseExpression {
            expression,
            branches,
            redirects,
        }),
    ))
}
fn parse_atom(mut input: &str) -> Result<(&str, Expression), Error> {
loop {
let (rest, token) = lex::lex_token(input)?;
// TODO (...)
return match token {
Token::Word(_) | Token::Redirect(_) => {
let (input, pipeline) = parse_pipeline(input)?;
Ok((input, Expression::Pipeline(pipeline)))
}
Token::Keyword(Keyword::If) => parse_if(rest),
Token::Keyword(Keyword::For) => parse_for(rest),
Token::Keyword(Keyword::While) => parse_while(rest),
Token::Keyword(Keyword::Case) => parse_case(rest),
Token::Punct(Punct::Not) => {
let (rest, inner) = parse_atom(rest)?;
Ok((rest, Expression::Not(Box::new(inner))))
}
Token::Punct(Punct::LBrace) => parse_group(rest),
Token::Punct(Punct::LBracket) => parse_conditional(rest),
Token::Punct(Punct::Semicolon | Punct::Newline) => {
input = rest;
continue;
}
Some(Token::Redirect(_)) => break,
// parse_pipeline_element() should've consumed all of these
Some(Token::Word(_)) => unreachable!(),
None => break,
_ => todo!(),
}
_ => Err(Error::Unexpected(token)),
};
}
if expect_command {
return Err(Error::EmptyPipelineCommand);
}
Ok(elements)
}
pub fn parse_redirects<'a>(ts: &mut TokenStream<'a>) -> Result<IRedirects<'a>, Error> {
let mut result = IRedirects {
stdin: None,
stdout: None,
stderr: None,
fn maybe_binary<'i>(
lhs: Expression<'i>,
mut input: &'i str,
) -> Result<(&'i str, Expression<'i>), Error<'i>> {
if input.is_empty() {
return Ok((input, lhs));
}
let (rest, token) = lex::lex_token(input)?;
let Some(this_op) = BinaryOperator::from_token(&token) else {
return Ok((input, lhs));
};
while let Some(token) = ts.next()? {
match token {
Token::Redirect(Redirect::Output(redirect)) => match redirect {
OutputRedirect::Out(path) => result.stdout = Some(path),
OutputRedirect::Err(path) => result.stderr = Some(path),
OutputRedirect::Both(path) => {
result.stdout = Some(path.clone());
result.stderr = Some(path);
}
},
Token::Redirect(Redirect::Input(path)) => result.stdin = Some(path),
// parse_pipeline() should've consumed all of these
_ => unreachable!(),
input = rest;
let (rest, rhs) = parse_expression(input)?;
input = rest;
// x && (y || z)
let expr = if let Expression::Binary(that_op, that_lhs, that_rhs) = rhs {
if this_op.precedence() > that_op.precedence() {
Expression::Binary(
that_op,
Box::new(Expression::Binary(this_op, Box::new(lhs), that_lhs)),
that_rhs,
)
} else {
Expression::Binary(
this_op,
Box::new(lhs),
Box::new(Expression::Binary(that_op, that_lhs, that_rhs)),
)
}
} else {
Expression::Binary(this_op, Box::new(lhs), Box::new(rhs))
};
Ok((input, expr))
}
/// Parses one expression: an atom optionally followed by a chain of binary
/// operators (`&&`, `||`, `=`, `!=`), handled by `maybe_binary`.
pub fn parse_expression(input: &str) -> Result<(&str, Expression), Error> {
    let (rest, lhs) = parse_atom(input)?;
    maybe_binary(lhs, rest)
}
/// Parses a top-level item. A lone `name()` pipeline introduces a function
/// declaration (not implemented yet); anything else is an ordinary expression.
pub fn parse_toplevel(input: &str) -> Result<(&str, Expression), Error> {
    let (rest, atom) = parse_atom(input)?;
    let function_name = atom
        .as_pipeline()
        .and_then(PipelineExpression::as_function_decl);
    match function_name {
        Some(identifier) => {
            let _ = identifier;
            let _ = rest;
            // Function declarations are recognized but not yet supported.
            todo!()
        }
        // Not a declaration: re-parse from the start as a plain expression.
        None => parse_expression(input),
    }
}
impl PipelineElement<'_> {
fn as_function_decl(&self) -> Option<&str> {
if !self.redirects.is_empty() || self.words.len() != 1 {
return None;
}
let word = &self.words[0];
if word.0.len() != 1 {
return None;
}
match &word.0[0] {
WordSegment::Literal(x) if let Some(prefix) = x.strip_suffix("()") => {
// TODO Check if prefix is a valid identifier
Some(prefix)
}
_ => None,
}
}
Ok(result)
}
/// Parses one interactive command line: a single pipeline, optionally
/// followed by redirects applying to the whole pipeline.
pub fn parse_interactive(line: &str) -> Result<ICommand, Error> {
    let mut tokens = TokenStream::new(line);
    let pipeline = parse_pipeline(&mut tokens)?;
    let redirects = parse_redirects(&mut tokens)?;
    Ok(ICommand {
        pipeline,
        redirects,
    })
}
impl<S: Into<String>> From<nom::Err<nom::error::Error<S>>> for Error {
fn from(value: nom::Err<nom::error::Error<S>>) -> Self {
Self::Lex(value.map_input(Into::into))
fn as_word(&self) -> Option<&WordToken> {
if !self.redirects.is_empty() || self.words.len() != 1 {
return None;
}
Some(&self.words[0])
}
}
impl PipelineExpression<'_> {
    /// If this pipeline is a single `name()` element, returns the function
    /// name being declared.
    fn as_function_decl(&self) -> Option<&str> {
        self.as_single_command()?.as_function_decl()
    }

    /// Returns the single word of a one-command, one-word pipeline.
    pub fn as_word(&self) -> Option<&WordToken> {
        self.as_single_command()?.as_word()
    }

    /// Returns the pipeline's only element, or `None` when the pipeline has
    /// zero or more than one element.
    pub fn as_single_command(&self) -> Option<&PipelineElement> {
        match self.elements.as_slice() {
            [element] => Some(element),
            _ => None,
        }
    }
}
impl Expression<'_> {
    /// Borrows the inner pipeline if this expression is a plain pipeline.
    pub fn as_pipeline(&self) -> Option<&PipelineExpression> {
        if let Self::Pipeline(pipeline) = self {
            Some(pipeline)
        } else {
            None
        }
    }

    /// Returns the single word if this expression is a one-word pipeline.
    pub fn as_word(&self) -> Option<&WordToken> {
        self.as_pipeline()?.as_word()
    }
}
impl BinaryOperator {
    /// Maps a punctuation token to its binary operator, or `None` if the
    /// token is not a binary operator.
    fn from_token(token: &Token) -> Option<Self> {
        let Token::Punct(punct) = token else {
            return None;
        };
        match punct {
            Punct::Or => Some(Self::Or),
            Punct::And => Some(Self::And),
            Punct::Equal => Some(Self::Equal),
            Punct::NotEqual => Some(Self::NotEqual),
            _ => None,
        }
    }

    /// Binding strength; higher binds tighter: `||` < `&&` < `=`/`!=`.
    fn precedence(&self) -> usize {
        match self {
            Self::Or => 1,
            Self::And => 2,
            Self::Equal | Self::NotEqual => 3,
        }
    }
}
#[cfg(test)]
mod tests {
    // Unit tests for the shell parser: pipeline elements, redirects, `if`
    // expressions, and `&&`/`||` binary-operator precedence.
    use crate::syntax::{
        lex::{
            InputRedirect, OutputRedirect, OutputRedirectTarget, Redirect, WordSegment, WordToken,
        },
        parse::{
            parse_expression, parse_pipeline, parse_pipeline_element, BinaryOperator, Expression,
            IfExpression, PipelineElement, PipelineExpression,
        },
    };
    // A pipeline element stops at the first `|`: only `a b c` belongs to it.
    #[test]
    fn test_parse_pipeline_element() {
        let cmd = "a b c | d e f";
        assert_eq!(
            parse_pipeline_element(cmd).unwrap().1,
            PipelineElement {
                words: vec![
                    WordToken(vec![WordSegment::Literal("a")]),
                    WordToken(vec![WordSegment::Literal("b")]),
                    WordToken(vec![WordSegment::Literal("c")]),
                ],
                redirects: vec![]
            }
        );
    }
    // Redirects (`<`, `>>`, `2>&1`) attach to their own pipeline element, and
    // pipeline parsing stops before `&&` (binary operators are handled at the
    // expression level).
    #[test]
    fn test_parse_pipeline() {
        let cmd = "a b c <aaa.txt | d e f >>bbb.txt 2>&1 && other command";
        assert_eq!(
            parse_pipeline(cmd).unwrap().1,
            PipelineExpression {
                elements: vec![
                    PipelineElement {
                        words: vec![
                            WordToken(vec![WordSegment::Literal("a")]),
                            WordToken(vec![WordSegment::Literal("b")]),
                            WordToken(vec![WordSegment::Literal("c")]),
                        ],
                        redirects: vec![Redirect::Input(InputRedirect::Filename(WordToken(vec![
                            WordSegment::Literal("aaa.txt")
                        ])))],
                    },
                    PipelineElement {
                        words: vec![
                            WordToken(vec![WordSegment::Literal("d")]),
                            WordToken(vec![WordSegment::Literal("e")]),
                            WordToken(vec![WordSegment::Literal("f")]),
                        ],
                        redirects: vec![
                            // `>>bbb.txt`: append to file, default (stdout) source
                            Redirect::Output(OutputRedirect {
                                source: None,
                                target: OutputRedirectTarget::Filename(WordToken(vec![
                                    WordSegment::Literal("bbb.txt")
                                ])),
                                append: true
                            }),
                            // `2>&1`: duplicate fd 2 onto fd 1
                            Redirect::Output(OutputRedirect {
                                source: Some("2"),
                                target: OutputRedirectTarget::Descriptor("1"),
                                append: false
                            }),
                        ],
                    },
                ],
            }
        );
    }
    #[test]
    fn test_parse_expression() {
        // `if a; then b; else c; fi` lowers to an IfExpression with both arms.
        let cmd = "if a; then b; else c; fi\n";
        assert_eq!(
            parse_expression(cmd).unwrap().1,
            Expression::If(IfExpression {
                condition: Box::new(Expression::Pipeline(PipelineExpression {
                    elements: vec![PipelineElement {
                        words: vec![WordToken(vec![WordSegment::Literal("a")])],
                        redirects: vec![],
                    }],
                })),
                if_true: vec![Expression::Pipeline(PipelineExpression {
                    elements: vec![PipelineElement {
                        words: vec![WordToken(vec![WordSegment::Literal("b")])],
                        redirects: vec![],
                    }],
                })],
                if_false: Some(vec![Expression::Pipeline(PipelineExpression {
                    elements: vec![PipelineElement {
                        words: vec![WordToken(vec![WordSegment::Literal("c")])],
                        redirects: vec![],
                    }],
                })]),
                redirects: vec![]
            })
        );
        // `&&` (precedence 2) binds tighter than `||` (precedence 1), so
        // `a | b && c || d && e` parses as `((a | b) && c) || (d && e)`.
        let cmd = "a | b && c || d && e\n";
        assert_eq!(
            parse_expression(cmd).unwrap().1,
            Expression::Binary(
                BinaryOperator::Or,
                Box::new(Expression::Binary(
                    BinaryOperator::And,
                    Box::new(Expression::Pipeline(PipelineExpression {
                        elements: vec![
                            PipelineElement {
                                words: vec![WordToken(vec![WordSegment::Literal("a")])],
                                redirects: vec![]
                            },
                            PipelineElement {
                                words: vec![WordToken(vec![WordSegment::Literal("b")])],
                                redirects: vec![]
                            },
                        ],
                    })),
                    Box::new(Expression::Pipeline(PipelineExpression {
                        elements: vec![PipelineElement {
                            words: vec![WordToken(vec![WordSegment::Literal("c")])],
                            redirects: vec![]
                        },]
                    }))
                )),
                Box::new(Expression::Binary(
                    BinaryOperator::And,
                    Box::new(Expression::Pipeline(PipelineExpression {
                        elements: vec![PipelineElement {
                            words: vec![WordToken(vec![WordSegment::Literal("d")])],
                            redirects: vec![]
                        },]
                    })),
                    Box::new(Expression::Pipeline(PipelineExpression {
                        elements: vec![PipelineElement {
                            words: vec![WordToken(vec![WordSegment::Literal("e")])],
                            redirects: vec![]
                        },]
                    })),
                ))
            )
        );
    }
}

View File

@ -0,0 +1,3 @@
#!/bin/sh
# Smoke test for the shell: simplest possible command invocation.
# Expected output: "Hello, world"
echo Hello, world

View File

@ -0,0 +1,37 @@
#!/bin/sh
# Exercises `if`/`else` with `[ ... ]` conditionals: string equality and
# inequality, file tests (-f, -nt, -ot), negation (!), and chaining with
# `&&`/`||`. Every taken branch prints "yes"; "no" must never be printed.
# NOTE(review): assumes the shell's lexer tolerates `#` comment lines — verify.
if [ a = "a" ]; then
echo yes
fi
if [ "$MOST_LIKELY_EMPTY_VARIABLE" = "" ]; then
echo yes
fi
if [ "$MOST_LIKELY_EMPTY_VARIABLE" != "" ]; then
echo no
else
echo yes
fi
if [ -f /bin/sh ]; then
echo yes
fi
if [ 02-if.sh -nt /bin/sh ]; then
echo yes
fi
if [ ! 02-if.sh -ot /bin/sh ]; then
echo yes
fi
if [ -f /bin/sh ] && [ -f /bin/bash ]; then
echo yes
fi
if [ -f /bin/sh ] || [ -f /bin/bash ]; then
echo yes
fi
if [ -f /bin/sh ] && [ -f /bin/bash ] || [ -f /bin/dash ]; then
echo yes
fi

View File

@ -0,0 +1,11 @@
#!/bin/sh
# Exercises variable expansion: unset variables expand to the empty string
# (both bare and inside double quotes), single quotes suppress expansion,
# and unquoted expansion of a space-containing value splits into words.
echo a"$LIKELY_DOES_NOT_EXIST"b
echo "a${LIKELY_DOES_NOT_EXIST}b"
echo '${LIKELY_DOES_NOT_EXIST}'
LIKELY_EXISTS=a
echo $LIKELY_EXISTS
TWO_WORDS="a b"
echo $TWO_WORDS
echo a"$TWO_WORDS"b

View File

@ -0,0 +1,11 @@
#!/bin/sh
# Exercises `while`: a loop whose condition becomes false after four
# iterations (appends one "a" per pass, prints "a" four times), and a loop
# whose condition is false from the start (body never runs).
my_index=""
while [ "$my_index" != "aaaa" ]; do
echo a
my_index="$my_index"a
done
while false; do
echo a
done

View File

@ -0,0 +1,15 @@
#!/bin/sh
# Exercises `for` loops: plain word lists, quoted list items preserved as a
# single word, and word splitting of unquoted variable expansions in the list.
for x in a b; do
echo $x
done
for x in "a b" c d; do
echo $x
done
V="a b"
for x in $V; do
echo $x
done
for x in "$V" "$V" $V; do
echo $x
done

View File

@ -38,6 +38,7 @@ const PROGRAMS: &[(&str, &str)] = &[
("ln", "bin/ln"),
("mkdir", "bin/mkdir"),
("touch", "bin/touch"),
("env", "bin/env"),
("rm", "bin/rm"),
("cat", "bin/cat"),
("hexd", "bin/hexd"),