From c4e3128528b13243c7d4be233a8dd258406fc6a1 Mon Sep 17 00:00:00 2001
From: Mark Poliakov
Date: Mon, 3 Mar 2025 00:17:13 +0200
Subject: [PATCH] shell: accept unicode input

---
Review notes (text in this section is not part of the commit message):
  * Utf8Decoder::push now resynchronises on malformed input instead of
    buffering up to four bytes, so a stray byte can no longer swallow the
    keystrokes (Enter, ^C, ^D, ...) typed after it.
  * The printable-character arm accepts every non-control character, which
    also lets the decoder's replacement glyph U+25A1 reach the screen.
  * Control keys are matched with char literals instead of `as u32` guards.
  * NOTE(review): the `-> Result {` signatures, `pub fn readline(` and the
    `From:` header look like they lost their `<...>` parts in transit, and
    the blank context lines inside the hunks are reconstructed. The index and
    commit hashes predate this revision. TODO confirm against the tree
    before applying.

 userspace/shell/src/readline.rs | 80 ++++++++++++++++++++++++++++-----
 1 file changed, 68 insertions(+), 12 deletions(-)

diff --git a/userspace/shell/src/readline.rs b/userspace/shell/src/readline.rs
index 2afa18a7..163e683b 100644
--- a/userspace/shell/src/readline.rs
+++ b/userspace/shell/src/readline.rs
@@ -9,9 +9,61 @@ enum Outcome {
     Data(usize),
 }
 
-fn readline_inner(stdin: &mut RawStdin, stdout: &mut Stdout, buffer: &mut String) -> Result {
+/// Incremental UTF-8 decoder fed one byte at a time.
+struct Utf8Decoder {
+    /// Bytes of the sequence currently being assembled.
+    buffer: [u8; 4],
+    /// Number of bytes held in `buffer`; below 4 between calls.
+    len: usize,
+}
+
+impl Utf8Decoder {
+    /// Glyph substituted for input that is not valid UTF-8.
+    const REPLACEMENT: char = '\u{25A1}';
+
+    /// Feeds one byte and returns a character once one is available.
+    ///
+    /// Returns `None` while a multi-byte sequence is still incomplete. Malformed
+    /// input yields `REPLACEMENT`, except that a byte which itself decodes to a
+    /// character is returned in place of the sequence it interrupted, so a stray
+    /// byte can never swallow the keystrokes that follow it.
+    fn push(&mut self, byte: u8) -> Option<char> {
+        self.buffer[self.len] = byte;
+        self.len += 1;
+        match std::str::from_utf8(&self.buffer[..self.len]) {
+            Ok(decoded) => {
+                self.len = 0;
+                decoded.chars().next()
+            }
+            // A valid but unfinished prefix: wait for the remaining bytes.
+            Err(error) if error.error_len().is_none() => None,
+            Err(_) => {
+                let interrupted = self.len > 1;
+                self.len = 0;
+                if interrupted {
+                    // `byte` broke an unfinished sequence: drop that sequence and
+                    // decode `byte` again as the start of a new one.
+                    self.push(byte).or(Some(Self::REPLACEMENT))
+                } else {
+                    Some(Self::REPLACEMENT)
+                }
+            }
+        }
+    }
+}
+
+fn readline_inner(
+    stdin: &mut RawStdin,
+    stdout: &mut Stdout,
+    buffer: &mut String,
+) -> Result {
+    let mut ch_buffer = [0; 4];
     let mut pos = 0;
     let mut ch = [0];
+    let mut decoder = Utf8Decoder {
+        buffer: [0; 4],
+        len: 0,
+    };
 
     loop {
         let len = stdin.read(&mut ch)?;
@@ -20,10 +72,13 @@ fn readline_inner(stdin: &mut RawStdin, stdout: &mut Stdout, buffer: &mut String
         }
 
         let ch = ch[0];
+        let Some(ch) = decoder.push(ch) else {
+            continue;
+        };
 
         match ch {
             // ^D
-            0x04 => {
+            '\x04' => {
                 if pos == 0 {
                     break;
                 } else {
@@ -31,10 +86,10 @@ fn readline_inner(stdin: &mut RawStdin, stdout: &mut Stdout, buffer: &mut String
                 }
             }
             // ^C
-            0x03 => return Ok(Outcome::Interrupt),
+            '\x03' => return Ok(Outcome::Interrupt),
             // TODO completion
-            b'\t' => (),
-            0x7F => {
+            '\t' => (),
+            '\x7F' => {
                 if pos != 0 {
                     stdout.write_all(b"\x1B[D \x1B[D").ok();
                     stdout.flush().ok();
@@ -42,18 +97,19 @@ fn readline_inner(stdin: &mut RawStdin, stdout: &mut Stdout, buffer: &mut String
                     pos -= 1;
                 }
             }
-            ch if ch.is_ascii_graphic() || ch.is_ascii_whitespace() => {
-                stdout.write_all(&[ch]).ok();
+            ch if ch.is_whitespace() || !ch.is_control() => {
+                let bytes = ch.encode_utf8(&mut ch_buffer);
+                stdout.write_all(bytes.as_bytes()).ok();
                 stdout.flush().ok();
 
-                buffer.push(ch as char);
+                buffer.push(ch);
                 pos += 1;
             }
             _ => (),
         }
 
-        if ch == b'\n' || ch == b'\r' {
-            if ch == b'\r' {
+        if ch == '\n' || ch == '\r' {
+            if ch == '\r' {
                 stdout.write_all(b"\n").ok();
                 stdout.flush().ok();
             }
@@ -68,7 +124,7 @@ pub fn readline(
     stdin: &mut Stdin,
     stdout: &mut Stdout,
     buffer: &mut String,
-    prompt: P
+    prompt: P,
 ) -> Result {
     let mut stdin = RawStdin::new(stdin)?;
 
@@ -80,7 +136,7 @@ pub fn readline(
             Outcome::Data(n) => break Ok(n),
             Outcome::Interrupt => {
                 stdout.write_all(b"\r\n").ok();
-            },
+            }
         }
     }
 }