user: add md2txt

This commit is contained in:
Mark Poliakov 2025-03-05 11:30:04 +02:00
parent 8deeb3ac9e
commit e330db1e55
10 changed files with 757 additions and 1 deletions

27
userspace/Cargo.lock generated
View File

@ -1433,6 +1433,16 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4"
[[package]]
name = "md2txt"
version = "0.1.0"
dependencies = [
"clap",
"libterm",
"pulldown-cmark",
"thiserror",
]
[[package]]
name = "memchr"
version = "2.7.4"
@ -2062,6 +2072,17 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "pulldown-cmark"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0"
dependencies = [
"bitflags 2.8.0",
"memchr",
"unicase",
]
[[package]]
name = "quick-xml"
version = "0.37.2"
@ -2939,6 +2960,12 @@ dependencies = [
"thiserror",
]
[[package]]
name = "unicase"
version = "2.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
[[package]]
name = "unicode-ident"
version = "1.0.16"

View File

@ -16,6 +16,7 @@ members = [
"sysutils",
"tools/crypt",
"tools/init",
"tools/md2txt",
"tools/rdb",
"tools/red",
"tools/rsh",

36
userspace/etc/test.md Normal file
View File

@ -0,0 +1,36 @@
1. List item one.
List item one continued with a second paragraph followed by an
Indented block.
$ ls *.sh
$ mv *.sh ~/tmp
List item continued with a third paragraph.
2. List item two continued with an open block.
This paragraph is part of the preceding list item.
1. This list is nested and does not require explicit item continuation.
This paragraph is part of the preceding list item.
2. List item b.
This paragraph belongs to item two of the outer list.
> This is a blockquote
>> This is a quote of a quote
>> * This is an item of quote-quote list
>>
>> This is a continuation of quote-quote-list
>> * This is another item of a quote-quote list
Another paragraph
* Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean eget congue risus. Aenean facilisis quis augue ac accumsan. Praesent felis odio, sagittis ut pulvinar a, pharetra non ante. Cras accumsan varius auctor. Suspendisse pharetra mauris eget eros congue, ut scelerisque enim pharetra. Quisque pellentesque ante quis porttitor condimentum. Nullam nisi purus, interdum a dui vitae, hendrerit eleifend leo. Integer tempus neque ut neque faucibus vulputate. Ut orci tellus, interdum et sagittis eu, interdum ut ex. Donec ac consectetur sem. Aenean eget mauris rutrum, condimentum nisi nec, tempor nulla. Nullam ullamcorper nibh vel ligula pellentesque blandit. Curabitur suscipit placerat gravida. Nam id consectetur urna. Morbi viverra lorem vel nulla varius, at placerat nulla posuere. Donec in bibendum ex, ut tincidunt sapien.
* In ut quam tellus. Nunc ac sem vestibulum, sollicitudin ligula id, facilisis tortor. Pellentesque quam ex, ornare id diam ac, sagittis volutpat quam. Etiam faucibus, eros non tristique venenatis, odio risus interdum dolor, porttitor volutpat nulla erat in ex. Nullam venenatis leo justo. Integer ullamcorper auctor orci, non pulvinar nisi volutpat molestie. Phasellus tristique, leo id convallis cursus, diam dolor pretium dui, nec suscipit nisl ligula sit amet magna. Interdum et malesuada fames ac ante ipsum primis in faucibus. Maecenas sit amet nibh vel mauris sagittis semper quis efficitur mauris. Aenean iaculis, lectus sit amet placerat scelerisque, dui libero maximus orci, at convallis justo urna eget quam. Aenean luctus felis tristique enim suscipit, non porta eros gravida. Aliquam erat volutpat. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Vestibulum eleifend iaculis fringilla.
* Fusce eleifend mauris vel urna pretium, non suscipit massa accumsan. Phasellus pretium ultricies accumsan. Suspendisse accumsan bibendum erat, sit amet eleifend ipsum maximus in. Curabitur eleifend, ipsum ut sollicitudin varius, felis lectus elementum nibh, eget bibendum mi ex eget lacus. Nam erat sapien, sodales nec bibendum cursus, accumsan eget magna. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Ut quis aliquam est. Nunc et erat lacus. Proin consequat eleifend fringilla. Phasellus eget nulla orci. Proin fermentum mi eu nisi posuere blandit. Interdum et malesuada fames ac ante ipsum primis in faucibus.
* In eget facilisis nisi. Donec purus dolor, fringilla nec efficitur nec, elementum id metus. Aenean a scelerisque augue. Nullam tempor porttitor eros tempus imperdiet. Sed euismod felis sed neque venenatis, quis lobortis odio sagittis. Phasellus tristique auctor massa eget vulputate. Etiam nulla tellus, congue ut euismod a, posuere quis sapien. Morbi pellentesque orci sit amet commodo interdum. Nunc eleifend, velit consectetur tempor dictum, sem turpis rutrum erat, eget vehicula justo felis id erat. Morbi condimentum pulvinar sem, sit amet molestie enim suscipit quis. Aliquam convallis ante lectus, at lacinia ex gravida semper. Morbi vel metus aliquam, vulputate neque ac, sodales arcu. Ut quis bibendum sem.
* Vivamus elementum augue eget ligula laoreet, et feugiat turpis efficitur. Mauris eleifend lacus id felis condimentum, vitae volutpat tellus volutpat. Vestibulum justo diam, bibendum sit amet neque quis, commodo luctus nunc. Donec luctus, libero vel viverra venenatis, nisl libero cursus massa, dapibus tempus libero erat facilisis lacus. Donec vitae finibus metus, porta tempor velit. Proin velit odio, facilisis sit amet elementum at, laoreet at tellus. Phasellus convallis, neque sit amet imperdiet hendrerit, nisi quam laoreet odio, molestie venenatis nisl ligula in erat.

View File

@ -56,7 +56,7 @@ impl View {
self.eof = true;
break;
}
self.buffer.push(line.trim().to_owned());
self.buffer.push(line.trim_matches('\n').to_owned());
}
Ok(())

View File

@ -0,0 +1,15 @@
[package]
name = "md2txt"
version = "0.1.0"
edition = "2021"
[dependencies]
libterm.workspace = true
clap.workspace = true
thiserror.workspace = true
pulldown-cmark = { version = "0.13.0", default-features = false }
[lints]
workspace = true

View File

@ -0,0 +1,112 @@
use pulldown_cmark::{CowStr, Event, Parser, Tag};
use crate::setting::{InlineStyle, ListStyle, TerminalTypesetter, Typesetter};
/// A single entry in a [`Content`] list: either a nested element or a run of text.
#[derive(Debug)]
pub enum Child<'a> {
/// A nested element (opened by a start tag) together with its own content.
Node(Node<'a>),
/// A text run as delivered by the Markdown parser.
Text(CowStr<'a>),
}
/// An element of the document tree: the tag that opened it plus everything nested inside it.
#[derive(Debug)]
pub struct Node<'a> {
/// The start tag; `print` matches on it to decide which typesetter calls to make.
tag: Tag<'a>,
/// Children collected between the start tag and its matching end event.
content: Content<'a>,
}
/// An ordered list of children.
///
/// Used both as the body of a [`Node`] and, via `Content::default()`, as the root of a document.
#[derive(Debug, Default)]
pub struct Content<'a> {
/// Children in source order.
children: Vec<Child<'a>>,
}
impl<'a> Node<'a> {
    /// Creates an element for `tag` with no children yet.
    pub fn new(tag: Tag<'a>) -> Self {
        let content = Content::default();
        Self { tag, content }
    }

    /// Renders this element: opens the construct that corresponds to the tag,
    /// renders all children, then closes the construct again.
    ///
    /// Tags without a dedicated typesetter call only have their children rendered.
    fn print(&self, printer: &mut TerminalTypesetter) {
        // Open the construct.
        match &self.tag {
            Tag::List(first) => {
                // A list with a start number is numbered, otherwise it is bulleted.
                let style = match *first {
                    Some(n) => ListStyle::Numbered(n as usize),
                    None => ListStyle::Bullet('•'),
                };
                printer.push_list(style);
            }
            Tag::Paragraph => printer.push_paragraph(),
            Tag::BlockQuote(_) => printer.push_blockquote(),
            Tag::Item => printer.push_list_item(),
            Tag::CodeBlock(_) => printer.push_code_block(),
            Tag::Strong => printer.push_style(InlineStyle::Bold),
            Tag::Emphasis => printer.push_style(InlineStyle::Italic),
            _ => (),
        }

        self.content.print(printer);

        // Close the construct opened above.
        match &self.tag {
            Tag::List(_) => printer.pop_list(),
            Tag::Paragraph => printer.pop_paragraph(),
            Tag::BlockQuote(_) => printer.pop_blockquote(),
            Tag::Item => printer.pop_list_item(),
            Tag::CodeBlock(_) => printer.pop_code_block(),
            Tag::Strong | Tag::Emphasis => printer.pop_style(),
            _ => (),
        }
    }
}
impl<'a> Content<'a> {
    /// Renders every child in order through `printer`.
    pub fn print(&self, printer: &mut TerminalTypesetter) {
        for (i, child) in self.children.iter().enumerate() {
            match child {
                Child::Node(node) => node.print(printer),
                // Every text run except a leading one is preceded by a word break.
                // `parse` discards break events, so this is what keeps text from
                // adjacent runs from being joined into one word.
                Child::Text(text) => printer.append_text(i != 0, text),
            }
        }
    }

    /// Consumes events from `parser` until the end event that closes the
    /// current element (or until the parser is exhausted), appending the
    /// children found on the way.
    ///
    /// Start events open a nested [`Node`] which is parsed recursively.
    /// Text and inline code spans are stored as text; inline code used to be
    /// discarded, which silently removed it from the rendered output.
    /// All remaining events are ignored.
    pub fn parse(&mut self, parser: &mut Parser<'a>) {
        // `while let` rather than `for`: the recursive call below needs the
        // parser again while the loop is still running.
        while let Some(event) = parser.next() {
            match event {
                // End of the element this content belongs to.
                Event::End(_) => break,
                // Nested element.
                Event::Start(tag) => {
                    let mut child = Node::new(tag);
                    child.content.parse(parser);
                    self.children.push(Child::Node(child));
                }
                // Plain text and the contents of inline code spans.
                Event::Text(text) | Event::Code(text) => {
                    self.children.push(Child::Text(text));
                }
                _ => (),
            }
        }
    }
}

View File

@ -0,0 +1,66 @@
#![feature(iter_intersperse, let_chains, vec_pop_if)]
use std::{
fs,
io::{self, stdout, IsTerminal},
path::PathBuf,
process::ExitCode,
};
use clap::Parser as CParser;
use document::Content;
use libterm::RawTerminal;
use pulldown_cmark::Parser;
use setting::{TerminalTypesetter, Typesetter};
pub mod document;
pub mod setting;
pub mod util;
/// Failures reported by `run`.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// An I/O error; created automatically from `io::Error` (e.g. by `?`).
#[error("{0}")]
Io(#[from] io::Error),
/// The output is narrower than the minimum width `run` accepts.
#[error("Terminal is too small")]
TerminalTooSmall,
}
// Command-line arguments, parsed through clap's derive macro.
// NOTE: plain `//` comments are used here on purpose; clap's derive turns `///`
// doc comments into help text, which would change the program's output.
#[derive(Debug, CParser)]
pub struct Args {
// Path of the Markdown file to render.
filename: PathBuf,
}
/// Renders the Markdown file named in `args` to standard output.
///
/// The page width is taken from the terminal when stdout is one and its size
/// can be queried; otherwise a fallback of 60 columns is used.
///
/// # Errors
///
/// Returns [`Error::TerminalTooSmall`] when the page is too narrow, and
/// [`Error::Io`] when the file cannot be read.
fn run(args: &Args) -> Result<(), Error> {
    const FALLBACK_WIDTH: usize = 60;
    const MIN_TEXT_WIDTH: usize = 10;
    const RIGHT_MARGIN: usize = 8;

    let out = stdout();
    let mut page_width = FALLBACK_WIDTH;
    if out.is_terminal() {
        page_width = out.raw_size().map_or(FALLBACK_WIDTH, |size| size.0);
    }

    // Checked before touching the file so a tiny terminal is reported first.
    if MIN_TEXT_WIDTH + RIGHT_MARGIN > page_width {
        return Err(Error::TerminalTooSmall);
    }

    let source = fs::read_to_string(&args.filename)?;

    // Build the document tree.
    let mut document = Content::default();
    let mut events = Parser::new(&source);
    document.parse(&mut events);

    // Typeset it.
    let mut typesetter = TerminalTypesetter::default();
    typesetter.set_page_width(page_width - RIGHT_MARGIN);
    document.print(&mut typesetter);
    // Flush whatever is still pending in the last line.
    typesetter.finish_line();

    Ok(())
}
fn main() -> ExitCode {
let args = Args::parse();
match run(&args) {
Ok(()) => ExitCode::SUCCESS,
Err(error) => {
eprintln!("{}: {error}", args.filename.display());
ExitCode::FAILURE
}
}
}

View File

@ -0,0 +1,489 @@
/// A single inline text attribute that can be pushed onto a typesetter's style stack.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InlineStyle {
/// Bold text.
Bold,
/// Italic text.
Italic,
}
/// How the items of a list are marked.
#[derive(Debug, Clone)]
pub enum ListStyle {
/// Numbered list; holds the number used for the next item's `N.` marker.
Numbered(usize),
/// Bulleted list; holds the character used as every item's marker.
Bullet(char),
}
/// The combined set of inline attributes in effect for a piece of text.
///
/// The default value has every attribute switched off.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct InlineStyles {
/// Whether the text is bold.
pub bold: bool,
/// Whether the text is italic.
pub italic: bool,
}
/// Receiver for the structural events produced while walking a parsed document.
///
/// Each `push_*` call opens a construct and the matching `pop_*` call closes it.
pub trait Typesetter {
/// Opens a list whose items are marked according to `style`.
fn push_list(&mut self, style: ListStyle);
/// Closes the list opened by the matching `push_list`.
fn pop_list(&mut self);
/// Opens an item of the current list.
fn push_list_item(&mut self);
/// Closes the current list item.
fn pop_list_item(&mut self);
/// Opens a paragraph.
fn push_paragraph(&mut self);
/// Closes the current paragraph.
fn pop_paragraph(&mut self);
/// Opens a code block; text appended until the matching pop belongs to it.
fn push_code_block(&mut self);
/// Closes the current code block.
fn pop_code_block(&mut self);
/// Opens a blockquote (blockquotes may nest).
fn push_blockquote(&mut self);
/// Closes the current blockquote.
fn pop_blockquote(&mut self);
/// Starts applying `style` to subsequently appended text.
fn push_style(&mut self, style: InlineStyle);
/// Stops applying the most recently pushed style.
fn pop_style(&mut self);
/// Appends inline text; when `space` is true a word break is requested before it.
fn append_text(&mut self, space: bool, text: &str);
/// Flushes the line that is currently being assembled.
fn finish_line(&mut self);
}
/// A run of characters inside a word that all share the same inline styles.
#[derive(Debug, Default, Clone)]
struct Token {
/// The characters of the run.
text: String,
/// Styles applied to the whole run.
styles: InlineStyles,
}
/// A word: a sequence of tokens printed back to back with no space in between.
#[derive(Debug, Default, Clone)]
struct Word {
/// Consecutive, differently styled pieces of the word.
tokens: Vec<Token>,
}
/// Accumulates the content of one output line before it is printed.
#[derive(Debug, Default, Clone)]
struct LineBuilder {
/// Text printed before the words (the list item marker, when there is one).
prefix: String,
/// Blockquote nesting depth; the printer emits its blockquote prefix this many times.
blockquote: usize,
/// Completed words of the line.
words: Vec<Word>,
/// The word currently being assembled; moved into `words` by `finish_word`.
word: Word,
/// Indentation level; the printer emits two columns per level.
indent: usize,
/// Set when the line was ended because it ran out of room; the printer justifies such lines.
full: bool,
}
impl Token {
    /// Width of the token, measured as its number of `char`s.
    fn width(&self) -> usize {
        let Self { text, .. } = self;
        text.chars().count()
    }
}
impl Word {
    /// Total width of the word: the sum of the widths of its tokens.
    fn width(&self) -> usize {
        self.tokens
            .iter()
            .fold(0, |total, token| total + token.width())
    }

    /// Returns `true` when nothing has been pushed into the word yet.
    fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    /// Appends `ch` to the word.
    ///
    /// The character joins the last token when that token has the same
    /// styles; otherwise a new token is started for it.
    fn push(&mut self, ch: char, styles: InlineStyles) {
        match self.tokens.last_mut() {
            Some(last) if last.styles == styles => last.text.push(ch),
            _ => self.tokens.push(Token {
                text: String::from(ch),
                styles,
            }),
        }
    }
}
impl LineBuilder {
    /// Estimates the number of columns the line occupies.
    ///
    /// `blockquote_width` is the width of a single blockquote prefix.
    /// The estimate counts one column per completed word (for the space
    /// after it), two columns per indentation level, and a constant two
    /// extra columns.
    fn width(&self, blockquote_width: usize) -> usize {
        let finished_words: usize = self.words.iter().map(Word::width).sum();
        let decoration = self.prefix.chars().count()
            + self.indent * 2
            + self.blockquote * blockquote_width;
        decoration + finished_words + self.words.len() + self.word.width() + 2
    }

    /// Returns `true` when the line has no completed words.
    ///
    /// The word still being assembled is not taken into account.
    fn is_empty(&self) -> bool {
        self.words.is_empty()
    }

    /// Moves the word being assembled into the list of completed words.
    /// Does nothing when that word is empty.
    fn finish_word(&mut self) {
        if self.word.is_empty() {
            return;
        }
        let finished = std::mem::take(&mut self.word);
        self.words.push(finished);
    }

    /// Feeds one character into the line: a space ends the current word,
    /// anything else is appended to it.
    fn push(&mut self, ch: char, styles: InlineStyles) {
        match ch {
            ' ' => self.finish_word(),
            _ => self.word.push(ch, styles),
        }
    }
}
/// Writes finished lines and verbatim text to standard output, using ANSI
/// escape sequences for bold and italic.
pub struct TerminalPrinter {
/// Styles currently active on the terminal, as tracked by `set_style`/`reset_style`.
style: InlineStyles,
/// Number of columns printed so far on the current line; reset at the start of `print_line`.
cursor: usize,
/// Extra columns of whitespace printed before the indentation of every line.
margin_left: usize,
/// Style used for line prefixes; `None` prints them without changing the style.
prefix_style: Option<InlineStyles>,
/// Style used for verbatim text; `None` leaves the style untouched.
verbatim_style: Option<InlineStyles>,
/// String printed once per blockquote nesting level at the start of a line.
blockquote_prefix: &'static str,
/// String printed by `begin_verbatim` before verbatim text.
verbatim_prefix: &'static str,
}
impl Default for TerminalPrinter {
    /// A printer with no left margin, bold line prefixes, bold-italic
    /// verbatim text and empty blockquote/verbatim prefixes.
    fn default() -> Self {
        let bold_only = InlineStyles {
            bold: true,
            italic: false,
        };
        let bold_italic = InlineStyles {
            bold: true,
            italic: true,
        };
        Self {
            style: InlineStyles::default(),
            cursor: 0,
            margin_left: 0,
            prefix_style: Some(bold_only),
            verbatim_style: Some(bold_italic),
            blockquote_prefix: "",
            verbatim_prefix: "",
        }
    }
}
impl TerminalPrinter {
    /// Emits SGR 0 (all attributes off) and records that no style is active.
    fn reset_style(&mut self) {
        print!("\x1B[0m");
        self.style = InlineStyles::default();
    }

    /// Switches the terminal to `styles`, emitting escape sequences only for
    /// the attributes that differ from the currently tracked state.
    fn set_style(&mut self, styles: InlineStyles) {
        if styles.bold != self.style.bold {
            // SGR 1 turns bold on, SGR 22 restores normal intensity.
            print!("{}", if styles.bold { "\x1B[1m" } else { "\x1B[22m" });
        }
        if styles.italic != self.style.italic {
            // SGR 3 turns italic on, SGR 23 turns it off.
            print!("{}", if styles.italic { "\x1B[3m" } else { "\x1B[23m" });
        }
        self.style = styles;
    }

    /// Prints `count` spaces in the given style and advances the cursor.
    fn print_whitespace(&mut self, count: usize, styles: InlineStyles) {
        self.set_style(styles);
        // An empty string padded to `count` columns yields exactly `count` spaces.
        print!("{:count$}", "");
        self.cursor += count;
    }

    /// Prints one token in its own style and advances the cursor.
    fn print_token(&mut self, token: &Token) {
        self.set_style(token.styles);
        print!("{}", token.text);
        self.cursor += token.text.chars().count();
    }

    /// Prints all tokens of a word back to back.
    fn print_word(&mut self, word: &Word) {
        for token in &word.tokens {
            self.print_token(token);
        }
    }

    /// Prints a line prefix (e.g. a list marker), in `prefix_style` when one
    /// is configured, and advances the cursor.
    fn print_prefix(&mut self, prefix: &str) {
        if let Some(prefix_style) = self.prefix_style {
            self.set_style(prefix_style);
            print!("{prefix}");
            self.reset_style();
        } else {
            print!("{prefix}");
        }
        self.cursor += prefix.chars().count();
    }

    /// Prints `prefix` once per blockquote nesting level.
    fn print_blockquote(&mut self, prefix: &str, depth: usize) {
        let prefix_width = prefix.chars().count();
        for _ in 0..depth {
            print!("{prefix}");
            self.cursor += prefix_width;
        }
    }

    /// Prints one finished line followed by a newline.
    ///
    /// Lines marked `full` are justified: the gaps between words are widened
    /// so the text fills the space between the line's decorations
    /// (blockquote prefix, indentation, list marker) and `page_width`, with
    /// leftover columns handed to the leftmost gaps. All other lines are
    /// printed with single spaces.
    ///
    /// A `full` line falls back to single spacing when it cannot be
    /// justified, i.e. when it has fewer than two words or when the words do
    /// not fit the available width. These cases previously panicked through
    /// a division by zero, a failed assertion or an arithmetic underflow.
    fn print_line(&mut self, line: &LineBuilder, page_width: usize) {
        self.cursor = 0;
        self.print_blockquote(self.blockquote_prefix, line.blockquote);
        self.print_whitespace(self.margin_left + line.indent * 2, InlineStyles::default());
        self.print_prefix(&line.prefix);

        // Columns left for the words once the decorations have been printed.
        let text_width = page_width.saturating_sub(self.cursor);
        let words_width = line.words.iter().map(Word::width).sum::<usize>();
        let word_breaks = line.words.len().saturating_sub(1);
        let need_spaces = text_width.saturating_sub(words_width);

        // Justification needs at least one gap and at least one column per gap.
        let justify = line.full && word_breaks > 0 && need_spaces >= word_breaks;
        let (even_spaces, odd_spaces) = if justify {
            (need_spaces / word_breaks, need_spaces % word_breaks)
        } else {
            (1, 0)
        };

        for (i, word) in line.words.iter().enumerate() {
            if i != 0 {
                // The first `odd_spaces` gaps take one extra column each.
                let spaces = even_spaces + usize::from(i - 1 < odd_spaces);
                // The gap is printed in the style of the word that follows it.
                let styles = word
                    .tokens
                    .first()
                    .map_or_else(InlineStyles::default, |token| token.styles);
                self.print_whitespace(spaces, styles);
            }
            self.print_word(word);
        }

        // Always leave the terminal unstyled at the end of a line; otherwise a
        // document ending in styled text would leak the style to the shell.
        self.reset_style();
        println!();
    }

    /// Starts a verbatim line: prints the verbatim prefix and switches to the
    /// verbatim style when one is configured.
    fn begin_verbatim(&mut self) {
        print!("{}", self.verbatim_prefix);
        if let Some(style) = self.verbatim_style {
            self.set_style(style);
        }
    }

    /// Ends a verbatim line, undoing the style set by `begin_verbatim`.
    fn end_verbatim(&mut self) {
        if self.verbatim_style.is_some() {
            self.reset_style();
        }
    }

    /// Prints a single verbatim character as is.
    fn print_verbatim(&mut self, ch: char) {
        print!("{ch}");
    }
}
/// A [`Typesetter`] that breaks text into lines of a fixed page width and
/// prints them through a [`TerminalPrinter`].
pub struct TerminalTypesetter {
/// Inline styles currently in effect, in the order they were pushed.
style_stack: Vec<InlineStyle>,
/// Styles of the lists currently open, innermost last.
list_stack: Vec<ListStyle>,
/// The line being assembled.
line: LineBuilder,
/// Current indentation level; incremented for every open list item.
indent: usize,
/// Width lines are broken at.
page_width: usize,
/// Output backend.
printer: TerminalPrinter,
/// Whether the most recent `finish_line` had nothing to print (or a blank line was just emitted).
last_empty: bool,
/// Whether text is currently printed verbatim (inside a code block).
verbatim: bool,
/// Whether the next verbatim character starts a new verbatim line.
verbatim_empty: bool,
/// Current blockquote nesting depth.
blockquote: usize,
}
impl Default for TerminalTypesetter {
    /// An idle typesetter with an 80 column page: nothing is open, no text
    /// is pending and the output is considered to be at an empty line.
    fn default() -> Self {
        Self {
            // Page geometry and nesting.
            page_width: 80,
            indent: 0,
            blockquote: 0,
            // Open constructs.
            style_stack: Vec::new(),
            list_stack: Vec::new(),
            // Pending output.
            line: LineBuilder::default(),
            printer: TerminalPrinter::default(),
            last_empty: true,
            // Verbatim (code block) state.
            verbatim: false,
            verbatim_empty: true,
        }
    }
}
impl TerminalTypesetter {
pub fn set_page_width(&mut self, width: usize) {
self.page_width = width.max(40);
}
fn styles(&self) -> InlineStyles {
let mut styles = InlineStyles::default();
for style in self.style_stack.iter() {
match style {
InlineStyle::Bold => styles.bold = true,
InlineStyle::Italic => styles.italic = true,
}
}
styles
}
fn append_char(&mut self, ch: char) {
if self.verbatim {
if self.verbatim_empty {
self.printer.begin_verbatim();
for _ in 0..self.indent {
print!(" ");
}
}
if ch == '\n' {
self.printer.end_verbatim();
self.verbatim_empty = true;
} else {
self.verbatim_empty = false;
}
self.printer.print_verbatim(ch);
return;
}
if self
.line
.width(self.printer.blockquote_prefix.chars().count())
+ 1
>= self.page_width
{
// Finish the line, next word goes to the next line
self.line.full = true;
// Ugliness reduction by refusing to take trailing short words
let popped = self.line.words.pop_if(|word| word.width() < 3);
let mut word = std::mem::take(&mut self.line.word);
word.push(ch, self.styles());
self.finish_line();
self.line.words.extend(popped);
self.line.word = word;
return;
}
self.line.push(ch, self.styles());
}
fn increment_list_index(&mut self) {
if let Some(last) = self.list_stack.last_mut() {
match last {
ListStyle::Numbered(index) => *index += 1,
_ => (),
}
}
}
fn set_indent(&mut self) {
let indent = self.indent;
self.line.indent = indent;
self.line.blockquote = self.blockquote;
}
fn hard_break(&mut self, set_indent: bool) {
let empty = self.last_empty;
self.finish_line();
if !empty && self.blockquote == 0 {
println!();
self.last_empty = true;
}
if set_indent {
self.set_indent();
}
}
}
impl Typesetter for TerminalTypesetter {
    /// Puts `style` on top of the style stack.
    fn push_style(&mut self, style: InlineStyle) {
        self.style_stack.push(style);
    }

    /// Removes the most recently pushed style.
    fn pop_style(&mut self) {
        self.style_stack.pop();
    }

    /// Appends `text` character by character. Outside of verbatim mode a
    /// word break is inserted first when `space` is set.
    fn append_text(&mut self, space: bool, text: &str) {
        if space && !self.verbatim {
            self.append_char(' ');
        }
        text.chars().for_each(|ch| self.append_char(ch));
    }

    /// Completes the word in progress and prints the pending line if it
    /// contains any words; records whether anything was printed.
    fn finish_line(&mut self) {
        self.line.finish_word();
        if self.line.is_empty() {
            self.last_empty = true;
            return;
        }
        let finished = std::mem::take(&mut self.line);
        self.last_empty = false;
        self.printer.print_line(&finished, self.page_width);
        // The replacement line starts at the current nesting.
        self.set_indent();
    }

    /// Ends the current block and opens a list with the given style.
    fn push_list(&mut self, style: ListStyle) {
        self.hard_break(false);
        self.list_stack.push(style);
    }

    /// Closes the innermost list and ends the current block.
    fn pop_list(&mut self) {
        self.list_stack.pop();
        self.hard_break(false);
    }

    /// Starts a list item: ends the current block, indents one level and
    /// puts the marker of the innermost list into the line prefix.
    fn push_list_item(&mut self) {
        self.hard_break(false);
        self.indent += 1;
        let marker = match self.list_stack.last() {
            Some(ListStyle::Bullet(bullet)) => format!("{bullet} "),
            Some(ListStyle::Numbered(number)) => format!("{number}. "),
            // No list is open: leave the prefix alone.
            None => return,
        };
        self.line.prefix.push_str(&marker);
    }

    /// Ends a list item: advances the list numbering, removes one level of
    /// indentation and ends the current block.
    fn pop_list_item(&mut self) {
        self.increment_list_index();
        self.indent -= 1;
        self.hard_break(true);
    }

    /// Starts a paragraph by ending the current block.
    fn push_paragraph(&mut self) {
        self.hard_break(false);
    }

    /// Ends a paragraph and re-applies the current nesting to the next line.
    fn pop_paragraph(&mut self) {
        self.hard_break(true);
    }

    /// Ends the current block and switches to verbatim output.
    fn push_code_block(&mut self) {
        self.hard_break(false);
        self.verbatim_empty = true;
        self.verbatim = true;
    }

    /// Emits a newline and leaves verbatim mode.
    fn pop_code_block(&mut self) {
        println!();
        self.verbatim = false;
    }

    /// Enters one more level of blockquote and ends the current block.
    fn push_blockquote(&mut self) {
        self.blockquote += 1;
        self.hard_break(true);
    }

    /// Leaves one level of blockquote. A blank line is emitted when the
    /// outermost blockquote ends; the pending line prefix is discarded.
    fn pop_blockquote(&mut self) {
        self.blockquote -= 1;
        if self.blockquote == 0 {
            println!();
        }
        self.line.prefix.clear();
        self.hard_break(true);
    }
}

View File

@ -0,0 +1,9 @@
/// Extension methods for string slices.
pub trait StrExt {
    /// Returns the number of terminal columns the string is expected to occupy.
    fn display_width(&self) -> usize;
}

impl StrExt for str {
    /// Counts one column per `char`, the same measure the typesetter uses
    /// for tokens and prefixes.
    ///
    /// NOTE(review): this is an approximation — wide (e.g. CJK) characters,
    /// combining marks and control characters are all counted as one column.
    /// The method previously panicked with `todo!()`.
    fn display_width(&self) -> usize {
        self.chars().count()
    }
}

View File

@ -51,6 +51,7 @@ const PROGRAMS: &[(&str, &str)] = &[
("ps", "bin/ps"),
("top", "bin/top"),
("tst", "bin/tst"),
("md2txt", "bin/md2txt"),
// netutils
("netconf", "sbin/netconf"),
("dhcp-client", "sbin/dhcp-client"),