diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/ast.rs | 348 | ||||
| -rw-r--r-- | src/convert.rs | 68 | ||||
| -rw-r--r-- | src/convert/html.rs | 75 | ||||
| -rw-r--r-- | src/convert/markdown.rs | 77 | ||||
| -rw-r--r-- | src/lib.rs | 36 |
5 files changed, 604 insertions, 0 deletions
// This file is part of Germ <https://github.com/gemrest/germ>.
// Copyright (C) 2022-2022 Fuwn <[email protected]>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 3.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// Copyright (C) 2022-2022 Fuwn <[email protected]>
// SPDX-License-Identifier: GPL-3.0-only

pub mod ast {
    //! Build AST trees from Gemtext

    /// The marker that opens and closes a preformatted block.
    const FENCE: &str = "```";

    /// A Gemtext AST node.
    ///
    /// Each Gemtext line is a `Node`, and some lines can even be grouped
    /// together, such as the `Node::List` `Node`!
    ///
    /// # Gemtext Resources
    ///
    /// - [Gemtext Documentation](https://gemini.circumlunar.space/docs/gemtext.gmi)
    /// - [Gemtext Cheatsheet](https://gemini.circumlunar.space/docs/cheatsheet.gmi).
    /// - [Gemini Specification](https://gemini.circumlunar.space/docs/specification.gmi).
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub enum Node {
        /// A text line
        ///
        /// # Example
        ///
        /// ```gemini
        /// This is a text line
        /// ```
        Text(String),
        /// A link line
        ///
        /// # Examples
        ///
        /// ```gemini
        /// => /this-is-the-to This is the text
        ///
        /// => gemini://to.somewhere.link
        /// ```
        Link {
            /// The location that a link line is pointing to
            ///
            /// # Examples
            ///
            /// ```gemini
            /// => /this-is-the-to This is the text
            ///
            /// => gemini://to.somewhere.link
            /// ```
            to: String,
            /// The text a link line *may* have
            ///
            /// # Examples
            ///
            /// ```gemini
            /// => /this-is-the-to This line has text, unlike the next one.
            ///
            /// => gemini://to.somewhere.link
            /// ```
            text: Option<String>,
        },
        /// A heading line
        ///
        /// # Examples
        ///
        /// ```gemini
        /// # This is a heading
        ///
        /// ## This is a sub-heading
        ///
        /// ### This is a sub-sub-heading
        /// ```
        Heading {
            /// The level of a heading
            ///
            /// # Examples
            ///
            /// ```gemini
            /// # This is a level 1 heading
            ///
            /// ## This is a level 2 sub-heading
            ///
            /// ### This is a level 3 sub-sub-heading
            /// ```
            level: usize,
            /// The text of a heading
            ///
            /// # Examples
            ///
            /// ```gemini
            /// # This is the heading's text
            ///
            /// # This is also the heading's text
            /// ```
            text: String,
        },
        /// A collection of sequential list item lines
        ///
        /// # Examples
        ///
        /// ```gemini
        /// * These are
        /// * sequential list
        /// * items.
        /// ```
        List(Vec<String>),
        /// A blockquote line
        ///
        /// # Examples
        ///
        /// ```gemini
        /// > This is a blockquote line
        ///
        /// > This is also a blockquote line
        /// ```
        Blockquote(String),
        /// A preformatted block
        ///
        /// # Examples
        ///
        /// Try to ignore the leading backslash in front of the triple
        /// backticks, they are there to not confuse the Markdown engine.
        ///
        /// ```gemini
        /// \```This is the alt-text
        /// This is the preformatted block
        ///
        /// This is the rest of the preformatted block
        /// \```
        /// ```
        PreformattedText {
            /// A preformatted block's alt-text
            ///
            /// # Examples
            ///
            /// Try to ignore the leading backslash in front of the triple
            /// backticks, they are there to not confuse the Markdown engine.
            ///
            /// ```gemini
            /// \```This is the alt-text
            /// This is the preformatted block
            ///
            /// This is the rest of the preformatted block
            /// \```
            /// ```
            alt_text: Option<String>,
            /// A preformatted block's content
            ///
            /// Every content line is stored followed by a `\n`.
            ///
            /// # Examples
            ///
            /// Try to ignore the leading backslash in front of the triple
            /// backticks, they are there to not confuse the Markdown engine.
            ///
            /// ```gemini
            /// \```This is the alt-text
            /// This is the preformatted block's content
            ///
            /// This is the rest of the preformatted block's content
            /// \```
            /// ```
            text: String,
        },
        /// A whitespace line, i.e. an empty line.
        Whitespace,
    }

    /// Build an AST tree from Gemtext.
    ///
    /// Never panics: malformed link lines become [`Node::Text`], and a list
    /// or preformatted block that runs to the end of the input is emitted
    /// with whatever was collected.
    ///
    /// # Example
    ///
    /// ```rust
    /// germ::ast::build(r#"=> gemini://gem.rest/ GemRest"#);
    /// ```
    #[must_use]
    pub fn build(source: &str) -> Vec<Node> {
        let mut ast = Vec::new();
        let mut in_preformatted = false;
        let mut in_list = false;
        let mut lines = source.lines();

        // `evaluate` pulls additional lines from the same iterator whenever it
        // meets a multi-line construct, so a plain `for` loop cannot be used.
        while let Some(line) = lines.next() {
            ast.extend(evaluate(
                line,
                &mut lines,
                &mut in_preformatted,
                &mut in_list,
            ));
        }

        ast
    }

    /// Evaluate `line`, consuming further `lines` for multi-line constructs.
    ///
    /// Returns one node for a single-line construct. A list returns the
    /// `Node::List` followed by the node of the line that terminated it (if
    /// any). `in_preformatted` and `in_list` are always `false` on return.
    fn evaluate(
        line: &str,
        lines: &mut std::str::Lines<'_>,
        in_preformatted: &mut bool,
        in_list: &mut bool,
    ) -> Vec<Node> {
        let mut nodes = Vec::new();
        let mut list_items: Vec<String> = Vec::new();
        let mut preformatted = String::new();
        let mut alt_text = String::new();
        let mut current = Some(line);

        while let Some(line) = current {
            // Inside a preformatted block nothing but the closing fence is
            // markup; every other line is content, whatever it starts with.
            if *in_preformatted {
                if line.starts_with(FENCE) {
                    *in_preformatted = false;
                    nodes.push(preformatted_node(
                        std::mem::take(&mut alt_text),
                        std::mem::take(&mut preformatted),
                    ));

                    break;
                }

                preformatted.push_str(line);
                preformatted.push('\n');
                current = lines.next();

                continue;
            }

            // List items are gathered until the first non-list line.
            if let Some(item) = line.strip_prefix('*') {
                *in_list = true;
                list_items.push(item.trim_start().to_string());
                current = lines.next();

                continue;
            }

            // Any other line terminates the list context; emit the list
            // before the node of the terminating line.
            *in_list = false;

            if !list_items.is_empty() {
                nodes.push(Node::List(std::mem::take(&mut list_items)));
            }

            // An opening fence; whatever follows it on the line is alt-text.
            if let Some(alt) = line.strip_prefix(FENCE) {
                *in_preformatted = true;
                alt_text = alt.to_string();
                current = lines.next();

                continue;
            }

            nodes.push(parse_line(line));

            break;
        }

        // The input ended in the middle of a multi-line construct: emit what
        // was collected instead of panicking.
        if *in_preformatted {
            *in_preformatted = false;
            nodes.push(preformatted_node(alt_text, preformatted));
        }

        if !list_items.is_empty() {
            *in_list = false;
            nodes.push(Node::List(list_items));
        }

        nodes
    }

    /// Build a `Node::PreformattedText`, mapping empty alt-text to `None`.
    fn preformatted_node(alt_text: String, text: String) -> Node {
        Node::PreformattedText {
            alt_text: if alt_text.is_empty() {
                None
            } else {
                Some(alt_text)
            },
            text,
        }
    }

    /// Parse a single-line construct: link, heading, blockquote, empty line,
    /// or plain text.
    fn parse_line(line: &str) -> Node {
        if let Some(rest) = line.strip_prefix("=>") {
            // The first word is the target; everything after it is the
            // (optional) human-readable text.
            let rest = rest.trim_start();
            let (to, text) = rest
                .split_once(char::is_whitespace)
                .unwrap_or((rest, ""));
            let text = text.trim();

            // A link line without a target is not a link; keep it as text.
            if to.is_empty() {
                return Node::Text(line.to_string());
            }

            return Node::Link {
                to: to.to_string(),
                text: if text.is_empty() {
                    None
                } else {
                    Some(text.to_string())
                },
            };
        }

        if line.starts_with('#') {
            // Gemtext has three heading levels; further `#`s belong to the
            // text. `#` is a single byte, so slicing at `level` is always on
            // a character boundary.
            let level = line.chars().take_while(|&c| c == '#').count().min(3);

            return Node::Heading {
                level,
                text: line[level..].trim_start().to_string(),
            };
        }

        if let Some(rest) = line.strip_prefix('>') {
            return Node::Blockquote(rest.trim_start().to_string());
        }

        if line.is_empty() {
            return Node::Whitespace;
        }

        Node::Text(line.to_string())
    }
}

pub mod convert {
    //! Convert Gemtext into many types of markup.

    mod html {
        use crate::ast::Node;

        /// Escape the characters that are significant in HTML text and in
        /// double-quoted attribute values.
        fn escape(raw: &str) -> String {
            let mut escaped = String::with_capacity(raw.len());

            for character in raw.chars() {
                match character {
                    '&' => escaped.push_str("&amp;"),
                    '<' => escaped.push_str("&lt;"),
                    '>' => escaped.push_str("&gt;"),
                    '"' => escaped.push_str("&quot;"),
                    '\'' => escaped.push_str("&#39;"),
                    other => escaped.push(other),
                }
            }

            escaped
        }

        /// Render an AST as HTML.
        ///
        /// All text taken from the nodes is HTML-escaped. Whitespace nodes
        /// produce no output.
        pub fn convert(source: Vec<Node>) -> String {
            let mut html = String::new();

            for node in source {
                match node {
                    Node::Text(text) => {
                        html.push_str(&format!("<p>{}</p>", escape(&text)));
                    }
                    Node::Link { to, text } => {
                        let to = escape(&to);
                        // A link without text shows its target instead.
                        let label =
                            text.map_or_else(|| to.clone(), |text| escape(&text));

                        html.push_str(&format!(
                            "<a href=\"{}\">{}</a><br>",
                            to, label
                        ));
                    }
                    Node::Heading { level, text } => {
                        let tag = match level {
                            1 => "h1",
                            2 => "h2",
                            3 => "h3",
                            _ => "p",
                        };

                        html.push_str(&format!(
                            "<{0}>{1}</{0}>",
                            tag,
                            escape(&text)
                        ));
                    }
                    Node::List(items) => {
                        let items = items
                            .iter()
                            .map(|item| format!("<li>{}</li>", escape(item)))
                            .collect::<Vec<String>>()
                            .join("\n");

                        html.push_str(&format!("<ul>{}</ul>", items));
                    }
                    Node::Blockquote(text) => {
                        html.push_str(&format!(
                            "<blockquote>{}</blockquote>",
                            escape(&text)
                        ));
                    }
                    Node::PreformattedText { text, .. } => {
                        html.push_str(&format!("<pre>{}</pre>", escape(&text)));
                    }
                    Node::Whitespace => {}
                }
            }

            html
        }
    }

    mod markdown {
        use crate::ast::Node;

        /// Render an AST as Markdown.
        ///
        /// Every node is terminated by a newline, so consecutive nodes end
        /// up on separate lines; a whitespace node is an empty line.
        pub fn convert(source: Vec<Node>) -> String {
            let mut markdown = String::new();

            for node in source {
                match node {
                    Node::Text(text) => markdown.push_str(&text),
                    Node::Link { to, text } => match text {
                        Some(text) => {
                            markdown.push_str(&format!("[{}]({})", text, to));
                        }
                        None => markdown.push_str(&format!("<{}>", to)),
                    },
                    Node::Heading { level, text } => {
                        match level {
                            1 => markdown.push_str("# "),
                            2 => markdown.push_str("## "),
                            3 => markdown.push_str("### "),
                            // Not a Gemtext heading level: emit the bare text.
                            _ => {}
                        }

                        markdown.push_str(&text);
                    }
                    Node::List(items) => markdown.push_str(
                        &items
                            .iter()
                            .map(|item| format!("- {}", item))
                            .collect::<Vec<String>>()
                            .join("\n"),
                    ),
                    Node::Blockquote(text) => {
                        markdown.push_str(&format!("> {}", text));
                    }
                    Node::PreformattedText { alt_text, text } => {
                        markdown.push_str("```");
                        markdown.push_str(&alt_text.unwrap_or_default());
                        markdown.push('\n');
                        markdown.push_str(&text);

                        // The closing fence has to start on its own line.
                        if !text.is_empty() && !text.ends_with('\n') {
                            markdown.push('\n');
                        }

                        markdown.push_str("```");
                    }
                    Node::Whitespace => {}
                }

                markdown.push('\n');
            }

            markdown
        }
    }

    /// Different targets to convert Gemtext to
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum Target {
        /// Convert Gemtext to HTML
        HTML,
        /// Convert Gemtext to Markdown
        Markdown,
    }

    /// Convert AST'd Gemtext into an alternative markup format.
    ///
    /// # Example
    ///
    /// ```rust
    /// use germ::convert;
    ///
    /// convert::convert_from_ast(
    ///   germ::ast::build(r#"=> gemini://gem.rest/ GemRest"#),
    ///   convert::Target::HTML,
    /// );
    /// ```
    pub fn convert_from_ast(
        source: Vec<crate::ast::Node>,
        target: Target,
    ) -> String {
        match target {
            Target::Markdown => markdown::convert(source),
            Target::HTML => html::convert(source),
        }
    }

    /// Convert raw Gemtext into an alternative markup format.
    ///
    /// # Example
    ///
    /// ```rust
    /// use germ::convert;
    ///
    /// convert::convert_from_string(
    ///   r#"=> gemini://gem.rest/ GemRest"#,
    ///   convert::Target::HTML,
    /// );
    /// ```
    pub fn convert_from_string(source: &str, target: Target) -> String {
        convert_from_ast(crate::ast::build(source), target)
    }
}
If not, see <http://www.gnu.org/licenses/>. +// +// Copyright (C) 2022-2022 Fuwn <[email protected]> +// SPDX-License-Identifier: GPL-3.0-only + +#![deny( + warnings, + nonstandard_style, + unused, + future_incompatible, + rust_2018_idioms, + unsafe_code, + clippy::all, + clippy::nursery, + clippy::pedantic +)] +#![recursion_limit = "128"] + +#[cfg(feature = "ast")] +pub mod ast; + +#[cfg(feature = "convert")] +pub mod convert; |