diff options
| -rw-r--r-- | src/xml.rs | 138 |
1 files changed, 135 insertions, 3 deletions
@@ -3,6 +3,59 @@ use std::{
     fmt::{Display, Write},
 };
 
+fn escape_xml_text(value: &str) -> String { escape_xml_value(value, false) }
+
+fn escape_xml_attribute(value: &str) -> String { escape_xml_value(value, true) }
+
+const fn is_valid_xml_char(character: char) -> bool {
+    matches!(
+        character as u32,
+        0x09
+            | 0x0A
+            | 0x0D
+            | 0x20..=0xD7FF
+            | 0xE000..=0xFFFD
+            | 0x10000..=0x0010_FFFF
+    )
+}
+
+fn sanitize_xml_chars(value: &str) -> String {
+    value.chars().filter(|character| is_valid_xml_char(*character)).collect()
+}
+
+fn escape_xml_value(value: &str, escape_attribute_whitespace: bool) -> String {
+    let sanitized = sanitize_xml_chars(value);
+    let mut escaped = String::with_capacity(sanitized.len());
+
+    for character in sanitized.chars() {
+        match character {
+            '&' => escaped.push_str("&amp;"),
+            '<' => escaped.push_str("&lt;"),
+            '>' => escaped.push_str("&gt;"),
+            '"' => escaped.push_str("&quot;"),
+            '\'' => escaped.push_str("&apos;"),
+            '\n' if escape_attribute_whitespace => escaped.push_str("&#10;"),
+            '\r' if escape_attribute_whitespace => escaped.push_str("&#13;"),
+            '\t' if escape_attribute_whitespace => escaped.push_str("&#9;"),
+            _ => escaped.push(character),
+        }
+    }
+
+    escaped
+}
+
+fn escape_cdata_text(value: &str) -> String {
+    sanitize_xml_chars(value).replace("]]>", "]]]]><![CDATA[>")
+}
+
+fn render_field(key: &str, value: &str) -> String {
+    if key == "description" {
+        format!("<{key}><![CDATA[{}]]></{key}>", escape_cdata_text(value))
+    } else {
+        format!("<{key}>{}</{key}>", escape_xml_text(value))
+    }
+}
+
 pub struct Item {
     fields: HashMap<String, String>,
 }
@@ -21,7 +74,7 @@ impl Display for Item {
             f,
             "<item>{}</item>",
             self.fields.iter().fold(String::new(), |mut acc, (k, v)| {
-                let _ = write!(acc, "<{k}>{v}</{k}>");
+                let _ = write!(acc, "{}", render_field(k, v));
 
                 acc
             })
@@ -70,12 +123,91 @@ impl Display for Writer {
              version=\"2.0\"><channel>{}<atom:link href=\"{}\" rel=\"self\" \
              type=\"application/rss+xml\" />{}</channel></rss>",
             self.fields.iter().fold(String::new(), |mut acc, (k, v)| {
-                let _ = write!(acc, "<{k}>{v}</{k}>");
+                let _ = write!(acc, "{}", render_field(k, v));
 
                 acc
             }),
-            self.link,
+            escape_xml_attribute(&self.link),
             self.content
         )
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::{Item, Writer};
+
+    #[test]
+    fn multiline_description_is_wrapped_in_cdata() {
+        let mut item = Item::builder();
+
+        item.add_field("description", "first line\nsecond line");
+
+        let xml = item.to_string();
+
+        assert!(xml.contains(
+            "<description><![CDATA[first line\nsecond line]]></description>"
+        ));
+    }
+
+    #[test]
+    fn cdata_end_marker_is_escaped_safely() {
+        let mut item = Item::builder();
+
+        item.add_field("description", "before ]]> after");
+
+        let xml = item.to_string();
+
+        assert!(xml.contains(
+            "<description><![CDATA[before ]]]]><![CDATA[> after]]></description>"
+        ));
+    }
+
+    #[test]
+    fn non_description_fields_are_xml_escaped() {
+        let mut writer = Writer::builder();
+
+        writer.add_field("title", "Fish & Chips <3");
+
+        let xml = writer.to_string();
+
+        assert!(xml.contains("<title>Fish &amp; Chips &lt;3</title>"));
+    }
+
+    #[test]
+    fn text_fields_escape_quotes_and_apostrophes() {
+        let mut writer = Writer::builder();
+
+        writer.add_field("title", "\"quoted\" and 'single'");
+
+        let xml = writer.to_string();
+
+        assert!(
+            xml.contains("<title>&quot;quoted&quot; and &apos;single&apos;</title>")
+        );
+    }
+
+    #[test]
+    fn link_attribute_is_xml_escaped() {
+        let mut writer = Writer::builder();
+
+        writer.add_link("https://example.com/?a=1&b=\"two\"'three'<four>");
+
+        let xml = writer.to_string();
+
+        assert!(xml.contains(
+            "<atom:link href=\"https://example.com/?a=1&amp;b=&quot;two&quot;&apos;three&apos;&lt;four&gt;\" rel=\"self\" type=\"application/rss+xml\" />"
+        ));
+    }
+
+    #[test]
+    fn invalid_xml_chars_are_filtered() {
+        let mut writer = Writer::builder();
+
+        writer.add_field("title", "ok\u{0001}\u{0002}\u{0000}text");
+
+        let xml = writer.to_string();
+
+        assert!(xml.contains("<title>oktext</title>"));
+    }
+}
|