// Source: commit 599fcce0314c87f1b16958bfb3166da660f054f3 by Fuwn
// (Sat, 14 Feb 2026 06:20:54 -0800), touching src/xml.rs:
// "fix(xml): Implement full XML 1.0 escaping and RSS-safe CDATA handling".
//
// The helpers below are the ones that commit adds. String literals that
// contain XML markup (entity references, CDATA delimiters, closing tags) are
// written out in full here.

/// Escapes `value` for use as XML element text.
///
/// Characters outside the XML 1.0 `Char` range are dropped; `&`, `<`, `>`,
/// `"` and `'` become entity references. Whitespace is kept verbatim.
fn escape_xml_text(value: &str) -> String {
    escape_xml_value(value, false)
}

/// Escapes `value` for use inside a quoted XML attribute.
///
/// Same as [`escape_xml_text`], but tab, line feed and carriage return are
/// additionally written as numeric character references.
fn escape_xml_attribute(value: &str) -> String {
    escape_xml_value(value, true)
}

/// Returns `true` when `character` is in the ranges accepted below:
/// tab, LF, CR, U+0020..=U+D7FF, U+E000..=U+FFFD and U+10000..=U+10FFFF.
const fn is_valid_xml_char(character: char) -> bool {
    matches!(
        character as u32,
        0x09 | 0x0A | 0x0D | 0x20..=0xD7FF | 0xE000..=0xFFFD | 0x10000..=0x0010_FFFF
    )
}

/// Returns a copy of `value` with every character rejected by
/// [`is_valid_xml_char`] removed.
fn sanitize_xml_chars(value: &str) -> String {
    value
        .chars()
        .filter(|character| is_valid_xml_char(*character))
        .collect()
}

/// Shared implementation behind [`escape_xml_text`] and
/// [`escape_xml_attribute`].
///
/// * `value` - raw, untrusted text.
/// * `escape_attribute_whitespace` - when `true`, `\n`, `\r` and `\t` are
///   emitted as `&#10;`, `&#13;` and `&#9;` instead of literally.
///
/// Invalid characters are filtered in the same pass that escapes, so no
/// intermediate sanitized `String` is allocated.
fn escape_xml_value(value: &str, escape_attribute_whitespace: bool) -> String {
    // Escaping only ever grows the text, so the input length is a lower bound.
    let mut escaped = String::with_capacity(value.len());

    for character in value.chars().filter(|c| is_valid_xml_char(*c)) {
        match character {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            '\n' if escape_attribute_whitespace => escaped.push_str("&#10;"),
            '\r' if escape_attribute_whitespace => escaped.push_str("&#13;"),
            '\t' if escape_attribute_whitespace => escaped.push_str("&#9;"),
            _ => escaped.push(character),
        }
    }

    escaped
}

/// Prepares `value` for embedding inside a `<![CDATA[ ... ]]>` section.
///
/// Invalid XML characters are removed, and every `]]>` is split across two
/// adjacent CDATA sections so the terminator never appears inside the payload.
fn escape_cdata_text(value: &str) -> String {
    sanitize_xml_chars(value).replace("]]>", "]]]]><![CDATA[>")
}

/// Renders one `<key>value</key>` element.
///
/// The `description` field is wrapped in a CDATA section; every other field
/// is entity-escaped as element text. `key` is emitted as given.
fn render_field(key: &str, value: &str) -> String {
    if key == "description" {
        format!("<{key}><![CDATA[{}]]></{key}>", escape_cdata_text(value))
    } else {
        format!("<{key}>{}</{key}>", escape_xml_text(value))
    }
}

// The rest of the commit is visible only as partial hunks and is not
// reproduced here:
//   * `impl Display for Item` and `impl Display for Writer` change their
//     per-field fold from `write!(acc, "<{k}>{v}...")` to
//     `write!(acc, "{}", render_field(k, v))`.
//   * `Writer` passes `self.link` through `escape_xml_attribute` before
//     interpolating it.
//   * A `tests` module adds: multiline_description_is_wrapped_in_cdata,
//     cdata_end_marker_is_escaped_safely, non_description_fields_are_xml_escaped,
//     text_fields_escape_quotes_and_apostrophes, link_attribute_is_xml_escaped,
//     invalid_xml_chars_are_filtered.