aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/xml.rs138
1 files changed, 135 insertions, 3 deletions
diff --git a/src/xml.rs b/src/xml.rs
index d3b088e..9ca5b0c 100644
--- a/src/xml.rs
+++ b/src/xml.rs
@@ -3,6 +3,59 @@ use std::{
fmt::{Display, Write},
};
+fn escape_xml_text(value: &str) -> String { escape_xml_value(value, false) }
+
+fn escape_xml_attribute(value: &str) -> String { escape_xml_value(value, true) }
+
/// Reports whether `character` is one of the code points accepted as XML
/// content by this module.
///
/// Accepted: tab, line feed, carriage return, `U+0020..=U+D7FF`,
/// `U+E000..=U+FFFD` and `U+10000..=U+10FFFF`. Everything else (the remaining
/// C0 control characters, `U+FFFE` and `U+FFFF`) is rejected.
const fn is_valid_xml_char(character: char) -> bool {
    matches!(
        character,
        '\t' | '\n'
            | '\r'
            | '\u{20}'..='\u{D7FF}'
            | '\u{E000}'..='\u{FFFD}'
            | '\u{10000}'..='\u{10FFFF}'
    )
}
+
+fn sanitize_xml_chars(value: &str) -> String {
+ value.chars().filter(|character| is_valid_xml_char(*character)).collect()
+}
+
+fn escape_xml_value(value: &str, escape_attribute_whitespace: bool) -> String {
+ let sanitized = sanitize_xml_chars(value);
+ let mut escaped = String::with_capacity(sanitized.len());
+
+ for character in sanitized.chars() {
+ match character {
+ '&' => escaped.push_str("&"),
+ '<' => escaped.push_str("&lt;"),
+ '>' => escaped.push_str("&gt;"),
+ '"' => escaped.push_str("&quot;"),
+ '\'' => escaped.push_str("&apos;"),
+ '\n' if escape_attribute_whitespace => escaped.push_str("&#xA;"),
+ '\r' if escape_attribute_whitespace => escaped.push_str("&#xD;"),
+ '\t' if escape_attribute_whitespace => escaped.push_str("&#x9;"),
+ _ => escaped.push(character),
+ }
+ }
+
+ escaped
+}
+
+fn escape_cdata_text(value: &str) -> String {
+ sanitize_xml_chars(value).replace("]]>", "]]]]><![CDATA[>")
+}
+
+fn render_field(key: &str, value: &str) -> String {
+ if key == "description" {
+ format!("<{key}><![CDATA[{}]]></{key}>", escape_cdata_text(value))
+ } else {
+ format!("<{key}>{}</{key}>", escape_xml_text(value))
+ }
+}
+
/// A single feed entry, stored as a map from field name to field value.
///
/// Because the fields live in a `HashMap`, their iteration order is
/// unspecified.
pub struct Item {
    /// Field values keyed by field name.
    fields: HashMap<String, String>,
}
@@ -21,7 +74,7 @@ impl Display for Item {
f,
"<item>{}</item>",
self.fields.iter().fold(String::new(), |mut acc, (k, v)| {
- let _ = write!(acc, "<{k}>{v}</{k}>");
+ let _ = write!(acc, "{}", render_field(k, v));
acc
})
@@ -70,12 +123,91 @@ impl Display for Writer {
version=\"2.0\"><channel>{}<atom:link href=\"{}\" rel=\"self\" \
type=\"application/rss+xml\" />{}</channel></rss>",
self.fields.iter().fold(String::new(), |mut acc, (k, v)| {
- let _ = write!(acc, "<{k}>{v}</{k}>");
+ let _ = write!(acc, "{}", render_field(k, v));
acc
}),
- self.link,
+ escape_xml_attribute(&self.link),
self.content
)
}
}
+
#[cfg(test)]
mod tests {
    use super::{Item, Writer};

    /// A description containing a line break is emitted verbatim inside CDATA.
    #[test]
    fn multiline_description_is_wrapped_in_cdata() {
        let expected = "<description><![CDATA[first line\nsecond line]]></description>";

        let mut entry = Item::builder();
        entry.add_field("description", "first line\nsecond line");
        let rendered = entry.to_string();

        assert!(rendered.contains(expected));
    }

    /// A `]]>` inside a description is split across two CDATA sections.
    #[test]
    fn cdata_end_marker_is_escaped_safely() {
        let expected = "<description><![CDATA[before ]]]]><![CDATA[> after]]></description>";

        let mut entry = Item::builder();
        entry.add_field("description", "before ]]> after");
        let rendered = entry.to_string();

        assert!(rendered.contains(expected));
    }

    /// Fields other than `description` have `&` and `<` entity-escaped.
    #[test]
    fn non_description_fields_are_xml_escaped() {
        let expected = "<title>Fish &amp; Chips &lt;3</title>";

        let mut feed = Writer::builder();
        feed.add_field("title", "Fish & Chips <3");
        let rendered = feed.to_string();

        assert!(rendered.contains(expected));
    }

    /// Double and single quotes in element text are entity-escaped as well.
    #[test]
    fn text_fields_escape_quotes_and_apostrophes() {
        let expected = "<title>&quot;quoted&quot; and &apos;single&apos;</title>";

        let mut feed = Writer::builder();
        feed.add_field("title", "\"quoted\" and 'single'");
        let rendered = feed.to_string();

        assert!(rendered.contains(expected));
    }

    /// The self link is escaped before being placed in the `href` attribute.
    #[test]
    fn link_attribute_is_xml_escaped() {
        let expected = "<atom:link href=\"https://example.com/?a=1&amp;b=&quot;two&quot;\
                        &apos;three&apos;&lt;four&gt;\" rel=\"self\" \
                        type=\"application/rss+xml\" />";

        let mut feed = Writer::builder();
        feed.add_link("https://example.com/?a=1&b=\"two\"'three'<four>");
        let rendered = feed.to_string();

        assert!(rendered.contains(expected));
    }

    /// Control characters that are not legal in XML are dropped from output.
    #[test]
    fn invalid_xml_chars_are_filtered() {
        let expected = "<title>oktext</title>";

        let mut feed = Writer::builder();
        feed.add_field("title", "ok\u{0001}\u{0002}\u{0000}text");
        let rendered = feed.to_string();

        assert!(rendered.contains(expected));
    }
}