about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Fuwn <[email protected]> 2025-09-11 05:57:01 +0000
committer Fuwn <[email protected]> 2025-09-11 05:57:01 +0000
commit 28581ff44e9b6d98879807ecc78c8cd709af371a (patch)
tree 2ba228efc41ef90623106bb9c1ed75dc3b8f251c
parent fix(ast): Gracefully handle malformed link lines (diff)
download germ-28581ff44e9b6d98879807ecc78c8cd709af371a.tar.xz
germ-28581ff44e9b6d98879807ecc78c8cd709af371a.zip
fix(ast): Improve UTF-8 handling
-rw-r--r-- src/ast/container.rs 12
-rw-r--r-- tests/ast.rs 41
2 files changed, 48 insertions, 5 deletions
diff --git a/src/ast/container.rs b/src/ast/container.rs
index bf4166c..46c548b 100644
--- a/src/ast/container.rs
+++ b/src/ast/container.rs
@@ -224,10 +224,14 @@ impl Ast {
nodes.push(Node::Heading {
level,
- // Here, we are `get`ing the `&str` starting at the `level`-th
- // index, then trimming the start. These operations
- // effectively off the line identifier.
- text: line.get(level..).unwrap_or("").trim_start().to_string(),
+ // Here, the text after the heading markers is safely extracted.
+ // `chars().skip()` is used to safely handle UTF-8 boundaries.
+ text: line
+ .chars()
+ .skip(level)
+ .collect::<String>()
+ .trim_start()
+ .to_string(),
});
break;
diff --git a/tests/ast.rs b/tests/ast.rs
index 7a71f5c..5249931 100644
--- a/tests/ast.rs
+++ b/tests/ast.rs
@@ -95,7 +95,46 @@ mod test {
assert_eq!(to, "");
assert_eq!(text, &None);
} else {
- panic!("Expected Link node");
+ panic!("Expected link node");
+ }
+ }
+
+ #[test]
+ fn build_heading_with_unicode_and_edge_cases() {
+ // Unicode characters
+ let ast = Ast::from_string("# Hello, 世界!");
+
+ assert_eq!(ast.inner().len(), 1);
+
+ if let Node::Heading { level, text } = ast.inner().first().unwrap() {
+ assert_eq!(level, &1);
+ assert_eq!(text, "Hello, 世界!");
+ } else {
+ panic!("Expected heading node");
+ }
+
+ // Only hashes
+ let ast = Ast::from_string("###");
+
+ assert_eq!(ast.inner().len(), 1);
+
+ if let Node::Heading { level, text } = ast.inner().first().unwrap() {
+ assert_eq!(level, &3);
+ assert_eq!(text, "");
+ } else {
+ panic!("Expected heading node");
+ }
+
+ // Many hashes
+ let ast = Ast::from_string("########## Very Deep Heading");
+
+ assert_eq!(ast.inner().len(), 1);
+
+ if let Node::Heading { level, text } = ast.inner().first().unwrap() {
+ assert_eq!(level, &10);
+ assert_eq!(text, "Very Deep Heading");
+ } else {
+ panic!("Expected heading node");
}
}
}