aws_smithy_xml/
unescape.rs
1use crate::decode::XmlDecodeError;
7use std::borrow::Cow;
8
9pub(crate) fn unescape(s: &str) -> Result<Cow<'_, str>, XmlDecodeError> {
19 if !s.contains('&') {
21 return Ok(Cow::Borrowed(s));
22 }
23 let mut res = String::with_capacity(s.len());
25 let mut sections = s.split('&');
27 if let Some(prefix) = sections.next() {
29 res.push_str(prefix);
30 }
31 for section in sections {
32 match section.find(';') {
34 Some(idx) => {
35 let entity = §ion[..idx];
36 match entity {
37 "lt" => res.push('<'),
38 "gt" => res.push('>'),
39 "amp" => res.push('&'),
40 "quot" => res.push('"'),
41 "apos" => res.push('\''),
42 entity => {
43 let (entity, radix) = if let Some(entity) = entity.strip_prefix("#x") {
45 (entity, 16)
46 } else if let Some(entity) = entity.strip_prefix('#') {
47 (entity, 10)
49 } else {
50 return Err(XmlDecodeError::invalid_escape(entity));
51 };
52 let char_code = u32::from_str_radix(entity, radix).map_err(|_| {
53 XmlDecodeError::invalid_escape(format!(
54 "expected numeric escape in base {}; got: {}",
55 radix, &entity
56 ))
57 })?;
58 let chr = std::char::from_u32(char_code).ok_or_else(|| {
59 XmlDecodeError::invalid_escape(format!(
60 "invalid char code: {}",
61 char_code
62 ))
63 })?;
64 res.push(chr);
65 }
66 }
67 res.push_str(§ion[idx + 1..])
69 }
70 None => return Err(XmlDecodeError::invalid_escape("unterminated pattern")),
71 }
72 }
73 Ok(Cow::Owned(res))
74}
75
#[cfg(test)]
mod test {
    use crate::unescape::unescape;
    use std::borrow::Cow;

    /// The five predefined entities decode to their literal characters.
    #[test]
    fn basic_unescape() {
        assert_eq!(
            unescape("&lt; &gt; &apos; &quot; &amp;").unwrap(),
            "< > ' \" &"
        );
        assert_eq!(
            unescape("Since a &gt; b, b is less than a").unwrap(),
            "Since a > b, b is less than a"
        );
    }

    /// Input without any `&` comes back with identical content.
    #[test]
    fn no_need_to_escape() {
        assert_eq!(unescape("hello 🍕!").unwrap(), Cow::Borrowed("hello 🍕!"));
    }

    /// Adjacent entities, stray `;` characters, and single-pass decoding of `&amp;lt;`.
    #[test]
    fn complex_unescape() {
        assert_eq!(
            unescape("a&lt;b&gt;c&quot;d&apos;e&amp;f;;").unwrap(),
            "a<b>c\"d'e&f;;"
        );
        // Only one level of escaping is removed: the result is the text `&lt;`, not `<`.
        assert_eq!(unescape("&amp;lt;").unwrap(), "&lt;")
    }

    /// Decimal and hexadecimal character references for line-break characters.
    #[test]
    fn newline_encoding() {
        assert_eq!(unescape("&#10;").unwrap(), "\n");
        assert_eq!(unescape("&#xD;").unwrap(), "\r");
    }

    /// Escaped end-of-line characters (LF, CR, NEL, LINE SEPARATOR) are decoded as-is.
    #[test]
    fn xml_eol_encoding() {
        assert_eq!(unescape("&#xA; &#xA;").unwrap(), "\n \n");
        assert_eq!(
            unescape("a&#xD;&#xA; b&#xA; c&#xD;").unwrap(),
            "a\r\n b\n c\r"
        );
        assert_eq!(
            unescape("a&#xD;&#x85; b&#x85;").unwrap(),
            "a\r\u{0085} b\u{0085}"
        );
        assert_eq!(
            unescape("a&#xD;&#x2028; b&#x85; c&#x2028;").unwrap(),
            "a\r\u{2028} b\u{0085} c\u{2028}"
        );
    }

    /// Unknown entities, unterminated references, and malformed numbers are rejected.
    #[test]
    fn invalid_escapes() {
        unescape("&lte;").expect_err("lte does not make a ≤");
        unescape("&lt").expect_err("unterminated escape sequence");
        unescape("&#Q1234;").expect_err("Q does not began a numeric sequence");
        unescape("&#3.14;").expect_err("decimal escape");
        unescape("&#xZZ").expect_err("Z is not hex");
        unescape("here is a & but without an escape sequence...").expect_err("naked &");
    }

    use proptest::prelude::*;
    proptest! {
        /// Arbitrary input must never panic; any input containing `&` must either
        /// fail or produce an owned (rewritten) string.
        #[test]
        fn no_panics(s: String) {
            let unescaped = unescape(&s);
            if s.contains('&') {
                assert!(
                    matches!(unescaped, Ok(Cow::Owned(_)) | Err(_))
                );
            }
        }
    }
}