1use crate::unescape::unescape;
7use std::borrow::Cow;
8use std::error::Error;
9use std::fmt::{Display, Formatter};
10use xmlparser::{ElementEnd, Token, Tokenizer};
11
/// Nesting depth of a token within the document.
///
/// The root element's start token is reported at depth 0 and each level of
/// nesting adds one.
pub type Depth = usize;
13
/// Internal classification of the failures an [`XmlDecodeError`] can carry.
#[derive(Debug)]
enum XmlDecodeErrorKind {
    /// An error reported by the `xmlparser` tokenizer; exposed as the error source.
    InvalidXml(xmlparser::Error),
    /// An invalid escape; `esc` is the text echoed in the error message.
    InvalidEscape { esc: String },
    /// A free-form message supplied by the caller.
    Custom(Cow<'static, str>),
    /// Any other underlying error; exposed as the error source.
    Unhandled(Box<dyn std::error::Error + Send + Sync + 'static>),
}
24
/// Error returned when decoding XML fails.
///
/// The concrete failure is kept in a private `kind` field, so values can only
/// be built through the associated constructor functions.
#[derive(Debug)]
pub struct XmlDecodeError {
    kind: XmlDecodeErrorKind,
}
29
30impl Display for XmlDecodeError {
31 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
32 match &self.kind {
33 XmlDecodeErrorKind::InvalidXml(_) => write!(f, "XML parse error"),
34 XmlDecodeErrorKind::InvalidEscape { esc } => write!(f, "invalid XML escape: {}", esc),
35 XmlDecodeErrorKind::Custom(msg) => write!(f, "error parsing XML: {}", msg),
36 XmlDecodeErrorKind::Unhandled(_) => write!(f, "error parsing XML"),
37 }
38 }
39}
40
41impl Error for XmlDecodeError {
42 fn source(&self) -> Option<&(dyn Error + 'static)> {
43 match &self.kind {
44 XmlDecodeErrorKind::InvalidXml(source) => Some(source as _),
45 XmlDecodeErrorKind::Unhandled(source) => Some(source.as_ref() as _),
46 XmlDecodeErrorKind::InvalidEscape { .. } | XmlDecodeErrorKind::Custom(..) => None,
47 }
48 }
49}
50
51impl XmlDecodeError {
52 pub(crate) fn invalid_xml(error: xmlparser::Error) -> Self {
53 Self {
54 kind: XmlDecodeErrorKind::InvalidXml(error),
55 }
56 }
57
58 pub(crate) fn invalid_escape(esc: impl Into<String>) -> Self {
59 Self {
60 kind: XmlDecodeErrorKind::InvalidEscape { esc: esc.into() },
61 }
62 }
63
64 pub fn custom(msg: impl Into<Cow<'static, str>>) -> Self {
65 Self {
66 kind: XmlDecodeErrorKind::Custom(msg.into()),
67 }
68 }
69
70 pub fn unhandled(error: impl Into<Box<dyn Error + Send + Sync + 'static>>) -> Self {
71 Self {
72 kind: XmlDecodeErrorKind::Unhandled(error.into()),
73 }
74 }
75}
76
/// An XML name split into its namespace prefix and local part.
///
/// Both parts borrow from the input; `prefix` is compared against the empty
/// string when a pattern has nothing before its `:`.
#[derive(PartialEq, Debug)]
pub struct Name<'a> {
    pub prefix: &'a str,
    pub local: &'a str,
}

impl Name<'_> {
    /// Returns `true` when this name matches `tag_name`.
    ///
    /// * Without a `:` in `tag_name`, only the local part is compared and the
    ///   prefix is ignored.
    /// * With a `:`, the text before the first `:` must equal the prefix and
    ///   everything after it must equal the local part.
    pub fn matches(&self, tag_name: &str) -> bool {
        match tag_name.split_once(':') {
            Some((prefix, local)) => self.prefix == prefix && self.local == local,
            None => self.local == tag_name,
        }
    }
}
97
/// A single attribute of a start element.
#[derive(Debug, PartialEq)]
pub struct Attr<'a> {
    /// Prefix and local part of the attribute name.
    name: Name<'a>,
    /// Attribute value; stored as a `Cow` so it can be borrowed or owned.
    value: Cow<'a, str>,
}
104
/// The opening tag of an element: its name, attributes and nesting depth.
#[derive(Debug, PartialEq)]
pub struct StartEl<'a> {
    /// Prefix and local part of the element name.
    name: Name<'a>,
    /// Attributes in the order they were collected.
    attributes: Vec<Attr<'a>>,
    /// `true` when the element was self-closing (e.g. `<A/>`).
    closed: bool,
    /// Depth reported for the element's start token.
    depth: Depth,
}
112
113impl<'a> StartEl<'a> {
121 pub fn depth(&self) -> Depth {
122 self.depth
123 }
124
125 fn new(local: &'a str, prefix: &'a str, depth: Depth) -> Self {
126 Self {
127 name: Name { prefix, local },
128 attributes: vec![],
129 closed: false,
130 depth,
131 }
132 }
133
134 pub fn attr<'b>(&'b self, key: &'b str) -> Option<&'b str> {
138 self.attributes
139 .iter()
140 .find(|attr| attr.name.matches(key))
141 .map(|attr| attr.value.as_ref())
142 }
143
144 pub fn matches(&self, pat: &str) -> bool {
147 self.name.matches(pat)
148 }
149
150 pub fn local(&self) -> &str {
157 self.name.local
158 }
159
160 pub fn prefix(&self) -> &str {
166 self.name.prefix
167 }
168
169 fn end_el(&self, el: ElementEnd<'_>, depth: Depth) -> bool {
171 if depth != self.depth {
172 return false;
173 }
174 match el {
175 ElementEnd::Open => false,
176 ElementEnd::Close(prefix, local) => {
177 prefix.as_str() == self.name.prefix && local.as_str() == self.name.local
178 }
179 ElementEnd::Empty => false,
180 }
181 }
182}
183
/// A token stream over an XML document that tracks the current nesting depth.
pub struct Document<'a> {
    /// Underlying `xmlparser` tokenizer.
    tokenizer: Tokenizer<'a>,
    /// Current nesting depth; starts at 0 and is updated as tokens are read.
    depth: Depth,
}
192
193impl<'a> TryFrom<&'a [u8]> for Document<'a> {
194 type Error = XmlDecodeError;
195
196 fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
197 Ok(Document::new(
198 std::str::from_utf8(value).map_err(XmlDecodeError::unhandled)?,
199 ))
200 }
201}
202
203impl<'inp> Document<'inp> {
204 pub fn new(doc: &'inp str) -> Self {
205 Document {
206 tokenizer: Tokenizer::from(doc),
207 depth: 0,
208 }
209 }
210
211 pub fn next_start_element<'a>(&'a mut self) -> Option<StartEl<'inp>> {
227 next_start_element(self)
228 }
229
230 pub fn root_element<'a>(&'a mut self) -> Result<ScopedDecoder<'inp, 'a>, XmlDecodeError> {
232 let start_el = self
233 .next_start_element()
234 .ok_or_else(|| XmlDecodeError::custom("no root element"))?;
235 Ok(ScopedDecoder {
236 doc: self,
237 start_el,
238 terminated: false,
239 })
240 }
241
242 pub fn scoped_to<'a>(&'a mut self, start_el: StartEl<'inp>) -> ScopedDecoder<'inp, 'a> {
248 ScopedDecoder {
249 doc: self,
250 start_el,
251 terminated: false,
252 }
253 }
254}
255
/// Wrapper around an `xmlparser` token; the inner token is private to this module.
#[derive(Debug)]
pub struct XmlToken<'inp>(Token<'inp>);
260
261impl<'inp> Iterator for Document<'inp> {
272 type Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>;
273 fn next<'a>(&'a mut self) -> Option<Result<(XmlToken<'inp>, Depth), XmlDecodeError>> {
274 let tok = self.tokenizer.next()?;
275 let tok = match tok {
276 Err(e) => return Some(Err(XmlDecodeError::invalid_xml(e))),
277 Ok(tok) => tok,
278 };
279 match tok {
281 Token::ElementEnd {
282 end: ElementEnd::Close(_, _),
283 ..
284 } => {
285 self.depth -= 1;
286 }
287 Token::ElementEnd {
288 end: ElementEnd::Empty,
289 ..
290 } => self.depth -= 1,
291 t @ Token::ElementStart { .. } => {
292 self.depth += 1;
293 return Some(Ok((XmlToken(t), self.depth - 1)));
296 }
297 _ => {}
298 }
299 Some(Ok((XmlToken(tok), self.depth)))
300 }
301}
302
/// A view over a [`Document`] limited to the contents of one element.
///
/// Iteration ends once the closing tag of `start_el` has been read.
pub struct ScopedDecoder<'inp, 'a> {
    /// The document tokens are pulled from.
    doc: &'a mut Document<'inp>,
    /// The element this decoder is scoped to.
    start_el: StartEl<'inp>,
    /// Set once the end of the scope has been reached.
    terminated: bool,
}
312
313impl Drop for ScopedDecoder<'_, '_> {
316 fn drop(&mut self) {
317 for _ in self {}
318 }
319}
320
321impl<'inp> ScopedDecoder<'inp, '_> {
322 pub fn start_el<'a>(&'a self) -> &'a StartEl<'inp> {
324 &self.start_el
325 }
326
327 pub fn next_tag<'a>(&'a mut self) -> Option<ScopedDecoder<'inp, 'a>> {
342 let next_tag = next_start_element(self)?;
343 Some(self.nested_decoder(next_tag))
344 }
345
346 fn nested_decoder<'a>(&'a mut self, start_el: StartEl<'inp>) -> ScopedDecoder<'inp, 'a> {
347 ScopedDecoder {
348 doc: self.doc,
349 start_el,
350 terminated: false,
351 }
352 }
353}
354
355impl<'inp, 'a> Iterator for ScopedDecoder<'inp, 'a> {
356 type Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>;
357
358 fn next(&mut self) -> Option<Self::Item> {
359 if self.start_el.closed {
360 self.terminated = true;
361 }
362 if self.terminated {
363 return None;
364 }
365 let (tok, depth) = match self.doc.next() {
366 Some(Ok((tok, depth))) => (tok, depth),
367 other => return other,
368 };
369
370 match tok.0 {
371 Token::ElementEnd { end, .. } if self.start_el.end_el(end, depth) => {
372 self.terminated = true;
373 return None;
374 }
375 _ => {}
376 }
377 Some(Ok((tok, depth)))
378 }
379}
380
381fn next_start_element<'a, 'inp>(
383 tokens: &'a mut impl Iterator<Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>>,
384) -> Option<StartEl<'inp>> {
385 let mut out = StartEl::new("", "", 0);
386 loop {
387 match tokens.next()? {
388 Ok((XmlToken(Token::ElementStart { local, prefix, .. }), depth)) => {
389 out.name.local = local.as_str();
390 out.name.prefix = prefix.as_str();
391 out.depth = depth;
392 }
393 Ok((
394 XmlToken(Token::Attribute {
395 prefix,
396 local,
397 value,
398 ..
399 }),
400 _,
401 )) => out.attributes.push(Attr {
402 name: Name {
403 local: local.as_str(),
404 prefix: prefix.as_str(),
405 },
406 value: unescape(value.as_str()).ok()?,
407 }),
408 Ok((
409 XmlToken(Token::ElementEnd {
410 end: ElementEnd::Open,
411 ..
412 }),
413 _,
414 )) => break,
415 Ok((
416 XmlToken(Token::ElementEnd {
417 end: ElementEnd::Empty,
418 ..
419 }),
420 _,
421 )) => {
422 out.closed = true;
423 break;
424 }
425 _ => {}
426 }
427 }
428 Some(out)
429}
430
431pub fn try_data<'a, 'inp>(
436 tokens: &'a mut impl Iterator<Item = Result<(XmlToken<'inp>, Depth), XmlDecodeError>>,
437) -> Result<Cow<'inp, str>, XmlDecodeError> {
438 loop {
439 match tokens.next().map(|opt| opt.map(|opt| opt.0)) {
440 None => return Ok(Cow::Borrowed("")),
441 Some(Ok(XmlToken(Token::Text { text }))) => return unescape(text.as_str()),
442 Some(Ok(e @ XmlToken(Token::ElementStart { .. }))) => {
443 return Err(XmlDecodeError::custom(format!(
444 "looking for a data element, found: {:?}",
445 e
446 )))
447 }
448 Some(Err(e)) => return Err(e),
449 _ => {}
450 }
451 }
452}
453
#[cfg(test)]
mod test {
    use crate::decode::{try_data, Attr, Depth, Document, Name, StartEl};

    /// Builds the `StartEl` expected for a self-closing tag such as `<A/>`.
    fn closed<'a>(local: &'a str, prefix: &'a str, depth: Depth) -> StartEl<'a> {
        let mut s = StartEl::new(local, prefix, depth);
        s.closed = true;
        s
    }

    #[test]
    fn scoped_tokens() {
        let xml = r#"<Response><A></A></Response>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().expect("valid document");
        assert_eq!(root.start_el().local(), "Response");
        assert_eq!(root.next_tag().expect("tag exists").start_el().local(), "A");
        assert!(root.next_tag().is_none());
    }

    #[test]
    fn handle_depth_properly() {
        // The inner `</Response>` must not end the outer scope: depths differ.
        let xml = r#"<Response><Response></Response><A/></Response>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().expect("valid document");
        assert_eq!(
            scoped.next_tag().unwrap().start_el(),
            &StartEl::new("Response", "", 1)
        );
        let closed_a = closed("A", "", 1);
        assert_eq!(scoped.next_tag().unwrap().start_el(), &closed_a);
        assert!(scoped.next_tag().is_none())
    }

    #[test]
    fn self_closing() {
        let xml = r#"<Response/>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().expect("valid doc");
        assert!(scoped.start_el.closed);
        assert!(scoped.next_tag().is_none())
    }

    #[test]
    fn terminate_scope() {
        let xml = r#"<Response><Struct><A></A><Also/></Struct><More/></Response>"#;
        let mut doc = Document::new(xml);
        let mut response_iter = doc.root_element().expect("valid doc");
        let mut struct_iter = response_iter.next_tag().unwrap();
        assert_eq!(
            struct_iter.next_tag().as_ref().map(|t| t.start_el()),
            Some(&StartEl::new("A", "", 2))
        );
        // Dropping the nested decoder skips the rest of `<Struct>`, so the
        // parent resumes at `<More/>`.
        drop(struct_iter);
        assert_eq!(
            response_iter.next_tag().unwrap().start_el(),
            &closed("More", "", 1)
        );
    }

    #[test]
    fn read_data_invalid() {
        let xml = r#"<Response><A></A></Response>"#;
        let mut doc = Document::new(xml);
        let mut resp = doc.root_element().unwrap();
        try_data(&mut resp).expect_err("no data");
    }

    #[test]
    fn read_data() {
        let xml = r#"<Response>hello</Response>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().unwrap();
        assert_eq!(try_data(&mut scoped).unwrap(), "hello");
    }

    #[test]
    fn read_data_whitespace() {
        let xml = r#"<Response> hello </Response>"#;
        let mut doc = Document::new(xml);
        let mut scoped = doc.root_element().unwrap();
        assert_eq!(try_data(&mut scoped).unwrap(), " hello ");
    }

    #[test]
    fn ignore_insignificant_whitespace() {
        let xml = r#"<Response> <A> </A> </Response>"#;
        let mut doc = Document::new(xml);
        let mut resp = doc.root_element().unwrap();
        let mut a = resp.next_tag().expect("should be a");
        let data = try_data(&mut a).expect("valid");
        assert_eq!(data, " ");
    }

    #[test]
    fn read_attributes() {
        let xml = r#"<Response xsi:type="CanonicalUser">hello</Response>"#;
        let mut tokenizer = Document::new(xml);
        let root = tokenizer.root_element().unwrap();

        assert_eq!(
            root.start_el().attributes,
            vec![Attr {
                name: Name {
                    prefix: "xsi",
                    local: "type"
                },
                value: "CanonicalUser".into()
            }]
        )
    }

    #[test]
    fn unescape_data() {
        // The input must contain escaped entities: raw `"` / `>` inside the
        // attribute would be malformed XML and would never exercise unescaping.
        let xml = r#"<Response key="&quot;hey&quot;&gt;">&gt;</Response>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().unwrap();
        assert_eq!(try_data(&mut root).unwrap(), ">");
        assert_eq!(root.start_el().attr("key"), Some("\"hey\">"));
    }

    #[test]
    fn nested_self_closer() {
        let xml = r#"<XmlListsInputOutput>
                <stringList/>
                <stringSet></stringSet>
        </XmlListsInputOutput>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().unwrap();
        let mut string_list = root.next_tag().unwrap();
        assert_eq!(string_list.start_el(), &closed("stringList", "", 1));
        assert!(string_list.next_tag().is_none());
        drop(string_list);
        assert_eq!(
            root.next_tag().unwrap().start_el(),
            &StartEl::new("stringSet", "", 1)
        );
    }

    #[test]
    fn confusing_nested_same_name_tag() {
        // Only direct children of the root are expected; the nested `<c/>`,
        // `<b>` and `<here/>` are skipped when each child decoder is dropped.
        let root_tags = &["a", "b", "c", "d"];
        let xml = r#"<XmlListsInputOutput>
            <a/>
            <b>
              <c/>
              <b></b>
              <here/>
            </b>
            <c></c>
            <d>more</d>
        </XmlListsInputOutput>"#;
        let mut doc = Document::new(xml);
        let mut root = doc.root_element().unwrap();
        let mut cmp = vec![];
        while let Some(tag) = root.next_tag() {
            cmp.push(tag.start_el().local().to_owned());
        }
        assert_eq!(root_tags, cmp.as_slice());
    }
}