ariadne/
source.rs

1use super::*;
2
3use std::{
4    path::{Path, PathBuf},
5    collections::{HashMap, hash_map::Entry},
6    fs, mem::replace,
7};
8
/// A trait implemented by [`Source`] caches.
///
/// A cache resolves an identifier of type `Id` to the [`Source`] it names, and knows how to render that
/// identifier for display.
pub trait Cache<Id: ?Sized> {
    /// Fetch the [`Source`] identified by the given ID, if possible.
    ///
    /// On failure, the error is returned as a boxed value that implements [`fmt::Debug`].
    // TODO: Don't box
    fn fetch(&mut self, id: &Id) -> Result<&Source, Box<dyn fmt::Debug + '_>>;

    /// Display the given ID as a single inline value.
    ///
    /// Implementations may return `None` (the `()`-keyed implementation for [`Source`] does).
    ///
    /// This function may make use of attributes from the [`Fmt`] trait.
    // TODO: Don't box
    fn display<'a>(&self, id: &'a Id) -> Option<Box<dyn fmt::Display + 'a>>;
}
21
22impl<'b, C: Cache<Id>, Id: ?Sized> Cache<Id> for &'b mut C {
23    fn fetch(&mut self, id: &Id) -> Result<&Source, Box<dyn fmt::Debug + '_>> { C::fetch(self, id) }
24    fn display<'a>(&self, id: &'a Id) -> Option<Box<dyn fmt::Display + 'a>> { C::display(self, id) }
25}
26
27impl<C: Cache<Id>, Id: ?Sized> Cache<Id> for Box<C> {
28    fn fetch(&mut self, id: &Id) -> Result<&Source, Box<dyn fmt::Debug + '_>> { C::fetch(self, id) }
29    fn display<'a>(&self, id: &'a Id) -> Option<Box<dyn fmt::Display + 'a>> { C::display(self, id) }
30}
31
/// A type representing a single line of a [`Source`].
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct Line {
    // Number of characters that precede this line in the source.
    offset: usize,
    // Number of characters in the line, counting its terminator and any trailing whitespace.
    len: usize,
    // Text of the line as stored by `Source::from`, i.e. with trailing whitespace trimmed off.
    chars: String,
}
39
40impl Line {
41    /// Get the offset of this line in the original [`Source`] (i.e: the number of characters that precede it).
42    pub fn offset(&self) -> usize { self.offset }
43
44    /// Get the character length of this line.
45    pub fn len(&self) -> usize { self.len }
46
47    /// Get the offset span of this line in the original [`Source`].
48    pub fn span(&self) -> Range<usize> { self.offset..self.offset + self.len }
49
50    /// Return an iterator over the characters in the line, excluding trailing whitespace.
51    pub fn chars(&self) -> impl Iterator<Item = char> + '_ { self.chars.chars() }
52}
53
/// A type representing a single source that may be referred to by [`Span`]s.
///
/// In most cases, a source is a single input file.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct Source {
    // The lines of the source, stored in order of increasing offset.
    lines: Vec<Line>,
    // Total number of characters in the source (the sum of every line's length).
    len: usize,
}
62
63impl<S: AsRef<str>> From<S> for Source {
64    /// Generate a [`Source`] from the given [`str`].
65    ///
66    /// Note that this function can be expensive for long strings. Use an implementor of [`Cache`] where possible.
67    fn from(s: S) -> Self {
68        let mut offset = 0;
69        // (Last line, last line ends with CR)
70        let mut last_line: Option<(Line, bool)> = None;
71        let mut lines: Vec<Line> = s
72            .as_ref()
73            .split_inclusive([
74                '\r', // Carriage return
75                '\n', // Line feed
76                '\x0B', // Vertical tab
77                '\x0C', // Form feed
78                '\u{0085}', // Next line
79                '\u{2028}', // Line separator
80                '\u{2029}' // Paragraph separator
81            ])
82            .flat_map(|line| {
83                // Returns last line and set `last_line` to current `line`
84                // A hack that makes `flat_map` deals with consecutive lines
85
86                if let Some((last, ends_with_cr)) = last_line.as_mut() {
87                    if *ends_with_cr && line == "\n" {
88                        last.len += 1;
89                        offset += 1;
90                        return replace(&mut last_line, None).map(|(l, _)| l);
91                    }
92                }
93
94                let len = line.chars().count();
95                let ends_with_cr = line.ends_with('\r');
96                let line = Line {
97                    offset,
98                    len,
99                    chars: line.trim_end().to_owned(),
100                };
101                offset += len;
102                replace(&mut last_line, Some((line, ends_with_cr))).map(|(l, _)| l)
103            })
104            .collect();
105
106        if let Some((l, _)) = last_line {
107            lines.push(l);
108        }
109
110        Self {
111            lines,
112            len: offset,
113        }
114    }
115}
116
117impl Source {
118    /// Get the length of the total number of characters in the source.
119    pub fn len(&self) -> usize { self.len }
120
121    /// Return an iterator over the characters in the source.
122    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
123        self.lines.iter().map(|l| l.chars()).flatten()
124    }
125
126    /// Get access to a specific, zero-indexed [`Line`].
127    pub fn line(&self, idx: usize) -> Option<&Line> { self.lines.get(idx) }
128
129    /// Return an iterator over the [`Line`]s in this source.
130    pub fn lines(&self) -> impl ExactSizeIterator<Item = &Line> + '_ { self.lines.iter() }
131
132    /// Get the line that the given offset appears on, and the line/column numbers of the offset.
133    ///
134    /// Note that the line/column numbers are zero-indexed.
135    pub fn get_offset_line(&self, offset: usize) -> Option<(&Line, usize, usize)> {
136        if offset <= self.len {
137            let idx = self.lines
138                .binary_search_by_key(&offset, |line| line.offset)
139                .unwrap_or_else(|idx| idx.saturating_sub(1));
140            let line = &self.lines[idx];
141            assert!(offset >= line.offset, "offset = {}, line.offset = {}", offset, line.offset);
142            Some((line, idx, offset - line.offset))
143        } else {
144            None
145        }
146    }
147
148    /// Get the range of lines that this span runs across.
149    ///
150    /// The resulting range is guaranteed to contain valid line indices (i.e: those that can be used for
151    /// [`Source::line`]).
152    pub fn get_line_range<S: Span>(&self, span: &S) -> Range<usize> {
153        let start = self.get_offset_line(span.start()).map_or(0, |(_, l, _)| l);
154        let end = self.get_offset_line(span.end().saturating_sub(1).max(span.start())).map_or(self.lines.len(), |(_, l, _)| l + 1);
155        start..end
156    }
157}
158
159impl Cache<()> for Source {
160    fn fetch(&mut self, _: &()) -> Result<&Source, Box<dyn fmt::Debug + '_>> { Ok(self) }
161    fn display(&self, _: &()) -> Option<Box<dyn fmt::Display>> { None }
162}
163
164impl<Id: fmt::Display + Eq> Cache<Id> for (Id, Source) {
165    fn fetch(&mut self, id: &Id) -> Result<&Source, Box<dyn fmt::Debug + '_>> {
166        if id == &self.0 { Ok(&self.1) } else { Err(Box::new(format!("Failed to fetch source '{}'", id))) }
167    }
168    fn display<'a>(&self, id: &'a Id) -> Option<Box<dyn fmt::Display + 'a>> { Some(Box::new(id)) }
169}
170
/// A [`Cache`] that fetches [`Source`]s from the filesystem.
#[derive(Default, Debug, Clone)]
pub struct FileCache {
    // Sources that have already been read, keyed by the path they were fetched with.
    files: HashMap<PathBuf, Source>,
}
176
177impl Cache<Path> for FileCache {
178    fn fetch(&mut self, path: &Path) -> Result<&Source, Box<dyn fmt::Debug + '_>> {
179        Ok(match self.files.entry(path.to_path_buf()) { // TODO: Don't allocate here
180            Entry::Occupied(entry) => entry.into_mut(),
181            Entry::Vacant(entry) => entry.insert(Source::from(&fs::read_to_string(path).map_err(|e| Box::new(e) as _)?)),
182        })
183    }
184    fn display<'a>(&self, path: &'a Path) -> Option<Box<dyn fmt::Display + 'a>> { Some(Box::new(path.display())) }
185}
186
/// A [`Cache`] that fetches [`Source`]s using the provided function.
#[derive(Debug, Clone)]
pub struct FnCache<Id, F> {
    // Sources that were pre-inserted or have already been produced by `get`, keyed by ID.
    sources: HashMap<Id, Source>,
    // The function called to obtain the text of a source that is not in `sources` yet.
    get: F,
}
193
194impl<Id, F> FnCache<Id, F> {
195    /// Create a new [`FnCache`] with the given fetch function.
196    pub fn new(get: F) -> Self {
197        Self {
198            sources: HashMap::default(),
199            get,
200        }
201    }
202
203    /// Pre-insert a selection of [`Source`]s into this cache.
204    pub fn with_sources(mut self, sources: HashMap<Id, Source>) -> Self
205        where Id: Eq + Hash
206    {
207        self.sources.reserve(sources.len());
208        for (id, src) in sources {
209            self.sources.insert(id, src);
210        }
211        self
212    }
213}
214
215impl<Id: fmt::Display + Hash + PartialEq + Eq + Clone, F> Cache<Id> for FnCache<Id, F>
216    where F: for<'a> FnMut(&'a Id) -> Result<String, Box<dyn fmt::Debug>>
217{
218    fn fetch(&mut self, id: &Id) -> Result<&Source, Box<dyn fmt::Debug + '_>> {
219        Ok(match self.sources.entry(id.clone()) {
220            Entry::Occupied(entry) => entry.into_mut(),
221            Entry::Vacant(entry) => entry.insert(Source::from((self.get)(id)?)),
222        })
223    }
224    fn display<'a>(&self, id: &'a Id) -> Option<Box<dyn fmt::Display + 'a>> { Some(Box::new(id)) }
225}
226
227/// Create a [`Cache`] from a collection of ID/strings, where each corresponds to a [`Source`].
228pub fn sources<Id, S, I>(iter: I) -> impl Cache<Id>
229where
230    Id: fmt::Display + Hash + PartialEq + Eq + Clone + 'static,
231    I: IntoIterator<Item = (Id, S)>,
232    S: AsRef<str>,
233{
234    FnCache::new((move |id| Err(Box::new(format!("Failed to fetch source '{}'", id)) as _)) as fn(&_) -> _)
235        .with_sources(iter
236            .into_iter()
237            .map(|(id, s)| (id, Source::from(s.as_ref())))
238            .collect())
239}
240
#[cfg(test)]
mod tests {
    use std::iter::zip;

    use super::Source;

    /// Checks that `Source::from` splits text into the expected lines with consistent offsets and lengths.
    #[test]
    fn source_from() {
        // Joins `expected` into one string, parses it, and checks that each parsed line matches the
        // corresponding raw piece (offset, full character length, and trimmed text).
        fn test(expected: Vec<&str>) {
            let parsed = Source::from(expected.concat());

            assert_eq!(parsed.lines.len(), expected.len());

            let mut running = 0;
            for (line, raw) in zip(parsed.lines.iter(), expected.iter()) {
                assert_eq!(line.offset, running);
                assert_eq!(line.len, raw.chars().count());
                assert_eq!(line.chars, raw.trim_end());
                running += line.len;
            }

            assert_eq!(parsed.len, running);
        }

        test(vec![]); // Empty string

        test(vec!["Single line"]);
        test(vec!["Single line with LF\n"]);
        test(vec!["Single line with CRLF\r\n"]);

        test(vec!["Two\r\n", "lines\n"]);
        test(vec!["Some\n", "more\r\n", "lines"]);
        test(vec!["\n", "\r\n", "\n", "Empty Lines"]);

        test(vec!["Trailing spaces  \n", "are trimmed\t"]);

        // Line endings other than LF or CRLF
        test(vec!["CR\r", "VT\x0B", "FF\x0C", "NEL\u{0085}", "LS\u{2028}", "PS\u{2029}"]);
    }
}