dunce/
lib.rs

1//! Filesystem paths in Windows are a total mess. This crate normalizes paths to the most
2//! compatible (but still correct) format, so that you don't have to worry about the mess.
3//!
4//! In Windows the regular/legacy paths (`C:\foo`) are supported by all programs, but have
5//! lots of bizarre restrictions for backwards compatibility with MS-DOS.
6//!
//! And there are Windows NT UNC paths (`\\?\C:\foo`), which are more robust and have fewer
//! gotchas, but are rarely supported by Windows programs. Even Microsoft's own!
9//!
10//! This crate converts paths to legacy format whenever possible, but leaves UNC paths as-is
11//! when they can't be unambiguously expressed in a simpler way. This allows legacy programs
12//! to access all paths they can possibly access, and UNC-aware programs to access all paths.
13//!
14//! On non-Windows platforms these functions leave paths unmodified, so it's safe to use them
15//! unconditionally for all platforms.
16//!
17//! Parsing is based on <https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx>
18//!
19//! [Project homepage](https://lib.rs/crates/dunce).
20#![doc(html_logo_url = "https://assets.gitlab-static.net/uploads/-/system/project/avatar/4717715/dyc.png")]
21
22#[cfg(any(windows, test))]
23use std::ffi::OsStr;
24use std::fs;
25use std::io;
26#[cfg(windows)]
27use std::os::windows::ffi::OsStrExt;
28#[cfg(windows)]
29use std::path::{Component, Prefix};
30use std::path::{Path, PathBuf};
31
32/// Takes any path, and when possible, converts Windows UNC paths to regular paths.
33/// If the path can't be converted, it's returned unmodified.
34///
35/// On non-Windows this is no-op.
36///
37/// `\\?\C:\Windows` will be converted to `C:\Windows`,
38/// but `\\?\C:\COM` will be left as-is (due to a reserved filename).
39///
40/// Use this to pass arbitrary paths to programs that may not be UNC-aware.
41///
42/// It's generally safe to pass UNC paths to legacy programs, because
43/// these paths contain a reserved prefix, so will gracefully fail
44/// if used with legacy APIs that don't support UNC.
45///
46/// This function does not perform any I/O.
47///
48/// Currently paths with unpaired surrogates aren't converted even if they
49/// could be, due to limitations of Rust's `OsStr` API.
50///
51/// To check if a path remained as UNC, use `path.as_os_str().as_encoded_bytes().starts_with(b"\\\\")`.
52#[inline]
53pub fn simplified(path: &Path) -> &Path {
54    if is_safe_to_strip_unc(path) {
55        // unfortunately we can't safely strip prefix from a non-Unicode path
56        path.to_str().and_then(|s| s.get(4..)).map_or(path, Path::new)
57    } else {
58        path
59    }
60}
61
/// Resolves `path` like `std::fs::canonicalize()`, but on Windows the result is
/// returned in the most compatible form instead of UNC.
///
/// On non-Windows platforms this simply forwards to `std::fs::canonicalize()`.
#[inline(always)]
pub fn canonicalize<P: AsRef<Path>>(path: P) -> io::Result<PathBuf> {
    #[cfg(windows)]
    let resolved = canonicalize_win(path.as_ref());
    #[cfg(not(windows))]
    let resolved = fs::canonicalize(path.as_ref());
    resolved
}
77
/// Windows implementation of `canonicalize`: resolves the path with
/// `std::fs::canonicalize()` and then drops the `\\?\` prefix when that is safe.
#[cfg(windows)]
fn canonicalize_win(path: &Path) -> io::Result<PathBuf> {
    let resolved = fs::canonicalize(path)?;
    if !is_safe_to_strip_unc(&resolved) {
        return Ok(resolved);
    }
    // Slicing needs a `str`; fall back to the UNC form for non-Unicode paths.
    let legacy = resolved.to_str().and_then(|s| s.get(4..)).map(PathBuf::from);
    Ok(legacy.unwrap_or(resolved))
}
87
88pub use self::canonicalize as realpath;
89
90#[cfg(any(windows,test))]
91fn windows_char_len(s: &OsStr) -> usize {
92    #[cfg(not(windows))]
93    let len = s.to_string_lossy().chars().map(|c| if c as u32 <= 0xFFFF {1} else {2}).sum();
94    #[cfg(windows)]
95    let len = s.encode_wide().count();
96    len
97}
98
/// Checks whether a single path component is a file name that can be used as-is
/// in a legacy (non-UNC) Windows path.
///
/// Returns `false` when the name is longer than 255 UTF-16 units, is not valid
/// Unicode, is empty, contains an ASCII control character or one of
/// `< > : " / \ | ? *`, or ends with a space or a dot.
#[cfg(any(windows, test))]
fn is_valid_filename(file_name: &OsStr) -> bool {
    // The byte length is checked first, so the UTF-16 length is only computed
    // for names that might actually be over the limit.
    if file_name.len() > 255 && windows_char_len(file_name) > 255 {
        return false;
    }

    // Non-Unicode names are safe, but Rust can't reasonably operate on them losslessly.
    let bytes = match file_name.to_str() {
        Some(name) => name.as_bytes(),
        None => return false,
    };

    // Only the ASCII subset is inspected, so looking at raw UTF-8 bytes is fine.
    const FORBIDDEN: &[u8] = b"<>:\"/\\|?*";
    match bytes.last() {
        // Empty name, or a name ending with a space or a dot.
        None | Some(b' ') | Some(b'.') => false,
        Some(_) => bytes.iter().all(|b| *b >= 32 && !FORBIDDEN.contains(b)),
    }
}
124
/// DOS device names that legacy Windows paths treat specially; compared
/// case-insensitively by `is_reserved`.
#[cfg(any(windows, test))]
const RESERVED_NAMES: [&str; 22] = [
    "AUX", "NUL", "PRN", "CON",
    "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
    "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
];
130
/// Returns `true` when `file_name` refers to a reserved DOS device name
/// (one of `RESERVED_NAMES`, compared ASCII case-insensitively).
///
/// A reserved name stays reserved when it is followed by any number of
/// extensions, so `con.txt` and `nul.tar.gz` are both reported as reserved.
/// Trailing spaces and dots after the device name are ignored too
/// (`"con.. .txt"` is `CON` for DOS).
///
/// Names whose stem is not valid Unicode are reported as not reserved; all
/// reserved DOS names have an ASCII-compatible stem.
#[cfg(any(windows, test))]
fn is_reserved<P: AsRef<OsStr>>(file_name: P) -> bool {
    let device = Path::new(&file_name)
        .file_stem()
        .and_then(|stem| stem.to_str())
        // `file_stem` only removes the last extension; the device name is
        // whatever precedes the *first* dot ("nul.tar.gz" -> "nul").
        .and_then(|stem| stem.split('.').next())
        .map(right_trim);

    match device {
        // No reserved name is longer than 4 characters, so longer candidates
        // skip the table scan.
        Some(name) if name.len() <= 4 => {
            RESERVED_NAMES.iter().any(|reserved| name.eq_ignore_ascii_case(reserved))
        }
        _ => false,
    }
}
144
/// Non-Windows stand-in: there is never a UNC prefix to strip, so the answer is
/// always `false` and paths are left unmodified.
#[cfg(not(windows))]
#[inline]
const fn is_safe_to_strip_unc(_: &Path) -> bool {
    false
}
150
/// Decides whether the `\\?\` prefix can be removed from `path` without changing
/// what the path refers to.
///
/// This requires a verbatim disk prefix (`\\?\C:`), followed only by the root
/// separator and ordinary components that are valid, non-reserved file names,
/// and an overall length of at most 260 UTF-16 units.
#[cfg(windows)]
fn is_safe_to_strip_unc(path: &Path) -> bool {
    let mut parts = path.components();

    // Relative or empty paths and every other kind of UNC prefix are rejected.
    let has_verbatim_disk = matches!(
        parts.next(),
        Some(Component::Prefix(prefix)) if matches!(prefix.kind(), Prefix::VerbatimDisk(..))
    );
    if !has_verbatim_disk {
        return false;
    }

    let all_plain = parts.all(|part| match part {
        Component::RootDir => true,
        Component::Normal(file_name) => is_valid_filename(file_name) && !is_reserved(file_name),
        // UNC paths take things like ".." literally
        _ => false,
    });
    if !all_plain {
        return false;
    }

    let raw = path.as_os_str();
    // However, if the path is going to be used as a directory it's 248
    raw.len() <= 260 || windows_char_len(raw) <= 260
}
183
/// Strips every trailing `'.'` and `' '` from `s`.
#[cfg(any(windows, test))]
fn right_trim(s: &str) -> &str {
    s.trim_end_matches(|c: char| c == ' ' || c == '.')
}
189
/// `right_trim` removes trailing dots and spaces only, leaving everything else
/// (including non-ASCII look-alikes) intact.
#[test]
fn trim_test() {
    // (input, expected)
    let cases = [
        ("a.", "a"),
        ("ą.", "ą"),
        ("a ", "a"),
        ("ąą ", "ąą"),
        ("a. . . ....   ", "a"),
        ("a. . . ..ź..   ", "a. . . ..ź"),
        (" b", " b"),
        (" べ", " べ"),
        ("c. c.", "c. c"),
        ("。", "。"),
        ("", ""),
    ];
    for (input, expected) in cases.iter() {
        assert_eq!(*expected, right_trim(input), "input: {:?}", input);
    }
}
204
/// Reserved DOS device names are detected regardless of case, extension and
/// trailing dots/spaces; near-misses are not.
#[test]
fn reserved() {
    let reserved_names = [
        "CON",
        "con",
        "con.con",
        "COM4",
        "COM4.txt",
        "COM4 .txt",
        "con.",
        "con .",
        "con  ",
        "con . ",
        "con . .txt",
        "con.....txt",
        "PrN.....",
    ];
    for name in reserved_names.iter() {
        assert!(is_reserved(name), "{:?} should be reserved", name);
    }

    let ordinary_names = [
        " PrN.....",
        " CON",
        "COM0",
        "COM77",
        " CON ",
        ".CON",
        "@CON",
        "not.CON",
        "CON。",
    ];
    for name in ordinary_names.iter() {
        assert!(!is_reserved(name), "{:?} should not be reserved", name);
    }
}
231
/// `windows_char_len` counts UTF-16 code units: one for BMP characters, two for
/// characters outside it.
#[test]
fn len() {
    // (input, expected number of UTF-16 units)
    let cases = [("a", 1), ("€", 1), ("本", 1), ("🧐", 2), ("®®", 2)];
    for (text, units) in cases.iter() {
        assert_eq!(*units, windows_char_len(OsStr::new(text)), "input: {:?}", text);
    }
}
240
/// `is_valid_filename` rejects empty, over-long, dot/space-terminated names and
/// names with forbidden characters, and accepts ordinary names.
#[test]
fn valid() {
    let too_long = "a".repeat(257);
    let rejected = [
        "..",
        ".",
        "aaaaaaaaaa:",
        "ą:ą",
        "",
        "a ",
        " a. ",
        "a/",
        "/a",
        "/",
        "\\",
        "\\a",
        "<x>",
        "a*",
        "?x",
        "a\0a",
        "\x1f",
        too_long.as_str(),
    ];
    for name in rejected.iter() {
        assert!(!is_valid_filename(OsStr::new(name)), "{:?} should be invalid", name);
    }

    // 254 two-byte characters: more than 255 bytes, but fewer than 255 UTF-16 units.
    let long_but_ok = "®".repeat(254);
    let accepted = [
        long_but_ok.as_str(),
        "ファイル",
        "a",
        "a.aaaaaaaa",
        "a........a",
        "       b",
    ];
    for name in accepted.iter() {
        assert!(is_valid_filename(OsStr::new(name)), "{:?} should be valid", name);
    }
}
269
/// `canonicalize` returns a legacy-style path for the Windows directory and
/// resolves `.` to something other than `.`.
#[test]
#[cfg(windows)]
fn realpath_test() {
    let windows_dir = canonicalize(r"C:\Windows").unwrap();
    assert_eq!(r"C:\WINDOWS", windows_dir.to_str().unwrap().to_uppercase());

    let current_dir = canonicalize(r".").unwrap();
    assert_ne!(r".", current_dir.to_str().unwrap());
}
276
/// `simplified` strips the prefix from a verbatim disk path and leaves other
/// kinds of prefixed paths untouched.
#[test]
#[cfg(windows)]
fn strip() {
    assert_eq!(Path::new(r"C:\foo\😀"), simplified(Path::new(r"\\?\C:\foo\😀")));

    let left_alone = [
        r"\\?\serv\",
        r"\\.\C:\notdisk",
        r"\\?\GLOBALROOT\Device\ImDisk0\path\to\file.txt",
    ];
    for raw in left_alone.iter() {
        let path = Path::new(raw);
        assert_eq!(path, simplified(path));
    }
}
285
/// `is_safe_to_strip_unc` accepts plain verbatim disk paths of moderate length
/// and rejects over-long paths, `.`/`..` components and non-disk prefixes.
#[test]
#[cfg(windows)]
fn safe() {
    let long = "®".repeat(160);
    let one_long = format!(r"\\?\c:\{}", long);
    let two_long = format!(r"\\?\c:\{}\{}", long, long);

    let strippable = [
        r"\\?\C:\foo\bar",
        r"\\?\Z:\foo\bar\",
        r"\\?\Z:\😀\🎃\",
        r"\\?\c:\foo",
        one_long.as_str(),
    ];
    for raw in strippable.iter() {
        assert!(is_safe_to_strip_unc(Path::new(raw)), "{:?} should be safe", raw);
    }

    let not_strippable = [
        two_long.as_str(),
        r"\\?\C:\foo\.\bar",
        r"\\?\C:\foo\..\bar",
        r"\\?\c\foo",
        r"\\?\c\foo/bar",
        r"\\?\c:foo",
        r"\\?\cc:foo",
        r"\\?\c:foo\bar",
    ];
    for raw in not_strippable.iter() {
        assert!(!is_safe_to_strip_unc(Path::new(raw)), "{:?} should not be safe", raw);
    }
}
303    assert!(!is_safe_to_strip_unc(Path::new(r"\\?\cc:foo")));
304    assert!(!is_safe_to_strip_unc(Path::new(r"\\?\c:foo\bar")));
305}