urlencoding/
enc.rs

1use std::borrow::Cow;
2use std::fmt;
3use std::io;
4use std::str;
5
/// Display adapter that percent-encodes the wrapped value while it is being formatted,
/// without allocating.
///
/// Every byte other than ASCII alphanumerics and `-`, `_`, `.`, `~` is percent-encoded.
/// The wrapped data is assumed to be UTF-8.
///
/// ```rust
/// use urlencoding::Encoded;
/// format!("{}", Encoded("hello!"));
/// ```
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Encoded<Str>(pub Str);
16
17impl<Str: AsRef<[u8]>> Encoded<Str> {
18    /// Long way of writing `Encoded(data)`
19    ///
20    /// Takes any string-like type or a slice of bytes, either owned or borrowed.
21    #[inline(always)]
22    pub fn new(string: Str) -> Self {
23        Self(string)
24    }
25
26    #[inline(always)]
27    pub fn to_str(&self) -> Cow<str> {
28        encode_binary(self.0.as_ref())
29    }
30
31    /// Perform urlencoding to a string
32    #[inline]
33    #[allow(clippy::inherent_to_string_shadow_display)]
34    pub fn to_string(&self) -> String {
35        self.to_str().into_owned()
36    }
37
38    /// Perform urlencoding into a writer
39    #[inline]
40    pub fn write<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
41        encode_into(self.0.as_ref(), false, |s| writer.write_all(s.as_bytes()))?;
42        Ok(())
43    }
44
45    /// Perform urlencoding into a string
46    #[inline]
47    pub fn append_to(&self, string: &mut String) {
48        append_string(self.0.as_ref(), string, false);
49    }
50}
51
52impl<'a> Encoded<&'a str> {
53    /// Same as new, but hints a more specific type, so you can avoid errors about `AsRef<[u8]>` not implemented
54    /// on references-to-references.
55    #[inline(always)]
56    pub fn str(string: &'a str) -> Self {
57        Self(string)
58    }
59}
60
61impl<String: AsRef<[u8]>> fmt::Display for Encoded<String> {
62    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
63        encode_into(self.0.as_ref(), false, |s| f.write_str(s))?;
64        Ok(())
65    }
66}
67
68/// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`. Assumes UTF-8 encoding.
69///
70/// Call `.into_owned()` if you need a `String`
71#[inline(always)]
72pub fn encode(data: &str) -> Cow<str> {
73    encode_binary(data.as_bytes())
74}
75
76/// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`.
77#[inline]
78pub fn encode_binary(data: &[u8]) -> Cow<str> {
79    // add maybe extra capacity, but try not to exceed allocator's bucket size
80    let mut escaped = String::with_capacity(data.len() | 15);
81    let unmodified = append_string(data, &mut escaped, true);
82    if unmodified {
83        return Cow::Borrowed(unsafe {
84            // encode_into has checked it's ASCII
85            str::from_utf8_unchecked(data)
86        });
87    }
88    Cow::Owned(escaped)
89}
90
91fn append_string(data: &[u8], escaped: &mut String, may_skip: bool) -> bool {
92    encode_into(data, may_skip, |s| {
93        escaped.push_str(s);
94        Ok::<_, std::convert::Infallible>(())
95    }).unwrap()
96}
97
98fn encode_into<E>(mut data: &[u8], may_skip_write: bool, mut push_str: impl FnMut(&str) -> Result<(), E>) -> Result<bool, E> {
99    let mut pushed = false;
100    loop {
101        // Fast path to skip over safe chars at the beginning of the remaining string
102        let ascii_len = data.iter()
103            .take_while(|&&c| matches!(c, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' |  b'-' | b'.' | b'_' | b'~')).count();
104
105        let (safe, rest) = if ascii_len >= data.len() {
106            if !pushed && may_skip_write {
107                return Ok(true);
108            }
109            (data, &[][..]) // redundatnt to optimize out a panic in split_at
110        } else {
111            data.split_at(ascii_len)
112        };
113        pushed = true;
114        if !safe.is_empty() {
115            push_str(unsafe { str::from_utf8_unchecked(safe) })?;
116        }
117        if rest.is_empty() {
118            break;
119        }
120
121        match rest.split_first() {
122            Some((byte, rest)) => {
123                let enc = &[b'%', to_hex_digit(byte >> 4), to_hex_digit(byte & 15)];
124                push_str(unsafe { str::from_utf8_unchecked(enc) })?;
125                data = rest;
126            }
127            None => break,
128        };
129    }
130    Ok(false)
131}
132
/// Maps the low four bits of `digit` to the matching uppercase ASCII hex character.
///
/// Bits above the low nibble are ignored, so the result is always one of `0-9` or `A-F`
/// and the arithmetic cannot overflow. Callers feed the result to `from_utf8_unchecked`,
/// which relies on the output always being ASCII.
#[inline]
fn to_hex_digit(digit: u8) -> u8 {
    let nibble = digit & 0x0F;
    match nibble {
        0..=9 => b'0' + nibble,
        _ => b'A' + (nibble - 10),
    }
}