radix_trie/
keys.rs

1use endian_type::{BigEndian, LittleEndian};
2use std::ffi::OsString;
3use std::path::{Path, PathBuf};
4
5use nibble_vec::Nibblet;
6
7/// Trait for types which can be used to key a Radix Trie.
8///
9/// Types that implement this trait should be convertible to a vector of half-bytes (nibbles)
10/// such that no two instances of the type convert to the same vector.
11/// To protect against faulty behaviour, the trie will **panic** if it finds two distinct keys
12/// of type `K` which encode to the same `Nibblet`, so be careful!
13///
14/// If you would like to implement this trait for your own type, you need to implement
15/// *either* `encode_bytes` or `encode`. You only need to implement one of the two.
16/// If you don't implement one, your code will **panic** as soon you use the trie.
17/// There is no performance penalty for implementing `encode_bytes` instead of `encode`,
18/// so it is preferred except in the case where you require half-byte precision.
19///
20/// Many standard types implement this trait already. Integer types are encoded *big-endian*
21/// by default but can be encoded little-endian using the `LittleEndian<T>` wrapper type.
22pub trait TrieKey: PartialEq + Eq {
23    /// Encode a value as a vector of bytes.
24    fn encode_bytes(&self) -> Vec<u8> {
25        panic!("implement this method or TrieKey::encode");
26    }
27
28    /// Encode a value as a NibbleVec.
29    #[inline]
30    fn encode(&self) -> Nibblet {
31        Nibblet::from_byte_vec(self.encode_bytes())
32    }
33}
34
35/// Key comparison result.
36#[derive(Debug)]
37pub enum KeyMatch {
38    /// The keys match up to the given index.
39    Partial(usize),
40    /// The first key is a prefix of the second.
41    FirstPrefix,
42    /// The second key is a prefix of the first.
43    SecondPrefix,
44    /// The keys match exactly.
45    Full,
46}
47
48/// Compare two Trie keys.
49///
50/// Compares `first[start_idx .. ]` to `second`, i.e. only looks at a slice of the first key.
51#[inline]
52pub fn match_keys(start_idx: usize, first: &Nibblet, second: &Nibblet) -> KeyMatch {
53    let first_len = first.len() - start_idx;
54    let min_length = ::std::cmp::min(first_len, second.len());
55
56    for i in 0..min_length {
57        if first.get(start_idx + i) != second.get(i) {
58            return KeyMatch::Partial(i);
59        }
60    }
61
62    match (first_len, second.len()) {
63        (x, y) if x < y => KeyMatch::FirstPrefix,
64        (x, y) if x == y => KeyMatch::Full,
65        _ => KeyMatch::SecondPrefix,
66    }
67}
68
69/// Check two keys for equality and panic if they differ.
70#[inline]
71pub fn check_keys<K: ?Sized>(key1: &K, key2: &K)
72where
73    K: TrieKey,
74{
75    if *key1 != *key2 {
76        panic!("multiple-keys with the same bit representation.");
77    }
78}
79
80// --- TrieKey Implementations for standard types --- ///
81
82// This blanket implementation goes into play when specialization is stabilized
83// impl<T> TrieKey for T where T: Into<Vec<u8>> + Clone + Eq + PartialEq {
84// fn encode_bytes(&self) -> Vec<u8> {
85// self.clone().into()
86// }
87// }
88
89impl TrieKey for Vec<u8> {
90    #[inline]
91    fn encode_bytes(&self) -> Vec<u8> {
92        self.clone()
93    }
94}
95
96impl TrieKey for [u8] {
97    #[inline]
98    fn encode_bytes(&self) -> Vec<u8> {
99        self.to_vec()
100    }
101}
102
103impl TrieKey for String {
104    #[inline]
105    fn encode_bytes(&self) -> Vec<u8> {
106        self.as_bytes().encode_bytes()
107    }
108}
109
110impl TrieKey for str {
111    #[inline]
112    fn encode_bytes(&self) -> Vec<u8> {
113        self.as_bytes().encode_bytes()
114    }
115}
116
117impl<'a, T: ?Sized + TrieKey> TrieKey for &'a T {
118    #[inline]
119    fn encode_bytes(&self) -> Vec<u8> {
120        (**self).encode_bytes()
121    }
122}
123
124impl<'a, T: ?Sized + TrieKey> TrieKey for &'a mut T {
125    #[inline]
126    fn encode_bytes(&self) -> Vec<u8> {
127        (**self).encode_bytes()
128    }
129}
130
131impl TrieKey for i8 {
132    #[inline]
133    fn encode_bytes(&self) -> Vec<u8> {
134        let mut v: Vec<u8> = Vec::with_capacity(1);
135        v.push(*self as u8);
136        v
137    }
138}
139
140impl TrieKey for u8 {
141    #[inline]
142    fn encode_bytes(&self) -> Vec<u8> {
143        let mut v: Vec<u8> = Vec::with_capacity(1);
144        v.push(*self);
145        v
146    }
147}
148
149#[cfg(unix)]
150impl TrieKey for PathBuf {
151    fn encode_bytes(&self) -> Vec<u8> {
152        use std::os::unix::ffi::OsStringExt;
153        let str: OsString = self.clone().into();
154        str.into_vec()
155    }
156}
157
158#[cfg(unix)]
159impl TrieKey for Path {
160    fn encode_bytes(&self) -> Vec<u8> {
161        use std::os::unix::ffi::OsStrExt;
162        self.as_os_str().as_bytes().encode_bytes()
163    }
164}
165
166impl<T> TrieKey for LittleEndian<T>
167where
168    T: Eq + Copy,
169{
170    fn encode_bytes(&self) -> Vec<u8> {
171        self.as_bytes().encode_bytes()
172    }
173}
174
175impl<T> TrieKey for BigEndian<T>
176where
177    T: Eq + Copy,
178{
179    fn encode_bytes(&self) -> Vec<u8> {
180        self.as_bytes().to_vec()
181    }
182}
183
184macro_rules! int_keys {
185    ( $( $t:ty ),* ) => {
186        $(
187        impl TrieKey for $t {
188            fn encode_bytes(&self) -> Vec<u8> {
189                let be: BigEndian<$t> = From::from(*self);
190                be.encode_bytes()
191            }
192        }
193        )*
194    };
195}
196
197int_keys!(u16, u32, u64, i16, i32, i64, usize, isize);
198
199macro_rules! vec_int_keys {
200  ( $( $t:ty ),* ) => {
201      $(
202      impl TrieKey for Vec<$t> {
203          fn encode_bytes(&self) -> Vec<u8> {
204              let mut v = Vec::<u8>::with_capacity(self.len() * std::mem::size_of::<$t>());
205              for u in self {
206                  v.extend_from_slice(&u.to_be_bytes());
207              }
208              v
209          }
210      }
211      )*
212   };
213}
214
215vec_int_keys!(u16, u32, u64, i16, i32, i64, usize, isize);
216
217#[cfg(test)]
218mod test {
219    pub trait DefaultTrieKey {
220        fn encode_bytes(&self) -> Vec<u8>;
221    }
222
223    impl<T: Into<Vec<u8>> + Clone + PartialEq + Eq> DefaultTrieKey for T {
224        #[inline]
225        fn encode_bytes(&self) -> Vec<u8> {
226            self.clone().into()
227        }
228    }
229
230    pub trait AsTrieKey {
231        fn encode_bytes(&self) -> Vec<u8>;
232    }
233
234    impl<T: AsRef<[u8]> + Clone + PartialEq + Eq> AsTrieKey for &T {
235        #[inline]
236        fn encode_bytes(&self) -> Vec<u8> {
237            self.as_ref().to_vec()
238        }
239    }
240
241    macro_rules! encode_bytes {
242        ($e:expr) => {
243            (&$e).encode_bytes()
244        };
245    }
246
247    #[test]
248    fn test_autoref_specialization() {
249        let _ = encode_bytes!([0_u8]);
250        let _ = encode_bytes!("hello");
251        let _ = encode_bytes!("hello".to_string());
252    }
253}