icu_locid/extensions/unicode/keywords.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use core::borrow::Borrow;
6use core::cmp::Ordering;
7use core::iter::FromIterator;
8use litemap::LiteMap;
9use writeable::Writeable;
10
11use super::Key;
12use super::Value;
13#[allow(deprecated)]
14use crate::ordering::SubtagOrderingResult;
15use crate::shortvec::ShortBoxSlice;
16
/// A list of [`Key`]-[`Value`] pairs representing functional information
/// about a locale's internationalization preferences.
///
/// Here are examples of fields used in Unicode:
/// - `hc` - Hour Cycle (`h11`, `h12`, `h23`, `h24`)
/// - `ca` - Calendar (`buddhist`, `gregory`, ...)
/// - `fw` - First Day Of the Week (`sun`, `mon`, `sat`, ...)
///
/// You can find the full list in the [`Unicode BCP 47 U Extension`] section of LDML.
///
/// [`Unicode BCP 47 U Extension`]: https://unicode.org/reports/tr35/tr35.html#Key_And_Type_Definitions_
///
/// # Examples
///
/// Manually build up a [`Keywords`] object:
///
/// ```
/// use icu::locid::extensions::unicode::{key, value, Keywords};
///
/// let keywords = [(key!("hc"), value!("h23"))]
///     .into_iter()
///     .collect::<Keywords>();
///
/// assert_eq!(&keywords.to_string(), "hc-h23");
/// ```
///
/// Access a [`Keywords`] object from a [`Locale`]:
///
/// ```
/// use icu::locid::{
///     extensions::unicode::{key, value},
///     Locale,
/// };
///
/// let loc: Locale = "und-u-hc-h23-kc-true".parse().expect("Valid BCP-47");
///
/// assert_eq!(loc.extensions.unicode.keywords.get(&key!("ca")), None);
/// assert_eq!(
///     loc.extensions.unicode.keywords.get(&key!("hc")),
///     Some(&value!("h23"))
/// );
/// assert_eq!(
///     loc.extensions.unicode.keywords.get(&key!("kc")),
///     Some(&value!("true"))
/// );
///
/// // Note that the `true` value of `kc` does not appear in the serialized form.
/// assert_eq!(loc.extensions.unicode.keywords.to_string(), "hc-h23-kc");
/// ```
///
/// [`Locale`]: crate::Locale
#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
pub struct Keywords(LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>);
69
70impl Keywords {
71 /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`.
72 ///
73 /// # Examples
74 ///
75 /// ```
76 /// use icu::locid::extensions::unicode::Keywords;
77 ///
78 /// assert_eq!(Keywords::new(), Keywords::default());
79 /// ```
80 #[inline]
81 pub const fn new() -> Self {
82 Self(LiteMap::new())
83 }
84
85 /// Create a new list of key-value pairs having exactly one pair, callable in a `const` context.
86 #[inline]
87 pub const fn new_single(key: Key, value: Value) -> Self {
88 Self(LiteMap::from_sorted_store_unchecked(
89 ShortBoxSlice::new_single((key, value)),
90 ))
91 }
92
93 /// Returns `true` if there are no keywords.
94 ///
95 /// # Examples
96 ///
97 /// ```
98 /// use icu::locid::locale;
99 /// use icu::locid::Locale;
100 ///
101 /// let loc1 = Locale::try_from_bytes(b"und-t-h0-hybrid").unwrap();
102 /// let loc2 = locale!("und-u-ca-buddhist");
103 ///
104 /// assert!(loc1.extensions.unicode.keywords.is_empty());
105 /// assert!(!loc2.extensions.unicode.keywords.is_empty());
106 /// ```
107 pub fn is_empty(&self) -> bool {
108 self.0.is_empty()
109 }
110
111 /// Returns `true` if the list contains a [`Value`] for the specified [`Key`].
112 ///
113 ///
114 /// # Examples
115 ///
116 /// ```
117 /// use icu::locid::extensions::unicode::{key, value, Keywords};
118 ///
119 /// let keywords = [(key!("ca"), value!("gregory"))]
120 /// .into_iter()
121 /// .collect::<Keywords>();
122 ///
123 /// assert!(&keywords.contains_key(&key!("ca")));
124 /// ```
125 pub fn contains_key<Q>(&self, key: &Q) -> bool
126 where
127 Key: Borrow<Q>,
128 Q: Ord,
129 {
130 self.0.contains_key(key)
131 }
132
133 /// Returns a reference to the [`Value`] corresponding to the [`Key`].
134 ///
135 ///
136 /// # Examples
137 ///
138 /// ```
139 /// use icu::locid::extensions::unicode::{key, value, Keywords};
140 ///
141 /// let keywords = [(key!("ca"), value!("buddhist"))]
142 /// .into_iter()
143 /// .collect::<Keywords>();
144 ///
145 /// assert_eq!(keywords.get(&key!("ca")), Some(&value!("buddhist")));
146 /// ```
147 pub fn get<Q>(&self, key: &Q) -> Option<&Value>
148 where
149 Key: Borrow<Q>,
150 Q: Ord,
151 {
152 self.0.get(key)
153 }
154
155 /// Returns a mutable reference to the [`Value`] corresponding to the [`Key`].
156 ///
157 /// Returns `None` if the key doesn't exist or if the key has no value.
158 ///
159 /// # Examples
160 ///
161 /// ```
162 /// use icu::locid::extensions::unicode::{key, value, Keywords};
163 ///
164 /// let mut keywords = [(key!("ca"), value!("buddhist"))]
165 /// .into_iter()
166 /// .collect::<Keywords>();
167 ///
168 /// if let Some(value) = keywords.get_mut(&key!("ca")) {
169 /// *value = value!("gregory");
170 /// }
171 /// assert_eq!(keywords.get(&key!("ca")), Some(&value!("gregory")));
172 /// ```
173 pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut Value>
174 where
175 Key: Borrow<Q>,
176 Q: Ord,
177 {
178 self.0.get_mut(key)
179 }
180
181 /// Sets the specified keyword, returning the old value if it already existed.
182 ///
183 /// # Examples
184 ///
185 /// ```
186 /// use icu::locid::extensions::unicode::{key, value};
187 /// use icu::locid::Locale;
188 ///
189 /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12"
190 /// .parse()
191 /// .expect("valid BCP-47 identifier");
192 /// let old_value = loc
193 /// .extensions
194 /// .unicode
195 /// .keywords
196 /// .set(key!("ca"), value!("japanese"));
197 ///
198 /// assert_eq!(old_value, Some(value!("buddhist")));
199 /// assert_eq!(loc, "und-u-hello-ca-japanese-hc-h12".parse().unwrap());
200 /// ```
201 pub fn set(&mut self, key: Key, value: Value) -> Option<Value> {
202 self.0.insert(key, value)
203 }
204
205 /// Removes the specified keyword, returning the old value if it existed.
206 ///
207 /// # Examples
208 ///
209 /// ```
210 /// use icu::locid::extensions::unicode::key;
211 /// use icu::locid::Locale;
212 ///
213 /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12"
214 /// .parse()
215 /// .expect("valid BCP-47 identifier");
216 /// loc.extensions.unicode.keywords.remove(key!("ca"));
217 /// assert_eq!(loc, "und-u-hello-hc-h12".parse().unwrap());
218 /// ```
219 pub fn remove<Q: Borrow<Key>>(&mut self, key: Q) -> Option<Value> {
220 self.0.remove(key.borrow())
221 }
222
223 /// Clears all Unicode extension keywords, leaving Unicode attributes.
224 ///
225 /// Returns the old Unicode extension keywords.
226 ///
227 /// # Example
228 ///
229 /// ```
230 /// use icu::locid::Locale;
231 ///
232 /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12".parse().unwrap();
233 /// loc.extensions.unicode.keywords.clear();
234 /// assert_eq!(loc, "und-u-hello".parse().unwrap());
235 /// ```
236 pub fn clear(&mut self) -> Self {
237 core::mem::take(self)
238 }
239
240 /// Retains a subset of keywords as specified by the predicate function.
241 ///
242 /// # Examples
243 ///
244 /// ```
245 /// use icu::locid::extensions::unicode::key;
246 /// use icu::locid::Locale;
247 ///
248 /// let mut loc: Locale = "und-u-ca-buddhist-hc-h12-ms-metric".parse().unwrap();
249 ///
250 /// loc.extensions
251 /// .unicode
252 /// .keywords
253 /// .retain_by_key(|&k| k == key!("hc"));
254 /// assert_eq!(loc, "und-u-hc-h12".parse().unwrap());
255 ///
256 /// loc.extensions
257 /// .unicode
258 /// .keywords
259 /// .retain_by_key(|&k| k == key!("ms"));
260 /// assert_eq!(loc, Locale::UND);
261 /// ```
262 pub fn retain_by_key<F>(&mut self, mut predicate: F)
263 where
264 F: FnMut(&Key) -> bool,
265 {
266 self.0.retain(|k, _| predicate(k))
267 }
268
269 /// Compare this [`Keywords`] with BCP-47 bytes.
270 ///
271 /// The return value is equivalent to what would happen if you first converted this
272 /// [`Keywords`] to a BCP-47 string and then performed a byte comparison.
273 ///
274 /// This function is case-sensitive and results in a *total order*, so it is appropriate for
275 /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
276 ///
277 /// # Examples
278 ///
279 /// ```
280 /// use icu::locid::Locale;
281 /// use std::cmp::Ordering;
282 ///
283 /// let bcp47_strings: &[&str] =
284 /// &["ca-hebrew", "ca-japanese", "ca-japanese-nu-latn", "nu-latn"];
285 ///
286 /// for ab in bcp47_strings.windows(2) {
287 /// let a = ab[0];
288 /// let b = ab[1];
289 /// assert!(a.cmp(b) == Ordering::Less);
290 /// let a_kwds = format!("und-u-{}", a)
291 /// .parse::<Locale>()
292 /// .unwrap()
293 /// .extensions
294 /// .unicode
295 /// .keywords;
296 /// assert!(a_kwds.strict_cmp(a.as_bytes()) == Ordering::Equal);
297 /// assert!(a_kwds.strict_cmp(b.as_bytes()) == Ordering::Less);
298 /// }
299 /// ```
300 pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
301 self.writeable_cmp_bytes(other)
302 }
303
304 /// Compare this [`Keywords`] with an iterator of BCP-47 subtags.
305 ///
306 /// This function has the same equality semantics as [`Keywords::strict_cmp`]. It is intended as
307 /// a more modular version that allows multiple subtag iterators to be chained together.
308 ///
309 /// For an additional example, see [`SubtagOrderingResult`].
310 ///
311 /// # Examples
312 ///
313 /// ```
314 /// use icu::locid::locale;
315 /// use std::cmp::Ordering;
316 ///
317 /// let subtags: &[&[u8]] = &[b"ca", b"buddhist"];
318 ///
319 /// let kwds = locale!("und-u-ca-buddhist").extensions.unicode.keywords;
320 /// assert_eq!(
321 /// Ordering::Equal,
322 /// kwds.strict_cmp_iter(subtags.iter().copied()).end()
323 /// );
324 ///
325 /// let kwds = locale!("und").extensions.unicode.keywords;
326 /// assert_eq!(
327 /// Ordering::Less,
328 /// kwds.strict_cmp_iter(subtags.iter().copied()).end()
329 /// );
330 ///
331 /// let kwds = locale!("und-u-nu-latn").extensions.unicode.keywords;
332 /// assert_eq!(
333 /// Ordering::Greater,
334 /// kwds.strict_cmp_iter(subtags.iter().copied()).end()
335 /// );
336 /// ```
337 #[deprecated(since = "1.5.0", note = "if you need this, please file an issue")]
338 #[allow(deprecated)]
339 pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
340 where
341 I: Iterator<Item = &'l [u8]>,
342 {
343 let r = self.for_each_subtag_str(&mut |subtag| {
344 if let Some(other) = subtags.next() {
345 match subtag.as_bytes().cmp(other) {
346 Ordering::Equal => Ok(()),
347 not_equal => Err(not_equal),
348 }
349 } else {
350 Err(Ordering::Greater)
351 }
352 });
353 match r {
354 Ok(_) => SubtagOrderingResult::Subtags(subtags),
355 Err(o) => SubtagOrderingResult::Ordering(o),
356 }
357 }
358
359 pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
360 where
361 F: FnMut(&str) -> Result<(), E>,
362 {
363 for (k, v) in self.0.iter() {
364 f(k.as_str())?;
365 v.for_each_subtag_str(f)?;
366 }
367 Ok(())
368 }
369
370 /// This needs to be its own method to help with type inference in helpers.rs
371 #[cfg(test)]
372 pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self {
373 v.into_iter().collect()
374 }
375}
376
377impl From<LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>> for Keywords {
378 fn from(map: LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>) -> Self {
379 Self(map)
380 }
381}
382
383impl FromIterator<(Key, Value)> for Keywords {
384 fn from_iter<I: IntoIterator<Item = (Key, Value)>>(iter: I) -> Self {
385 LiteMap::from_iter(iter).into()
386 }
387}
388
// NOTE(review): this macro is defined elsewhere in the crate. Presumably it generates the
// `Writeable`/`Display` impls that `strict_cmp` and the `to_string()` doc examples rely on,
// with the string arguments serving as sample inputs — confirm against the macro definition.
impl_writeable_for_key_value!(Keywords, "ca", "islamic-civil", "mm", "mm");