icu_locid/extensions/unicode/
keywords.rs

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use core::borrow::Borrow;
6use core::cmp::Ordering;
7use core::iter::FromIterator;
8use litemap::LiteMap;
9use writeable::Writeable;
10
11use super::Key;
12use super::Value;
13#[allow(deprecated)]
14use crate::ordering::SubtagOrderingResult;
15use crate::shortvec::ShortBoxSlice;
16
17/// A list of [`Key`]-[`Value`] pairs representing functional information
18/// about locale's internationalization preferences.
19///
20/// Here are examples of fields used in Unicode:
21/// - `hc` - Hour Cycle (`h11`, `h12`, `h23`, `h24`)
22/// - `ca` - Calendar (`buddhist`, `gregory`, ...)
23/// - `fw` - First Day Of the Week (`sun`, `mon`, `sat`, ...)
24///
25/// You can find the full list in [`Unicode BCP 47 U Extension`] section of LDML.
26///
27/// [`Unicode BCP 47 U Extension`]: https://unicode.org/reports/tr35/tr35.html#Key_And_Type_Definitions_
28///
29/// # Examples
30///
31/// Manually build up a [`Keywords`] object:
32///
33/// ```
34/// use icu::locid::extensions::unicode::{key, value, Keywords};
35///
36/// let keywords = [(key!("hc"), value!("h23"))]
37///     .into_iter()
38///     .collect::<Keywords>();
39///
40/// assert_eq!(&keywords.to_string(), "hc-h23");
41/// ```
42///
43/// Access a [`Keywords`] object from a [`Locale`]:
44///
45/// ```
46/// use icu::locid::{
47///     extensions::unicode::{key, value},
48///     Locale,
49/// };
50///
51/// let loc: Locale = "und-u-hc-h23-kc-true".parse().expect("Valid BCP-47");
52///
53/// assert_eq!(loc.extensions.unicode.keywords.get(&key!("ca")), None);
54/// assert_eq!(
55///     loc.extensions.unicode.keywords.get(&key!("hc")),
56///     Some(&value!("h23"))
57/// );
58/// assert_eq!(
59///     loc.extensions.unicode.keywords.get(&key!("kc")),
60///     Some(&value!("true"))
61/// );
62///
63/// assert_eq!(loc.extensions.unicode.keywords.to_string(), "hc-h23-kc");
64/// ```
65///
66/// [`Locale`]: crate::Locale
67#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
68pub struct Keywords(LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>);
69
70impl Keywords {
71    /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`.
72    ///
73    /// # Examples
74    ///
75    /// ```
76    /// use icu::locid::extensions::unicode::Keywords;
77    ///
78    /// assert_eq!(Keywords::new(), Keywords::default());
79    /// ```
80    #[inline]
81    pub const fn new() -> Self {
82        Self(LiteMap::new())
83    }
84
85    /// Create a new list of key-value pairs having exactly one pair, callable in a `const` context.
86    #[inline]
87    pub const fn new_single(key: Key, value: Value) -> Self {
88        Self(LiteMap::from_sorted_store_unchecked(
89            ShortBoxSlice::new_single((key, value)),
90        ))
91    }
92
93    /// Returns `true` if there are no keywords.
94    ///
95    /// # Examples
96    ///
97    /// ```
98    /// use icu::locid::locale;
99    /// use icu::locid::Locale;
100    ///
101    /// let loc1 = Locale::try_from_bytes(b"und-t-h0-hybrid").unwrap();
102    /// let loc2 = locale!("und-u-ca-buddhist");
103    ///
104    /// assert!(loc1.extensions.unicode.keywords.is_empty());
105    /// assert!(!loc2.extensions.unicode.keywords.is_empty());
106    /// ```
107    pub fn is_empty(&self) -> bool {
108        self.0.is_empty()
109    }
110
111    /// Returns `true` if the list contains a [`Value`] for the specified [`Key`].
112    ///
113    ///
114    /// # Examples
115    ///
116    /// ```
117    /// use icu::locid::extensions::unicode::{key, value, Keywords};
118    ///
119    /// let keywords = [(key!("ca"), value!("gregory"))]
120    ///     .into_iter()
121    ///     .collect::<Keywords>();
122    ///
123    /// assert!(&keywords.contains_key(&key!("ca")));
124    /// ```
125    pub fn contains_key<Q>(&self, key: &Q) -> bool
126    where
127        Key: Borrow<Q>,
128        Q: Ord,
129    {
130        self.0.contains_key(key)
131    }
132
133    /// Returns a reference to the [`Value`] corresponding to the [`Key`].
134    ///
135    ///
136    /// # Examples
137    ///
138    /// ```
139    /// use icu::locid::extensions::unicode::{key, value, Keywords};
140    ///
141    /// let keywords = [(key!("ca"), value!("buddhist"))]
142    ///     .into_iter()
143    ///     .collect::<Keywords>();
144    ///
145    /// assert_eq!(keywords.get(&key!("ca")), Some(&value!("buddhist")));
146    /// ```
147    pub fn get<Q>(&self, key: &Q) -> Option<&Value>
148    where
149        Key: Borrow<Q>,
150        Q: Ord,
151    {
152        self.0.get(key)
153    }
154
155    /// Returns a mutable reference to the [`Value`] corresponding to the [`Key`].
156    ///
157    /// Returns `None` if the key doesn't exist or if the key has no value.
158    ///
159    /// # Examples
160    ///
161    /// ```
162    /// use icu::locid::extensions::unicode::{key, value, Keywords};
163    ///
164    /// let mut keywords = [(key!("ca"), value!("buddhist"))]
165    ///     .into_iter()
166    ///     .collect::<Keywords>();
167    ///
168    /// if let Some(value) = keywords.get_mut(&key!("ca")) {
169    ///     *value = value!("gregory");
170    /// }
171    /// assert_eq!(keywords.get(&key!("ca")), Some(&value!("gregory")));
172    /// ```
173    pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut Value>
174    where
175        Key: Borrow<Q>,
176        Q: Ord,
177    {
178        self.0.get_mut(key)
179    }
180
181    /// Sets the specified keyword, returning the old value if it already existed.
182    ///
183    /// # Examples
184    ///
185    /// ```
186    /// use icu::locid::extensions::unicode::{key, value};
187    /// use icu::locid::Locale;
188    ///
189    /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12"
190    ///     .parse()
191    ///     .expect("valid BCP-47 identifier");
192    /// let old_value = loc
193    ///     .extensions
194    ///     .unicode
195    ///     .keywords
196    ///     .set(key!("ca"), value!("japanese"));
197    ///
198    /// assert_eq!(old_value, Some(value!("buddhist")));
199    /// assert_eq!(loc, "und-u-hello-ca-japanese-hc-h12".parse().unwrap());
200    /// ```
201    pub fn set(&mut self, key: Key, value: Value) -> Option<Value> {
202        self.0.insert(key, value)
203    }
204
205    /// Removes the specified keyword, returning the old value if it existed.
206    ///
207    /// # Examples
208    ///
209    /// ```
210    /// use icu::locid::extensions::unicode::key;
211    /// use icu::locid::Locale;
212    ///
213    /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12"
214    ///     .parse()
215    ///     .expect("valid BCP-47 identifier");
216    /// loc.extensions.unicode.keywords.remove(key!("ca"));
217    /// assert_eq!(loc, "und-u-hello-hc-h12".parse().unwrap());
218    /// ```
219    pub fn remove<Q: Borrow<Key>>(&mut self, key: Q) -> Option<Value> {
220        self.0.remove(key.borrow())
221    }
222
223    /// Clears all Unicode extension keywords, leaving Unicode attributes.
224    ///
225    /// Returns the old Unicode extension keywords.
226    ///
227    /// # Example
228    ///
229    /// ```
230    /// use icu::locid::Locale;
231    ///
232    /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12".parse().unwrap();
233    /// loc.extensions.unicode.keywords.clear();
234    /// assert_eq!(loc, "und-u-hello".parse().unwrap());
235    /// ```
236    pub fn clear(&mut self) -> Self {
237        core::mem::take(self)
238    }
239
240    /// Retains a subset of keywords as specified by the predicate function.
241    ///
242    /// # Examples
243    ///
244    /// ```
245    /// use icu::locid::extensions::unicode::key;
246    /// use icu::locid::Locale;
247    ///
248    /// let mut loc: Locale = "und-u-ca-buddhist-hc-h12-ms-metric".parse().unwrap();
249    ///
250    /// loc.extensions
251    ///     .unicode
252    ///     .keywords
253    ///     .retain_by_key(|&k| k == key!("hc"));
254    /// assert_eq!(loc, "und-u-hc-h12".parse().unwrap());
255    ///
256    /// loc.extensions
257    ///     .unicode
258    ///     .keywords
259    ///     .retain_by_key(|&k| k == key!("ms"));
260    /// assert_eq!(loc, Locale::UND);
261    /// ```
262    pub fn retain_by_key<F>(&mut self, mut predicate: F)
263    where
264        F: FnMut(&Key) -> bool,
265    {
266        self.0.retain(|k, _| predicate(k))
267    }
268
269    /// Compare this [`Keywords`] with BCP-47 bytes.
270    ///
271    /// The return value is equivalent to what would happen if you first converted this
272    /// [`Keywords`] to a BCP-47 string and then performed a byte comparison.
273    ///
274    /// This function is case-sensitive and results in a *total order*, so it is appropriate for
275    /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
276    ///
277    /// # Examples
278    ///
279    /// ```
280    /// use icu::locid::Locale;
281    /// use std::cmp::Ordering;
282    ///
283    /// let bcp47_strings: &[&str] =
284    ///     &["ca-hebrew", "ca-japanese", "ca-japanese-nu-latn", "nu-latn"];
285    ///
286    /// for ab in bcp47_strings.windows(2) {
287    ///     let a = ab[0];
288    ///     let b = ab[1];
289    ///     assert!(a.cmp(b) == Ordering::Less);
290    ///     let a_kwds = format!("und-u-{}", a)
291    ///         .parse::<Locale>()
292    ///         .unwrap()
293    ///         .extensions
294    ///         .unicode
295    ///         .keywords;
296    ///     assert!(a_kwds.strict_cmp(a.as_bytes()) == Ordering::Equal);
297    ///     assert!(a_kwds.strict_cmp(b.as_bytes()) == Ordering::Less);
298    /// }
299    /// ```
300    pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
301        self.writeable_cmp_bytes(other)
302    }
303
304    /// Compare this [`Keywords`] with an iterator of BCP-47 subtags.
305    ///
306    /// This function has the same equality semantics as [`Keywords::strict_cmp`]. It is intended as
307    /// a more modular version that allows multiple subtag iterators to be chained together.
308    ///
309    /// For an additional example, see [`SubtagOrderingResult`].
310    ///
311    /// # Examples
312    ///
313    /// ```
314    /// use icu::locid::locale;
315    /// use std::cmp::Ordering;
316    ///
317    /// let subtags: &[&[u8]] = &[b"ca", b"buddhist"];
318    ///
319    /// let kwds = locale!("und-u-ca-buddhist").extensions.unicode.keywords;
320    /// assert_eq!(
321    ///     Ordering::Equal,
322    ///     kwds.strict_cmp_iter(subtags.iter().copied()).end()
323    /// );
324    ///
325    /// let kwds = locale!("und").extensions.unicode.keywords;
326    /// assert_eq!(
327    ///     Ordering::Less,
328    ///     kwds.strict_cmp_iter(subtags.iter().copied()).end()
329    /// );
330    ///
331    /// let kwds = locale!("und-u-nu-latn").extensions.unicode.keywords;
332    /// assert_eq!(
333    ///     Ordering::Greater,
334    ///     kwds.strict_cmp_iter(subtags.iter().copied()).end()
335    /// );
336    /// ```
337    #[deprecated(since = "1.5.0", note = "if you need this, please file an issue")]
338    #[allow(deprecated)]
339    pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
340    where
341        I: Iterator<Item = &'l [u8]>,
342    {
343        let r = self.for_each_subtag_str(&mut |subtag| {
344            if let Some(other) = subtags.next() {
345                match subtag.as_bytes().cmp(other) {
346                    Ordering::Equal => Ok(()),
347                    not_equal => Err(not_equal),
348                }
349            } else {
350                Err(Ordering::Greater)
351            }
352        });
353        match r {
354            Ok(_) => SubtagOrderingResult::Subtags(subtags),
355            Err(o) => SubtagOrderingResult::Ordering(o),
356        }
357    }
358
359    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
360    where
361        F: FnMut(&str) -> Result<(), E>,
362    {
363        for (k, v) in self.0.iter() {
364            f(k.as_str())?;
365            v.for_each_subtag_str(f)?;
366        }
367        Ok(())
368    }
369
370    /// This needs to be its own method to help with type inference in helpers.rs
371    #[cfg(test)]
372    pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self {
373        v.into_iter().collect()
374    }
375}
376
377impl From<LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>> for Keywords {
378    fn from(map: LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>) -> Self {
379        Self(map)
380    }
381}
382
383impl FromIterator<(Key, Value)> for Keywords {
384    fn from_iter<I: IntoIterator<Item = (Key, Value)>>(iter: I) -> Self {
385        LiteMap::from_iter(iter).into()
386    }
387}
388
// NOTE(review): presumably this macro supplies the `Writeable`/`Display` impls that
// `to_string()` and `strict_cmp` above rely on; the string arguments look like sample
// key/value data for the macro — confirm against the macro definition.
impl_writeable_for_key_value!(Keywords, "ca", "islamic-civil", "mm", "mm");