icu_locid/extensions/unicode/
mod.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Unicode Extensions provide information about user preferences in a given locale.
6//!
7//! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and
8//! [`Attributes`].
9//!
10//!
11//! # Examples
12//!
13//! ```
14//! use icu::locid::extensions::unicode::{attribute, key, value, Unicode};
15//! use icu::locid::Locale;
16//!
17//! let loc: Locale = "en-US-u-foobar-hc-h12".parse().expect("Parsing failed.");
18//!
19//! assert_eq!(
20//!     loc.extensions.unicode.keywords.get(&key!("hc")),
21//!     Some(&value!("h12"))
22//! );
23//! assert!(loc
24//!     .extensions
25//!     .unicode
26//!     .attributes
27//!     .contains(&attribute!("foobar")));
28//! ```
29mod attribute;
30mod attributes;
31mod key;
32mod keywords;
33mod value;
34
35use core::cmp::Ordering;
36
37#[doc(inline)]
38pub use attribute::{attribute, Attribute};
39pub use attributes::Attributes;
40#[doc(inline)]
41pub use key::{key, Key};
42pub use keywords::Keywords;
43#[doc(inline)]
44pub use value::{value, Value};
45
46use crate::parser::ParserError;
47use crate::parser::SubtagIterator;
48use crate::shortvec::ShortBoxSlice;
49use litemap::LiteMap;
50
51/// Unicode Extensions provide information about user preferences in a given locale.
52///
53/// A list of [`Unicode BCP47 U Extensions`] as defined in [`Unicode Locale
54/// Identifier`] specification.
55///
56/// Unicode extensions provide subtags that specify language and/or locale-based behavior
57/// or refinements to language tags, according to work done by the Unicode Consortium.
58/// (See [`RFC 6067`] for details).
59///
60/// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension
61/// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt
62/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
63///
64/// # Examples
65///
66/// ```
67/// use icu::locid::extensions::unicode::{key, value};
68/// use icu::locid::Locale;
69///
70/// let loc: Locale =
71///     "de-u-hc-h12-ca-buddhist".parse().expect("Parsing failed.");
72///
73/// assert_eq!(
74///     loc.extensions.unicode.keywords.get(&key!("ca")),
75///     Some(&value!("buddhist"))
76/// );
77/// ```
78#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
79#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
80pub struct Unicode {
81    /// The key-value pairs present in this locale extension, with each extension key subtag
82    /// associated to its provided value subtag.
83    pub keywords: Keywords,
84    /// A canonically ordered sequence of single standalone subtags for this locale extension.
85    pub attributes: Attributes,
86}
87
88impl Unicode {
89    /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`.
90    ///
91    /// # Examples
92    ///
93    /// ```
94    /// use icu::locid::extensions::unicode::Unicode;
95    ///
96    /// assert_eq!(Unicode::new(), Unicode::default());
97    /// ```
98    #[inline]
99    pub const fn new() -> Self {
100        Self {
101            keywords: Keywords::new(),
102            attributes: Attributes::new(),
103        }
104    }
105
106    /// Returns [`true`] if there list of keywords and attributes is empty.
107    ///
108    /// # Examples
109    ///
110    /// ```
111    /// use icu::locid::Locale;
112    ///
113    /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed.");
114    ///
115    /// assert!(!loc.extensions.unicode.is_empty());
116    /// ```
117    pub fn is_empty(&self) -> bool {
118        self.keywords.is_empty() && self.attributes.is_empty()
119    }
120
121    /// Clears all Unicode extension keywords and attributes, effectively removing
122    /// the Unicode extension.
123    ///
124    /// # Example
125    ///
126    /// ```
127    /// use icu::locid::Locale;
128    ///
129    /// let mut loc: Locale =
130    ///     "und-t-mul-u-hello-ca-buddhist-hc-h12".parse().unwrap();
131    /// loc.extensions.unicode.clear();
132    /// assert_eq!(loc, "und-t-mul".parse().unwrap());
133    /// ```
134    pub fn clear(&mut self) {
135        self.keywords.clear();
136        self.attributes.clear();
137    }
138
139    pub(crate) fn as_tuple(&self) -> (&Attributes, &Keywords) {
140        (&self.attributes, &self.keywords)
141    }
142
143    /// Returns an ordering suitable for use in [`BTreeSet`].
144    ///
145    /// The ordering may or may not be equivalent to string ordering, and it
146    /// may or may not be stable across ICU4X releases.
147    ///
148    /// [`BTreeSet`]: alloc::collections::BTreeSet
149    pub fn total_cmp(&self, other: &Self) -> Ordering {
150        self.as_tuple().cmp(&other.as_tuple())
151    }
152
153    pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
154        let mut attributes = ShortBoxSlice::new();
155
156        while let Some(subtag) = iter.peek() {
157            if let Ok(attr) = Attribute::try_from_bytes(subtag) {
158                if let Err(idx) = attributes.binary_search(&attr) {
159                    attributes.insert(idx, attr);
160                }
161            } else {
162                break;
163            }
164            iter.next();
165        }
166
167        let mut keywords = LiteMap::new();
168
169        let mut current_keyword = None;
170        let mut current_value = ShortBoxSlice::new();
171
172        while let Some(subtag) = iter.peek() {
173            let slen = subtag.len();
174            if slen == 2 {
175                if let Some(kw) = current_keyword.take() {
176                    keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
177                    current_value = ShortBoxSlice::new();
178                }
179                current_keyword = Some(Key::try_from_bytes(subtag)?);
180            } else if current_keyword.is_some() {
181                match Value::parse_subtag(subtag) {
182                    Ok(Some(t)) => current_value.push(t),
183                    Ok(None) => {}
184                    Err(_) => break,
185                }
186            } else {
187                break;
188            }
189            iter.next();
190        }
191
192        if let Some(kw) = current_keyword.take() {
193            keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
194        }
195
196        // Ensure we've defined at least one attribute or keyword
197        if attributes.is_empty() && keywords.is_empty() {
198            return Err(ParserError::InvalidExtension);
199        }
200
201        Ok(Self {
202            keywords: keywords.into(),
203            attributes: Attributes::from_short_slice_unchecked(attributes),
204        })
205    }
206
207    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
208    where
209        F: FnMut(&str) -> Result<(), E>,
210    {
211        if self.is_empty() {
212            return Ok(());
213        }
214        f("u")?;
215        self.attributes.for_each_subtag_str(f)?;
216        self.keywords.for_each_subtag_str(f)?;
217        Ok(())
218    }
219}
220
221writeable::impl_display_with_writeable!(Unicode);
222
223impl writeable::Writeable for Unicode {
224    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
225        if self.is_empty() {
226            return Ok(());
227        }
228        sink.write_str("u")?;
229        if !self.attributes.is_empty() {
230            sink.write_char('-')?;
231            writeable::Writeable::write_to(&self.attributes, sink)?;
232        }
233        if !self.keywords.is_empty() {
234            sink.write_char('-')?;
235            writeable::Writeable::write_to(&self.keywords, sink)?;
236        }
237        Ok(())
238    }
239
240    fn writeable_length_hint(&self) -> writeable::LengthHint {
241        if self.is_empty() {
242            return writeable::LengthHint::exact(0);
243        }
244        let mut result = writeable::LengthHint::exact(1);
245        if !self.attributes.is_empty() {
246            result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1;
247        }
248        if !self.keywords.is_empty() {
249            result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1;
250        }
251        result
252    }
253}