icu_locid/extensions/unicode/mod.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Unicode Extensions provide information about user preferences in a given locale.
6//!
7//! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and
8//! [`Attributes`].
9//!
10//!
11//! # Examples
12//!
13//! ```
14//! use icu::locid::extensions::unicode::{attribute, key, value, Unicode};
15//! use icu::locid::Locale;
16//!
17//! let loc: Locale = "en-US-u-foobar-hc-h12".parse().expect("Parsing failed.");
18//!
19//! assert_eq!(
20//! loc.extensions.unicode.keywords.get(&key!("hc")),
21//! Some(&value!("h12"))
22//! );
23//! assert!(loc
24//! .extensions
25//! .unicode
26//! .attributes
27//! .contains(&attribute!("foobar")));
28//! ```
29mod attribute;
30mod attributes;
31mod key;
32mod keywords;
33mod value;
34
35use core::cmp::Ordering;
36
37#[doc(inline)]
38pub use attribute::{attribute, Attribute};
39pub use attributes::Attributes;
40#[doc(inline)]
41pub use key::{key, Key};
42pub use keywords::Keywords;
43#[doc(inline)]
44pub use value::{value, Value};
45
46use crate::parser::ParserError;
47use crate::parser::SubtagIterator;
48use crate::shortvec::ShortBoxSlice;
49use litemap::LiteMap;
50
/// Unicode Extensions provide information about user preferences in a given locale.
///
/// A list of [`Unicode BCP47 U Extensions`] as defined in the
/// [`Unicode Locale Identifier`] specification.
///
/// Unicode extensions provide subtags that specify language and/or locale-based behavior
/// or refinements to language tags, according to work done by the Unicode Consortium.
/// (See [`RFC 6067`] for details).
///
/// The extension is made of two parts, both exposed as public fields: standalone
/// [`Attributes`] and key/value [`Keywords`]. An extension with neither is considered
/// empty (see [`Unicode::is_empty`]).
///
/// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension
/// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt
/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::unicode::{key, value};
/// use icu::locid::Locale;
///
/// let loc: Locale =
///     "de-u-hc-h12-ca-buddhist".parse().expect("Parsing failed.");
///
/// assert_eq!(
///     loc.extensions.unicode.keywords.get(&key!("ca")),
///     Some(&value!("buddhist"))
/// );
/// ```
#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
pub struct Unicode {
    // NOTE: field order is significant. The derived `PartialOrd`/`Ord` compare fields in
    // declaration order (`keywords` first), and the derived `Hash` feeds them in the same
    // order. `Unicode::total_cmp` deliberately uses a different order (attributes first).
    /// The key-value pairs present in this locale extension, with each extension key subtag
    /// associated to its provided value subtag.
    pub keywords: Keywords,
    /// A canonically ordered sequence of single standalone subtags for this locale extension.
    pub attributes: Attributes,
}
87
88impl Unicode {
89 /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`.
90 ///
91 /// # Examples
92 ///
93 /// ```
94 /// use icu::locid::extensions::unicode::Unicode;
95 ///
96 /// assert_eq!(Unicode::new(), Unicode::default());
97 /// ```
98 #[inline]
99 pub const fn new() -> Self {
100 Self {
101 keywords: Keywords::new(),
102 attributes: Attributes::new(),
103 }
104 }
105
106 /// Returns [`true`] if there list of keywords and attributes is empty.
107 ///
108 /// # Examples
109 ///
110 /// ```
111 /// use icu::locid::Locale;
112 ///
113 /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed.");
114 ///
115 /// assert!(!loc.extensions.unicode.is_empty());
116 /// ```
117 pub fn is_empty(&self) -> bool {
118 self.keywords.is_empty() && self.attributes.is_empty()
119 }
120
121 /// Clears all Unicode extension keywords and attributes, effectively removing
122 /// the Unicode extension.
123 ///
124 /// # Example
125 ///
126 /// ```
127 /// use icu::locid::Locale;
128 ///
129 /// let mut loc: Locale =
130 /// "und-t-mul-u-hello-ca-buddhist-hc-h12".parse().unwrap();
131 /// loc.extensions.unicode.clear();
132 /// assert_eq!(loc, "und-t-mul".parse().unwrap());
133 /// ```
134 pub fn clear(&mut self) {
135 self.keywords.clear();
136 self.attributes.clear();
137 }
138
139 pub(crate) fn as_tuple(&self) -> (&Attributes, &Keywords) {
140 (&self.attributes, &self.keywords)
141 }
142
143 /// Returns an ordering suitable for use in [`BTreeSet`].
144 ///
145 /// The ordering may or may not be equivalent to string ordering, and it
146 /// may or may not be stable across ICU4X releases.
147 ///
148 /// [`BTreeSet`]: alloc::collections::BTreeSet
149 pub fn total_cmp(&self, other: &Self) -> Ordering {
150 self.as_tuple().cmp(&other.as_tuple())
151 }
152
153 pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
154 let mut attributes = ShortBoxSlice::new();
155
156 while let Some(subtag) = iter.peek() {
157 if let Ok(attr) = Attribute::try_from_bytes(subtag) {
158 if let Err(idx) = attributes.binary_search(&attr) {
159 attributes.insert(idx, attr);
160 }
161 } else {
162 break;
163 }
164 iter.next();
165 }
166
167 let mut keywords = LiteMap::new();
168
169 let mut current_keyword = None;
170 let mut current_value = ShortBoxSlice::new();
171
172 while let Some(subtag) = iter.peek() {
173 let slen = subtag.len();
174 if slen == 2 {
175 if let Some(kw) = current_keyword.take() {
176 keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
177 current_value = ShortBoxSlice::new();
178 }
179 current_keyword = Some(Key::try_from_bytes(subtag)?);
180 } else if current_keyword.is_some() {
181 match Value::parse_subtag(subtag) {
182 Ok(Some(t)) => current_value.push(t),
183 Ok(None) => {}
184 Err(_) => break,
185 }
186 } else {
187 break;
188 }
189 iter.next();
190 }
191
192 if let Some(kw) = current_keyword.take() {
193 keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
194 }
195
196 // Ensure we've defined at least one attribute or keyword
197 if attributes.is_empty() && keywords.is_empty() {
198 return Err(ParserError::InvalidExtension);
199 }
200
201 Ok(Self {
202 keywords: keywords.into(),
203 attributes: Attributes::from_short_slice_unchecked(attributes),
204 })
205 }
206
207 pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
208 where
209 F: FnMut(&str) -> Result<(), E>,
210 {
211 if self.is_empty() {
212 return Ok(());
213 }
214 f("u")?;
215 self.attributes.for_each_subtag_str(f)?;
216 self.keywords.for_each_subtag_str(f)?;
217 Ok(())
218 }
219}
220
// Provides `core::fmt::Display` for `Unicode` on top of its `Writeable` implementation
// below, so the two textual renderings cannot drift apart.
writeable::impl_display_with_writeable!(Unicode);
222
223impl writeable::Writeable for Unicode {
224 fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
225 if self.is_empty() {
226 return Ok(());
227 }
228 sink.write_str("u")?;
229 if !self.attributes.is_empty() {
230 sink.write_char('-')?;
231 writeable::Writeable::write_to(&self.attributes, sink)?;
232 }
233 if !self.keywords.is_empty() {
234 sink.write_char('-')?;
235 writeable::Writeable::write_to(&self.keywords, sink)?;
236 }
237 Ok(())
238 }
239
240 fn writeable_length_hint(&self) -> writeable::LengthHint {
241 if self.is_empty() {
242 return writeable::LengthHint::exact(0);
243 }
244 let mut result = writeable::LengthHint::exact(1);
245 if !self.attributes.is_empty() {
246 result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1;
247 }
248 if !self.keywords.is_empty() {
249 result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1;
250 }
251 result
252 }
253}