icu_provider/request.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::{DataError, DataErrorKind};
6use core::cmp::Ordering;
7use core::default::Default;
8use core::fmt;
9use core::fmt::Debug;
10use core::hash::Hash;
11use core::str::FromStr;
12use icu_locid::extensions::unicode as unicode_ext;
13use icu_locid::subtags::{Language, Region, Script, Variants};
14use icu_locid::{LanguageIdentifier, Locale};
15use writeable::{LengthHint, Writeable};
16
17#[cfg(feature = "experimental")]
18use alloc::string::String;
19#[cfg(feature = "experimental")]
20use core::ops::Deref;
21#[cfg(feature = "experimental")]
22use icu_locid::extensions::private::Subtag;
23#[cfg(feature = "experimental")]
24use tinystr::TinyAsciiStr;
25
26#[cfg(doc)]
27use icu_locid::subtags::Variant;
28
/// The request type passed into all data provider implementations.
///
/// Bundles a borrowed [`DataLocale`] with [`DataRequestMetadata`]. The type is `Copy`, so it
/// can be passed by value.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
#[allow(clippy::exhaustive_structs)] // this type is stable
pub struct DataRequest<'a> {
    /// The locale for which to load data.
    ///
    /// If locale fallback is enabled, the resulting data may be from a different locale
    /// than the one requested here.
    pub locale: &'a DataLocale,
    /// Metadata that may affect the behavior of the data provider.
    pub metadata: DataRequestMetadata,
}
41
42impl fmt::Display for DataRequest<'_> {
43 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
44 fmt::Display::fmt(&self.locale, f)
45 }
46}
47
/// Metadata for data requests. It currently carries only the [`silent`](Self::silent) flag,
/// but it may be extended with options for tuning locale fallback, buffer layout, and so forth.
///
/// Because the struct is `#[non_exhaustive]`, code outside this crate cannot build it with a
/// struct literal; start from [`Default::default()`] and assign the public fields instead.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[non_exhaustive]
pub struct DataRequestMetadata {
    /// Silent requests do not log errors. This can be used for exploratory querying, such as fallbacks.
    pub silent: bool,
}
56
57/// A locale type optimized for use in fallbacking and the ICU4X data pipeline.
58///
59/// [`DataLocale`] contains less functionality than [`Locale`] but more than
60/// [`LanguageIdentifier`] for better size and performance while still meeting
61/// the needs of the ICU4X data pipeline.
62///
63/// # Examples
64///
65/// Convert a [`Locale`] to a [`DataLocale`] and back:
66///
67/// ```
68/// use icu_locid::locale;
69/// use icu_provider::DataLocale;
70///
71/// let locale = locale!("en-u-ca-buddhist");
72/// let data_locale = DataLocale::from(locale);
73/// let locale = data_locale.into_locale();
74///
75/// assert_eq!(locale, locale!("en-u-ca-buddhist"));
76/// ```
77///
78/// You can alternatively create a [`DataLocale`] from a borrowed [`Locale`], which is more
79/// efficient than cloning the [`Locale`], but less efficient than converting an owned
80/// [`Locale`]:
81///
82/// ```
83/// use icu_locid::locale;
84/// use icu_provider::DataLocale;
85///
86/// let locale1 = locale!("en-u-ca-buddhist");
87/// let data_locale = DataLocale::from(&locale1);
88/// let locale2 = data_locale.into_locale();
89///
90/// assert_eq!(locale1, locale2);
91/// ```
92///
93/// If you are sure that you have no Unicode keywords, start with [`LanguageIdentifier`]:
94///
95/// ```
96/// use icu_locid::langid;
97/// use icu_provider::DataLocale;
98///
99/// let langid = langid!("es-CA-valencia");
100/// let data_locale = DataLocale::from(langid);
101/// let langid = data_locale.get_langid();
102///
103/// assert_eq!(langid, langid!("es-CA-valencia"));
104/// ```
105///
106/// [`DataLocale`] only supports `-u` keywords, to reflect the current state of CLDR data
107/// lookup and fallback. This may change in the future.
108///
109/// ```
110/// use icu_locid::{locale, Locale};
111/// use icu_provider::DataLocale;
112///
113/// let locale = "hi-t-en-h0-hybrid-u-attr-ca-buddhist"
114/// .parse::<Locale>()
115/// .unwrap();
116/// let data_locale = DataLocale::from(locale);
117///
118/// assert_eq!(data_locale.into_locale(), locale!("hi-u-ca-buddhist"));
119/// ```
#[derive(PartialEq, Clone, Default, Eq, Hash)]
pub struct DataLocale {
    // The language identifier portion; always written first.
    langid: LanguageIdentifier,
    // Unicode extension keywords; written after `-u-` when non-empty.
    keywords: unicode_ext::Keywords,
    // Auxiliary keys; written after `-x-` when present. Only exists with the
    // "experimental" Cargo feature.
    #[cfg(feature = "experimental")]
    aux: Option<AuxiliaryKeys>,
}
127
128impl<'a> Default for &'a DataLocale {
129 fn default() -> Self {
130 static DEFAULT: DataLocale = DataLocale {
131 langid: LanguageIdentifier::UND,
132 keywords: unicode_ext::Keywords::new(),
133 #[cfg(feature = "experimental")]
134 aux: None,
135 };
136 &DEFAULT
137 }
138}
139
140impl fmt::Debug for DataLocale {
141 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
142 write!(f, "DataLocale{{{self}}}")
143 }
144}
145
146impl Writeable for DataLocale {
147 fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
148 self.langid.write_to(sink)?;
149 if !self.keywords.is_empty() {
150 sink.write_str("-u-")?;
151 self.keywords.write_to(sink)?;
152 }
153 #[cfg(feature = "experimental")]
154 if let Some(aux) = self.aux.as_ref() {
155 sink.write_str("-x-")?;
156 aux.write_to(sink)?;
157 }
158 Ok(())
159 }
160
161 fn writeable_length_hint(&self) -> LengthHint {
162 let mut length_hint = self.langid.writeable_length_hint();
163 if !self.keywords.is_empty() {
164 length_hint += self.keywords.writeable_length_hint() + 3;
165 }
166 #[cfg(feature = "experimental")]
167 if let Some(aux) = self.aux.as_ref() {
168 length_hint += aux.writeable_length_hint() + 3;
169 }
170 length_hint
171 }
172
173 fn write_to_string(&self) -> alloc::borrow::Cow<str> {
174 #[cfg_attr(not(feature = "experimental"), allow(unused_mut))]
175 let mut is_only_langid = self.keywords.is_empty();
176 #[cfg(feature = "experimental")]
177 {
178 is_only_langid = is_only_langid && self.aux.is_none();
179 }
180 if is_only_langid {
181 return self.langid.write_to_string();
182 }
183 let mut string =
184 alloc::string::String::with_capacity(self.writeable_length_hint().capacity());
185 let _ = self.write_to(&mut string);
186 alloc::borrow::Cow::Owned(string)
187 }
188}
189
// Generates the `fmt::Display` impl for `DataLocale` (relied on by `{self}` in the `Debug`
// impl). Judging by the macro name it forwards to `Writeable` — confirm in the `writeable` docs.
writeable::impl_display_with_writeable!(DataLocale);
191
192impl From<LanguageIdentifier> for DataLocale {
193 fn from(langid: LanguageIdentifier) -> Self {
194 Self {
195 langid,
196 keywords: unicode_ext::Keywords::new(),
197 #[cfg(feature = "experimental")]
198 aux: None,
199 }
200 }
201}
202
203impl From<Locale> for DataLocale {
204 fn from(locale: Locale) -> Self {
205 Self {
206 langid: locale.id,
207 keywords: locale.extensions.unicode.keywords,
208 #[cfg(feature = "experimental")]
209 aux: AuxiliaryKeys::try_from_iter(locale.extensions.private.iter().copied()).ok(),
210 }
211 }
212}
213
214impl From<&LanguageIdentifier> for DataLocale {
215 fn from(langid: &LanguageIdentifier) -> Self {
216 Self {
217 langid: langid.clone(),
218 keywords: unicode_ext::Keywords::new(),
219 #[cfg(feature = "experimental")]
220 aux: None,
221 }
222 }
223}
224
225impl From<&Locale> for DataLocale {
226 fn from(locale: &Locale) -> Self {
227 Self {
228 langid: locale.id.clone(),
229 keywords: locale.extensions.unicode.keywords.clone(),
230 #[cfg(feature = "experimental")]
231 aux: AuxiliaryKeys::try_from_iter(locale.extensions.private.iter().copied()).ok(),
232 }
233 }
234}
235
236impl FromStr for DataLocale {
237 type Err = DataError;
238 fn from_str(s: &str) -> Result<Self, Self::Err> {
239 let locale = Locale::from_str(s).map_err(|e| {
240 DataErrorKind::KeyLocaleSyntax
241 .into_error()
242 .with_display_context(s)
243 .with_display_context(&e)
244 })?;
245 Ok(DataLocale::from(locale))
246 }
247}
248
249impl DataLocale {
250 /// Compare this [`DataLocale`] with BCP-47 bytes.
251 ///
252 /// The return value is equivalent to what would happen if you first converted this
253 /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison.
254 ///
255 /// This function is case-sensitive and results in a *total order*, so it is appropriate for
256 /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
257 ///
258 /// # Examples
259 ///
260 /// ```
261 /// use icu_provider::DataLocale;
262 /// use std::cmp::Ordering;
263 ///
264 /// let bcp47_strings: &[&str] = &[
265 /// "ca",
266 /// "ca-ES",
267 /// "ca-ES-u-ca-buddhist",
268 /// "ca-ES-valencia",
269 /// "ca-ES-x-gbp",
270 /// "ca-ES-x-gbp-short",
271 /// "ca-ES-x-usd",
272 /// "ca-ES-xyzabc",
273 /// "ca-x-eur",
274 /// "cat",
275 /// "pl-Latn-PL",
276 /// "und",
277 /// "und-fonipa",
278 /// "und-u-ca-hebrew",
279 /// "und-u-ca-japanese",
280 /// "und-x-mxn",
281 /// "zh",
282 /// ];
283 ///
284 /// for ab in bcp47_strings.windows(2) {
285 /// let a = ab[0];
286 /// let b = ab[1];
287 /// assert_eq!(a.cmp(b), Ordering::Less, "strings: {} < {}", a, b);
288 /// let a_loc: DataLocale = a.parse().unwrap();
289 /// assert_eq!(
290 /// a_loc.strict_cmp(a.as_bytes()),
291 /// Ordering::Equal,
292 /// "strict_cmp: {} == {}",
293 /// a_loc,
294 /// a
295 /// );
296 /// assert_eq!(
297 /// a_loc.strict_cmp(b.as_bytes()),
298 /// Ordering::Less,
299 /// "strict_cmp: {} < {}",
300 /// a_loc,
301 /// b
302 /// );
303 /// let b_loc: DataLocale = b.parse().unwrap();
304 /// assert_eq!(
305 /// b_loc.strict_cmp(b.as_bytes()),
306 /// Ordering::Equal,
307 /// "strict_cmp: {} == {}",
308 /// b_loc,
309 /// b
310 /// );
311 /// assert_eq!(
312 /// b_loc.strict_cmp(a.as_bytes()),
313 /// Ordering::Greater,
314 /// "strict_cmp: {} > {}",
315 /// b_loc,
316 /// a
317 /// );
318 /// }
319 /// ```
320 ///
321 /// Comparison against invalid strings:
322 ///
323 /// ```
324 /// use icu_provider::DataLocale;
325 ///
326 /// let invalid_strings: &[&str] = &[
327 /// // Less than "ca-ES"
328 /// "CA",
329 /// "ar-x-gbp-FOO",
330 /// // Greater than "ca-ES-x-gbp"
331 /// "ca_ES",
332 /// "ca-ES-x-gbp-FOO",
333 /// ];
334 ///
335 /// let data_locale = "ca-ES-x-gbp".parse::<DataLocale>().unwrap();
336 ///
337 /// for s in invalid_strings.iter() {
338 /// let expected_ordering = "ca-ES-x-gbp".cmp(s);
339 /// let actual_ordering = data_locale.strict_cmp(s.as_bytes());
340 /// assert_eq!(expected_ordering, actual_ordering, "{}", s);
341 /// }
342 /// ```
343 pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
344 self.writeable_cmp_bytes(other)
345 }
346}
347
impl DataLocale {
    /// Returns whether this [`DataLocale`] has all empty fields (no components).
    ///
    /// This is implemented as equality with the default (`und`) locale, so keywords and
    /// auxiliary keys both make a locale non-empty.
    ///
    /// See also:
    ///
    /// - [`DataLocale::is_und()`]
    /// - [`DataLocale::is_langid_und()`]
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_provider::DataLocale;
    ///
    /// assert!("und".parse::<DataLocale>().unwrap().is_empty());
    /// assert!(!"und-u-ca-buddhist"
    ///     .parse::<DataLocale>()
    ///     .unwrap()
    ///     .is_empty());
    /// assert!(!"und-x-aux".parse::<DataLocale>().unwrap().is_empty());
    /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_empty());
    /// ```
    pub fn is_empty(&self) -> bool {
        self == <&DataLocale>::default()
    }

    /// Returns an ordering suitable for use in [`BTreeSet`].
    ///
    /// The ordering may or may not be equivalent to string ordering, and it
    /// may or may not be stable across ICU4X releases.
    ///
    /// Fields are compared in declaration order: language identifier, then Unicode
    /// keywords, then (with the "experimental" feature) auxiliary keys.
    ///
    /// [`BTreeSet`]: alloc::collections::BTreeSet
    pub fn total_cmp(&self, other: &Self) -> Ordering {
        self.langid
            .total_cmp(&other.langid)
            .then_with(|| self.keywords.cmp(&other.keywords))
            .then_with(|| {
                // Exactly one of the two `return`s is compiled in, depending on the feature
                #[cfg(feature = "experimental")]
                return self.aux.cmp(&other.aux);
                #[cfg(not(feature = "experimental"))]
                return Ordering::Equal;
            })
    }

    /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion.
    ///
    /// This ignores auxiliary keys.
    ///
    /// See also:
    ///
    /// - [`DataLocale::is_empty()`]
    /// - [`DataLocale::is_langid_und()`]
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_provider::DataLocale;
    ///
    /// assert!("und".parse::<DataLocale>().unwrap().is_und());
    /// assert!(!"und-u-ca-buddhist".parse::<DataLocale>().unwrap().is_und());
    /// assert!("und-x-aux".parse::<DataLocale>().unwrap().is_und());
    /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_und());
    /// ```
    pub fn is_und(&self) -> bool {
        self.langid == LanguageIdentifier::UND && self.keywords.is_empty()
    }

    /// Returns whether the [`LanguageIdentifier`] associated with this request is `und`.
    ///
    /// This ignores extension keywords and auxiliary keys.
    ///
    /// See also:
    ///
    /// - [`DataLocale::is_empty()`]
    /// - [`DataLocale::is_und()`]
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_provider::DataLocale;
    ///
    /// assert!("und".parse::<DataLocale>().unwrap().is_langid_und());
    /// assert!("und-u-ca-buddhist"
    ///     .parse::<DataLocale>()
    ///     .unwrap()
    ///     .is_langid_und());
    /// assert!("und-x-aux".parse::<DataLocale>().unwrap().is_langid_und());
    /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_langid_und());
    /// ```
    pub fn is_langid_und(&self) -> bool {
        self.langid == LanguageIdentifier::UND
    }

    /// Gets the [`LanguageIdentifier`] for this [`DataLocale`].
    ///
    /// The identifier is returned as a clone. This may allocate memory if there are variant
    /// subtags. If you need only the language, script, and/or region subtag, use the specific
    /// getters for those subtags:
    ///
    /// - [`DataLocale::language()`]
    /// - [`DataLocale::script()`]
    /// - [`DataLocale::region()`]
    ///
    /// If you have ownership over the `DataLocale`, use [`DataLocale::into_locale()`]
    /// and then access the `id` field.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_locid::langid;
    /// use icu_provider::prelude::*;
    ///
    /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1");
    ///
    /// let req_no_langid = DataRequest {
    ///     locale: &Default::default(),
    ///     metadata: Default::default(),
    /// };
    ///
    /// let req_with_langid = DataRequest {
    ///     locale: &langid!("ar-EG").into(),
    ///     metadata: Default::default(),
    /// };
    ///
    /// assert_eq!(req_no_langid.locale.get_langid(), langid!("und"));
    /// assert_eq!(req_with_langid.locale.get_langid(), langid!("ar-EG"));
    /// ```
    pub fn get_langid(&self) -> LanguageIdentifier {
        self.langid.clone()
    }

    /// Overrides the entire [`LanguageIdentifier`] portion of this [`DataLocale`].
    ///
    /// Unicode keywords and auxiliary keys are left untouched.
    #[inline]
    pub fn set_langid(&mut self, lid: LanguageIdentifier) {
        self.langid = lid;
    }

    /// Converts this [`DataLocale`] into a [`Locale`].
    ///
    /// See also [`DataLocale::get_langid()`].
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_locid::{
    ///     langid, locale,
    ///     subtags::{language, region},
    /// };
    /// use icu_provider::prelude::*;
    ///
    /// let locale: DataLocale = locale!("it-IT-u-ca-coptic").into();
    ///
    /// assert_eq!(locale.get_langid(), langid!("it-IT"));
    /// assert_eq!(locale.language(), language!("it"));
    /// assert_eq!(locale.script(), None);
    /// assert_eq!(locale.region(), Some(region!("IT")));
    ///
    /// let locale = locale.into_locale();
    /// assert_eq!(locale, locale!("it-IT-u-ca-coptic"));
    /// ```
    ///
    /// Auxiliary keys are retained:
    ///
    /// ```
    /// use icu_provider::prelude::*;
    /// use writeable::assert_writeable_eq;
    ///
    /// let data_locale: DataLocale = "und-u-nu-arab-x-gbp".parse().unwrap();
    /// assert_writeable_eq!(data_locale, "und-u-nu-arab-x-gbp");
    ///
    /// let recovered_locale = data_locale.into_locale();
    /// assert_writeable_eq!(recovered_locale, "und-u-nu-arab-x-gbp");
    /// ```
    pub fn into_locale(self) -> Locale {
        // Start from a default `Locale` and fill in the pieces that `DataLocale` tracks
        let mut loc = Locale {
            id: self.langid,
            ..Default::default()
        };
        loc.extensions.unicode.keywords = self.keywords;
        #[cfg(feature = "experimental")]
        if let Some(aux) = self.aux {
            // Every constructor of `AuxiliaryKeys` in this file either takes `Subtag`s or
            // validates the pieces with `Subtag::from_str`, hence the unchecked constructor.
            loc.extensions.private =
                icu_locid::extensions::private::Private::from_vec_unchecked(aux.iter().collect());
        }
        loc
    }

    /// Returns the [`Language`] for this [`DataLocale`].
    #[inline]
    pub fn language(&self) -> Language {
        self.langid.language
    }

    /// Sets the [`Language`] for this [`DataLocale`].
    #[inline]
    pub fn set_language(&mut self, language: Language) {
        self.langid.language = language;
    }

    /// Returns the [`Script`] for this [`DataLocale`].
    #[inline]
    pub fn script(&self) -> Option<Script> {
        self.langid.script
    }

    /// Sets the [`Script`] for this [`DataLocale`].
    #[inline]
    pub fn set_script(&mut self, script: Option<Script>) {
        self.langid.script = script;
    }

    /// Returns the [`Region`] for this [`DataLocale`].
    #[inline]
    pub fn region(&self) -> Option<Region> {
        self.langid.region
    }

    /// Sets the [`Region`] for this [`DataLocale`].
    #[inline]
    pub fn set_region(&mut self, region: Option<Region>) {
        self.langid.region = region;
    }

    /// Returns whether there are any [`Variant`] subtags in this [`DataLocale`].
    #[inline]
    pub fn has_variants(&self) -> bool {
        !self.langid.variants.is_empty()
    }

    /// Sets all [`Variants`] on this [`DataLocale`], overwriting any that were there previously.
    #[inline]
    pub fn set_variants(&mut self, variants: Variants) {
        self.langid.variants = variants;
    }

    /// Removes all [`Variant`] subtags in this [`DataLocale`].
    ///
    /// Returns whatever `Variants::clear()` hands back (presumably the removed subtags —
    /// confirm against the `icu_locid` documentation).
    #[inline]
    pub fn clear_variants(&mut self) -> Variants {
        self.langid.variants.clear()
    }

    /// Gets the value of the specified Unicode extension keyword for this [`DataLocale`].
    ///
    /// The value is returned as a clone; `None` means the keyword is not present.
    #[inline]
    pub fn get_unicode_ext(&self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> {
        self.keywords.get(key).cloned()
    }

    /// Returns whether there are any Unicode extension keywords in this [`DataLocale`].
    #[inline]
    pub fn has_unicode_ext(&self) -> bool {
        !self.keywords.is_empty()
    }

    /// Returns whether a specific Unicode extension keyword is present in this [`DataLocale`].
    #[inline]
    pub fn contains_unicode_ext(&self, key: &unicode_ext::Key) -> bool {
        self.keywords.contains_key(key)
    }

    /// Returns whether this [`DataLocale`] contains a Unicode extension keyword
    /// with the specified key and value.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_locid::extensions::unicode::{key, value};
    /// use icu_provider::prelude::*;
    ///
    /// let locale: DataLocale = "it-IT-u-ca-coptic".parse().expect("Valid BCP-47");
    ///
    /// assert_eq!(locale.get_unicode_ext(&key!("hc")), None);
    /// assert_eq!(locale.get_unicode_ext(&key!("ca")), Some(value!("coptic")));
    /// assert!(locale.matches_unicode_ext(&key!("ca"), &value!("coptic"),));
    /// ```
    #[inline]
    pub fn matches_unicode_ext(&self, key: &unicode_ext::Key, value: &unicode_ext::Value) -> bool {
        self.keywords.get(key) == Some(value)
    }

    /// Sets the value for a specific Unicode extension keyword on this [`DataLocale`].
    ///
    /// Returns whatever `Keywords::set()` hands back (presumably the previous value for
    /// `key`, if any — confirm against the `icu_locid` documentation).
    #[inline]
    pub fn set_unicode_ext(
        &mut self,
        key: unicode_ext::Key,
        value: unicode_ext::Value,
    ) -> Option<unicode_ext::Value> {
        self.keywords.set(key, value)
    }

    /// Removes a specific Unicode extension keyword from this [`DataLocale`], returning
    /// the value if it was present.
    #[inline]
    pub fn remove_unicode_ext(&mut self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> {
        self.keywords.remove(key)
    }

    /// Retains a subset of keywords as specified by the predicate function.
    ///
    /// The predicate is called with each keyword's key.
    #[inline]
    pub fn retain_unicode_ext<F>(&mut self, predicate: F)
    where
        F: FnMut(&unicode_ext::Key) -> bool,
    {
        self.keywords.retain_by_key(predicate)
    }

    /// Gets the auxiliary key for this [`DataLocale`].
    ///
    /// For more information and examples, see [`AuxiliaryKeys`].
    #[cfg(feature = "experimental")]
    pub fn get_aux(&self) -> Option<&AuxiliaryKeys> {
        self.aux.as_ref()
    }

    /// Returns whether this [`DataLocale`] has an auxiliary key.
    ///
    /// For more information and examples, see [`AuxiliaryKeys`].
    #[cfg(feature = "experimental")]
    pub fn has_aux(&self) -> bool {
        self.aux.is_some()
    }

    /// Sets an auxiliary key on this [`DataLocale`].
    ///
    /// Returns the previous auxiliary key if present.
    ///
    /// For more information and examples, see [`AuxiliaryKeys`].
    #[cfg(feature = "experimental")]
    pub fn set_aux(&mut self, value: AuxiliaryKeys) -> Option<AuxiliaryKeys> {
        self.aux.replace(value)
    }

    /// Remove an auxiliary key, if present. Returns the removed auxiliary key.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_locid::langid;
    /// use icu_provider::prelude::*;
    /// use writeable::assert_writeable_eq;
    ///
    /// let mut data_locale: DataLocale = langid!("ar-EG").into();
    /// let aux = "gbp"
    ///     .parse::<AuxiliaryKeys>()
    ///     .expect("contains valid characters");
    /// data_locale.set_aux(aux);
    /// assert_writeable_eq!(data_locale, "ar-EG-x-gbp");
    ///
    /// let maybe_aux = data_locale.remove_aux();
    /// assert_writeable_eq!(data_locale, "ar-EG");
    /// assert_writeable_eq!(maybe_aux.unwrap(), "gbp");
    /// ```
    #[cfg(feature = "experimental")]
    pub fn remove_aux(&mut self) -> Option<AuxiliaryKeys> {
        self.aux.take()
    }
}
702
/// The "auxiliary key" is an annotation on [`DataLocale`] that can contain arbitrary
/// information that does not fit into the [`LanguageIdentifier`] or [`Keywords`].
705///
706/// A [`DataLocale`] can have multiple auxiliary keys, represented by this struct. The auxiliary
707/// keys are stored as private use subtags following `-x-`.
708///
709/// An auxiliary key currently allows 1-8 lowercase alphanumerics.
710///
711/// <div class="stab unstable">
712/// 🚧 This code is experimental; it may change at any time, in breaking or non-breaking ways,
713/// including in SemVer minor releases. It can be enabled with the "experimental" Cargo feature
714/// of the `icu_provider` crate. Use with caution.
715/// <a href="https://github.com/unicode-org/icu4x/issues/3632">#3632</a>
716/// </div>
717///
718/// # Examples
719///
720/// ```
721/// use icu_locid::langid;
722/// use icu_provider::prelude::*;
723/// use writeable::assert_writeable_eq;
724///
725/// let mut data_locale: DataLocale = langid!("ar-EG").into();
726/// assert_writeable_eq!(data_locale, "ar-EG");
727/// assert!(!data_locale.has_aux());
728/// assert_eq!(data_locale.get_aux(), None);
729///
730/// let aux = "gbp"
731/// .parse::<AuxiliaryKeys>()
732/// .expect("contains valid characters");
733///
734/// data_locale.set_aux(aux);
735/// assert_writeable_eq!(data_locale, "ar-EG-x-gbp");
736/// assert!(data_locale.has_aux());
737/// assert_eq!(data_locale.get_aux(), Some(&"gbp".parse().unwrap()));
738/// ```
739///
740/// Multiple auxiliary keys are allowed:
741///
742/// ```
743/// use icu_provider::prelude::*;
744/// use writeable::assert_writeable_eq;
745///
746/// let data_locale = "ar-EG-x-gbp-long".parse::<DataLocale>().unwrap();
747/// assert_writeable_eq!(data_locale, "ar-EG-x-gbp-long");
748/// assert_eq!(data_locale.get_aux().unwrap().iter().count(), 2);
749/// ```
750///
751/// Not all strings are valid auxiliary keys.
752/// The string must be well-formed and case-normalized:
753///
754/// ```
755/// use icu_provider::prelude::*;
756///
757/// assert!("abcdefg".parse::<AuxiliaryKeys>().is_ok());
758/// assert!("abc-xyz".parse::<AuxiliaryKeys>().is_ok());
759///
760/// assert!("".parse::<AuxiliaryKeys>().is_err());
761/// assert!("!@#$%".parse::<AuxiliaryKeys>().is_err());
762/// assert!("abc_xyz".parse::<AuxiliaryKeys>().is_err());
763/// assert!("ABC123".parse::<AuxiliaryKeys>().is_err());
764/// ```
765///
766/// [`Keywords`]: unicode_ext::Keywords
#[derive(Debug, PartialEq, Clone, Eq, Hash, PartialOrd, Ord)]
#[cfg(feature = "experimental")]
pub struct AuxiliaryKeys {
    // All keys joined with `-` into one string; `AuxiliaryKeysInner` picks the storage.
    value: AuxiliaryKeysInner,
}
772
/// Backing storage for [`AuxiliaryKeys`]: the hyphen-joined key string.
///
/// Equality, ordering, hashing and `Debug` are all implemented on the dereferenced `str`,
/// so which variant holds the string never affects comparisons.
#[cfg(feature = "experimental")]
#[derive(Clone)]
enum AuxiliaryKeysInner {
    /// Heap-allocated string; the constructors in this file use it when the joined
    /// string is longer than 23 bytes.
    Boxed(alloc::boxed::Box<str>),
    /// Inline ASCII string of at most 23 bytes.
    Stack(TinyAsciiStr<23>),
    // NOTE: In the future, a `Static` variant could be added to allow `data_locale!("...")`
    // Static(&'static str),
}
781
#[cfg(feature = "experimental")]
impl Deref for AuxiliaryKeysInner {
    type Target = str;

    /// Borrows the stored key string, whichever variant holds it.
    #[inline]
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Stack(inline) => inline.as_str(),
            // `&Box<str>` -> `Box<str>` -> `str`, then re-borrow
            Self::Boxed(boxed) => &**boxed,
        }
    }
}
793
#[cfg(feature = "experimental")]
impl PartialEq for AuxiliaryKeysInner {
    /// Two values are equal when their strings are equal, regardless of storage variant.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        let lhs: &str = self;
        let rhs: &str = other;
        lhs == rhs
    }
}
801
// Marker impl: `PartialEq` above compares the underlying `str`s, which is a full
// equivalence relation.
#[cfg(feature = "experimental")]
impl Eq for AuxiliaryKeysInner {}
804
#[cfg(feature = "experimental")]
impl PartialOrd for AuxiliaryKeysInner {
    /// Always `Some`: the ordering is total and comes from the `Ord` impl.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
}
811
#[cfg(feature = "experimental")]
impl Ord for AuxiliaryKeysInner {
    /// Orders by the underlying strings, regardless of storage variant.
    fn cmp(&self, other: &Self) -> Ordering {
        let lhs: &str = self;
        let rhs: &str = other;
        Ord::cmp(lhs, rhs)
    }
}
818
#[cfg(feature = "experimental")]
impl Debug for AuxiliaryKeysInner {
    /// Formats exactly like the underlying `str` does under `Debug`.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = self;
        Debug::fmt(text, f)
    }
}
826
#[cfg(feature = "experimental")]
impl Hash for AuxiliaryKeysInner {
    /// Hashes the underlying `str`, keeping `Hash` consistent with `Eq`.
    #[inline]
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        let text: &str = self;
        Hash::hash(text, state)
    }
}
834
// Generates the `fmt::Display` impl for `AuxiliaryKeys`. Judging by the macro name it forwards
// to the `Writeable` impl — confirm in the `writeable` docs.
#[cfg(feature = "experimental")]
writeable::impl_display_with_writeable!(AuxiliaryKeys);
837
/// Every method forwards to the `Writeable` impl of the stored `str`.
#[cfg(feature = "experimental")]
impl Writeable for AuxiliaryKeys {
    fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
        let text: &str = &self.value;
        text.write_to(sink)
    }

    fn writeable_length_hint(&self) -> LengthHint {
        let text: &str = &self.value;
        text.writeable_length_hint()
    }

    fn write_to_string(&self) -> alloc::borrow::Cow<str> {
        let text: &str = &self.value;
        text.write_to_string()
    }
}
850
#[cfg(feature = "experimental")]
impl FromStr for AuxiliaryKeys {
    type Err = DataError;

    /// Parses one or more hyphen-separated auxiliary keys.
    ///
    /// # Errors
    ///
    /// Returns a [`DataErrorKind::KeyLocaleSyntax`] error when `s` is empty, or when any
    /// piece is not a valid private-use subtag already in its normalized form.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // A piece is accepted only if it parses as a subtag AND round-trips unchanged,
        // which enforces normalization.
        let is_normalized_subtag =
            |piece: &str| matches!(Subtag::from_str(piece), Ok(subtag) if piece == subtag.as_str());

        let well_formed = !s.is_empty() && s.split(Self::separator()).all(is_normalized_subtag);
        if !well_formed {
            return Err(DataErrorKind::KeyLocaleSyntax
                .into_error()
                .with_display_context(s));
        }

        #[allow(clippy::unwrap_used)] // we just checked that the string is ascii
        let value = if s.len() > 23 {
            AuxiliaryKeysInner::Boxed(s.into())
        } else {
            AuxiliaryKeysInner::Stack(s.parse().unwrap())
        };
        Ok(Self { value })
    }
}
883
#[cfg(feature = "experimental")]
impl AuxiliaryKeys {
    /// Builds an [`AuxiliaryKeys`] by joining the subtags produced by `iter` with hyphens.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_locid::extensions::private::subtag;
    /// use icu_provider::prelude::*;
    ///
    /// // Single auxiliary key:
    /// let a = AuxiliaryKeys::try_from_iter([subtag!("abc")]).unwrap();
    /// let b = "abc".parse::<AuxiliaryKeys>().unwrap();
    /// assert_eq!(a, b);
    ///
    /// // Multiple auxiliary keys:
    /// let a = AuxiliaryKeys::try_from_iter([subtag!("abc"), subtag!("defg")])
    ///     .unwrap();
    /// let b = "abc-defg".parse::<AuxiliaryKeys>().unwrap();
    /// assert_eq!(a, b);
    /// ```
    ///
    /// An empty iterator is rejected:
    ///
    /// ```
    /// use icu_provider::prelude::*;
    ///
    /// assert!(AuxiliaryKeys::try_from_iter([]).is_err());
    /// ```
    ///
    /// # Errors
    ///
    /// Returns a [`DataErrorKind::KeyLocaleSyntax`] error when the joined string is empty.
    pub fn try_from_iter(iter: impl IntoIterator<Item = Subtag>) -> Result<Self, DataError> {
        // TODO: Avoid the allocation when possible
        let mut joined = String::new();
        for subtag in iter {
            if !joined.is_empty() {
                joined.push(Self::separator());
            }
            joined.push_str(subtag.as_str());
        }
        if joined.is_empty() {
            return Err(DataErrorKind::KeyLocaleSyntax.with_str_context("empty aux iterator"));
        }

        #[allow(clippy::unwrap_used)] // we just checked that the string is ascii
        let value = if joined.len() > 23 {
            AuxiliaryKeysInner::Boxed(joined.into())
        } else {
            AuxiliaryKeysInner::Stack(joined.parse().unwrap())
        };
        Ok(Self { value })
    }

    /// Builds an [`AuxiliaryKeys`] holding exactly one subtag. Usable in `const` contexts.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_locid::extensions::private::subtag;
    /// use icu_provider::prelude::*;
    ///
    /// // Single auxiliary key:
    /// let a = AuxiliaryKeys::from_subtag(subtag!("abc"));
    /// let b = "abc".parse::<AuxiliaryKeys>().unwrap();
    /// assert_eq!(a, b);
    /// ```
    pub const fn from_subtag(input: Subtag) -> Self {
        // Widen the subtag's inline string to the 23-byte inline storage
        let value = AuxiliaryKeysInner::Stack(input.into_tinystr().resize());
        Self { value }
    }

    /// Yields each auxiliary key as a [`Subtag`], in stored order.
    ///
    /// # Example
    ///
    /// ```
    /// use icu_locid::extensions::private::subtag;
    /// use icu_provider::AuxiliaryKeys;
    ///
    /// let aux: AuxiliaryKeys = "abc-defg".parse().unwrap();
    /// assert_eq!(
    ///     aux.iter().collect::<Vec<_>>(),
    ///     vec![subtag!("abc"), subtag!("defg")]
    /// );
    /// ```
    pub fn iter(&self) -> impl Iterator<Item = Subtag> + '_ {
        self.value.split(Self::separator()).filter_map(|piece| {
            let parsed: Option<Subtag> = piece.parse().ok();
            // A piece that fails to parse trips the assertion in debug builds and is
            // skipped in release builds.
            debug_assert!(parsed.is_some(), "failed to convert to subtag: {piece}");
            parsed
        })
    }

    /// Returns the separator character placed between auxiliary keys in data locales.
    ///
    /// This is, according to BCP-47, an ASCII hyphen.
    #[inline]
    pub(crate) const fn separator() -> char {
        const HYPHEN: char = '-';
        HYPHEN
    }
}
990
#[cfg(feature = "experimental")]
impl From<Subtag> for AuxiliaryKeys {
    /// Wraps a single subtag as an [`AuxiliaryKeys`].
    ///
    /// Delegates to [`AuxiliaryKeys::from_subtag()`], which widens the subtag's inline string
    /// infallibly. This keeps the two constructors in agreement and avoids a fallible
    /// byte-level re-parse (and the panic path that came with it).
    #[inline]
    fn from(subtag: Subtag) -> Self {
        Self::from_subtag(subtag)
    }
}
1003
/// Checks that a parsed `DataLocale`, optionally extended with an auxiliary key, writes
/// out as the expected BCP-47 string.
#[test]
fn test_data_locale_to_string() {
    struct TestCase {
        pub locale: &'static str,
        pub aux: Option<&'static str>,
        pub expected: &'static str,
    }

    let cases = [
        TestCase {
            locale: "und",
            aux: None,
            expected: "und",
        },
        TestCase {
            locale: "und-u-cu-gbp",
            aux: None,
            expected: "und-u-cu-gbp",
        },
        TestCase {
            locale: "en-ZA-u-cu-gbp",
            aux: None,
            expected: "en-ZA-u-cu-gbp",
        },
        #[cfg(feature = "experimental")]
        TestCase {
            locale: "en-ZA-u-nu-arab",
            aux: Some("gbp"),
            expected: "en-ZA-u-nu-arab-x-gbp",
        },
    ];

    for case in cases {
        let mut data_locale: DataLocale = case.locale.parse().unwrap();
        // Auxiliary keys can only be attached when the feature provides `set_aux`
        #[cfg(feature = "experimental")]
        if let Some(aux_str) = case.aux {
            data_locale.set_aux(aux_str.parse().unwrap());
        }
        writeable::assert_writeable_eq!(data_locale, case.expected);
    }
}
1043
/// Checks `DataLocale::from_str`: which inputs parse, and what a parsed locale writes back.
///
/// `expected` is `Some(s)` when the input must parse and write out as `s`, and `None` when
/// parsing must fail.
#[test]
fn test_data_locale_from_string() {
    #[derive(Debug)]
    struct TestCase {
        pub input: &'static str,
        pub expected: Option<&'static str>,
    }

    for cas in [
        TestCase {
            input: "und",
            expected: Some("und"),
        },
        TestCase {
            input: "und-u-cu-gbp",
            expected: Some("und-u-cu-gbp"),
        },
        TestCase {
            input: "en-ZA-u-cu-gbp",
            expected: Some("en-ZA-u-cu-gbp"),
        },
        TestCase {
            input: "en...",
            expected: None,
        },
        #[cfg(feature = "experimental")]
        TestCase {
            input: "en-ZA-u-nu-arab-x-gbp",
            expected: Some("en-ZA-u-nu-arab-x-gbp"),
        },
        // Without the "experimental" feature the input is still a valid `Locale`, so
        // `from_str` succeeds; `From<Locale>` then drops the private-use subtags because
        // there is no `aux` field to hold them.
        #[cfg(not(feature = "experimental"))]
        TestCase {
            input: "en-ZA-u-nu-arab-x-gbp",
            expected: Some("en-ZA-u-nu-arab"),
        },
    ] {
        match (DataLocale::from_str(cas.input), cas.expected) {
            (Ok(data_locale), Some(expected)) => {
                writeable::assert_writeable_eq!(data_locale, expected);
            }
            (Err(_), None) => {}
            (Ok(_), None) => {
                panic!("DataLocale parsed but it was supposed to fail: {cas:?}");
            }
            (Err(_), Some(_)) => {
                panic!("DataLocale was supposed to parse but it failed: {cas:?}");
            }
        }
    }
}
1094}