icu_provider/
request.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::{DataError, DataErrorKind};
6use core::cmp::Ordering;
7use core::default::Default;
8use core::fmt;
9use core::fmt::Debug;
10use core::hash::Hash;
11use core::str::FromStr;
12use icu_locid::extensions::unicode as unicode_ext;
13use icu_locid::subtags::{Language, Region, Script, Variants};
14use icu_locid::{LanguageIdentifier, Locale};
15use writeable::{LengthHint, Writeable};
16
17#[cfg(feature = "experimental")]
18use alloc::string::String;
19#[cfg(feature = "experimental")]
20use core::ops::Deref;
21#[cfg(feature = "experimental")]
22use icu_locid::extensions::private::Subtag;
23#[cfg(feature = "experimental")]
24use tinystr::TinyAsciiStr;
25
26#[cfg(doc)]
27use icu_locid::subtags::Variant;
28
/// The request type passed into all data provider implementations.
///
/// A request borrows the [`DataLocale`] it refers to and carries a small, copyable
/// [`DataRequestMetadata`] value alongside it.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
#[allow(clippy::exhaustive_structs)] // this type is stable
pub struct DataRequest<'a> {
    /// The locale for which to load data.
    ///
    /// If locale fallback is enabled, the resulting data may be from a different locale
    /// than the one requested here.
    pub locale: &'a DataLocale,
    /// Metadata that may affect the behavior of the data provider.
    pub metadata: DataRequestMetadata,
}
41
42impl fmt::Display for DataRequest<'_> {
43    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
44        fmt::Display::fmt(&self.locale, f)
45    }
46}
47
/// Metadata for data requests. This currently carries only the `silent` flag, but it may be
/// extended with options for tuning locale fallback, buffer layout, and so forth.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[non_exhaustive]
pub struct DataRequestMetadata {
    /// Silent requests do not log errors. This can be used for exploratory querying, such as fallbacks.
    pub silent: bool,
}
56
57/// A locale type optimized for use in fallbacking and the ICU4X data pipeline.
58///
59/// [`DataLocale`] contains less functionality than [`Locale`] but more than
60/// [`LanguageIdentifier`] for better size and performance while still meeting
61/// the needs of the ICU4X data pipeline.
62///
63/// # Examples
64///
65/// Convert a [`Locale`] to a [`DataLocale`] and back:
66///
67/// ```
68/// use icu_locid::locale;
69/// use icu_provider::DataLocale;
70///
71/// let locale = locale!("en-u-ca-buddhist");
72/// let data_locale = DataLocale::from(locale);
73/// let locale = data_locale.into_locale();
74///
75/// assert_eq!(locale, locale!("en-u-ca-buddhist"));
76/// ```
77///
78/// You can alternatively create a [`DataLocale`] from a borrowed [`Locale`], which is more
79/// efficient than cloning the [`Locale`], but less efficient than converting an owned
80/// [`Locale`]:
81///
82/// ```
83/// use icu_locid::locale;
84/// use icu_provider::DataLocale;
85///
86/// let locale1 = locale!("en-u-ca-buddhist");
87/// let data_locale = DataLocale::from(&locale1);
88/// let locale2 = data_locale.into_locale();
89///
90/// assert_eq!(locale1, locale2);
91/// ```
92///
93/// If you are sure that you have no Unicode keywords, start with [`LanguageIdentifier`]:
94///
95/// ```
96/// use icu_locid::langid;
97/// use icu_provider::DataLocale;
98///
99/// let langid = langid!("es-CA-valencia");
100/// let data_locale = DataLocale::from(langid);
101/// let langid = data_locale.get_langid();
102///
103/// assert_eq!(langid, langid!("es-CA-valencia"));
104/// ```
105///
106/// [`DataLocale`] only supports `-u` keywords, to reflect the current state of CLDR data
107/// lookup and fallback. This may change in the future.
108///
109/// ```
110/// use icu_locid::{locale, Locale};
111/// use icu_provider::DataLocale;
112///
113/// let locale = "hi-t-en-h0-hybrid-u-attr-ca-buddhist"
114///     .parse::<Locale>()
115///     .unwrap();
116/// let data_locale = DataLocale::from(locale);
117///
118/// assert_eq!(data_locale.into_locale(), locale!("hi-u-ca-buddhist"));
119/// ```
#[derive(PartialEq, Clone, Default, Eq, Hash)]
pub struct DataLocale {
    /// The language identifier portion; written first when the locale is serialized.
    langid: LanguageIdentifier,
    /// The Unicode extension keywords; written after `-u-` when non-empty.
    keywords: unicode_ext::Keywords,
    /// Optional auxiliary keys; written after `-x-` when present.
    /// Only compiled in with the "experimental" feature.
    #[cfg(feature = "experimental")]
    aux: Option<AuxiliaryKeys>,
}
127
128impl<'a> Default for &'a DataLocale {
129    fn default() -> Self {
130        static DEFAULT: DataLocale = DataLocale {
131            langid: LanguageIdentifier::UND,
132            keywords: unicode_ext::Keywords::new(),
133            #[cfg(feature = "experimental")]
134            aux: None,
135        };
136        &DEFAULT
137    }
138}
139
140impl fmt::Debug for DataLocale {
141    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
142        write!(f, "DataLocale{{{self}}}")
143    }
144}
145
146impl Writeable for DataLocale {
147    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
148        self.langid.write_to(sink)?;
149        if !self.keywords.is_empty() {
150            sink.write_str("-u-")?;
151            self.keywords.write_to(sink)?;
152        }
153        #[cfg(feature = "experimental")]
154        if let Some(aux) = self.aux.as_ref() {
155            sink.write_str("-x-")?;
156            aux.write_to(sink)?;
157        }
158        Ok(())
159    }
160
161    fn writeable_length_hint(&self) -> LengthHint {
162        let mut length_hint = self.langid.writeable_length_hint();
163        if !self.keywords.is_empty() {
164            length_hint += self.keywords.writeable_length_hint() + 3;
165        }
166        #[cfg(feature = "experimental")]
167        if let Some(aux) = self.aux.as_ref() {
168            length_hint += aux.writeable_length_hint() + 3;
169        }
170        length_hint
171    }
172
173    fn write_to_string(&self) -> alloc::borrow::Cow<str> {
174        #[cfg_attr(not(feature = "experimental"), allow(unused_mut))]
175        let mut is_only_langid = self.keywords.is_empty();
176        #[cfg(feature = "experimental")]
177        {
178            is_only_langid = is_only_langid && self.aux.is_none();
179        }
180        if is_only_langid {
181            return self.langid.write_to_string();
182        }
183        let mut string =
184            alloc::string::String::with_capacity(self.writeable_length_hint().capacity());
185        let _ = self.write_to(&mut string);
186        alloc::borrow::Cow::Owned(string)
187    }
188}
189
// Macro from the `writeable` crate; going by its name, it provides the `fmt::Display`
// impl for `DataLocale` on top of the `Writeable` impl above.
writeable::impl_display_with_writeable!(DataLocale);
191
192impl From<LanguageIdentifier> for DataLocale {
193    fn from(langid: LanguageIdentifier) -> Self {
194        Self {
195            langid,
196            keywords: unicode_ext::Keywords::new(),
197            #[cfg(feature = "experimental")]
198            aux: None,
199        }
200    }
201}
202
203impl From<Locale> for DataLocale {
204    fn from(locale: Locale) -> Self {
205        Self {
206            langid: locale.id,
207            keywords: locale.extensions.unicode.keywords,
208            #[cfg(feature = "experimental")]
209            aux: AuxiliaryKeys::try_from_iter(locale.extensions.private.iter().copied()).ok(),
210        }
211    }
212}
213
214impl From<&LanguageIdentifier> for DataLocale {
215    fn from(langid: &LanguageIdentifier) -> Self {
216        Self {
217            langid: langid.clone(),
218            keywords: unicode_ext::Keywords::new(),
219            #[cfg(feature = "experimental")]
220            aux: None,
221        }
222    }
223}
224
225impl From<&Locale> for DataLocale {
226    fn from(locale: &Locale) -> Self {
227        Self {
228            langid: locale.id.clone(),
229            keywords: locale.extensions.unicode.keywords.clone(),
230            #[cfg(feature = "experimental")]
231            aux: AuxiliaryKeys::try_from_iter(locale.extensions.private.iter().copied()).ok(),
232        }
233    }
234}
235
236impl FromStr for DataLocale {
237    type Err = DataError;
238    fn from_str(s: &str) -> Result<Self, Self::Err> {
239        let locale = Locale::from_str(s).map_err(|e| {
240            DataErrorKind::KeyLocaleSyntax
241                .into_error()
242                .with_display_context(s)
243                .with_display_context(&e)
244        })?;
245        Ok(DataLocale::from(locale))
246    }
247}
248
249impl DataLocale {
250    /// Compare this [`DataLocale`] with BCP-47 bytes.
251    ///
252    /// The return value is equivalent to what would happen if you first converted this
253    /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison.
254    ///
255    /// This function is case-sensitive and results in a *total order*, so it is appropriate for
256    /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
257    ///
258    /// # Examples
259    ///
260    /// ```
261    /// use icu_provider::DataLocale;
262    /// use std::cmp::Ordering;
263    ///
264    /// let bcp47_strings: &[&str] = &[
265    ///     "ca",
266    ///     "ca-ES",
267    ///     "ca-ES-u-ca-buddhist",
268    ///     "ca-ES-valencia",
269    ///     "ca-ES-x-gbp",
270    ///     "ca-ES-x-gbp-short",
271    ///     "ca-ES-x-usd",
272    ///     "ca-ES-xyzabc",
273    ///     "ca-x-eur",
274    ///     "cat",
275    ///     "pl-Latn-PL",
276    ///     "und",
277    ///     "und-fonipa",
278    ///     "und-u-ca-hebrew",
279    ///     "und-u-ca-japanese",
280    ///     "und-x-mxn",
281    ///     "zh",
282    /// ];
283    ///
284    /// for ab in bcp47_strings.windows(2) {
285    ///     let a = ab[0];
286    ///     let b = ab[1];
287    ///     assert_eq!(a.cmp(b), Ordering::Less, "strings: {} < {}", a, b);
288    ///     let a_loc: DataLocale = a.parse().unwrap();
289    ///     assert_eq!(
290    ///         a_loc.strict_cmp(a.as_bytes()),
291    ///         Ordering::Equal,
292    ///         "strict_cmp: {} == {}",
293    ///         a_loc,
294    ///         a
295    ///     );
296    ///     assert_eq!(
297    ///         a_loc.strict_cmp(b.as_bytes()),
298    ///         Ordering::Less,
299    ///         "strict_cmp: {} < {}",
300    ///         a_loc,
301    ///         b
302    ///     );
303    ///     let b_loc: DataLocale = b.parse().unwrap();
304    ///     assert_eq!(
305    ///         b_loc.strict_cmp(b.as_bytes()),
306    ///         Ordering::Equal,
307    ///         "strict_cmp: {} == {}",
308    ///         b_loc,
309    ///         b
310    ///     );
311    ///     assert_eq!(
312    ///         b_loc.strict_cmp(a.as_bytes()),
313    ///         Ordering::Greater,
314    ///         "strict_cmp: {} > {}",
315    ///         b_loc,
316    ///         a
317    ///     );
318    /// }
319    /// ```
320    ///
321    /// Comparison against invalid strings:
322    ///
323    /// ```
324    /// use icu_provider::DataLocale;
325    ///
326    /// let invalid_strings: &[&str] = &[
327    ///     // Less than "ca-ES"
328    ///     "CA",
329    ///     "ar-x-gbp-FOO",
330    ///     // Greater than "ca-ES-x-gbp"
331    ///     "ca_ES",
332    ///     "ca-ES-x-gbp-FOO",
333    /// ];
334    ///
335    /// let data_locale = "ca-ES-x-gbp".parse::<DataLocale>().unwrap();
336    ///
337    /// for s in invalid_strings.iter() {
338    ///     let expected_ordering = "ca-ES-x-gbp".cmp(s);
339    ///     let actual_ordering = data_locale.strict_cmp(s.as_bytes());
340    ///     assert_eq!(expected_ordering, actual_ordering, "{}", s);
341    /// }
342    /// ```
343    pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
344        self.writeable_cmp_bytes(other)
345    }
346}
347
348impl DataLocale {
349    /// Returns whether this [`DataLocale`] has all empty fields (no components).
350    ///
351    /// See also:
352    ///
353    /// - [`DataLocale::is_und()`]
354    /// - [`DataLocale::is_langid_und()`]
355    ///
356    /// # Examples
357    ///
358    /// ```
359    /// use icu_provider::DataLocale;
360    ///
361    /// assert!("und".parse::<DataLocale>().unwrap().is_empty());
362    /// assert!(!"und-u-ca-buddhist"
363    ///     .parse::<DataLocale>()
364    ///     .unwrap()
365    ///     .is_empty());
366    /// assert!(!"und-x-aux".parse::<DataLocale>().unwrap().is_empty());
367    /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_empty());
368    /// ```
369    pub fn is_empty(&self) -> bool {
370        self == <&DataLocale>::default()
371    }
372
373    /// Returns an ordering suitable for use in [`BTreeSet`].
374    ///
375    /// The ordering may or may not be equivalent to string ordering, and it
376    /// may or may not be stable across ICU4X releases.
377    ///
378    /// [`BTreeSet`]: alloc::collections::BTreeSet
379    pub fn total_cmp(&self, other: &Self) -> Ordering {
380        self.langid
381            .total_cmp(&other.langid)
382            .then_with(|| self.keywords.cmp(&other.keywords))
383            .then_with(|| {
384                #[cfg(feature = "experimental")]
385                return self.aux.cmp(&other.aux);
386                #[cfg(not(feature = "experimental"))]
387                return Ordering::Equal;
388            })
389    }
390
391    /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion.
392    ///
393    /// This ignores auxiliary keys.
394    ///
395    /// See also:
396    ///
397    /// - [`DataLocale::is_empty()`]
398    /// - [`DataLocale::is_langid_und()`]
399    ///
400    /// # Examples
401    ///
402    /// ```
403    /// use icu_provider::DataLocale;
404    ///
405    /// assert!("und".parse::<DataLocale>().unwrap().is_und());
406    /// assert!(!"und-u-ca-buddhist".parse::<DataLocale>().unwrap().is_und());
407    /// assert!("und-x-aux".parse::<DataLocale>().unwrap().is_und());
408    /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_und());
409    /// ```
410    pub fn is_und(&self) -> bool {
411        self.langid == LanguageIdentifier::UND && self.keywords.is_empty()
412    }
413
414    /// Returns whether the [`LanguageIdentifier`] associated with this request is `und`.
415    ///
416    /// This ignores extension keywords and auxiliary keys.
417    ///
418    /// See also:
419    ///
420    /// - [`DataLocale::is_empty()`]
421    /// - [`DataLocale::is_und()`]
422    ///
423    /// # Examples
424    ///
425    /// ```
426    /// use icu_provider::DataLocale;
427    ///
428    /// assert!("und".parse::<DataLocale>().unwrap().is_langid_und());
429    /// assert!("und-u-ca-buddhist"
430    ///     .parse::<DataLocale>()
431    ///     .unwrap()
432    ///     .is_langid_und());
433    /// assert!("und-x-aux".parse::<DataLocale>().unwrap().is_langid_und());
434    /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_langid_und());
435    /// ```
436    pub fn is_langid_und(&self) -> bool {
437        self.langid == LanguageIdentifier::UND
438    }
439
440    /// Gets the [`LanguageIdentifier`] for this [`DataLocale`].
441    ///
442    /// This may allocate memory if there are variant subtags. If you need only the language,
443    /// script, and/or region subtag, use the specific getters for those subtags:
444    ///
445    /// - [`DataLocale::language()`]
446    /// - [`DataLocale::script()`]
447    /// - [`DataLocale::region()`]
448    ///
449    /// If you have ownership over the `DataLocale`, use [`DataLocale::into_locale()`]
450    /// and then access the `id` field.
451    ///
452    /// # Examples
453    ///
454    /// ```
455    /// use icu_locid::langid;
456    /// use icu_provider::prelude::*;
457    ///
458    /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1");
459    ///
460    /// let req_no_langid = DataRequest {
461    ///     locale: &Default::default(),
462    ///     metadata: Default::default(),
463    /// };
464    ///
465    /// let req_with_langid = DataRequest {
466    ///     locale: &langid!("ar-EG").into(),
467    ///     metadata: Default::default(),
468    /// };
469    ///
470    /// assert_eq!(req_no_langid.locale.get_langid(), langid!("und"));
471    /// assert_eq!(req_with_langid.locale.get_langid(), langid!("ar-EG"));
472    /// ```
473    pub fn get_langid(&self) -> LanguageIdentifier {
474        self.langid.clone()
475    }
476
477    /// Overrides the entire [`LanguageIdentifier`] portion of this [`DataLocale`].
478    #[inline]
479    pub fn set_langid(&mut self, lid: LanguageIdentifier) {
480        self.langid = lid;
481    }
482
483    /// Converts this [`DataLocale`] into a [`Locale`].
484    ///
485    /// See also [`DataLocale::get_langid()`].
486    ///
487    /// # Examples
488    ///
489    /// ```
490    /// use icu_locid::{
491    ///     langid, locale,
492    ///     subtags::{language, region},
493    /// };
494    /// use icu_provider::prelude::*;
495    ///
496    /// let locale: DataLocale = locale!("it-IT-u-ca-coptic").into();
497    ///
498    /// assert_eq!(locale.get_langid(), langid!("it-IT"));
499    /// assert_eq!(locale.language(), language!("it"));
500    /// assert_eq!(locale.script(), None);
501    /// assert_eq!(locale.region(), Some(region!("IT")));
502    ///
503    /// let locale = locale.into_locale();
504    /// assert_eq!(locale, locale!("it-IT-u-ca-coptic"));
505    /// ```
506    ///
507    /// Auxiliary keys are retained:
508    ///
509    /// ```
510    /// use icu_provider::prelude::*;
511    /// use writeable::assert_writeable_eq;
512    ///
513    /// let data_locale: DataLocale = "und-u-nu-arab-x-gbp".parse().unwrap();
514    /// assert_writeable_eq!(data_locale, "und-u-nu-arab-x-gbp");
515    ///
516    /// let recovered_locale = data_locale.into_locale();
517    /// assert_writeable_eq!(recovered_locale, "und-u-nu-arab-x-gbp");
518    /// ```
519    pub fn into_locale(self) -> Locale {
520        let mut loc = Locale {
521            id: self.langid,
522            ..Default::default()
523        };
524        loc.extensions.unicode.keywords = self.keywords;
525        #[cfg(feature = "experimental")]
526        if let Some(aux) = self.aux {
527            loc.extensions.private =
528                icu_locid::extensions::private::Private::from_vec_unchecked(aux.iter().collect());
529        }
530        loc
531    }
532
533    /// Returns the [`Language`] for this [`DataLocale`].
534    #[inline]
535    pub fn language(&self) -> Language {
536        self.langid.language
537    }
538
539    /// Returns the [`Language`] for this [`DataLocale`].
540    #[inline]
541    pub fn set_language(&mut self, language: Language) {
542        self.langid.language = language;
543    }
544
545    /// Returns the [`Script`] for this [`DataLocale`].
546    #[inline]
547    pub fn script(&self) -> Option<Script> {
548        self.langid.script
549    }
550
551    /// Sets the [`Script`] for this [`DataLocale`].
552    #[inline]
553    pub fn set_script(&mut self, script: Option<Script>) {
554        self.langid.script = script;
555    }
556
557    /// Returns the [`Region`] for this [`DataLocale`].
558    #[inline]
559    pub fn region(&self) -> Option<Region> {
560        self.langid.region
561    }
562
563    /// Sets the [`Region`] for this [`DataLocale`].
564    #[inline]
565    pub fn set_region(&mut self, region: Option<Region>) {
566        self.langid.region = region;
567    }
568
569    /// Returns whether there are any [`Variant`] subtags in this [`DataLocale`].
570    #[inline]
571    pub fn has_variants(&self) -> bool {
572        !self.langid.variants.is_empty()
573    }
574
575    /// Sets all [`Variants`] on this [`DataLocale`], overwriting any that were there previously.
576    #[inline]
577    pub fn set_variants(&mut self, variants: Variants) {
578        self.langid.variants = variants;
579    }
580
581    /// Removes all [`Variant`] subtags in this [`DataLocale`].
582    #[inline]
583    pub fn clear_variants(&mut self) -> Variants {
584        self.langid.variants.clear()
585    }
586
587    /// Gets the value of the specified Unicode extension keyword for this [`DataLocale`].
588    #[inline]
589    pub fn get_unicode_ext(&self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> {
590        self.keywords.get(key).cloned()
591    }
592
593    /// Returns whether there are any Unicode extension keywords in this [`DataLocale`].
594    #[inline]
595    pub fn has_unicode_ext(&self) -> bool {
596        !self.keywords.is_empty()
597    }
598
599    /// Returns whether a specific Unicode extension keyword is present in this [`DataLocale`].
600    #[inline]
601    pub fn contains_unicode_ext(&self, key: &unicode_ext::Key) -> bool {
602        self.keywords.contains_key(key)
603    }
604
605    /// Returns whether this [`DataLocale`] contains a Unicode extension keyword
606    /// with the specified key and value.
607    ///
608    /// # Examples
609    ///
610    /// ```
611    /// use icu_locid::extensions::unicode::{key, value};
612    /// use icu_provider::prelude::*;
613    ///
614    /// let locale: DataLocale = "it-IT-u-ca-coptic".parse().expect("Valid BCP-47");
615    ///
616    /// assert_eq!(locale.get_unicode_ext(&key!("hc")), None);
617    /// assert_eq!(locale.get_unicode_ext(&key!("ca")), Some(value!("coptic")));
618    /// assert!(locale.matches_unicode_ext(&key!("ca"), &value!("coptic"),));
619    /// ```
620    #[inline]
621    pub fn matches_unicode_ext(&self, key: &unicode_ext::Key, value: &unicode_ext::Value) -> bool {
622        self.keywords.get(key) == Some(value)
623    }
624
625    /// Sets the value for a specific Unicode extension keyword on this [`DataLocale`].
626    #[inline]
627    pub fn set_unicode_ext(
628        &mut self,
629        key: unicode_ext::Key,
630        value: unicode_ext::Value,
631    ) -> Option<unicode_ext::Value> {
632        self.keywords.set(key, value)
633    }
634
635    /// Removes a specific Unicode extension keyword from this [`DataLocale`], returning
636    /// the value if it was present.
637    #[inline]
638    pub fn remove_unicode_ext(&mut self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> {
639        self.keywords.remove(key)
640    }
641
642    /// Retains a subset of keywords as specified by the predicate function.
643    #[inline]
644    pub fn retain_unicode_ext<F>(&mut self, predicate: F)
645    where
646        F: FnMut(&unicode_ext::Key) -> bool,
647    {
648        self.keywords.retain_by_key(predicate)
649    }
650
651    /// Gets the auxiliary key for this [`DataLocale`].
652    ///
653    /// For more information and examples, see [`AuxiliaryKeys`].
654    #[cfg(feature = "experimental")]
655    pub fn get_aux(&self) -> Option<&AuxiliaryKeys> {
656        self.aux.as_ref()
657    }
658
659    /// Returns whether this [`DataLocale`] has an auxiliary key.
660    ///
661    /// For more information and examples, see [`AuxiliaryKeys`].
662    #[cfg(feature = "experimental")]
663    pub fn has_aux(&self) -> bool {
664        self.aux.is_some()
665    }
666
667    /// Sets an auxiliary key on this [`DataLocale`].
668    ///
669    /// Returns the previous auxiliary key if present.
670    ///
671    /// For more information and examples, see [`AuxiliaryKeys`].
672    #[cfg(feature = "experimental")]
673    pub fn set_aux(&mut self, value: AuxiliaryKeys) -> Option<AuxiliaryKeys> {
674        self.aux.replace(value)
675    }
676
677    /// Remove an auxiliary key, if present. Returns the removed auxiliary key.
678    ///
679    /// # Examples
680    ///
681    /// ```
682    /// use icu_locid::langid;
683    /// use icu_provider::prelude::*;
684    /// use writeable::assert_writeable_eq;
685    ///
686    /// let mut data_locale: DataLocale = langid!("ar-EG").into();
687    /// let aux = "gbp"
688    ///     .parse::<AuxiliaryKeys>()
689    ///     .expect("contains valid characters");
690    /// data_locale.set_aux(aux);
691    /// assert_writeable_eq!(data_locale, "ar-EG-x-gbp");
692    ///
693    /// let maybe_aux = data_locale.remove_aux();
694    /// assert_writeable_eq!(data_locale, "ar-EG");
695    /// assert_writeable_eq!(maybe_aux.unwrap(), "gbp");
696    /// ```
697    #[cfg(feature = "experimental")]
698    pub fn remove_aux(&mut self) -> Option<AuxiliaryKeys> {
699        self.aux.take()
700    }
701}
702
/// The "auxiliary key" is an annotation on [`DataLocale`] that can contain arbitrary
/// information that does not fit into the [`LanguageIdentifier`] or [`Keywords`].
705///
706/// A [`DataLocale`] can have multiple auxiliary keys, represented by this struct. The auxiliary
707/// keys are stored as private use subtags following `-x-`.
708///
709/// An auxiliary key currently allows 1-8 lowercase alphanumerics.
710///
711/// <div class="stab unstable">
712/// 🚧 This code is experimental; it may change at any time, in breaking or non-breaking ways,
713/// including in SemVer minor releases. It can be enabled with the "experimental" Cargo feature
714/// of the `icu_provider` crate. Use with caution.
715/// <a href="https://github.com/unicode-org/icu4x/issues/3632">#3632</a>
716/// </div>
717///
718/// # Examples
719///
720/// ```
721/// use icu_locid::langid;
722/// use icu_provider::prelude::*;
723/// use writeable::assert_writeable_eq;
724///
725/// let mut data_locale: DataLocale = langid!("ar-EG").into();
726/// assert_writeable_eq!(data_locale, "ar-EG");
727/// assert!(!data_locale.has_aux());
728/// assert_eq!(data_locale.get_aux(), None);
729///
730/// let aux = "gbp"
731///     .parse::<AuxiliaryKeys>()
732///     .expect("contains valid characters");
733///
734/// data_locale.set_aux(aux);
735/// assert_writeable_eq!(data_locale, "ar-EG-x-gbp");
736/// assert!(data_locale.has_aux());
737/// assert_eq!(data_locale.get_aux(), Some(&"gbp".parse().unwrap()));
738/// ```
739///
740/// Multiple auxiliary keys are allowed:
741///
742/// ```
743/// use icu_provider::prelude::*;
744/// use writeable::assert_writeable_eq;
745///
746/// let data_locale = "ar-EG-x-gbp-long".parse::<DataLocale>().unwrap();
747/// assert_writeable_eq!(data_locale, "ar-EG-x-gbp-long");
748/// assert_eq!(data_locale.get_aux().unwrap().iter().count(), 2);
749/// ```
750///
751/// Not all strings are valid auxiliary keys.
752/// The string must be well-formed and case-normalized:
753///
754/// ```
755/// use icu_provider::prelude::*;
756///
757/// assert!("abcdefg".parse::<AuxiliaryKeys>().is_ok());
758/// assert!("abc-xyz".parse::<AuxiliaryKeys>().is_ok());
759///
760/// assert!("".parse::<AuxiliaryKeys>().is_err());
761/// assert!("!@#$%".parse::<AuxiliaryKeys>().is_err());
762/// assert!("abc_xyz".parse::<AuxiliaryKeys>().is_err());
763/// assert!("ABC123".parse::<AuxiliaryKeys>().is_err());
764/// ```
765///
766/// [`Keywords`]: unicode_ext::Keywords
#[derive(Debug, PartialEq, Clone, Eq, Hash, PartialOrd, Ord)]
#[cfg(feature = "experimental")]
pub struct AuxiliaryKeys {
    // The keys as one string; multiple keys are joined with `AuxiliaryKeys::separator()`.
    value: AuxiliaryKeysInner,
}
772
// Backing storage for `AuxiliaryKeys`. Both variants dereference to `str`, and the
// comparison, hashing and debug impls in this file all operate on that `str`.
#[cfg(feature = "experimental")]
#[derive(Clone)]
enum AuxiliaryKeysInner {
    // Used by the constructors in this file when the joined text is longer than 23 bytes.
    Boxed(alloc::boxed::Box<str>),
    // Used by the constructors in this file when the joined text is at most 23 bytes.
    Stack(TinyAsciiStr<23>),
    // NOTE: In the future, a `Static` variant could be added to allow `data_locale!("...")`
    // Static(&'static str),
}
781
#[cfg(feature = "experimental")]
impl Deref for AuxiliaryKeysInner {
    type Target = str;

    /// Exposes the stored text as a `str`, whichever variant holds it.
    #[inline]
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Stack(inline) => inline.as_str(),
            Self::Boxed(boxed) => &**boxed,
        }
    }
}
793
#[cfg(feature = "experimental")]
impl PartialEq for AuxiliaryKeysInner {
    /// Two values are equal when their text is equal, regardless of the variant used.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        let lhs: &str = self;
        let rhs: &str = other;
        lhs == rhs
    }
}
801
// Marker impl; equality itself is defined by the `PartialEq` impl, which compares the
// dereferenced `str` values.
#[cfg(feature = "experimental")]
impl Eq for AuxiliaryKeysInner {}
804
#[cfg(feature = "experimental")]
impl PartialOrd for AuxiliaryKeysInner {
    /// Always yields `Some`, forwarding to the total order defined by `Ord`.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
}
811
#[cfg(feature = "experimental")]
impl Ord for AuxiliaryKeysInner {
    /// Orders values by their text, regardless of the variant used.
    fn cmp(&self, other: &Self) -> Ordering {
        let lhs: &str = self;
        let rhs: &str = other;
        lhs.cmp(rhs)
    }
}
818
#[cfg(feature = "experimental")]
impl Debug for AuxiliaryKeysInner {
    /// Formats the stored text with the `Debug` impl of `str`.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as Debug>::fmt(self, f)
    }
}
826
#[cfg(feature = "experimental")]
impl Hash for AuxiliaryKeysInner {
    /// Hashes the stored text, so both variants hash alike for equal text.
    #[inline]
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        let text: &str = self;
        text.hash(state)
    }
}
834
// Macro from the `writeable` crate; going by its name, it provides the `fmt::Display`
// impl for `AuxiliaryKeys` on top of its `Writeable` impl.
#[cfg(feature = "experimental")]
writeable::impl_display_with_writeable!(AuxiliaryKeys);
837
#[cfg(feature = "experimental")]
impl Writeable for AuxiliaryKeys {
    /// Writes the stored key text to `sink`.
    fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
        let text: &str = &self.value;
        text.write_to(sink)
    }

    /// Forwards the length hint of the stored key text.
    fn writeable_length_hint(&self) -> LengthHint {
        let text: &str = &self.value;
        text.writeable_length_hint()
    }

    /// Forwards the string conversion of the stored key text.
    fn write_to_string(&self) -> alloc::borrow::Cow<str> {
        let text: &str = &self.value;
        text.write_to_string()
    }
}
850
#[cfg(feature = "experimental")]
impl FromStr for AuxiliaryKeys {
    type Err = DataError;

    /// Parses one or more auxiliary keys joined by the separator character.
    ///
    /// # Errors
    ///
    /// Returns a [`DataErrorKind::KeyLocaleSyntax`] error, with `s` attached as display
    /// context, when `s` is empty or when any piece either fails to parse as a [`Subtag`]
    /// or differs from the parsed subtag's string form.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let well_formed = !s.is_empty()
            && s.split(Self::separator()).all(|piece| {
                // Enforces normalization: the piece must round-trip unchanged.
                matches!(Subtag::from_str(piece), Ok(subtag) if piece == subtag.as_str())
            });
        if !well_formed {
            return Err(DataErrorKind::KeyLocaleSyntax
                .into_error()
                .with_display_context(s));
        }

        // Text of up to 23 bytes goes into the `Stack` variant; longer text is boxed.
        let value = if s.len() > 23 {
            AuxiliaryKeysInner::Boxed(s.into())
        } else {
            #[allow(clippy::unwrap_used)] // we just checked that the string is ascii
            let inline = s.parse().unwrap();
            AuxiliaryKeysInner::Stack(inline)
        };
        Ok(Self { value })
    }
}
883
#[cfg(feature = "experimental")]
impl AuxiliaryKeys {
    /// Builds an [`AuxiliaryKeys`] by joining the subtags yielded by `iter`,
    /// in order, with the auxiliary key separator.
    ///
    /// # Errors
    ///
    /// Returns a [`DataErrorKind::KeyLocaleSyntax`] error if the iterator
    /// yields no subtags.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_locid::extensions::private::subtag;
    /// use icu_provider::prelude::*;
    ///
    /// // One subtag yields a single auxiliary key:
    /// let a = AuxiliaryKeys::try_from_iter([subtag!("abc")]).unwrap();
    /// let b = "abc".parse::<AuxiliaryKeys>().unwrap();
    /// assert_eq!(a, b);
    ///
    /// // Several subtags are joined with a hyphen:
    /// let a = AuxiliaryKeys::try_from_iter([subtag!("abc"), subtag!("defg")])
    ///     .unwrap();
    /// let b = "abc-defg".parse::<AuxiliaryKeys>().unwrap();
    /// assert_eq!(a, b);
    /// ```
    ///
    /// An empty iterator is rejected:
    ///
    /// ```
    /// use icu_provider::prelude::*;
    ///
    /// assert!(AuxiliaryKeys::try_from_iter([]).is_err());
    /// ```
    pub fn try_from_iter(iter: impl IntoIterator<Item = Subtag>) -> Result<Self, DataError> {
        // TODO: Avoid the allocation when possible
        let mut joined = String::new();
        for subtag in iter {
            // Only put a separator between subtags, never before the first one.
            if !joined.is_empty() {
                joined.push(Self::separator());
            }
            joined.push_str(subtag.as_str());
        }
        if joined.is_empty() {
            return Err(DataErrorKind::KeyLocaleSyntax.with_str_context("empty aux iterator"));
        }
        // Strings of up to 23 bytes are stored inline; longer ones are boxed.
        let value = if joined.len() > 23 {
            AuxiliaryKeysInner::Boxed(joined.into())
        } else {
            // The string is made only of subtags and hyphens and was just
            // checked to be at most 23 bytes long.
            #[allow(clippy::unwrap_used)]
            let inline = joined.parse().unwrap();
            AuxiliaryKeysInner::Stack(inline)
        };
        Ok(Self { value })
    }

    /// Builds an [`AuxiliaryKeys`] holding exactly one subtag.
    ///
    /// Unlike [`AuxiliaryKeys::try_from_iter`], this is infallible and usable
    /// in `const` contexts.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_locid::extensions::private::subtag;
    /// use icu_provider::prelude::*;
    ///
    /// // A single subtag is equivalent to parsing its string form:
    /// let a = AuxiliaryKeys::from_subtag(subtag!("abc"));
    /// let b = "abc".parse::<AuxiliaryKeys>().unwrap();
    /// assert_eq!(a, b);
    /// ```
    pub const fn from_subtag(input: Subtag) -> Self {
        // Resize the subtag's backing string to the inline storage width.
        let inline = input.into_tinystr().resize();
        Self {
            value: AuxiliaryKeysInner::Stack(inline),
        }
    }

    /// Returns an iterator over the individual subtags of the auxiliary key,
    /// obtained by splitting the stored string on the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_locid::extensions::private::subtag;
    /// use icu_provider::AuxiliaryKeys;
    ///
    /// let aux: AuxiliaryKeys = "abc-defg".parse().unwrap();
    /// assert_eq!(
    ///     aux.iter().collect::<Vec<_>>(),
    ///     vec![subtag!("abc"), subtag!("defg")]
    /// );
    /// ```
    pub fn iter(&self) -> impl Iterator<Item = Subtag> + '_ {
        self.value.split(Self::separator()).filter_map(|x| {
            // A component that is not a valid subtag trips the assertion in
            // debug builds and is skipped otherwise.
            let subtag = x.parse().ok();
            debug_assert!(subtag.is_some(), "failed to convert to subtag: {x}");
            subtag
        })
    }

    /// Returns the separator character placed between auxiliary keys in data locales.
    ///
    /// This is, according to BCP-47, an ASCII hyphen.
    #[inline]
    pub(crate) const fn separator() -> char {
        const HYPHEN: char = '-';
        HYPHEN
    }
}
990
#[cfg(feature = "experimental")]
impl From<Subtag> for AuxiliaryKeys {
    /// Converts a single [`Subtag`] into an [`AuxiliaryKeys`] containing only
    /// that subtag.
    ///
    /// This is the trait form of [`AuxiliaryKeys::from_subtag`] and produces
    /// the same value.
    fn from(subtag: Subtag) -> Self {
        // Delegate to the infallible constructor rather than re-validating the
        // subtag's bytes: this keeps the two conversions in lock-step and
        // avoids a panic path that can never be taken.
        Self::from_subtag(subtag)
    }
}
1003
/// Checks that a [`DataLocale`] parsed from a string (optionally with
/// auxiliary keys attached afterwards) writes out as the expected string.
#[test]
fn test_data_locale_to_string() {
    struct TestCase {
        pub locale: &'static str,
        pub aux: Option<&'static str>,
        pub expected: &'static str,
    }

    let cases = [
        TestCase {
            locale: "und",
            aux: None,
            expected: "und",
        },
        TestCase {
            locale: "und-u-cu-gbp",
            aux: None,
            expected: "und-u-cu-gbp",
        },
        TestCase {
            locale: "en-ZA-u-cu-gbp",
            aux: None,
            expected: "en-ZA-u-cu-gbp",
        },
        // Auxiliary keys only exist behind the experimental feature.
        #[cfg(feature = "experimental")]
        TestCase {
            locale: "en-ZA-u-nu-arab",
            aux: Some("gbp"),
            expected: "en-ZA-u-nu-arab-x-gbp",
        },
    ];

    for case in cases {
        let mut data_locale: DataLocale = case.locale.parse().unwrap();
        #[cfg(feature = "experimental")]
        if let Some(aux_str) = case.aux {
            data_locale.set_aux(aux_str.parse().unwrap());
        }
        writeable::assert_writeable_eq!(data_locale, case.expected);
    }
}
1043
/// Checks which strings parse as a [`DataLocale`], and that every string
/// that does parse writes back out unchanged.
#[test]
fn test_data_locale_from_string() {
    #[derive(Debug)]
    struct TestCase {
        pub input: &'static str,
        pub success: bool,
    }

    let cases = [
        TestCase {
            input: "und",
            success: true,
        },
        TestCase {
            input: "und-u-cu-gbp",
            success: true,
        },
        TestCase {
            input: "en-ZA-u-cu-gbp",
            success: true,
        },
        TestCase {
            input: "en...",
            success: false,
        },
        // The same input is expected to parse only when the experimental
        // feature is enabled.
        #[cfg(feature = "experimental")]
        TestCase {
            input: "en-ZA-u-nu-arab-x-gbp",
            success: true,
        },
        #[cfg(not(feature = "experimental"))]
        TestCase {
            input: "en-ZA-u-nu-arab-x-gbp",
            success: false,
        },
    ];

    for cas in cases {
        let parsed = DataLocale::from_str(cas.input);
        let data_locale = match parsed {
            Ok(locale) if cas.success => locale,
            Ok(_) => {
                panic!("DataLocale parsed but it was supposed to fail: {cas:?}");
            }
            Err(_) if cas.success => {
                panic!("DataLocale was supposed to parse but it failed: {cas:?}");
            }
            // An expected failure: nothing further to check for this case.
            Err(_) => continue,
        };
        // A successfully parsed locale must round-trip to its input.
        writeable::assert_writeable_eq!(data_locale, cas.input);
    }
}