icu_locid/extensions/unicode/
value.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::parser::{ParserError, SubtagIterator};
6use crate::shortvec::ShortBoxSlice;
7use core::ops::RangeInclusive;
8use core::str::FromStr;
9use tinystr::TinyAsciiStr;
10
11/// A value used in a list of [`Keywords`](super::Keywords).
12///
13/// The value has to be a sequence of one or more alphanumerical strings
14/// separated by `-`.
15/// Each part of the sequence has to be no shorter than three characters and no
16/// longer than 8.
17///
18///
19/// # Examples
20///
21/// ```
22/// use icu::locid::extensions::unicode::{value, Value};
23/// use writeable::assert_writeable_eq;
24///
25/// assert_writeable_eq!(value!("gregory"), "gregory");
26/// assert_writeable_eq!(
27///     "islamic-civil".parse::<Value>().unwrap(),
28///     "islamic-civil"
29/// );
30///
31/// // The value "true" has the special, empty string representation
32/// assert_eq!(value!("true").to_string(), "");
33/// ```
34#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Default)]
35pub struct Value(ShortBoxSlice<TinyAsciiStr<{ *VALUE_LENGTH.end() }>>);
36
37const VALUE_LENGTH: RangeInclusive<usize> = 3..=8;
38const TRUE_VALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true");
39
40impl Value {
41    /// A constructor which takes a utf8 slice, parses it and
42    /// produces a well-formed [`Value`].
43    ///
44    /// # Examples
45    ///
46    /// ```
47    /// use icu::locid::extensions::unicode::Value;
48    ///
49    /// Value::try_from_bytes(b"buddhist").expect("Parsing failed.");
50    /// ```
51    pub fn try_from_bytes(input: &[u8]) -> Result<Self, ParserError> {
52        let mut v = ShortBoxSlice::new();
53
54        if !input.is_empty() {
55            for subtag in SubtagIterator::new(input) {
56                let val = Self::subtag_from_bytes(subtag)?;
57                if let Some(val) = val {
58                    v.push(val);
59                }
60            }
61        }
62        Ok(Self(v))
63    }
64
65    /// Const constructor for when the value contains only a single subtag.
66    ///
67    /// # Examples
68    ///
69    /// ```
70    /// use icu::locid::extensions::unicode::Value;
71    ///
72    /// Value::try_from_single_subtag(b"buddhist").expect("valid subtag");
73    /// Value::try_from_single_subtag(b"#####").expect_err("invalid subtag");
74    /// Value::try_from_single_subtag(b"foo-bar").expect_err("not a single subtag");
75    /// ```
76    pub const fn try_from_single_subtag(subtag: &[u8]) -> Result<Self, ParserError> {
77        match Self::subtag_from_bytes(subtag) {
78            Err(_) => Err(ParserError::InvalidExtension),
79            Ok(option) => Ok(Self::from_tinystr(option)),
80        }
81    }
82
83    #[doc(hidden)]
84    pub fn as_tinystr_slice(&self) -> &[TinyAsciiStr<8>] {
85        &self.0
86    }
87
88    #[doc(hidden)]
89    pub const fn as_single_subtag(&self) -> Option<&TinyAsciiStr<8>> {
90        self.0.single()
91    }
92
93    #[doc(hidden)]
94    pub const fn from_tinystr(subtag: Option<TinyAsciiStr<8>>) -> Self {
95        match subtag {
96            None => Self(ShortBoxSlice::new()),
97            Some(val) => {
98                debug_assert!(val.is_ascii_alphanumeric());
99                debug_assert!(!matches!(val, TRUE_VALUE));
100                Self(ShortBoxSlice::new_single(val))
101            }
102        }
103    }
104
105    pub(crate) fn from_short_slice_unchecked(input: ShortBoxSlice<TinyAsciiStr<8>>) -> Self {
106        Self(input)
107    }
108
109    #[doc(hidden)]
110    pub const fn subtag_from_bytes(bytes: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
111        Self::parse_subtag_from_bytes_manual_slice(bytes, 0, bytes.len())
112    }
113
114    pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
115        Self::parse_subtag_from_bytes_manual_slice(t, 0, t.len())
116    }
117
118    pub(crate) const fn parse_subtag_from_bytes_manual_slice(
119        bytes: &[u8],
120        start: usize,
121        end: usize,
122    ) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
123        let slice_len = end - start;
124        if slice_len > *VALUE_LENGTH.end() || slice_len < *VALUE_LENGTH.start() {
125            return Err(ParserError::InvalidExtension);
126        }
127
128        match TinyAsciiStr::from_bytes_manual_slice(bytes, start, end) {
129            Ok(TRUE_VALUE) => Ok(None),
130            Ok(s) if s.is_ascii_alphanumeric() => Ok(Some(s.to_ascii_lowercase())),
131            Ok(_) => Err(ParserError::InvalidExtension),
132            Err(_) => Err(ParserError::InvalidSubtag),
133        }
134    }
135
136    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
137    where
138        F: FnMut(&str) -> Result<(), E>,
139    {
140        self.0.iter().map(TinyAsciiStr::as_str).try_for_each(f)
141    }
142}
143
144impl FromStr for Value {
145    type Err = ParserError;
146
147    fn from_str(source: &str) -> Result<Self, Self::Err> {
148        Self::try_from_bytes(source.as_bytes())
149    }
150}
151
// Invokes the crate-local `impl_writeable_for_subtag_list!` macro for `Value`.
// NOTE(review): the macro is defined outside this file. It presumably provides
// the writeable/`Display` output that the doc examples in this file rely on
// (`assert_writeable_eq!`, `to_string()`), and the two literals look like sample
// subtags ("islamic-civil") — confirm against the macro definition.
impl_writeable_for_subtag_list!(Value, "islamic", "civil");
153
/// Builds a valid Unicode extension [`Value`] from a string literal at
/// compile time.
///
/// Only single-subtag values are supported. The literal is validated inside a
/// `const` item, so an invalid literal panics during constant evaluation.
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::unicode::{key, value};
/// use icu::locid::Locale;
///
/// let loc: Locale = "de-u-ca-buddhist".parse().unwrap();
///
/// assert_eq!(
///     loc.extensions.unicode.keywords.get(&key!("ca")),
///     Some(&value!("buddhist"))
/// );
/// ```
///
/// [`Value`]: crate::extensions::unicode::Value
#[macro_export]
#[doc(hidden)]
macro_rules! extensions_unicode_value {
    ($value:literal) => {{
        // The intended form, once https://github.com/rust-lang/rust/issues/73255
        // lands, matches directly on the constructor's result:
        //
        // const PARSED: $crate::extensions::unicode::Value =
        //     match $crate::extensions::unicode::Value::try_from_single_subtag($value.as_bytes()) {
        //         Ok(parsed) => parsed,
        //         #[allow(clippy::panic)] // const context
        //         _ => panic!(concat!("Invalid Unicode extension value: ", $value)),
        //     };
        //
        // Until then, match on the parsed subtag and wrap it afterwards.
        const PARSED: $crate::extensions::unicode::Value =
            $crate::extensions::unicode::Value::from_tinystr(
                match $crate::extensions::unicode::Value::subtag_from_bytes($value.as_bytes()) {
                    Ok(subtag) => subtag,
                    _ => panic!(concat!("Invalid Unicode extension value: ", $value)),
                },
            );
        PARSED
    }};
}
#[doc(inline)]
pub use extensions_unicode_value as value;