bytes_utils/string/
mod.rs

1//! [String]-like wrappers around [Bytes] and [BytesMut].
2//!
3//! The [Bytes] and [BytesMut] provide a buffer of bytes with ability to create owned slices into
4//! the same shared memory allocation. This allows cheap manipulation of data.
5//!
6//! Strings are mostly just byte buffers with extra APIs to manipulate them. The standard [String]
7//! type is built as a wrapper around [Vec]. We build similar wrappers around the [Bytes] and
8//! [BytesMut], gaining the ability to create owned shared slices for textual data as well.
9//!
//! Users are expected to use the [Str] and [StrMut] types. Note that these are type aliases around
//! the [StrInner] type. The latter is a means to implement both in one go and contains all the
//! documentation, but is not meant to be used directly.
13//!
14//! # Splitting
15//!
16//! The [prim@str] type from standard library (which the types here dereference to) allows for
17//! slicing and splitting in many convenient ways. They, however, return borrowed string slices
18//! (`&str`), which might pose some problems.
19//!
//! The [Str], and to certain extent, the [StrMut] type additionally allow cheap splitting and
//! slicing that produce owned [Str] and [StrMut] respectively. They are slightly more expensive
//! than the methods returning `&str`, but only by incrementing internal reference
//! counts. They do not clone the actual string data, like `.to_owned()` on the standard library
//! methods would. These methods are available in addition to the standard ones.
25//!
26//! There are three ways how this can be done:
27//!
28//! * By dedicated methods, like [lines_bytes][StrInner::lines_bytes] (in general, the name of the
29//!   standard method suffixed with `_bytes`).
30//! * By using the [BytesIter] iterator manually.
31//! * By using the standard-library methods, producing `&str` and translating it back to [Str] with
32//!   [slice][StrInner::slice] or [StrInner::slice_ref].
33//!
34//! # Examples
35//!
36//! ```rust
37//! # use bytes::Bytes;
38//! # use bytes_utils::{Str, StrMut};
39//! let mut builder = StrMut::new();
40//! builder += "Hello";
41//! builder.push(' ');
42//! builder.push_str("World");
43//! assert_eq!("Hello World", builder);
44//!
45//! let s1 = builder.split_built().freeze();
46//! // This is a cheap copy, in the form of incrementing a reference count.
47//! let s2 = s1.clone();
48//! assert_eq!("Hello World", s1);
49//! assert_eq!("Hello World", s2);
50//! // Slicing is cheap as well, even though the returned things are Str and therefore owned too.
51//! assert_eq!("ello", s1.slice(1..5));
52//! // We have taken the data out of the builder, but the rest of its capacity can be used for
53//! // further things.
54//! assert_eq!("", builder);
55//!
56//! // Creating from strings and similar works
57//! let a = Str::from("Hello");
58//! assert_eq!("Hello", a);
59//!
60//! let e = Str::new();
61//! assert_eq!("", e);
62//!
63//! // And from static str in O(1)
64//! let b = Str::from_static("World");
65//! assert_eq!("World", b);
66//!
67//! // And from Bytes too.
68//! let b = Str::try_from(Bytes::from_static(b"World")).expect("Must be utf8");
69//! assert_eq!("World", b);
70//! // Invalid utf8 is refused.
71//! Str::try_from(Bytes::from_static(&[0, 0, 255])).unwrap_err();
72//! ```
73
74use alloc::borrow::Cow;
75use alloc::boxed::Box;
76use alloc::string::String;
77use core::borrow::{Borrow, BorrowMut};
78use core::cmp::Ordering;
79use core::convert::Infallible;
80use core::fmt::{Debug, Display, Formatter, Result as FmtResult, Write};
81use core::hash::{Hash, Hasher};
82use core::iter::{self, FromIterator};
83use core::ops::{Add, AddAssign, Deref, DerefMut, Index, IndexMut};
84use core::str::{self, FromStr};
85
86#[cfg(feature = "std")]
87use std::error::Error;
88
89use bytes::{Bytes, BytesMut};
90use either::Either;
91
92#[cfg(feature = "serde")]
93mod serde_impl;
94
/// Error when creating [Str] or [StrMut] from invalid UTF8 data.
///
/// Besides the description of the problem, it carries the rejected byte buffer, so the caller
/// can get it back with [into_inner][Utf8Error::into_inner].
#[derive(Copy, Clone, Debug)]
pub struct Utf8Error<S> {
    // The validation error describing why the data is not UTF8.
    e: core::str::Utf8Error,
    // The buffer that failed the validation.
    inner: S,
}
101
102impl<S> Utf8Error<S> {
103    /// Returns the byte buffer back to the caller.
104    pub fn into_inner(self) -> S {
105        self.inner
106    }
107
108    /// The inner description of why the data is invalid UTF8.
109    pub fn utf8_error(&self) -> str::Utf8Error {
110        self.e
111    }
112}
113
114impl<S> Display for Utf8Error<S> {
115    fn fmt(&self, fmt: &mut Formatter) -> FmtResult {
116        Display::fmt(&self.e, fmt)
117    }
118}
119
// Only available with the `std` feature, because the `Error` trait is imported from `std`.
// The `Debug` bound on the buffer is needed since `Error` requires `Debug` on the whole type.
#[cfg(feature = "std")]
impl<S: Debug> Error for Utf8Error<S> {}
122
/// Direction of iteration.
///
/// It decides from which end of the string a [BytesIter] takes its chunks.
///
/// See [BytesIter].
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Direction {
    /// Move forward (in the normal direction) in the string.
    Forward,

    /// Move backwards in the string.
    Backward,
}
134
/// Manual splitting iterator.
///
/// The methods on [Str] and [StrMut] that iterate use this internally. But it can also be used
/// manually to generate other iterators that split the original into parts.
#[derive(Clone, Debug)]
pub struct BytesIter<S, F> {
    // The part of the string not yet handed out; `None` once the iteration is over.
    bytes: Option<S>,
    // User-provided closure locating the next chunk and separator in the rest.
    extract: F,
    // From which end the chunks are taken.
    direction: Direction,
}
145
146impl<S, F> BytesIter<S, F>
147where
148    S: Storage,
149    F: FnMut(&str) -> Option<(usize, usize)>,
150{
151    /// A constructor of the iterator.
152    ///
153    /// The `direction` specifies in what order chunks should be yielded.
154    ///
155    /// The `ext` closure is always called with the rest of not yet split string. It shall return
156    /// the byte indices of the chunk and separator border. In case of forward iteration, it is the
157    /// end of them and the separator needs to end further to the string (or at the same position).
158    /// In the backwards direction, it is in reverse ‒ they specify their starts and the separator
159    /// is before the chunk.
160    ///
161    /// # Panics
162    ///
163    /// If the indices don't point at a character boundary, the iteration will panic. It'll also
164    /// panic if the returned indices are reversed or if they are out of bounds.
165    pub fn new(s: StrInner<S>, direction: Direction, ext: F) -> Self {
166        Self {
167            bytes: Some(s.0),
168            extract: ext,
169            direction,
170        }
171    }
172}
173
// Each step asks the closure where the next chunk and separator are, cuts the chunk off the
// stored rest, drops the separator and keeps the remainder for the next step.
impl<S, F> Iterator for BytesIter<S, F>
where
    S: Storage,
    F: FnMut(&str) -> Option<(usize, usize)>,
{
    type Item = StrInner<S>;

    fn next(&mut self) -> Option<StrInner<S>> {
        // Taking the storage out leaves `None` behind, which is what terminates the iteration
        // unless one of the branches below puts a remainder back.
        let storage = self.bytes.take()?;
        // Safety: we make sure it is valid UTF8 on the API boundary.
        let whole_str = unsafe { str::from_utf8_unchecked(storage.as_ref()) };
        // Cuts `storage` into the part before `left` and the part from `right` on, throwing
        // away the bytes in between (the separator).
        fn split<S: Storage>(storage: S, left: usize, right: usize) -> (S, S) {
            // Safety: same storage as above, it holds valid UTF8.
            let whole_str = unsafe { str::from_utf8_unchecked(storage.as_ref()) };
            // Sanity-check we are not slicing in the middle of utf8 code point. This would
            // panic if we do. It would also panic if we are out of range, which is also good.
            assert!(whole_str.is_char_boundary(left));
            assert!(whole_str.is_char_boundary(right));

            // Now that we are sure this is legal, we are going to slice the byte data for real.
            let (with_sep, end) = storage.split_at(right);
            let (start, _sep) = with_sep.split_at(left);
            (start, end)
        }
        match ((self.extract)(whole_str), self.direction) {
            (Some((chunk_end, sep_end)), Direction::Forward) => {
                assert!(chunk_end <= sep_end);
                let (start, end) = split(storage, chunk_end, sep_end);

                // Going forward: the front part is the chunk, the back part is the new rest.
                self.bytes = Some(end);
                Some(StrInner(start))
            }
            (Some((chunk_start, sep_start)), Direction::Backward) => {
                assert!(sep_start <= chunk_start);
                let (start, end) = split(storage, sep_start, chunk_start);

                // Going backward: the back part is the chunk, the front part is the new rest.
                self.bytes = Some(start);
                Some(StrInner(end))
            }
            (None, _) => {
                // No separator found -> return the whole rest (and keep None in ourselves)
                Some(StrInner(storage))
            }
        }
    }
}
219
/// Locates the first run of separator characters, for use with the [BytesIter].
///
/// Returns the byte index where the run starts and the byte index just past its end (the run
/// may reach up to the end of the string). Returns `None` if there is no separator character.
fn sep_find<F: Fn(char) -> bool>(s: &str, is_sep: F) -> Option<(usize, usize)> {
    let run_start = s.find(&is_sep)?;
    let tail = &s[run_start..];
    let run_end = match tail.find(|c: char| !is_sep(c)) {
        // The offset is relative to the tail, so shift it back to the whole string.
        Some(offset) => run_start + offset,
        None => s.len(),
    };
    Some((run_start, run_end))
}
229
/// Separator position for an empty pattern when going forward.
///
/// The "separator" is the zero-length gap right after the first character, so both returned
/// indices are the byte length of that character. Returns `None` if the string is empty or the
/// `limit` of remaining splits is 0.
fn empty_sep(s: &str, limit: usize) -> Option<(usize, usize)> {
    if limit == 0 {
        return None;
    }
    let first_char = s.chars().next()?;
    let border = first_char.len_utf8();
    Some((border, border))
}
241
/// Separator position for an empty pattern when going backward.
///
/// The "separator" is the zero-length gap right before the last character, so both returned
/// indices are the byte index where that character starts. Returns `None` if the string is
/// empty or the `limit` of remaining splits is 0.
fn rempty_sep(s: &str, limit: usize) -> Option<(usize, usize)> {
    if limit == 0 {
        return None;
    }
    let (border, _last_char) = s.char_indices().next_back()?;
    Some((border, border))
}
246
/// The backing storage for [StrInner]
///
/// This is currently a technical detail of the crate, users are not expected to implement this
/// trait. Use [Str] or [StrMut] type aliases.
///
/// Implementations for [Bytes] and [BytesMut] are provided.
///
/// # Safety
///
/// The storage must act "sane". But what exactly it means is not yet analyzed and may change in
/// future versions. Don't implement the trait (at least not yet).
pub unsafe trait Storage: AsRef<[u8]> + Default + Sized {
    /// A type that can be used to build the storage incrementally.
    ///
    /// For mutable storages, it may be itself. For immutable one, there needs to be a mutable
    /// counterpart that can be converted to immutable later on.
    type Creator: Default + StorageMut;

    /// Converts the creator (mutable storage) to self.
    ///
    /// In case of mutable storages, this should be identity.
    fn from_creator(creator: Self::Creator) -> Self;

    /// Splits the storage at the given byte index and creates two non-overlapping instances.
    ///
    /// The first instance holds the bytes before `at`, the second one the rest.
    fn split_at(self, at: usize) -> (Self, Self);
}
271
272unsafe impl Storage for Bytes {
273    type Creator = BytesMut;
274    fn from_creator(creator: Self::Creator) -> Self {
275        creator.freeze()
276    }
277    fn split_at(mut self, at: usize) -> (Self, Self) {
278        let right = self.split_off(at);
279        (self, right)
280    }
281}
282
283unsafe impl Storage for BytesMut {
284    type Creator = BytesMut;
285    fn from_creator(creator: Self::Creator) -> Self {
286        creator
287    }
288    fn split_at(mut self, at: usize) -> (Self, Self) {
289        let right = self.split_off(at);
290        (self, right)
291    }
292}
293
/// Trait for extra functionality of a mutable storage.
///
/// This is in addition to what an immutable storage must satisfy.
///
/// An implementation for [BytesMut] is provided.
///
/// # Safety
///
/// The storage must act "sane". But what exactly it means is not yet analyzed and may change in
/// future versions. Don't implement the trait (at least not yet).
pub unsafe trait StorageMut: Storage + AsMut<[u8]> {
    /// An immutable counter-part storage.
    ///
    /// It must use this mutable storage as its [Creator][Storage::Creator].
    type Immutable: Storage<Creator = Self>;

    /// Adds some more bytes to the end of the storage.
    fn push_slice(&mut self, s: &[u8]);
}
309
310unsafe impl StorageMut for BytesMut {
311    type Immutable = Bytes;
312    fn push_slice(&mut self, s: &[u8]) {
313        self.extend_from_slice(s)
314    }
315}
316
/// Implementation of the [Str] and [StrMut] types.
///
/// For technical reasons, both are implemented in one go as this type. For the same reason, most
/// of the documentation can be found here. Users are expected to use the [Str] and [StrMut]
/// instead.
///
/// The wrapped storage is expected to hold valid UTF8 at all times; the unsafe code in this
/// module relies on that.
#[derive(Clone, Default)]
pub struct StrInner<S>(S);
324
325impl<S: Storage> StrInner<S> {
326    /// Creates an empty instance.
327    pub fn new() -> Self {
328        Self::default()
329    }
330
331    /// Extracts the inner byte storage.
332    pub fn into_inner(self) -> S {
333        self.0
334    }
335
336    /// Access to the inner storage.
337    pub fn inner(&self) -> &S {
338        &self.0
339    }
340
341    /// Creates an instance from an existing byte storage.
342    ///
343    /// It may fail if the content is not valid UTF8.
344    ///
345    /// A [try_from][TryFrom::try_from] may be used instead.
346    pub fn from_inner(s: S) -> Result<Self, Utf8Error<S>> {
347        match str::from_utf8(s.as_ref()) {
348            Ok(_) => Ok(Self(s)),
349            Err(e) => Err(Utf8Error { e, inner: s }),
350        }
351    }
352
    /// Same as [from_inner][StrInner::from_inner], but without the checks.
    ///
    /// The storage is wrapped as it is, nothing is validated or copied.
    ///
    /// # Safety
    ///
    /// The caller must ensure content is valid UTF8.
    pub const unsafe fn from_inner_unchecked(s: S) -> Self {
        Self(s)
    }
361
362    /// Splits the string into two at the given index.
363    ///
364    /// # Panics
365    ///
366    /// If the index is not at char boundary.
367    pub fn split_at_bytes(self, at: usize) -> (Self, Self) {
368        assert!(self.deref().is_char_boundary(at));
369        let (l, r) = self.0.split_at(at);
370        (Self(l), Self(r))
371    }
372
373    /// Splits into whitespace separated "words".
374    ///
375    /// This acts like [split_whitespace][str::split_whitespace], but yields owned instances. It
376    /// doesn't clone the content, it just increments some reference counts.
377    pub fn split_whitespace_bytes(self) -> impl Iterator<Item = Self> {
378        BytesIter::new(self, Direction::Forward, |s| {
379            sep_find(s, char::is_whitespace)
380        })
381        .filter(|s| !s.is_empty())
382    }
383
384    /// Splits into whitespace separated "words".
385    ///
386    /// This acts like [split_ascii_whitespace][str::split_ascii_whitespace], but yields owned
387    /// instances. This doesn't clone the content, it just increments some reference counts.
388    pub fn split_ascii_whitespace_bytes(self) -> impl Iterator<Item = Self> {
389        BytesIter::new(self, Direction::Forward, |s| {
390            sep_find(s, |c| c.is_ascii() && (c as u8).is_ascii_whitespace())
391        })
392        .filter(|s| !s.is_empty())
393    }
394
395    /// Splits into lines.
396    ///
397    /// This acts like [lines][str::lines], but yields owned instances. The content is not cloned,
398    /// this just increments some reference counts.
399    pub fn lines_bytes(self) -> impl Iterator<Item = Self> {
400        if self.is_empty() {
401            Either::Left(iter::empty())
402        } else {
403            let iter = BytesIter::new(self, Direction::Forward, |s| sep_find(s, |c| c == '\n'))
404                .map(|s| match s.chars().next() {
405                    Some('\r') => s.split_at_bytes(1).1,
406                    _ => s,
407                });
408            Either::Right(iter)
409        }
410    }
411
412    /// Splits with the provided separator.
413    ///
414    /// This acts somewhat like [split][str::split], but yields owned instances. Also, it accepts
415    /// only string patters (since the `Pattern` is not stable ☹). The content is not cloned, this
416    /// just increments some reference counts.
417    pub fn split_bytes<'s>(self, sep: &'s str) -> impl Iterator<Item = Self> + 's
418    where
419        S: 's,
420    {
421        if sep.is_empty() {
422            let bulk = BytesIter::new(self, Direction::Forward, |s| empty_sep(s, usize::MAX));
423            Either::Left(iter::once(Self::default()).chain(bulk))
424        } else {
425            let sep_find = move |s: &str| s.find(sep).map(|pos| (pos, pos + sep.len()));
426            Either::Right(BytesIter::new(self, Direction::Forward, sep_find))
427        }
428    }
429
    /// Splits max. `n` times according to the given pattern.
    ///
    /// This acts somewhat like [splitn][str::splitn], but yields owned instances. Also, it accepts
    /// only string patterns (since the `Pattern` is not stable ☹). The content is not cloned,
    /// this just increments some reference counts.
    ///
    /// At most `n` items are yielded; the last one holds the whole unsplit rest.
    pub fn splitn_bytes<'s>(self, mut n: usize, sep: &'s str) -> impl Iterator<Item = Self> + 's
    where
        S: 's,
    {
        // TODO: This seems to work, but is ugly. Any idea how to simplify?
        if sep.is_empty() {
            if n <= 1 {
                // Nothing (n == 0) or just the whole string (n == 1).
                Either::Left(Either::Left(iter::once(self).take(n)))
            } else {
                // One item of the budget goes to the leading empty string chained below.
                n -= 1;
                let bulk = BytesIter::new(self, Direction::Forward, move |s| {
                    // Once the budget reaches 0, `empty_sep` returns None and the iterator
                    // yields the whole rest as the last item.
                    n -= 1;
                    empty_sep(s, n)
                });
                Either::Left(Either::Right(iter::once(Self::default()).chain(bulk)))
            }
        } else {
            // `n` is `Copy`: the closure counts down its own copy, while `take` below still
            // sees the original value.
            let sep_find = move |s: &str| {
                n -= 1;
                if n == 0 {
                    // Budget used up, the whole rest becomes the last item.
                    None
                } else {
                    s.find(sep).map(|pos| (pos, pos + sep.len()))
                }
            };
            // With n == 0, `take` prevents the closure from running at all (and underflowing).
            Either::Right(BytesIter::new(self, Direction::Forward, sep_find).take(n))
        }
    }
463
464    /// A reverse version of [split_bytes][Self::split_bytes].
465    pub fn rsplit_bytes<'s>(self, sep: &'s str) -> impl Iterator<Item = Self> + 's
466    where
467        S: 's,
468    {
469        if sep.is_empty() {
470            let bulk = BytesIter::new(self, Direction::Backward, |s| rempty_sep(s, usize::MAX));
471            Either::Left(iter::once(Self::default()).chain(bulk))
472        } else {
473            let sep_find = move |s: &str| s.rfind(sep).map(|pos| (pos + sep.len(), pos));
474            Either::Right(BytesIter::new(self, Direction::Backward, sep_find))
475        }
476    }
477
    /// A reverse version of [splitn_bytes][Self::splitn_bytes].
    ///
    /// At most `n` items are yielded, starting from the end of the string; the last one holds
    /// the whole unsplit rest.
    pub fn rsplitn_bytes<'s>(self, mut n: usize, sep: &'s str) -> impl Iterator<Item = Self> + 's
    where
        S: 's,
    {
        // TODO: This seems to work, but is ugly. Any idea how to simplify?
        if sep.is_empty() {
            if n <= 1 {
                // Nothing (n == 0) or just the whole string (n == 1).
                Either::Left(Either::Left(iter::once(self).take(n)))
            } else {
                // One item of the budget goes to the leading empty string chained below.
                n -= 1;
                let bulk = BytesIter::new(self, Direction::Backward, move |s| {
                    // Once the budget reaches 0, `rempty_sep` returns None and the iterator
                    // yields the whole rest as the last item.
                    n -= 1;
                    rempty_sep(s, n)
                });
                Either::Left(Either::Right(iter::once(Self::default()).chain(bulk)))
            }
        } else {
            // `n` is `Copy`: the closure counts down its own copy, while `take` below still
            // sees the original value.
            let sep_find = move |s: &str| {
                n -= 1;
                if n == 0 {
                    // Budget used up, the whole rest becomes the last item.
                    None
                } else {
                    // Going backward, the chunk starts where the separator ends.
                    s.rfind(sep).map(|pos| (pos + sep.len(), pos))
                }
            };
            // With n == 0, `take` prevents the closure from running at all (and underflowing).
            Either::Right(BytesIter::new(self, Direction::Backward, sep_find).take(n))
        }
    }
507}
508
509impl<S: StorageMut> StrInner<S> {
510    /// Appends a string.
511    pub fn push_str(&mut self, s: &str) {
512        self.0.push_slice(s.as_bytes());
513    }
514
515    /// Appends one character.
516    pub fn push(&mut self, c: char) {
517        self.push_str(c.encode_utf8(&mut [0; 4]));
518    }
519
520    /// Provides mutable access to the inner buffer.
521    ///
522    /// # Safety
523    ///
524    /// The caller must ensure that the content stays valid UTF8.
525    pub unsafe fn inner_mut(&mut self) -> &mut S {
526        &mut self.0
527    }
528
529    /// Turns the mutable variant into an immutable one.
530    ///
531    /// The advantage is that it can then be shared (also by small parts).
532    pub fn freeze(self) -> StrInner<S::Immutable> {
533        StrInner(S::Immutable::from_creator(self.0))
534    }
535}
536
// Gives access to all the read-only methods of `str`.
impl<S: Storage> Deref for StrInner<S> {
    type Target = str;

    fn deref(&self) -> &str {
        // Safety: the content is checked to be valid UTF8 when the instance is created (or
        // promised to be by the caller of the unsafe constructors and accessors).
        unsafe { str::from_utf8_unchecked(self.0.as_ref()) }
    }
}
544
// Gives access to the in-place modifying methods of `str`; only for mutable storages.
impl<S: StorageMut> DerefMut for StrInner<S> {
    fn deref_mut(&mut self) -> &mut str {
        // Safety: the content is checked to be valid UTF8 when the instance is created (or
        // promised to be by the caller of the unsafe constructors and accessors).
        unsafe { str::from_utf8_unchecked_mut(self.0.as_mut()) }
    }
}
550
551impl<S, T> AsRef<T> for StrInner<S>
552where
553    S: Storage,
554    str: AsRef<T>,
555{
556    fn as_ref(&self) -> &T {
557        self.deref().as_ref()
558    }
559}
560
561impl<S: StorageMut> AsMut<str> for StrInner<S> {
562    fn as_mut(&mut self) -> &mut str {
563        self.deref_mut()
564    }
565}
566
567impl<S: Storage> Borrow<str> for StrInner<S> {
568    fn borrow(&self) -> &str {
569        self.deref()
570    }
571}
572
573impl<S: StorageMut> BorrowMut<str> for StrInner<S> {
574    fn borrow_mut(&mut self) -> &mut str {
575        self.deref_mut()
576    }
577}
578
579impl<S: Storage> Debug for StrInner<S> {
580    fn fmt(&self, fmt: &mut Formatter) -> FmtResult {
581        Debug::fmt(self.deref(), fmt)
582    }
583}
584
585impl<S: Storage> Display for StrInner<S> {
586    fn fmt(&self, fmt: &mut Formatter) -> FmtResult {
587        Display::fmt(self.deref(), fmt)
588    }
589}
590
591impl<S: Storage> Hash for StrInner<S> {
592    fn hash<H: Hasher>(&self, state: &mut H) {
593        self.deref().hash(state)
594    }
595}
596
597impl<S, I> Index<I> for StrInner<S>
598where
599    S: Storage,
600    str: Index<I>,
601{
602    type Output = <str as Index<I>>::Output;
603
604    fn index(&self, index: I) -> &Self::Output {
605        self.deref().index(index)
606    }
607}
608
609impl<S, I> IndexMut<I> for StrInner<S>
610where
611    S: StorageMut,
612    str: IndexMut<I>,
613{
614    fn index_mut(&mut self, index: I) -> &mut Self::Output {
615        self.deref_mut().index_mut(index)
616    }
617}
618
619impl<S: StorageMut> Add<&str> for StrInner<S> {
620    type Output = Self;
621
622    fn add(mut self, rhs: &str) -> Self::Output {
623        self.push_str(rhs);
624        self
625    }
626}
627
628impl<S: StorageMut> AddAssign<&str> for StrInner<S> {
629    fn add_assign(&mut self, rhs: &str) {
630        self.push_str(rhs);
631    }
632}
633
634impl<S: StorageMut> Extend<char> for StrInner<S> {
635    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
636        for c in iter {
637            self.push(c);
638        }
639    }
640}
641
642impl<'a, S: StorageMut> Extend<&'a char> for StrInner<S> {
643    fn extend<T: IntoIterator<Item = &'a char>>(&mut self, iter: T) {
644        for c in iter {
645            self.push(*c);
646        }
647    }
648}
649
650macro_rules! impl_extend {
651    ($ty:ty $(, $lifetimes:lifetime )* ) => {
652        impl<$($lifetimes, )* S: StorageMut> Extend<$ty> for StrInner<S> {
653            fn extend<T: IntoIterator<Item = $ty>>(&mut self, iter: T) {
654                for i in iter {
655                    self.push_str(i.as_ref());
656                }
657            }
658        }
659
660        impl<$($lifetimes, )* S> FromIterator<$ty> for StrInner<S>
661        where
662            S: Storage,
663        {
664            fn from_iter<T: IntoIterator<Item = $ty>>(iter: T) -> Self {
665                let mut creator = StrInner(S::Creator::default());
666                creator.extend(iter);
667                StrInner(S::from_creator(creator.0))
668            }
669        }
670    };
671}
672
673impl_extend!(String);
674impl_extend!(Box<str>);
675impl_extend!(&'a String, 'a);
676impl_extend!(&'a str, 'a);
677impl_extend!(Cow<'a, str>, 'a);
678
679macro_rules! impl_from {
680    ($ty:ty $(, $lifetimes:lifetime )* ) => {
681        impl<$($lifetimes, )* S> From<$ty> for StrInner<S>
682        where
683            S: Storage,
684        {
685            fn from(s: $ty) -> Self {
686                iter::once(s).collect()
687            }
688        }
689    };
690}
691
692impl_from!(&'a String, 'a);
693impl_from!(&'a str, 'a);
694impl_from!(Cow<'a, str>, 'a);
695
696impl From<String> for Str {
697    fn from(s: String) -> Self {
698        let inner = Bytes::from(s.into_bytes());
699        // Safety: inner is constructed from a str
700        unsafe { Str::from_inner_unchecked(inner) }
701    }
702}
703
704impl From<Box<str>> for Str {
705    fn from(s: Box<str>) -> Self {
706        let s: Box<[u8]> = s.into();
707        let inner = Bytes::from(s);
708        // Safety: inner is constructed from a str
709        unsafe { Str::from_inner_unchecked(inner) }
710    }
711}
712
713macro_rules! impl_try_from {
714    ($ty: ty) => {
715        impl TryFrom<$ty> for StrInner<$ty> {
716            type Error = Utf8Error<$ty>;
717            fn try_from(s: $ty) -> Result<Self, Utf8Error<$ty>> {
718                Self::from_inner(s)
719            }
720        }
721
722        impl From<StrInner<$ty>> for $ty {
723            fn from(s: StrInner<$ty>) -> $ty {
724                s.0
725            }
726        }
727    };
728}
729
730impl_try_from!(Bytes);
731impl_try_from!(BytesMut);
732
733impl From<StrMut> for Str {
734    fn from(s: StrMut) -> Self {
735        s.freeze()
736    }
737}
738
739impl<S: Storage> FromStr for StrInner<S> {
740    type Err = Infallible;
741
742    fn from_str(s: &str) -> Result<Self, Self::Err> {
743        Ok(s.into())
744    }
745}
746
747impl<S: Storage> PartialEq for StrInner<S> {
748    fn eq(&self, other: &Self) -> bool {
749        self.deref() == other.deref()
750    }
751}
752
// The equality is the one of `str`, which is a full equivalence relation.
impl<S: Storage> Eq for StrInner<S> {}
754
755impl<S: Storage> PartialOrd for StrInner<S> {
756    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
757        Some(Ord::cmp(self, other))
758    }
759}
760
761impl<S: Storage> Ord for StrInner<S> {
762    fn cmp(&self, other: &Self) -> Ordering {
763        self.deref().cmp(other.deref())
764    }
765}
766
// Implements equality and ordering between `StrInner` and another string-like type `$ty`, in
// both directions. Both sides are turned into `&str` by `deref()` and compared as such. The
// optional lifetimes are the ones `$ty` mentions.
macro_rules! impl_partrial_eq {
    ($ty: ty $(, $lifetimes:lifetime )* ) => {
        // StrInner == $ty
        impl<$($lifetimes, )* S: Storage> PartialEq<$ty> for StrInner<S> {
            fn eq(&self, other: &$ty) -> bool {
                self.deref() == other.deref()
            }
        }

        // $ty == StrInner
        impl<$($lifetimes, )* S: Storage> PartialEq<StrInner<S>> for $ty {
            fn eq(&self, other: &StrInner<S>) -> bool {
                self.deref() == other.deref()
            }
        }

        // StrInner <=> $ty; always `Some`, as it is computed by a total comparison.
        impl<$($lifetimes, )* S: Storage> PartialOrd<$ty> for StrInner<S> {
            fn partial_cmp(&self, other: &$ty) -> Option<Ordering> {
                Some(self.deref().cmp(other.deref()))
            }
        }

        // $ty <=> StrInner; always `Some`, as it is computed by a total comparison.
        impl<$($lifetimes, )* S: Storage> PartialOrd<StrInner<S>> for $ty {
            fn partial_cmp(&self, other: &StrInner<S>) -> Option<Ordering> {
                Some(self.deref().cmp(other.deref()))
            }
        }
    };
}

impl_partrial_eq!(String);
impl_partrial_eq!(Box<str>);
impl_partrial_eq!(&'a str, 'a);
impl_partrial_eq!(&'a mut str, 'a);
impl_partrial_eq!(Cow<'a, str>, 'a);
800
801impl<S: StorageMut> Write for StrInner<S> {
802    fn write_str(&mut self, s: &str) -> FmtResult {
803        self.push_str(s);
804        Ok(())
805    }
806}
/// The [format] macro, but returning [Str].
///
/// The string is built by [format_bytes_mut] and then frozen.
///
/// # Examples
///
/// ```
/// use bytes_utils::{format_bytes, Str};
/// let s: Str = format_bytes!("Hello {}", "world");
/// assert_eq!("Hello world", s);
/// ```
#[macro_export]
macro_rules! format_bytes {
    ($($arg: tt)*) => {
        $crate::format_bytes_mut!($($arg)*).freeze()
    }
}
822
/// The [format] macro, but returning [StrMut].
///
/// The macro doesn't need the standard library at the place of use, it works in `no_std`
/// crates too.
///
/// # Panics
///
/// If a formatting trait implementation of one of the arguments returns an error (the same as
/// the [format] macro does).
///
/// # Examples
///
/// ```
/// use bytes_utils::{format_bytes_mut, StrMut};
/// let s: StrMut = format_bytes_mut!("Hello {}", "world");
/// assert_eq!("Hello world", s);
/// ```
#[macro_export]
macro_rules! format_bytes_mut {
    ($($arg: tt)*) => {{
        let mut buf = $crate::StrMut::default();
        // The trait method is called by its path in `core`, so the expansion depends neither
        // on `std` being available nor on any import at the place of use.
        core::fmt::Write::write_fmt(&mut buf, format_args!($($arg)*)).unwrap();
        buf
    }}
}
841
/// An immutable variant of [Bytes]-backed string.
///
/// Cloning and slicing it share the underlying data instead of copying them.
///
/// The methods and their documentation are on [StrInner], but users are mostly expected to use
/// this and the [StrMut] aliases.
pub type Str = StrInner<Bytes>;
847
848impl Str {
849    /// Extracts a subslice of the string as an owned [Str].
850    ///
851    /// # Panics
852    ///
853    /// If the byte indices in the range are not on char boundaries.
854    pub fn slice<R>(&self, range: R) -> Str
855    where
856        str: Index<R, Output = str>,
857    {
858        self.slice_ref(&self[range])
859    }
860
861    /// Extracts owned representation of the slice passed.
862    ///
863    /// This method accepts a string sub-slice of `self`. It then extracts the slice but as the
864    /// [Str] type. This makes it easier to use "ordinary" string parsing/manipulation and then go
865    /// back to holding the [Bytes]-based representation.
866    ///
867    /// This is zero-copy, the common part will be shared by reference counting.
868    ///
869    /// # Panics
870    ///
871    /// If the provided slice is not a sub-slice of `self`. This is checked based on address of the
872    /// slice, not on the content.
873    ///
874    /// # Example
875    ///
876    /// ```rust
877    /// # use bytes_utils::Str;
878    /// let owned = Str::from("Hello World");
879    /// let borrowed_mid: &str = &owned[2..5];
880    ///
881    /// let mid: Str = owned.slice_ref(borrowed_mid);
882    /// assert_eq!("Hello World", owned);
883    /// assert_eq!("llo", mid);
884    /// ```
885    pub fn slice_ref(&self, subslice: &str) -> Self {
886        let sub = self.0.slice_ref(subslice.as_bytes());
887        Self(sub)
888    }
889
890    /// Create [`Str`] from static string in O(1).
891    pub const fn from_static(s: &'static str) -> Self {
892        let bytes = Bytes::from_static(s.as_bytes());
893        // Safety: bytes is constructed from str
894        unsafe { Str::from_inner_unchecked(bytes) }
895    }
896}
897
/// A mutable variant of [BytesMut]-backed string.
///
/// Unlike [Str], this one allows modifications (mostly additions), but also doesn't allow
/// overlapping/shared chunks.
///
/// It can be turned into [Str] by [freeze][StrInner::freeze].
///
/// This is internally backed by the [StrInner] type, so the documentation of the methods are on
/// that.
pub type StrMut = StrInner<BytesMut>;
906
907impl StrMut {
908    /// Splits and returns the part of already built string, but keeps the extra capacity.
909    pub fn split_built(&mut self) -> StrMut {
910        StrInner(self.0.split())
911    }
912}
913
#[cfg(test)]
mod tests {
    use itertools::Itertools;
    use std::panic;

    use super::*;

    // The separator indices are byte indices, not char indices; a multi-byte character in
    // front of the separator must not confuse the splitting.
    #[test]
    fn split_w_byte_index() {
        let v = Str::from("😈 ").split_whitespace_bytes().collect_vec();
        assert_eq!(1, v.len());
        assert_eq!("😈", v[0]);
    }

    // A string consisting of just the separator has an empty string on each side of it.
    #[test]
    fn split_same() {
        let v = Str::from("a").split_bytes("a").collect_vec();
        assert_eq!(2, v.len());
        assert_eq!("", v[0]);
        assert_eq!("", v[1]);
    }

    // The empty pattern matches around every character, including both ends.
    #[test]
    fn split_empty_pat() {
        let v = Str::from("a").split_bytes("").collect_vec();
        assert_eq!(3, v.len());
        assert_eq!("", v[0]);
        assert_eq!("a", v[1]);
        assert_eq!("", v[2]);
    }

    // Slicing in the middle of a multi-byte character must panic, not produce invalid UTF8.
    #[test]
    fn slice_checks_char_boundaries() {
        let v = Str::from("😈");
        assert_eq!(4, v.len());
        panic::catch_unwind(|| v.slice(1..)).unwrap_err();
    }

    #[test]
    fn split_at_bytes_mid() {
        let v = Str::from("hello");
        let (l, r) = v.split_at_bytes(2);
        assert_eq!("he", l);
        assert_eq!("llo", r);
    }

    // Splitting at the very start leaves the left part empty.
    #[test]
    fn split_at_bytes_begin() {
        let v = Str::from("hello");
        let (l, r) = v.split_at_bytes(0);
        assert_eq!("", l);
        assert_eq!("hello", r);
    }

    // Splitting at the very end leaves the right part empty.
    #[test]
    fn split_at_bytes_end() {
        let v = Str::from("hello");
        let (l, r) = v.split_at_bytes(5);
        assert_eq!("hello", l);
        assert_eq!("", r);
    }

    // Splitting in the middle of a multi-byte character must panic.
    #[test]
    fn split_at_bytes_panic() {
        let v = Str::from("😈");
        assert_eq!(4, v.len());
        panic::catch_unwind(|| v.split_at_bytes(2)).unwrap_err();
    }

    // Property tests comparing each `*_bytes` method with its standard library counterpart on
    // generated inputs. `zip_eq` panics if the two iterators differ in length, so the number of
    // yielded items is checked as well as their content. Not compiled under miri.
    #[cfg(not(miri))]
    mod proptests {
        use proptest::prelude::*;

        use super::*;

        proptest! {
            #[test]
            fn split_whitespace(s: String) {
                let bstring = Str::from(&s);

                let bw = bstring.split_whitespace_bytes();
                let sw = s.split_whitespace();

                for (b, s) in bw.zip_eq(sw) {
                    prop_assert_eq!(b, s);
                }
            }

            #[test]
            fn split_ascii_whitespace(s: String) {
                let bstring = Str::from(&s);

                let bw = bstring.split_ascii_whitespace_bytes();
                let sw = s.split_ascii_whitespace();

                for (b, s) in bw.zip_eq(sw) {
                    prop_assert_eq!(b, s);
                }
            }

            #[test]
            fn lines(s: String) {
                let bstring = Str::from(&s);

                let bl = bstring.lines_bytes();
                let sl = s.lines();

                for (b, s) in bl.zip_eq(sl) {
                    prop_assert_eq!(b, s);
                }
            }

            #[test]
            fn split(s: String, pat: String) {
                let bstring = Str::from(&s);

                let bs = bstring.split_bytes(&pat);
                let ss = s.split(&pat);

                for (b, s) in bs.zip_eq(ss) {
                    prop_assert_eq!(b, s);
                }
            }

            #[test]
            fn split_n(s: String, pat: String, n in 0..5usize) {
                let bstring = Str::from(&s);

                let bs = bstring.splitn_bytes(n, &pat);
                let ss = s.splitn(n, &pat);

                for (b, s) in bs.zip_eq(ss) {
                    prop_assert_eq!(b, s);
                }
            }

            #[test]
            fn rsplit(s: String, pat: String) {
                let bstring = Str::from(&s);

                let bs = bstring.rsplit_bytes(&pat);
                let ss = s.rsplit(&pat);

                for (b, s) in bs.zip_eq(ss) {
                    prop_assert_eq!(b, s);
                }
            }

            #[test]
            fn rsplit_n(s: String, pat: String, n in 0..5usize) {
                let bstring = Str::from(&s);

                let bs = bstring.rsplitn_bytes(n, &pat);
                let ss = s.rsplitn(n, &pat);

                for (b, s) in bs.zip_eq(ss) {
                    prop_assert_eq!(b, s);
                }
            }
        }
    }
}