diff options
| author | mo khan <mo@mokhan.ca> | 2025-07-10 13:11:11 -0600 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-07-10 13:11:11 -0600 |
| commit | 01959b16a21b22b5df5f16569c2a8e8f92beecef (patch) | |
| tree | 32afa5d747c5466345c59ec52161a7cba3d6d755 /vendor/potential_utf/src/writeable.rs | |
| parent | ff30574117a996df332e23d1fb6f65259b316b5b (diff) | |
chore: vendor dependencies
Diffstat (limited to 'vendor/potential_utf/src/writeable.rs')
| -rw-r--r-- | vendor/potential_utf/src/writeable.rs | 159 |
1 files changed, 159 insertions, 0 deletions
```diff
diff --git a/vendor/potential_utf/src/writeable.rs b/vendor/potential_utf/src/writeable.rs
new file mode 100644
index 00000000..cd489914
--- /dev/null
+++ b/vendor/potential_utf/src/writeable.rs
@@ -0,0 +1,159 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::{PotentialUtf16, PotentialUtf8};
+use alloc::borrow::Cow;
+use core::fmt::Write;
+use writeable::{LengthHint, Part, PartsWrite, TryWriteable};
+
+use core::{char::DecodeUtf16Error, fmt, str::Utf8Error};
+
+/// This impl requires enabling the optional `writeable` Cargo feature
+impl TryWriteable for &'_ PotentialUtf8 {
+    type Error = Utf8Error;
+
+    fn try_write_to_parts<S: PartsWrite + ?Sized>(
+        &self,
+        sink: &mut S,
+    ) -> Result<Result<(), Self::Error>, fmt::Error> {
+        let mut remaining = &self.0;
+        let mut r = Ok(());
+        loop {
+            match core::str::from_utf8(remaining) {
+                Ok(valid) => {
+                    sink.write_str(valid)?;
+                    return Ok(r);
+                }
+                Err(e) => {
+                    // SAFETY: By Utf8Error invariants
+                    let valid = unsafe {
+                        core::str::from_utf8_unchecked(remaining.get_unchecked(..e.valid_up_to()))
+                    };
+                    sink.write_str(valid)?;
+                    sink.with_part(Part::ERROR, |s| s.write_char(char::REPLACEMENT_CHARACTER))?;
+                    if r.is_ok() {
+                        r = Err(e);
+                    }
+                    let Some(error_len) = e.error_len() else {
+                        return Ok(r); // end of string
+                    };
+                    // SAFETY: By Utf8Error invariants
+                    remaining = unsafe { remaining.get_unchecked(e.valid_up_to() + error_len..) }
+                }
+            }
+        }
+    }
+
+    fn writeable_length_hint(&self) -> LengthHint {
+        // Lower bound is all valid UTF-8, upper bound is all bytes with the high bit, which become replacement characters.
+        LengthHint::between(self.0.len(), self.0.len() * 3)
+    }
+
+    fn try_write_to_string(&self) -> Result<Cow<str>, (Self::Error, Cow<str>)> {
+        match core::str::from_utf8(&self.0) {
+            Ok(valid) => Ok(Cow::Borrowed(valid)),
+            Err(e) => {
+                // SAFETY: By Utf8Error invariants
+                let valid = unsafe {
+                    core::str::from_utf8_unchecked(self.0.get_unchecked(..e.valid_up_to()))
+                };
+
+                // Let's assume this is the only error
+                let mut out = alloc::string::String::with_capacity(
+                    self.0.len() + char::REPLACEMENT_CHARACTER.len_utf8()
+                        - e.error_len().unwrap_or(0),
+                );
+
+                out.push_str(valid);
+                out.push(char::REPLACEMENT_CHARACTER);
+
+                // If there's more, we can use `try_write_to`
+                if let Some(error_len) = e.error_len() {
+                    // SAFETY: By Utf8Error invariants
+                    let remaining = unsafe { self.0.get_unchecked(e.valid_up_to() + error_len..) };
+                    let _discard = PotentialUtf8::from_bytes(remaining).try_write_to(&mut out);
+                }
+
+                Err((e, Cow::Owned(out)))
+            }
+        }
+    }
+}
+
+/// This impl requires enabling the optional `writeable` Cargo feature
+impl TryWriteable for &'_ PotentialUtf16 {
+    type Error = DecodeUtf16Error;
+
+    fn try_write_to_parts<S: PartsWrite + ?Sized>(
+        &self,
+        sink: &mut S,
+    ) -> Result<Result<(), Self::Error>, fmt::Error> {
+        let mut r = Ok(());
+        for c in core::char::decode_utf16(self.0.iter().copied()) {
+            match c {
+                Ok(c) => sink.write_char(c)?,
+                Err(e) => {
+                    if r.is_ok() {
+                        r = Err(e);
+                    }
+                    sink.with_part(Part::ERROR, |s| s.write_char(char::REPLACEMENT_CHARACTER))?;
+                }
+            }
+        }
+        Ok(r)
+    }
+
+    fn writeable_length_hint(&self) -> LengthHint {
+        // Lower bound is all ASCII, upper bound is all 3-byte code points (including replacement character)
+        LengthHint::between(self.0.len(), self.0.len() * 3)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    #![allow(invalid_from_utf8)] // only way to construct the error
+    use super::*;
+    use writeable::assert_try_writeable_parts_eq;
+
+    #[test]
+    fn test_utf8() {
+        assert_try_writeable_parts_eq!(
+            PotentialUtf8::from_bytes(b"Foo Bar"),
+            "Foo Bar",
+            Ok(()),
+            []
+        );
+        assert_try_writeable_parts_eq!(
+            PotentialUtf8::from_bytes(b"Foo\xFDBar"),
+            "Foo�Bar",
+            Err(core::str::from_utf8(b"Foo\xFDBar").unwrap_err()),
+            [(3, 6, Part::ERROR)]
+        );
+        assert_try_writeable_parts_eq!(
+            PotentialUtf8::from_bytes(b"Foo\xFDBar\xff"),
+            "Foo�Bar�",
+            Err(core::str::from_utf8(b"Foo\xFDBar\xff").unwrap_err()),
+            [(3, 6, Part::ERROR), (9, 12, Part::ERROR)],
+        );
+    }
+
+    #[test]
+    fn test_utf16() {
+        assert_try_writeable_parts_eq!(
+            PotentialUtf16::from_slice(&[0xD83E, 0xDD73]),
+            "🥳",
+            Ok(()),
+            []
+        );
+        assert_try_writeable_parts_eq!(
+            PotentialUtf16::from_slice(&[0xD83E, 0x20, 0xDD73]),
+            "� �",
+            Err(core::char::decode_utf16([0xD83E].into_iter())
+                .next()
+                .unwrap()
+                .unwrap_err()),
+            [(0, 3, Part::ERROR), (4, 7, Part::ERROR)]
+        );
+    }
+}
```
