// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::{PotentialUtf16, PotentialUtf8}; use alloc::borrow::Cow; use core::fmt::Write; use writeable::{LengthHint, Part, PartsWrite, TryWriteable}; use core::{char::DecodeUtf16Error, fmt, str::Utf8Error}; /// This impl requires enabling the optional `writeable` Cargo feature impl TryWriteable for &'_ PotentialUtf8 { type Error = Utf8Error; fn try_write_to_parts( &self, sink: &mut S, ) -> Result, fmt::Error> { let mut remaining = &self.0; let mut r = Ok(()); loop { match core::str::from_utf8(remaining) { Ok(valid) => { sink.write_str(valid)?; return Ok(r); } Err(e) => { // SAFETY: By Utf8Error invariants let valid = unsafe { core::str::from_utf8_unchecked(remaining.get_unchecked(..e.valid_up_to())) }; sink.write_str(valid)?; sink.with_part(Part::ERROR, |s| s.write_char(char::REPLACEMENT_CHARACTER))?; if r.is_ok() { r = Err(e); } let Some(error_len) = e.error_len() else { return Ok(r); // end of string }; // SAFETY: By Utf8Error invariants remaining = unsafe { remaining.get_unchecked(e.valid_up_to() + error_len..) } } } } } fn writeable_length_hint(&self) -> LengthHint { // Lower bound is all valid UTF-8, upper bound is all bytes with the high bit, which become replacement characters. LengthHint::between(self.0.len(), self.0.len() * 3) } fn try_write_to_string(&self) -> Result, (Self::Error, Cow)> { match core::str::from_utf8(&self.0) { Ok(valid) => Ok(Cow::Borrowed(valid)), Err(e) => { // SAFETY: By Utf8Error invariants let valid = unsafe { core::str::from_utf8_unchecked(self.0.get_unchecked(..e.valid_up_to())) }; // Let's assume this is the only error let mut out = alloc::string::String::with_capacity( self.0.len() + char::REPLACEMENT_CHARACTER.len_utf8() - e.error_len().unwrap_or(0), ); out.push_str(valid); out.push(char::REPLACEMENT_CHARACTER); // If there's more, we can use `try_write_to` if let Some(error_len) = e.error_len() { // SAFETY: By Utf8Error invariants let remaining = unsafe { self.0.get_unchecked(e.valid_up_to() + error_len..) }; let _discard = PotentialUtf8::from_bytes(remaining).try_write_to(&mut out); } Err((e, Cow::Owned(out))) } } } } /// This impl requires enabling the optional `writeable` Cargo feature impl TryWriteable for &'_ PotentialUtf16 { type Error = DecodeUtf16Error; fn try_write_to_parts( &self, sink: &mut S, ) -> Result, fmt::Error> { let mut r = Ok(()); for c in core::char::decode_utf16(self.0.iter().copied()) { match c { Ok(c) => sink.write_char(c)?, Err(e) => { if r.is_ok() { r = Err(e); } sink.with_part(Part::ERROR, |s| s.write_char(char::REPLACEMENT_CHARACTER))?; } } } Ok(r) } fn writeable_length_hint(&self) -> LengthHint { // Lower bound is all ASCII, upper bound is all 3-byte code points (including replacement character) LengthHint::between(self.0.len(), self.0.len() * 3) } } #[cfg(test)] mod test { #![allow(invalid_from_utf8)] // only way to construct the error use super::*; use writeable::assert_try_writeable_parts_eq; #[test] fn test_utf8() { assert_try_writeable_parts_eq!( PotentialUtf8::from_bytes(b"Foo Bar"), "Foo Bar", Ok(()), [] ); assert_try_writeable_parts_eq!( PotentialUtf8::from_bytes(b"Foo\xFDBar"), "Foo�Bar", Err(core::str::from_utf8(b"Foo\xFDBar").unwrap_err()), [(3, 6, Part::ERROR)] ); assert_try_writeable_parts_eq!( PotentialUtf8::from_bytes(b"Foo\xFDBar\xff"), "Foo�Bar�", Err(core::str::from_utf8(b"Foo\xFDBar\xff").unwrap_err()), [(3, 6, Part::ERROR), (9, 12, Part::ERROR)], ); } #[test] fn test_utf16() { assert_try_writeable_parts_eq!( PotentialUtf16::from_slice(&[0xD83E, 0xDD73]), "🥳", Ok(()), [] ); assert_try_writeable_parts_eq!( PotentialUtf16::from_slice(&[0xD83E, 0x20, 0xDD73]), "� �", Err(core::char::decode_utf16([0xD83E].into_iter()) .next() .unwrap() .unwrap_err()), [(0, 3, Part::ERROR), (4, 7, Part::ERROR)] ); } }