From 8cdfa445d6629ffef4cb84967ff7017654045bc2 Mon Sep 17 00:00:00 2001 From: mo khan Date: Wed, 2 Jul 2025 18:36:06 -0600 Subject: chore: add vendor directory --- vendor/unicode-normalization/src/recompose.rs | 169 ++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 vendor/unicode-normalization/src/recompose.rs (limited to 'vendor/unicode-normalization/src/recompose.rs') diff --git a/vendor/unicode-normalization/src/recompose.rs b/vendor/unicode-normalization/src/recompose.rs new file mode 100644 index 00000000..4effa861 --- /dev/null +++ b/vendor/unicode-normalization/src/recompose.rs @@ -0,0 +1,169 @@ +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use crate::decompose::Decompositions; +use core::{ + fmt::{self, Write}, + iter::FusedIterator, +}; +use tinyvec::TinyVec; + +#[derive(Clone)] +enum RecompositionState { + Composing, + Purging(usize), + Finished(usize), +} + +/// External iterator for a string recomposition's characters. +#[derive(Clone)] +pub struct Recompositions { + iter: Decompositions, + state: RecompositionState, + buffer: TinyVec<[char; 4]>, + composee: Option, + last_ccc: Option, +} + +impl> Recompositions { + /// Create a new recomposition iterator for canonical compositions (NFC) + /// + /// Note that this iterator can also be obtained by directly calling [`.nfc()`](crate::UnicodeNormalization::nfc) + /// on the iterator. + #[inline] + pub fn new_canonical(iter: I) -> Self { + Recompositions { + iter: Decompositions::new_canonical(iter), + state: self::RecompositionState::Composing, + buffer: TinyVec::new(), + composee: None, + last_ccc: None, + } + } + + /// Create a new recomposition iterator for compatability compositions (NFkC) + /// + /// Note that this iterator can also be obtained by directly calling [`.nfkc()`](crate::UnicodeNormalization::nfkc) + /// on the iterator. + #[inline] + pub fn new_compatible(iter: I) -> Self { + Recompositions { + iter: Decompositions::new_compatible(iter), + state: self::RecompositionState::Composing, + buffer: TinyVec::new(), + composee: None, + last_ccc: None, + } + } +} + +impl> Iterator for Recompositions { + type Item = char; + + #[inline] + fn next(&mut self) -> Option { + use self::RecompositionState::*; + + loop { + match self.state { + Composing => { + for ch in self.iter.by_ref() { + let ch_class = super::char::canonical_combining_class(ch); + let k = match self.composee { + None => { + if ch_class != 0 { + return Some(ch); + } + self.composee = Some(ch); + continue; + } + Some(k) => k, + }; + match self.last_ccc { + None => match super::char::compose(k, ch) { + Some(r) => { + self.composee = Some(r); + continue; + } + None => { + if ch_class == 0 { + self.composee = Some(ch); + return Some(k); + } + self.buffer.push(ch); + self.last_ccc = Some(ch_class); + } + }, + Some(l_class) => { + if l_class >= ch_class { + // `ch` is blocked from `composee` + if ch_class == 0 { + self.composee = Some(ch); + self.last_ccc = None; + self.state = Purging(0); + return Some(k); + } + self.buffer.push(ch); + self.last_ccc = Some(ch_class); + continue; + } + match super::char::compose(k, ch) { + Some(r) => { + self.composee = Some(r); + continue; + } + None => { + self.buffer.push(ch); + self.last_ccc = Some(ch_class); + } + } + } + } + } + self.state = Finished(0); + if self.composee.is_some() { + return self.composee.take(); + } + } + Purging(next) => match self.buffer.get(next).cloned() { + None => { + self.buffer.clear(); + self.state = Composing; + } + s => { + self.state = Purging(next + 1); + return s; + } + }, + Finished(next) => match self.buffer.get(next).cloned() { + None => { + self.buffer.clear(); + return self.composee.take(); + } + s => { + self.state = Finished(next + 1); + return s; + } + }, + } + } + } +} + +impl + FusedIterator> FusedIterator for Recompositions {} + +impl + Clone> fmt::Display for Recompositions { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for c in self.clone() { + f.write_char(c)?; + } + Ok(()) + } +} -- cgit v1.2.3