//! Percent-encoding and percent-decoding of strings and byte slices.
//!
//! To encode a string, do the following:
//!
//! ```rust
//! use urlencoding::encode;
//!
//! let encoded = encode("This string will be URL encoded.");
//! println!("{}", encoded);
//! // This%20string%20will%20be%20URL%20encoded.
//! ```
//!
//! To decode a string, it's only slightly different:
//!
//! ```rust
//! use urlencoding::decode;
//!
//! let decoded = decode("%F0%9F%91%BE%20Exterminate%21").expect("UTF-8");
//! println!("{}", decoded);
//! // 👾 Exterminate!
//! ```
//!
//! To decode allowing arbitrary bytes and invalid UTF-8:
//!
//! ```rust
//! use urlencoding::decode_binary;
//!
//! let binary = decode_binary(b"%F1%F2%F3%C0%C1%C2");
//! let decoded = String::from_utf8_lossy(&binary);
//! ```
//!
//! This library returns [`Cow`](https://doc.rust-lang.org/stable/std/borrow/enum.Cow.html) to avoid allocating when decoding/encoding is not needed. Call `.into_owned()` on the `Cow` to get a `Vec` or `String`.

// Single-file form of the vendored `urlencoding` sources: the contents of
// `vendor/urlencoding/src/enc.rs` and `vendor/urlencoding/src/dec.rs` are
// inlined as the `enc` and `dec` modules of this crate root (`lib.rs`).

mod enc {
    use std::borrow::Cow;
    use std::fmt;
    use std::io;
    use std::str;

    /// Wrapper type that implements `Display`. Encodes on the fly, without allocating.
    /// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`. Assumes UTF-8 encoding.
    ///
    /// ```rust
    /// use urlencoding::Encoded;
    /// format!("{}", Encoded("hello!"));
    /// ```
    #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
    #[repr(transparent)]
    pub struct Encoded<Str>(pub Str);

    impl<Str: AsRef<[u8]>> Encoded<Str> {
        /// Long way of writing `Encoded(data)`
        ///
        /// Takes any string-like type or a slice of bytes, either owned or borrowed.
        #[inline(always)]
        pub fn new(string: Str) -> Self {
            Self(string)
        }

        /// Percent-encodes the wrapped data.
        ///
        /// Returns `Cow::Borrowed` when no byte needed escaping, `Cow::Owned` otherwise.
        #[inline(always)]
        pub fn to_str(&self) -> Cow<'_, str> {
            encode_binary(self.0.as_ref())
        }

        /// Perform urlencoding to a string
        #[inline]
        // The inherent method deliberately shadows `ToString::to_string`; it yields the
        // same text as the `Display` impl, produced through `encode_binary`.
        #[allow(clippy::inherent_to_string_shadow_display)]
        pub fn to_string(&self) -> String {
            self.to_str().into_owned()
        }

        /// Perform urlencoding into a writer
        ///
        /// # Errors
        ///
        /// Propagates the first error returned by `writer.write_all`.
        #[inline]
        pub fn write<W: io::Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
            encode_into(self.0.as_ref(), false, |s| writer.write_all(s.as_bytes())).map(|_| ())
        }

        /// Perform urlencoding into a string
        #[inline]
        pub fn append_to(&self, string: &mut String) {
            append_string(self.0.as_ref(), string, false);
        }
    }

    impl<'a> Encoded<&'a str> {
        /// Same as new, but hints a more specific type, so you can avoid errors about `AsRef<[u8]>` not implemented
        /// on references-to-references.
        #[inline(always)]
        pub fn str(string: &'a str) -> Self {
            Self(string)
        }
    }

    impl<Str: AsRef<[u8]>> fmt::Display for Encoded<Str> {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            encode_into(self.0.as_ref(), false, |s| f.write_str(s)).map(|_| ())
        }
    }

    /// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`. Assumes UTF-8 encoding.
    ///
    /// Call `.into_owned()` if you need a `String`
    #[inline(always)]
    pub fn encode(data: &str) -> Cow<'_, str> {
        encode_binary(data.as_bytes())
    }

    /// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`.
    ///
    /// Returns `Cow::Borrowed` over the input when nothing had to be escaped.
    #[inline]
    pub fn encode_binary(data: &[u8]) -> Cow<'_, str> {
        // add maybe extra capacity, but try not to exceed allocator's bucket size
        let mut escaped = String::with_capacity(data.len() | 15);
        let unmodified = append_string(data, &mut escaped, true);
        if unmodified {
            // SAFETY: `encode_into` reports "unmodified" only when every byte of `data`
            // matched its set of unescaped ASCII characters, so `data` is valid UTF-8.
            return Cow::Borrowed(unsafe { str::from_utf8_unchecked(data) });
        }
        Cow::Owned(escaped)
    }

    /// Appends the encoded form of `data` to `escaped`.
    ///
    /// Returns `true` only when `may_skip` is set and `data` needed no escaping,
    /// in which case nothing was appended.
    fn append_string(data: &[u8], escaped: &mut String, may_skip: bool) -> bool {
        let result: Result<bool, std::convert::Infallible> = encode_into(data, may_skip, |s| {
            escaped.push_str(s);
            Ok(())
        });
        match result {
            Ok(unmodified) => unmodified,
            // `Infallible` has no values, so this arm cannot be reached and needs no panic.
            Err(never) => match never {},
        }
    }

    /// Core encoder: feeds `push_str` alternating runs of unescaped characters and
    /// `%XX` triplets.
    ///
    /// Returns `Ok(true)` without calling `push_str` when `may_skip_write` is set and the
    /// whole input consists of unescaped characters; otherwise `Ok(false)` once everything
    /// has been pushed. The first error returned by `push_str` is propagated.
    fn encode_into<E>(
        mut data: &[u8],
        may_skip_write: bool,
        mut push_str: impl FnMut(&str) -> Result<(), E>,
    ) -> Result<bool, E> {
        let mut pushed = false;
        loop {
            // Fast path to skip over safe chars at the beginning of the remaining string
            let ascii_len = data
                .iter()
                .take_while(|&&c| {
                    matches!(c, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'-' | b'.' | b'_' | b'~')
                })
                .count();

            let (safe, rest) = if ascii_len >= data.len() {
                if !pushed && may_skip_write {
                    return Ok(true);
                }
                (data, &[][..]) // redundant, to optimize out a panic in split_at
            } else {
                data.split_at(ascii_len)
            };
            pushed = true;
            if !safe.is_empty() {
                // SAFETY: `safe` holds only bytes accepted by the `matches!` above, all ASCII.
                push_str(unsafe { str::from_utf8_unchecked(safe) })?;
            }

            match rest.split_first() {
                Some((&byte, tail)) => {
                    let enc = &[b'%', to_hex_digit(byte >> 4), to_hex_digit(byte & 15)];
                    // SAFETY: `%` and the two hex digits of a nibble are ASCII.
                    push_str(unsafe { str::from_utf8_unchecked(enc) })?;
                    data = tail;
                }
                // Nothing left after the safe run.
                None => break,
            }
        }
        Ok(false)
    }

    /// Maps a nibble (`0..=15`) to its uppercase ASCII hex digit.
    ///
    /// Callers in this module only pass `byte >> 4` and `byte & 15`.
    #[inline]
    fn to_hex_digit(digit: u8) -> u8 {
        match digit {
            0..=9 => b'0' + digit,
            10..=255 => b'A' - 10 + digit,
        }
    }
}
pub use enc::encode;
pub use enc::encode_binary;
pub use enc::Encoded;

mod dec {
    use std::borrow::Cow;
    use std::string::FromUtf8Error;

    /// Returns the value of an ASCII hex digit (either case), or `None` for any other byte.
    #[inline]
    pub(crate) fn from_hex_digit(digit: u8) -> Option<u8> {
        match digit {
            b'0'..=b'9' => Some(digit - b'0'),
            b'A'..=b'F' => Some(digit - b'A' + 10),
            b'a'..=b'f' => Some(digit - b'a' + 10),
            _ => None,
        }
    }

    /// Decode percent-encoded string assuming UTF-8 encoding.
    ///
    /// If you need a `String`, call `.into_owned()` (not `.to_owned()`).
    ///
    /// Unencoded `+` is preserved literally, and _not_ changed to a space.
    ///
    /// # Errors
    ///
    /// Returns `FromUtf8Error` when the decoded bytes are not valid UTF-8.
    pub fn decode(data: &str) -> Result<Cow<'_, str>, FromUtf8Error> {
        match decode_binary(data.as_bytes()) {
            // Nothing was decoded, so the original `&str` can be handed back as is.
            Cow::Borrowed(_) => Ok(Cow::Borrowed(data)),
            Cow::Owned(s) => Ok(Cow::Owned(String::from_utf8(s)?)),
        }
    }

    /// Decode percent-encoded string as binary data, in any encoding.
    ///
    /// Unencoded `+` is preserved literally, and _not_ changed to a space.
    ///
    /// A `%` that is not followed by two hex digits is copied to the output unchanged.
    /// Input without any `%` is returned as `Cow::Borrowed`.
    pub fn decode_binary(data: &[u8]) -> Cow<'_, [u8]> {
        let offset = data.iter().take_while(|&&c| c != b'%').count();
        if offset >= data.len() {
            return Cow::Borrowed(data);
        }

        // The output is never longer than the input, so this capacity always suffices.
        let mut decoded: Vec<u8> = Vec::with_capacity(data.len());
        let mut out = NeverRealloc(&mut decoded);

        let (ascii, mut data) = data.split_at(offset);
        out.extend_from_slice(ascii);

        loop {
            // first the non-% part, copied verbatim
            let percent_at = match data.iter().position(|&c| c == b'%') {
                Some(pos) => pos,
                None => {
                    out.extend_from_slice(data);
                    break;
                }
            };
            out.extend_from_slice(&data[..percent_at]);
            let rest = &data[percent_at + 1..];

            // then decode one %xx
            match rest.get(0..2) {
                Some(&[first, second]) => match (from_hex_digit(first), from_hex_digit(second)) {
                    (Some(first_val), Some(second_val)) => {
                        out.push((first_val << 4) | second_val);
                        data = &rest[2..];
                    }
                    (Some(_), None) => {
                        // Keep `%` and the first digit; `second` is re-examined on the
                        // next iteration because it may itself be a `%`.
                        out.extend_from_slice(&[b'%', first]);
                        data = &rest[1..];
                    }
                    (None, _) => {
                        out.push(b'%');
                        data = rest;
                    }
                },
                _ => {
                    // too short
                    out.push(b'%');
                    out.extend_from_slice(rest);
                    break;
                }
            }
        }
        Cow::Owned(decoded)
    }

    /// `Vec` writer that never grows the vector beyond its current capacity.
    struct NeverRealloc<'a, T>(pub &'a mut Vec<T>);

    impl<T> NeverRealloc<'_, T> {
        /// Pushes `val` if there is spare capacity.
        #[inline]
        pub fn push(&mut self, val: T) {
            debug_assert!(
                self.0.len() < self.0.capacity(),
                "NeverRealloc: capacity exhausted"
            );
            // these branches only exist to remove redundant reallocation code
            // (the capacity is always sufficient)
            if self.0.len() != self.0.capacity() {
                self.0.push(val);
            }
        }

        /// Appends `val` if it fits in the spare capacity.
        #[inline]
        pub fn extend_from_slice(&mut self, val: &[T])
        where
            T: Clone,
        {
            debug_assert!(
                self.0.capacity() - self.0.len() >= val.len(),
                "NeverRealloc: capacity exhausted"
            );
            if self.0.capacity() - self.0.len() >= val.len() {
                self.0.extend_from_slice(val);
            }
        }
    }

    #[test]
    fn dec_borrows() {
        assert!(matches!(decode("hello"), Ok(Cow::Borrowed("hello"))));
        assert!(matches!(decode("hello%20"), Ok(Cow::Owned(s)) if s == "hello "));
        assert!(matches!(decode("%20hello"), Ok(Cow::Owned(s)) if s == " hello"));
    }
}
pub use dec::decode;
pub use dec::decode_binary;

#[cfg(test)]
mod tests {
    use super::dec::from_hex_digit;
    use super::*;

    #[test]
    fn it_encodes_successfully() {
        let expected = "this%20that";
        assert_eq!(expected, encode("this that"));
    }

    #[test]
    fn it_encodes_successfully_emoji() {
        let emoji_string = "👾 Exterminate!";
        let expected = "%F0%9F%91%BE%20Exterminate%21";
        assert_eq!(expected, encode(emoji_string));
    }

    #[test]
    fn it_decodes_successfully() {
        let expected = String::from("this that");
        let encoded = "this%20that";
        assert_eq!(expected, decode(encoded).unwrap());
    }

    #[test]
    fn it_decodes_successfully_emoji() {
        let expected = String::from("👾 Exterminate!");
        let encoded = "%F0%9F%91%BE%20Exterminate%21";
        assert_eq!(expected, decode(encoded).unwrap());
    }

    #[test]
    fn it_decodes_unsuccessfully_emoji() {
        let bad_encoded_string = "👾 Exterminate!";

        assert_eq!(bad_encoded_string, decode(bad_encoded_string).unwrap());
    }

    #[test]
    fn misc() {
        assert_eq!(3, from_hex_digit(b'3').unwrap());
        assert_eq!(10, from_hex_digit(b'a').unwrap());
        assert_eq!(15, from_hex_digit(b'F').unwrap());
        assert_eq!(None, from_hex_digit(b'G'));
        assert_eq!(None, from_hex_digit(9));

        assert_eq!("pureascii", encode("pureascii"));
        assert_eq!("pureascii", decode("pureascii").unwrap());
        assert_eq!("", encode(""));
        assert_eq!("", decode("").unwrap());
        assert_eq!("%26a%25b%21c.d%3Fe", encode("&a%b!c.d?e"));
        assert_eq!("%00", encode("\0"));
        assert_eq!("%00x", encode("\0x"));
        assert_eq!("x%00", encode("x\0"));
        assert_eq!("x%00x", encode("x\0x"));
        assert_eq!("aa%00%00bb", encode("aa\0\0bb"));
        assert_eq!("\0", decode("\0").unwrap());
        assert!(decode("%F0%0F%91%BE%20Hello%21").is_err());
        assert_eq!("this that", decode("this%20that").unwrap());
        assert_eq!("this that%", decode("this%20that%").unwrap());
        assert_eq!("this that%2", decode("this%20that%2").unwrap());
        assert_eq!("this that%%", decode("this%20that%%").unwrap());
        assert_eq!("this that%2%", decode("this%20that%2%").unwrap());
        assert_eq!("this%2that", decode("this%2that").unwrap());
        assert_eq!("this%%2that", decode("this%%2that").unwrap());
        assert_eq!("this%2x&that", decode("this%2x%26that").unwrap());
        assert_eq!("this%2&that", decode("this%2%26that").unwrap());
    }

    #[test]
    fn lazy_writer() {
        let mut s = "he".to_string();
        Encoded("llo").append_to(&mut s);
        assert_eq!("hello", s);

        assert_eq!("hello", Encoded("hello").to_string());
        assert_eq!("hello", format!("{}", Encoded("hello")));
        assert_eq!("hello", Encoded("hello").to_str());
        assert!(matches!(Encoded("hello").to_str(), std::borrow::Cow::Borrowed(_)));
    }

    #[test]
    fn whatwg_examples() {
        assert_eq!(*decode_binary(b"%25%s%1G"), b"%%s%1G"[..]);
        assert_eq!(*decode_binary("‽%25%2E".as_bytes()), b"\xE2\x80\xBD\x25\x2E"[..]);
        assert_eq!(encode("≡"), "%E2%89%A1");
        assert_eq!(encode("‽"), "%E2%80%BD");
        assert_eq!(encode("Say what‽"), "Say%20what%E2%80%BD");
    }
}