diff options
| author | mo khan <mo@mokhan.ca> | 2025-07-10 13:11:11 -0600 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-07-10 13:11:11 -0600 |
| commit | 01959b16a21b22b5df5f16569c2a8e8f92beecef (patch) | |
| tree | 32afa5d747c5466345c59ec52161a7cba3d6d755 /vendor/urlencoding | |
| parent | ff30574117a996df332e23d1fb6f65259b316b5b (diff) | |
chore: vendor dependencies
Diffstat (limited to 'vendor/urlencoding')
| -rw-r--r-- | vendor/urlencoding/.cargo-checksum.json | 1 | ||||
| -rw-r--r-- | vendor/urlencoding/Cargo.toml | 41 | ||||
| -rw-r--r-- | vendor/urlencoding/LICENSE | 20 | ||||
| -rw-r--r-- | vendor/urlencoding/README.md | 44 | ||||
| -rw-r--r-- | vendor/urlencoding/benches/bench.rs | 65 | ||||
| -rw-r--r-- | vendor/urlencoding/src/dec.rs | 109 | ||||
| -rw-r--r-- | vendor/urlencoding/src/enc.rs | 139 | ||||
| -rw-r--r-- | vendor/urlencoding/src/lib.rs | 133 |
8 files changed, 552 insertions, 0 deletions
diff --git a/vendor/urlencoding/.cargo-checksum.json b/vendor/urlencoding/.cargo-checksum.json new file mode 100644 index 00000000..abc814f7 --- /dev/null +++ b/vendor/urlencoding/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"7336fe00c121d144202b7e1787fbed8fed38898b160b94bba3e82ab90d31e2a9","LICENSE":"5a952e4a62a3022a7f39e9f0eb8d0638addd3efffb3f4092890923668152296e","README.md":"aef1306cb06344947dfb4e5e0d980303f31500fe95c7526c8bd3247f9cfcc78f","benches/bench.rs":"20a525b93dbe07cec14c29ded08af5df5b73655d5a244d42271a4547a59f90ab","src/dec.rs":"27f7a50d426fd295e88e440358e084bb1b2d54283c6cbbbc99ddce7383bb37a5","src/enc.rs":"50242a4f6b63deb98e016b6a6ebc122f519c4bead7c2c36ccce24d2268541ec8","src/lib.rs":"8be9baf406ede16922deb1947601a6e5405579521fa74fea41d1d3095dfe13de"},"package":"daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"}
\ No newline at end of file diff --git a/vendor/urlencoding/Cargo.toml b/vendor/urlencoding/Cargo.toml new file mode 100644 index 00000000..613faa43 --- /dev/null +++ b/vendor/urlencoding/Cargo.toml @@ -0,0 +1,41 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +name = "urlencoding" +version = "2.1.3" +authors = [ + "Kornel <kornel@geekhood.net>", + "Bertram Truong <b@bertramtruong.com>", +] +description = "A Rust library for doing URL percentage encoding." +homepage = "https://lib.rs/urlencoding" +readme = "README.md" +keywords = [ + "url", + "percent", + "escape", + "urlencode", + "urldecode", +] +categories = [ + "encoding", + "web-programming", +] +license = "MIT" +repository = "https://github.com/kornelski/rust_urlencoding" + +[package.metadata.docs.rs] +targets = ["x86_64-unknown-linux-gnu"] + +[badges.maintenance] +status = "looking-for-maintainer" diff --git a/vendor/urlencoding/LICENSE b/vendor/urlencoding/LICENSE new file mode 100644 index 00000000..52dd0552 --- /dev/null +++ b/vendor/urlencoding/LICENSE @@ -0,0 +1,20 @@ +© 2016 Bertram Truong +© 2021 Kornel Lesiński + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/vendor/urlencoding/README.md b/vendor/urlencoding/README.md new file mode 100644 index 00000000..3134060f --- /dev/null +++ b/vendor/urlencoding/README.md @@ -0,0 +1,44 @@ +# urlencoding + +[](https://lib.rs/crates/urlencoding) + +A tiny Rust library for doing URL percentage encoding and decoding. It percent-encodes everything except alphanumerics and `-`, `_`, `.`, `~`. + +When decoding `+` is not treated as a space. Error recovery from incomplete percent-escapes follows the [WHATWG URL standard](https://url.spec.whatwg.org/). + +## Usage + +To encode a string, do the following: + +```rust +use urlencoding::encode; + +let encoded = encode("This string will be URL encoded."); +println!("{}", encoded); +// This%20string%20will%20be%20URL%20encoded. +``` + +To decode a string, it's only slightly different: + +```rust +use urlencoding::decode; + +let decoded = decode("%F0%9F%91%BE%20Exterminate%21")?; +println!("{}", decoded); +// 👾 Exterminate! +``` + +To decode allowing arbitrary bytes and invalid UTF-8: + +```rust +use urlencoding::decode_binary; + +let binary = decode_binary(b"%F1%F2%F3%C0%C1%C2"); +let decoded = String::from_utf8_lossy(&binary); +``` + +This library returns [`Cow`](https://doc.rust-lang.org/stable/std/borrow/enum.Cow.html) to avoid allocating when decoding/encoding is not needed. Call `.into_owned()` on the `Cow` to get a `Vec` or `String`. + +## License + +This project is licensed under the MIT license. For more information see the `LICENSE` file. diff --git a/vendor/urlencoding/benches/bench.rs b/vendor/urlencoding/benches/bench.rs new file mode 100644 index 00000000..b6dc18c4 --- /dev/null +++ b/vendor/urlencoding/benches/bench.rs @@ -0,0 +1,65 @@ +#![feature(test)] +extern crate test; + +use urlencoding::*; +use test::Bencher; + +#[bench] +fn bench_enc_nop_short(b: &mut Bencher) { + b.iter(|| { + encode("hello") + }) +} +#[bench] +fn bench_enc_nop_long(b: &mut Bencher) { + b.iter(|| { + encode("Lorem-ipsum-dolor-sit-amet-consectetur-adipisicing-elit-sed-do-eiusmod-tempor-incididunt-ut-labore-et-dolore-magna-aliqua.Ut-enim-ad-minim-veniam-quis-nostrud\ + -exercitation-ullamco-laboris-nisi-ut-aliquip-ex-ea-commodo-consequat.Duis-aute-irure-dolor-in-reprehenderit-in-voluptate-velit-esse-cillum-dolore-eu-fugiat-nulla\ + -pariatur.Excepteur-sint-occaecat-cupidatat-non-proident-sunt-in-culpa-qui-officia-deserunt-mollit-anim-id-est-laborum.") + }) +} + +#[bench] +fn bench_dec_nop_short(b: &mut Bencher) { + b.iter(|| { + decode("hello") + }) +} +#[bench] +fn bench_dec_nop_long(b: &mut Bencher) { + b.iter(|| { + decode("Lorem-ipsum-dolor-sit-amet-consectetur-adipisicing-elit-sed-do-eiusmod-tempor-incididunt-ut-labore-et-dolore-magna-aliqua.Ut-enim-ad-minim-veniam-quis-nostrud\ + -exercitation-ullamco-laboris-nisi-ut-aliquip-ex-ea-commodo-consequat.Duis-aute-irure-dolor-in-reprehenderit-in-voluptate-velit-esse-cillum-dolore-eu-fugiat-nulla\ + -pariatur.Excepteur-sint-occaecat-cupidatat-non-proident-sunt-in-culpa-qui-officia-deserunt-mollit-anim-id-est-laborum.") + }) +} + +#[bench] +fn bench_enc_chg_short(b: &mut Bencher) { + b.iter(|| { + encode("he!!o") + }) +} +#[bench] +fn bench_enc_chg_long(b: &mut Bencher) { + b.iter(|| { + encode("Lorem ipsum dolor sit amet consectetur adipisicing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.Ut enim ad minim veniam quis nostrud\ + exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla\ + pariatur. Excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum.") + }) +} + +#[bench] +fn bench_dec_chg_short(b: &mut Bencher) { + b.iter(|| { + decode("he%26%26o") + }) +} +#[bench] +fn bench_dec_chg_long(b: &mut Bencher) { + b.iter(|| { + decode("Lorem%20ipsum%20dolor%20sit%20amet%20consectetur%20adipisicing%20elit%20sed%20do%20eiusmod%20tempor%20incididunt%20ut%20labore%20et%20dolore%20magna%20aliqua.Ut%20enim%20ad%20minim%20veniam%20quis%20nostrud\ + %20exercitation%20ullamco%20laboris%20nisi%20ut%20aliquip%20ex%20ea%20commodo%20consequat.Duis%20aute%20irure%20dolor%20in%20reprehenderit%20in%20voluptate%20velit%20esse%20cillum%20dolore%20eu%20fugiat%20nulla\ + %20pariatur.Excepteur%20sint%20occaecat%20cupidatat%20non%20proident%20sunt%20in%20culpa%20qui%20officia%20deserunt%20mollit%20anim%20id%20est%20laborum.") + }) +} diff --git a/vendor/urlencoding/src/dec.rs b/vendor/urlencoding/src/dec.rs new file mode 100644 index 00000000..d3e3c014 --- /dev/null +++ b/vendor/urlencoding/src/dec.rs @@ -0,0 +1,109 @@ +use std::borrow::Cow; +use std::string::FromUtf8Error; + +#[inline] +pub(crate) fn from_hex_digit(digit: u8) -> Option<u8> { + match digit { + b'0'..=b'9' => Some(digit - b'0'), + b'A'..=b'F' => Some(digit - b'A' + 10), + b'a'..=b'f' => Some(digit - b'a' + 10), + _ => None, + } +} + +/// Decode percent-encoded string assuming UTF-8 encoding. +/// +/// If you need a `String`, call `.into_owned()` (not `.to_owned()`). +/// +/// Unencoded `+` is preserved literally, and _not_ changed to a space. +pub fn decode(data: &str) -> Result<Cow<str>, FromUtf8Error> { + match decode_binary(data.as_bytes()) { + Cow::Borrowed(_) => Ok(Cow::Borrowed(data)), + Cow::Owned(s) => Ok(Cow::Owned(String::from_utf8(s)?)), + } +} + +/// Decode percent-encoded string as binary data, in any encoding. +/// +/// Unencoded `+` is preserved literally, and _not_ changed to a space. +pub fn decode_binary(data: &[u8]) -> Cow<[u8]> { + let offset = data.iter().take_while(|&&c| c != b'%').count(); + if offset >= data.len() { + return Cow::Borrowed(data) + } + + let mut decoded: Vec<u8> = Vec::with_capacity(data.len()); + let mut out = NeverRealloc(&mut decoded); + + let (ascii, mut data) = data.split_at(offset); + out.extend_from_slice(ascii); + + loop { + let mut parts = data.splitn(2, |&c| c == b'%'); + // first the decoded non-% part + let non_escaped_part = parts.next().unwrap(); + let rest = parts.next(); + if rest.is_none() && out.0.is_empty() { + // if empty there were no '%' in the string + return data.into(); + } + out.extend_from_slice(non_escaped_part); + + // then decode one %xx + match rest { + Some(rest) => match rest.get(0..2) { + Some(&[first, second]) => match from_hex_digit(first) { + Some(first_val) => match from_hex_digit(second) { + Some(second_val) => { + out.push((first_val << 4) | second_val); + data = &rest[2..]; + }, + None => { + out.extend_from_slice(&[b'%', first]); + data = &rest[1..]; + }, + }, + None => { + out.push(b'%'); + data = rest; + }, + }, + _ => { + // too short + out.push(b'%'); + out.extend_from_slice(rest); + break; + }, + }, + None => break, + } + } + Cow::Owned(decoded) +} + + +struct NeverRealloc<'a, T>(pub &'a mut Vec<T>); + +impl<T> NeverRealloc<'_, T> { + #[inline] + pub fn push(&mut self, val: T) { + // these branches only exist to remove redundant reallocation code + // (the capacity is always sufficient) + if self.0.len() != self.0.capacity() { + self.0.push(val); + } + } + #[inline] + pub fn extend_from_slice(&mut self, val: &[T]) where T: Clone { + if self.0.capacity() - self.0.len() >= val.len() { + self.0.extend_from_slice(val); + } + } +} + +#[test] +fn dec_borrows() { + assert!(matches!(decode("hello"), Ok(Cow::Borrowed("hello")))); + assert!(matches!(decode("hello%20"), Ok(Cow::Owned(s)) if s == "hello ")); + assert!(matches!(decode("%20hello"), Ok(Cow::Owned(s)) if s == " hello")); +} diff --git a/vendor/urlencoding/src/enc.rs b/vendor/urlencoding/src/enc.rs new file mode 100644 index 00000000..b345c74c --- /dev/null +++ b/vendor/urlencoding/src/enc.rs @@ -0,0 +1,139 @@ +use std::borrow::Cow; +use std::fmt; +use std::io; +use std::str; + +/// Wrapper type that implements `Display`. Encodes on the fly, without allocating. +/// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`. Assumes UTF-8 encoding. +/// +/// ```rust +/// use urlencoding::Encoded; +/// format!("{}", Encoded("hello!")); +/// ``` +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)] +#[repr(transparent)] +pub struct Encoded<Str>(pub Str); + +impl<Str: AsRef<[u8]>> Encoded<Str> { + /// Long way of writing `Encoded(data)` + /// + /// Takes any string-like type or a slice of bytes, either owned or borrowed. + #[inline(always)] + pub fn new(string: Str) -> Self { + Self(string) + } + + #[inline(always)] + pub fn to_str(&self) -> Cow<str> { + encode_binary(self.0.as_ref()) + } + + /// Perform urlencoding to a string + #[inline] + #[allow(clippy::inherent_to_string_shadow_display)] + pub fn to_string(&self) -> String { + self.to_str().into_owned() + } + + /// Perform urlencoding into a writer + #[inline] + pub fn write<W: io::Write>(&self, writer: &mut W) -> io::Result<()> { + encode_into(self.0.as_ref(), false, |s| writer.write_all(s.as_bytes()))?; + Ok(()) + } + + /// Perform urlencoding into a string + #[inline] + pub fn append_to(&self, string: &mut String) { + append_string(self.0.as_ref(), string, false); + } +} + +impl<'a> Encoded<&'a str> { + /// Same as new, but hints a more specific type, so you can avoid errors about `AsRef<[u8]>` not implemented + /// on references-to-references. + #[inline(always)] + pub fn str(string: &'a str) -> Self { + Self(string) + } +} + +impl<String: AsRef<[u8]>> fmt::Display for Encoded<String> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + encode_into(self.0.as_ref(), false, |s| f.write_str(s))?; + Ok(()) + } +} + +/// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`. Assumes UTF-8 encoding. +/// +/// Call `.into_owned()` if you need a `String` +#[inline(always)] +pub fn encode(data: &str) -> Cow<str> { + encode_binary(data.as_bytes()) +} + +/// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`. +#[inline] +pub fn encode_binary(data: &[u8]) -> Cow<str> { + // add maybe extra capacity, but try not to exceed allocator's bucket size + let mut escaped = String::with_capacity(data.len() | 15); + let unmodified = append_string(data, &mut escaped, true); + if unmodified { + return Cow::Borrowed(unsafe { + // encode_into has checked it's ASCII + str::from_utf8_unchecked(data) + }); + } + Cow::Owned(escaped) +} + +fn append_string(data: &[u8], escaped: &mut String, may_skip: bool) -> bool { + encode_into(data, may_skip, |s| { + escaped.push_str(s); + Ok::<_, std::convert::Infallible>(()) + }).unwrap() +} + +fn encode_into<E>(mut data: &[u8], may_skip_write: bool, mut push_str: impl FnMut(&str) -> Result<(), E>) -> Result<bool, E> { + let mut pushed = false; + loop { + // Fast path to skip over safe chars at the beginning of the remaining string + let ascii_len = data.iter() + .take_while(|&&c| matches!(c, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'-' | b'.' | b'_' | b'~')).count(); + + let (safe, rest) = if ascii_len >= data.len() { + if !pushed && may_skip_write { + return Ok(true); + } + (data, &[][..]) // redundatnt to optimize out a panic in split_at + } else { + data.split_at(ascii_len) + }; + pushed = true; + if !safe.is_empty() { + push_str(unsafe { str::from_utf8_unchecked(safe) })?; + } + if rest.is_empty() { + break; + } + + match rest.split_first() { + Some((byte, rest)) => { + let enc = &[b'%', to_hex_digit(byte >> 4), to_hex_digit(byte & 15)]; + push_str(unsafe { str::from_utf8_unchecked(enc) })?; + data = rest; + } + None => break, + }; + } + Ok(false) +} + +#[inline] +fn to_hex_digit(digit: u8) -> u8 { + match digit { + 0..=9 => b'0' + digit, + 10..=255 => b'A' - 10 + digit, + } +} diff --git a/vendor/urlencoding/src/lib.rs b/vendor/urlencoding/src/lib.rs new file mode 100644 index 00000000..1f2e560f --- /dev/null +++ b/vendor/urlencoding/src/lib.rs @@ -0,0 +1,133 @@ +//! To encode a string, do the following: +//! +//! ```rust +//! use urlencoding::encode; +//! +//! let encoded = encode("This string will be URL encoded."); +//! println!("{}", encoded); +//! // This%20string%20will%20be%20URL%20encoded. +//! ``` +//! +//! To decode a string, it's only slightly different: +//! +//! ```rust +//! use urlencoding::decode; +//! +//! let decoded = decode("%F0%9F%91%BE%20Exterminate%21").expect("UTF-8"); +//! println!("{}", decoded); +//! // 👾 Exterminate! +//! ``` +//! +//! To decode allowing arbitrary bytes and invalid UTF-8: +//! +//! ```rust +//! use urlencoding::decode_binary; +//! +//! let binary = decode_binary(b"%F1%F2%F3%C0%C1%C2"); +//! let decoded = String::from_utf8_lossy(&binary); +//! ``` +//! +//! This library returns [`Cow`](https://doc.rust-lang.org/stable/std/borrow/enum.Cow.html) to avoid allocating when decoding/encoding is not needed. Call `.into_owned()` on the `Cow` to get a `Vec` or `String`. + +mod enc; +pub use enc::encode; +pub use enc::encode_binary; +pub use enc::Encoded; + +mod dec; +pub use dec::decode; +pub use dec::decode_binary; + +#[cfg(test)] +mod tests { + use super::*; + use crate::dec::from_hex_digit; + + #[test] + fn it_encodes_successfully() { + let expected = "this%20that"; + assert_eq!(expected, encode("this that")); + } + + #[test] + fn it_encodes_successfully_emoji() { + let emoji_string = "👾 Exterminate!"; + let expected = "%F0%9F%91%BE%20Exterminate%21"; + assert_eq!(expected, encode(emoji_string)); + } + + #[test] + fn it_decodes_successfully() { + let expected = String::from("this that"); + let encoded = "this%20that"; + assert_eq!(expected, decode(encoded).unwrap()); + } + + #[test] + fn it_decodes_successfully_emoji() { + let expected = String::from("👾 Exterminate!"); + let encoded = "%F0%9F%91%BE%20Exterminate%21"; + assert_eq!(expected, decode(encoded).unwrap()); + } + + #[test] + fn it_decodes_unsuccessfully_emoji() { + let bad_encoded_string = "👾 Exterminate!"; + + assert_eq!(bad_encoded_string, decode(bad_encoded_string).unwrap()); + } + + + #[test] + fn misc() { + assert_eq!(3, from_hex_digit(b'3').unwrap()); + assert_eq!(10, from_hex_digit(b'a').unwrap()); + assert_eq!(15, from_hex_digit(b'F').unwrap()); + assert_eq!(None, from_hex_digit(b'G')); + assert_eq!(None, from_hex_digit(9)); + + assert_eq!("pureascii", encode("pureascii")); + assert_eq!("pureascii", decode("pureascii").unwrap()); + assert_eq!("", encode("")); + assert_eq!("", decode("").unwrap()); + assert_eq!("%26a%25b%21c.d%3Fe", encode("&a%b!c.d?e")); + assert_eq!("%00", encode("\0")); + assert_eq!("%00x", encode("\0x")); + assert_eq!("x%00", encode("x\0")); + assert_eq!("x%00x", encode("x\0x")); + assert_eq!("aa%00%00bb", encode("aa\0\0bb")); + assert_eq!("\0", decode("\0").unwrap()); + assert!(decode("%F0%0F%91%BE%20Hello%21").is_err()); + assert_eq!("this that", decode("this%20that").unwrap()); + assert_eq!("this that%", decode("this%20that%").unwrap()); + assert_eq!("this that%2", decode("this%20that%2").unwrap()); + assert_eq!("this that%%", decode("this%20that%%").unwrap()); + assert_eq!("this that%2%", decode("this%20that%2%").unwrap()); + assert_eq!("this%2that", decode("this%2that").unwrap()); + assert_eq!("this%%2that", decode("this%%2that").unwrap()); + assert_eq!("this%2x&that", decode("this%2x%26that").unwrap()); + // assert_eq!("this%2&that", decode("this%2%26that").unwrap()); + } + + #[test] + fn lazy_writer() { + let mut s = "he".to_string(); + Encoded("llo").append_to(&mut s); + assert_eq!("hello", s); + + assert_eq!("hello", Encoded("hello").to_string()); + assert_eq!("hello", format!("{}", Encoded("hello"))); + assert_eq!("hello", Encoded("hello").to_str()); + assert!(matches!(Encoded("hello").to_str(), std::borrow::Cow::Borrowed(_))); + } + + #[test] + fn whatwg_examples() { + assert_eq!(*decode_binary(b"%25%s%1G"), b"%%s%1G"[..]); + assert_eq!(*decode_binary("‽%25%2E".as_bytes()), b"\xE2\x80\xBD\x25\x2E"[..]); + assert_eq!(encode("≡"), "%E2%89%A1"); + assert_eq!(encode("‽"), "%E2%80%BD"); + assert_eq!(encode("Say what‽"), "Say%20what%E2%80%BD"); + } + +} |
