summaryrefslogtreecommitdiff
path: root/vendor/urlencoding/src
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/urlencoding/src')
-rw-r--r--vendor/urlencoding/src/dec.rs109
-rw-r--r--vendor/urlencoding/src/enc.rs139
-rw-r--r--vendor/urlencoding/src/lib.rs133
3 files changed, 381 insertions, 0 deletions
diff --git a/vendor/urlencoding/src/dec.rs b/vendor/urlencoding/src/dec.rs
new file mode 100644
index 00000000..d3e3c014
--- /dev/null
+++ b/vendor/urlencoding/src/dec.rs
@@ -0,0 +1,109 @@
+use std::borrow::Cow;
+use std::string::FromUtf8Error;
+
+/// Converts one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its numeric value.
+///
+/// Returns `None` for any byte that is not a hex digit.
+#[inline]
+pub(crate) fn from_hex_digit(digit: u8) -> Option<u8> {
+    // A byte widens to the code point with the same number, so only the
+    // ASCII hex digits can be recognized here.
+    let value = char::from(digit).to_digit(16)?;
+    // `to_digit(16)` never yields more than 15, so the narrowing is lossless.
+    Some(value as u8)
+}
+
+/// Percent-decodes `data` and interprets the resulting bytes as UTF-8.
+///
+/// Input that contains no `%` is returned borrowed; call `.into_owned()`
+/// (not `.to_owned()`) if you need a `String`.
+///
+/// A literal `+` is left untouched; it is _not_ turned into a space.
+///
+/// Returns an error if the decoded bytes are not valid UTF-8.
+pub fn decode(data: &str) -> Result<Cow<str>, FromUtf8Error> {
+    let bytes = match decode_binary(data.as_bytes()) {
+        // Nothing was rewritten, so the original `&str` is still the answer.
+        Cow::Borrowed(_) => return Ok(Cow::Borrowed(data)),
+        Cow::Owned(bytes) => bytes,
+    };
+    String::from_utf8(bytes).map(Cow::Owned)
+}
+
+/// Decode percent-encoded string as binary data, in any encoding.
+///
+/// Unencoded `+` is preserved literally, and _not_ changed to a space.
+///
+/// Input without any `%` is returned borrowed. Otherwise an owned buffer is
+/// built in which every `%` followed by two hex digits is replaced by the byte
+/// they encode; a `%` that is not followed by two hex digits is copied through
+/// unchanged.
+pub fn decode_binary(data: &[u8]) -> Cow<[u8]> {
+ // length of the leading run that contains no '%'
+ let offset = data.iter().take_while(|&&c| c != b'%').count();
+ if offset >= data.len() {
+ return Cow::Borrowed(data)
+ }
+
+ // decoding never produces more bytes than it consumes, so the input length
+ // is a sufficient capacity; `NeverRealloc` relies on that
+ let mut decoded: Vec<u8> = Vec::with_capacity(data.len());
+ let mut out = NeverRealloc(&mut decoded);
+
+ // copy the '%'-free prefix verbatim; `data` now starts at the first '%'
+ let (ascii, mut data) = data.split_at(offset);
+ out.extend_from_slice(ascii);
+
+ loop {
+ let mut parts = data.splitn(2, |&c| c == b'%');
+ // first the decoded non-% part
+ let non_escaped_part = parts.next().unwrap();
+ // everything after the next '%', or `None` when no '%' is left
+ let rest = parts.next();
+ // NOTE(review): this guard looks unreachable -- the first pass always
+ // finds a '%', and every pass that loops again has already written at
+ // least one byte. Confirm before removing.
+ if rest.is_none() && out.0.is_empty() {
+ // if empty there were no '%' in the string
+ return data.into();
+ }
+ out.extend_from_slice(non_escaped_part);
+
+ // then decode one %xx
+ match rest {
+ Some(rest) => match rest.get(0..2) {
+ Some(&[first, second]) => match from_hex_digit(first) {
+ Some(first_val) => match from_hex_digit(second) {
+ Some(second_val) => {
+ // valid escape: emit the byte and continue after both digits
+ out.push((first_val << 4) | second_val);
+ data = &rest[2..];
+ },
+ None => {
+ // second character is not a hex digit: keep '%' and the first
+ // digit, then re-scan from the second character (it may be a '%')
+ out.extend_from_slice(&[b'%', first]);
+ data = &rest[1..];
+ },
+ },
+ None => {
+ // first character is not a hex digit: keep the '%' and re-scan
+ // from the character right after it
+ out.push(b'%');
+ data = rest;
+ },
+ },
+ _ => {
+ // too short
+ // (fewer than two bytes follow the '%': copy the tail as-is and stop)
+ out.push(b'%');
+ out.extend_from_slice(rest);
+ break;
+ },
+ },
+ // no '%' left; the remaining bytes were already copied above
+ None => break,
+ }
+ }
+ Cow::Owned(decoded)
+}
+
+
+/// Write-only view of a pre-sized `Vec` whose methods skip the write instead
+/// of growing the buffer when the spare capacity is not enough.
+struct NeverRealloc<'a, T>(pub &'a mut Vec<T>);
+
+impl<T> NeverRealloc<'_, T> {
+    /// Appends `val` unless the vector is already filled to capacity.
+    #[inline]
+    pub fn push(&mut self, val: T) {
+        let buf = &mut *self.0;
+        // The capacity is expected to always suffice; the guard only exists so
+        // that no reallocation code is generated for this call.
+        if buf.len() == buf.capacity() {
+            return;
+        }
+        buf.push(val);
+    }
+
+    /// Appends all of `val` if it fits in the spare capacity; otherwise appends nothing.
+    #[inline]
+    pub fn extend_from_slice(&mut self, val: &[T]) where T: Clone {
+        let buf = &mut *self.0;
+        let spare = buf.capacity() - buf.len();
+        if val.len() <= spare {
+            buf.extend_from_slice(val);
+        }
+    }
+}
+
+#[test]
+fn dec_borrows() {
+    // No `%` in the input: the very same string must come back borrowed.
+    let untouched = decode("hello");
+    assert!(matches!(untouched, Ok(Cow::Borrowed("hello"))));
+
+    // An escape anywhere in the input forces an owned, decoded copy.
+    for &(input, expected) in &[("hello%20", "hello "), ("%20hello", " hello")] {
+        assert!(matches!(decode(input), Ok(Cow::Owned(s)) if s == expected));
+    }
+}
diff --git a/vendor/urlencoding/src/enc.rs b/vendor/urlencoding/src/enc.rs
new file mode 100644
index 00000000..b345c74c
--- /dev/null
+++ b/vendor/urlencoding/src/enc.rs
@@ -0,0 +1,139 @@
+use std::borrow::Cow;
+use std::fmt;
+use std::io;
+use std::str;
+
+/// Wrapper type that implements `Display`. Encodes on the fly, without allocating.
+/// Percent-encodes every byte except ASCII alphanumerics and `-`, `_`, `.`, `~`. Assumes UTF-8 encoding.
+///
+/// The encoding methods and the `Display` impl are available whenever the
+/// wrapped value implements `AsRef<[u8]>`.
+///
+/// ```rust
+/// use urlencoding::Encoded;
+/// format!("{}", Encoded("hello!"));
+/// ```
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
+#[repr(transparent)]
+pub struct Encoded<Str>(pub Str);
+
+impl<Str: AsRef<[u8]>> Encoded<Str> {
+    /// Wraps `string`; equivalent to writing `Encoded(string)`.
+    ///
+    /// Accepts any string-like type or a slice of bytes, either owned or borrowed.
+    #[inline(always)]
+    pub fn new(string: Str) -> Self {
+        Encoded(string)
+    }
+
+    /// Percent-encodes the wrapped data, borrowing it when nothing needs escaping.
+    #[inline(always)]
+    pub fn to_str(&self) -> Cow<str> {
+        let raw: &[u8] = self.0.as_ref();
+        encode_binary(raw)
+    }
+
+    /// Percent-encodes the wrapped data into a newly allocated `String`.
+    #[inline]
+    #[allow(clippy::inherent_to_string_shadow_display)]
+    pub fn to_string(&self) -> String {
+        String::from(self.to_str())
+    }
+
+    /// Percent-encodes the wrapped data and writes the result to `writer`.
+    #[inline]
+    pub fn write<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
+        let raw: &[u8] = self.0.as_ref();
+        encode_into(raw, false, |chunk| writer.write_all(chunk.as_bytes())).map(|_| ())
+    }
+
+    /// Percent-encodes the wrapped data and appends the result to `string`.
+    #[inline]
+    pub fn append_to(&self, string: &mut String) {
+        let raw: &[u8] = self.0.as_ref();
+        append_string(raw, string, false);
+    }
+}
+
+impl<'a> Encoded<&'a str> {
+    /// Like `new`, but pinned to `&str`. The more specific type avoids errors
+    /// about `AsRef<[u8]>` not being implemented on references-to-references.
+    #[inline(always)]
+    pub fn str(string: &'a str) -> Self {
+        Encoded(string)
+    }
+}
+
+// The type parameter is called `Str`, as on the struct and the inherent impl;
+// naming it `String` would shadow `std::string::String` inside this impl.
+impl<Str: AsRef<[u8]>> fmt::Display for Encoded<Str> {
+    /// Writes the percent-encoded form of the wrapped data to `f` piece by
+    /// piece, without building an intermediate `String`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        encode_into(self.0.as_ref(), false, |s| f.write_str(s))?;
+        Ok(())
+    }
+}
+
+/// Percent-encodes the UTF-8 bytes of `data`, leaving only ASCII alphanumerics
+/// and `-`, `_`, `.`, `~` unescaped.
+///
+/// The result borrows `data` when nothing had to be escaped; call
+/// `.into_owned()` if you need a `String`.
+#[inline(always)]
+pub fn encode(data: &str) -> Cow<str> {
+    let utf8 = data.as_bytes();
+    encode_binary(utf8)
+}
+
+/// Percent-encodes every byte of `data` except ASCII alphanumerics and `-`, `_`, `.`, `~`.
+///
+/// Returns the input borrowed as a `str` when no byte needed escaping, and an
+/// owned `String` otherwise.
+#[inline]
+pub fn encode_binary(data: &[u8]) -> Cow<str> {
+    // Reserve a little extra room, rounded so the request is unlikely to spill
+    // into the allocator's next bucket size.
+    let mut escaped = String::with_capacity(data.len() | 15);
+    if !append_string(data, &mut escaped, true) {
+        return Cow::Owned(escaped);
+    }
+    // SAFETY: `append_string` reports `true` only when `encode_into` found every
+    // byte of `data` in its ASCII safe set, so `data` is valid UTF-8.
+    Cow::Borrowed(unsafe { str::from_utf8_unchecked(data) })
+}
+
+/// Percent-encodes `data` onto the end of `escaped`.
+///
+/// Returns `true` only when `may_skip` is set and `data` needed no escaping at
+/// all; in that case nothing has been appended.
+fn append_string(data: &[u8], escaped: &mut String, may_skip: bool) -> bool {
+    let outcome: Result<bool, std::convert::Infallible> =
+        encode_into(data, may_skip, |chunk| {
+            escaped.push_str(chunk);
+            Ok(())
+        });
+    match outcome {
+        Ok(unmodified) => unmodified,
+        // Pushing onto a `String` cannot fail; the error type has no values.
+        Err(never) => match never {},
+    }
+}
+
+/// Percent-encodes `data` and hands the output to `push_str` in pieces: runs of
+/// unescaped bytes as one piece each, and every escaped byte as a three-byte `%XX` piece.
+///
+/// Returns `Ok(true)` only when `may_skip_write` is set and the whole input
+/// consists of safe bytes; `push_str` has not been called in that case.
+/// Otherwise all output is pushed and `Ok(false)` is returned. An error from
+/// `push_str` is returned to the caller immediately.
+fn encode_into<E>(mut data: &[u8], may_skip_write: bool, mut push_str: impl FnMut(&str) -> Result<(), E>) -> Result<bool, E> {
+ // becomes true once this call has passed the skip check for the first time
+ let mut pushed = false;
+ loop {
+ // Fast path to skip over safe chars at the beginning of the remaining string
+ let ascii_len = data.iter()
+ .take_while(|&&c| matches!(c, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'-' | b'.' | b'_' | b'~')).count();
+
+ let (safe, rest) = if ascii_len >= data.len() {
+ // everything that is left is safe; on the very first pass the caller
+ // may ask to be told so instead of receiving a copy
+ if !pushed && may_skip_write {
+ return Ok(true);
+ }
+ (data, &[][..]) // redundant to optimize out a panic in split_at
+ } else {
+ data.split_at(ascii_len)
+ };
+ pushed = true;
+ if !safe.is_empty() {
+ // SAFETY: `safe` holds only bytes matched by the ASCII set above
+ push_str(unsafe { str::from_utf8_unchecked(safe) })?;
+ }
+ if rest.is_empty() {
+ break;
+ }
+
+ // `rest` is non-empty here, so its first byte is the one that needs escaping
+ match rest.split_first() {
+ Some((byte, rest)) => {
+ // high nibble first, then low nibble
+ let enc = &[b'%', to_hex_digit(byte >> 4), to_hex_digit(byte & 15)];
+ // SAFETY: `enc` is `%` followed by two hex digits for values below 16, all ASCII
+ push_str(unsafe { str::from_utf8_unchecked(enc) })?;
+ data = rest;
+ }
+ // not taken: `rest` was checked to be non-empty just above
+ None => break,
+ };
+ }
+ Ok(false)
+}
+
+/// Maps a nibble (`0..=15`) to its uppercase ASCII hex digit.
+///
+/// The only caller passes values below 16.
+#[inline]
+fn to_hex_digit(digit: u8) -> u8 {
+    // `0..=9` are offset from '0'; everything else is offset so that 10 lands on 'A'.
+    let base = if digit < 10 { b'0' } else { b'A' - 10 };
+    base + digit
+}
diff --git a/vendor/urlencoding/src/lib.rs b/vendor/urlencoding/src/lib.rs
new file mode 100644
index 00000000..1f2e560f
--- /dev/null
+++ b/vendor/urlencoding/src/lib.rs
@@ -0,0 +1,133 @@
+//! To encode a string, do the following:
+//!
+//! ```rust
+//! use urlencoding::encode;
+//!
+//! let encoded = encode("This string will be URL encoded.");
+//! println!("{}", encoded);
+//! // This%20string%20will%20be%20URL%20encoded.
+//! ```
+//!
+//! To decode a string, it's only slightly different:
+//!
+//! ```rust
+//! use urlencoding::decode;
+//!
+//! let decoded = decode("%F0%9F%91%BE%20Exterminate%21").expect("UTF-8");
+//! println!("{}", decoded);
+//! // 👾 Exterminate!
+//! ```
+//!
+//! To decode allowing arbitrary bytes and invalid UTF-8:
+//!
+//! ```rust
+//! use urlencoding::decode_binary;
+//!
+//! let binary = decode_binary(b"%F1%F2%F3%C0%C1%C2");
+//! let decoded = String::from_utf8_lossy(&binary);
+//! ```
+//!
+//! This library returns [`Cow`](https://doc.rust-lang.org/stable/std/borrow/enum.Cow.html) to avoid allocating when decoding/encoding is not needed. Call `.into_owned()` on the `Cow` to get a `Vec` or `String`.
+
+mod enc;
+pub use enc::encode;
+pub use enc::encode_binary;
+pub use enc::Encoded;
+
+mod dec;
+pub use dec::decode;
+pub use dec::decode_binary;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::dec::from_hex_digit;
+
+    #[test]
+    fn it_encodes_successfully() {
+        let expected = "this%20that";
+        assert_eq!(expected, encode("this that"));
+    }
+
+    #[test]
+    fn it_encodes_successfully_emoji() {
+        let emoji_string = "👾 Exterminate!";
+        let expected = "%F0%9F%91%BE%20Exterminate%21";
+        assert_eq!(expected, encode(emoji_string));
+    }
+
+    #[test]
+    fn it_decodes_successfully() {
+        let expected = String::from("this that");
+        let encoded = "this%20that";
+        assert_eq!(expected, decode(encoded).unwrap());
+    }
+
+    #[test]
+    fn it_decodes_successfully_emoji() {
+        let expected = String::from("👾 Exterminate!");
+        let encoded = "%F0%9F%91%BE%20Exterminate%21";
+        assert_eq!(expected, decode(encoded).unwrap());
+    }
+
+    #[test]
+    fn it_decodes_unsuccessfully_emoji() {
+        // Input that was never encoded contains no `%` and passes through unchanged.
+        let bad_encoded_string = "👾 Exterminate!";
+
+        assert_eq!(bad_encoded_string, decode(bad_encoded_string).unwrap());
+    }
+
+    #[test]
+    fn misc() {
+        assert_eq!(3, from_hex_digit(b'3').unwrap());
+        assert_eq!(10, from_hex_digit(b'a').unwrap());
+        assert_eq!(15, from_hex_digit(b'F').unwrap());
+        assert_eq!(None, from_hex_digit(b'G'));
+        assert_eq!(None, from_hex_digit(9));
+
+        assert_eq!("pureascii", encode("pureascii"));
+        assert_eq!("pureascii", decode("pureascii").unwrap());
+        assert_eq!("", encode(""));
+        assert_eq!("", decode("").unwrap());
+        assert_eq!("%26a%25b%21c.d%3Fe", encode("&a%b!c.d?e"));
+        assert_eq!("%00", encode("\0"));
+        assert_eq!("%00x", encode("\0x"));
+        assert_eq!("x%00", encode("x\0"));
+        assert_eq!("x%00x", encode("x\0x"));
+        assert_eq!("aa%00%00bb", encode("aa\0\0bb"));
+        assert_eq!("\0", decode("\0").unwrap());
+        assert!(decode("%F0%0F%91%BE%20Hello%21").is_err());
+        assert_eq!("this that", decode("this%20that").unwrap());
+        // Truncated or malformed escapes are passed through literally.
+        assert_eq!("this that%", decode("this%20that%").unwrap());
+        assert_eq!("this that%2", decode("this%20that%2").unwrap());
+        assert_eq!("this that%%", decode("this%20that%%").unwrap());
+        assert_eq!("this that%2%", decode("this%20that%2%").unwrap());
+        assert_eq!("this%2that", decode("this%2that").unwrap());
+        assert_eq!("this%%2that", decode("this%%2that").unwrap());
+        assert_eq!("this%2x&that", decode("this%2x%26that").unwrap());
+        // A lone hex digit directly followed by a valid escape: `%2` stays
+        // literal and the `%26` after it is still decoded.
+        assert_eq!("this%2&that", decode("this%2%26that").unwrap());
+    }
+
+    #[test]
+    fn lazy_writer() {
+        let mut s = "he".to_string();
+        Encoded("llo").append_to(&mut s);
+        assert_eq!("hello", s);
+
+        assert_eq!("hello", Encoded("hello").to_string());
+        assert_eq!("hello", format!("{}", Encoded("hello")));
+        assert_eq!("hello", Encoded("hello").to_str());
+        // Nothing to escape, so no owned copy may be produced.
+        assert!(matches!(Encoded("hello").to_str(), std::borrow::Cow::Borrowed(_)));
+    }
+
+    #[test]
+    fn whatwg_examples() {
+        assert_eq!(*decode_binary(b"%25%s%1G"), b"%%s%1G"[..]);
+        assert_eq!(*decode_binary("‽%25%2E".as_bytes()), b"\xE2\x80\xBD\x25\x2E"[..]);
+        assert_eq!(encode("≡"), "%E2%89%A1");
+        assert_eq!(encode("‽"), "%E2%80%BD");
+        assert_eq!(encode("Say what‽"), "Say%20what%E2%80%BD");
+    }
+}