//! Characters. use crate::spec::Spec; /// A mask to test whether the character is continue character of `scheme`. // `ALPHA / DIGIT / "+" / "-" / "."` const MASK_SCHEME_CONTINUE: u8 = 1 << 0; /// A mask to test whether the character matches `unreserved`. // `unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"` const MASK_UNRESERVED: u8 = 1 << 1; /// A mask to test whether the character matches `gen-delims`. // `gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"` const MASK_GEN_DELIMS: u8 = 1 << 2; /// A mask to test whether the character matches `sub-delims`. // `sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="` const MASK_SUB_DELIMS: u8 = 1 << 3; /// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes). // `pchar = unreserved / pct-encoded / sub-delims / ":" / "@"` const MASK_PCHAR: u8 = 1 << 4; /// A mask to test whether the character can appear in `query` and `fragment`. // `query = *( pchar / "/" / "?" )` // `fragment = *( pchar / "/" / "?" )` const MASK_FRAG_QUERY: u8 = 1 << 5; /// A mask to test whether the character can appear in `userinfo` and address of `IPvFuture`. // `userinfo = *( unreserved / pct-encoded / sub-delims / ":" )` const MASK_USERINFO_IPVFUTUREADDR: u8 = 1 << 6; /// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes) or slash. const MASK_PCHAR_SLASH: u8 = 1 << 7; /// ASCII characters' properties. const TABLE: [u8; 128] = [ 0b_0000_0000, // NUL 0b_0000_0000, // SOH 0b_0000_0000, // STX 0b_0000_0000, // ETX 0b_0000_0000, // EOT 0b_0000_0000, // ENQ 0b_0000_0000, // ACK 0b_0000_0000, // BEL 0b_0000_0000, // BS 0b_0000_0000, // HT 0b_0000_0000, // LF 0b_0000_0000, // VT 0b_0000_0000, // FF 0b_0000_0000, // CR 0b_0000_0000, // SO 0b_0000_0000, // SI 0b_0000_0000, // DLE 0b_0000_0000, // DC1 0b_0000_0000, // DC2 0b_0000_0000, // DC3 0b_0000_0000, // DC4 0b_0000_0000, // NAK 0b_0000_0000, // SYN 0b_0000_0000, // ETB 0b_0000_0000, // CAN 0b_0000_0000, // EM 0b_0000_0000, // SUB 0b_0000_0000, // ESC 0b_0000_0000, // FS 0b_0000_0000, // GS 0b_0000_0000, // RS 0b_0000_0000, // US 0b_0000_0000, // SPACE 0b_1111_1000, // ! 0b_0000_0000, // " 0b_0000_0100, // # 0b_1111_1000, // $ 0b_0000_0000, // % 0b_1111_1000, // & 0b_1111_1000, // ' 0b_1111_1000, // ( 0b_1111_1000, // ) 0b_1111_1000, // * 0b_1111_1001, // + 0b_1111_1000, // , 0b_1111_0011, // - 0b_1111_0011, // . 0b_1010_0100, // / 0b_1111_0011, // 0 0b_1111_0011, // 1 0b_1111_0011, // 2 0b_1111_0011, // 3 0b_1111_0011, // 4 0b_1111_0011, // 5 0b_1111_0011, // 6 0b_1111_0011, // 7 0b_1111_0011, // 8 0b_1111_0011, // 9 0b_1111_0100, // : 0b_1111_1000, // ; 0b_0000_0000, // < 0b_1111_1000, // = 0b_0000_0000, // > 0b_0010_0100, // ? 0b_1011_0100, // @ 0b_1111_0011, // A 0b_1111_0011, // B 0b_1111_0011, // C 0b_1111_0011, // D 0b_1111_0011, // E 0b_1111_0011, // F 0b_1111_0011, // G 0b_1111_0011, // H 0b_1111_0011, // I 0b_1111_0011, // J 0b_1111_0011, // K 0b_1111_0011, // L 0b_1111_0011, // M 0b_1111_0011, // N 0b_1111_0011, // O 0b_1111_0011, // P 0b_1111_0011, // Q 0b_1111_0011, // R 0b_1111_0011, // S 0b_1111_0011, // T 0b_1111_0011, // U 0b_1111_0011, // V 0b_1111_0011, // W 0b_1111_0011, // X 0b_1111_0011, // Y 0b_1111_0011, // Z 0b_0000_0100, // [ 0b_0000_0000, // \ 0b_0000_0100, // ] 0b_0000_0000, // ^ 0b_1111_0010, // _ 0b_0000_0000, // ` 0b_1111_0011, // a 0b_1111_0011, // b 0b_1111_0011, // c 0b_1111_0011, // d 0b_1111_0011, // e 0b_1111_0011, // f 0b_1111_0011, // g 0b_1111_0011, // h 0b_1111_0011, // i 0b_1111_0011, // j 0b_1111_0011, // k 0b_1111_0011, // l 0b_1111_0011, // m 0b_1111_0011, // n 0b_1111_0011, // o 0b_1111_0011, // p 0b_1111_0011, // q 0b_1111_0011, // r 0b_1111_0011, // s 0b_1111_0011, // t 0b_1111_0011, // u 0b_1111_0011, // v 0b_1111_0011, // w 0b_1111_0011, // x 0b_1111_0011, // y 0b_1111_0011, // z 0b_0000_0000, // { 0b_0000_0000, // | 0b_0000_0000, // } 0b_1111_0010, // ~ 0b_0000_0000, // DEL ]; /// Returns `true` if the given ASCII character is allowed as continue character of `scheme` part. #[inline] #[must_use] pub(crate) const fn is_ascii_scheme_continue(c: u8) -> bool { (TABLE[c as usize] & MASK_SCHEME_CONTINUE) != 0 } /// Returns `true` if the given ASCII character matches `unreserved`. #[inline] #[must_use] pub(crate) const fn is_ascii_unreserved(c: u8) -> bool { (TABLE[c as usize] & MASK_UNRESERVED) != 0 } /// Returns true if the character is unreserved. #[inline] #[must_use] pub(crate) fn is_unreserved(c: char) -> bool { if c.is_ascii() { is_ascii_unreserved(c as u8) } else { S::is_nonascii_char_unreserved(c) } } ///// Returns `true` if the given ASCII character matches `gen-delims`. //#[inline] //#[must_use] //pub(crate) const fn is_ascii_gen_delims(c: u8) -> bool { // (TABLE[c as usize] & MASK_GEN_DELIMS) != 0 //} ///// Returns `true` if the given ASCII character matches `sub-delims`. //#[inline] //#[must_use] //pub(crate) const fn is_ascii_sub_delims(c: u8) -> bool { // (TABLE[c as usize] & MASK_SUB_DELIMS) != 0 //} ///// Returns `true` if the given ASCII character matches `reserved`. //#[inline] //#[must_use] //pub(crate) const fn is_ascii_reserved(c: u8) -> bool { // (TABLE[c as usize] & (MASK_GEN_DELIMS | MASK_SUB_DELIMS)) != 0 //} /// Returns `true` if the given ASCII character matches `pchar` modulo `pct-encoded`. #[inline] #[must_use] pub(crate) const fn is_ascii_pchar(c: u8) -> bool { (TABLE[c as usize] & MASK_PCHAR) != 0 } /// Returns `true` if the given ASCII character is allowed to appear in `query` and `fragment`. #[inline] #[must_use] pub(crate) const fn is_ascii_frag_query(c: u8) -> bool { (TABLE[c as usize] & MASK_FRAG_QUERY) != 0 } /// Returns `true` if the given non-ASCII character is allowed to appear in `iquery`. #[inline] #[must_use] pub(crate) fn is_nonascii_query(c: char) -> bool { S::is_nonascii_char_unreserved(c) || S::is_nonascii_char_private(c) } /// Returns `true` if the given non-ASCII character is allowed to appear in `ifragment`. #[inline] #[must_use] pub(crate) fn is_nonascii_fragment(c: char) -> bool { S::is_nonascii_char_unreserved(c) } /// Returns `true` if the given ASCII character is allowed to appear in `userinfo` and `IPvFuture`. #[inline] #[must_use] pub(crate) const fn is_ascii_userinfo_ipvfutureaddr(c: u8) -> bool { (TABLE[c as usize] & MASK_USERINFO_IPVFUTUREADDR) != 0 } /// Returns `true` if the given non-ASCII character is allowed to appear in `iuserinfo`. #[inline] #[must_use] pub(crate) fn is_nonascii_userinfo(c: char) -> bool { S::is_nonascii_char_unreserved(c) } /// Returns `true` if the given ASCII character is allowed to appear in `reg-name` #[inline] #[must_use] pub(crate) const fn is_ascii_regname(c: u8) -> bool { (TABLE[c as usize] & (MASK_UNRESERVED | MASK_SUB_DELIMS)) != 0 } /// Returns `true` if the given non-ASCII character is allowed to appear in `ireg-name`. #[inline] #[must_use] pub(crate) fn is_nonascii_regname(c: char) -> bool { S::is_nonascii_char_unreserved(c) } /// Returns `true` if the given ASCII character matches `pchar` modulo `pct-encoded` or a slash. #[inline] #[must_use] pub(crate) const fn is_ascii_pchar_slash(c: u8) -> bool { (TABLE[c as usize] & MASK_PCHAR_SLASH) != 0 } /// Checks if the given character matches `ucschar` rule. #[must_use] pub(crate) fn is_ucschar(c: char) -> bool { matches!( u32::from(c), 0xA0..=0xD7FF | 0xF900..=0xFDCF | 0xFDF0..=0xFFEF | 0x1_0000..=0x1_FFFD | 0x2_0000..=0x2_FFFD | 0x3_0000..=0x3_FFFD | 0x4_0000..=0x4_FFFD | 0x5_0000..=0x5_FFFD | 0x6_0000..=0x6_FFFD | 0x7_0000..=0x7_FFFD | 0x8_0000..=0x8_FFFD | 0x9_0000..=0x9_FFFD | 0xA_0000..=0xA_FFFD | 0xB_0000..=0xB_FFFD | 0xC_0000..=0xC_FFFD | 0xD_0000..=0xD_FFFD | 0xE_1000..=0xE_FFFD ) } /// Returns true if the given value is a continue byte of UTF-8. #[inline(always)] #[must_use] pub(crate) fn is_utf8_byte_continue(byte: u8) -> bool { // `0x80..=0xbf` (i.e. `0b_1000_0000..=0b_1011_1111`) is not the first byte, // and `0xc0..=0xc1` (i.e. `0b_1100_0000..=0b_1100_0001` shouldn't appear // anywhere in UTF-8 byte sequence. // `0x80 as i8` is -128, and `0xc0 as i8` is -96. // // The first byte of the UTF-8 character is not `0b10xx_xxxx`, and // the continue bytes is `0b10xx_xxxx`. // `0b1011_1111 as i8` is -65, and `0b1000_0000 as i8` is -128. (byte as i8) < -64 } /// Returns true if the given ASCII character is `unreserved` or `reserved`. #[inline] #[must_use] pub(crate) const fn is_ascii_unreserved_or_reserved(c: u8) -> bool { (TABLE[c as usize] & (MASK_UNRESERVED | MASK_GEN_DELIMS | MASK_SUB_DELIMS)) != 0 }