diff options
| author | mo khan <mo@mokhan.ca> | 2025-07-10 13:11:11 -0600 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-07-10 13:11:11 -0600 |
| commit | 01959b16a21b22b5df5f16569c2a8e8f92beecef (patch) | |
| tree | 32afa5d747c5466345c59ec52161a7cba3d6d755 /vendor/iri-string | |
| parent | ff30574117a996df332e23d1fb6f65259b316b5b (diff) | |
chore: vendor dependencies
Diffstat (limited to 'vendor/iri-string')
74 files changed, 23495 insertions, 0 deletions
diff --git a/vendor/iri-string/.cargo-checksum.json b/vendor/iri-string/.cargo-checksum.json new file mode 100644 index 00000000..bbd65340 --- /dev/null +++ b/vendor/iri-string/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"CHANGELOG.md":"1126f5cc700900c8a029a7771e5f7a92e781b0dbec9fde4f7c32fee5b331e0bd","Cargo.lock":"56e51c508029fce0ea74e79134ebead035ecb10723a326e9ded0c8287cb375ce","Cargo.toml":"5bf13c85d79172456ae4e50446de5dab927931bf73354bcce093c31717f429c7","LICENSE-APACHE.txt":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT.txt":"21ab3c8e9af71d0212606c98a210f71d6a86234b179b3babff0ccc4798b90f33","Makefile.toml":"d0f6ff5035d3b2f73094b950ea285d07de7e689b3f74bbbfa542e7b5d9c481a6","README.md":"469963f422aa61e8cb936877a3b8f6c301b2d8257ad7f91126c1d5bc47e8e09f","examples/flamegraph-parse.rs":"686e767af47fa3c54d36da08cf3c95098dacbd27e6890a91399df1e1688ae3b2","examples/flamegraph-resolve.rs":"593e4d46ff3acab2795ff9427b53d90f50da9d44001fa4a50c590b5a71924ab3","examples/normalize.rs":"0622e338b43c17eb494a18b7ca6247200ff3a5e1aeeb70ac77e6dd17056381c9","examples/parse.rs":"91a72ba9073d2a9114a476d69887a94e6350b17885cb12653135d88ec4b81339","examples/resolve.rs":"b98cef51efc8c9bc86fe2227a144334b4fd751021892b8d20e93c6733fcf547d","src/build.rs":"8220ff4ac52ba77a9041a52d031efcb7924b671606f1d14e8f6db9e68eb48717","src/components.rs":"8713be24b47a544372c8f4ff24efb95a3fa32b7150a6f1d64b35a546cb4ada3b","src/components/authority.rs":"4411ea71aa7fda4b46a8037977acc15a2719a598bf890b9ccaecded864ddbf8d","src/convert.rs":"bd57fea9a4ffa35ec672c186e539e09c9a616a763ef0bf2cbf981013d837f889","src/format.rs":"e460441016beda7b6a9141881654f3204bbb9b9267e1504ba4af26ee5023f488","src/lib.rs":"f521f656862af3aeac5d306e7db7cafc906fc431913e4eaf3a5494df35e553aa","src/mask_password.rs":"51aec19ca12dcbd6930ead8943c064dc3e4fa582b913a7182ca3c0ce1eb12cf1","src/normalize.rs":"b3d5083f7785c95f2586fa6b3df656290d8147ffa2b2cf46bb66913adb8f06c1","src/normalize/error.rs":"fddb70be796c8d72f688
e6d64c00ef2c39e83bf433bdf624998f9b00b42b5d4b","src/normalize/path.rs":"4182be72a66c203682215660258ad08cc26725a5685a61f0ab4c925cfa70f8ba","src/normalize/pct_case.rs":"00113541af7ce8807dc8e4068716b1041de399d4415718e56780cac1b3822bfd","src/parser.rs":"e0afaea543534b18a03d811f20c0391b234f6f0ffbfb9e687abf04455e41fc71","src/parser/char.rs":"688352d220af1dd8740ffc172033c239cd98ea07e067159bde3aff832ff7c90f","src/parser/str.rs":"b7d7c222b3068cf2423ad6822f6225942a581421699984785b67889e484d91fb","src/parser/str/maybe_pct_encoded.rs":"3b6490af0d652586c4ce5f88e6920d55c2d7bcfbbdcd9939bfac896bbb7f0d2b","src/parser/trusted.rs":"744d7fbe63c853b91b44b74707408cc868c0d06516dd03e0acb7017603ef798c","src/parser/trusted/authority.rs":"9ef0528429ba452ee8dc45de808c2f974d46235672f5c380398f57a0f6b7575e","src/parser/validate.rs":"aa55c673f0f14ca256fc8b75b582bf5de48a5f2718cb766481ef3b34ebbcd972","src/parser/validate/authority.rs":"c793b871f0a8395e5dfc3c31a0fb9c588bbc8f49ea8a5ed10b9f78929b90cf56","src/parser/validate/path.rs":"dae0d406ace1bfb29a55325fc3c13572af9d2d06f114a012ab18df9fd57ddb60","src/percent_encode.rs":"f5eb2313b85b82f5b7a08a30c586db043943d7c55c63e837d613ca2b0a798fd8","src/raw.rs":"25123241b475b1a196fb50bef0fadf27cd9425728924ce9b921d5fea42140d16","src/resolve.rs":"db06ae293edf583557e82794262b7a24a022a79b982ba8778d97e14f784c54df","src/spec.rs":"5b2c4e20c5ed751af4667363a888d4e96d4faac260caf3d7bf37fe9512a4a5cc","src/spec/internal.rs":"e98126ec23278a7303080e017bf9d6efa49bea89a79a72fb355b4ba98f4254bf","src/template.rs":"200f37615cef5cefca9099dc222389b7ac8ce586623f8f7c924abb7784264ee5","src/template/components.rs":"adbf523aa952d98af057d0e8d08a70e26781e94636adf69606d0acc76be9ff51","src/template/context.rs":"9bbd9f424673fce730c463e548d018183731b0654a9ed079f330f28e5f4509f2","src/template/error.rs":"bcf6ff3f9eabc5ecdb732556641e1b9a8abb43202ef1e99f34e436d2ea1c98c7","src/template/expand.rs":"9d636444c8e2c884385cf4ca2b73e5aec3757e61fed4f06da50658b164036ee6","src/template/parser.rs":"128cfdd5ff3c8
6b97153d1fb385ac24642a55be8648bbfc5d3595e9dbf51e8bc","src/template/parser/char.rs":"f798e16c3bdb1415fd4f58ec14c5baa9a309af72d757de228d19786844854031","src/template/parser/validate.rs":"54fbba7a725a91ffd8ba3fa50be6db90aa7f45ea6a1944835432edb880fa59ff","src/template/simple_context.rs":"4596a33b21d76a36b34745585c93c191c2d4ce5ecd897243074ffbd0dd5ee34c","src/template/string.rs":"4b244cafca45e5f2c9cb1bb585e834769f4badd6021a189c9d0fefd6963b4180","src/template/string/owned.rs":"2f433266691922311c6ad0ef9db5c09aeb3536876ecbba29c17f331355494f84","src/types.rs":"677e540458ccf5b8c879fbdfe4bf8a3daf658d9816e826687da477a3cced8b6d","src/types/generic.rs":"5fc3627ba6a99d8a11bb4e133bb69d3ce217ebe41ede7a8d3f96f314ea37305b","src/types/generic/absolute.rs":"2a8d7b509c4a0f6f12209226507b182daae57f5643ae4283b1e14f14abd29d9e","src/types/generic/error.rs":"33b91f175b1576bc2aeab163b737d23d6da56217220d32432efaed8421188dad","src/types/generic/fragment.rs":"1e5f276aad9dfb7db83d9c27e9f0dc2e719ea3d34aec55675a968e660365ad20","src/types/generic/macros.rs":"0b54a80db43622d230c5e51240c5e71595b40686ad34c260e29793e68f039550","src/types/generic/normal.rs":"f2e7be412c0eb4a7091f0c862f7fb3d312844107f843b697fb6b4fa4ca8b7f8e","src/types/generic/query.rs":"23679b71fc2183e6bbcb0838214cb20c428c7d908e20c1331353c8183d02a731","src/types/generic/reference.rs":"c566320b8945f61c9525f38164451b6de433fa6caf9bc1e0622ae3991d35660c","src/types/generic/relative.rs":"ca08e889b0bb1f6025405e9873feba0d45609ef00fe8b8a34063447484a5b656","src/types/iri.rs":"1dcc0287c7c139c9b96013a06790f8c3f7be8280b3812736cb0f6b2fde60ab1e","src/types/uri.rs":"4625af17ce71a66955f1d87700484dcd44e4bb725ca1017f5dbbdd2c4fbb5850","src/validate.rs":"7ca5a57b208a82e65b7109a90b1a1bc347f57b628b8487efd551c5ff81027a9a","tests/build.rs":"29c5869d7c229c6d18090c84616ed54fc09fc7ceacf2d658173548bdfea5fc01","tests/components/mod.rs":"665c64ea1ab0f0765a694c2ec80bbd1b7ce74220c1379ae37dd95148ac5bc778","tests/gh-issues.rs":"32b0488ab5c337c21861e4dfd2634eb6b754367500eed1d5
d9e7a49a721999d2","tests/iri.rs":"0ba655cf76b49a65fddf96526aa99855b06521f0442844b33e81965b2b3d8725","tests/normalize.rs":"267673cb2f9337d081c4e53cd1217466310b8bca2d86c4d53b2f5084ef33e335","tests/percent_encode.rs":"db4516d2c706b5d31e9fd93b6e3efe695d86814cba73a00c67b227f08fafccd1","tests/resolve.rs":"66979d2ba472c1a32a1f8ce11978338aa4de149fe1bc0e1112ca0356f50ae206","tests/resolve_refimpl/mod.rs":"d4ee007987153ce58a504df29c26ef15ebf2849ec7d6a1b2b006b69a462c9354","tests/serde.rs":"e6062e0208ebdf5f8b6b3a908530e0b5f2e3be5096b6094562cc3a84da0115cd","tests/string_types_interop.rs":"b27eb992d9b8f9137f89b280c0712c169c0c083f73b9bdc5f152af84437fe075","tests/template.rs":"5fef7d103abeffb87d395a7d7552522341420fa1227a4728343ad2a1f90ba67c","tests/utils/mod.rs":"8b9f45bb612cff7918996caa582671bc2230fe4562d4398ae0e3b98598b42132"},"package":"dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2"}
\ No newline at end of file diff --git a/vendor/iri-string/CHANGELOG.md b/vendor/iri-string/CHANGELOG.md new file mode 100644 index 00000000..af0374cb --- /dev/null +++ b/vendor/iri-string/CHANGELOG.md @@ -0,0 +1,1049 @@ +# Change Log + +## [Unreleased] + +## [0.7.8] + +* Fix unconditional failure at `RiReferenceStr::set_fragment()` + +### Fixed +* Fix unconditional failure at `RiReferenceStr::set_fragment()` + +## [0.7.7] + +* Fix URI template expansion with `template::DynamicContext` to call + the methods `on_expansion_start` and `on_expansion_end`. + +### Fixed +* Fix URI template expansion with `template::DynamicContext` to call + the methods `on_expansion_start` and `on_expansion_end`. + +## [0.7.6] + +* Add `fragment_str()` methods that returns a fragment in a raw string slice. + +### Added +* Add `fragment_str()` methods that returns a fragment in a raw string slice. + + List of added methods: + + `RiStr::fragment_str()` + + `RiReferenceStr::fragment_str()` + + `RiRelativeStr::fragment_str()` + +## [0.7.5] + +* Fix unsoundness of `template::UriTemplateStr` + +### Fixed +* Fix unsoundness of `template::UriTemplateStr` + * The type should have `#[repr(transparent)]` to compile safely but did not. + * Any creations and uses of the value are undefined behavior without the + fix, while the current version of the Rust compiler seems to happen to + generate the expected binary (without any guarantee). + +## [0.7.4] + +* Fix calculation of template expansion error location. + + Currently this just appears in an error message but not exposed through + any other public API. +* Add an iterator of variables that appears in a URI template. +* Support URI template expansion with a mutable context. + +### Added +* Add an iterator of variables that appears in a URI template. + + List of added items: + - `template::UriTemplateStr::variables()` method + - `template::UriTemplateVariables<'_>` iterator type +* Support URI template expansion with a mutable context. 
+ + Add methods `template::UriTemplateStr::expand_dynamic()` and + `template::UriTemplateStr::expand_dynamic_to_string()`. + + Add `template::context::DynamicContext` trait for mutable context. + + Add `template::context::Visitor::purpose()` method and + `template::context::VisitPurpose` type to enable users to know for what + purpose the variable is being visited. + +### Fixed +* Fix calculation of template expansion error location. + + Currently this error location info just appears in an error message (from + `<template::Error as std::fmt::Display>::fmt`), but not exposed through + any other public API. + +## [0.7.3] + +* Add easy conversion from an expanded template into IRI/URI string types. + +### Added +* Add easy conversion from an expanded template into IRI/URI string types. + + List of added conversions: + - `TryFrom<template::Expanded<'_, S, C>> for types::RiAbsoluteString<S>` + - `TryFrom<template::Expanded<'_, S, C>> for types::RiReferenceString<S>` + - `TryFrom<template::Expanded<'_, S, C>> for types::RiRelativeString<S>` + - `TryFrom<template::Expanded<'_, S, C>> for types::RiString<S>` + +## [0.7.2] + +* Fix a bug that some abnormal IRIs that have no authority and end with `/.` + resulted in wrong normalization that generate unintentional authorities. + + Reported at [#36](https://github.com/lo48576/iri-string/issues/36). +* Fix a bug that the normalization incorrectly omits percent-encoded triplets + partially if they constitute invalid UTF-8 byte sequence. + + Reported at [#36](https://github.com/lo48576/iri-string/issues/36#issuecomment-2053688909). + +### Fixed +* Fix a bug that some abnormal IRIs that have no authority and end with `/.` + resulted in wrong normalization that generate unintentional authorities. + + Reported at [#36](https://github.com/lo48576/iri-string/issues/36). + + IRI resolution and normalization had this bug, but only for IRIs without authority. 
+ + This happened when the resolution and normalization result should not contain + an authority but the path part resulted in `//.`. + - For example, `a:/.//.` and `a:/bar/..//.` should be normalized to `a:/.//`, + but the actual result was `a://` due to this bug. +* Fix a bug that the normalization incorrectly omits percent-encoded triplets + partially if they constitute invalid UTF-8 byte sequence. + + Reported at [#36](https://github.com/lo48576/iri-string/issues/36#issuecomment-2053688909). + + URIs and IRIs that only contains the percent-encoded triplets for valid UTF-8 byte + sequences won't be affected. + +## [0.7.1] + +* Add `new_unchecked()` methods to string types. +* Move `template::VarName` type into `template::context` module and deprecate the old name. +* Add `template::context::VarName::new()` method. +* Add component getters to `resolve::FixedBaseResolver`. +* Fix some lint warnings detected by newer clippy. + +### Added +* Add `new_unchecked()` methods to string types. + + List of added methods: + - `template::UriTemplateStr::new_unchecked()` + - `template::UriTemplateString::new_unchecked()` + - `types::RiAbsoluteStr::new_unchecked()` + - `types::RiAbsoluteString::new_unchecked()` + - `types::RiFragmentStr::new_unchecked()` + - `types::RiFragmentString::new_unchecked()` + - `types::RiQueryStr::new_unchecked()` + - `types::RiQueryString::new_unchecked()` + - `types::RiReferenceStr::new_unchecked()` + - `types::RiReferenceString::new_unchecked()` + - `types::RiRelativeStr::new_unchecked()` + - `types::RiRelativeString::new_unchecked()` + - `types::RiStr::new_unchecked()` + - `types::RiString::new_unchecked()` +* Add `template::context::VarName::new()` method. +* Add component getters to `resolve::FixedBaseResolver`. 
+ + List of added methods: + - `resolve::FixedBaseResolver::scheme_str()` + - `resolve::FixedBaseResolver::authority_str()` + - `resolve::FixedBaseResolver::path_str()` + - `resolve::FixedBaseResolver::query()` + - `resolve::FixedBaseResolver::query_str()` + - `resolve::FixedBaseResolver::fragment_str()` + +### Changed (non-breaking) +* Move `template::VarName` type into `template::context` module and deprecate the old name. + + The old name (`template::VarName`) is still available while it is marked as deprecated. + +## [0.7.0] + +* Add `template` module that contains URI Template + ([RFC 6570](https://www.rfc-editor.org/rfc/rfc6570)) processor. +* Add `PercentEncoded::{unreserve,characters}` methods. +* Remove "WHATWG" normalization. + + Fixes the issue [#29](https://github.com/lo48576/iri-string/issues/29) and + [#30](https://github.com/lo48576/iri-string/issues/30). +* Add normalization that preserves relative path in some special condition. + +### Added +* Add `template` module that contains URI Template + ([RFC 6570](https://www.rfc-editor.org/rfc/rfc6570)) processor. + + The processor supports nostd environment. +* Add `PercentEncoded::{unreserve,characters}` methods. + + List of added methods: + - `percent_encode::PercentEncoded::characters()` + - `percent_encode::PercentEncoded::unreserve()` +* Add normalization that preserves relative path in some special condition. + + When the authority component is absent and the path is relative, the dot-segments + removal is not applied to the path. This behavior is inspired by WHATWG URL + Standard, but the implementation is not guaranteed to follow that spec. 
+ + List of added items: + - `types::RiStr::normalize_but_preserve_authorityless_relative_path()` + - `types::RiStr::is_normalized_but_authorityless_relative_path_preserved()` + - `types::RiAbsoluteStr::normalize_but_preserve_authorityless_relative_path()` + - `types::RiAbsoluteStr::is_normalized_but_authorityless_relative_path_preserved()` + - `normalize::Normalized::and_normalize_but_preserve_authorityless_relative_path()` + - `normalize::Normalized::enable_normalization_preserving_authorityless_relative_path()` + + Note that this normalization algorithm is not compatible with RFC 3986 + algorithm for some inputs. + +### Changed (breaking) +* Remove non-compliant "WHATWG" normalization. + + Fixes the issue [#29](https://github.com/lo48576/iri-string/issues/29) and + [#30](https://github.com/lo48576/iri-string/issues/30). + + Previous implementations of normalization were described as "defined in WHATWG spec", + but they were not compliant to the spec. Specifically, when the authority + component is absent and the path is relative, WHATWG spec requires the path + to be treated as "opaque", but the old implementation applied dot-segments + removal to the path. + + List of removed items: + - `types::RiStr::is_normalized_whatwg()` + - `types::RiAbsoluteStr::is_normalized_whatwg()` + +## [0.6.0] + +* Bump MSRV to 1.60.0. +* Remove `memchr-std`, `serde-alloc`, and `serde-std` features. + + Now `alloc` and/or `std` features for additional dependencies are + automatically enabled when all of dependent features are enabled. + - See [Announcing Rust 1.60.0 | Rust Blog](https://blog.rust-lang.org/2022/04/07/Rust-1.60.0.html#new-syntax-for-cargo-features). +* Support escaping username and password by `percent_encode::PercentEncode`. +* Add `format` module that contains utilities for types with `Display` trait impl. + + Add `format::ToStringFallible` trait. + + Add `format::ToDedicatedString` trait. + + Add `format::write_to_slice` function and `format::CapacityOverflow` type. 
+ + Add `format::try_append_to_string` function. +* Remove `task` module and `task::ProcessAndWrite` trait. + + Remove `task` module. + + Remove `ProcessAndWrite` trait implementation from `percent_encode::PercentEncoded` type. + + Remove `ProcessAndWrite` trait implementation from `convert::MappedToUri` type. +* Remove "task" types. + + Remove `normalize::NormalizationTask` type. + + Remove `normalize::NormalizationTask` type. + - Use `normalize::Normalized` instead. +* Change return types of some functions from task types or string types to `Display`-able types. + + Change return types of `{BorrowedIri}::encode_to_uri` to + `convert::MappedToUri<'_, Self>`. + + Change return type of `resolve::FixedBaseResolver::resolve()` method to `normalize::Normalized`. + + Change return type of `{BorrowedIri}::normalize()` method to `normalize::Normalized`. + + Change return type of `{BorrowedIri}::resolve_against()` method to `normalize::Normalized`. + + Remove `BufferError` type. +* Change API of IRI-to-URI conversion. + + Rename `{OwnedIri}::encode_to_uri` to `{OwnedIri}::encode_to_uri_inline`. + + Add `{OwnedIri}::try_encode_to_uri_inline` method. + + Add `{OwnedIri}::try_encode_into_uri` method. +* Make the methods impl of `convert::MappedToUri<'_, T>` generic over the spec. +* Remove functions under `resolve` module. +* Add `normalize::Normalized` type. +* Remove some methods of `resolve::FixedBaseResolver`. +* Rename `{BorrowedIri}::is_normalized()` methods to `{BorrowedIri}::is_normalized_rfc3986()`. +* Remove some methods of borrowed IRI string types. +* Support password masking. + + Add `mask_password` module. + + Add `{BorrowedIri}::mask_password` method. + + Add `{OwnedIri}::remove_password_inline` and `{OwnedIri}::remove_nonempty_password_inline()` + methods. +* Remove deprecated `percent_encoding` module and aliases defined in it. +* Unify normalization of `build::Builder`. + + Add `build::Builder::normalize()` method. 
+ + Add `build::Built::ensure_rfc3986_normalizable()` method. + + Change return type of `build::Builder::build()`. + + Remove `build::Builder::normalize_rfc3986()` and + `build::Builder::normalize_whatwg()` methods. + + Remove `build::Error` type. +* Stop accepting user part as `Option<&str>` type for `build::Builder::userinfo` +* Reject user with colon characters on IRI build. +* Allow builders to normalize `path` component of relative IRIs if safely possible. + +### Added +* Support escaping username and password by `percent_encode::PercentEncode`. + + List of added functions: + `percent_encode::PercentEncode::from_user()` + `percent_encode::PercentEncode::from_password()` +* Add `format::ToStringFallible` trait. + + This trait allows users to convert `Display`-able values into `String`, + but without panicking on OOM. + + List of types that implement this trait: + - `build::Built<'_, RiReferenceStr<S>>` + - `build::Built<'_, RiStr<S>>` + - `build::Built<'_, RiAbsoluteStr<S>>` + - `build::Built<'_, RiRelativeStr<S>>` +* Add `format::ToDedicatedString` trait. + + This trait allows users to convert `Display`-able values into owned + dedicated IRI string types, with or without panicking on OOM. 
+ + List of added implementations: + - `build::Built<'_, RiReferenceStr<S>>` (Target = `RiReferenceString<S>`) + - `build::Built<'_, RiStr<S>>` (Target = `RiString<S>`) + - `build::Built<'_, RiAbsoluteStr<S>>` (Target = `RiAbsoluteString<S>`) + - `build::Built<'_, RiRelativeStr<S>>` (Target = `RiRelativeString<S>`) + - `convert::MappedToUri<'_, RiReferenceStr<S>>` (Target = `RiReferenceString<S>`) + - `convert::MappedToUri<'_, RiStr<S>>` (Target = `RiString<S>`) + - `convert::MappedToUri<'_, RiAbsoluteStr<S>>` (Target = `RiAbsoluteString<S>`) + - `convert::MappedToUri<'_, RiRelativeStr<S>>` (Target = `RiRelativeString<S>`) + - `convert::MappedToUri<'_, RiQueryStr<S>>` (Target = `RiQueryString<S>`) + - `convert::MappedToUri<'_, RiFragmentStr<S>>` (Target = `RiFragmentString<S>`) +* Add `format::write_to_slice` function and `format::CapacityOverflow` type. +* Add `format::try_append_to_string` function. +* Add `{OwnedIri}::try_encode_to_uri_inline` method. +* Add `{OwnedIri}::try_encode_into_uri` method. +* Add `normalize::Normalized` type. + + This replaces `normalize::NormalizationTask` type in previous versions. +* Add `mask_password` module. + + Items in this module let users hide or replace password to keep sensitive + information secret. + + List of added items: + - `mask_password::PasswordMasked` type + - `mask_password::PasswordReplaced` type +* Add `{BorrowedIri}::mask_password` method. + + List of added methods: + - `types::RiReferenceStr::mask_password()` + - `types::RiStr::mask_password()` + - `types::RiAbsoluteStr::mask_password()` + - `types::RiRelativeStr::mask_password()` +* Add `{OwnedIri}::remove_password_inline` and `{OwnedIri}::remove_nonempty_password_inline()` + methods. 
+ + List of added methods: + - `types::RiReferenceString::remove_password_inline()` + - `types::RiReferenceString::remove_nonempty_password_inline()` + - `types::RiString::remove_password_inline()` + - `types::RiString::remove_nonempty_password_inline()` + - `types::RiAbsoluteString::remove_password_inline()` + - `types::RiAbsoluteString::remove_nonempty_password_inline()` + - `types::RiRelativeString::remove_password_inline()` + - `types::RiRelativeString::remove_nonempty_password_inline()` +* Add `build::Builder::normalize()` method. +* Add `build::Built::ensure_rfc3986_normalizable()` method. + +### Changed (breaking) +* Bump MSRV to 1.60.0. +* Remove `memchr-std`, `serde-alloc`, and `serde-std` features. + + Now `alloc` and/or `std` features for additional dependencies are + automatically enabled when all of dependent features are enabled. + - See [Announcing Rust 1.60.0 | Rust Blog](https://blog.rust-lang.org/2022/04/07/Rust-1.60.0.html#new-syntax-for-cargo-features). +* Remove `ProcessAndWrite` trait implementation from `percent_encode::PercentEncoded` type. +* Remove `ProcessAndWrite` trait implementation from `convert::MappedToUri` type. +* Change return types of `{BorrowedIri}::encode_to_uri` to + `convert::MappedToUri<'_, Self>`. + + The code `borrowed.encode_to_uri()` in older versions should be rewritten + to `borrowed.encode_to_uri().to_dedicated_string()`. +* Rename `{OwnedIri}::encode_to_uri` to `{OwnedIri}::encode_to_uri_inline`. +* Remove `normalize::NormalizationTask` type. + + Use `normalize::Normalized` type instead. +* Remove functions under `resolve` module. + + List of removed functions: + - `resolve::resolve()` + - `resolve::resolve_normalize()` + - `resolve::resolve_whatwg()` + - `resolve::resolve_normalize_whatwg()` + - `resolve::try_resolve()` + - `resolve::try_resolve_normalize()` + - `resolve::try_resolve_whatwg()` + - `resolve::try_resolve_normalize_whatwg()` +* Remove `normalize::NormalizationTask` type. 
+ + Use `normalize::Normalized` instead. +* Remove some methods of `resolve::FixedBaseResolver`. + + List of removed methods: + - `resolve::FixedBaseResolver::try_resolve()` + - `resolve::FixedBaseResolver::try_resolve_normalize()` + - `resolve::FixedBaseResolver::resolve_normalize()` + - `resolve::FixedBaseResolver::create_task()` + - `resolve::FixedBaseResolver::create_normalizing_task()` +* Change return type of `resolve::FixedBaseResolver::resolve()` to `normalize::Normalized`. +* Rename `{BorrowedIri}::is_normalized` methods to `{BorrowedIri}::is_normalized_rfc3986`. + + List of renamed methods: + - `types::RiStr::is_normalized` to `types::RiStr::is_normalized_rfc3986` + - `types::RiAbsoluteStr::is_normalized` to `types::RiAbsoluteStr::is_normalized_rfc3986` +* Change return type of `{BorrowedIri}::normalize()` method to `normalize::Normalized`. + + List of affected methods: + - `types::RiStr::normalize()` + - `types::RiAbsoluteStr::normalize()` +* Remove some methods of borrowed IRI string types. 
+ + List of removed methods: + - `types::RiReferenceStr::try_resolve_against()` + - `types::RiReferenceStr::try_resolve_whatwg_against()` + - `types::RiReferenceStr::resolve_whatwg_against()` + - `types::RiReferenceStr::try_resolve_normalize_against()` + - `types::RiReferenceStr::resolve_normalize_against()` + - `types::RiReferenceStr::try_resolve_normalize_whatwg_against()` + - `types::RiReferenceStr::resolve_normalize_whatwg_against()` + - `types::RiStr::try_normalize()` + - `types::RiStr::try_normalize_whatwg()` + - `types::RiStr::normalize_whatwg()` + - `types::RiAbsoluteStr::try_normalize()` + - `types::RiAbsoluteStr::try_normalize_whatwg()` + - `types::RiAbsoluteStr::normalize_whatwg()` + - `types::RiRelativeStr::try_resolve_against()` + - `types::RiRelativeStr::try_resolve_whatwg_against()` + - `types::RiRelativeStr::resolve_whatwg_against()` + - `types::RiRelativeStr::try_resolve_normalize_against()` + - `types::RiRelativeStr::resolve_normalize_against()` + - `types::RiRelativeStr::try_resolve_normalize_whatwg_against()` + - `types::RiRelativeStr::resolve_normalize_whatwg_against()` +* Change return type of `{BorrowedIri}::resolve()` method to `normalize::Normalized`. + + List of affected methods: + - `types::RiReferenceStr::resolve_against()` + - `types::RiRelativeStr::resolve_against()` +* Remove `BufferError` type. +* Remove `task` module. + + List of removed items: + - `task::Error` type + - `task::ProcessAndWrite` trait +* Remove deprecated `percent_encoding` module and aliases defined in it. + + List of removed items: + - `percent_encoding` module. + - `percent_encoding::PercentEncoded` type alias. + - `percent_encoding::PercentEncodedForIri` type alias. + - `percent_encoding::PercentEncodedForUri` type alias. +* Change return type of `build::Builder::build()`. + + Now it returns `Result<(), validate::Error>` instead of + `Result<(), build::Error>`. +* Remove `build::Builder::normalize_rfc3986()` and + `build::Builder::normalize_whatwg()` methods. 
+ + Use `build::Builder::normalize()` and + `build::Builder::ensure_rfc3986_normalizable()` instead. +* Remove `build::Error` type. +* Stop accepting user part as `Option<&str>` type for `build::Builder::userinfo` + + Now user part should be non-optional (but possibly empty) `&str`. + +### Changed (non-breaking) +* Make methods of `convert::MappedToUri<'_, T>` generic over the spec. + + Now methods of `convert::MappedToUri<'_, T>` can be called for + `{BorrowedIri}<S> where S: Spec`. +* Reject user with colon characters on IRI build. + + Now `build::Builder::build()` fails when `user` part contains a colon (`:`). +* Allow builders to normalize `path` component of relative IRIs if safely possible. + +## [0.5.6] + +* Fix normalization bug. + + Previously, trailing colon of an authority (with empty port) was not + stripped. Now this is fixed. +* Add `ensure_rfc3986_normalizable()` methods to absolute IRI string types. +* Add IRI builder in `build` module. +* Deprecate `percent_encoding` module in favor of the new name `percent_encode`. + +### Added +* Add `ensure_rfc3986_normalizable()` methods to absolute IRI string types. + + List of added functions: + - `types::RiStr::ensure_rfc3986_normalizable()` + - `types::RiAbsoluteStr::ensure_rfc3986_normalizable()` +* Add IRI builder in `build` module. + + `Builder` type is a builder. + + `DisplayBuild` type is a validated build result (but not yet heap-allocates). + + `PortBuilder` and `UserinfoBuilder` types are intermediate types to + provide convenient generics to component setters. + + `Error` type is a builder error. + + `Buildable` trait indicates the syntax corresponding to the string types + (such as `IriStr` or `UriReferenceStr`) can be constructed by the builder. + +### Fixed +* Fix normalization bug. + + Previously, trailing colon of an authority (with empty port) was not + stripped. Now this is fixed. + +### Changed (non-breaking) +* Deprecate `percent_encoding` module in favor of the new name `percent_encode`. 
+ + Previously exported items are still provided from `percent_encoding` + module to keep backward compatibility. + +## [0.5.5] + +* Add `RiQueryStr` and `RiQueryString` types for query. +* Add functions with `try_` prefix are introduced for normalization and + IRI resolution, and deprecate non-`try` versions. +* Add encoder types for percent-encoding in `percent_encoding` module. + +### Added +* Add `RiQueryStr` and `RiQueryString` types for query. +* Add functions with `try_` prefix are introduced for normalization and resolution. + + List of added functions: + - `types::RiStr::try_normalize()` + - `types::RiStr::try_normalize_whatwg()` + - `types::RiAbsoluteStr::try_normalize()` + - `types::RiAbsoluteStr::try_normalize_whatwg()` + - `resolve::try_resolve()` + - `resolve::try_resolve_whatwg()` + - `resolve::try_resolve_normalize()` + - `resolve::try_resolve_normalize_whatwg()` + - `types::RiReferenceStr::try_resolve_against()` + - `types::RiReferenceStr::try_resolve_normalize_against()` + - `types::RiReferenceStr::try_resolve_whatwg_against()` + - `types::RiReferenceStr::try_resolve_normalize_whatwg_against()` + - `types::RiRelativeStr::try_resolve_against()` + - `types::RiRelativeStr::try_resolve_normalize_against()` + - `types::RiRelativeStr::try_resolve_whatwg_against()` + - `types::RiRelativeStr::try_resolve_normalize_whatwg_against()` +* Add encoder types for percent-encoding in `percent_encoding` module. + + List of added types: + - `percent_encoding::PercentEncoded` + - `percent_encoding::PercentEncodedForIri` (type alias) + - `percent_encoding::PercentEncodedForUri` (type alias) + +### Changed (non-breaking) +* Deprecate non-`try` function names for normalization and resolution. 
+ + List of deprecated functions: + - `types::RiStr::normalize()` + - `types::RiStr::normalize_whatwg()` + - `types::RiAbsoluteStr::normalize()` + - `types::RiAbsoluteStr::normalize_whatwg()` + - `resolve::resolve()` + - `resolve::resolve_whatwg()` + - `resolve::resolve_normalize()` + - `resolve::resolve_normalize_whatwg()` + - `types::RiReferenceStr::resolve_against()` + - `types::RiReferenceStr::resolve_normalize_against()` + - `types::RiReferenceStr::resolve_whatwg_against()` + - `types::RiReferenceStr::resolve_normalize_whatwg_against()` + - `types::RiRelativeStr::resolve_against()` + - `types::RiRelativeStr::resolve_normalize_against()` + - `types::RiRelativeStr::resolve_whatwg_against()` + - `types::RiRelativeStr::resolve_normalize_whatwg_against()` + + Use functions with `try_` prefix instead. + +## [0.5.4] + +* Implement IRI resolution and normalization that uses serialization algorithm + [described in WHATWG URL Standard](https://url.spec.whatwg.org/#url-serializing). + +### Added +* Implement IRI resolution and normalization that uses serialization algorithm + [described in WHATWG URL Standard](https://url.spec.whatwg.org/#url-serializing). + + They won't fail even when the input or result is abnormal (but of course + they may still fail on memory shortage). + + The difference between RFC 3986/3987 versions and WHATWG versions is, + handling of absent host and path starting with `//`. The RFC versions fail + since `scheme://not-a-host` is invalid, but WHATWG versions serialize the + result as `scheme:/.//not-a-host`. 
+ + List of added functions: + - `resolve::resolve_whatwg()` + - `resolve::resolve_normalize_whatwg()` + - `normalize::NormalizationTask::enable_normalization()` + - `normalize::NormalizationTask::enable_whatwg_serialization()` + - `types::RiStr::is_normalized_whatwg()` + - `types::RiStr::normalize_whatwg()` + - `types::RiAbsoluteStr::is_normalized_whatwg()` + - `types::RiAbsoluteStr::normalize_whatwg()` + - `types::RiReferenceStr::resolve_normalize_whatwg_against()` + - `types::RiReferenceStr::resolve_whatwg_against()` + - `types::RiRelativeStr::resolve_normalize_whatwg_against()` + - `types::RiRelativeStr::resolve_whatwg_against()` + +## [0.5.3] + +* Decode percent-encoded unreserved characters on normalization. +* Add `is_normalized` method to absolute URI/IRI types. +* Implement more conversion traits from string types to `Cow`, `Box`, `Rc`, and `Arc`. +* Improve documents. + +### Added +* Add `is_normalized` method to absolute URI/IRI types. + + They don't heap-allocate. +* Implement more conversion traits from string types to `Cow`, `Box`, `Rc`, and `Arc`. + + List of added conversions: + - `From<&'a $slice> for Cow<'a, $slice>` + - `From<&'_ $slice> for Box<$slice>` + - `From<&'_ $slice> for Rc<$slice>` + - `From<&'_ $slice> for Arc<$slice>` + - `From<$owned> for Cow<'_, $owned>` + - `From<$owned> for Box<$owned>` + +### Fixed +* Decode percent-encoded unreserved characters on normalization. + + Previous implementation incorrectly left unreserved percent-encoded + characters as is, but now this is fixed. + + +## [0.5.2] + +* Fix IPvFuture literal parsing again (<https://github.com/lo48576/iri-string/issues/17>). + +### Fixed +* Fix IPvFuture literal parsing again (<https://github.com/lo48576/iri-string/issues/17>). + +## [0.5.1] + +* Add `FixedBaseResolver::base()` method. +* Fix IP literal parsing and decomposition (<https://github.com/lo48576/iri-string/issues/17>). + +### Added +* Add `FixedBaseResolver::base()` method. 
+ +### Fixed +* Fix IP literal parsing and decomposition (<https://github.com/lo48576/iri-string/issues/17>). + +## [0.5.0] + +This entry describes the changes since the previous stable release (v0.4.1). + +* Bump MSRV to 1.58.0. +* Add more conversions from/to IRI string types. + + Implement `TryFrom<&[u8]>` for the IRI string types. + + Implement `From<{owned URI}>` for the owned IRI string types. + + Add `as_slice` method to the owned string types. + + Add `convert::MappedToUri` type. + + Add `encode_to_uri()` method for the IRI string types. + + Add `encode_into_uri()` method for the owned IRI string types. + + Add `as_uri()` method for the borrowed IRI string types. + + Add `try_into_uri()` method for the owned IRI string types. +* Add `capacity()` method to the owned string types. +* Add components getters for borrowed string types. +* Add IRI normalization API and related types. +* Add normalizing variations for IRI resolution. +* Support nostd for IRI resolution. +* Change IRI resolution API incompatibly. + + Change number and types of parameters. + + Change return types. +* Let IRI resolution recognize percent-encoded period during normalization. +* Drop internal dependency to `nom` crate. +* Permit `serde`+`{alloc,std}` without `serde-{alloc,std}`. +* Update examples. + + Improve `parse` example to show more information. + + Add `normalize` example. +* Travis CI is no longer used. + + Checks should be run manually. See README for detail. + +### Added +* Add more conversions from/to IRI string types. + + Implement `TryFrom<&[u8]>` for the IRI string types. + + Implement `From<{owned URI}>` for the owned IRI string types. + + Add `as_slice` method to the owned string types. + + Add `convert::MappedToUri` type. + + Add `encode_to_uri()` method for the IRI string types. + + Add `encode_into_uri()` method for the owned IRI string types. + + Add `as_uri()` method for the borrowed IRI string types. + + Add `try_into_uri()` method for the owned IRI string types. 
+* Add `capacity()` method to the owned string types. +* Add components getters for borrowed string types. + + Add getters for major components of IRIs/URIs: + `scheme`, `authority`, `path`, and `query`. + + Add types and getters for subcomponents of `authority`: + `userinfo`, `host`, and `port`. + - `components::AuthorityComponents` type and `authority_components` method. +* Add IRI normalization API and related types. + + Add `{RiStr, RiAbsoluteStr}::normalize()` methods. + + Add `normalize::NormalizationTask`. +* Add normalizing variations for IRI resolution. + + `resolve_normalize` for `resolve`. + + `resolve_normalize_against` for `resolve_against`. +* Support nostd for IRI resolution. + + Add `resolve::FixedBaseResolver` to get `normalize::NormalizationTask`. + + Users can write the resolution/normalization result to user-provided buffer by `NormalizationTask`. +* Update examples. + + Add `normalize` example. + +### Changed (breaking) +* Bump MSRV to 1.58.0. + + Rust 1.58.0 is released at 2022-01-13. +* Change IRI resolution API incompatibly. + + Remove `is_strict: bool` parameter from `resolve::resolve()`. + + Make IRI resolution fallible. + - Now IRI resolution returns `Result`. + - For details about possible resolution/normalization failure, see the + documentation for `normalize` module and the issue [How `/..//bar` + should be resolved aganst `scheme:`? + (#8)](https://github.com/lo48576/iri-string/issues/8). + +### Changed (non-breaking) +* Let IRI resolution recognize percent-encoded period during normalization. + + For example, `/%2e%2e/` in the path are now recognized as `/../`, and + handled specially as "parent directory". +* Drop internal dependency to `nom` crate. + + Parsers are rewritten manually, and are now much faster than before. +* Permit `serde`+`{alloc,std}` without `serde-{alloc,std}`. 
+ + Previously, compilation error are intentionally caused when both `serde` + and `{alloc,std}` are enabled but corresponding `serde-{alloc,std}` is not. + + Note that you still need to enable `serde-alloc` or `serde-std` to use + serde support for owned string types. + - This change only intends to support the cases when flags are + independently enabled from different indirect dependencies. +* Update examples. + + Improve `parse` example to show more information. + +## ([0.5.0] from [0.5.0-rc.0]) + +* Travis CI is no longer used. + + Checks should be run manually. See README for detail. + +## [0.5.0-rc.0] + +No more API changes are planned until v0.5.0. + +* Add more conversions from IRI to URI string types. + + Add `as_uri()` method for the borrowed IRI string types. + + Add `try_into_uri()` method for the owned IRI string types. +* Update examples. + + Improve `parse` example to show more information. + + Add `normalize` example. + +### Added +* Add more conversions from IRI to URI string types. + + Add `as_uri()` method for the borrowed IRI string types. + + Add `try_into_uri()` method for the owned IRI string types. +* Update examples. + + Add `normalize` example. + +### Changed (non-breaking) +* Update examples. + + Improve `parse` example to show more information. + +## [0.5.0-beta.4] + +* Add more conversions from/to IRI string types. + + Implement `From<{owned URI}>` for the owned IRI string types. + + Add `as_slice` method to the owned string types. + + Add `convert::MappedToUri` type. + + Add `encode_to_uri()` method for the IRI string types. + + Add `encode_into_uri()` method for the owned IRI string types. +* Refine task API + + Move some methods of `normalize::NormalizationTask` into newly added + `task::ProcessAndWrite` trait. + - `allocate_and_write`, `write_to_byte_slice`, `append_to_std_string`, + and `try_append_to_std_string` is moved. + + Change type parameter of `NormalizationTask` from a spec into a string slice type. 
+ + Change error type for `NormalizationTask`. + + Remove `normalize::create_task()` function. + +### Added +* Add more conversions from/to IRI string types. + + Implement `From<{owned URI}>` for the owned IRI string types. + + Add `as_slice` method to the owned string types. + + Add `convert::MappedToUri` type. + + Add `encode_to_uri()` method for the IRI string types. + + Add `encode_into_uri()` method for the owned IRI string types. + +### Changed (breaking) +* Move some methods of `normalize::NormalizationTask` into newly added + `task::ProcessAndWrite` trait. + + `allocate_and_write`, `write_to_byte_slice`, `append_to_std_string`, + and `try_append_to_std_string` is moved. +* Change type parameter of `NormalizationTask` from a spec into a string slice type. + + Now `NormalizationTask<S>` should be changed to `NormalizationTask<RiStr<S>>` + or `NormalizationTask<RiAbsoluteStr<S>>`. + + This enables the task to return more appropriate type. For example, + returning `&RiAbsoluteStr<S>` rather than `&RiStr<S>` when the input IRI + type is `RiAbsoluteStr<S>`. +* Change error type for `NormalizationTask`. + + Now buffer error and processing error is split to different types. +* Remove `normalize::create_task()` function. + +## [0.5.0-beta.3] + +* Add `normalize` module, and unify it with IRI resolution. + + Move `resolve::{Error, ErrorKind}` to `normalize` module. + + Move and rename `resolve::ResolutionTask` to `normalize::NormalizationTask`. + + Add `normalize::create_task` function. + + Add `{RiStr, RiAbsoluteStr}::normalize()` methods. +* Add normalizing variations for IRI resolution. + +### Added +* Add `normalize` module. + + Add `normalize::create_task()` function. + + Add `{RiStr, RiAbsoluteStr}::normalize()` methods. +* Add normalizing variations for IRI resolution. + + `resolve_normalize` for `resolve`. + + `resolve_normalize_against` for `resolve_against`. + +### Changed (breaking) +* Move `resolve::{Error, ErrorKind}` to `normalize` module. 
+* Move and rename `resolve::ResolutionTask` to `normalize::NormalizationTask`. + + Now `resolve::FixedBaseResolver::create_task()` returns `NormalizationTask`. + +## [0.5.0-beta.2] + +* Fix a bug that `serde-std` feature did not enable serde support for owned types. + +### Fixed +* Fix a bug that `serde-std` feature did not enable serde support for owned types. + + Now `serde-std` enables `alloc` features automatically. + +## [0.5.0-beta.1] + +* Add getters for major components of IRIs/URIs: `scheme`, `authority`, `path`, and `query`. +* Add types and getters for subcomponents of `authority`: `userinfo`, `host`, and `port`. + + `components::AuthorityComponents` type and `authority_components` method. +* Fix a bug that `serde-std` feature did not enable serde support for owned types. + +### Added +* Add getters for major components of IRIs/URIs: `scheme`, `authority`, `path`, and `query`. + + Method names are `scheme_str`, `authority_str`, `path_str`, and `query_str`, respectively. + + Getters for `fragment` component is already provided. +* Add getter for subcomponents of `authority`: `userinfo`, `host`, and `port`. + + `components::AuthorityComponents` type and `authority_components` method. + +### Fixed +* Fix a bug that `serde-std` feature did not enable serde support for owned types. + + Now `serde-std` enables `alloc` features automatically. + +## [0.5.0-beta.0] + +* Bump MSRV to 1.58.0. +* Add conversion from a byte slice (`&[u8]`) into IRI string types. +* Add `capacity` method to allocated string types. +* Remove `is_strict: bool` parameter from `resolve::resolve()`. +* Add `resolve::FixedBaseResolver`, `resolve::ResolutionTask`, and `resolve::Error` types. + + Some methods for IRI resolution are now available even when `alloc` feature is disabled. + + See [IRI resolution using user-provided buffers (#6)](https://github.com/lo48576/iri-string/issues/6). +* Make IRI resolution fallible. + + Now `resolve()` and its family returns `Result<_, resolve::Error>`. 
+ + See [How `/..//bar` should be resolved aganst `scheme:`? (#8)](https://github.com/lo48576/iri-string/issues/8). +* Make IRI resolution recognize percent-encoded period. + + Now `%2E` and `%2e` in path segment is handled as a plain period `.`. + + See [Recognize percent-encoded periods (`%2E`) during IRI resolution (#9)](https://github.com/lo48576/iri-string/issues/9) +* Make parsers faster. + + See [Make the parsers faster (#7)](https://github.com/lo48576/iri-string/issues/7) +* Drop internal dependency to `nom`. +* Stop emitting compilation error when both `serde` and `std`/`alloc` are enabled + without corresponding `serde-{std,alloc}` features. + +### Added +* Add conversion from a byte slice (`&[u8]`) into IRI string types. +* Add `capacity` method to allocated string types. + + `shrink_to_fit()` and `len()` already exists, so this would be useful to determine + when to do `shrink_to_fit`. +* Add `resolve::FixedBaseResolver`, `resolve::ResolutionTask`, and `resolve::Error` types. + + They provide more efficient and controllable IRI resolution. + + Some methods for IRI resolution are now available even when `alloc` feature is disabled. + +### Changed (breaking) +* Bump MSRV to 1.58.0. + + Rust 1.58.0 is released at 2022-01-13. +* Remove `is_strict: bool` parameter from `resolve::resolve()`. + + The IRI parsers provided by this crate is "strict", so resolution + algorithm should use an algorithm for the strict parser. +* Make IRI resolution fallible. + + Now `resolve()` and its family returns `Result<_, resolve::Error>`. + + For the reasons behind, see crate-level documentation. + + See [How `/..//bar` should be resolved aganst `scheme:`? (#8)](https://github.com/lo48576/iri-string/issues/8). +* Make IRI resolution recognize percent-encoded period. + + Now `%2E` and `%2e` in path segment is handled as a plain period `.`. 
+ + Period is an `unreserved` character, and can be escaped at any time + (see [RFC 3986 section 2.4](https://datatracker.ietf.org/doc/html/rfc3986#section-2.4)). + This means that `%2E` and `%2e` in the path can be normalized to `.` before IRI resolution, + and thus they should also be handled specially during `remove_dot_segments` algorithm. + + See [Recognize percent-encoded periods (`%2E`) during IRI resolution (#9)](https://github.com/lo48576/iri-string/issues/9) + +### Changed (non-breaking) +* Make parsers faster. + + Parsers are rewritten, and they became very fast! + + Almost all usages are affected: type conversions, validations, and IRI resolutions. + + See [Make the parsers faster (#7)](https://github.com/lo48576/iri-string/issues/7) +* Drop internal dependency to `nom`. + + Parsers are rewritten without `nom`. +* Stop emitting compilation error when both `serde` and `std`/`alloc` are enabled + without corresponding `serde-{std,alloc}` features. + + `serde` and `std`/`alloc` might be enabled independently from the different + indirect dependencies, so this situation should not be compilation error. + +## [0.4.1] + +* Bump internal dependency. + + `nom` from v6 to v7. + +### Changed (non-breaking) +* Bump internal dependency. + + `nom` from v6 to v7. + +## [0.4.0] + +* MSRV is bumped to 1.48.0. +* Internal dependencies are bumped. + + `nom` crate is bumped to 6. +* `serde::{Serialize, Deserialize}` is now implemented only for types with valid spec types. +* Feature flags are refactored. + +### Changed (breaking) +* MSRV is bumped to 1.48.0. + + Rust 1.48.0 is released at 2020-11-19. +* `serde::{Serialize, Deserialize}` is now implemented only for types with valid spec types. + + Strictly this is a breaking change, but this only forbids the meaningless trait impls, + so no real world use cases will be affected by this change. +* Feature flags are refactored. + + `serde-alloc` and `serde-std` flags are added to control serde's alloc and std support. 
+ + Unintended dependency from `std` use flag to `serde` crate is now fixed. + Users who want to enable `serde` and `std` at the same time should also enable `serde-std` + feature. Same applies for `serde` and `alloc` pair. + +## [0.3.0] + +**This release contains huge changes, and CHANGELOG may be incomplete. +Believe rustdoc rather than this CHANGELOG.** + +* Minimum supported Rust version is now 1.41 or above. +* Make IRI string types polymorphic, and rename some types. + + Now IRI types and URI types can share the same codebase. + + This makes it easy for users to implement functions for both IRI types and URI types. +* Add URI types. +* Remove `Deref` impls for IRI string types. +* Remove deprecated items. +* Add and change methods for IRI string types. +* `resolve::resolve_iri` is now (more) polymorphic, and renamed to `resolve::resolve`. +* Update some internal dependencies. + + This has no effect for usual users, and this does not introduce any API changes. + + By this change, the crate now successfully compiles with minimal dependency versions. +* Support `no_std` environment. + + `std` and `alloc` feature flags are added. + + `std` feature is enabled by default (and `std` enables `alloc` automatically). + +### Fixes +* Update some internal dependencies to make the crate buildable with minimal dependency versions. + + This has no effect for usual users, and this does not introduce any API changes. + + By this change, the crate now successfully compiles with minimal dependency versions. + - To test that, you can run + `cargo +nightly update -Z minimal-versions && cargo test --all-features`. + +### Changed (breaking) +* Make IRI string types polymorphic, and rename some types. + + Now IRI types and URI types can share the same codebase. + + This makes it easy for users to implement functions for both IRI types and URI types. + + Polymorphic types are named `types::Ri{,Absolute,Fragment,Reference,Relative}Str{,ing}`. 
+ + Type aliases for monomorphized types are also provided, but naming conventions are the same. + They are named `{Iri,Uri}{..}Str{,ing}`. + - For example, there is `IriAbsoluteStr` instead of legacy `AbsoluteIriStr`. + + `types::CreationError` is now revived. + + `types::IriCreationError` is now removed in favor of `types::CreationError`. +* Remove deprecated items. + + `IriReferenceStr::resolve()` is now removed. + Use `IriReferenceStr::resolve_against()` instead. +* Remove `Deref` impls for IRI string types. + + IRI string types should not implement `Deref`, because they are not smart pointer types. +* Change methods types. + + `IriReferenceStr::resolve_against()` now returns `Cow<'_, IriStr>`, rather than `IriString`. +* `resolve::resolve_iri` is now polymorphic, and renamed to `resolve::resolve`. + + Now it can be used for both IRI types and URI types. + +### Changed (non-breaking) +* Support `no_std` environment. + + `std` and `alloc` feature flags are added. + + `std` feature is enabled by default (and `std` enables `alloc` automatically). + + In `no_std` environment with allocator support, you can enable `alloc` feature. +* Add methods for IRI string types. + + `len()` and `is_empty()` methods are added to all IRI string slice types. + + `IriStr::fragment()` is added. + + `RelativeIriStr::resolve_against()` is added. +* Add URI types. + +## [0.2.3] + +* Fixed a bug that URI validators wrongly accept non-ASCII characters. + + Now they reject non-ASCII characters correctly. +* Fixed a bug that abnormal URIs (such as `foo://` or `foo:////`) are wrongly rejected. + + Now they are accepted as valid IRIs. + +### Fixes +* Fixed a bug that URI validators wrongly accept non-ASCII characters + (9b8011f54dab3c2f8da78dc2251353453317d8af). + + Now they reject non-ASCII characters correctly. +* Fixed a bug that abnormal URIs (such as `foo://` or `foo:////`) are wrongly rejected + (7a40f4b72964d498970a356368dc320917d88e43). + + Now they are accepted as valid IRIs. 
+ + Documents are added to explain why they are valid. + +### Improved +* More tests are added to ensure invalid URIs/IRIs are rejected as expected + (9b8011f54dab3c2f8da78dc2251353453317d8af). + +## [0.2.2] + +* `IriReferenceStr::resolve()` is renamed to `resolve_against()`. + + The old name will be kept until the next minor version bump to keep compatibility. + +### Changed (non-breaking) +* `IriReferenceStr::resolve()` is renamed to `resolve_against()` + (4d64ee9884713644b69b8f227f32637d877a9d5f). + + `resolve()` was an ambiguous name, and people cannot know which `foo.resolve(bar)` means: + "resolve foo against bar" or "foo resolves bar". + + The new name `resolve_against()` is more clear. `foo.resolve_against(bar)` can be naturally + interpreted as "resolve foo against bar". + + The old name will be kept until the next minor version bump to keep compatibility. + +## [0.2.1] + +* `*Str::new()` methods are added. +* `IriFragmentStr::from_prefixed()` is added. +* `types::CreationError` is renamed to `types::IriCreationError`. + + The old name will be kept until the next minor version bump to keep compatibility. +* Reduced indirect dependencies + +### Added +* `*Str::new()` methods are added (39c8f735ccf6f28aaf2f16dcdc579fb3838bb5fb). + + Previously the string slices are created as `<&FooStr>::try_from(s)` (where `s: &str`), + but this is redundant. + Now `FooStr::new(s)` can be used instead of `<&FooStr>::try_from(s)` for `s: &str`. +* `IriFragmentStr::from_prefixed()` is added (34cec2f422ba8046134668bdb662f69c9db7f52c). + + This creates `IriFragmentStr` from the given string with leading hash (`#`) character. + For example, `IriFragmentStr::from_prefixed("#foo")` is same as `IriFragmentStr::new("foo")`. + +### Changed (non-breaking) +* `types::CreationError` is renamed to `types::IriCreationError` + (c6e930608f158281d059e632ffc6117bddf18ebc, c0e650c5e19f1775cf82960afc9610994afba66e). 
+ + The old name will be kept until the next minor version bump to keep compatibility. +* Disabled `lexical` feature of `nom` crate (a2d5bcd02e02e80af1c4fc8c14d768ca519ef467). + + This reduces indirect dependencies. +* Migrate code generator from proc-macro crate to non-proc-macro one + (363337e720a9fdfa7e17153ffc63192bd49f7cc3). + + This reduces indirect dependencies, and may also reduce compilation time. + +## [0.2.0] + +* Use nom 5.0.0. + + This is non-breaking change. + +## [0.2.0-beta.1] + +* Implement `Clone` and `Copy` for validation error types. +* Let an error type contain source string for conversion from owned string. +* Add `shrink_to_fit()` methods for `types::iri::*String` types. +* Add `set_fragment()` methods for `types::iri::*String` types + (except for `AbsoluteIriString`). +* Add `as_str()` method for `types::iri::*Str` types. +* Add `types::iri::IriFragment{Str,String}` type. +* Move `fragment()` from `IriStr` to `IriReferenceStr`. + +### Changed (non-breaking) +* Implement `Clone` and `Copy` for validation error types + (`validate::{iri,uri}::Error`) (8c6af409963a). + +#### Added +* Add `shrink_to_fit()` methods for `types::iri::*String` types (c8671876229f). +* Add `set_fragment()` methods for `types::iri::*String` types + (except for `AbsoluteIriString`) (5ae09a327d93). +* Add `as_str()` method for `types::iri::*Str` types (0984140105a1). +* Add `types::iri::IriFragment{Str,String}` type (1c5e06192cf8). + + This represents fragment part of an IRI. + +### Changed (breaking) +* `types::iri::{AbsoluteIri,Iri,IriReference,RelativeIri}String::TryFrom<_>` now + returns `types::iri::CreationError` as an error (8c6af409963a). + + `CreationError` owns the source data so that it is not lost on conversion + failure. + + `CreationError::into_source()` returns the source data which cannot be + converted into an IRI type. + + Previously `validate::iri::Error` is used to represent error, but it does + not own the source data. 
+* Move `fragment()` from `IriStr` to `IriReferenceStr` (1c5e06192cf8). + + `v.fragment()` for `v: &IriStr` is still available thanks to `Deref`. + +## [0.2.0-beta.0] + +Totally rewritten. + +[Unreleased]: <https://github.com/lo48576/iri-string/compare/v0.7.8...develop> +[0.7.8]: <https://github.com/lo48576/iri-string/releases/tag/v0.7.8> +[0.7.7]: <https://github.com/lo48576/iri-string/releases/tag/v0.7.7> +[0.7.6]: <https://github.com/lo48576/iri-string/releases/tag/v0.7.6> +[0.7.5]: <https://github.com/lo48576/iri-string/releases/tag/v0.7.5> +[0.7.4]: <https://github.com/lo48576/iri-string/releases/tag/v0.7.4> +[0.7.3]: <https://github.com/lo48576/iri-string/releases/tag/v0.7.3> +[0.7.2]: <https://github.com/lo48576/iri-string/releases/tag/v0.7.2> +[0.7.1]: <https://github.com/lo48576/iri-string/releases/tag/v0.7.1> +[0.7.0]: <https://github.com/lo48576/iri-string/releases/tag/v0.7.0> +[0.6.0]: <https://github.com/lo48576/iri-string/releases/tag/v0.6.0> +[0.5.6]: <https://github.com/lo48576/iri-string/releases/tag/v0.5.6> +[0.5.5]: <https://github.com/lo48576/iri-string/releases/tag/v0.5.5> +[0.5.4]: <https://github.com/lo48576/iri-string/releases/tag/v0.5.4> +[0.5.3]: <https://github.com/lo48576/iri-string/releases/tag/v0.5.3> +[0.5.2]: <https://github.com/lo48576/iri-string/releases/tag/v0.5.2> +[0.5.1]: <https://github.com/lo48576/iri-string/releases/tag/v0.5.1> +[0.5.0]: <https://github.com/lo48576/iri-string/releases/tag/v0.5.0> +[0.5.0-rc.0]: <https://github.com/lo48576/iri-string/releases/tag/v0.5.0-rc.0> +[0.5.0-beta.4]: <https://github.com/lo48576/iri-string/releases/tag/v0.5.0-beta.4> +[0.5.0-beta.3]: <https://github.com/lo48576/iri-string/releases/tag/v0.5.0-beta.3> +[0.5.0-beta.2]: <https://github.com/lo48576/iri-string/releases/tag/v0.5.0-beta.2> +[0.5.0-beta.1]: <https://github.com/lo48576/iri-string/releases/tag/v0.5.0-beta.1> +[0.5.0-beta.0]: <https://github.com/lo48576/iri-string/releases/tag/v0.5.0-beta.0> +[0.4.1]: 
<https://github.com/lo48576/iri-string/releases/tag/v0.4.1> +[0.4.0]: <https://github.com/lo48576/iri-string/releases/tag/v0.4.0> +[0.3.0]: <https://github.com/lo48576/iri-string/releases/tag/v0.3.0> +[0.2.3]: <https://github.com/lo48576/iri-string/releases/tag/v0.2.3> +[0.2.2]: <https://github.com/lo48576/iri-string/releases/tag/v0.2.2> +[0.2.1]: <https://github.com/lo48576/iri-string/releases/tag/v0.2.1> +[0.2.0]: <https://github.com/lo48576/iri-string/releases/tag/v0.2.0> +[0.2.0-beta.1]: <https://github.com/lo48576/iri-string/releases/tag/v0.2.0-beta.1> +[0.2.0-beta.0]: <https://github.com/lo48576/iri-string/releases/tag/v0.2.0-beta.0> diff --git a/vendor/iri-string/Cargo.lock b/vendor/iri-string/Cargo.lock new file mode 100644 index 00000000..eff0a735 --- /dev/null +++ b/vendor/iri-string/Cargo.lock @@ -0,0 +1,82 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "iri-string" +version = "0.7.8" +dependencies = [ + "memchr", + "serde", + "serde_test", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "proc-macro2" +version = "1.0.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" 
+version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_test" +version = "1.0.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f901ee573cab6b3060453d2d5f0bae4e6d628c23c0a962ff9b5f1d7c8d4f1ed" +dependencies = [ + "serde", +] + +[[package]] +name = "syn" +version = "2.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" diff --git a/vendor/iri-string/Cargo.toml b/vendor/iri-string/Cargo.toml new file mode 100644 index 00000000..c8611a1f --- /dev/null +++ b/vendor/iri-string/Cargo.toml @@ -0,0 +1,129 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2021" +rust-version = "1.60" +name = "iri-string" +version = "0.7.8" +authors = ["YOSHIOKA Takuma <nop_thread@nops.red>"] +build = false +autolib = false +autobins = false +autoexamples = false +autotests = false +autobenches = false +description = "IRI as string types" +readme = "README.md" +keywords = [ + "IRI", + "URI", +] +license = "MIT OR Apache-2.0" +repository = "https://github.com/lo48576/iri-string" + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = [ + "--cfg", + "docsrs", +] + +[badges.maintenance] +status = "actively-developed" + +[features] +alloc = ["serde?/alloc"] +default = ["std"] +std = [ + "alloc", + "memchr?/std", + "serde?/std", +] + +[lib] +name = "iri_string" +path = "src/lib.rs" +bench = false + +[[example]] +name = "flamegraph-parse" +path = "examples/flamegraph-parse.rs" + +[[example]] +name = "flamegraph-resolve" +path = "examples/flamegraph-resolve.rs" +required-features = ["alloc"] + +[[example]] +name = "normalize" +path = "examples/normalize.rs" +required-features = ["std"] + +[[example]] +name = "parse" +path = "examples/parse.rs" +required-features = ["std"] + +[[example]] +name = "resolve" +path = "examples/resolve.rs" +required-features = ["std"] + +[[test]] +name = "build" +path = "tests/build.rs" + +[[test]] +name = "gh-issues" +path = "tests/gh-issues.rs" + +[[test]] +name = "iri" +path = "tests/iri.rs" + +[[test]] +name = "normalize" +path = "tests/normalize.rs" + +[[test]] +name = "percent_encode" +path = "tests/percent_encode.rs" + +[[test]] +name = "resolve" +path = "tests/resolve.rs" + +[[test]] +name = "serde" +path = "tests/serde.rs" + +[[test]] +name = "string_types_interop" +path = "tests/string_types_interop.rs" + +[[test]] +name = "template" +path = "tests/template.rs" + +[dependencies.memchr] +version = "2.4.1" +optional = true +default-features = false + +[dependencies.serde] +version = "1.0.103" +features = ["derive"] +optional = true +default-features = false + 
+[dev-dependencies.serde_test] +version = "1.0.104" diff --git a/vendor/iri-string/LICENSE-APACHE.txt b/vendor/iri-string/LICENSE-APACHE.txt new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/vendor/iri-string/LICENSE-APACHE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/iri-string/LICENSE-MIT.txt b/vendor/iri-string/LICENSE-MIT.txt new file mode 100644 index 00000000..9c9285b2 --- /dev/null +++ b/vendor/iri-string/LICENSE-MIT.txt @@ -0,0 +1,25 @@ +Copyright 2019-2024 YOSHIOKA Takuma + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/vendor/iri-string/Makefile.toml b/vendor/iri-string/Makefile.toml new file mode 100644 index 00000000..505763df --- /dev/null +++ b/vendor/iri-string/Makefile.toml @@ -0,0 +1,332 @@ +# # cargo-make config for CI. +# +# ## Variables to maintain +# * `MSRV` +# + Minimal supported rust version. +# + This should be consistent with `package.rust-version` field of `Cargo.toml`. +# +# ## Controllable variables +# * `VERBOSE` +# + If set to non-`false` or non-`0`, `--verbose` options are added for build and test. +# +# ## Profiles +# Profiles are mainly used to change features and dependencies. +# +# * `default` +# + Default features. +# + Up-to-date deps. +# * `feat-none` +# + No features. +# + Up-to-date deps. +# * `feat-alloc` +# + `alloc` feature. +# + Up-to-date deps. +# * `feat-std` +# + `std` feature. +# + Up-to-date deps. +# * `feat-memchr` +# + `memchr` feature. +# + Up-to-date deps. +# * `feat-serde` +# + `serde` feature. +# + Up-to-date deps. +# * `feat-serde-alloc` +# + `serde` and `alloc` features. +# + Up-to-date deps. +# * `feat-serde-std` +# + `serde` and `std` features. +# + Up-to-date deps. +# * `feat-all` +# + All features. +# + Up-to-date deps. +# * `minimal-deps-feat-none` +# + No features. +# + Minimal versions deps. +# * `minimal-deps-feat-default` +# + Default features. +# + Minimal versions deps. +# * `minimal-deps-feat-all` +# + All features. +# + Minimal versions deps. 
+# +# ## Tasks +# +# ### For CLI +# * `manual-ci-all` +# + Invokes all required CI tasks, with required rust versions. +# * `ci-all-profiles` +# + Invokes required CI tasks for all profiles. +# +# ### For automated CI +# * `ci-once` +# + CI task that should be run at least once for one commit. +# + This does not need to be run with multiple tasks, or +# with multiple rust versions. +# * `ci` +# + CI task that should (or recommended to) be run for each profiles and/or +# for each toolchain. + +################################################################ + +[env] +# Minimal supported rust version. +MSRV = "1.60.0" + +FEATURES = "" +ALL_FEATURES = false +# `FEATURES = "default,..."` is recommended to enable default features. +# `ALL_FEATURES = true` does NOT require this to be `false`. +NO_DEFAULT_FEATURES = true +USE_MINIMAL_DEPENDENCIES = false + +[env.default] +FEATURES = "default" + +[env.feat-none] +FEATURES = "" + +[env.feat-alloc] +FEATURES = "alloc" + +[env.feat-std] +FEATURES = "std" + +[env.feat-memchr] +FEATURES = "memchr" + +[env.feat-serde] +FEATURES = "serde" + +[env.feat-serde-alloc] +FEATURES = "serde,alloc" + +[env.feat-serde-std] +FEATURES = "serde,std" + +[env.feat-all] +ALL_FEATURES = true + +[env.minimal-deps-feat-none] +FEATURES = "" +USE_MINIMAL_DEPENDENCIES = true + +[env.minimal-deps-feat-default] +FEATURES = "default" +USE_MINIMAL_DEPENDENCIES = true + +[env.minimal-deps-feat-all] +ALL_FEATURES = true +USE_MINIMAL_DEPENDENCIES = true + +################################################################ + +# For manual invocation from CLI. 
+[tasks.manual-ci-all] +VERBOSE = { value = "false", condition = { env_not_set = ["VERBOSE"] } } +script = [ +''' +cargo +${MSRV} make ci-once +cargo +${MSRV} make ci-all-profiles +cargo +stable make --profile default ci +cargo +stable make --profile feat-all ci +cargo +stable make --profile minimal-deps-feat-all ci +cargo +beta make --profile feat-all ci +cargo +nightly make --profile feat-all ci +cargo +nightly make --profile default check-miri +cargo +nightly make --profile feat-none check-miri +''' +] + +# For manual invocation from CLI. +[tasks.ci-all-profiles] +VERBOSE = { value = "false", condition = { env_not_set = ["VERBOSE"] } } +script = [ +''' +cargo make --profile default ci +cargo make --profile feat-none ci +cargo make --profile feat-alloc ci +cargo make --profile feat-std ci +cargo make --profile feat-memchr ci +cargo make --profile feat-serde ci +cargo make --profile feat-serde-alloc ci +cargo make --profile feat-serde-std ci +cargo make --profile feat-all ci +cargo make --profile minimal-deps-feat-none ci +cargo make --profile minimal-deps-feat-default ci +cargo make --profile minimal-deps-feat-all ci +''' +] + +# CI tasks to run only once per commit. +# Recommended to be run before `ci` task. +[tasks.ci-once] +dependencies = [ + "print-makers-env", + "ci-once-check", +] + +# CI tasks to run per profile. 
+[tasks.ci] +dependencies = [ + "print-makers-env", + "print-rust-version", + "prepare-dependencies", + "ci-check", + "ci-build", + "ci-test", + { name = "check-miri", condition = { channels = ["nightly"] } }, +] + +[tasks.ci-once-check] +dependencies = [ + "check-rustfmt", +] + +[tasks.ci-check] +dependencies = [ + "check-clippy", +] + +[tasks.ci-build] +dependencies = [ + "build", +] + +[tasks.ci-test] +dependencies = [ + "test", +] + +[tasks.print-makers-env] +script = [ +''' +echo "Environment:" +echo " PROFILE_NAME: ${CARGO_MAKE_PROFILE}" +echo " ALL_FEATURES: ${ALL_FEATURES}" +echo " NO_DEFAULT_FEATURES: ${NO_DEFAULT_FEATURES}" +echo " FEATURES: ${FEATURES}" +echo " VERBOSE: ${VERBOSE:-}" +echo " Rust version: ${CARGO_MAKE_RUST_VERSION}" +echo " Rust channel: ${CARGO_MAKE_RUST_CHANNEL}" +''' +] + +[tasks.prepare-dependencies] +run_task = [ + { name = "prepare-minimal-dependencies", condition = { env_true = ["USE_MINIMAL_DEPENDENCIES"] } }, + { name = "prepare-latest-dependencies" }, +] + +[tasks.prepare-minimal-dependencies] +condition = { env_true = ["USE_MINIMAL_DEPENDENCIES"] } +toolchain = "nightly" +command = "cargo" +args = ["update", "-Z", "minimal-versions"] + +[tasks.prepare-latest-dependencies] +command = "cargo" +args = ["update"] + +[tasks.cargo-clean] +command = "cargo" +args = ["clean"] + +[tasks.check-rustfmt] +run_task = [ + { name = "check-rustfmt-strict", condition = { rust_version = { equal = "$MSRV" } } }, + { name = "check-rustfmt-ignore-errors" }, +] + +[tasks.check-rustfmt-strict] +dependencies = ["print-rustfmt-version"] +command = "cargo" +args = ["fmt", "--all", "--", "--check"] + +[tasks.check-rustfmt-ignore-errors] +dependencies = ["print-rustfmt-version"] +command = "cargo" +args = ["fmt", "--all", "--", "--check"] + +[tasks.print-rustfmt-version] +install_crate = { rustup_component_name = "rustfmt" } +command = "cargo" +args = ["fmt", "--version"] + +[tasks.check-miri] +dependencies = ["print-miri-version", "cargo-clean"] 
+toolchain = "nightly" +command = "cargo" +args = [ + "miri", + "test", + "@@decode(NO_DEFAULT_FEATURES,true,--no-default-features,)", + "@@decode(ALL_FEATURES,true,--all-features,)", + "@@decode(FEATURES,,,--features=${FEATURES})", +] + +[tasks.print-miri-version] +install_crate = { rustup_component_name = "miri" } +toolchain = "nightly" +command = "cargo" +args = ["miri", "--version"] + +[tasks.check-clippy] +run_task = [ + { name = "check-clippy-strict", condition = { rust_version = { equal = "$MSRV" } } }, + { name = "check-clippy-ignore-errors" }, +] + +[tasks.check-clippy-strict] +dependencies = ["print-clippy-version"] +command = "cargo" +args = [ + "clippy", + "@@decode(NO_DEFAULT_FEATURES,true,--no-default-features,)", + "@@decode(ALL_FEATURES,true,--all-features,)", + "@@decode(FEATURES,,,--features=${FEATURES})", + "--", + "--deny", + "warnings", +] + +[tasks.check-clippy-ignore-errors] +dependencies = ["print-clippy-version"] +command = "cargo" +args = [ + "clippy", + "@@decode(NO_DEFAULT_FEATURES,true,--no-default-features,)", + "@@decode(ALL_FEATURES,true,--all-features,)", + "@@decode(FEATURES,,,--features=${FEATURES})", +] +ignore_errors = true + +[tasks.print-clippy-version] +install_crate = { rustup_component_name = "clippy" } +command = "cargo" +args = ["clippy", "--version"] + +[tasks.print-rust-version] +command = "rustc" +args = ["--version"] + +[tasks.build] +command = "cargo" +args = [ + "build", + "@@decode(NO_DEFAULT_FEATURES,true,--no-default-features,)", + "@@decode(ALL_FEATURES,true,--all-features,)", + "@@decode(FEATURES,,,--features=${FEATURES})", + "@@decode(VERBOSE,false,,0,,--verbose)", +] + +[tasks.test] +command = "cargo" +args = [ + "test", + "@@decode(NO_DEFAULT_FEATURES,true,--no-default-features,)", + "@@decode(ALL_FEATURES,true,--all-features,)", + "@@decode(FEATURES,,,--features=${FEATURES})", + "@@decode(VERBOSE,false,,0,,--verbose)", +] diff --git a/vendor/iri-string/README.md b/vendor/iri-string/README.md new file mode 
100644 index 00000000..01c7607e --- /dev/null +++ b/vendor/iri-string/README.md @@ -0,0 +1,71 @@ +# iri-string + +[](https://crates.io/crates/iri-string) +[](https://docs.rs/iri-string) + + +String types for [IRI](https://tools.ietf.org/html/rfc3987)s (Internationalized Resource +Identifiers) and [URI](https://tools.ietf.org/html/rfc3986)s (Uniform Resource Identifiers). + +See the [documentation](https://docs.rs/iri-string) for details. + +## Features + +* `no_std` support. +* String types (both owned and borrowed) for RFC 3986 URIs and RFC 3987 IRIs. + + Native slice types, so highly operable with `Cow`, `ToOwned`, etc. + + URIs/IRIs validation. + + Conversions between URIs and IRIs. + + Decomposition into components. +* IRI reference resolution algorithm. +* IRI normalization algorithm. +* Masking password part of an IRI (optional and not automatic). +* Percent encoding of user-provided strings. +* IRI builder. +* RFC 6570 URI Template. + +### Feature flags + +#### Direct +* `alloc` (enabled by default) + + Enables types and functions which require memory allocation. + + Requires `std` or `alloc` crate available. +* `std` (enabled by default) + + Enables all `std` features (such as memory allocations and `std::error::Error` trait). + + Requires `std` crate available. + + This automatically enables `alloc` feature. + +#### memchr +* `memchr` + + Enables optimization for internal parsers, using [`memchr`] crate. + +[`memchr`]: https://crates.io/crates/memchr + +#### serde +* `serde` + + Implements `Serialize` and `Deserialize` traits for string types. + +## CI + +CI must pass on `develop` and `master` branches. +No automated online CI is set up (since it consumes credits too fast), so run +`cargo make manual-ci-all` locally before committing to these branches. +On other branches, tests and some lints (such as `dead_code`) are allowed to +fail, but all commits must be successfully compilable and must be formatted. 
+ +## License + +Licensed under either of + +* Apache License, Version 2.0, ([LICENSE-APACHE.txt](LICENSE-APACHE.txt) or + <https://www.apache.org/licenses/LICENSE-2.0>) +* MIT license ([LICENSE-MIT.txt](LICENSE-MIT.txt) or + <https://opensource.org/licenses/MIT>) + +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall be +dual licensed as above, without any additional terms or conditions. diff --git a/vendor/iri-string/examples/flamegraph-parse.rs b/vendor/iri-string/examples/flamegraph-parse.rs new file mode 100644 index 00000000..a0749318 --- /dev/null +++ b/vendor/iri-string/examples/flamegraph-parse.rs @@ -0,0 +1,26 @@ +use iri_string::types::IriReferenceStr; + +fn main() { + for _ in 0..1000000 { + let s = concat!( + "scheme://user:pw@sub.example.com:8080/a/b/c/%30/%31/%32%33%34", + "/foo/foo/../../../foo.foo/foo/foo/././././//////foo", + "/\u{03B1}\u{03B2}\u{03B3}/\u{03B1}\u{03B2}\u{03B3}/\u{03B1}\u{03B2}\u{03B3}", + "?k1=v1&k2=v2&k3=v3#fragment" + ); + + let domain = "scheme://sub.sub.sub.example.com:8080/a/b/c"; + let v4 = "scheme://198.51.100.23:8080/a/b/c"; + let v6 = "scheme://[2001:db8:0123::cafe]:8080/a/b/c"; + let v6v4 = "scheme://[2001:db8::198.51.100.23]:8080/a/b/c"; + let vfuture = "scheme://[v2.ipv2-does-not-exist]:8080/a/b/c"; + let _ = ( + IriReferenceStr::new(s), + IriReferenceStr::new(domain), + IriReferenceStr::new(v4), + IriReferenceStr::new(v6), + IriReferenceStr::new(v6v4), + IriReferenceStr::new(vfuture), + ); + } +} diff --git a/vendor/iri-string/examples/flamegraph-resolve.rs b/vendor/iri-string/examples/flamegraph-resolve.rs new file mode 100644 index 00000000..c22dfa6a --- /dev/null +++ b/vendor/iri-string/examples/flamegraph-resolve.rs @@ -0,0 +1,17 @@ +#![cfg(feature = "alloc")] +use iri_string::format::ToDedicatedString; +use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; + +fn 
main() { + let base = IriAbsoluteStr::new("https://sub.example.com/foo1/foo2/foo3/foo4/foo5") + .expect("should be valid IRI"); + let rel = IriReferenceStr::new(concat!( + "bar1/bar2/bar3/../bar4/../../bar5/bar6/bar7/../../../../..", + "/bar8/../../../bar9/././././././bar10/bar11", + )) + .expect("should be valid IRI"); + for _ in 0..1000000 { + let resolved = rel.resolve_against(base).to_dedicated_string(); + drop(resolved); + } +} diff --git a/vendor/iri-string/examples/normalize.rs b/vendor/iri-string/examples/normalize.rs new file mode 100644 index 00000000..2b28935f --- /dev/null +++ b/vendor/iri-string/examples/normalize.rs @@ -0,0 +1,145 @@ +//! An example to normalize an IRI from the CLI argument. + +use iri_string::format::ToDedicatedString; +use iri_string::types::{RiStr, RiString}; + +const USAGE: &str = "\ +USAGE: + normalize [FLAGS] [--] IRI + +FLAGS: + -h, --help Prints this help + -i, --iri Handle the input as an IRI (RFC 3987) + -u, --uri Handle the input as an URI (RFC 3986) + -a, --ascii Converts the output to an URI (RFC 3986) + -w, --whatwg Serialize normalization result according to WHATWG URL Standard. + +ARGS: + <IRI> IRI +"; + +fn print_help() { + eprintln!("{USAGE}"); +} + +fn help_and_exit() -> ! { + print_help(); + std::process::exit(1); +} + +fn die(msg: impl std::fmt::Display) -> ! { + eprintln!("ERROR: {msg}"); + eprintln!(); + print_help(); + std::process::exit(1); +} + +/// Syntax specification. +#[derive(Debug, Clone, Copy)] +enum Spec { + /// RFC 3986 URI. + Uri, + /// RFC 3987 IRI. + Iri, +} + +impl Default for Spec { + #[inline] + fn default() -> Self { + Self::Iri + } +} + +/// CLI options. +#[derive(Default, Debug, Clone)] +struct CliOpt { + /// IRI. + iri: String, + /// Syntax spec. + spec: Spec, + /// Whether to convert output to ASCII URI or not. + output_ascii: bool, + /// Whether to serialize in WHATWG URL Standard way. 
+ whatwg_serialization: bool, +} + +impl CliOpt { + fn parse() -> Self { + let mut args = std::env::args(); + // Skip `argv[0]`. + args.next(); + + let mut iri = None; + let mut spec = None; + let mut output_ascii = false; + let mut whatwg_serialization = false; + + for arg in args.by_ref() { + match arg.as_str() { + "--ascii" | "-a" => output_ascii = true, + "--iri" | "-i" => spec = Some(Spec::Iri), + "--uri" | "-u" => spec = Some(Spec::Uri), + "--whatwg" | "-w" => whatwg_serialization = true, + "--help" | "-h" => help_and_exit(), + opt if opt.starts_with('-') => die(format_args!("Unknown option: {opt}")), + _ => { + if iri.replace(arg).is_some() { + die("IRI can be specified at most once"); + } + } + } + } + + for arg in args { + if iri.replace(arg).is_some() { + eprintln!("ERROR: IRI can be specified at most once"); + } + } + + let iri = iri.unwrap_or_else(|| die("IRI should be specified")); + let spec = spec.unwrap_or_default(); + Self { + iri, + spec, + output_ascii, + whatwg_serialization, + } + } +} + +fn main() { + let opt = CliOpt::parse(); + + match opt.spec { + Spec::Iri => process_iri(&opt), + Spec::Uri => process_uri(&opt), + } +} + +fn process_iri(opt: &CliOpt) { + let mut normalized = normalize::<iri_string::spec::IriSpec>(opt); + if opt.output_ascii { + normalized.encode_to_uri_inline(); + } + println!("{normalized}"); +} + +fn process_uri(opt: &CliOpt) { + let normalized = normalize::<iri_string::spec::UriSpec>(opt); + println!("{normalized}"); +} + +fn normalize<S: iri_string::spec::Spec>(opt: &CliOpt) -> RiString<S> { + let raw = &opt.iri.as_str(); + let iri = match RiStr::<S>::new(raw) { + Ok(v) => v, + Err(e) => die(format_args!("Failed to parse {raw:?}: {e:?}")), + }; + let normalized = iri.normalize(); + if !opt.whatwg_serialization { + if let Err(e) = normalized.ensure_rfc3986_normalizable() { + die(format_args!("Failed to normalize: {e:?}")); + } + } + normalized.to_dedicated_string() +} diff --git a/vendor/iri-string/examples/parse.rs 
b/vendor/iri-string/examples/parse.rs new file mode 100644 index 00000000..b447e461 --- /dev/null +++ b/vendor/iri-string/examples/parse.rs @@ -0,0 +1,159 @@ +//! An example to parse IRI from the CLI argument. + +use iri_string::types::{IriStr, RiReferenceStr, RiStr}; + +const USAGE: &str = "\ +USAGE: + parse [FLAGS] [--] IRI + +FLAGS: + -h, --help Prints this help + -i, --iri Handle the input as an IRI (RFC 3987) + -u, --uri Handle the input as an URI (RFC 3986) + +ARGS: + <IRI> IRI or URI +"; + +fn print_help() { + eprintln!("{}", USAGE); +} + +fn help_and_exit() -> ! { + print_help(); + std::process::exit(1); +} + +fn die(msg: impl std::fmt::Display) -> ! { + eprintln!("ERROR: {}", msg); + eprintln!(); + print_help(); + std::process::exit(1); +} + +/// Syntax specification. +#[derive(Debug, Clone, Copy)] +enum Spec { + /// RFC 3986 URI. + Uri, + /// RFC 3987 IRI. + Iri, +} + +impl Default for Spec { + #[inline] + fn default() -> Self { + Self::Iri + } +} + +/// CLI options. +#[derive(Default, Debug, Clone)] +struct CliOpt { + /// IRI. + iri: String, + /// Syntax spec. + spec: Spec, +} + +impl CliOpt { + fn parse() -> Self { + let mut args = std::env::args(); + // Skip `argv[0]`. 
+ args.next(); + + let mut iri = None; + let mut spec = None; + + for arg in args.by_ref() { + match arg.as_str() { + "--iri" | "-i" => spec = Some(Spec::Iri), + "--uri" | "-u" => spec = Some(Spec::Uri), + "--help" | "-h" => help_and_exit(), + opt if opt.starts_with('-') => die(format_args!("Unknown option: {}", opt)), + _ => { + if iri.replace(arg).is_some() { + die("IRI can be specified at most once"); + } + } + } + } + + for arg in args { + if iri.replace(arg).is_some() { + eprintln!("ERROR: IRI can be specified at most once"); + } + } + + let iri = iri.unwrap_or_else(|| die("IRI should be specified")); + let spec = spec.unwrap_or_default(); + Self { iri, spec } + } +} + +fn main() { + let opt = CliOpt::parse(); + + match opt.spec { + Spec::Iri => parse_iri(&opt), + Spec::Uri => parse_uri(&opt), + } +} + +fn parse_iri(opt: &CliOpt) { + let iri = parse::<iri_string::spec::IriSpec>(opt); + let uri = iri.encode_to_uri(); + println!("ASCII: {:?}", uri); +} + +fn parse_uri(opt: &CliOpt) { + let iri = parse::<iri_string::spec::UriSpec>(opt); + println!("ASCII: {:?}", iri); +} + +fn parse<S: iri_string::spec::Spec>(opt: &CliOpt) -> &RiReferenceStr<S> +where + RiStr<S>: AsRef<RiStr<iri_string::spec::IriSpec>>, +{ + let raw = &opt.iri.as_str(); + let iri = match RiReferenceStr::<S>::new(raw) { + Ok(v) => v, + Err(e) => die(format_args!("Failed to parse {:?}: {}", raw, e)), + }; + println!("Successfully parsed: {:?}", iri); + + let absolute = iri.to_iri().ok(); + match absolute { + Some(_) => println!("IRI is ablolute."), + None => println!("IRI is relative."), + } + + print_components(iri); + if let Some(absolute) = absolute { + print_normalized(absolute.as_ref()); + } + + iri +} + +fn print_components<S: iri_string::spec::Spec>(iri: &RiReferenceStr<S>) { + println!("scheme: {:?}", iri.scheme_str()); + println!("authority: {:?}", iri.authority_str()); + if let Some(components) = iri.authority_components() { + println!(" userinfo: {:?}", components.userinfo()); + 
println!(" host: {:?}", components.host()); + println!(" port: {:?}", components.port()); + } + println!("path: {:?}", iri.path_str()); + println!("query: {:?}", iri.query_str()); + println!("fragment: {:?}", iri.fragment()); +} + +pub fn print_normalized(iri: &IriStr) { + println!("is_normalized_rfc3986: {}", iri.is_normalized_rfc3986()); + println!( + "is_normalized_but_authorityless_relative_path_preserved: {}", + iri.is_normalized_but_authorityless_relative_path_preserved() + ); + println!("normalized: {}", iri.normalize()); +} diff --git a/vendor/iri-string/examples/resolve.rs b/vendor/iri-string/examples/resolve.rs new file mode 100644 index 00000000..7316cf71 --- /dev/null +++ b/vendor/iri-string/examples/resolve.rs @@ -0,0 +1,154 @@ +//! An example to resolve an IRI reference against a base IRI, both given as CLI arguments. + +use iri_string::types::{RiAbsoluteStr, RiReferenceStr}; + +const USAGE: &str = "\ +USAGE: + resolve [FLAGS] [--] BASE REFERENCE + +FLAGS: + -h, --help Prints this help + -i, --iri Handle the input as an IRI (RFC 3987) + -u, --uri Handle the input as an URI (RFC 3986) + -w, --whatwg Serialize normalization result according to WHATWG URL Standard. + +ARGS: + <BASE> Base IRI or URI to resolve REFERENCE against + <REFERENCE> IRI or URI to resolve +"; + +/// Prints the usage text to stderr. +fn print_help() { + eprintln!("{}", USAGE); +} + +/// Prints the usage text and exits with status 1. +fn help_and_exit() -> ! { + print_help(); + std::process::exit(1); +} + +/// Prints `msg` as an error, followed by the usage text, and exits with status 1. +fn die(msg: impl std::fmt::Display) -> ! { + eprintln!("ERROR: {}", msg); + eprintln!(); + print_help(); + std::process::exit(1); +} + +/// Syntax specification. +#[derive(Debug, Clone, Copy)] +enum Spec { + /// RFC 3986 URI. + Uri, + /// RFC 3987 IRI. + Iri, +} + +impl Default for Spec { + #[inline] + fn default() -> Self { + Self::Iri + } +} + +/// CLI options. +#[derive(Default, Debug, Clone)] +struct CliOpt { + /// Base IRI. + base: String, + /// Reference IRI. + reference: String, + /// Syntax spec. + spec: Spec, + /// Whether to serialize in WHATWG URL Standard way. 
+ whatwg_serialization: bool, +} + +impl CliOpt { + fn parse() -> Self { + let mut args = std::env::args(); + // Skip `argv[0]`. + args.next(); + + let mut base = None; + let mut reference = None; + let mut spec = None; + let mut whatwg_serialization = false; + + for arg in args.by_ref() { + match arg.as_str() { + "--iri" | "-i" => spec = Some(Spec::Iri), + "--uri" | "-u" => spec = Some(Spec::Uri), + "--whatwg" | "-w" => whatwg_serialization = true, + "--help" | "-h" => help_and_exit(), + opt if opt.starts_with('-') => die(format_args!("Unknown option: {}", opt)), + _ => { + if base.is_none() { + base = Some(arg); + } else if reference.is_none() { + reference = Some(arg); + } else { + die("IRI can be specified at most twice"); + } + } + } + } + + for arg in args { + if base.is_none() { + base = Some(arg); + } else if reference.is_none() { + reference = Some(arg); + } else { + die("IRI can be specified at most twice"); + } + } + + let base = base.unwrap_or_else(|| die("Base IRI should be specified")); + let reference = reference.unwrap_or_else(|| die("Reference IRI should be specified")); + let spec = spec.unwrap_or_default(); + Self { + base, + reference, + spec, + whatwg_serialization, + } + } +} + +fn main() { + let opt = CliOpt::parse(); + + match opt.spec { + Spec::Iri => parse::<iri_string::spec::IriSpec>(&opt), + Spec::Uri => parse::<iri_string::spec::UriSpec>(&opt), + } +} + +fn parse<S: iri_string::spec::Spec>(opt: &CliOpt) { + let base_raw = &opt.base.as_str(); + let reference_raw = &opt.reference.as_str(); + let base = match RiAbsoluteStr::<S>::new(base_raw) { + Ok(v) => v, + Err(e) => die(format_args!( + "Failed to parse {:?} as an IRI (without fragment): {}", + reference_raw, e + )), + }; + let reference = match RiReferenceStr::<S>::new(reference_raw) { + Ok(v) => v, + Err(e) => die(format_args!( + "Failed to parse {:?} as an IRI reference: {}", + reference_raw, e + )), + }; + + let resolved = reference.resolve_against(base); + if 
!opt.whatwg_serialization { + if let Err(e) = resolved.ensure_rfc3986_normalizable() { + die(format_args!( + "Failed to resolve {:?} against {:?}: {}", + reference_raw, base_raw, e + )); + } + } + println!("{}", resolved); +} diff --git a/vendor/iri-string/src/build.rs b/vendor/iri-string/src/build.rs new file mode 100644 index 00000000..39a57017 --- /dev/null +++ b/vendor/iri-string/src/build.rs @@ -0,0 +1,1234 @@ +//! URI/IRI builder. +//! +//! See the documentation of [`Builder`] type. + +use core::fmt::{self, Display as _, Write as _}; +use core::marker::PhantomData; + +#[cfg(feature = "alloc")] +use alloc::collections::TryReserveError; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::ToString; + +use crate::format::Censored; +#[cfg(feature = "alloc")] +use crate::format::{ToDedicatedString, ToStringFallible}; +use crate::normalize::{self, NormalizationMode, PathCharacteristic, PctCaseNormalized}; +use crate::parser::str::{find_split, prior_byte2}; +use crate::parser::validate as parser; +use crate::spec::Spec; +use crate::types::{RiAbsoluteStr, RiReferenceStr, RiRelativeStr, RiStr}; +#[cfg(feature = "alloc")] +use crate::types::{RiAbsoluteString, RiReferenceString, RiRelativeString, RiString}; +use crate::validate::Error; + +/// Port builder. +/// +/// This type is intended to be created by `From` trait implementations, and +/// to be passed to [`Builder::port`] method. 
+#[derive(Debug, Clone)] +pub struct PortBuilder<'a>(PortBuilderRepr<'a>); + +impl Default for PortBuilder<'_> { + #[inline] + fn default() -> Self { + Self(PortBuilderRepr::Empty) + } +} + +impl From<u8> for PortBuilder<'_> { + #[inline] + fn from(v: u8) -> Self { + Self(PortBuilderRepr::Integer(v.into())) + } +} + +impl From<u16> for PortBuilder<'_> { + #[inline] + fn from(v: u16) -> Self { + Self(PortBuilderRepr::Integer(v)) + } +} + +impl<'a> From<&'a str> for PortBuilder<'a> { + #[inline] + fn from(v: &'a str) -> Self { + Self(PortBuilderRepr::String(v)) + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<&'a alloc::string::String> for PortBuilder<'a> { + #[inline] + fn from(v: &'a alloc::string::String) -> Self { + Self(PortBuilderRepr::String(v.as_str())) + } +} + +/// Internal representation of a port builder. +#[derive(Debug, Clone, Copy)] +#[non_exhaustive] +enum PortBuilderRepr<'a> { + /// Empty port. + Empty, + /// Port as an integer. + /// + /// Note that RFC 3986 accepts any number of digits as a port, but + /// practically (at least in TCP/IP) `u16` is enough. + Integer(u16), + /// Port as a string. + String(&'a str), +} + +/// Userinfo builder. +/// +/// This type is intended to be created by `From` trait implementations, and +/// to be passed to [`Builder::userinfo`] method. +#[derive(Clone)] +pub struct UserinfoBuilder<'a>(UserinfoRepr<'a>); + +impl Default for UserinfoBuilder<'_> { + #[inline] + fn default() -> Self { + Self(UserinfoRepr::None) + } +} + +impl fmt::Debug for UserinfoBuilder<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut debug = f.debug_struct("UserinfoBuilder"); + if let Some((user, password)) = self.to_user_password() { + debug.field("user", &user); + // > Applications should not render as clear text any data after + // > the first colon (":") character found within a userinfo + // > subcomponent unless the data after the colon is the empty + // > string (indicating no password). 
+ if matches!(password, None | Some("")) { + debug.field("password", &password); + } else { + debug.field("password", &Some(Censored)); + } + } + debug.finish() + } +} + +impl<'a> UserinfoBuilder<'a> { + /// Decomposes the userinfo into `user` and `password`. + #[must_use] + fn to_user_password(&self) -> Option<(&'a str, Option<&'a str>)> { + match &self.0 { + UserinfoRepr::None => None, + UserinfoRepr::Direct(s) => match find_split(s, b':') { + None => Some((s, None)), + Some((user, password)) => Some((user, Some(password))), + }, + UserinfoRepr::UserPass(user, password) => Some((*user, *password)), + } + } +} + +impl<'a> From<&'a str> for UserinfoBuilder<'a> { + #[inline] + fn from(direct: &'a str) -> Self { + Self(UserinfoRepr::Direct(direct)) + } +} + +impl<'a> From<(&'a str, &'a str)> for UserinfoBuilder<'a> { + #[inline] + fn from((user, password): (&'a str, &'a str)) -> Self { + Self(UserinfoRepr::UserPass(user, Some(password))) + } +} + +impl<'a> From<(&'a str, Option<&'a str>)> for UserinfoBuilder<'a> { + #[inline] + fn from((user, password): (&'a str, Option<&'a str>)) -> Self { + Self(UserinfoRepr::UserPass(user, password)) + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<&'a alloc::string::String> for UserinfoBuilder<'a> { + #[inline] + fn from(v: &'a alloc::string::String) -> Self { + Self::from(v.as_str()) + } +} + +/// Internal representation of a userinfo builder. +#[derive(Clone, Copy)] +enum UserinfoRepr<'a> { + /// Not specified (absent). + None, + /// Direct `userinfo` content. + Direct(&'a str), + /// User name and password. + UserPass(&'a str, Option<&'a str>), +} + +/// URI/IRI authority builder. +#[derive(Default, Debug, Clone)] +struct AuthorityBuilder<'a> { + /// Host. + host: HostRepr<'a>, + /// Port. + port: PortBuilder<'a>, + /// Userinfo. + userinfo: UserinfoBuilder<'a>, +} + +impl AuthorityBuilder<'_> { + /// Writes the authority to the given formatter. 
+ fn fmt_write_to<S: Spec>(&self, f: &mut fmt::Formatter<'_>, normalize: bool) -> fmt::Result { + match &self.userinfo.0 { + UserinfoRepr::None => {} + UserinfoRepr::Direct(userinfo) => { + if normalize { + PctCaseNormalized::<S>::new(userinfo).fmt(f)?; + } else { + userinfo.fmt(f)?; + } + f.write_char('@')?; + } + UserinfoRepr::UserPass(user, password) => { + if normalize { + PctCaseNormalized::<S>::new(user).fmt(f)?; + } else { + f.write_str(user)?; + } + if let Some(password) = password { + f.write_char(':')?; + if normalize { + PctCaseNormalized::<S>::new(password).fmt(f)?; + } else { + password.fmt(f)?; + } + } + f.write_char('@')?; + } + } + + match self.host { + HostRepr::String(host) => { + if normalize { + normalize::normalize_host_port::<S>(f, host)?; + } else { + f.write_str(host)?; + } + } + #[cfg(feature = "std")] + HostRepr::IpAddr(ipaddr) => match ipaddr { + std::net::IpAddr::V4(v) => v.fmt(f)?, + std::net::IpAddr::V6(v) => write!(f, "[{v}]")?, + }, + } + + match self.port.0 { + PortBuilderRepr::Empty => {} + PortBuilderRepr::Integer(v) => write!(f, ":{v}")?, + PortBuilderRepr::String(v) => { + // Omit empty port if the normalization is enabled. + if !(v.is_empty() && normalize) { + write!(f, ":{v}")?; + } + } + } + + Ok(()) + } +} + +/// Host representation. +#[derive(Debug, Clone, Copy)] +enum HostRepr<'a> { + /// Direct string representation. + String(&'a str), + #[cfg(feature = "std")] + /// Dedicated IP address type. + IpAddr(std::net::IpAddr), +} + +impl Default for HostRepr<'_> { + #[inline] + fn default() -> Self { + Self::String("") + } +} + +/// URI/IRI reference builder. +/// +/// # Usage +/// +/// 1. Create builder by [`Builder::new()`][`Self::new`]. +/// 2. Set (or unset) components and set normalization mode as you wish. +/// 3. Validate by [`Builder::build()`][`Self::build`] and get [`Built`] value. +/// 4. 
Use [`core::fmt::Display`] trait to serialize the resulting [`Built`], +/// or use [`From`]/[`Into`] traits to convert into an allocated string types. +/// +/// ``` +/// # use iri_string::validate::Error; +/// use iri_string::build::Builder; +/// # #[cfg(not(feature = "alloc"))] +/// # use iri_string::types::IriStr; +/// # #[cfg(feature = "alloc")] +/// use iri_string::types::{IriStr, IriString}; +/// +/// // 1. Create builder. +/// let mut builder = Builder::new(); +/// +/// // 2. Set (or unset) component and normalization mode. +/// builder.scheme("http"); +/// builder.host("example.com"); +/// builder.path("/foo/../"); +/// builder.normalize(); +/// +/// // 3. Validate and create the result. +/// let built = builder.build::<IriStr>()?; +/// +/// # #[cfg(feature = "alloc")] { +/// // 4a. Serialize by `Display` trait (or `ToString`). +/// let s = built.to_string(); +/// assert_eq!(s, "http://example.com/"); +/// # } +/// +/// # #[cfg(feature = "alloc")] { +/// // 4b. Convert into an allocated string types. +/// // Thanks to pre-validation by `.build::<IriStr>()`, this conversion is infallible! +/// let s: IriString = built.into(); +/// assert_eq!(s, "http://example.com/"); +/// # } +/// +/// # Ok::<_, Error>(()) +/// ``` +#[derive(Default, Debug, Clone)] +pub struct Builder<'a> { + /// Scheme. + scheme: Option<&'a str>, + /// Authority. + authority: Option<AuthorityBuilder<'a>>, + /// Path. + path: &'a str, + /// Query (without the leading `?`). + query: Option<&'a str>, + /// Fragment (without the leading `#`). + fragment: Option<&'a str>, + /// Normalization mode. + normalize: bool, +} + +impl<'a> Builder<'a> { + /// Creates a builder with empty data. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let builder = Builder::new(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), ""); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Writes the IRI reference to the given formatter. + /// + /// Don't expose this as public, since this method does not validate. + /// + /// # Preconditions + /// + /// The IRI string to be built should be a valid IRI reference. + /// Callers are responsible for validating the component values before calling + /// this method. + fn fmt_write_to<S: Spec>( + &self, + f: &mut fmt::Formatter<'_>, + path_is_absolute: bool, + ) -> fmt::Result { + if let Some(scheme) = self.scheme { + // Write the scheme. + if self.normalize { + normalize::normalize_scheme(f, scheme)?; + } else { + f.write_str(scheme)?; + } + f.write_char(':')?; + } + + if let Some(authority) = &self.authority { + f.write_str("//")?; + authority.fmt_write_to::<S>(f, self.normalize)?; + } + + if !self.normalize { + // No normalization. + f.write_str(self.path)?; + } else if self.scheme.is_some() || self.authority.is_some() || path_is_absolute { + // Apply full syntax-based normalization. + let op = normalize::NormalizationOp { + mode: NormalizationMode::Default, + }; + normalize::PathToNormalize::from_single_path(self.path).fmt_write_normalize::<S, _>( + f, + op, + self.authority.is_some(), + )?; + } else { + // The IRI reference starts with `path` component, and the path is relative. + // Skip path segment normalization. 
+ PctCaseNormalized::<S>::new(self.path).fmt(f)?; + } + + if let Some(query) = self.query { + f.write_char('?')?; + if self.normalize { + normalize::normalize_query::<S>(f, query)?; + } else { + f.write_str(query)?; + } + } + + if let Some(fragment) = self.fragment { + f.write_char('#')?; + if self.normalize { + normalize::normalize_fragment::<S>(f, fragment)?; + } else { + f.write_str(fragment)?; + } + } + + Ok(()) + } + + /// Builds the proxy object that can be converted to the desired IRI string type. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriStr; + /// # #[cfg(feature = "alloc")] + /// use iri_string::types::IriString; + /// + /// let mut builder = Builder::new(); + /// + /// builder.scheme("http"); + /// builder.host("example.com"); + /// builder.path("/foo/bar"); + /// + /// let built = builder.build::<IriStr>()?; + /// + /// # #[cfg(feature = "alloc")] { + /// // The returned value implements `core::fmt::Display` and + /// // `core::string::ToString`. + /// assert_eq!(built.to_string(), "http://example.com/foo/bar"); + /// + /// // The returned value implements `Into<{iri_owned_string_type}>`. + /// let iri = IriString::from(built); + /// // `let iri: IriString = built.into();` is also OK. + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn build<T>(self) -> Result<Built<'a, T>, Error> + where + T: ?Sized + Buildable<'a>, + { + <T as private::Sealed<'a>>::validate_builder(self) + } +} + +// Setters does not return `&mut Self` or `Self` since it introduces needless +// ambiguity for users. +// For example, if setters return something and allows method chaining, can you +// correctly explain what happens with the code below without reading document? +// +// ```text +// let mut builder = Builder::new().foo("foo").bar("bar"); +// let baz = builder.baz("baz").clone().build(); +// // Should the result be foo+bar+qux, or foo+bar+baz+qux? 
+// let qux = builder.qux("qux").build(); +// ``` +impl<'a> Builder<'a> { + /// Sets the scheme. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.scheme("foo"); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "foo:"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn scheme(&mut self, v: &'a str) { + self.scheme = Some(v); + } + + /// Unsets the scheme. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.scheme("foo"); + /// builder.unset_scheme(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), ""); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn unset_scheme(&mut self) { + self.scheme = None; + } + + /// Sets the path. + /// + /// Note that no methods are provided to "unset" path since every IRI + /// references has a path component (although it can be empty). + /// If you want to "unset" the path, just set the empty string. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.path("foo/bar"); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "foo/bar"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn path(&mut self, v: &'a str) { + self.path = v; + } + + /// Initializes the authority builder. 
+ #[inline] + fn authority_builder(&mut self) -> &mut AuthorityBuilder<'a> { + self.authority.get_or_insert_with(AuthorityBuilder::default) + } + + /// Unsets the authority. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.host("example.com"); + /// builder.unset_authority(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), ""); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn unset_authority(&mut self) { + self.authority = None; + } + + /// Sets the userinfo. + /// + /// `userinfo` component always have `user` part (but it can be empty). + /// + /// Note that `("", None)` is considered as an empty userinfo, rather than + /// unset userinfo. + /// Also note that the user part cannot have colon characters. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.userinfo("user:pass"); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "//user:pass@"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// You can specify `(user, password)` pair. + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// + /// builder.userinfo(("user", Some("pass"))); + /// # #[cfg(feature = "alloc")] { + /// assert_eq!( + /// builder.clone().build::<IriReferenceStr>()?.to_string(), + /// "//user:pass@" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// `("", None)` is considered as an empty userinfo. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.userinfo(("", None)); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "//@"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn userinfo<T: Into<UserinfoBuilder<'a>>>(&mut self, v: T) { + self.authority_builder().userinfo = v.into(); + } + + /// Unsets the userinfo. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.userinfo("user:pass"); + /// // Note that this does not unset the entire authority. + /// // Now empty authority is set. + /// builder.unset_userinfo(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "//"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn unset_userinfo(&mut self) { + self.authority_builder().userinfo = UserinfoBuilder::default(); + } + + /// Sets the reg-name or IP address (i.e. host) without port. + /// + /// Note that no methods are provided to "unset" host. + /// Depending on your situation, set empty string as a reg-name, or unset + /// the authority entirely by [`unset_authority`][`Self::unset_authority`] + /// method. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.host("example.com"); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "//example.com"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn host(&mut self, v: &'a str) { + self.authority_builder().host = HostRepr::String(v); + } + + /// Sets the IP address as a host. + /// + /// Note that no methods are provided to "unset" host. + /// Depending on your situation, set empty string as a reg-name, or unset + /// the authority entirely by [`unset_authority`][`Self::unset_authority`] + /// method. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "std")] { + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.ip_address(std::net::Ipv4Addr::new(192, 0, 2, 0)); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "//192.0.2.0"); + /// # } + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[cfg(feature = "std")] + #[inline] + pub fn ip_address<T: Into<std::net::IpAddr>>(&mut self, addr: T) { + self.authority_builder().host = HostRepr::IpAddr(addr.into()); + } + + /// Sets the port. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.port(80_u16); + /// // Accepts other types that implements `Into<PortBuilder<'a>>`. 
+ /// //builder.port(80_u8); + /// //builder.port("80"); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "//:80"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn port<T: Into<PortBuilder<'a>>>(&mut self, v: T) { + self.authority_builder().port = v.into(); + } + + /// Unsets the port. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.port(80_u16); + /// // Note that this does not unset the entire authority. + /// // Now empty authority is set. + /// builder.unset_port(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "//"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn unset_port(&mut self) { + self.authority_builder().port = PortBuilder::default(); + } + + /// Sets the query. + /// + /// The string after `?` should be specified. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.query("q=example"); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "?q=example"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn query(&mut self, v: &'a str) { + self.query = Some(v); + } + + /// Unsets the query. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.query("q=example"); + /// builder.unset_query(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), ""); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn unset_query(&mut self) { + self.query = None; + } + + /// Sets the fragment. + /// + /// The string after `#` should be specified. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.fragment("anchor"); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "#anchor"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn fragment(&mut self, v: &'a str) { + self.fragment = Some(v); + } + + /// Unsets the fragment. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.fragment("anchor"); + /// builder.unset_fragment(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), ""); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn unset_fragment(&mut self) { + self.fragment = None; + } + + /// Stop normalizing the result. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.scheme("http"); + /// // `%75%73%65%72` is "user". 
+ /// builder.userinfo("%75%73%65%72"); + /// builder.host("EXAMPLE.COM"); + /// builder.port(""); + /// builder.path("/foo/../%2e%2e/bar/%2e/baz/."); + /// + /// builder.unset_normalize(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!( + /// iri.to_string(), + /// "http://%75%73%65%72@EXAMPLE.COM:/foo/../%2e%2e/bar/%2e/baz/." + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn unset_normalize(&mut self) { + self.normalize = false; + } + + /// Normalizes the result using RFC 3986 syntax-based normalization and + /// WHATWG URL Standard algorithm. + /// + /// # Normalization + /// + /// If `scheme` or `authority` component is present or the path is absolute, + /// the build result will be fully normalized using full syntax-based normalization: + /// + /// * case normalization ([RFC 3986 6.2.2.1]), + /// * percent-encoding normalization ([RFC 3986 6.2.2.2]), and + /// * path segment normalization ([RFC 3986 6.2.2.3]). + /// + /// However, if both `scheme` and `authority` are absent and the path is relative + /// (including empty), i.e. the IRI reference to be built starts with the + /// relative `path` component, path segment normalization will be omitted. + /// This is because the path segment normalization depends on presence or + /// absence of the `authority` component, and will remove extra `..` + /// segments which should not be ignored. + /// + /// Note that `path` must already be empty or start with a slash **before + /// the normalization** if `authority` is present. + /// + /// # WHATWG URL Standard + /// + /// If you need to avoid WHATWG URL Standard serialization, use + /// [`Built::ensure_rfc3986_normalizable`] method to test if the result is + /// normalizable without WHATWG spec. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::build::Builder; + /// use iri_string::types::IriReferenceStr; + /// + /// let mut builder = Builder::new(); + /// builder.scheme("http"); + /// // `%75%73%65%72` is "user". + /// builder.userinfo("%75%73%65%72"); + /// builder.host("EXAMPLE.COM"); + /// builder.port(""); + /// builder.path("/foo/../%2e%2e/bar/%2e/baz/."); + /// + /// builder.normalize(); + /// + /// let iri = builder.build::<IriReferenceStr>()?; + /// # #[cfg(feature = "alloc")] { + /// assert_eq!(iri.to_string(), "http://user@example.com/bar/baz/"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn normalize(&mut self) { + self.normalize = true; + } +} + +/// [`Display`]-able IRI build result. +/// +/// The value of this type can generate an IRI using [`From`]/[`Into`] traits or +/// [`Display`] trait. +/// +/// # Security consideration +/// +/// This can be stringified or directly printed by `std::fmt::Display`, but note +/// that this `Display` **does not hide the password part**. Be careful **not to +/// print the value using `Display for Built<_>` in public context**. +/// +/// [`From`]: `core::convert::From` +/// [`Into`]: `core::convert::Into` +/// [`Display`]: `core::fmt::Display` +#[derive(Debug)] +pub struct Built<'a, T: ?Sized> { + /// Builder with the validated content. + builder: Builder<'a>, + /// Whether the path is absolute. + path_is_absolute: bool, + /// String type. + _ty_str: PhantomData<fn() -> T>, +} + +impl<T: ?Sized> Clone for Built<'_, T> { + #[inline] + fn clone(&self) -> Self { + Self { + builder: self.builder.clone(), + path_is_absolute: self.path_is_absolute, + _ty_str: PhantomData, + } + } +} + +/// Implements conversions to a string. +macro_rules! impl_stringifiers { + ($borrowed:ident, $owned:ident) => { + impl<S: Spec> Built<'_, $borrowed<S>> { + /// Returns `Ok(())` if the IRI is normalizable by the RFC 3986 algorithm. 
+ #[inline] + pub fn ensure_rfc3986_normalizable(&self) -> Result<(), normalize::Error> { + if self.builder.authority.is_none() { + let path = normalize::PathToNormalize::from_single_path(self.builder.path); + path.ensure_rfc3986_normalizable_with_authority_absent()?; + } + Ok(()) + } + } + + impl<S: Spec> fmt::Display for Built<'_, $borrowed<S>> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.builder.fmt_write_to::<S>(f, self.path_is_absolute) + } + } + + #[cfg(feature = "alloc")] + impl<S: Spec> ToDedicatedString for Built<'_, $borrowed<S>> { + type Target = $owned<S>; + + #[inline] + fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> { + let s = self.try_to_string()?; + Ok(TryFrom::try_from(s) + .expect("[validity] the IRI to be built is already validated")) + } + } + + #[cfg(feature = "alloc")] + impl<S: Spec> From<Built<'_, $borrowed<S>>> for $owned<S> { + #[inline] + fn from(builder: Built<'_, $borrowed<S>>) -> Self { + (&builder).into() + } + } + + #[cfg(feature = "alloc")] + impl<S: Spec> From<&Built<'_, $borrowed<S>>> for $owned<S> { + #[inline] + fn from(builder: &Built<'_, $borrowed<S>>) -> Self { + let s = builder.to_string(); + Self::try_from(s).expect("[validity] the IRI to be built is already validated") + } + } + }; +} + +impl_stringifiers!(RiReferenceStr, RiReferenceString); +impl_stringifiers!(RiStr, RiString); +impl_stringifiers!(RiAbsoluteStr, RiAbsoluteString); +impl_stringifiers!(RiRelativeStr, RiRelativeString); + +/// A trait for borrowed IRI string types buildable by the [`Builder`]. 
+pub trait Buildable<'a>: private::Sealed<'a> {} + +impl<'a, S: Spec> private::Sealed<'a> for RiReferenceStr<S> { + fn validate_builder(builder: Builder<'a>) -> Result<Built<'a, Self>, Error> { + let path_is_absolute = validate_builder_for_iri_reference::<S>(&builder)?; + + Ok(Built { + builder, + path_is_absolute, + _ty_str: PhantomData, + }) + } +} +impl<S: Spec> Buildable<'_> for RiReferenceStr<S> {} + +impl<'a, S: Spec> private::Sealed<'a> for RiStr<S> { + fn validate_builder(builder: Builder<'a>) -> Result<Built<'a, Self>, Error> { + if builder.scheme.is_none() { + return Err(Error::new()); + } + let path_is_absolute = validate_builder_for_iri_reference::<S>(&builder)?; + + Ok(Built { + builder, + path_is_absolute, + _ty_str: PhantomData, + }) + } +} +impl<S: Spec> Buildable<'_> for RiStr<S> {} + +impl<'a, S: Spec> private::Sealed<'a> for RiAbsoluteStr<S> { + fn validate_builder(builder: Builder<'a>) -> Result<Built<'a, Self>, Error> { + if builder.scheme.is_none() { + return Err(Error::new()); + } + if builder.fragment.is_some() { + return Err(Error::new()); + } + let path_is_absolute = validate_builder_for_iri_reference::<S>(&builder)?; + + Ok(Built { + builder, + path_is_absolute, + _ty_str: PhantomData, + }) + } +} +impl<S: Spec> Buildable<'_> for RiAbsoluteStr<S> {} + +impl<'a, S: Spec> private::Sealed<'a> for RiRelativeStr<S> { + fn validate_builder(builder: Builder<'a>) -> Result<Built<'a, Self>, Error> { + if builder.scheme.is_some() { + return Err(Error::new()); + } + let path_is_absolute = validate_builder_for_iri_reference::<S>(&builder)?; + + Ok(Built { + builder, + path_is_absolute, + _ty_str: PhantomData, + }) + } +} +impl<S: Spec> Buildable<'_> for RiRelativeStr<S> {} + +/// Checks whether the builder output is valid IRI reference. +/// +/// Returns whether the path is absolute. 
+fn validate_builder_for_iri_reference<S: Spec>(builder: &Builder<'_>) -> Result<bool, Error> { + if let Some(scheme) = builder.scheme { + parser::validate_scheme(scheme)?; + } + + if let Some(authority) = &builder.authority { + match &authority.userinfo.0 { + UserinfoRepr::None => {} + UserinfoRepr::Direct(userinfo) => { + parser::validate_userinfo::<S>(userinfo)?; + } + UserinfoRepr::UserPass(user, password) => { + // `user` is not allowed to have a colon, since the characters + // after the colon is parsed as the password. + if user.contains(':') { + return Err(Error::new()); + } + + // Note that the syntax of components inside `authority` + // (`user` and `password`) is not specified by RFC 3986. + parser::validate_userinfo::<S>(user)?; + if let Some(password) = password { + parser::validate_userinfo::<S>(password)?; + } + } + } + + match authority.host { + HostRepr::String(s) => parser::validate_host::<S>(s)?, + #[cfg(feature = "std")] + HostRepr::IpAddr(_) => {} + } + + if let PortBuilderRepr::String(s) = authority.port.0 { + if !s.bytes().all(|b| b.is_ascii_digit()) { + return Err(Error::new()); + } + } + } + + let path_is_absolute: bool; + let mut is_path_acceptable; + if builder.normalize { + if builder.scheme.is_some() || builder.authority.is_some() || builder.path.starts_with('/') + { + if builder.authority.is_some() { + // Note that the path should already be in an absolute form before normalization. 
+ is_path_acceptable = builder.path.is_empty() || builder.path.starts_with('/'); + } else { + is_path_acceptable = true; + } + let op = normalize::NormalizationOp { + mode: NormalizationMode::Default, + }; + let path_characteristic = PathCharacteristic::from_path_to_display::<S>( + &normalize::PathToNormalize::from_single_path(builder.path), + op, + builder.authority.is_some(), + ); + path_is_absolute = path_characteristic.is_absolute(); + is_path_acceptable = is_path_acceptable + && match path_characteristic { + PathCharacteristic::CommonAbsolute | PathCharacteristic::CommonRelative => true, + PathCharacteristic::StartsWithDoubleSlash + | PathCharacteristic::RelativeFirstSegmentHasColon => { + builder.scheme.is_some() || builder.authority.is_some() + } + }; + } else { + path_is_absolute = false; + // If the path is relative (where neither scheme nor authority is + // available), the first segment should not contain a colon. + is_path_acceptable = prior_byte2(builder.path.as_bytes(), b'/', b':') != Some(b':'); + } + } else { + path_is_absolute = builder.path.starts_with('/'); + is_path_acceptable = if builder.authority.is_some() { + // The path should be absolute or empty. + path_is_absolute || builder.path.is_empty() + } else if builder.scheme.is_some() || path_is_absolute { + // The path should not start with '//'. + !builder.path.starts_with("//") + } else { + // If the path is relative (where neither scheme nor authority is + // available), the first segment should not contain a colon. + prior_byte2(builder.path.as_bytes(), b'/', b':') != Some(b':') + }; + } + if !is_path_acceptable { + return Err(Error::new()); + } + + if let Some(query) = builder.query { + parser::validate_query::<S>(query)?; + } + + if let Some(fragment) = builder.fragment { + parser::validate_fragment::<S>(fragment)?; + } + + Ok(path_is_absolute) +} + +/// Private module to put the trait to seal. 
+mod private { + use super::{Builder, Built, Error}; + + /// A trait for types buildable by the [`Builder`]. + pub trait Sealed<'a> { + /// Validates the content of the builder and returns the validated type if possible. + fn validate_builder(builder: Builder<'a>) -> Result<Built<'a, Self>, Error>; + } +} diff --git a/vendor/iri-string/src/components.rs b/vendor/iri-string/src/components.rs new file mode 100644 index 00000000..33e7ac45 --- /dev/null +++ b/vendor/iri-string/src/components.rs @@ -0,0 +1,267 @@ +//! Components of IRIs. + +mod authority; + +use core::num::NonZeroUsize; +use core::ops::{Range, RangeFrom, RangeTo}; + +use crate::parser::trusted as trusted_parser; +use crate::spec::Spec; +use crate::types::RiReferenceStr; + +pub use self::authority::AuthorityComponents; + +/// Positions to split an IRI into components. +#[derive(Debug, Clone, Copy)] +pub(crate) struct Splitter { + /// Scheme end. + scheme_end: Option<NonZeroUsize>, + /// Authority end. + /// + /// Note that absence of the authority and the empty authority is + /// distinguished. + authority_end: Option<NonZeroUsize>, + /// Query start (after the leading `?`). + query_start: Option<NonZeroUsize>, + /// Fragment start (after the leading `#`). + fragment_start: Option<NonZeroUsize>, +} + +impl Splitter { + /// Creates a new splitter. + #[inline] + #[must_use] + pub(crate) fn new( + scheme_end: Option<NonZeroUsize>, + authority_end: Option<NonZeroUsize>, + query_start: Option<NonZeroUsize>, + fragment_start: Option<NonZeroUsize>, + ) -> Self { + Self { + scheme_end, + authority_end, + query_start, + fragment_start, + } + } + + /// Decomposes an IRI into five major components: scheme, authority, path, query, and fragment. 
+ #[must_use] + fn split_into_major( + self, + s: &str, + ) -> (Option<&str>, Option<&str>, &str, Option<&str>, Option<&str>) { + let (scheme, next_of_scheme) = match self.scheme_end { + // +1: ":".len() + Some(end) => (Some(&s[..end.get()]), end.get() + 1), + None => (None, 0), + }; + let (authority, next_of_authority) = match self.authority_end { + // +2: "//".len() + Some(end) => (Some(&s[(next_of_scheme + 2)..end.get()]), end.get()), + None => (None, next_of_scheme), + }; + let (fragment, end_of_prev_of_fragment) = match self.fragment_start { + // -1: "#".len() + Some(start) => (Some(&s[start.get()..]), start.get() - 1), + None => (None, s.len()), + }; + let (query, end_of_path) = match self.query_start { + Some(start) => ( + Some(&s[start.get()..end_of_prev_of_fragment]), + // -1: "?".len() + start.get() - 1, + ), + None => (None, end_of_prev_of_fragment), + }; + let path = &s[next_of_authority..end_of_path]; + (scheme, authority, path, query, fragment) + } + + /// Returns the range for the scheme part. + #[inline] + #[must_use] + fn scheme_range(self) -> Option<RangeTo<usize>> { + self.scheme_end.map(|end| ..end.get()) + } + + /// Returns the scheme as a string. + #[inline] + #[must_use] + pub(crate) fn scheme_str<'a>(&self, s: &'a str) -> Option<&'a str> { + self.scheme_range().map(|range| &s[range]) + } + + /// Returns true if the IRI has a scheme part, false otherwise. + #[inline] + #[must_use] + pub(crate) fn has_scheme(&self) -> bool { + self.scheme_end.is_some() + } + + /// Returns the range for the authority part. + #[inline] + #[must_use] + fn authority_range(self) -> Option<Range<usize>> { + let end = self.authority_end?.get(); + // 2: "//".len() + // +3: "://".len() + let start = self.scheme_end.map_or(2, |v| v.get() + 3); + Some(start..end) + } + + /// Returns the authority as a string. 
+ #[inline] + #[must_use] + pub(crate) fn authority_str<'a>(&self, s: &'a str) -> Option<&'a str> { + self.authority_range().map(|range| &s[range]) + } + + /// Returns true if the IRI has an authority part, false otherwise. + #[inline] + #[must_use] + pub(crate) fn has_authority(&self) -> bool { + self.authority_end.is_some() + } + + /// Returns the range for the path part. + #[inline] + #[must_use] + fn path_range(self, full_len: usize) -> Range<usize> { + // -1: "?".len() and "#".len() + let end = self + .query_start + .or(self.fragment_start) + .map_or(full_len, |v| v.get() - 1); + let start = self.authority_end.map_or_else( + // +1: ":".len() + || self.scheme_end.map_or(0, |v| v.get() + 1), + NonZeroUsize::get, + ); + + start..end + } + + /// Returns the path as a string. + #[inline] + #[must_use] + pub(crate) fn path_str<'a>(&self, s: &'a str) -> &'a str { + &s[self.path_range(s.len())] + } + + /// Returns true if the path part of the IRI is empty. + #[inline] + #[must_use] + pub(crate) fn is_path_empty(&self, full_len: usize) -> bool { + self.path_range(full_len).is_empty() + } + + /// Returns the range for the query part excluding a prefix `?`. + #[inline] + #[must_use] + fn query_range(self, full_len: usize) -> Option<Range<usize>> { + let start = self.query_start?.get(); + // -1: "#".len() + let end = self.fragment_start.map_or(full_len, |v| v.get() - 1); + + Some(start..end) + } + + /// Returns the query as a string. + #[inline] + #[must_use] + pub(crate) fn query_str<'a>(&self, s: &'a str) -> Option<&'a str> { + self.query_range(s.len()).map(|range| &s[range]) + } + + /// Returns true if the IRI has a query part, false otherwise. + #[inline] + #[must_use] + pub(crate) fn has_query(&self) -> bool { + self.query_start.is_some() + } + + /// Returns the range for the fragment part excluding a prefix `#`. + #[inline] + #[must_use] + pub(crate) fn fragment_range(self) -> Option<RangeFrom<usize>> { + self.fragment_start.map(|v| v.get()..) 
+ } + + /// Returns the fragment as a string. + #[inline] + #[must_use] + pub(crate) fn fragment_str<'a>(&self, s: &'a str) -> Option<&'a str> { + self.fragment_range().map(|range| &s[range]) + } +} + +/// Components of an IRI reference. +/// +/// See <https://tools.ietf.org/html/rfc3986#section-5.2.2>. +#[derive(Debug, Clone, Copy)] +pub(crate) struct RiReferenceComponents<'a, S: Spec> { + /// Original complete string. + pub(crate) iri: &'a RiReferenceStr<S>, + /// Positions to split the IRI into components. + pub(crate) splitter: Splitter, +} + +impl<'a, S: Spec> RiReferenceComponents<'a, S> { + /// Returns five major components: scheme, authority, path, query, and fragment. + #[inline] + #[must_use] + pub(crate) fn to_major( + self, + ) -> ( + Option<&'a str>, + Option<&'a str>, + &'a str, + Option<&'a str>, + Option<&'a str>, + ) { + self.splitter.split_into_major(self.iri.as_str()) + } + + /// Returns the IRI reference. + #[inline] + #[must_use] + pub(crate) fn iri(&self) -> &'a RiReferenceStr<S> { + self.iri + } + + /// Returns the scheme as a string. + #[inline] + #[must_use] + pub(crate) fn scheme_str(&self) -> Option<&str> { + self.splitter.scheme_str(self.iri.as_str()) + } + + /// Returns the authority as a string. + #[inline] + #[must_use] + pub(crate) fn authority_str(&self) -> Option<&str> { + self.splitter.authority_str(self.iri.as_str()) + } + + /// Returns the path as a string. + #[inline] + #[must_use] + pub(crate) fn path_str(&self) -> &str { + self.splitter.path_str(self.iri.as_str()) + } + + /// Returns the query as a string. 
+ #[inline] + #[must_use] + pub(crate) fn query_str(&self) -> Option<&str> { + self.splitter.query_str(self.iri.as_str()) + } +} + +impl<'a, S: Spec> From<&'a RiReferenceStr<S>> for RiReferenceComponents<'a, S> { + #[inline] + fn from(s: &'a RiReferenceStr<S>) -> Self { + trusted_parser::decompose_iri_reference(s) + } +} diff --git a/vendor/iri-string/src/components/authority.rs b/vendor/iri-string/src/components/authority.rs new file mode 100644 index 00000000..a7f17744 --- /dev/null +++ b/vendor/iri-string/src/components/authority.rs @@ -0,0 +1,121 @@ +//! Subcomponents of authority. + +use crate::parser::trusted as trusted_parser; +use crate::spec::Spec; +use crate::types::RiReferenceStr; + +/// Subcomponents of authority. +/// +/// This is a return type of the `authority_components` method of the string +/// types (for example [`RiStr::authority_components`]. +/// +/// [`RiStr::authority_components`]: `crate::types::RiStr::authority_components` +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct AuthorityComponents<'a> { + /// Authority string, excluding the leading `//`. + pub(crate) authority: &'a str, + /// Start position of the `host`. + pub(crate) host_start: usize, + /// End position of the `host`. + pub(crate) host_end: usize, +} + +impl<'a> AuthorityComponents<'a> { + /// Creates a new `AuthorityComponents` from the IRI. + pub fn from_iri<S: Spec>(iri: &'a RiReferenceStr<S>) -> Option<Self> { + iri.authority_str() + .map(trusted_parser::authority::decompose_authority) + } + + /// Returns the `userinfo` part, excluding the following `@`. + #[must_use] + pub fn userinfo(&self) -> Option<&'a str> { + let userinfo_at = self.host_start.checked_sub(1)?; + debug_assert_eq!(self.authority.as_bytes()[userinfo_at], b'@'); + Some(&self.authority[..userinfo_at]) + } + + /// Returns the `host` part. + #[inline] + #[must_use] + pub fn host(&self) -> &'a str { + // NOTE: RFC 6874 support may need the internal logic to change. 
+ &self.authority[self.host_start..self.host_end] + } + + /// Returns the `port` part, excluding the following `:`. + #[must_use] + pub fn port(&self) -> Option<&'a str> { + if self.host_end == self.authority.len() { + return None; + } + let port_colon = self.host_end; + debug_assert_eq!(self.authority.as_bytes()[port_colon], b':'); + Some(&self.authority[(port_colon + 1)..]) + } +} + +#[cfg(test)] +#[cfg(feature = "alloc")] +mod tests { + use super::*; + + #[cfg(all(feature = "alloc", not(feature = "std")))] + use alloc::string::String; + + use crate::types::IriReferenceStr; + + const USERINFO: &[&str] = &["", "user:password", "user"]; + + const PORT: &[&str] = &[ + "", + "0", + "0000", + "80", + "1234567890123456789012345678901234567890", + ]; + + const HOST: &[&str] = &[ + "", + "localhost", + "example.com", + "192.0.2.0", + "[2001:db8::1]", + "[2001:0db8:0:0:0:0:0:1]", + "[2001:0db8::192.0.2.255]", + "[v9999.this-is-futuristic-ip-address]", + ]; + + fn compose_to_relative_iri(userinfo: Option<&str>, host: &str, port: Option<&str>) -> String { + let mut buf = String::from("//"); + if let Some(userinfo) = userinfo { + buf.push_str(userinfo); + buf.push('@'); + } + buf.push_str(host); + if let Some(port) = port { + buf.push(':'); + buf.push_str(port); + } + buf + } + + #[test] + fn test_decompose_authority() { + for host in HOST.iter().copied() { + for userinfo in USERINFO.iter().map(|s| Some(*s)).chain(None) { + for port in PORT.iter().map(|s| Some(*s)).chain(None) { + let authority = compose_to_relative_iri(userinfo, host, port); + let authority = + IriReferenceStr::new(&authority).expect("test case should be valid"); + let components = AuthorityComponents::from_iri(authority) + .expect("relative path composed for this test should contain authority"); + + assert_eq!(components.host(), host); + assert_eq!(components.userinfo(), userinfo); + assert_eq!(components.port(), port); + } + } + } + } +} diff --git a/vendor/iri-string/src/convert.rs 
b/vendor/iri-string/src/convert.rs new file mode 100644 index 00000000..5b4ec9a4 --- /dev/null +++ b/vendor/iri-string/src/convert.rs @@ -0,0 +1,291 @@ +//! Conversion between URI/IRI types. + +use core::fmt; + +#[cfg(feature = "alloc")] +use alloc::collections::TryReserveError; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::String; + +#[cfg(feature = "alloc")] +use crate::format::{ToDedicatedString, ToStringFallible}; +use crate::spec::Spec; +use crate::types::{ + RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr, +}; +#[cfg(feature = "alloc")] +use crate::types::{ + RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString, RiRelativeString, + RiString, +}; +#[cfg(feature = "alloc")] +use crate::types::{ + UriAbsoluteString, UriFragmentString, UriQueryString, UriReferenceString, UriRelativeString, + UriString, +}; + +/// Hexadecimal digits for a nibble. +const HEXDIGITS: [u8; 16] = [ + b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'A', b'B', b'C', b'D', b'E', b'F', +]; + +/// A resource identifier mapped to a URI of some kind. +/// +/// Supported `Src` type are: +/// +/// * IRIs: +/// + [`IriAbsoluteStr`] (alias of `RiAbsoluteStr<IriSpec>`) +/// + [`IriReferenceStr`] (alias of `RiReferenceStr<IriSpec>`) +/// + [`IriRelativeStr`] (alias of `RiRelativeStr<IriSpec>`) +/// + [`IriStr`] (alias of `RiStr<IriSpec>`) +/// * URIs: +/// + [`UriAbsoluteStr`] (alias of `RiAbsoluteStr<UriSpec>`) +/// + [`UriReferenceStr`] (alias of `RiReferenceStr<UriSpec>`) +/// + [`UriRelativeStr`] (alias of `RiRelativeStr<UriSpec>`) +/// + [`UriStr`] (alias of `RiStr<UriSpec>`) +/// +/// # Examples +/// +/// ``` +/// use iri_string::convert::MappedToUri; +/// use iri_string::types::{IriStr, UriStr}; +/// +/// let src = IriStr::new("http://example.com/?alpha=\u{03B1}")?; +/// // The type is `MappedToUri<IriStr>`, but you usually don't need to specify. 
+/// let mapped = MappedToUri::from(src).to_string(); +/// assert_eq!(mapped, "http://example.com/?alpha=%CE%B1"); +/// # Ok::<_, iri_string::validate::Error>(()) +/// ``` +/// +/// [`IriAbsoluteStr`]: crate::types::IriAbsoluteStr +/// [`IriReferenceStr`]: crate::types::IriReferenceStr +/// [`IriRelativeStr`]: crate::types::IriRelativeStr +/// [`IriStr`]: crate::types::IriStr +/// [`UriAbsoluteStr`]: crate::types::UriAbsoluteStr +/// [`UriReferenceStr`]: crate::types::UriReferenceStr +/// [`UriRelativeStr`]: crate::types::UriRelativeStr +/// [`UriStr`]: crate::types::UriStr +#[derive(Debug, Clone, Copy)] +pub struct MappedToUri<'a, Src: ?Sized>(&'a Src); + +/// Implement conversions for an IRI string type. +macro_rules! impl_for_iri { + ($borrowed:ident, $owned:ident, $owned_uri:ident) => { + impl<S: Spec> fmt::Display for MappedToUri<'_, $borrowed<S>> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write_percent_encoded(f, self.0.as_str()) + } + } + + #[cfg(feature = "alloc")] + impl<S: Spec> ToDedicatedString for MappedToUri<'_, $borrowed<S>> { + type Target = $owned_uri; + + fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> { + let s = self.try_to_string()?; + Ok(TryFrom::try_from(s) + .expect("[validity] the IRI must be encoded into a valid URI")) + } + } + + impl<'a, S: Spec> From<&'a $borrowed<S>> for MappedToUri<'a, $borrowed<S>> { + #[inline] + fn from(iri: &'a $borrowed<S>) -> Self { + Self(iri) + } + } + + #[cfg(feature = "alloc")] + impl<'a, S: Spec> From<&'a $owned<S>> for MappedToUri<'a, $borrowed<S>> { + #[inline] + fn from(iri: &'a $owned<S>) -> Self { + Self(iri.as_slice()) + } + } + }; +} + +impl_for_iri!(RiReferenceStr, RiReferenceString, UriReferenceString); +impl_for_iri!(RiStr, RiString, UriString); +impl_for_iri!(RiAbsoluteStr, RiAbsoluteString, UriAbsoluteString); +impl_for_iri!(RiRelativeStr, RiRelativeString, UriRelativeString); +impl_for_iri!(RiQueryStr, RiQueryString, UriQueryString); 
+impl_for_iri!(RiFragmentStr, RiFragmentString, UriFragmentString); + +/// Percent-encodes and writes the IRI string using the given buffer. +fn write_percent_encoded(f: &mut fmt::Formatter<'_>, mut s: &str) -> fmt::Result { + while !s.is_empty() { + // Skip ASCII characters. + let non_ascii_pos = s.bytes().position(|b| !b.is_ascii()).unwrap_or(s.len()); + let (ascii, rest) = s.split_at(non_ascii_pos); + if !ascii.is_empty() { + f.write_str(ascii)?; + s = rest; + } + + if s.is_empty() { + return Ok(()); + } + + // Search for the next ASCII character. + let nonascii_end = s.bytes().position(|b| b.is_ascii()).unwrap_or(s.len()); + let (nonasciis, rest) = s.split_at(nonascii_end); + debug_assert!( + !nonasciis.is_empty(), + "string without non-ASCII characters should have caused early return" + ); + s = rest; + + // Escape non-ASCII characters as percent-encoded bytes. + // + // RFC 3987 (section 3.1 step 2) says "for each character in + // 'ucschar' or 'iprivate'", but this simply means "for each + // non-ASCII characters" since any non-ASCII characters that can + // appear in an IRI match `ucschar` or `iprivate`. + /// Number of source bytes to encode at once. + const NUM_BYTES_AT_ONCE: usize = 21; + percent_encode_bytes(f, nonasciis, &mut [0_u8; NUM_BYTES_AT_ONCE * 3])?; + } + + Ok(()) +} + +/// Percent-encode the string and pass the encoded chunks to the given function. +/// +/// `buf` is used as a temporary working buffer. It is initialized by this +/// function, so users can pass any mutable byte slice with enough size. +/// +/// # Precondition +/// +/// The length of `buf` must be 3 bytes or more. +fn percent_encode_bytes(f: &mut fmt::Formatter<'_>, s: &str, buf: &mut [u8]) -> fmt::Result { + /// Fill the buffer by percent-encoded bytes. + /// + /// Note that this function applies percent-encoding to every characters, + /// even if it is ASCII alphabet. + /// + /// # Precondition + /// + /// * The length of `buf` must be 3 bytes or more. 
+ /// * All of the `buf[i * 3]` elements should already be set to `b'%'`. + // This function have many preconditions and I don't want checks for them + // to be mandatory, so make this nested inner function. + fn fill_by_percent_encoded<'a>(buf: &'a mut [u8], bytes: &mut core::str::Bytes<'_>) -> &'a str { + let src_len = bytes.len(); + // `<[u8; N]>::array_chunks_mut` is unstable as of Rust 1.58.1. + for (dest, byte) in buf.chunks_exact_mut(3).zip(bytes.by_ref()) { + debug_assert_eq!( + dest.len(), + 3, + "[validity] `chunks_exact()` must return a slice with the exact length" + ); + debug_assert_eq!( + dest[0], b'%', + "[precondition] the buffer must be properly initialized" + ); + + let upper = byte >> 4; + let lower = byte & 0b1111; + dest[1] = HEXDIGITS[usize::from(upper)]; + dest[2] = HEXDIGITS[usize::from(lower)]; + } + let num_dest_written = (src_len - bytes.len()) * 3; + let buf_filled = &buf[..num_dest_written]; + // SAFETY: `b'%'` and `HEXDIGITS[_]` are all ASCII characters, so + // `buf_filled` is filled with ASCII characters and is valid UTF-8 bytes. + unsafe { + debug_assert!(core::str::from_utf8(buf_filled).is_ok()); + core::str::from_utf8_unchecked(buf_filled) + } + } + + assert!( + buf.len() >= 3, + "[precondition] length of `buf` must be 3 bytes or more" + ); + + // Drop the elements that will never be used. + // The length to be used is always a multiple of three. + let buf_len = buf.len() / 3 * 3; + let buf = &mut buf[..buf_len]; + + // Fill some bytes with `%`. + // This will be vectorized by optimization (especially for long buffers), + // so no need to selectively set `buf[i * 3]`. + buf.fill(b'%'); + + let mut bytes = s.bytes(); + // `<core::str::Bytes as ExactSizeIterator>::is_empty` is unstable as of Rust 1.58.1. + while bytes.len() != 0 { + let encoded = fill_by_percent_encoded(buf, &mut bytes); + f.write_str(encoded)?; + } + + Ok(()) +} + +/// Percent-encodes the given IRI using the given buffer. 
+#[cfg(feature = "alloc")] +pub(crate) fn try_percent_encode_iri_inline( + iri: &mut String, +) -> Result<(), alloc::collections::TryReserveError> { + // Calculate the result length and extend the buffer. + let num_nonascii = count_nonascii(iri); + if num_nonascii == 0 { + // No need to escape. + return Ok(()); + } + // Each non-ASCII byte grows from one byte to three (`%XX`), i.e. two extra bytes. + let additional = num_nonascii * 2; + iri.try_reserve(additional)?; + let src_len = iri.len(); + + // Temporarily take the ownership of the internal buffer. + let mut buf = core::mem::take(iri).into_bytes(); + // `b'\0'` cannot appear in a valid IRI, so this default value would be + // useful in case of debugging. + buf.extend(core::iter::repeat(b'\0').take(additional)); + + // Fill the buffer from the tail to the head. + // `dest_end - src_end` always equals `2 * rest_nonascii`, so once every + // non-ASCII byte has been encoded the remaining ASCII prefix is already in + // its final position and the loop can stop. + let mut dest_end = buf.len(); + let mut src_end = src_len; + let mut rest_nonascii = num_nonascii; + while rest_nonascii > 0 { + debug_assert!( + src_end > 0, + "[validity] the source position should not overrun" + ); + debug_assert!( + dest_end > 0, + "[validity] the destination position should not overrun" + ); + src_end -= 1; + dest_end -= 1; + let byte = buf[src_end]; + if byte.is_ascii() { + buf[dest_end] = byte; + // Use the ASCII character directly. + } else { + // Percent-encode the byte. + dest_end -= 2; + buf[dest_end] = b'%'; + let upper = byte >> 4; + let lower = byte & 0b1111; + buf[dest_end + 1] = HEXDIGITS[usize::from(upper)]; + buf[dest_end + 2] = HEXDIGITS[usize::from(lower)]; + rest_nonascii -= 1; + } + } + + // Move the result from the temporary buffer to the destination. + let s = String::from_utf8(buf).expect("[consistency] the encoding result is an ASCII string"); + *iri = s; + Ok(()) +} + +/// Returns the number of non-ASCII bytes.
+#[cfg(feature = "alloc")] +#[inline] +#[must_use] +fn count_nonascii(s: &str) -> usize { + s.bytes().filter(|b| !b.is_ascii()).count() +} diff --git a/vendor/iri-string/src/format.rs b/vendor/iri-string/src/format.rs new file mode 100644 index 00000000..ecc038d0 --- /dev/null +++ b/vendor/iri-string/src/format.rs @@ -0,0 +1,209 @@ +//! Utilities for formatting (especially `Display` trait). +//! +//! This module contains utilities for [`Display`][`core::fmt::Display`]-able +//! types. + +use core::fmt::{self, Write as _}; + +#[cfg(feature = "alloc")] +use alloc::collections::TryReserveError; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::String; + +/// Output buffer capacity overflow error. +#[derive(Debug, Clone, Copy)] +pub struct CapacityOverflowError; + +impl fmt::Display for CapacityOverflowError { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("buffer capacity overflow") + } +} + +#[cfg(feature = "std")] +impl std::error::Error for CapacityOverflowError {} + +/// Writer to the bytes buffer. +struct ByteBufWriter<'b> { + /// Destination buffer. + buffer: &'b mut [u8], + /// Position to write the next string fragment. + cursor: usize, +} + +impl fmt::Write for ByteBufWriter<'_> { + fn write_str(&mut self, s: &str) -> fmt::Result { + let dest = &mut self.buffer[self.cursor..]; + if dest.len() < s.len() { + return Err(fmt::Error); + } + dest[..s.len()].copy_from_slice(s.as_bytes()); + self.cursor += s.len(); + Ok(()) + } +} + +/// Writes to the bytes buffer. 
+pub fn write_to_slice<'a, T: fmt::Display>( + buf: &'a mut [u8], + value: &T, +) -> Result<&'a str, CapacityOverflowError> { + let mut writer = ByteBufWriter { + buffer: buf, + cursor: 0, + }; + if write!(writer, "{}", value).is_err() { + return Err(CapacityOverflowError); + } + let len = writer.cursor; + let result = core::str::from_utf8(&buf[..len]) + .expect("[validity] fmt::Display writes valid UTF-8 byte sequence"); + Ok(result) +} + +/// Writer that fails (not panics) on OOM. +#[cfg(feature = "alloc")] +struct StringWriter<'a> { + /// Destination buffer. + buffer: &'a mut String, + /// Memory allocation error. + error: Option<TryReserveError>, +} + +#[cfg(feature = "alloc")] +impl fmt::Write for StringWriter<'_> { + fn write_str(&mut self, s: &str) -> fmt::Result { + if self.error.is_some() { + return Err(fmt::Error); + } + if let Err(e) = self.buffer.try_reserve(s.len()) { + self.error = Some(e); + return Err(fmt::Error); + } + // This should never fail since `.try_reserve(s.len())` succeeded. + self.buffer.push_str(s); + Ok(()) + } +} + +/// Appends the data to the string. +/// +/// When allocation failure happens, incompletely appended strings won't be +/// stripped. Callers are responsible to clean up the destination if necessary. +#[cfg(feature = "alloc")] +pub fn try_append_to_string<T: fmt::Display>( + dest: &mut String, + value: &T, +) -> Result<(), TryReserveError> { + let mut writer = StringWriter { + buffer: dest, + error: None, + }; + if write!(writer, "{}", value).is_err() { + let e = writer + .error + .expect("[consistency] allocation error should be set on formatting failure"); + return Err(e); + } + Ok(()) +} + +/// Returns true if the two equals after they are converted to strings. +pub(crate) fn eq_str_display<T>(s: &str, d: &T) -> bool +where + T: ?Sized + fmt::Display, +{ + /// Dummy writer to compare the formatted object to the given string. 
+ struct CmpWriter<'a>(&'a str); + impl fmt::Write for CmpWriter<'_> { + fn write_str(&mut self, s: &str) -> fmt::Result { + if self.0.len() < s.len() { + return Err(fmt::Error); + } + let (prefix, rest) = self.0.split_at(s.len()); + self.0 = rest; + if prefix == s { + Ok(()) + } else { + Err(fmt::Error) + } + } + } + + let mut writer = CmpWriter(s); + let succeeded = write!(writer, "{}", d).is_ok(); + succeeded && writer.0.is_empty() +} + +/// A debug-printable type to hide the sensitive information. +#[derive(Clone, Copy)] +pub(crate) struct Censored; + +impl core::fmt::Debug for Censored { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str("{censored}") + } +} + +/// [`ToString`][`alloc::string::ToString`], but without panic. +#[cfg(feature = "alloc")] +pub trait ToStringFallible: alloc::string::ToString { + /// [`ToString::to_string`][`alloc::string::ToString::to_string`], but without panic on OOM. + fn try_to_string(&self) -> Result<String, TryReserveError>; +} + +#[cfg(feature = "alloc")] +impl<T: fmt::Display> ToStringFallible for T { + /// [`ToString::to_string`][`alloc::string::ToString::to_string`], but without panic on OOM. + #[inline] + fn try_to_string(&self) -> Result<String, TryReserveError> { + let mut buf = String::new(); + try_append_to_string(&mut buf, self)?; + Ok(buf) + } +} + +/// A trait for types that can be converted to a dedicated allocated string types. +#[cfg(feature = "alloc")] +pub trait ToDedicatedString { + /// Conversion target type. + type Target; + + /// Converts the value to the allocated string. + fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError>; + + /// Converts the value to the allocated string. + /// + /// # Panics + /// + /// Panics if memory allocation error occured. 
+ #[inline] + #[must_use] + fn to_dedicated_string(&self) -> Self::Target { + self.try_to_dedicated_string() + .expect("failed to allocate enough memory") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn eq_str_display_1() { + assert!(eq_str_display("hello", "hello")); + assert!(eq_str_display("42", &42)); + + assert!(eq_str_display( + r#"\x00\t\r\n\xff\\"#, + &b"\x00\t\r\n\xff\\".escape_ascii() + )); + + assert!(!eq_str_display("hello", "world")); + assert!(!eq_str_display("hello world", "hello")); + assert!(!eq_str_display("hello", "hello world")); + assert!(!eq_str_display("42", &4)); + assert!(!eq_str_display("4", &42)); + } +} diff --git a/vendor/iri-string/src/lib.rs b/vendor/iri-string/src/lib.rs new file mode 100644 index 00000000..9be41a9b --- /dev/null +++ b/vendor/iri-string/src/lib.rs @@ -0,0 +1,159 @@ +//! String types for [RFC 3987 Internationalized Resource Identifiers (IRIs)][RFC 3987] and +//! [RFC 3986 Uniform Resource Identifiers (URIs)][RFC 3986]. +//! +//! Note that this crate does not have any extra knowledge about protocols. +//! Comparisons between IRI strings by `PartialEq` and `Eq` are implemented as [simple string +//! comparison](https://tools.ietf.org/html/rfc3986#section-6.2.1). +//! You should implement by yourself or use another crate to use such extra knowledge to compare +//! IRIs / URIs. +//! +//! # Capability +//! +//! This crate provides many features for IRIs / URIs. +//! +//! ## String types +//! +//! [`types` module][`types`] provides various string types for IRIs and URIs. +//! The borrowed string types are unsized slice types (such as `[u8]` and `str`) +//! and not a sized struct, so they are highly interoperable with for example +//! `Cow` and `Rc`. Conversions between `&str` and borrowed IRI string types are easy. +//! +//! ## Resolvers +//! +//! [`resolve` module][`resolve`] provides IRI / URI references resolver. +//! However, you are recommended to use methods of string types such as +//!
[`RiReferenceStr::resolve_against()`] or [`RiRelativeStr::resolve_against()`] +//! if you don't intend to resolve multiple IRIs against the same base. +//! +//! ## Validators +//! +//! Validator functions are provided from [`validate` module][`validate`]. +//! +//! ## Percent encoding +//! +//! [`percent_encode` module][`percent_encode`] provides a converter to encode +//! user-provided string into percent-encoded one (if syntax requires so). +//! +//! ## IRI builder +//! +//! [`build` module][`build`] provides IRI builder. +//! +//! ## URI template (RFC 6570) +//! +//! [`template` module][`template`] provides an RFC 6570 URI Template processor. +//! +//! # Feature flags +//! +//! ## `std` and `alloc` support +//! +//! This crate supports `no_std` usage. +//! +//! * `alloc` feature: +//! + Std library or `alloc` crate is required. +//! + This feature enables types and functions which require memory allocation, +//! e.g. `types::IriString` and `types::IriRelativeStr::resolve_against()`. +//! * `std` feature (**enabled by default**): +//! + Std library is required. +//! + This automatically enables `alloc` feature. +//! + The feature lets the crate utilize std-specific stuff, such as `std::error::Error` trait. +//! * With neither of them: +//! + The crate can be used in `no_std` environment. +//! +//! ## Other features +//! +//! * `serde` +//! + Enables serde support. +//! + Implements `Serialize` and `Deserialize` traits for IRI / URI types. +//! * `memchr` +//! + Enables faster internal character search. +//! +//! # Rationale +//! +//! ## `foo:`, `foo:/`, `foo://`, `foo:///`, `foo:////`, ... are valid IRIs +//! +//! All of these are valid IRIs. +//! (On the other hand, all of them are invalid as relative IRI reference, because they don't +//! match `relative-part` rule, especially `path-noscheme`, as the first path component of the +//! relative path contains a colon.) +//! +//! * `foo:` +//! + Decomposed to `<scheme="foo">:<path-empty="">`. +//! * `foo:/` +//!
+ Decomposed to `<scheme="foo">:<path-absolute="/">`. +//! * `foo://` +//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="">`. +//! * `foo:///` +//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="/">`. +//! * `foo:////` +//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="//">`. +//! * `foo://///` +//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="///">`. +//! +//! RFC 3986 says that "if authority is absent, path cannot start with `//`". +//! +//! > When authority is present, the path must either be empty or begin with a slash ("/") +//! > character. When authority is not present, the path cannot begin with two slash characters +//! > ("//"). +//! > +//! > --- [RFC 3986, section 3. Syntax Components](https://tools.ietf.org/html/rfc3986#section-3). +//! +//! > If a URI contains an authority component, then the path component must either be empty or +//! > begin with a slash ("/") character. If a URI does not contain an authority component, then the +//! > path cannot begin with two slash characters ("//"). +//! > +//! > --- [RFC 3986, section 3.3. Path](https://tools.ietf.org/html/rfc3986#section-3.3) +//! +//! We should interpret them as "if `authority` rule is completely unused (i.e. does not match any +//! strings **including empty string**), path cannot start with `//`". +//! In other words, we should consider this as **explaining the ABNF of `hier-part` rule** +//! (especially why it does not use `path` rule), but **not adding extra restriction to the rule +//! written in ABNF**. +//! +//! This restriction is necessary to remove ambiguity in decomposition of some strings. +//! For example, it is natural to decompose `foo://` to `<scheme="foo">:<path="//">` or +//! `<scheme="foo">://<authority=""><path="">`. +//! The restriction, **which is already encoded to the ABNF rule**, tells us to always decompose to +//! the latter form, rather than the former one. +//! +//! 
Readers of the spec might be confused by "when authority is **present**" and "if a URI +//! **contains** an authority component", which are unclear. +//! However, based on the interpretation above, we should consider authority part with empty string +//! as satisfying the condition "authority is **present**". +//! +//! ## IRI resolution can fail +//! +//! For some inputs, resulting string of IRI normalization and resolution can be syntactically +//! correct but semantically wrong. In such cases, the normalizer and resolver provided by this +//! crate do not silently "fix" the IRI by non-standard processing, but just +//! fail by returning `Err(_)`. +//! +//! For details, see the documentation of [`normalize`] module. +//! +//! [RFC 3986]: https://tools.ietf.org/html/rfc3986 +//! [RFC 3987]: https://tools.ietf.org/html/rfc3987 +//! [`RiReferenceStr::resolve_against()`]: `types::RiReferenceStr::resolve_against` +//! [`RiRelativeStr::resolve_against()`]: `types::RiRelativeStr::resolve_against` +#![warn(missing_docs)] +#![warn(unsafe_op_in_unsafe_fn)] +#![warn(clippy::missing_docs_in_private_items)] +#![warn(clippy::undocumented_unsafe_blocks)] +#![cfg_attr(not(feature = "std"), no_std)] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + +#[cfg(feature = "alloc")] +extern crate alloc; + +pub mod build; +pub mod components; +pub mod convert; +pub mod format; +pub mod mask_password; +pub mod normalize; +pub(crate) mod parser; +pub mod percent_encode; +pub(crate) mod raw; +pub mod resolve; +pub mod spec; +pub mod template; +pub mod types; +pub mod validate; diff --git a/vendor/iri-string/src/mask_password.rs b/vendor/iri-string/src/mask_password.rs new file mode 100644 index 00000000..ea3fda3b --- /dev/null +++ b/vendor/iri-string/src/mask_password.rs @@ -0,0 +1,298 @@ +//! Password masker.
+ +use core::fmt::{self, Write as _}; +use core::ops::Range; + +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::borrow::ToOwned; +#[cfg(feature = "alloc")] +use alloc::collections::TryReserveError; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::String; + +use crate::components::AuthorityComponents; +#[cfg(feature = "alloc")] +use crate::format::ToDedicatedString; +use crate::spec::Spec; +use crate::types::{RiAbsoluteStr, RiReferenceStr, RiRelativeStr, RiStr}; +#[cfg(feature = "alloc")] +use crate::types::{RiAbsoluteString, RiReferenceString, RiRelativeString, RiString}; + +/// Returns the range of the password to hide. +pub(crate) fn password_range_to_hide<S: Spec>(iri: &RiReferenceStr<S>) -> Option<Range<usize>> { + /// Spec-agnostic internal implementation of `password_range_to_hide`. + fn inner(iri: &str, userinfo: &str) -> Option<Range<usize>> { + // Length (including `//`) before the `authority` component. + // 2: `"//".len()`. + let authority_start = 2 + iri + .find("//") + .expect("[validity] `authority` component must be prefixed with `//`"); + let end = authority_start + userinfo.len(); + let start = authority_start + userinfo.find(':').map_or_else(|| userinfo.len(), |v| v + 1); + Some(start..end) + } + + let authority_components = AuthorityComponents::from_iri(iri)?; + let userinfo = authority_components.userinfo()?; + inner(iri.as_str(), userinfo) +} + +/// Writes the URI with the password part replaced. +fn write_with_masked_password<D>( + f: &mut fmt::Formatter<'_>, + s: &str, + pw_range: Range<usize>, + alt: &D, +) -> fmt::Result +where + D: ?Sized + fmt::Display, +{ + debug_assert!( + s.len() >= pw_range.end, + "[consistency] password range must be inside the IRI" + ); + + f.write_str(&s[..pw_range.start])?; + alt.fmt(f)?; + f.write_str(&s[pw_range.end..])?; + Ok(()) +} + +/// Writes an IRI with the password part trimmed. 
+fn write_trim_password(f: &mut fmt::Formatter<'_>, s: &str, pw_range: Range<usize>) -> fmt::Result { + write_with_masked_password(f, s, pw_range, "") +} + +/// A wrapper of an IRI string that masks the non-empty password when `Display`ed. +/// +/// This is the return type of the `mask_password` method of IRI string types (such as +/// [`RiStr::mask_password`]). +/// +/// # Examples +/// +/// ``` +/// # use iri_string::validate::Error; +/// # #[cfg(feature = "alloc")] { +/// use iri_string::types::UriReferenceStr; +/// +/// let iri = UriReferenceStr::new("http://user:password@example.com/path?query")?; +/// let masked = iri.mask_password(); +/// assert_eq!(masked.to_string(), "http://user:@example.com/path?query"); +/// +/// assert_eq!( +/// masked.replace_password("${password}").to_string(), +/// "http://user:${password}@example.com/path?query" +/// ); +/// # } +/// # Ok::<_, Error>(()) +/// ``` +/// +/// [`RiStr::mask_password`]: `crate::types::RiStr::mask_password` +#[derive(Clone, Copy)] +pub struct PasswordMasked<'a, T: ?Sized> { + /// IRI reference. + iri_ref: &'a T, +} + +impl<'a, T: ?Sized> PasswordMasked<'a, T> { + /// Creates a new `PasswordMasked` object. + #[inline] + #[must_use] + pub(crate) fn new(iri_ref: &'a T) -> Self { + Self { iri_ref } + } +} + +/// Implements traits for `PasswordMasked`. +macro_rules! impl_mask { + ($borrowed:ident, $owned:ident) => { + impl<'a, S: Spec> PasswordMasked<'a, $borrowed<S>> { + /// Replaces the password with the given arbitrary content. + /// + /// Note that the result might be invalid as an IRI since arbitrary string + /// can go to the place of the password. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://user:password@example.com/path?query")?; + /// let masked = iri.mask_password(); + /// + /// assert_eq!( + /// masked.replace_password("${password}").to_string(), + /// "http://user:${password}@example.com/path?query" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn replace_password<D>(&self, alt: D) -> PasswordReplaced<'a, $borrowed<S>, D> + where + D: fmt::Display, + { + PasswordReplaced::with_replacer(self.iri_ref, move |_| alt) + } + + /// Replaces the password with the given arbitrary content. + /// + /// Note that the result might be invalid as an IRI since arbitrary string + /// can go to the place of the password. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://user:password@example.com/path?query")?; + /// let masked = iri.mask_password(); + /// + /// let replaced = masked + /// .replace_password_with(|password| format!("{{{} chars}}", password.len())); + /// assert_eq!( + /// replaced.to_string(), + /// "http://user:{8 chars}@example.com/path?query" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn replace_password_with<F, D>( + &self, + replace: F, + ) -> PasswordReplaced<'a, $borrowed<S>, D> + where + F: FnOnce(&str) -> D, + D: fmt::Display, + { + PasswordReplaced::with_replacer(self.iri_ref, replace) + } + } + + impl<S: Spec> fmt::Display for PasswordMasked<'_, $borrowed<S>> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match password_range_to_hide(self.iri_ref.as_ref()) { + 
Some(pw_range) => write_trim_password(f, self.iri_ref.as_str(), pw_range), + None => self.iri_ref.fmt(f), + } + } + } + + impl<S: Spec> fmt::Debug for PasswordMasked<'_, $borrowed<S>> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_char('<')?; + fmt::Display::fmt(self, f)?; + f.write_char('>') + } + } + + #[cfg(feature = "alloc")] + impl<S: Spec> ToDedicatedString for PasswordMasked<'_, $borrowed<S>> { + type Target = $owned<S>; + + fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> { + let pw_range = match password_range_to_hide(self.iri_ref.as_ref()) { + Some(pw_range) => pw_range, + None => return Ok(self.iri_ref.to_owned()), + }; + let mut s = String::new(); + let iri_ref = self.iri_ref.as_str(); + s.try_reserve(iri_ref.len() - (pw_range.end - pw_range.start))?; + s.push_str(&iri_ref[..pw_range.start]); + s.push_str(&iri_ref[pw_range.end..]); + // SAFETY: IRI remains valid and type does not change if + // the password is trimmed. + let iri = unsafe { <$owned<S>>::new_maybe_unchecked(s) }; + Ok(iri) + } + } + }; +} + +impl_mask!(RiReferenceStr, RiReferenceString); +impl_mask!(RiStr, RiString); +impl_mask!(RiAbsoluteStr, RiAbsoluteString); +impl_mask!(RiRelativeStr, RiRelativeString); + +/// A wrapper of an IRI string that replaces the non-empty password when `Display`ed. +/// +/// This is the return type of the `replace_password` and `replace_password_with` methods of +/// `PasswordMasked` (such as [`PasswordMasked::replace_password`]). +/// +/// Note that the result might be invalid as an IRI since arbitrary string can +/// go to the place of the password. +#[cfg_attr( + feature = "alloc", + doc = "Because of this, [`ToDedicatedString`] trait is not implemented for this type." +)] +/// +/// [`PasswordMasked::replace_password`]: `PasswordMasked::replace_password` +pub struct PasswordReplaced<'a, T: ?Sized, D> { + /// IRI reference. + iri_ref: &'a T, + /// Password range and alternative content. 
+ password: Option<(Range<usize>, D)>, +} + +impl<'a, T, D> PasswordReplaced<'a, T, D> +where + T: ?Sized, + D: fmt::Display, +{ + /// Creates a new `PasswordReplaced` object. + /// + /// # Precondition + /// + /// The given string must be a valid IRI reference. + #[inline] + #[must_use] + pub(crate) fn with_replacer<S, F>(iri_ref: &'a T, replace: F) -> Self + where + S: Spec, + T: AsRef<RiReferenceStr<S>>, + F: FnOnce(&str) -> D, + { + let iri_ref_asref = iri_ref.as_ref(); + let password = password_range_to_hide(iri_ref_asref) + .map(move |pw_range| (pw_range.clone(), replace(&iri_ref_asref.as_str()[pw_range]))); + Self { iri_ref, password } + } +} + +/// Implements traits for `PasswordReplaced`. +macro_rules! impl_replace { + ($borrowed:ident, $owned:ident) => { + impl<S: Spec, D: fmt::Display> fmt::Display for PasswordReplaced<'_, $borrowed<S>, D> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.password { + Some((pw_range, alt)) => { + write_with_masked_password(f, self.iri_ref.as_str(), pw_range.clone(), alt) + } + None => self.iri_ref.fmt(f), + } + } + } + + impl<S: Spec, D: fmt::Display> fmt::Debug for PasswordReplaced<'_, $borrowed<S>, D> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_char('<')?; + fmt::Display::fmt(self, f)?; + f.write_char('>') + } + } + }; +} + +impl_replace!(RiReferenceStr, RiReferenceString); +impl_replace!(RiStr, RiString); +impl_replace!(RiAbsoluteStr, RiAbsoluteString); +impl_replace!(RiRelativeStr, RiRelativeString); diff --git a/vendor/iri-string/src/normalize.rs b/vendor/iri-string/src/normalize.rs new file mode 100644 index 00000000..a00fa44a --- /dev/null +++ b/vendor/iri-string/src/normalize.rs @@ -0,0 +1,691 @@ +//! Normalization. +//! +//! # IRI normalization (and resolution) can fail +//! +//! Though this is not explicitly stated in RFC 3986, IRI normalization can fail. +//! For example, `foo:.///bar`, `foo:./..//bar`, and `foo:/..//bar` are all +//! 
normalized to `foo://bar` as a string. However, an IRI without authority (note +//! that this is different from "with empty authority") cannot have a path +//! starting with `//`, since it is ambiguous and can be interpreted as an IRI +//! with authority. So, `foo://bar` is decomposed as scheme `foo`, authority +//! `bar`, and empty path. The expected result is the combination of scheme +//! `foo`, no authority, and path `//bar` (though this is not possible to +//! serialize), so the algorithm fails as it cannot return the intended result. +//! +//! IRI resolution can also fail since it (conditionally) invokes normalization +//! during the resolution process. For example, resolving a reference `.///bar` +//! or `/..//bar` against the base `foo:` fails. +//! +//! Thus, IRI resolution can fail for some abnormal cases. +//! +//! Note that this kind of failure can happen only when the base IRI has no +//! authority and empty path. This would be rare in the wild, since many people +//! would use an IRI with authority part, such as `http://`. +//! +//! If you are handling `scheme://`-style URIs and IRIs, don't worry about the +//! failure. Currently no cases are known to fail when at least one of the base +//! IRI or the relative IRI contains authorities. +//! +//! To know what will happen on resolution failure, see the module documentation +//! for [`resolve`][`crate::resolve`]. +//! +//! ## Examples +//! +//! ### Normalization failure +//! +//! ``` +//! # #[cfg(feature = "alloc")] { +//! use iri_string::normalize::Error; +//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; +//! +//! let base = IriAbsoluteStr::new("foo:.///bar")?; +//! assert!( +//! base.normalize().ensure_rfc3986_normalizable().is_err(), +//! "this normalization should fail without WHATWG URL Standard serialization" +//! ); +//! # } +//! # Ok::<_, iri_string::validate::Error>(()) +//! ``` +//! +//! ### Resolution failure +//! +//! ``` +//! # #[cfg(feature = "alloc")] { +//! 
use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; +//! +//! let base = IriAbsoluteStr::new("scheme:")?; +//! { +//! let reference = IriReferenceStr::new(".///bar")?; +//! let result = reference.resolve_against(base) +//! .ensure_rfc3986_normalizable(); +//! assert!(result.is_err()); +//! } +//! +//! { +//! let reference2 = IriReferenceStr::new("/..//bar")?; +//! // Resulting string will be `scheme://bar`, but `bar` should be a path +//! // segment, not a host. So, the semantically correct target IRI cannot +//! // be represented. +//! let result2 = reference2.resolve_against(base) +//! .ensure_rfc3986_normalizable(); +//! assert!(result2.is_err()); +//! } +//! # } +//! # Ok::<_, iri_string::validate::Error>(()) +//! ``` + +mod error; +mod path; +mod pct_case; + +use core::fmt::{self, Display as _, Write as _}; +use core::marker::PhantomData; + +#[cfg(feature = "alloc")] +use alloc::collections::TryReserveError; + +use crate::components::{RiReferenceComponents, Splitter}; +#[cfg(feature = "alloc")] +use crate::format::{ToDedicatedString, ToStringFallible}; +use crate::parser::str::rfind_split_hole; +use crate::parser::trusted::is_ascii_only_host; +use crate::spec::Spec; +use crate::types::{RiAbsoluteStr, RiReferenceStr, RiStr}; +#[cfg(feature = "alloc")] +use crate::types::{RiAbsoluteString, RiString}; + +pub use self::error::Error; +pub(crate) use self::path::{Path, PathCharacteristic, PathToNormalize}; +pub(crate) use self::pct_case::{ + is_pct_case_normalized, NormalizedAsciiOnlyHost, PctCaseNormalized, +}; + +/// Normalization algorithm. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum NormalizationMode { + /// No normalization. + None, + /// Default normalization mode. + /// + /// Applies RFC 3986 normalization whenever possible. When not possible, + /// applies serialization algorithm defined in WHATWG URL standard. + Default, + /// WHATWG-like normalization mode. 
+ /// + /// Preserves relative path as is (modulo case/pct normalization) when the + /// authority component is absent. + PreserveAuthoritylessRelativePath, +} + +impl NormalizationMode { + /// Returns true if case normalization and percent-encoding normalization should be applied. + /// + /// Note that even when this option is `true`, plain US-ASCII characters + /// won't be automatically lowercased. Users should apply case normalization + /// for US-ASCII only `host` component by themselves. + #[inline] + #[must_use] + fn case_pct_normalization(self) -> bool { + match self { + Self::None => false, + Self::Default | Self::PreserveAuthoritylessRelativePath => true, + } + } +} + +/// Normalizedness check algorithm. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum NormalizednessCheckMode { + /// Default algorithm (corresponding to [`NormalizationMode::Default`]). + Default, + /// Strict RFC 3986 normalization. + Rfc3986, + /// WHATWG-like normalization algorithm (corresponding to + /// [`NormalizationMode::PreserveAuthoritylessRelativePath`]). + PreserveAuthoritylessRelativePath, +} + +/// Normalization operation. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct NormalizationOp { + /// Normalization mode. + pub(crate) mode: NormalizationMode, +} + +/// Spec-agnostic IRI normalization/resolution input. +#[derive(Debug, Clone, Copy)] +pub(crate) struct NormalizationInput<'a> { + /// Target scheme. + scheme: &'a str, + /// Target authority. + authority: Option<&'a str>, + /// Target path without dot-removal. + path: Path<'a>, + /// Target query. + query: Option<&'a str>, + /// Target fragment. + fragment: Option<&'a str>, + /// Normalization type. + op: NormalizationOp, +} + +impl<'a> NormalizationInput<'a> { + /// Creates a `NormalizationInput` from IRIs to resolve. 
+ #[inline] + #[must_use] + pub(crate) fn with_resolution_params<S: Spec>( + base_components: &RiReferenceComponents<'a, S>, + reference: &'a RiReferenceStr<S>, + ) -> Self { + let r = RiReferenceComponents::from(reference); + + Self::create_normalization_input( + r.iri.as_str(), + &r.splitter, + base_components.iri.as_str(), + &base_components.splitter, + ) + } + + /// Creates a `NormalizationInput` from components to resolve an IRI. + #[must_use] + fn create_normalization_input( + r_iri: &'a str, + r: &Splitter, + b_iri: &'a str, + b: &Splitter, + ) -> Self { + /// The toplevel component the reference has. + #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] + enum RefToplevel { + /// Scheme. + Scheme, + /// Authority. + Authority, + /// Path. + Path, + /// Query. + Query, + /// Reference is empty or has only fragment. + None, + } + + impl RefToplevel { + /// Choose a component from either of the reference or the base, + /// based on the toplevel component of the reference. + #[inline] + #[must_use] + fn choose_then<T, F, G>(self, component: RefToplevel, reference: F, base: G) -> T + where + F: FnOnce() -> T, + G: FnOnce() -> T, + { + if self <= component { + reference() + } else { + base() + } + } + } + + let ref_toplevel = if r.has_scheme() { + RefToplevel::Scheme + } else if r.has_authority() { + RefToplevel::Authority + } else if !r.is_path_empty(r_iri.len()) { + RefToplevel::Path + } else if r.has_query() { + RefToplevel::Query + } else { + RefToplevel::None + }; + + let path = match ref_toplevel { + RefToplevel::Scheme | RefToplevel::Authority => { + Path::NeedsProcessing(PathToNormalize::from_single_path(r.path_str(r_iri))) + } + RefToplevel::Path => { + let r_path = r.path_str(r_iri); + if r_path.starts_with('/') { + Path::NeedsProcessing(PathToNormalize::from_single_path(r_path)) + } else { + // About this branch, see + // <https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.3>. 
+ // + // > o If the base URI has a defined authority component and an empty + // > path, then return a string consisting of "/" concatenated with the + // > reference's path; otherwise, + let b_path = b.path_str(b_iri); + let b_path = if b.has_authority() && b_path.is_empty() { + "/" + } else { + b_path + }; + Path::NeedsProcessing(PathToNormalize::from_paths_to_be_resolved( + b_path, r_path, + )) + } + } + RefToplevel::Query | RefToplevel::None => Path::Done(b.path_str(b_iri)), + }; + + Self { + scheme: r.scheme_str(r_iri).unwrap_or_else(|| { + b.scheme_str(b_iri) + .expect("[validity] non-relative IRI must have a scheme") + }), + authority: ref_toplevel.choose_then( + RefToplevel::Authority, + || r.authority_str(r_iri), + || b.authority_str(b_iri), + ), + path, + query: ref_toplevel.choose_then( + RefToplevel::Query, + || r.query_str(r_iri), + || b.query_str(b_iri), + ), + fragment: r.fragment_str(r_iri), + op: NormalizationOp { + mode: NormalizationMode::None, + }, + } + } +} + +impl<'a, S: Spec> From<&'a RiStr<S>> for NormalizationInput<'a> { + fn from(iri: &'a RiStr<S>) -> Self { + let components = RiReferenceComponents::<S>::from(iri.as_ref()); + let (scheme, authority, path, query, fragment) = components.to_major(); + let scheme = scheme.expect("[validity] `absolute IRI must have `scheme`"); + let path = Path::NeedsProcessing(PathToNormalize::from_single_path(path)); + + NormalizationInput { + scheme, + authority, + path, + query, + fragment, + op: NormalizationOp { + mode: NormalizationMode::None, + }, + } + } +} + +#[cfg(feature = "alloc")] +impl<'a, S: Spec> From<&'a RiString<S>> for NormalizationInput<'a> { + #[inline] + fn from(iri: &'a RiString<S>) -> Self { + Self::from(iri.as_slice()) + } +} + +impl<'a, S: Spec> From<&'a RiAbsoluteStr<S>> for NormalizationInput<'a> { + fn from(iri: &'a RiAbsoluteStr<S>) -> Self { + let components = RiReferenceComponents::<S>::from(iri.as_ref()); + let (scheme, authority, path, query, fragment) = 
components.to_major(); + let scheme = scheme.expect("[validity] `absolute IRI must have `scheme`"); + let path = Path::NeedsProcessing(PathToNormalize::from_single_path(path)); + + NormalizationInput { + scheme, + authority, + path, + query, + fragment, + op: NormalizationOp { + mode: NormalizationMode::None, + }, + } + } +} + +#[cfg(feature = "alloc")] +impl<'a, S: Spec> From<&'a RiAbsoluteString<S>> for NormalizationInput<'a> { + #[inline] + fn from(iri: &'a RiAbsoluteString<S>) -> Self { + Self::from(iri.as_slice()) + } +} + +impl NormalizationInput<'_> { + /// Checks if the path is normalizable by RFC 3986 algorithm. + /// + /// Returns `Ok(())` when normalizable, returns `Err(_)` if not. + pub(crate) fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> { + if self.authority.is_some() { + return Ok(()); + } + match self.path { + Path::Done(_) => Ok(()), + Path::NeedsProcessing(path) => path.ensure_rfc3986_normalizable_with_authority_absent(), + } + } +} + +/// Writable as a normalized IRI. +/// +/// Note that this implicitly apply serialization rule defined by WHATWG URL +/// Standard (to handle normalization impossible by RFC 3986) because `Display` +/// should not fail by reasons other than backend I/O failure. If you make the +/// normalization fail in such cases, check if the path starts with `/./`. +/// When the normalization succeeds by RFC 3986 algorithm, the path never starts +/// with `/./`. +struct NormalizedInner<'a, S> { + /// Spec-agnostic normalization input. + input: NormalizationInput<'a>, + /// Spec. + _spec: PhantomData<fn() -> S>, +} + +impl<S: Spec> fmt::Debug for NormalizedInner<'_, S> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Normalized") + .field("input", &self.input) + .finish() + } +} + +impl<'a, S: Spec> NormalizedInner<'a, S> { + /// Creates a new `Normalized` object from the given input. 
+ #[inline] + #[must_use] + fn from_input(input: NormalizationInput<'a>) -> Self { + Self { + input, + _spec: PhantomData, + } + } +} + +impl<S: Spec> fmt::Display for NormalizedInner<'_, S> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Write the scheme. + if self.input.op.mode.case_pct_normalization() { + normalize_scheme(f, self.input.scheme)?; + } else { + f.write_str(self.input.scheme)?; + } + f.write_str(":")?; + + // Write the authority if available. + if let Some(authority) = self.input.authority { + f.write_str("//")?; + if self.input.op.mode.case_pct_normalization() { + normalize_authority::<S>(f, authority)?; + } else { + // No case/pct normalization. + f.write_str(authority)?; + } + } + + // Process and write the path. + match self.input.path { + Path::Done(s) => { + if self.input.op.mode.case_pct_normalization() { + // Normalize the path. + PathToNormalize::from_single_path(s).fmt_write_normalize::<S, _>( + f, + self.input.op, + self.input.authority.is_some(), + )? + } else { + // No normalization. + f.write_str(s)? + } + } + Path::NeedsProcessing(path) => { + path.fmt_write_normalize::<S, _>(f, self.input.op, self.input.authority.is_some())? + } + } + + // Write the query if available. + if let Some(query) = self.input.query { + f.write_char('?')?; + if self.input.op.mode.case_pct_normalization() { + normalize_query::<S>(f, query)?; + } else { + f.write_str(query)?; + } + } + + // Write the fragment if available. + if let Some(fragment) = self.input.fragment { + f.write_char('#')?; + if self.input.op.mode.case_pct_normalization() { + normalize_fragment::<S>(f, fragment)?; + } else { + f.write_str(fragment)?; + } + } + + Ok(()) + } +} + +/// Writes the normalized scheme. +pub(crate) fn normalize_scheme(f: &mut fmt::Formatter<'_>, scheme: &str) -> fmt::Result { + // Apply case normalization. 
+ // + // > namely, that the scheme and US-ASCII only host are case + // > insensitive and therefore should be normalized to lowercase. + // > + // > --- <https://datatracker.ietf.org/doc/html/rfc3987#section-5.3.2.1>. + // + // Note that `scheme` consists of only ASCII characters and contains + // no percent-encoded characters. + scheme + .chars() + .map(|c| c.to_ascii_lowercase()) + .try_for_each(|c| f.write_char(c)) +} + +/// Writes the normalized authority. +fn normalize_authority<S: Spec>(f: &mut fmt::Formatter<'_>, authority: &str) -> fmt::Result { + let host_port = match rfind_split_hole(authority, b'@') { + Some((userinfo, host_port)) => { + // Don't lowercase `userinfo` even if it is ASCII only. `userinfo` + // is not a part of `host`. + PctCaseNormalized::<S>::new(userinfo).fmt(f)?; + f.write_char('@')?; + host_port + } + None => authority, + }; + normalize_host_port::<S>(f, host_port) +} + +/// Writes the normalized host and port. +pub(crate) fn normalize_host_port<S: Spec>( + f: &mut fmt::Formatter<'_>, + host_port: &str, +) -> fmt::Result { + // If the suffix is a colon, it is a delimiter between the host and empty + // port. An empty port should be removed during normalization (see RFC 3986 + // section 3.2.3), so strip it. + // + // > URI producers and normalizers should omit the port component and its + // > ":" delimiter if port is empty or if its value would be the same as + // > that of the scheme's default. + // > + // > --- [RFC 3986 section 3.2.3. Port](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.3) + let host_port = host_port.strip_suffix(':').unwrap_or(host_port); + + // Apply case normalization and percent-encoding normalization to `host`. + // Optional `":" port` part only consists of an ASCII colon and ASCII + // digits, so this won't affect to the test result. + if is_ascii_only_host(host_port) { + // If the host is ASCII characters only, make plain alphabets lower case. 
NormalizedAsciiOnlyHost::new(host_port).fmt(f) + } else { + PctCaseNormalized::<S>::new(host_port).fmt(f) + } +} + +/// Writes the normalized query without the '?' prefix. +pub(crate) fn normalize_query<S: Spec>(f: &mut fmt::Formatter<'_>, query: &str) -> fmt::Result { + // Apply percent-encoding normalization. + PctCaseNormalized::<S>::new(query).fmt(f) +} + +/// Writes the normalized fragment without the '#' prefix. +pub(crate) fn normalize_fragment<S: Spec>( + f: &mut fmt::Formatter<'_>, + fragment: &str, +) -> fmt::Result { + // Apply percent-encoding normalization. + PctCaseNormalized::<S>::new(fragment).fmt(f) +} + +/// Normalized OR resolved IRI. +/// +/// Resolved IRI can be represented by this type. In that case, the result might +/// not be normalized. If you want the IRI resolution result to be normalized, +/// use [`enable_normalization`][`Self::enable_normalization`] method. +/// +/// [`Display`]: `core::fmt::Display` +pub struct Normalized<'a, T: ?Sized> { + /// Spec-agnostic normalization input. + input: NormalizationInput<'a>, + /// Expected result type. + _ty_str: PhantomData<fn() -> T>, +} + +impl<T: ?Sized> fmt::Debug for Normalized<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Normalized") + .field("input", &self.input) + .finish() + } +} + +impl<'a, T: ?Sized> Normalized<'a, T> { + /// Creates a new `Normalized` object from the given input. + #[inline] + #[must_use] + pub(crate) fn from_input(input: NormalizationInput<'a>) -> Self { + Self { + input, + _ty_str: PhantomData, + } + } + + /// Enables the normalization. + /// + /// This lets the normalizer apply the case normalization, percent-encoding + /// normalization, and dot segments removal. + #[inline] + pub fn enable_normalization(&mut self) { + self.input.op.mode = NormalizationMode::Default; + } + + /// Enables the normalization that preserves the relative path under some conditions. 
+ /// + /// Note that this normalization algorithm is not compatible with RFC 3986 + /// algorithm for some inputs. + /// + /// See [`RiStr::normalize_but_preserve_authorityless_relative_path()`] + /// for detail. + #[inline] + pub fn enable_normalization_preserving_authorityless_relative_path(&mut self) { + self.input.op.mode = NormalizationMode::PreserveAuthoritylessRelativePath; + } + + /// Returns `Self` with normalization enabled. + #[inline] + #[must_use] + pub fn and_normalize(mut self) -> Self { + self.enable_normalization(); + self + } + + /// Returns `Self` with special normalization enabled. + /// + /// Note that this normalization algorithm is not compatible with RFC 3986 + /// algorithm for some inputs. + /// + /// See [`RiStr::normalize_but_preserve_authorityless_relative_path()`] + /// for detail. + #[inline] + #[must_use] + pub fn and_normalize_but_preserve_authorityless_relative_path(mut self) -> Self { + self.enable_normalization_preserving_authorityless_relative_path(); + self + } + + /// Checks if the path is normalizable by RFC 3986 algorithm. + /// + /// Returns `Ok(())` when normalizable, returns `Err(_)` if not. 
+ #[inline] + pub fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> { + self.input.ensure_rfc3986_normalizable() + } +} + +impl<S: Spec> fmt::Display for Normalized<'_, RiStr<S>> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + NormalizedInner::<S>::from_input(self.input).fmt(f) + } +} + +impl<S: Spec> fmt::Display for Normalized<'_, RiAbsoluteStr<S>> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + NormalizedInner::<S>::from_input(self.input).fmt(f) + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> ToDedicatedString for Normalized<'_, RiStr<S>> { + type Target = RiString<S>; + + fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> { + let s = self.try_to_string()?; + Ok(TryFrom::try_from(s).expect("[validity] the normalization result must be a valid IRI")) + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> From<Normalized<'_, RiStr<S>>> for RiString<S> { + #[inline] + fn from(v: Normalized<'_, RiStr<S>>) -> Self { + v.to_dedicated_string() + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> From<&Normalized<'_, RiStr<S>>> for RiString<S> { + #[inline] + fn from(v: &Normalized<'_, RiStr<S>>) -> Self { + v.to_dedicated_string() + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> ToDedicatedString for Normalized<'_, RiAbsoluteStr<S>> { + type Target = RiAbsoluteString<S>; + + fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> { + let s = self.try_to_string()?; + Ok(TryFrom::try_from(s).expect("[validity] the normalization result must be a valid IRI")) + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> From<Normalized<'_, RiAbsoluteStr<S>>> for RiAbsoluteString<S> { + #[inline] + fn from(v: Normalized<'_, RiAbsoluteStr<S>>) -> Self { + v.to_dedicated_string() + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> From<&Normalized<'_, RiAbsoluteStr<S>>> for RiAbsoluteString<S> { + #[inline] + fn from(v: &Normalized<'_, RiAbsoluteStr<S>>) -> Self { + 
v.to_dedicated_string() + } +} diff --git a/vendor/iri-string/src/normalize/error.rs b/vendor/iri-string/src/normalize/error.rs new file mode 100644 index 00000000..a5c5c895 --- /dev/null +++ b/vendor/iri-string/src/normalize/error.rs @@ -0,0 +1,26 @@ +//! Normalization and resolution error. + +use core::fmt; + +/// IRI normalization and resolution error. +/// +/// For detail about resolution failure, see [the module documentation][`crate::resolve`]. +#[derive(Debug, Clone)] +pub struct Error(()); + +impl Error { + /// Creates a new error. + pub(crate) fn new() -> Self { + Self(()) + } +} + +impl fmt::Display for Error { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("unresolvable IRI") + } +} + +#[cfg(feature = "std")] +impl std::error::Error for Error {} diff --git a/vendor/iri-string/src/normalize/path.rs b/vendor/iri-string/src/normalize/path.rs new file mode 100644 index 00000000..4f3e3397 --- /dev/null +++ b/vendor/iri-string/src/normalize/path.rs @@ -0,0 +1,620 @@ +//! Path normalization. + +use core::fmt; +use core::ops::Range; + +use crate::parser::str::{find_split_hole, rfind}; +use crate::spec::{Spec, UriSpec}; + +use super::pct_case::PctCaseNormalized; +use super::{Error, NormalizationMode, NormalizationOp}; + +/// Path that is (possibly) not yet processed or being processed. +#[derive(Debug, Clone, Copy)] +pub(crate) enum Path<'a> { + /// The result. No more processing is needed. + Done(&'a str), + /// Not yet completely processed path. + NeedsProcessing(PathToNormalize<'a>), +} + +/// Path that needs merge and/or dot segment removal. +/// +/// # Invariants +/// +/// If the first field (prefix field) is not `None`, it must end with a slash. +#[derive(Debug, Clone, Copy)] +pub(crate) struct PathToNormalize<'a>(Option<&'a str>, &'a str); + +impl<'a> PathToNormalize<'a> { + /// Creates a `PathToNormalize` from the given single path. 
+ #[inline] + #[must_use] + pub(crate) fn from_single_path(path: &'a str) -> Self { + Self(None, path) + } + + /// Creates a `PathToNormalize` from the given base and reference paths to be resolved. + #[must_use] + pub(crate) fn from_paths_to_be_resolved(base: &'a str, reference: &'a str) -> Self { + if reference.starts_with('/') { + return Self(None, reference); + } + + match rfind(base.as_bytes(), b'/') { + Some(last_slash_pos) => Self(Some(&base[..=last_slash_pos]), reference), + None => Self(None, reference), + } + } + + /// Returns true if the path is empty string. + #[inline] + #[must_use] + fn is_empty(&self) -> bool { + // If `self.0` is `Some(_)`, it ends with a slash, i.e. it is not empty. + self.0.is_none() && self.1.is_empty() + } + + /// Returns the length of the not yet normalized path. + #[inline] + #[must_use] + pub(super) fn len(&self) -> usize { + self.len_prefix() + self.1.len() + } + + /// Returns the length of the prefix part. + /// + /// Returns 0 if the prefix part is empty. + #[inline] + #[must_use] + fn len_prefix(&self) -> usize { + self.0.map_or(0, |s| s.len()) + } + + /// Returns a byte at the given position. + #[must_use] + fn byte_at(&self, mut i: usize) -> Option<u8> { + if let Some(prefix) = self.0 { + if i < prefix.len() { + return Some(prefix.as_bytes()[i]); + } + i -= prefix.len(); + } + self.1.as_bytes().get(i).copied() + } + + /// Returns the position of the next slash of the byte at the given position. + #[must_use] + fn find_next_slash(&self, scan_start: usize) -> Option<usize> { + if let Some(prefix) = self.0 { + let prefix_len = prefix.len(); + if scan_start < prefix_len { + prefix[scan_start..].find('/').map(|rel| rel + scan_start) + } else { + let local_i = scan_start - prefix_len; + self.1[local_i..].find('/').map(|rel| rel + scan_start) + } + } else { + self.1[scan_start..].find('/').map(|rel| rel + scan_start) + } + } + + /// Removes the `len` characters from the beginning of `self`. 
+ fn remove_start(&mut self, len: usize) { + if let Some(prefix) = self.0 { + if let Some(suffix_trim_len) = len.checked_sub(prefix.len()) { + self.0 = None; + self.1 = &self.1[suffix_trim_len..]; + } else { + self.0 = Some(&prefix[len..]); + } + } else { + self.1 = &self.1[len..]; + } + } + + /// Removes the prefix that are ignorable on normalization. + // Skips the prefix dot segments without leading slashes (such as `./`, + // `../`, and `../.././`). + // This is necessary because such segments should be removed with the + // FOLLOWING slashes, not leading slashes. + fn remove_ignorable_prefix(&mut self) { + while let Some(seg) = PathSegmentsIter::new(self).next() { + if seg.has_leading_slash { + // The first segment starting with a slash is not target. + break; + } + match seg.kind(self) { + SegmentKind::Dot | SegmentKind::DotDot => { + // Attempt to skip the following slash by `+ 1`. + let skip = self.len().min(seg.range.end + 1); + self.remove_start(skip); + } + SegmentKind::Normal => break, + } + } + } +} + +impl PathToNormalize<'_> { + /// Writes the normalized path. + pub(crate) fn fmt_write_normalize<S: Spec, W: fmt::Write>( + &self, + f: &mut W, + op: NormalizationOp, + authority_is_present: bool, + ) -> fmt::Result { + debug_assert!( + self.0.map_or(true, |s| s.ends_with('/')), + "[validity] the prefix field of `PathToNormalize` should end with a slash" + ); + + if self.is_empty() { + return Ok(()); + } + + if (op.mode == NormalizationMode::PreserveAuthoritylessRelativePath) + && !authority_is_present + && self.byte_at(0) != Some(b'/') + { + // Treat the path as "opaque", i.e. do not apply dot segments removal. + // See <https://github.com/lo48576/iri-string/issues/29>. 
+ debug_assert!( + op.mode.case_pct_normalization(), + "[consistency] case/pct normalization should still be applied" + ); + if let Some(prefix) = self.0 { + write!(f, "{}", PctCaseNormalized::<S>::new(prefix))?; + } + write!(f, "{}", PctCaseNormalized::<S>::new(self.1))?; + return Ok(()); + } + + let mut rest = *self; + + // Skip the prefix dot segments without leading slashes (such as `./`, + // `../`, and `../.././`). + // This is necessary because such segments should be removed with the + // FOLLOWING slashes, not leading slashes. + rest.remove_ignorable_prefix(); + if rest.is_empty() { + // Path consists of only `/.`s and `/..`s. + // In this case, if the authority component is present, the result + // should be `/`, not empty. + if authority_is_present { + f.write_char('/')?; + } + return Ok(()); + } + + // None: No segments are written yet. + // Some(false): Something other than `/` is already written as the path. + // Some(true): Only a `/` is written as the path. + let mut only_a_slash_is_written = None; + let mut too_deep_area_may_have_dot_segments = true; + while !rest.is_empty() && too_deep_area_may_have_dot_segments { + /// The size of the queue to track the path segments. + /// + /// This should be nonzero. + const QUEUE_SIZE: usize = 8; + + { + // Skip `/.` and `/..` segments at the head. + let mut skipped_len = 0; + for seg in PathSegmentsIter::new(&rest) { + match seg.kind(&rest) { + SegmentKind::Dot | SegmentKind::DotDot => { + debug_assert!( + seg.has_leading_slash, + "[consistency] `.` or `..` segments without a + leading slash have already been skipped" + ); + skipped_len = seg.range.end; + } + _ => break, + } + } + rest.remove_start(skipped_len); + if rest.is_empty() { + // Finished with a dot segment. + // The last `/.` or `/..` should be replaced to `/`. + if !authority_is_present && (only_a_slash_is_written == Some(true)) { + // Insert a dot segment to break the prefix `//`. 
+ // Without this, the path starts with `//` and it may + // be confused with the prefix of an authority. + f.write_str(".//")?; + } else { + f.write_char('/')?; + } + break; + } + } + + let mut queue: [Option<&'_ str>; QUEUE_SIZE] = Default::default(); + let mut level: usize = 0; + let mut first_segment_has_leading_slash = false; + + // Find higher path segments. + let mut end = 0; + for seg in PathSegmentsIter::new(&rest) { + let kind = seg.kind(&rest); + match kind { + SegmentKind::Dot => { + too_deep_area_may_have_dot_segments = true; + } + SegmentKind::DotDot => { + level = level.saturating_sub(1); + too_deep_area_may_have_dot_segments = true; + if level < queue.len() { + queue[level] = None; + } + } + SegmentKind::Normal => { + if level < queue.len() { + queue[level] = Some(seg.segment(&rest)); + too_deep_area_may_have_dot_segments = false; + end = seg.range.end; + if level == 0 { + first_segment_has_leading_slash = seg.has_leading_slash; + } + } + level += 1; + } + } + } + + // Write the path segments as possible, and update the internal state. + for segname in queue.iter().flatten() { + Self::emit_segment::<S, _>( + f, + &mut only_a_slash_is_written, + first_segment_has_leading_slash, + segname, + authority_is_present, + op, + )?; + } + + rest.remove_start(end); + } + + if !rest.is_empty() { + // No need of searching dot segments anymore. + assert!( + !too_deep_area_may_have_dot_segments, + "[consistency] loop condition of the previous loop" + ); + // Apply only normalization (if needed). + for seg in PathSegmentsIter::new(&rest) { + assert_eq!( + seg.kind(&rest), + SegmentKind::Normal, + "[consistency] already confirmed that there are no more dot segments" + ); + let segname = seg.segment(&rest); + Self::emit_segment::<S, _>( + f, + &mut only_a_slash_is_written, + seg.has_leading_slash, + segname, + authority_is_present, + op, + )?; + } + } + + Ok(()) + } + + /// Emits a non-dot segment and update the current state. 
+ // + // `first_segment_has_leading_slash` can be any value if the segment is not the first one. + fn emit_segment<S: Spec, W: fmt::Write>( + f: &mut W, + only_a_slash_is_written: &mut Option<bool>, + first_segment_has_leading_slash: bool, + segname: &str, + authority_is_present: bool, + op: NormalizationOp, + ) -> fmt::Result { + // Omit the leading slash of the segment only if the segment is + // the first one and marked as not having a leading slash. + match *only_a_slash_is_written { + None => { + // First segment. + // This pass can be possible if `./` is repeated `QUEUE_SIZE` + // times at the beginning. + if first_segment_has_leading_slash { + f.write_char('/')?; + } + *only_a_slash_is_written = + Some(first_segment_has_leading_slash && segname.is_empty()); + } + Some(only_a_slash) => { + if only_a_slash && !authority_is_present { + // Apply serialization like WHATWG URL Standard. + // This prevents `<scheme=foo>:<path=//bar>` from written as + // `foo://bar`, which is interpreted as + // `<scheme=foo>://<authority=bar>`. Prepending `./`, the + // serialization result would be `foo:/.//bar`, which is safe. + f.write_str("./")?; + *only_a_slash_is_written = Some(false); + } + f.write_char('/')?; + } + } + + // Write the segment name. + if op.mode.case_pct_normalization() { + write!(f, "{}", PctCaseNormalized::<S>::new(segname)) + } else { + f.write_str(segname) + } + } + + /// Checks if the path is normalizable by RFC 3986 algorithm when the authority is absent. + /// + /// Returns `Ok(())` when normalizable, returns `Err(_)` if not. + pub(crate) fn ensure_rfc3986_normalizable_with_authority_absent(&self) -> Result<(), Error> { + /// A sink to get the prefix of the input. + #[derive(Default)] + struct PrefixRetriever { + /// The buffer to remember the prefix of the input. + buf: [u8; 3], + /// The next write position in the buffer. + cursor: usize, + } + impl PrefixRetriever { + /// Returns the read prefix data. 
+ #[inline] + #[must_use] + fn as_bytes(&self) -> &[u8] { + &self.buf[..self.cursor] + } + } + impl fmt::Write for PrefixRetriever { + fn write_str(&mut self, s: &str) -> fmt::Result { + if !s.is_empty() && (self.cursor >= self.buf.len()) { + // Enough bytes are read. + return Err(fmt::Error); + } + self.buf[self.cursor..] + .iter_mut() + .zip(s.bytes()) + .for_each(|(dest, src)| *dest = src); + self.cursor = self.cursor.saturating_add(s.len()).min(self.buf.len()); + Ok(()) + } + } + + let mut prefix = PrefixRetriever::default(); + // The failure of this write indicates more than 3 characters are read. + // This is safe to ignore since the check needs only 3 characters. + let _ = self.fmt_write_normalize::<UriSpec, _>( + &mut prefix, + NormalizationOp { + mode: NormalizationMode::None, + }, + // Assume the authority is absent. + false, + ); + + if prefix.as_bytes() == b"/./" { + Err(Error::new()) + } else { + Ok(()) + } + } +} + +/// Characteristic of a path. +#[derive(Debug, Clone, Copy)] +pub(crate) enum PathCharacteristic { + /// Absolute path, not special. + CommonAbsolute, + /// Relative path, not special. + CommonRelative, + /// The first path segment of the relative path has one or more colon characters. + RelativeFirstSegmentHasColon, + /// The path starts with the double slash. + StartsWithDoubleSlash, +} + +impl PathCharacteristic { + /// Returns true if the path is absolute. + #[inline] + #[must_use] + pub(crate) fn is_absolute(self) -> bool { + matches!(self, Self::CommonAbsolute | Self::StartsWithDoubleSlash) + } + + /// Returns the characteristic of the path. + pub(crate) fn from_path_to_display<S: Spec>( + path: &PathToNormalize<'_>, + op: NormalizationOp, + authority_is_present: bool, + ) -> Self { + /// Dummy writer to get necessary values. + #[derive(Default, Clone, Copy)] + struct Writer { + /// Result. + result: Option<PathCharacteristic>, + /// Whether the normalized path is absolute. 
+ is_absolute: Option<bool>, + } + impl fmt::Write for Writer { + fn write_str(&mut self, mut s: &str) -> fmt::Result { + if self.result.is_some() { + // Nothing more to do. + return Err(fmt::Error); + } + while !s.is_empty() { + if self.is_absolute.is_none() { + // The first input. + match s.strip_prefix('/') { + Some(rest) => { + self.is_absolute = Some(true); + s = rest; + } + None => { + self.is_absolute = Some(false); + } + } + continue; + } + if self.is_absolute == Some(true) { + let result = if s.starts_with('/') { + PathCharacteristic::StartsWithDoubleSlash + } else { + PathCharacteristic::CommonAbsolute + }; + self.result = Some(result); + return Err(fmt::Error); + } + // Processing the first segment of the relative path. + match find_split_hole(s, b'/') { + Some((first_seg, _rest)) => { + let result = if first_seg.contains(':') { + PathCharacteristic::RelativeFirstSegmentHasColon + } else { + PathCharacteristic::CommonRelative + }; + self.result = Some(result); + return Err(fmt::Error); + } + None => { + // `s` might not be the complete first segment. + if s.contains(':') { + self.result = + Some(PathCharacteristic::RelativeFirstSegmentHasColon); + return Err(fmt::Error); + } + break; + } + } + } + Ok(()) + } + } + + let mut writer = Writer::default(); + match path.fmt_write_normalize::<S, _>(&mut writer, op, authority_is_present) { + // Empty path. + Ok(_) => PathCharacteristic::CommonRelative, + Err(_) => writer + .result + .expect("[consistency] the formatting quits early by `Err` when the check is done"), + } + } +} + +/// Path segment kind. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum SegmentKind { + /// `.` or the equivalents. + Dot, + /// `..` or the equivalents. + DotDot, + /// Other normal (not special) segments. + Normal, +} + +impl SegmentKind { + /// Creates a new `SegmentKind` from the given segment name. + #[must_use] + fn from_segment(s: &str) -> Self { + match s { + "." | "%2E" | "%2e" => SegmentKind::Dot, + ".." 
| ".%2E" | ".%2e" | "%2E." | "%2E%2E" | "%2E%2e" | "%2e." | "%2e%2E" + | "%2e%2e" => SegmentKind::DotDot, + _ => SegmentKind::Normal, + } + } +} + +/// A segment with optional leading slash. +#[derive(Debug, Clone)] +struct PathSegment { + /// Presence of a leading slash. + has_leading_slash: bool, + /// Range of the segment name (without any slashes). + range: Range<usize>, +} + +impl PathSegment { + /// Returns the segment without any slashes. + #[inline] + #[must_use] + fn segment<'a>(&self, path: &PathToNormalize<'a>) -> &'a str { + if let Some(prefix) = path.0 { + let prefix_len = prefix.len(); + if self.range.end <= prefix_len { + &prefix[self.range.clone()] + } else { + let range = (self.range.start - prefix_len)..(self.range.end - prefix_len); + &path.1[range] + } + } else { + &path.1[self.range.clone()] + } + } + + /// Returns the segment kind. + #[inline] + #[must_use] + fn kind(&self, path: &PathToNormalize<'_>) -> SegmentKind { + SegmentKind::from_segment(self.segment(path)) + } +} + +/// Iterator of path segments. +struct PathSegmentsIter<'a> { + /// Path. + path: &'a PathToNormalize<'a>, + /// Current cursor position. + cursor: usize, +} + +impl<'a> PathSegmentsIter<'a> { + /// Creates a new iterator of path segments. 
+ #[inline] + #[must_use] + fn new(path: &'a PathToNormalize<'a>) -> Self { + Self { path, cursor: 0 } + } +} + +impl Iterator for PathSegmentsIter<'_> { + type Item = PathSegment; + + fn next(&mut self) -> Option<Self::Item> { + let path_len = self.path.len(); + if self.cursor >= path_len { + return None; + } + let has_leading_slash = self.path.byte_at(self.cursor) == Some(b'/'); + + let prefix_len = self.path.len_prefix(); + if (prefix_len != 0) && (self.cursor == prefix_len - 1) { + debug_assert!(has_leading_slash); + let end = self.path.1.find('/').unwrap_or(self.path.1.len()) + prefix_len; + self.cursor = end; + return Some(PathSegment { + has_leading_slash, + range: prefix_len..end, + }); + } + + if has_leading_slash { + // Skip the leading slash. + self.cursor += 1; + }; + let start = self.cursor; + self.cursor = self.path.find_next_slash(self.cursor).unwrap_or(path_len); + + Some(PathSegment { + has_leading_slash, + range: start..self.cursor, + }) + } +} diff --git a/vendor/iri-string/src/normalize/pct_case.rs b/vendor/iri-string/src/normalize/pct_case.rs new file mode 100644 index 00000000..75e0a777 --- /dev/null +++ b/vendor/iri-string/src/normalize/pct_case.rs @@ -0,0 +1,358 @@ +//! Percent-encoding normalization and case normalization. + +use core::cmp::Ordering; +use core::fmt::{self, Write as _}; +use core::marker::PhantomData; + +use crate::format::eq_str_display; +use crate::parser::char::{is_ascii_unreserved, is_unreserved, is_utf8_byte_continue}; +use crate::parser::str::{find_split_hole, take_first_char}; +use crate::parser::trusted::take_xdigits2; +use crate::spec::Spec; + +/// Returns true if the given string is percent-encoding normalized and case +/// normalized. +/// +/// Note that normalization of ASCII-only host requires additional case +/// normalization, so checking by this function is not sufficient for that case. 
+pub(crate) fn is_pct_case_normalized<S: Spec>(s: &str) -> bool { + eq_str_display(s, &PctCaseNormalized::<S>::new(s)) +} + +/// Returns a character for the slice. +/// +/// Essentially equivalent to `core::str::from_utf8(bytes).unwrap().and_then(|s| s.get(0))`, +/// but this function fully trusts that the input is a valid UTF-8 string with +/// only one character. +fn into_char_trusted(bytes: &[u8]) -> Result<char, ()> { + /// The bit mask to get the content part in a continue byte. + const CONTINUE_BYTE_MASK: u8 = 0b_0011_1111; + /// Minimum valid values for a code point in a UTF-8 sequence of 2, 3, and 4 bytes. + const MIN: [u32; 3] = [0x80, 0x800, 0x1_0000]; + + let len = bytes.len(); + let c: u32 = match len { + 2 => (u32::from(bytes[0] & 0b_0001_1111) << 6) | u32::from(bytes[1] & CONTINUE_BYTE_MASK), + 3 => { + (u32::from(bytes[0] & 0b_0000_1111) << 12) + | (u32::from(bytes[1] & CONTINUE_BYTE_MASK) << 6) + | u32::from(bytes[2] & CONTINUE_BYTE_MASK) + } + 4 => { + (u32::from(bytes[0] & 0b_0000_0111) << 18) + | (u32::from(bytes[1] & CONTINUE_BYTE_MASK) << 12) + | (u32::from(bytes[2] & CONTINUE_BYTE_MASK) << 6) + | u32::from(bytes[3] & CONTINUE_BYTE_MASK) + } + len => unreachable!( + "[consistency] expected 2, 3, or 4 bytes for a character, but got {len} as the length" + ), + }; + if c < MIN[len - 2] { + // Redundant UTF-8 encoding. + return Err(()); + } + // Can be an invalid Unicode code point. + char::from_u32(c).ok_or(()) +} + +/// Writable as a normalized path segment percent-encoding IRI. +/// +/// This wrapper does the things below when being formatted: +/// +/// * Decode unnecessarily percent-encoded characters. +/// * Convert alphabetic characters uppercase in percent-encoded triplets. +/// +/// Note that this does not newly encode raw characters. +/// +/// # Safety +/// +/// The given string should be the valid path segment. +#[derive(Debug, Clone, Copy)] +pub(crate) struct PctCaseNormalized<'a, S> { + /// Valid segment name to normalize. 
+ segname: &'a str, + /// Spec. + _spec: PhantomData<fn() -> S>, +} + +impl<'a, S: Spec> PctCaseNormalized<'a, S> { + /// Creates a new `PctCaseNormalized` value. + #[inline] + #[must_use] + pub(crate) fn new(source: &'a str) -> Self { + Self { + segname: source, + _spec: PhantomData, + } + } +} + +impl<S: Spec> fmt::Display for PctCaseNormalized<'_, S> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut rest = self.segname; + + 'outer_loop: while !rest.is_empty() { + // Scan the next percent-encoded triplet. + let (prefix, after_percent) = match find_split_hole(rest, b'%') { + Some(v) => v, + None => return f.write_str(rest), + }; + // Write the string before the percent-encoded triplet. + f.write_str(prefix)?; + // Decode the percent-encoded triplet. + let (first_decoded, after_first_triplet) = take_xdigits2(after_percent); + rest = after_first_triplet; + + if first_decoded.is_ascii() { + if is_ascii_unreserved(first_decoded) { + // Unreserved. Print the decoded. + f.write_char(char::from(first_decoded))?; + } else { + write!(f, "%{:02X}", first_decoded)?; + } + continue 'outer_loop; + } + + // Continue byte cannot be the first byte of a character. + if is_utf8_byte_continue(first_decoded) { + write!(f, "%{:02X}", first_decoded)?; + continue 'outer_loop; + } + + // Get the expected length of decoded char. + let expected_char_len = match (first_decoded & 0xf0).cmp(&0b1110_0000) { + Ordering::Less => 2, + Ordering::Equal => 3, + Ordering::Greater => 4, + }; + + // Get continue bytes. + let c_buf = &mut [first_decoded, 0, 0, 0][..expected_char_len]; + for (i, buf_dest) in c_buf[1..].iter_mut().enumerate() { + match take_first_char(rest) { + Some(('%', after_percent)) => { + let (byte, after_triplet) = take_xdigits2(after_percent); + if !is_utf8_byte_continue(byte) { + // Note that `byte` can start the new string. + // Leave the byte in the `rest` for next try (i.e. + // don't update `rest` in this case). 
+ c_buf[..=i] + .iter() + .try_for_each(|b| write!(f, "%{:02X}", b))?; + continue 'outer_loop; + } + *buf_dest = byte; + rest = after_triplet; + } + // If the next character is not `%`, decoded bytes so far + // won't be valid UTF-8 byte sequence. + // Write the read percent-encoded triplets without decoding. + // Note that all characters in `&c_buf[1..]` (if available) + // will be decoded to "continue byte" of UTF-8, so they + // cannot be the start of a valid UTF-8 byte sequence if + // decoded. + Some((c, after_percent)) => { + c_buf[..=i] + .iter() + .try_for_each(|b| write!(f, "%{:02X}", b))?; + f.write_char(c)?; + rest = after_percent; + continue 'outer_loop; + } + None => { + c_buf[..=i] + .iter() + .try_for_each(|b| write!(f, "%{:02X}", b))?; + // Reached the end of the string. + break 'outer_loop; + } + } + } + + // Decode the bytes into a character. + match into_char_trusted(&c_buf[..expected_char_len]) { + Ok(decoded_c) => { + if is_unreserved::<S>(decoded_c) { + // Unreserved. Print the decoded. + f.write_char(decoded_c)?; + } else { + c_buf[0..expected_char_len] + .iter() + .try_for_each(|b| write!(f, "%{:02X}", b))?; + } + } + Err(_) => { + // Skip decoding of the entire sequence of pct-encoded triplets loaded + // in `c_buf`. This is valid from the reasons below. + // + // * The first byte in `c_buf` is valid as the first byte, and it tells the + // expected number of bytes for a code unit. The cases the bytes being too + // short and the sequence being incomplete have already been handled, and + // the execution does not reach here then. + // * All of the non-first bytes are checked if they are valid as UTF8 continue + // bytes by `is_utf8_byte_continue()`. If they're not, the decoding of + // that codepoint is aborted and the bytes in the buffer are immediately + // emitted as pct-encoded, and the execution does not reach here. This + // means that the bytes in the current `c_buf` have passed these tests. 
+ // * Since all of the non-first bytes are UTF8 continue bytes, any of + // them cannot start the new valid UTF-8 byte sequence. This means that + // if the bytes in the buffer do not constitute a valid UTF-8 byte + // sequence, the whole buffer can immediately be emitted as pct-encoded. + + debug_assert!( + c_buf[1..expected_char_len] + .iter() + .copied() + .all(is_utf8_byte_continue), + "[consistency] all non-first bytes have been \ + confirmed that they are UTF-8 continue bytes" + ); + // Note that the first pct-encoded triplet is stripped from + // `after_first_triplet`. + rest = &after_first_triplet[((expected_char_len - 1) * 3)..]; + c_buf[0..expected_char_len] + .iter() + .try_for_each(|b| write!(f, "%{:02X}", b))?; + } + } + } + + Ok(()) + } +} + +/// Writable as a normalized ASCII-only `host` (and optionally `port` followed). +#[derive(Debug, Clone, Copy)] +pub(crate) struct NormalizedAsciiOnlyHost<'a> { + /// Valid host (and additionally port) to normalize. + host_port: &'a str, +} + +impl<'a> NormalizedAsciiOnlyHost<'a> { + /// Creates a new `NormalizedAsciiOnlyHost` value. + /// + /// # Preconditions + /// + /// The given string should be the valid ASCII-only `host` or + /// `host ":" port` after percent-encoding normalization. + /// In other words, [`parser::trusted::is_ascii_only_host`] should return + /// true for the given value. + /// + /// [`parser::trusted::is_ascii_only_host`]: `crate::parser::trusted::is_ascii_only_host` + #[inline] + #[must_use] + pub(crate) fn new(host_port: &'a str) -> Self { + Self { host_port } + } +} + +impl fmt::Display for NormalizedAsciiOnlyHost<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut rest = self.host_port; + + while !rest.is_empty() { + // Scan the next percent-encoded triplet. 
+ let (prefix, after_percent) = match find_split_hole(rest, b'%') { + Some(v) => v, + None => { + return rest + .chars() + .try_for_each(|c| f.write_char(c.to_ascii_lowercase())); + } + }; + // Write the string before the percent-encoded triplet. + prefix + .chars() + .try_for_each(|c| f.write_char(c.to_ascii_lowercase()))?; + // Decode the percent-encoded triplet. + let (first_decoded, after_triplet) = take_xdigits2(after_percent); + rest = after_triplet; + + assert!( + first_decoded.is_ascii(), + "[consistency] this function requires ASCII-only host as an argument" + ); + + if is_ascii_unreserved(first_decoded) { + // Unreserved. Convert to lowercase and print. + f.write_char(char::from(first_decoded.to_ascii_lowercase()))?; + } else { + write!(f, "%{:02X}", first_decoded)?; + } + } + + Ok(()) + } +} + +#[cfg(test)] +#[cfg(feature = "alloc")] +mod tests { + use super::*; + + #[cfg(all(feature = "alloc", not(feature = "std")))] + use alloc::string::ToString; + + use crate::spec::{IriSpec, UriSpec}; + + #[test] + fn invalid_utf8() { + assert_eq!( + PctCaseNormalized::<UriSpec>::new("%80%cc%cc%cc").to_string(), + "%80%CC%CC%CC" + ); + assert_eq!( + PctCaseNormalized::<IriSpec>::new("%80%cc%cc%cc").to_string(), + "%80%CC%CC%CC" + ); + } + + #[test] + fn iri_unreserved() { + assert_eq!( + PctCaseNormalized::<UriSpec>::new("%ce%b1").to_string(), + "%CE%B1" + ); + assert_eq!( + PctCaseNormalized::<IriSpec>::new("%ce%b1").to_string(), + "\u{03B1}" + ); + } + + #[test] + fn iri_middle_decode() { + assert_eq!( + PctCaseNormalized::<UriSpec>::new("%ce%ce%b1%b1").to_string(), + "%CE%CE%B1%B1" + ); + assert_eq!( + PctCaseNormalized::<IriSpec>::new("%ce%ce%b1%b1").to_string(), + "%CE\u{03B1}%B1" + ); + } + + #[test] + fn ascii_reserved() { + assert_eq!(PctCaseNormalized::<UriSpec>::new("%3f").to_string(), "%3F"); + assert_eq!(PctCaseNormalized::<IriSpec>::new("%3f").to_string(), "%3F"); + } + + #[test] + fn ascii_forbidden() { + assert_eq!( + 
PctCaseNormalized::<UriSpec>::new("%3c%3e").to_string(), + "%3C%3E" + ); + assert_eq!( + PctCaseNormalized::<IriSpec>::new("%3c%3e").to_string(), + "%3C%3E" + ); + } + + #[test] + fn ascii_unreserved() { + assert_eq!(PctCaseNormalized::<UriSpec>::new("%7ea").to_string(), "~a"); + assert_eq!(PctCaseNormalized::<IriSpec>::new("%7ea").to_string(), "~a"); + } +} diff --git a/vendor/iri-string/src/parser.rs b/vendor/iri-string/src/parser.rs new file mode 100644 index 00000000..35a4d475 --- /dev/null +++ b/vendor/iri-string/src/parser.rs @@ -0,0 +1,6 @@ +//! Common stuff for parsing. + +pub(crate) mod char; +pub(crate) mod str; +pub(crate) mod trusted; +pub(crate) mod validate; diff --git a/vendor/iri-string/src/parser/char.rs b/vendor/iri-string/src/parser/char.rs new file mode 100644 index 00000000..2455498e --- /dev/null +++ b/vendor/iri-string/src/parser/char.rs @@ -0,0 +1,323 @@ +//! Characters. + +use crate::spec::Spec; + +/// A mask to test whether the character is continue character of `scheme`. +// `ALPHA / DIGIT / "+" / "-" / "."` +const MASK_SCHEME_CONTINUE: u8 = 1 << 0; + +/// A mask to test whether the character matches `unreserved`. +// `unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"` +const MASK_UNRESERVED: u8 = 1 << 1; + +/// A mask to test whether the character matches `gen-delims`. +// `gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"` +const MASK_GEN_DELIMS: u8 = 1 << 2; + +/// A mask to test whether the character matches `sub-delims`. +// `sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="` +const MASK_SUB_DELIMS: u8 = 1 << 3; + +/// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes). +// `pchar = unreserved / pct-encoded / sub-delims / ":" / "@"` +const MASK_PCHAR: u8 = 1 << 4; + +/// A mask to test whether the character can appear in `query` and `fragment`. +// `query = *( pchar / "/" / "?" )` +// `fragment = *( pchar / "/" / "?" 
)` +const MASK_FRAG_QUERY: u8 = 1 << 5; + +/// A mask to test whether the character can appear in `userinfo` and address of `IPvFuture`. +// `userinfo = *( unreserved / pct-encoded / sub-delims / ":" )` +const MASK_USERINFO_IPVFUTUREADDR: u8 = 1 << 6; + +/// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes) or slash. +const MASK_PCHAR_SLASH: u8 = 1 << 7; + +/// ASCII characters' properties. +const TABLE: [u8; 128] = [ + 0b_0000_0000, // NUL + 0b_0000_0000, // SOH + 0b_0000_0000, // STX + 0b_0000_0000, // ETX + 0b_0000_0000, // EOT + 0b_0000_0000, // ENQ + 0b_0000_0000, // ACK + 0b_0000_0000, // BEL + 0b_0000_0000, // BS + 0b_0000_0000, // HT + 0b_0000_0000, // LF + 0b_0000_0000, // VT + 0b_0000_0000, // FF + 0b_0000_0000, // CR + 0b_0000_0000, // SO + 0b_0000_0000, // SI + 0b_0000_0000, // DLE + 0b_0000_0000, // DC1 + 0b_0000_0000, // DC2 + 0b_0000_0000, // DC3 + 0b_0000_0000, // DC4 + 0b_0000_0000, // NAK + 0b_0000_0000, // SYN + 0b_0000_0000, // ETB + 0b_0000_0000, // CAN + 0b_0000_0000, // EM + 0b_0000_0000, // SUB + 0b_0000_0000, // ESC + 0b_0000_0000, // FS + 0b_0000_0000, // GS + 0b_0000_0000, // RS + 0b_0000_0000, // US + 0b_0000_0000, // SPACE + 0b_1111_1000, // ! + 0b_0000_0000, // " + 0b_0000_0100, // # + 0b_1111_1000, // $ + 0b_0000_0000, // % + 0b_1111_1000, // & + 0b_1111_1000, // ' + 0b_1111_1000, // ( + 0b_1111_1000, // ) + 0b_1111_1000, // * + 0b_1111_1001, // + + 0b_1111_1000, // , + 0b_1111_0011, // - + 0b_1111_0011, // . + 0b_1010_0100, // / + 0b_1111_0011, // 0 + 0b_1111_0011, // 1 + 0b_1111_0011, // 2 + 0b_1111_0011, // 3 + 0b_1111_0011, // 4 + 0b_1111_0011, // 5 + 0b_1111_0011, // 6 + 0b_1111_0011, // 7 + 0b_1111_0011, // 8 + 0b_1111_0011, // 9 + 0b_1111_0100, // : + 0b_1111_1000, // ; + 0b_0000_0000, // < + 0b_1111_1000, // = + 0b_0000_0000, // > + 0b_0010_0100, // ? 
+ 0b_1011_0100, // @ + 0b_1111_0011, // A + 0b_1111_0011, // B + 0b_1111_0011, // C + 0b_1111_0011, // D + 0b_1111_0011, // E + 0b_1111_0011, // F + 0b_1111_0011, // G + 0b_1111_0011, // H + 0b_1111_0011, // I + 0b_1111_0011, // J + 0b_1111_0011, // K + 0b_1111_0011, // L + 0b_1111_0011, // M + 0b_1111_0011, // N + 0b_1111_0011, // O + 0b_1111_0011, // P + 0b_1111_0011, // Q + 0b_1111_0011, // R + 0b_1111_0011, // S + 0b_1111_0011, // T + 0b_1111_0011, // U + 0b_1111_0011, // V + 0b_1111_0011, // W + 0b_1111_0011, // X + 0b_1111_0011, // Y + 0b_1111_0011, // Z + 0b_0000_0100, // [ + 0b_0000_0000, // \ + 0b_0000_0100, // ] + 0b_0000_0000, // ^ + 0b_1111_0010, // _ + 0b_0000_0000, // ` + 0b_1111_0011, // a + 0b_1111_0011, // b + 0b_1111_0011, // c + 0b_1111_0011, // d + 0b_1111_0011, // e + 0b_1111_0011, // f + 0b_1111_0011, // g + 0b_1111_0011, // h + 0b_1111_0011, // i + 0b_1111_0011, // j + 0b_1111_0011, // k + 0b_1111_0011, // l + 0b_1111_0011, // m + 0b_1111_0011, // n + 0b_1111_0011, // o + 0b_1111_0011, // p + 0b_1111_0011, // q + 0b_1111_0011, // r + 0b_1111_0011, // s + 0b_1111_0011, // t + 0b_1111_0011, // u + 0b_1111_0011, // v + 0b_1111_0011, // w + 0b_1111_0011, // x + 0b_1111_0011, // y + 0b_1111_0011, // z + 0b_0000_0000, // { + 0b_0000_0000, // | + 0b_0000_0000, // } + 0b_1111_0010, // ~ + 0b_0000_0000, // DEL +]; + +/// Returns `true` if the given ASCII character is allowed as continue character of `scheme` part. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_scheme_continue(c: u8) -> bool { + (TABLE[c as usize] & MASK_SCHEME_CONTINUE) != 0 +} + +/// Returns `true` if the given ASCII character matches `unreserved`. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_unreserved(c: u8) -> bool { + (TABLE[c as usize] & MASK_UNRESERVED) != 0 +} + +/// Returns true if the character is unreserved. 
+#[inline] +#[must_use] +pub(crate) fn is_unreserved<S: Spec>(c: char) -> bool { + if c.is_ascii() { + is_ascii_unreserved(c as u8) + } else { + S::is_nonascii_char_unreserved(c) + } +} + +///// Returns `true` if the given ASCII character matches `gen-delims`. +//#[inline] +//#[must_use] +//pub(crate) const fn is_ascii_gen_delims(c: u8) -> bool { +// (TABLE[c as usize] & MASK_GEN_DELIMS) != 0 +//} + +///// Returns `true` if the given ASCII character matches `sub-delims`. +//#[inline] +//#[must_use] +//pub(crate) const fn is_ascii_sub_delims(c: u8) -> bool { +// (TABLE[c as usize] & MASK_SUB_DELIMS) != 0 +//} + +///// Returns `true` if the given ASCII character matches `reserved`. +//#[inline] +//#[must_use] +//pub(crate) const fn is_ascii_reserved(c: u8) -> bool { +// (TABLE[c as usize] & (MASK_GEN_DELIMS | MASK_SUB_DELIMS)) != 0 +//} + +/// Returns `true` if the given ASCII character matches `pchar` modulo `pct-encoded`. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_pchar(c: u8) -> bool { + (TABLE[c as usize] & MASK_PCHAR) != 0 +} + +/// Returns `true` if the given ASCII character is allowed to appear in `query` and `fragment`. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_frag_query(c: u8) -> bool { + (TABLE[c as usize] & MASK_FRAG_QUERY) != 0 +} + +/// Returns `true` if the given non-ASCII character is allowed to appear in `iquery`. +#[inline] +#[must_use] +pub(crate) fn is_nonascii_query<S: Spec>(c: char) -> bool { + S::is_nonascii_char_unreserved(c) || S::is_nonascii_char_private(c) +} + +/// Returns `true` if the given non-ASCII character is allowed to appear in `ifragment`. +#[inline] +#[must_use] +pub(crate) fn is_nonascii_fragment<S: Spec>(c: char) -> bool { + S::is_nonascii_char_unreserved(c) +} + +/// Returns `true` if the given ASCII character is allowed to appear in `userinfo` and `IPvFuture`. 
+#[inline] +#[must_use] +pub(crate) const fn is_ascii_userinfo_ipvfutureaddr(c: u8) -> bool { + (TABLE[c as usize] & MASK_USERINFO_IPVFUTUREADDR) != 0 +} + +/// Returns `true` if the given non-ASCII character is allowed to appear in `iuserinfo`. +#[inline] +#[must_use] +pub(crate) fn is_nonascii_userinfo<S: Spec>(c: char) -> bool { + S::is_nonascii_char_unreserved(c) +} + +/// Returns `true` if the given ASCII character is allowed to appear in `reg-name` +#[inline] +#[must_use] +pub(crate) const fn is_ascii_regname(c: u8) -> bool { + (TABLE[c as usize] & (MASK_UNRESERVED | MASK_SUB_DELIMS)) != 0 +} + +/// Returns `true` if the given non-ASCII character is allowed to appear in `ireg-name`. +#[inline] +#[must_use] +pub(crate) fn is_nonascii_regname<S: Spec>(c: char) -> bool { + S::is_nonascii_char_unreserved(c) +} + +/// Returns `true` if the given ASCII character matches `pchar` modulo `pct-encoded` or a slash. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_pchar_slash(c: u8) -> bool { + (TABLE[c as usize] & MASK_PCHAR_SLASH) != 0 +} + +/// Checks if the given character matches `ucschar` rule. +#[must_use] +pub(crate) fn is_ucschar(c: char) -> bool { + matches!( + u32::from(c), + 0xA0..=0xD7FF | + 0xF900..=0xFDCF | + 0xFDF0..=0xFFEF | + 0x1_0000..=0x1_FFFD | + 0x2_0000..=0x2_FFFD | + 0x3_0000..=0x3_FFFD | + 0x4_0000..=0x4_FFFD | + 0x5_0000..=0x5_FFFD | + 0x6_0000..=0x6_FFFD | + 0x7_0000..=0x7_FFFD | + 0x8_0000..=0x8_FFFD | + 0x9_0000..=0x9_FFFD | + 0xA_0000..=0xA_FFFD | + 0xB_0000..=0xB_FFFD | + 0xC_0000..=0xC_FFFD | + 0xD_0000..=0xD_FFFD | + 0xE_1000..=0xE_FFFD + ) +} + +/// Returns true if the given value is a continue byte of UTF-8. +#[inline(always)] +#[must_use] +pub(crate) fn is_utf8_byte_continue(byte: u8) -> bool { + // `0x80..=0xbf` (i.e. `0b_1000_0000..=0b_1011_1111`) is not the first byte, + // and `0xc0..=0xc1` (i.e. `0b_1100_0000..=0b_1100_0001` shouldn't appear + // anywhere in UTF-8 byte sequence. 
+ // `0x80 as i8` is -128, and `0xc0 as i8` is -96. + // + // The first byte of the UTF-8 character is not `0b10xx_xxxx`, and + // the continue bytes is `0b10xx_xxxx`. + // `0b1011_1111 as i8` is -65, and `0b1000_0000 as i8` is -128. + (byte as i8) < -64 +} + +/// Returns true if the given ASCII character is `unreserved` or `reserved`. +#[inline] +#[must_use] +pub(crate) const fn is_ascii_unreserved_or_reserved(c: u8) -> bool { + (TABLE[c as usize] & (MASK_UNRESERVED | MASK_GEN_DELIMS | MASK_SUB_DELIMS)) != 0 +} diff --git a/vendor/iri-string/src/parser/str.rs b/vendor/iri-string/src/parser/str.rs new file mode 100644 index 00000000..0f564bfa --- /dev/null +++ b/vendor/iri-string/src/parser/str.rs @@ -0,0 +1,390 @@ +//! Functions for common string operations. + +pub(crate) use self::maybe_pct_encoded::{ + process_percent_encoded_best_effort, PctEncodedFragments, +}; + +mod maybe_pct_encoded; + +/// Returns the inner string if wrapped. +#[must_use] +pub(crate) fn get_wrapped_inner(s: &str, open: u8, close: u8) -> Option<&str> { + let (prefix, suffix) = match s.as_bytes() { + [prefix, suffix] | [prefix, .., suffix] => (*prefix, *suffix), + _ => return None, + }; + if (prefix == open) && (suffix == close) { + Some(&s[1..(s.len() - 1)]) + } else { + None + } +} + +/// Returns the byte that appears first. +#[cfg(not(feature = "memchr"))] +#[inline] +#[must_use] +pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> { + haystack + .iter() + .copied() + .find(|&b| b == needle1 || b == needle2) +} + +/// Returns the byte that appears first. +#[cfg(feature = "memchr")] +#[inline] +#[must_use] +pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> { + memchr::memchr2(needle1, needle2, haystack).map(|pos| haystack[pos]) +} + +/// (Possibly) faster version of `haystack.rfind(needle)` when `needle` is an ASCII character. 
+#[cfg(not(feature = "memchr"))] +#[inline] +#[must_use] +pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> { + haystack.iter().rposition(|&b| b == needle) +} + +/// (Possibly) faster version of `haystack.rfind(needle)` when `needle` is an ASCII character. +#[cfg(feature = "memchr")] +#[inline] +#[must_use] +pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> { + memchr::memrchr(needle, haystack) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If `needle` is not found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .position(|b| b == needle) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If `needle` is not found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> { + memchr::memchr(needle, haystack.as_bytes()).map(|pos| haystack.split_at(pos)) +} + +/// Finds the last needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .rposition(|b| b == needle1 || b == needle2) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the last needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> { + memchr::memrchr2(needle1, needle2, haystack.as_bytes()).map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. 
+#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .position(|b| b == needle1 || b == needle2) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> { + memchr::memchr2(needle1, needle2, haystack.as_bytes()).map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split3( + haystack: &str, + needle1: u8, + needle2: u8, + needle3: u8, +) -> Option<(&str, &str)> { + haystack + .bytes() + .position(|b| b == needle1 || b == needle2 || b == needle3) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and the rest. +/// +/// If no needles are found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split3( + haystack: &str, + needle1: u8, + needle2: u8, + needle3: u8, +) -> Option<(&str, &str)> { + memchr::memchr3(needle1, needle2, needle3, haystack.as_bytes()) + .map(|pos| haystack.split_at(pos)) +} + +/// Finds the first needle, and returns the string before it and after it. +/// +/// If `needle` is not found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .position(|b| b == needle) + .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..])) +} + +/// Finds the first needle, and returns the string before it and after it. +/// +/// If `needle` is not found, returns `None`. 
+#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> { + memchr::memchr(needle, haystack.as_bytes()) + .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..])) +} + +/// Finds the first needle, and returns the string before it, the needle, and the string after it. +/// +/// If no needles are found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split2_hole( + haystack: &str, + needle1: u8, + needle2: u8, +) -> Option<(&str, u8, &str)> { + haystack + .bytes() + .position(|b| b == needle1 || b == needle2) + .map(|pos| { + ( + &haystack[..pos], + haystack.as_bytes()[pos], + &haystack[(pos + 1)..], + ) + }) +} + +/// Finds the first needle, and returns the string before it, the needle, and the string after it. +/// +/// If no needles are found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split2_hole( + haystack: &str, + needle1: u8, + needle2: u8, +) -> Option<(&str, u8, &str)> { + memchr::memchr2(needle1, needle2, haystack.as_bytes()).map(|pos| { + ( + &haystack[..pos], + haystack.as_bytes()[pos], + &haystack[(pos + 1)..], + ) + }) +} + +/// Finds the first needle, and returns the string before it, the needle, and the string after it. +/// +/// If no needles are found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn find_split4_hole( + haystack: &str, + needle1: u8, + needle2: u8, + needle3: u8, + needle4: u8, +) -> Option<(&str, u8, &str)> { + haystack + .bytes() + .position(|b| b == needle1 || b == needle2 || b == needle3 || b == needle4) + .map(|pos| { + ( + &haystack[..pos], + haystack.as_bytes()[pos], + &haystack[(pos + 1)..], + ) + }) +} + +/// Finds the first needle, and returns the string before it, the needle, and the string after it. +/// +/// If no needles are found, returns `None`. 
+#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn find_split4_hole( + haystack: &str, + needle1: u8, + needle2: u8, + needle3: u8, + needle4: u8, +) -> Option<(&str, u8, &str)> { + let bytes = haystack.as_bytes(); + let pos = match memchr::memchr3(needle1, needle2, needle3, bytes) { + Some(prefix_len) => memchr::memchr(needle4, &bytes[..prefix_len]).or(Some(prefix_len)), + None => memchr::memchr(needle4, bytes), + }; + pos.map(|pos| { + ( + &haystack[..pos], + haystack.as_bytes()[pos], + &haystack[(pos + 1)..], + ) + }) +} + +/// Finds the last needle, and returns the string before it and after it. +/// +/// If `needle` is not found, returns `None`. +#[cfg(not(feature = "memchr"))] +#[must_use] +pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> { + haystack + .bytes() + .rposition(|b| b == needle) + .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..])) +} + +/// Finds the last needle, and returns the string before it and after it. +/// +/// If `needle` is not found, returns `None`. +#[cfg(feature = "memchr")] +#[must_use] +pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> { + memchr::memrchr(needle, haystack.as_bytes()) + .map(|pos| (&haystack[..pos], &haystack[(pos + 1)..])) +} + +/// Returns `true` if the string only contains the allowed characters. +#[must_use] +fn satisfy_chars<F, G>(mut s: &str, pred_ascii: F, pred_nonascii: G) -> bool +where + F: Copy + Fn(u8) -> bool, + G: Copy + Fn(char) -> bool, +{ + while !s.is_empty() { + match s.bytes().position(|b| !b.is_ascii()) { + Some(nonascii_pos) => { + // Valdiate ASCII prefix. + if nonascii_pos != 0 { + let (prefix, rest) = s.split_at(nonascii_pos); + if !prefix.bytes().all(pred_ascii) { + return false; + } + s = rest; + } + + // Extract non-ASCII part and validate it. 
+ let (prefix, rest) = match s.bytes().position(|b| b.is_ascii()) { + Some(ascii_pos) => s.split_at(ascii_pos), + None => (s, ""), + }; + if !prefix.chars().all(pred_nonascii) { + return false; + } + s = rest; + } + None => { + // All chars are ASCII. + return s.bytes().all(pred_ascii); + } + } + } + + true +} + +/// Returns `true` if the string only contains the allowed characters and percent-encoded char. +#[must_use] +pub(crate) fn satisfy_chars_with_pct_encoded<F, G>( + mut s: &str, + pred_ascii: F, + pred_nonascii: G, +) -> bool +where + F: Copy + Fn(u8) -> bool, + G: Copy + Fn(char) -> bool, +{ + while let Some((prefix, suffix)) = find_split_hole(s, b'%') { + // Verify strings before the percent-encoded char. + if !prefix.is_empty() && !satisfy_chars(prefix, pred_ascii, pred_nonascii) { + return false; + } + + // Verify the percent-encoded char. + if !starts_with_double_hexdigits(suffix.as_bytes()) { + return false; + } + + // Advance the cursor. + s = &suffix[2..]; + } + + // Verify the rest. + satisfy_chars(s, pred_ascii, pred_nonascii) +} + +/// Returns `true` if the given string starts with two hexadecimal digits. +#[must_use] +pub(crate) fn starts_with_double_hexdigits(s: &[u8]) -> bool { + match s { + [x, y] | [x, y, ..] => x.is_ascii_hexdigit() && y.is_ascii_hexdigit(), + _ => false, + } +} + +/// Strips the first character if it is the given ASCII character, and returns the rest. +/// +/// # Precondition +/// +/// The given ASCII character (`prefix`) should be an ASCII character. +#[must_use] +pub(crate) fn strip_ascii_char_prefix(s: &str, prefix: u8) -> Option<&str> { + debug_assert!(prefix.is_ascii()); + if s.as_bytes().first().copied() == Some(prefix) { + Some(&s[1..]) + } else { + None + } +} + +/// Splits the given string into the first character and the rest. +/// +/// Returns `(first_char, rest_str)`. 
+#[must_use] +pub(crate) fn take_first_char(s: &str) -> Option<(char, &str)> { + let mut chars = s.chars(); + let c = chars.next()?; + let rest = chars.as_str(); + Some((c, rest)) +} diff --git a/vendor/iri-string/src/parser/str/maybe_pct_encoded.rs b/vendor/iri-string/src/parser/str/maybe_pct_encoded.rs new file mode 100644 index 00000000..617f006a --- /dev/null +++ b/vendor/iri-string/src/parser/str/maybe_pct_encoded.rs @@ -0,0 +1,369 @@ +//! Processor for possibly- or invalidly-percent-encoded strings. + +use core::fmt::{self, Write as _}; +use core::marker::PhantomData; +use core::num::NonZeroU8; +use core::ops::ControlFlow; + +use crate::parser::str::find_split; +use crate::parser::trusted::hexdigits_to_byte; + +/// Fragment in a possibly percent-encoded (and possibly broken) string. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum PctEncodedFragments<'a> { + /// String fragment without percent-encoded triplets. + NoPctStr(&'a str), + /// Stray `%` (percent) character. + StrayPercent, + /// Valid percent-encoded triplets for a character. + Char(&'a str, char), + /// Percent-encoded triplets that does not consists of a valid UTF-8 sequence. + InvalidUtf8PctTriplets(&'a str), +} + +/// Processes characters in a string which may contain (possibly invalid) percent-encoded triplets. +pub(crate) fn process_percent_encoded_best_effort<T, F, B>( + v: T, + mut f: F, +) -> Result<ControlFlow<B>, fmt::Error> +where + T: fmt::Display, + F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>, +{ + let mut buf = [0_u8; 12]; + let mut writer = DecomposeWriter { + f: &mut f, + decoder: Default::default(), + buf: &mut buf, + result: ControlFlow::Continue(()), + _r: PhantomData, + }; + + if write!(writer, "{v}").is_err() { + match writer.result { + ControlFlow::Continue(_) => return Err(fmt::Error), + ControlFlow::Break(v) => return Ok(ControlFlow::Break(v)), + } + } + + // Flush the internal buffer of the decoder. 
+ if let Some(len) = writer.decoder.flush(&mut buf).map(|v| usize::from(v.get())) { + let len_suffix = len % 3; + let triplets_end = len - len_suffix; + let triplets = core::str::from_utf8(&buf[..triplets_end]) + .expect("[validity] percent-encoded triplets consist of ASCII characters"); + if let ControlFlow::Break(v) = f(PctEncodedFragments::InvalidUtf8PctTriplets(triplets)) { + return Ok(ControlFlow::Break(v)); + } + + if len_suffix > 0 { + if let ControlFlow::Break(v) = f(PctEncodedFragments::StrayPercent) { + return Ok(ControlFlow::Break(v)); + } + } + if len_suffix > 1 { + let after_percent = core::str::from_utf8( + &buf[(triplets_end + 1)..(triplets_end + len_suffix)], + ) + .expect("[consistency] percent-encoded triplets contains only ASCII characters"); + if let ControlFlow::Break(v) = f(PctEncodedFragments::NoPctStr(after_percent)) { + return Ok(ControlFlow::Break(v)); + } + } + } + + Ok(ControlFlow::Continue(())) +} + +/// Writer to decompose the input into fragments. +struct DecomposeWriter<'a, F, B> { + /// Output function. + f: &'a mut F, + /// Decoder. + decoder: DecoderBuffer, + /// Buffer. + buf: &'a mut [u8], + /// Result of the last output function call. + result: ControlFlow<B>, + /// Dummy field for the type parameter of the return type of the function `f`. + _r: PhantomData<fn() -> B>, +} +impl<F, B> DecomposeWriter<'_, F, B> +where + F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>, +{ + /// Returns `Ok(_)` if the stored result is `Continue`, and `Err(_)` otherwise. + #[inline(always)] + fn result_continue_or_err(&self) -> fmt::Result { + if self.result.is_break() { + return Err(fmt::Error); + } + Ok(()) + } + + /// Calls the output functions with the undecodable fragments. 
+ fn output_as_undecodable(&mut self, len_undecodable: u8) -> fmt::Result { + let len_written = usize::from(len_undecodable); + let frag = core::str::from_utf8(&self.buf[..len_written]) + .expect("[validity] `DecoderBuffer` writes a valid ASCII string"); + let len_incomplete = len_written % 3; + let len_complete = len_written - len_incomplete; + self.result = (self.f)(PctEncodedFragments::InvalidUtf8PctTriplets( + &frag[..len_complete], + )); + self.result_continue_or_err()?; + if len_incomplete > 0 { + // At least the first `%` exists. + self.result = (self.f)(PctEncodedFragments::StrayPercent); + if self.result.is_break() { + return Err(fmt::Error); + } + if len_incomplete > 1 { + // A following hexdigit is available. + debug_assert_eq!( + len_incomplete, 2, + "[consistency] the length of incomplete percent-encoded \ + triplet must be less than 2 bytes" + ); + self.result = (self.f)(PctEncodedFragments::NoPctStr( + &frag[(len_complete + 1)..len_written], + )); + self.result_continue_or_err()?; + } + } + Ok(()) + } +} + +impl<F, B> fmt::Write for DecomposeWriter<'_, F, B> +where + F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>, +{ + fn write_str(&mut self, s: &str) -> fmt::Result { + self.result_continue_or_err()?; + let mut rest = s; + while !rest.is_empty() { + let (len_consumed, result) = self.decoder.push_encoded(self.buf, rest); + if len_consumed == 0 { + // `rest` does not start with the percent-encoded triplets. + // Flush the decoder before attempting to decode more data. + if let Some(len_written) = self.decoder.flush(self.buf).map(NonZeroU8::get) { + self.output_as_undecodable(len_written)?; + rest = &rest[usize::from(len_written)..]; + } + + // Write plain string prefix (if found). 
+ let (plain_prefix, suffix) = find_split(rest, b'%').unwrap_or((rest, "")); + debug_assert!( + !plain_prefix.is_empty(), + "[consistency] `len_consumed == 0` indicates non-empty \ + `rest` not starting with `%`" + ); + self.result = (self.f)(PctEncodedFragments::NoPctStr(plain_prefix)); + self.result_continue_or_err()?; + rest = suffix; + continue; + } + + // Process decoding result. + match result { + PushResult::Decoded(len_written, c) => { + let len_written = usize::from(len_written.get()); + let frag = core::str::from_utf8(&self.buf[..len_written]) + .expect("[validity] `DecoderBuffer` writes a valid ASCII string"); + self.result = (self.f)(PctEncodedFragments::Char(frag, c)); + self.result_continue_or_err()?; + } + PushResult::Undecodable(len_written) => { + self.output_as_undecodable(len_written)?; + } + PushResult::NeedMoreBytes => { + // Nothing to write at this time. + } + } + rest = &rest[len_consumed..]; + } + Ok(()) + } +} + +/// A type for result of feeding data to [`DecoderBuffer`]. +#[derive(Debug, Clone, Copy)] +enum PushResult { + /// Input is still incomplete, needs more bytes to get the decoding result. + NeedMoreBytes, + /// Bytes decodable to valid UTF-8 sequence. + // `.0`: Length of decodable fragment. + // `.1`: Decoded character. + Decoded(NonZeroU8, char), + /// Valid percent-encoded triplets but not decodable to valid UTF-8 sequence. + // `.0`: Length of undecodable fragment. + Undecodable(u8), +} + +/// Buffer to contain (and to decode) incomplete percent-encoded triplets. +#[derive(Default, Debug, Clone, Copy)] +struct DecoderBuffer { + /// Percent-encoded triplets that possibly consists a valid UTF-8 sequence after decoded. + // + // `3 * 4`: 3 ASCII characters for single percent-encoded triplet, and + // 4 triplets at most for single Unicode codepoint in UTF-8. + encoded: [u8; 12], + /// Decoded bytes. + decoded: [u8; 4], + /// Number of bytes available in `buf_encoded` buffer. 
+ /// + /// `buf_encoded_len / 3` also indicates the length of data in `decoded`. + len_encoded: u8, +} + +impl DecoderBuffer { + /// Writes the data of the given length to the destination, and remove that part from buffer. + fn write_and_pop(&mut self, dest: &mut [u8], remove_len: u8) { + let new_len = self.len_encoded - remove_len; + let remove_len = usize::from(remove_len); + let src_range = remove_len..usize::from(self.len_encoded); + dest[..remove_len].copy_from_slice(&self.encoded[..remove_len]); + + if new_len == 0 { + *self = Self::default(); + return; + } + self.encoded.copy_within(src_range, 0); + self.decoded + .copy_within((remove_len / 3)..usize::from(self.len_encoded / 3), 0); + self.len_encoded = new_len; + } + + /// Pushes a byte of a (possible) percent-encoded tripet to the buffer. + fn push_single_encoded_byte(&mut self, byte: u8) { + debug_assert!( + self.len_encoded < 12, + "[consistency] four percent-encoded triplets are enough for a unicode code point" + ); + let pos_enc = usize::from(self.len_encoded); + self.len_encoded += 1; + self.encoded[pos_enc] = byte; + if self.len_encoded % 3 == 0 { + // A new percent-encoded triplet is read. Decode and remember. + let pos_dec = usize::from(self.len_encoded / 3 - 1); + let upper = self.encoded[pos_enc - 1]; + let lower = byte; + debug_assert!( + upper.is_ascii_hexdigit() && lower.is_ascii_hexdigit(), + "[consistency] the `encoded` buffer should contain valid percent-encoded triplets" + ); + self.decoded[pos_dec] = hexdigits_to_byte([upper, lower]); + } + } + + /// Pushes the (possibly) encoded string to the buffer. + /// + /// When the push result is not `PctTripletPushResult::NeedMoreBytes`, the + /// caller should call `Self::clear()` before pushing more bytes. + /// + /// # Preconditions + /// + /// * `buf` should be more than 12 bytes. If not, this method may panic. 
+ #[must_use] + pub(crate) fn push_encoded(&mut self, buf: &mut [u8], s: &str) -> (usize, PushResult) { + debug_assert!( + buf.len() >= 12, + "[internal precondition] destination buffer should be at least 12 bytes" + ); + let mut chars = s.chars(); + let mut len_triplet_incomplete = self.len_encoded % 3; + for c in &mut chars { + if len_triplet_incomplete == 0 { + // Expect `%`. + if c != '%' { + // Undecodable. + // `-1`: the last byte is peeked but not consumed. + let len_consumed = s.len() - chars.as_str().len() - 1; + let len_result = self.len_encoded; + self.write_and_pop(buf, len_result); + return (len_consumed, PushResult::Undecodable(len_result)); + } + self.push_single_encoded_byte(b'%'); + len_triplet_incomplete = 1; + continue; + } + + // Expect a nibble. + if !c.is_ascii_hexdigit() { + // Undecodable. + // `-1`: the last byte is peeked but not consumed. + let len_consumed = s.len() - chars.as_str().len() - 1; + let len_result = self.len_encoded; + self.write_and_pop(buf, len_result); + return (len_consumed, PushResult::Undecodable(len_result)); + } + self.push_single_encoded_byte(c as u8); + if len_triplet_incomplete == 1 { + len_triplet_incomplete = 2; + continue; + } else { + // Now a new percent-encoded triplet is read! + debug_assert_eq!(len_triplet_incomplete, 2); + len_triplet_incomplete = 0; + } + + // Now a new percent-encoded triplet is read. + // Check if the buffer contains a valid decodable content. + let len_decoded = usize::from(self.len_encoded) / 3; + match core::str::from_utf8(&self.decoded[..len_decoded]) { + Ok(decoded_str) => { + // Successfully decoded. 
+ let len_consumed = s.len() - chars.as_str().len(); + let c = decoded_str + .chars() + .next() + .expect("[validity] `decoded` buffer is nonempty"); + let len_result = NonZeroU8::new(self.len_encoded).expect( + "[consistency] `encoded` buffer is nonempty since \ + `push_single_encoded_byte()` was called", + ); + self.write_and_pop(buf, len_result.get()); + return (len_consumed, PushResult::Decoded(len_result, c)); + } + Err(e) => { + // Undecodable. + assert_eq!( + e.valid_up_to(), + 0, + "[consistency] `decoded` buffer contains at most one character" + ); + let skip_len_decoded = match e.error_len() { + // Unexpected EOF. Wait for remaining input. + None => continue, + // Skip invalid bytes. + Some(v) => v, + }; + let len_consumed = s.len() - chars.as_str().len(); + let len_result = skip_len_decoded as u8 * 3; + assert_ne!( + skip_len_decoded, 0, + "[consistency] empty bytes cannot be invalid" + ); + self.write_and_pop(buf, len_result); + return (len_consumed, PushResult::Undecodable(len_result)); + } + }; + } + let len_consumed = s.len() - chars.as_str().len(); + (len_consumed, PushResult::NeedMoreBytes) + } + + /// Writes the incomplete data completely to the destination, and clears the internal buffer. + #[must_use] + pub(crate) fn flush(&mut self, buf: &mut [u8]) -> Option<NonZeroU8> { + let len_result = NonZeroU8::new(self.len_encoded)?; + // Emit the current (undecodable) buffer as is. + self.write_and_pop(buf, len_result.get()); + debug_assert_eq!( + self.len_encoded, 0, + "[consistency] the buffer should be cleared after flushed" + ); + Some(len_result) + } +} diff --git a/vendor/iri-string/src/parser/trusted.rs b/vendor/iri-string/src/parser/trusted.rs new file mode 100644 index 00000000..f15c075e --- /dev/null +++ b/vendor/iri-string/src/parser/trusted.rs @@ -0,0 +1,476 @@ +//! Fast parsers for trusted (already validated) input. +//! +//! Using this in wrong way will lead to unexpected wrong result. 
+ +pub(crate) mod authority; + +use core::cmp::Ordering; +use core::num::NonZeroUsize; + +use crate::components::{RiReferenceComponents, Splitter}; +use crate::format::eq_str_display; +use crate::normalize::{is_pct_case_normalized, NormalizedAsciiOnlyHost, NormalizednessCheckMode}; +use crate::parser::str::{find_split2, find_split3, find_split4_hole, find_split_hole}; +use crate::spec::Spec; +use crate::types::RiReferenceStr; + +/// Eats a `scheme` and a following colon, and returns the rest and the scheme. +/// +/// Returns `(rest, scheme)`. +/// +/// This should be called at the head of an absolute IRIs/URIs. +#[must_use] +fn scheme_colon(i: &str) -> (&str, &str) { + let (scheme, rest) = + find_split_hole(i, b':').expect("[precondition] absolute IRIs must have `scheme` part"); + (rest, scheme) +} + +/// Eats a `scheme` and a following colon if available, and returns the rest and the scheme. +/// +/// This should be called at the head of an `IRI-reference` or similar. +#[must_use] +fn scheme_colon_opt(i: &str) -> (&str, Option<&str>) { + match find_split4_hole(i, b':', b'/', b'?', b'#') { + Some((scheme, b':', rest)) => (rest, Some(scheme)), + _ => (i, None), + } +} + +/// Eats double slash and the following authority if available, and returns the authority. +/// +/// This should be called at the head of an `IRI-reference`, or at the result of `scheme_colon`. +#[must_use] +fn slash_slash_authority_opt(i: &str) -> (&str, Option<&str>) { + let s = match i.strip_prefix("//") { + Some(rest) => rest, + None => return (i, None), + }; + // `i` might match `path-abempty` (which can start with `//`), but it is not + // allowed as `relative-part`, so no need to care `path-abempty` rule here. + // A slash, question mark, and hash character won't appear in `authority`. 
+ match find_split3(s, b'/', b'?', b'#') { + Some((authority, rest)) => (rest, Some(authority)), + None => ("", Some(s)), + } +} + +/// Eats a string until the query, and returns that part (excluding `?` for the query). +#[must_use] +fn until_query(i: &str) -> (&str, &str) { + // `?` won't appear before the query part. + match find_split2(i, b'?', b'#') { + Some((before_query, rest)) => (rest, before_query), + None => ("", i), + } +} + +/// Decomposes query and fragment, if available. +/// +/// The string must starts with `?`, or `#`, or be empty. +#[must_use] +fn decompose_query_and_fragment(i: &str) -> (Option<&str>, Option<&str>) { + match i.as_bytes().first().copied() { + None => (None, None), + Some(b'?') => { + let rest = &i[1..]; + match find_split_hole(rest, b'#') { + Some((query, fragment)) => (Some(query), Some(fragment)), + None => (Some(rest), None), + } + } + Some(c) => { + debug_assert_eq!(c, b'#'); + (None, Some(&i[1..])) + } + } +} + +/// Decomposes the given valid `IRI-reference`. +#[must_use] +pub(crate) fn decompose_iri_reference<S: Spec>( + i: &RiReferenceStr<S>, +) -> RiReferenceComponents<'_, S> { + /// Inner function to avoid unnecessary monomorphizations on `S`. + fn decompose(i: &str) -> Splitter { + let len = i.len(); + + let (i, scheme_end) = { + let (i, scheme) = scheme_colon_opt(i); + let end = scheme.and_then(|s| NonZeroUsize::new(s.len())); + (i, end) + }; + let (i, authority_end) = { + // 2: "//".len() + let start = len - i.len() + 2; + // `authority` does not contain the two slashes of `://'. + let (i, authority) = slash_slash_authority_opt(i); + let end = authority.and_then(|s| NonZeroUsize::new(start + s.len())); + (i, end) + }; + let (i, _path) = until_query(i); + + let (query_start, fragment_start) = { + // This could theoretically be zero if `len` is `usize::MAX` and + // `i` has neither a query nor a fragment. However, this is + // practically impossible. 
+ let after_first_prefix = NonZeroUsize::new((len - i.len()).wrapping_add(1)); + + let (query, fragment) = decompose_query_and_fragment(i); + match (query.is_some(), fragment) { + (true, Some(fragment)) => { + (after_first_prefix, NonZeroUsize::new(len - fragment.len())) + } + (true, None) => (after_first_prefix, None), + (false, Some(_fragment)) => (None, after_first_prefix), + (false, None) => (None, None), + } + }; + + Splitter::new(scheme_end, authority_end, query_start, fragment_start) + } + + RiReferenceComponents { + iri: i, + splitter: decompose(i.as_str()), + } +} + +/// Extracts `scheme` part from an IRI reference. +/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. +#[inline] +#[must_use] +pub(crate) fn extract_scheme(i: &str) -> Option<&str> { + scheme_colon_opt(i).1 +} + +/// Extracts `scheme` part from an absolute IRI. +/// +/// # Precondition +/// +/// The given string must be a valid absolute IRI. +#[inline] +#[must_use] +pub(crate) fn extract_scheme_absolute(i: &str) -> &str { + scheme_colon(i).1 +} + +/// Extracts `authority` part from an IRI reference. +/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. +#[inline] +#[must_use] +pub(crate) fn extract_authority(i: &str) -> Option<&str> { + let (i, _scheme) = scheme_colon_opt(i); + slash_slash_authority_opt(i).1 +} + +/// Extracts `authority` part from an absolute IRI. +/// +/// # Precondition +/// +/// The given string must be a valid absolute IRI. +#[inline] +#[must_use] +pub(crate) fn extract_authority_absolute(i: &str) -> Option<&str> { + let (i, _scheme) = scheme_colon(i); + slash_slash_authority_opt(i).1 +} + +/// Extracts `authority` part from a relative IRI. +/// +/// # Precondition +/// +/// The given string must be a valid relative IRI. +#[inline] +#[must_use] +pub(crate) fn extract_authority_relative(i: &str) -> Option<&str> { + slash_slash_authority_opt(i).1 +} + +/// Extracts `path` part from an IRI reference. 
+/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. +#[inline] +#[must_use] +pub(crate) fn extract_path(i: &str) -> &str { + let (i, _scheme) = scheme_colon_opt(i); + let (i, _authority) = slash_slash_authority_opt(i); + until_query(i).1 +} + +/// Extracts `path` part from an absolute IRI. +/// +/// # Precondition +/// +/// The given string must be a valid absolute IRI. +#[inline] +#[must_use] +pub(crate) fn extract_path_absolute(i: &str) -> &str { + let (i, _scheme) = scheme_colon(i); + let (i, _authority) = slash_slash_authority_opt(i); + until_query(i).1 +} + +/// Extracts `path` part from a relative IRI. +/// +/// # Precondition +/// +/// The given string must be a valid relative IRI. +#[inline] +#[must_use] +pub(crate) fn extract_path_relative(i: &str) -> &str { + let (i, _authority) = slash_slash_authority_opt(i); + until_query(i).1 +} + +/// Extracts `query` part from an IRI reference. +/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. +#[inline] +#[must_use] +pub(crate) fn extract_query(i: &str) -> Option<&str> { + let (i, _before_query) = until_query(i); + decompose_query_and_fragment(i).0 +} + +/// Extracts `query` part from an `absolute-IRI` string. +/// +/// # Precondition +/// +/// The given string must be a valid `absolute-IRI` string. +#[must_use] +pub(crate) fn extract_query_absolute_iri(i: &str) -> Option<&str> { + let (i, _before_query) = until_query(i); + if i.is_empty() { + None + } else { + debug_assert_eq!( + i.as_bytes().first(), + Some(&b'?'), + "`absolute-IRI` string must not have `fragment part" + ); + Some(&i[1..]) + } +} + +/// Splits an IRI string into the prefix and the fragment part. +/// +/// A leading `#` character is truncated if the fragment part exists. +/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. 
+#[inline] +#[must_use] +pub(crate) fn split_fragment(iri: &str) -> (&str, Option<&str>) { + // It is completely OK to find the first `#` character from valid IRI to get fragment part, + // because the spec says that there are no `#` characters before the fragment part. + // + // > ``` + // > scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + // > ``` + // > + // > --- [RFC 3986, section 3.1. Scheme](https://tools.ietf.org/html/rfc3986#section-3.1) + // + // > The authority component is preceded by a double slash ("//") and is terminated by the + // > next slash ("/"), question mark ("?"), or number sign ("#") character, or by the end + // > of the URI. + // > + // > --- [RFC 3986, section 3.2. Authority](https://tools.ietf.org/html/rfc3986#section-3.2) + // + // > The path is terminated by the first question mark ("?") or number sign ("#") + // > character, or by the end of the URI. + // > + // > --- [RFC 3986, section 3.3. Path](https://tools.ietf.org/html/rfc3986#section-3.3) + // + // > The query component is indicated by the first question mark ("?") character and + // > terminated by a number sign ("#") character or by the end of the URI. + // > + // > --- [RFC 3986, section 3.4. Query](https://tools.ietf.org/html/rfc3986#section-3.4) + match find_split_hole(iri, b'#') { + Some((prefix, fragment)) => (prefix, Some(fragment)), + None => (iri, None), + } +} + +/// Returns the fragment part of the given IRI. +/// +/// A leading `#` character of the fragment is truncated. +#[inline] +#[must_use] +pub(crate) fn extract_fragment(iri: &str) -> Option<&str> { + split_fragment(iri).1 +} + +/// Returns `Ok(_)` if the string is normalized. +/// +/// If this function returns `true`, normalization input and output will be identical. 
+/// +/// In this function, "normalized" means that any of the normalization below +/// won't change the input on normalization: +/// +/// * syntax-based normalization, +/// * case normalization, +/// * percent-encoding normalization, and +/// * path segment normalizaiton. +/// +/// Note that scheme-based normalization is not considered. +#[must_use] +pub(crate) fn is_normalized<S: Spec>(i: &str, mode: NormalizednessCheckMode) -> bool { + let (i, scheme) = scheme_colon(i); + let (after_authority, authority) = slash_slash_authority_opt(i); + let (_after_path, path) = until_query(after_authority); + + // Syntax-based normalization: uppercase chars in `scheme` should be + // converted to lowercase. + if scheme.bytes().any(|b| b.is_ascii_uppercase()) { + return false; + } + + // Case normalization: ASCII alphabets in US-ASCII only `host` should be + // normalized to lowercase. + // Case normalization: ASCII alphabets in percent-encoding triplet should be + // normalized to uppercase. + // Percent-encoding normalization: unresreved characters should be decoded + // in `userinfo`, `host`, `path`, `query`, and `fragments`. + // Path segment normalization: the path should not have dot segments (`.` + // and/or `..`). + // + // Note that `authority` can have percent-encoded `userinfo`. + if let Some(authority) = authority { + let authority_components = authority::decompose_authority(authority); + + // Check `host`. + let host = authority_components.host(); + let host_is_normalized = if is_ascii_only_host(host) { + eq_str_display(host, &NormalizedAsciiOnlyHost::new(host)) + } else { + // If the host is not ASCII-only, conversion to lowercase is not performed. + is_pct_case_normalized::<S>(host) + }; + if !host_is_normalized { + return false; + } + + // Check pencent encodings in `userinfo`. + if let Some(userinfo) = authority_components.userinfo() { + if !is_pct_case_normalized::<S>(userinfo) { + return false; + } + } + } + + // Check `path`. 
+ // + // Syntax-based normalization: Dot segments might be removed. + // Note that we don't have to care `%2e` and `%2E` since `.` is unreserved + // and they will be decoded if not normalized. + // Also note that WHATWG serialization will use `/.//` as a path prefix if + // the path is absolute and won't modify the path if the path is relative. + // + // Percent-encoding normalization: unresreved characters should be decoded + // in `path`, `query`, and `fragments`. + let path_span_no_dot_segments = if authority.is_some() { + Some(path) + } else { + match mode { + NormalizednessCheckMode::Default => Some(path.strip_prefix("/.//").unwrap_or(path)), + NormalizednessCheckMode::Rfc3986 => Some(path), + NormalizednessCheckMode::PreserveAuthoritylessRelativePath => { + if path.starts_with('/') { + // Absolute. + Some(path.strip_prefix("/.//").unwrap_or(path)) + } else { + // Relative. Treat the path as "opaque". No span to check. + None + } + } + } + }; + if let Some(path_span_no_dot_segments) = path_span_no_dot_segments { + if path_span_no_dot_segments + .split('/') + .any(|segment| matches!(segment, "." | "..")) + { + return false; + } + } + is_pct_case_normalized::<S>(after_authority) +} + +/// Decodes two hexdigits into a byte. +/// +/// # Preconditions +/// +/// The parameters `upper` and `lower` should be an ASCII hexadecimal digit. +#[must_use] +pub(super) fn hexdigits_to_byte([upper, lower]: [u8; 2]) -> u8 { + let i_upper = match (upper & 0xf0).cmp(&0x40) { + Ordering::Less => upper - b'0', + Ordering::Equal => upper - (b'A' - 10), + Ordering::Greater => upper - (b'a' - 10), + }; + let i_lower = match (lower & 0xf0).cmp(&0x40) { + Ordering::Less => lower - b'0', + Ordering::Equal => lower - (b'A' - 10), + Ordering::Greater => lower - (b'a' - 10), + }; + (i_upper << 4) + i_lower +} + +/// Converts the first two hexdigit bytes in the buffer into a byte. +/// +/// # Panics +/// +/// Panics if the string does not start with two hexdigits. 
+#[must_use] +pub(crate) fn take_xdigits2(s: &str) -> (u8, &str) { + let mut bytes = s.bytes(); + let upper_xdigit = bytes + .next() + .expect("[validity] at least two bytes should follow the `%` in a valid IRI reference"); + let lower_xdigit = bytes + .next() + .expect("[validity] at least two bytes should follow the `%` in a valid IRI reference"); + let v = hexdigits_to_byte([upper_xdigit, lower_xdigit]); + (v, &s[2..]) +} + +/// Returns true if the given `host`/`ihost` string consists of only US-ASCII characters. +/// +/// # Precondition +/// +/// The given string should be valid `host` or `host ":" port` string. +#[must_use] +pub(crate) fn is_ascii_only_host(mut host: &str) -> bool { + while let Some((i, c)) = host + .char_indices() + .find(|(_i, c)| !c.is_ascii() || *c == '%') + { + if c != '%' { + // Non-ASCII character found. + debug_assert!(!c.is_ascii()); + return false; + } + // Percent-encoded character found. + let after_pct = &host[(i + 1)..]; + let (byte, rest) = take_xdigits2(after_pct); + if !byte.is_ascii() { + return false; + } + host = rest; + } + + // Neither non-ASCII characters nor percent-encoded characters found. + true +} diff --git a/vendor/iri-string/src/parser/trusted/authority.rs b/vendor/iri-string/src/parser/trusted/authority.rs new file mode 100644 index 00000000..83e41298 --- /dev/null +++ b/vendor/iri-string/src/parser/trusted/authority.rs @@ -0,0 +1,32 @@ +//! Parsers for trusted `authority` string. + +use crate::components::AuthorityComponents; +use crate::parser::str::{find_split_hole, rfind_split2}; + +/// Decomposes the authority into `(userinfo, host, port)`. +/// +/// The leading `:` is truncated. +/// +/// # Precondition +/// +/// The given string must be a valid IRI reference. 
+#[inline] +#[must_use] +pub(crate) fn decompose_authority(authority: &str) -> AuthorityComponents<'_> { + let i = authority; + let (i, host_start) = match find_split_hole(i, b'@') { + Some((userinfo, rest)) => (rest, userinfo.len() + 1), + None => (authority, 0), + }; + let colon_port_len = match rfind_split2(i, b':', b']') { + Some((_, suffix)) if suffix.starts_with(':') => suffix.len(), + _ => 0, + }; + let host_end = authority.len() - colon_port_len; + + AuthorityComponents { + authority, + host_start, + host_end, + } +} diff --git a/vendor/iri-string/src/parser/validate.rs b/vendor/iri-string/src/parser/validate.rs new file mode 100644 index 00000000..59625394 --- /dev/null +++ b/vendor/iri-string/src/parser/validate.rs @@ -0,0 +1,225 @@ +//! Validating parsers for non-trusted (possibly invalid) input. + +mod authority; +mod path; + +use crate::parser::char; +use crate::parser::str::{ + find_split, find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded, +}; +use crate::spec::Spec; +use crate::validate::Error; + +use self::authority::validate_authority; +pub(crate) use self::authority::{validate_host, validate_userinfo}; +pub(crate) use self::path::validate_path; +use self::path::{ + validate_path_abempty, validate_path_absolute_authority_absent, + validate_path_relative_authority_absent, +}; + +/// Returns `Ok(_)` if the string matches `scheme`. +pub(crate) fn validate_scheme(i: &str) -> Result<(), Error> { + debug_assert!(!i.is_empty()); + let bytes = i.as_bytes(); + if bytes[0].is_ascii_alphabetic() + && bytes[1..] + .iter() + .all(|&b| b.is_ascii() && char::is_ascii_scheme_continue(b)) + { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `query` or `iquery`. 
+pub(crate) fn validate_query<S: Spec>(i: &str) -> Result<(), Error> { + let is_valid = + satisfy_chars_with_pct_encoded(i, char::is_ascii_frag_query, char::is_nonascii_query::<S>); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `authority path-abempty` rule sequence. +fn validate_authority_path_abempty<S: Spec>(i: &str) -> Result<(), Error> { + let (maybe_authority, maybe_path) = match find_split(i, b'/') { + Some(v) => v, + None => (i, ""), + }; + validate_authority::<S>(maybe_authority)?; + validate_path_abempty::<S>(maybe_path) +} + +/// Returns `Ok(_)` if the string matches `URI`/`IRI` rules. +#[inline] +pub(crate) fn validate_uri<S: Spec>(i: &str) -> Result<(), Error> { + validate_uri_reference_common::<S>(i, UriReferenceRule::Absolute) +} + +/// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules. +#[inline] +pub(crate) fn validate_uri_reference<S: Spec>(i: &str) -> Result<(), Error> { + validate_uri_reference_common::<S>(i, UriReferenceRule::Any) +} + +/// Returns `Ok(_)` if the string matches `absolute-URI`/`absolute-IRI` rules. +#[inline] +pub(crate) fn validate_absolute_uri<S: Spec>(i: &str) -> Result<(), Error> { + validate_uri_reference_common::<S>(i, UriReferenceRule::AbsoluteWithoutFragment) +} + +/// Syntax rule for URI/IRI references. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +enum UriReferenceRule { + /// `URI` and `IRI`. + /// + /// This can have a fragment. + Absolute, + /// `absolute-URI` and `absolute-IRI`. + /// + /// This cannot have a fragment. + AbsoluteWithoutFragment, + /// `URI-reference` and `IRI-reference`. + /// + /// This can be relative. + Any, +} + +impl UriReferenceRule { + /// Returns `true` is the relative reference is allowed. + #[inline] + #[must_use] + fn is_relative_allowed(self) -> bool { + self == Self::Any + } + + /// Returns `true` is the fragment part is allowed. 
+ #[inline] + #[must_use] + fn is_fragment_allowed(self) -> bool { + matches!(self, Self::Absolute | Self::Any) + } +} + +/// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules. +fn validate_uri_reference_common<S: Spec>( + i: &str, + ref_rule: UriReferenceRule, +) -> Result<(), Error> { + // Validate `scheme ":"`. + let (i, _scheme) = match find_split_hole(i, b':') { + None => { + if ref_rule.is_relative_allowed() { + return validate_relative_ref::<S>(i); + } else { + return Err(Error::new()); + } + } + Some(("", _)) => return Err(Error::new()), + Some((maybe_scheme, rest)) => { + if validate_scheme(maybe_scheme).is_err() { + // The string before the first colon is not a scheme. + // Falling back to `relative-ref` parsing. + if ref_rule.is_relative_allowed() { + return validate_relative_ref::<S>(i); + } else { + return Err(Error::new()); + } + } + (rest, maybe_scheme) + } + }; + + // Validate `hier-part`. + let after_path = match i.strip_prefix("//") { + Some(i) => { + let (maybe_authority_path, after_path) = match find_split2_hole(i, b'?', b'#') { + Some((maybe_authority_path, c, rest)) => (maybe_authority_path, Some((c, rest))), + None => (i, None), + }; + validate_authority_path_abempty::<S>(maybe_authority_path)?; + after_path + } + None => { + let (maybe_path, after_path) = match find_split2_hole(i, b'?', b'#') { + Some((maybe_path, c, rest)) => (maybe_path, Some((c, rest))), + None => (i, None), + }; + // Authority is absent. + validate_path_absolute_authority_absent::<S>(maybe_path)?; + after_path + } + }; + + // Validate `[ "?" query ] [ "#" fragment ]`. + if let Some((first, rest)) = after_path { + validate_after_path::<S>(first, rest, ref_rule.is_fragment_allowed())?; + } + Ok(()) +} + +/// Returns `Ok(_)` if the string matches `relative-ref`/`irelative-ref` rules. +pub(crate) fn validate_relative_ref<S: Spec>(i: &str) -> Result<(), Error> { + // Validate `relative-part`. 
+ let after_path = match i.strip_prefix("//") { + Some(i) => { + let (maybe_authority_path, after_path) = match find_split2_hole(i, b'?', b'#') { + Some((maybe_authority_path, c, rest)) => (maybe_authority_path, Some((c, rest))), + None => (i, None), + }; + validate_authority_path_abempty::<S>(maybe_authority_path)?; + after_path + } + None => { + let (maybe_path, after_path) = match find_split2_hole(i, b'?', b'#') { + Some((maybe_path, c, rest)) => (maybe_path, Some((c, rest))), + None => (i, None), + }; + // Authority is absent. + validate_path_relative_authority_absent::<S>(maybe_path)?; + after_path + } + }; + + // Validate `[ "?" query ] [ "#" fragment ]`. + if let Some((first, rest)) = after_path { + validate_after_path::<S>(first, rest, true)?; + } + Ok(()) +} + +/// Returns `Ok(_)` if the string matches `[ "?" query ] [ "#" fragment ]` (or IRI version). +fn validate_after_path<S: Spec>(first: u8, rest: &str, accept_fragment: bool) -> Result<(), Error> { + let (maybe_query, maybe_fragment) = if first == b'?' { + match find_split_hole(rest, b'#') { + Some(v) => v, + None => (rest, ""), + } + } else { + debug_assert_eq!(first, b'#'); + ("", rest) + }; + validate_query::<S>(maybe_query)?; + if !accept_fragment && !maybe_fragment.is_empty() { + return Err(Error::new()); + } + validate_fragment::<S>(maybe_fragment) +} + +/// Returns `Ok(_)` if the string matches `fragment`/`ifragment` rules. +pub(crate) fn validate_fragment<S: Spec>(i: &str) -> Result<(), Error> { + let is_valid = satisfy_chars_with_pct_encoded( + i, + char::is_ascii_frag_query, + char::is_nonascii_fragment::<S>, + ); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} diff --git a/vendor/iri-string/src/parser/validate/authority.rs b/vendor/iri-string/src/parser/validate/authority.rs new file mode 100644 index 00000000..fb41085e --- /dev/null +++ b/vendor/iri-string/src/parser/validate/authority.rs @@ -0,0 +1,296 @@ +//! Parsers for authority. 
+ +use core::mem; + +use crate::parser::char; +use crate::parser::str::{ + find_split_hole, get_wrapped_inner, rfind_split_hole, satisfy_chars_with_pct_encoded, + strip_ascii_char_prefix, +}; +use crate::spec::Spec; +use crate::validate::Error; + +/// Returns `Ok(_)` if the string matches `userinfo` or `iuserinfo`. +pub(crate) fn validate_userinfo<S: Spec>(i: &str) -> Result<(), Error> { + let is_valid = satisfy_chars_with_pct_encoded( + i, + char::is_ascii_userinfo_ipvfutureaddr, + char::is_nonascii_userinfo::<S>, + ); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `true` if the string matches `dec-octet`. +/// +/// In other words, this tests whether the string is decimal "0" to "255". +#[must_use] +fn is_dec_octet(i: &str) -> bool { + matches!( + i.as_bytes(), + [b'0'..=b'9'] + | [b'1'..=b'9', b'0'..=b'9'] + | [b'1', b'0'..=b'9', b'0'..=b'9'] + | [b'2', b'0'..=b'4', b'0'..=b'9'] + | [b'2', b'5', b'0'..=b'5'] + ) +} + +/// Returns `Ok(_)` if the string matches `IPv4address`. +fn validate_ipv4address(i: &str) -> Result<(), Error> { + let (first, rest) = find_split_hole(i, b'.').ok_or_else(Error::new)?; + if !is_dec_octet(first) { + return Err(Error::new()); + } + let (second, rest) = find_split_hole(rest, b'.').ok_or_else(Error::new)?; + if !is_dec_octet(second) { + return Err(Error::new()); + } + let (third, fourth) = find_split_hole(rest, b'.').ok_or_else(Error::new)?; + if is_dec_octet(third) && is_dec_octet(fourth) { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// A part of IPv6 addr. +#[derive(Clone, Copy)] +enum V6AddrPart { + /// `[0-9a-fA-F]{1,4}::`. + H16Omit, + /// `[0-9a-fA-F]{1,4}:`. + H16Cont, + /// `[0-9a-fA-F]{1,4}`. + H16End, + /// IPv4 address. + V4, + /// `::`. + Omit, +} + +/// Splits the IPv6 address string into the next component and the rest substring. 
// Returns `(rest, part)`: the kind of the leading component and the
// not-yet-consumed remainder of the address.
fn split_v6_addr_part(i: &str) -> Result<(&str, V6AddrPart), Error> {
    debug_assert!(!i.is_empty());
    match find_split_hole(i, b':') {
        Some((prefix, rest)) => {
            // `h16` is at most four hexdigits.
            if prefix.len() >= 5 {
                return Err(Error::new());
            }

            if prefix.is_empty() {
                // The input starts with `:`; only `::` is acceptable here.
                return match strip_ascii_char_prefix(rest, b':') {
                    Some(rest) => Ok((rest, V6AddrPart::Omit)),
                    None => Err(Error::new()),
                };
            }

            // Should be `h16`.
            debug_assert!((1..=4).contains(&prefix.len()));
            if !prefix.bytes().all(|b| b.is_ascii_hexdigit()) {
                return Err(Error::new());
            }
            // `h16::` if another colon follows immediately, `h16:` otherwise.
            match strip_ascii_char_prefix(rest, b':') {
                Some(rest) => Ok((rest, V6AddrPart::H16Omit)),
                None => Ok((rest, V6AddrPart::H16Cont)),
            }
        }
        None => {
            // No colon left: this is the last component.
            if i.len() >= 5 {
                // Possibly `IPv4address`.
                validate_ipv4address(i)?;
                return Ok(("", V6AddrPart::V4));
            }
            if i.bytes().all(|b| b.is_ascii_hexdigit()) {
                Ok(("", V6AddrPart::H16End))
            } else {
                Err(Error::new())
            }
        }
    }
}

/// Returns `Ok(_)` if the string matches `IPv6address`.
///
/// The address is consumed one component at a time while counting 16-bit
/// groups (an embedded IPv4 address counts as two) and tracking whether `::`
/// has been seen. Without `::` exactly eight groups are required; with `::`
/// fewer than eight.
fn validate_ipv6address(mut i: &str) -> Result<(), Error> {
    let mut h16_count = 0;
    let mut is_omitted = false;
    while !i.is_empty() {
        let (rest, part) = split_v6_addr_part(i)?;
        match part {
            V6AddrPart::H16Omit => {
                h16_count += 1;
                // `mem::replace` sets the flag and yields its previous value.
                if mem::replace(&mut is_omitted, true) {
                    // Omitted twice.
                    return Err(Error::new());
                }
            }
            V6AddrPart::H16Cont => {
                h16_count += 1;
                if rest.is_empty() {
                    // `H16Cont` cannot be the last part of an IPv6 address.
                    return Err(Error::new());
                }
            }
            V6AddrPart::H16End => {
                h16_count += 1;
                break;
            }
            V6AddrPart::V4 => {
                debug_assert!(rest.is_empty());
                h16_count += 2;
                break;
            }
            V6AddrPart::Omit => {
                if mem::replace(&mut is_omitted, true) {
                    // Omitted twice.
                    return Err(Error::new());
                }
            }
        }
        if h16_count > 8 {
            return Err(Error::new());
        }
        i = rest;
    }
    let is_valid = if is_omitted {
        h16_count < 8
    } else {
        h16_count == 8
    };
    if is_valid {
        Ok(())
    } else {
        Err(Error::new())
    }
}

/// Returns `Ok(_)` if the string matches `authority` or `iauthority`.
pub(super) fn validate_authority<S: Spec>(i: &str) -> Result<(), Error> {
    // Strip and validate `userinfo`.
    let (i, _userinfo) = match find_split_hole(i, b'@') {
        Some((maybe_userinfo, i)) => {
            validate_userinfo::<S>(maybe_userinfo)?;
            (i, Some(maybe_userinfo))
        }
        None => (i, None),
    };
    // `host` can contain colons, but `port` cannot.
    // Strip and validate `port`.
    let (maybe_host, _port) = match rfind_split_hole(i, b':') {
        Some((maybe_host, maybe_port)) => {
            if maybe_port.bytes().all(|b| b.is_ascii_digit()) {
                (maybe_host, Some(maybe_port))
            } else {
                // The last colon is not a port separator (e.g. it is inside an
                // IP literal), so the whole string is treated as the host.
                (i, None)
            }
        }
        None => (i, None),
    };
    // Validate `host`.
    validate_host::<S>(maybe_host)
}

/// Validates `host`.
///
/// A host wrapped in `[` and `]` is validated as an `IP-literal` (`IPvFuture`
/// when it starts with `v`/`V`, `IPv6address` otherwise); anything else is
/// validated as `reg-name`.
pub(crate) fn validate_host<S: Spec>(i: &str) -> Result<(), Error> {
    match get_wrapped_inner(i, b'[', b']') {
        Some(maybe_addr) => {
            // `IP-literal`.
            // Note that `v` here is case insensitive. See RFC 3987 section 3.2.2.
            if let Some(maybe_addr_rest) = strip_ascii_char_prefix(maybe_addr, b'v')
                .or_else(|| strip_ascii_char_prefix(maybe_addr, b'V'))
            {
                // `IPvFuture`.
                let (maybe_ver, maybe_addr) =
                    find_split_hole(maybe_addr_rest, b'.').ok_or_else(Error::new)?;
                // Validate version.
                if maybe_ver.is_empty() || !maybe_ver.bytes().all(|b| b.is_ascii_hexdigit()) {
                    return Err(Error::new());
                }
                // Validate address.
                if !maybe_addr.is_empty()
                    && maybe_addr.is_ascii()
                    && maybe_addr
                        .bytes()
                        .all(char::is_ascii_userinfo_ipvfutureaddr)
                {
                    Ok(())
                } else {
                    Err(Error::new())
                }
            } else {
                // `IPv6address`.
                validate_ipv6address(maybe_addr)
            }
        }
        None => {
            // `IPv4address` or `reg-name`. No need to distinguish them here.
            let is_valid = satisfy_chars_with_pct_encoded(
                i,
                char::is_ascii_regname,
                char::is_nonascii_regname::<S>,
            );
            if is_valid {
                Ok(())
            } else {
                Err(Error::new())
            }
        }
    }
}

#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
    use super::*;

    use alloc::format;

    // Asserts that `$parser` accepts every given input.
    macro_rules! assert_validate {
        ($parser:expr, $($input:expr),* $(,)?) => {{
            $({
                let input = $input;
                let input: &str = input.as_ref();
                assert!($parser(input).is_ok(), "input={:?}", input);
            })*
        }};
    }

    #[test]
    fn test_ipv6address() {
        use core::cmp::Ordering;

        assert_validate!(validate_ipv6address, "a:bB:cCc:dDdD:e:F:a:B");
        assert_validate!(validate_ipv6address, "1:1:1:1:1:1:1:1");
        assert_validate!(validate_ipv6address, "1:1:1:1:1:1:1.1.1.1");
        assert_validate!(validate_ipv6address, "2001:db8::7");

        // Generate IPv6 addresses with `::`.
        // `make_sub(n)` yields `n` groups of `1` joined with `:`.
        let make_sub = |n: usize| {
            let mut s = "1:".repeat(n);
            s.pop();
            s
        };
        for len_pref in 0..=7 {
            let prefix = make_sub(len_pref);
            for len_suf in 1..=(7 - len_pref) {
                assert_validate!(
                    validate_ipv6address,
                    &format!("{}::{}", prefix, make_sub(len_suf))
                );
                // An IPv4 tail takes the place of the last two groups.
                match len_suf.cmp(&2) {
                    Ordering::Greater => assert_validate!(
                        validate_ipv6address,
                        &format!("{}::{}:1.1.1.1", prefix, make_sub(len_suf - 2))
                    ),
                    Ordering::Equal => {
                        assert_validate!(validate_ipv6address, &format!("{}::1.1.1.1", prefix))
                    }
                    Ordering::Less => {}
                }
            }
        }
    }
}

// diff --git a/vendor/iri-string/src/parser/validate/path.rs b/vendor/iri-string/src/parser/validate/path.rs
// new file mode 100644
// index 00000000..1b09c84b
// --- /dev/null
// +++ b/vendor/iri-string/src/parser/validate/path.rs
// @@ -0,0 +1,91 @@
//! Parsers for path.

use crate::parser::char;
use crate::parser::str::{find_split2_hole, satisfy_chars_with_pct_encoded};
use crate::spec::Spec;
use crate::validate::Error;

/// Returns `Ok(_)` if the string matches `path-abempty` or `ipath-abempty`.
+pub(super) fn validate_path_abempty<S: Spec>(i: &str) -> Result<(), Error> { + if i.is_empty() { + return Ok(()); + } + let i = match i.strip_prefix('/') { + Some(rest) => rest, + None => return Err(Error::new()), + }; + let is_valid = satisfy_chars_with_pct_encoded( + i, + char::is_ascii_pchar_slash, + S::is_nonascii_char_unreserved, + ); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `hier-part` or `ihier-part` modulo +/// `"//" authority path-abempty`. +pub(super) fn validate_path_absolute_authority_absent<S: Spec>(i: &str) -> Result<(), Error> { + if i.is_empty() { + // `path-empty`. + return Ok(()); + } + if i.starts_with("//") { + unreachable!("this case should be handled by the caller"); + } + let is_valid = satisfy_chars_with_pct_encoded( + i, + char::is_ascii_pchar_slash, + S::is_nonascii_char_unreserved, + ); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `relative-part` or `irelative-part` modulo +/// `"//" authority path-abempty`. +pub(super) fn validate_path_relative_authority_absent<S: Spec>(i: &str) -> Result<(), Error> { + if i.starts_with("//") { + unreachable!("this case should be handled by the caller"); + } + let is_valid = match find_split2_hole(i, b'/', b':') { + Some((_, b'/', _)) | None => satisfy_chars_with_pct_encoded( + i, + char::is_ascii_pchar_slash, + S::is_nonascii_char_unreserved, + ), + Some((_, c, _)) => { + debug_assert_eq!(c, b':'); + // `foo:bar`-style. This does not match `path-noscheme`. + return Err(Error::new()); + } + }; + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} + +/// Returns `Ok(_)` if the string matches `path`/`ipath` rules. 
+pub(crate) fn validate_path<S: Spec>(i: &str) -> Result<(), Error> { + if i.starts_with("//") { + return Err(Error::new()); + } + let is_valid = satisfy_chars_with_pct_encoded( + i, + char::is_ascii_pchar_slash, + S::is_nonascii_char_unreserved, + ); + if is_valid { + Ok(()) + } else { + Err(Error::new()) + } +} diff --git a/vendor/iri-string/src/percent_encode.rs b/vendor/iri-string/src/percent_encode.rs new file mode 100644 index 00000000..b5997a03 --- /dev/null +++ b/vendor/iri-string/src/percent_encode.rs @@ -0,0 +1,378 @@ +//! Percent encoding. + +use core::fmt::{self, Write as _}; +use core::marker::PhantomData; + +use crate::parser::char; +use crate::spec::{IriSpec, Spec, UriSpec}; + +/// A proxy to percent-encode a string as a part of URI. +pub type PercentEncodedForUri<T> = PercentEncoded<T, UriSpec>; + +/// A proxy to percent-encode a string as a part of IRI. +pub type PercentEncodedForIri<T> = PercentEncoded<T, IriSpec>; + +/// Context for percent encoding. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +enum Context { + /// Encode the string as a reg-name (usually called as "hostname"). + RegName, + /// Encode the string as a user name or a password (inside the `userinfo` component). + UserOrPassword, + /// Encode the string as a path segment. + /// + /// A slash (`/`) will be encoded to `%2F`. + PathSegment, + /// Encode the string as path segments joined with `/`. + /// + /// A slash (`/`) will be used as is. + Path, + /// Encode the string as a query string (without the `?` prefix). + Query, + /// Encode the string as a fragment string (without the `#` prefix). + Fragment, + /// Encode all characters except for `unreserved` characters. + Unreserve, + /// Encode characters only if they cannot appear anywhere in an IRI reference. + /// + /// `%` character will be always encoded. + Character, +} + +/// A proxy to percent-encode a string. +/// +/// Type aliases [`PercentEncodedForIri`] and [`PercentEncodedForUri`] are provided. 
+/// You can use them to make the expression simpler, for example write +/// `PercentEncodedForUri::from_path(foo)` instead of +/// `PercentEncoded::<_, UriSpec>::from_path(foo)`. +#[derive(Debug, Clone, Copy)] +pub struct PercentEncoded<T, S> { + /// Source string context. + context: Context, + /// Raw string before being encoded. + raw: T, + /// Spec. + _spec: PhantomData<fn() -> S>, +} + +impl<T: fmt::Display, S: Spec> PercentEncoded<T, S> { + /// Creates an encoded string from a raw reg-name (i.e. hostname or domain). + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "alloc")] { + /// use iri_string::percent_encode::PercentEncoded; + /// use iri_string::spec::UriSpec; + /// + /// let raw = "alpha.\u{03B1}.example.com"; + /// let encoded = "alpha.%CE%B1.example.com"; + /// assert_eq!( + /// PercentEncoded::<_, UriSpec>::from_reg_name(raw).to_string(), + /// encoded + /// ); + /// # } + /// ``` + pub fn from_reg_name(raw: T) -> Self { + Self { + context: Context::RegName, + raw, + _spec: PhantomData, + } + } + + /// Creates an encoded string from a raw user name (inside `userinfo` component). + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "alloc")] { + /// use iri_string::percent_encode::PercentEncoded; + /// use iri_string::spec::UriSpec; + /// + /// let raw = "user:\u{03B1}"; + /// // The first `:` will be interpreted as a delimiter, so colons will be escaped. + /// let encoded = "user%3A%CE%B1"; + /// assert_eq!( + /// PercentEncoded::<_, UriSpec>::from_user(raw).to_string(), + /// encoded + /// ); + /// # } + /// ``` + pub fn from_user(raw: T) -> Self { + Self { + context: Context::UserOrPassword, + raw, + _spec: PhantomData, + } + } + + /// Creates an encoded string from a raw user name (inside `userinfo` component). 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "alloc")] { + /// use iri_string::percent_encode::PercentEncoded; + /// use iri_string::spec::UriSpec; + /// + /// let raw = "password:\u{03B1}"; + /// // The first `:` will be interpreted as a delimiter, and the colon + /// // inside the password will be the first one if the user name is empty, + /// // so colons will be escaped. + /// let encoded = "password%3A%CE%B1"; + /// assert_eq!( + /// PercentEncoded::<_, UriSpec>::from_password(raw).to_string(), + /// encoded + /// ); + /// # } + /// ``` + pub fn from_password(raw: T) -> Self { + Self { + context: Context::UserOrPassword, + raw, + _spec: PhantomData, + } + } + + /// Creates an encoded string from a raw path segment. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "alloc")] { + /// use iri_string::percent_encode::PercentEncoded; + /// use iri_string::spec::UriSpec; + /// + /// let raw = "alpha/\u{03B1}?#"; + /// // Note that `/` is encoded to `%2F`. + /// let encoded = "alpha%2F%CE%B1%3F%23"; + /// assert_eq!( + /// PercentEncoded::<_, UriSpec>::from_path_segment(raw).to_string(), + /// encoded + /// ); + /// # } + /// ``` + pub fn from_path_segment(raw: T) -> Self { + Self { + context: Context::PathSegment, + raw, + _spec: PhantomData, + } + } + + /// Creates an encoded string from a raw path. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "alloc")] { + /// use iri_string::percent_encode::PercentEncoded; + /// use iri_string::spec::UriSpec; + /// + /// let raw = "alpha/\u{03B1}?#"; + /// // Note that `/` is NOT percent encoded. + /// let encoded = "alpha/%CE%B1%3F%23"; + /// assert_eq!( + /// PercentEncoded::<_, UriSpec>::from_path(raw).to_string(), + /// encoded + /// ); + /// # } + /// ``` + pub fn from_path(raw: T) -> Self { + Self { + context: Context::Path, + raw, + _spec: PhantomData, + } + } + + /// Creates an encoded string from a raw query. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "alloc")] { + /// use iri_string::percent_encode::PercentEncoded; + /// use iri_string::spec::UriSpec; + /// + /// let raw = "alpha/\u{03B1}?#"; + /// let encoded = "alpha/%CE%B1?%23"; + /// assert_eq!( + /// PercentEncoded::<_, UriSpec>::from_query(raw).to_string(), + /// encoded + /// ); + /// # } + /// ``` + pub fn from_query(raw: T) -> Self { + Self { + context: Context::Query, + raw, + _spec: PhantomData, + } + } + + /// Creates an encoded string from a raw fragment. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "alloc")] { + /// use iri_string::percent_encode::PercentEncoded; + /// use iri_string::spec::UriSpec; + /// + /// let raw = "alpha/\u{03B1}?#"; + /// let encoded = "alpha/%CE%B1?%23"; + /// assert_eq!( + /// PercentEncoded::<_, UriSpec>::from_fragment(raw).to_string(), + /// encoded + /// ); + /// # } + /// ``` + pub fn from_fragment(raw: T) -> Self { + Self { + context: Context::Fragment, + raw, + _spec: PhantomData, + } + } + + /// Creates a string consists of only `unreserved` string and percent-encoded triplets. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "alloc")] { + /// use iri_string::percent_encode::PercentEncoded; + /// use iri_string::spec::UriSpec; + /// + /// let unreserved = "%a0-._~\u{03B1}"; + /// let unreserved_encoded = "%25a0-._~%CE%B1"; + /// assert_eq!( + /// PercentEncoded::<_, UriSpec>::unreserve(unreserved).to_string(), + /// unreserved_encoded + /// ); + /// + /// let reserved = ":/?#[]@ !$&'()*+,;="; + /// let reserved_encoded = + /// "%3A%2F%3F%23%5B%5D%40%20%21%24%26%27%28%29%2A%2B%2C%3B%3D"; + /// assert_eq!( + /// PercentEncoded::<_, UriSpec>::unreserve(reserved).to_string(), + /// reserved_encoded + /// ); + /// # } + /// ``` + #[inline] + #[must_use] + pub fn unreserve(raw: T) -> Self { + Self { + context: Context::Unreserve, + raw, + _spec: PhantomData, + } + } + + /// Percent-encodes characters only if they cannot appear anywhere in an IRI reference. + /// + /// `%` character will be always encoded. In other words, this conversion + /// is not aware of percent-encoded triplets. + /// + /// Note that this encoding process does not guarantee that the resulting + /// string is a valid IRI reference. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "alloc")] { + /// use iri_string::percent_encode::PercentEncoded; + /// use iri_string::spec::UriSpec; + /// + /// let unreserved = "%a0-._~\u{03B1}"; + /// let unreserved_encoded = "%25a0-._~%CE%B1"; + /// assert_eq!( + /// PercentEncoded::<_, UriSpec>::characters(unreserved).to_string(), + /// unreserved_encoded + /// ); + /// + /// let reserved = ":/?#[]@ !$&'()*+,;="; + /// // Note that `%20` cannot appear directly in an IRI reference. 
+ /// let expected = ":/?#[]@%20!$&'()*+,;="; + /// assert_eq!( + /// PercentEncoded::<_, UriSpec>::characters(reserved).to_string(), + /// expected + /// ); + /// # } + /// ``` + #[inline] + #[must_use] + pub fn characters(raw: T) -> Self { + Self { + context: Context::Character, + raw, + _spec: PhantomData, + } + } +} + +impl<T: fmt::Display, S: Spec> fmt::Display for PercentEncoded<T, S> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + /// Filter that encodes a character before written if necessary. + struct Filter<'a, 'b, S> { + /// Encoding context. + context: Context, + /// Writer. + writer: &'a mut fmt::Formatter<'b>, + /// Spec. + _spec: PhantomData<fn() -> S>, + } + impl<S: Spec> fmt::Write for Filter<'_, '_, S> { + fn write_str(&mut self, s: &str) -> fmt::Result { + s.chars().try_for_each(|c| self.write_char(c)) + } + fn write_char(&mut self, c: char) -> fmt::Result { + let is_valid_char = match (self.context, c.is_ascii()) { + (Context::RegName, true) => char::is_ascii_regname(c as u8), + (Context::RegName, false) => char::is_nonascii_regname::<S>(c), + (Context::UserOrPassword, true) => { + c != ':' && char::is_ascii_userinfo_ipvfutureaddr(c as u8) + } + (Context::UserOrPassword, false) => char::is_nonascii_userinfo::<S>(c), + (Context::PathSegment, true) => char::is_ascii_pchar(c as u8), + (Context::PathSegment, false) => S::is_nonascii_char_unreserved(c), + (Context::Path, true) => c == '/' || char::is_ascii_pchar(c as u8), + (Context::Path, false) => S::is_nonascii_char_unreserved(c), + (Context::Query, true) => c == '/' || char::is_ascii_frag_query(c as u8), + (Context::Query, false) => char::is_nonascii_query::<S>(c), + (Context::Fragment, true) => c == '/' || char::is_ascii_frag_query(c as u8), + (Context::Fragment, false) => char::is_nonascii_fragment::<S>(c), + (Context::Unreserve, true) => char::is_ascii_unreserved(c as u8), + (Context::Unreserve, false) => S::is_nonascii_char_unreserved(c), + (Context::Character, true) => 
/// Writes the UTF-8 encoding of `c` to `writer`, one `%XX` triplet per byte.
///
/// Hex digits are emitted in upper case and every byte is zero-padded to two
/// digits. The first write error aborts the operation and is returned as-is.
#[inline]
fn write_pct_encoded_char<W: fmt::Write>(writer: &mut W, c: char) -> fmt::Result {
    // A `char` never needs more than four bytes in UTF-8.
    let mut utf8 = [0_u8; 4];
    for byte in c.encode_utf8(&mut utf8).as_bytes() {
        write!(writer, "%{:02X}", byte)?;
    }
    Ok(())
}
/// Splits an owned string into the part before the fragment and the fragment.
///
/// Returns the input unchanged together with `None` when the parser reports
/// no fragment. Otherwise the `#` delimiter is dropped and the fragment is
/// returned as a separate `String`.
///
/// # Panics
///
/// Panics if the byte following the reported prefix is not `#`, or if the
/// remaining prefix length does not match what the parser reported.
#[cfg(feature = "alloc")]
pub(crate) fn split_fragment_owned(mut s: String) -> (String, Option<String>) {
    // Compute the prefix length in its own scope so the borrow of `s` ends
    // before `s` is moved or mutated below.
    let prefix_len = {
        let (prefix, fragment) = trusted_parser::split_fragment(&s);
        fragment.map(|_| prefix.len())
    };
    let prefix_len = match prefix_len {
        Some(len) => len,
        None => return (s, None),
    };

    // Skip the `#` delimiter (one byte) when carving out the fragment.
    let fragment = s.split_off(prefix_len + 1);

    // `s` still ends with the delimiter; drop it and verify the invariant.
    let delimiter = s.pop();
    assert_eq!(delimiter, Some('#'));
    assert_eq!(s.len(), prefix_len);

    (s, Some(fragment))
}
Thus, IRI resolution by pure RFC 3986 algorithm can fail for some abnormal +//! cases. +//! +//! Note that this kind of failure can happen only when the base IRI has no +//! authority and empty path. This would be rare in the wild, since many people +//! would use an IRI with authority part, such as `http://`. +//! +//! If you are handling `scheme://`-style URIs and IRIs, don't worry about the +//! failure. Currently no cases are known to fail when at least one of the base +//! IRI or the relative IRI contains authorities. +//! +//! If you want this kind of abnormal IRI resolution to succeed and to be +//! idempotent, check the resolution result using +//! [`Normalized::ensure_rfc3986_normalizable`] (see the section below). +//! +//! ## WHATWG serialization +//! +//! To handle IRI resolution failure, WHATWG URL Standard defines serialization +//! algorithm for this kind of result, and it makes IRI resolution (and even +//! normalization) infallible and idempotent. +//! +//! IRI resolution and normalization provided by this crate automatically +//! applies this special rule if necessary, so they are infallible. If you want +//! to detect resolution/normalization failure, use +//! [`Normalized::ensure_rfc3986_normalizable`] method. +//! +//! ## Examples +//! +//! ``` +//! # #[cfg(feature = "alloc")] { +//! use iri_string::format::ToDedicatedString; +//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; +//! +//! let base = IriAbsoluteStr::new("scheme:")?; +//! { +//! let reference = IriReferenceStr::new(".///not-a-host")?; +//! let result = reference.resolve_against(base); +//! assert!(result.ensure_rfc3986_normalizable().is_err()); +//! assert_eq!(result.to_dedicated_string(), "scheme:/.//not-a-host"); +//! } +//! +//! { +//! let reference2 = IriReferenceStr::new("/..//not-a-host")?; +//! // Resulting string will be `scheme://not-a-host`, but `not-a-host` +//! // should be a path segment, not a host. So, the semantically correct +//! 
// target IRI cannot be represented by RFC 3986 IRI resolution. +//! let result2 = reference2.resolve_against(base); +//! assert!(result2.ensure_rfc3986_normalizable().is_err()); +//! +//! // Algorithm defined in WHATWG URL Standard addresses this case. +//! assert_eq!(result2.to_dedicated_string(), "scheme:/.//not-a-host"); +//! } +//! # } +//! # Ok::<_, iri_string::validate::Error>(()) +//! ``` + +use crate::components::RiReferenceComponents; +use crate::normalize::{NormalizationInput, Normalized}; +use crate::spec::Spec; +use crate::types::{RiAbsoluteStr, RiQueryStr, RiReferenceStr, RiStr}; + +/// A resolver against the fixed base. +#[derive(Debug, Clone, Copy)] +pub struct FixedBaseResolver<'a, S: Spec> { + /// Components of the base IRI. + base_components: RiReferenceComponents<'a, S>, +} + +impl<'a, S: Spec> FixedBaseResolver<'a, S> { + /// Creates a new resolver with the given base. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # // `ToDedicatedString` is available only when + /// # // `alloc` feature is enabled. + /// #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; + /// + /// let base = IriAbsoluteStr::new("http://example.com/base/")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// let reference = IriReferenceStr::new("../there")?; + /// let resolved = resolver.resolve(reference); + /// + /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn new(base: &'a RiAbsoluteStr<S>) -> Self { + Self { + base_components: RiReferenceComponents::from(base.as_ref()), + } + } + + /// Returns the base. 
+ /// + /// # Examples + /// + /// ``` + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; + /// + /// let base = IriAbsoluteStr::new("http://example.com/base/")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// assert_eq!(resolver.base(), base); + /// # Ok::<_, iri_string::validate::Error>(()) + /// ``` + #[must_use] + pub fn base(&self) -> &'a RiAbsoluteStr<S> { + // SAFETY: `base_components` can only be created from `&RiAbsoluteStr<S>`, + // and the type of `base_components` does not allow modification of the + // content after it is created. + unsafe { RiAbsoluteStr::new_maybe_unchecked(self.base_components.iri().as_str()) } + } +} + +/// Components getters. +/// +/// These getters are more efficient than calling through the result of `.base()`. +impl<S: Spec> FixedBaseResolver<'_, S> { + /// Returns the scheme. + /// + /// The following colon is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let base = IriAbsoluteStr::new("http://example.com/base/?query")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// assert_eq!(resolver.scheme_str(), "http"); + /// assert_eq!(base.scheme_str(), "http"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn scheme_str(&self) -> &str { + self.base_components + .scheme_str() + .expect("[validity] absolute IRI should have the scheme part") + } + + /// Returns the authority. + /// + /// The leading `//` is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// assert_eq!(resolver.authority_str(), Some("user:pass@example.com")); + /// assert_eq!(base.authority_str(), Some("user:pass@example.com")); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_str(&self) -> Option<&str> { + self.base_components.authority_str() + } + + /// Returns the path. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// assert_eq!(resolver.path_str(), "/base/"); + /// assert_eq!(base.path_str(), "/base/"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn path_str(&self) -> &str { + self.base_components.path_str() + } + + /// Returns the query. + /// + /// The leading question mark (`?`) is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::{IriAbsoluteStr, IriQueryStr}; + /// + /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?; + /// let resolver = FixedBaseResolver::new(base); + /// let query = IriQueryStr::new("query")?; + /// + /// assert_eq!(resolver.query(), Some(query)); + /// assert_eq!(base.query(), Some(query)); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query(&self) -> Option<&RiQueryStr<S>> { + let query_raw = self.query_str()?; + let query = RiQueryStr::new(query_raw) + .expect("[validity] must be valid query if present in an absolute-IRI"); + Some(query) + } + + /// Returns the query in a raw string slice. + /// + /// The leading question mark (`?`) is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// assert_eq!(resolver.query_str(), Some("query")); + /// assert_eq!(base.query_str(), Some("query")); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query_str(&self) -> Option<&str> { + self.base_components.query_str() + } +} + +impl<'a, S: Spec> FixedBaseResolver<'a, S> { + /// Resolves the given reference against the fixed base. + /// + /// The task returned by this method does **not** normalize the resolution + /// result. However, `..` and `.` are recognized even when they are + /// percent-encoded. + /// + /// # Failures + /// + /// This function itself does not fail, but resolution algorithm defined by + /// RFC 3986 can fail. In that case, serialization algorithm defined by + /// WHATWG URL Standard would be automatically applied. 
+ /// + /// See the documentation of [`Normalized`]. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # // `ToDedicatedString` is available only when + /// # // `alloc` feature is enabled. + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; + /// + /// let base = IriAbsoluteStr::new("http://example.com/base/")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// let reference = IriReferenceStr::new("../there")?; + /// let resolved = resolver.resolve(reference); + /// + /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// Note that `..` and `.` path segments are recognized even when they are + /// percent-encoded. + /// + /// ``` + /// # use iri_string::validate::Error; + /// # // `ToDedicatedString` is available only when + /// # // `alloc` feature is enabled. + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::resolve::FixedBaseResolver; + /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; + /// + /// let base = IriAbsoluteStr::new("HTTP://example.COM/base/base2/")?; + /// let resolver = FixedBaseResolver::new(base); + /// + /// // `%2e%2e` is recognized as `..`. + /// // However, `dot%2edot` is NOT normalized into `dot.dot`. + /// let reference = IriReferenceStr::new("%2e%2e/../dot%2edot")?; + /// let resolved = resolver.resolve(reference); + /// + /// // Resolved but not normalized. 
/// A trait for spec types.
///
/// This trait is not intended to be implemented by crate users: it requires
/// `internal::Sealed`, which lives in a private module.
// Note that all types which implement `Spec` also implement `SpecInternal`.
pub trait Spec: internal::Sealed + Copy + fmt::Debug {}

/// A type that represents specification of IRI.
///
/// This enum has no variants, so it can only be used as a type parameter.
///
/// About IRI, see [RFC 3987].
///
/// [RFC 3987]: https://tools.ietf.org/html/rfc3987
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IriSpec {}

impl Spec for IriSpec {}

/// A type that represents specification of URI.
///
/// This enum has no variants, so it can only be used as a type parameter.
///
/// About URI, see [RFC 3986].
///
/// [RFC 3986]: https://tools.ietf.org/html/rfc3986
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UriSpec {}

impl Spec for UriSpec {}
https://rust-lang.github.io/api-guidelines/future-proofing.html#sealed-traits-protect-against-downstream-implementations-c-sealed + +use crate::parser::char::is_ucschar; +use crate::spec::{IriSpec, UriSpec}; + +/// A trait to prohibit user-defined types from implementing `Spec`. +/// +/// About sealed trait, see [Rust API Guidelines][future-proofing]. +/// +/// [future-proofing]: https://rust-lang.github.io/api-guidelines/future-proofing.html +pub trait Sealed: SpecInternal {} + +impl Sealed for IriSpec {} +impl Sealed for UriSpec {} + +/// Internal implementations for spec types. +pub trait SpecInternal: Sized { + /// Checks if the given non-ASCII character matches `unreserved` or `iunreserved` rule. + #[must_use] + fn is_nonascii_char_unreserved(c: char) -> bool; + /// Checks if the given character matches `iprivate` rule. + #[must_use] + fn is_nonascii_char_private(c: char) -> bool; +} + +impl SpecInternal for IriSpec { + #[inline] + fn is_nonascii_char_unreserved(c: char) -> bool { + is_ucschar(c) + } + + fn is_nonascii_char_private(c: char) -> bool { + matches!( + u32::from(c), + 0xE000..=0xF8FF | + 0xF_0000..=0xF_FFFD | + 0x10_0000..=0x10_FFFD + ) + } +} + +impl SpecInternal for UriSpec { + #[inline] + fn is_nonascii_char_unreserved(_: char) -> bool { + false + } + + #[inline] + fn is_nonascii_char_private(_: char) -> bool { + false + } +} diff --git a/vendor/iri-string/src/template.rs b/vendor/iri-string/src/template.rs new file mode 100644 index 00000000..3c647ff2 --- /dev/null +++ b/vendor/iri-string/src/template.rs @@ -0,0 +1,200 @@ +//! Processor for [RFC 6570] URI Template. +//! +//! [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html +//! +//! # Usage +//! +//! 1. Prepare a template. +//! * You can create a template as [`UriTemplateStr`] +#![cfg_attr( + feature = "alloc", + doc = " type (borrowed) or [`UriTemplateString`] type (owned)." +)] +#![cfg_attr(not(feature = "alloc"), doc = " type.")] +//! 2. Prepare a context. +//! 
* Create a value of type that implements [`Context`] trait. +#![cfg_attr( + feature = "alloc", + doc = " * Or, if you use [`SimpleContext`], insert key-value pairs into it." +)] +//! 3. Expand. +//! * Pass the context to [`UriTemplateStr::expand`] method of the template. +//! 4. Use the result. +//! * Returned [`Expanded`] object can be directly printed since it +//! implements [`Display`][`core::fmt::Display`] trait. Or, you can call +//! `.to_string()` method of the `alloc::string::ToString` trait to +//! convert it to a `String`. +//! +//! # Examples +//! +//! ## Custom context type +//! +//! For details, see [the documentation of `context` module][`context`]. +//! +//! ``` +//! # use iri_string::template::Error; +//! use core::fmt; +//! use iri_string::spec::{IriSpec, Spec, UriSpec}; +//! use iri_string::template::UriTemplateStr; +//! use iri_string::template::context::{Context, VarName, Visitor}; +//! +//! struct UserInfo { +//! username: &'static str, +//! utf8_available: bool, +//! } +//! +//! impl Context for UserInfo { +//! fn visit<V: Visitor>( +//! &self, +//! visitor: V, +//! ) -> V::Result { +//! match visitor.var_name().as_str() { +//! "username" => visitor.visit_string(self.username), +//! "utf8" => { +//! if self.utf8_available { +//! // U+2713 CHECK MARK +//! visitor.visit_string("\u{2713}") +//! } else { +//! visitor.visit_undefined() +//! } +//! } +//! _ => visitor.visit_undefined() +//! } +//! } +//! } +//! +//! let context = UserInfo { +//! username: "foo", +//! utf8_available: true, +//! }; +//! +//! let template = UriTemplateStr::new("/users/{username}{?utf8}")?; +//! +//! # #[cfg(feature = "alloc")] { +//! assert_eq!( +//! template.expand::<UriSpec, _>(&context)?.to_string(), +//! "/users/foo?utf8=%E2%9C%93" +//! ); +//! assert_eq!( +//! template.expand::<IriSpec, _>(&context)?.to_string(), +//! "/users/foo?utf8=\u{2713}" +//! ); +//! # } +//! # Ok::<_, Error>(()) +//! ``` +//! +//! 
/// Kind of value a template variable holds.
#[derive(Debug, Clone, Copy)]
enum ValueType {
    /// Undefined (i.e. null).
    Undefined,
    /// String value.
    String,
    /// List.
    List,
    /// Associative array.
    Assoc,
}

impl ValueType {
    /// Value type reported for an undefined variable.
    #[inline]
    #[must_use]
    pub const fn undefined() -> Self {
        Self::Undefined
    }

    /// Value type reported for a string variable.
    #[inline]
    #[must_use]
    pub const fn string() -> Self {
        Self::String
    }

    /// Value type reported for a list without items.
    ///
    /// An empty list is reported as [`ValueType::Undefined`].
    #[inline]
    #[must_use]
    pub const fn empty_list() -> Self {
        Self::Undefined
    }

    /// Value type reported for a list with at least one item.
    #[inline]
    #[must_use]
    pub const fn nonempty_list() -> Self {
        Self::List
    }

    /// Value type reported for an associative array without entries.
    ///
    /// An empty associative array is reported as [`ValueType::Undefined`].
    #[inline]
    #[must_use]
    pub const fn empty_assoc() -> Self {
        Self::Undefined
    }

    /// Value type reported for an associative array with at least one entry.
    #[inline]
    #[must_use]
    pub const fn nonempty_assoc() -> Self {
        Self::Assoc
    }
}
+ #[must_use] + pub(super) fn decompose(&self) -> (Operator, VarListStr<'a>) { + debug_assert!( + !self.0.is_empty(), + "[precondition] valid expression body is not empty" + ); + let first = self.0.as_bytes()[0]; + if first.is_ascii_alphanumeric() || (first == b'_') || (first == b'%') { + // The first byte is a part of the variable list. + (Operator::String, VarListStr::new(self.0)) + } else { + let op = Operator::from_byte(first).unwrap_or_else(|| { + unreachable!( + "[precondition] valid expression has (optional) \ + valid operator, but got a byte {first:#02x?}" + ) + }); + (op, VarListStr::new(&self.0[1..])) + } + } + + /// Returns the raw expression in a string slice. + #[inline] + #[must_use] + pub(super) fn as_str(&self) -> &'a str { + self.0 + } +} + +/// Variable name. +// QUESTION: Should hexdigits in percent-encoded triplets be compared case sensitively? +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct VarName<'a>(&'a str); + +impl<'a> VarName<'a> { + /// Creates a `VarName` from the trusted string. + /// + /// # Precondition + /// + /// The given string should be a valid variable name. + #[inline] + #[must_use] + pub(super) fn from_trusted(s: &'a str) -> Self { + Self(s) + } + + /// Creates a `VarName` from the string. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// use iri_string::template::context::VarName; + /// + /// let name = VarName::new("hello")?; + /// assert_eq!(name.as_str(), "hello"); + /// + /// assert!(VarName::new("0+non-variable-name").is_err()); + /// + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn new(s: &'a str) -> Result<Self, Error> { + match validate_parser::validate_varname(s, 0) { + Ok(_) => Ok(Self::from_trusted(s)), + Err(e) => Err(e), + } + } + + /// Returns the varibale name. + #[inline] + #[must_use] + pub fn as_str(&self) -> &'a str { + self.0 + } +} + +/// Variable specifier. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct VarSpec<'a> { + /// Variable name. + name: VarName<'a>, + /// Variable modifier. + modifier: Modifier, +} + +impl<'a> VarSpec<'a> { + /// Returns the varibale name. + #[inline] + #[must_use] + pub(super) fn name(&self) -> VarName<'a> { + self.name + } + + /// Returns the modifier. + #[inline] + #[must_use] + pub(super) fn modifier(&self) -> Modifier { + self.modifier + } + + /// Parses the trusted varspec string. + /// + /// # Panics + /// + /// May panic if the input is invalid. + #[must_use] + pub(super) fn parse_trusted(s: &'a str) -> Self { + if let Some(varname) = s.strip_suffix('*') { + // `varname "*"`. + return Self { + name: VarName::from_trusted(varname), + modifier: Modifier::Explode, + }; + } + // `varname ":" max-length` or `varname`. + match find_split_hole(s, b':') { + Some((varname, max_len)) => { + let max_len: u16 = max_len + .parse() + .expect("[precondition] the input should be valid `varspec`"); + Self { + name: VarName::from_trusted(varname), + modifier: Modifier::MaxLen(max_len), + } + } + None => Self { + name: VarName(s), + modifier: Modifier::None, + }, + } + } +} + +/// Variable list. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) struct VarListStr<'a>(&'a str); + +impl<'a> VarListStr<'a> { + /// Creates a new variable list. + /// + /// # Precondition + /// + /// The given string should be a valid variable list. + #[inline] + #[must_use] + pub(super) fn new(s: &'a str) -> Self { + Self(s) + } +} + +impl<'a> IntoIterator for VarListStr<'a> { + type IntoIter = VarListIter<'a>; + type Item = (usize, VarSpec<'a>); + + #[inline] + fn into_iter(self) -> Self::IntoIter { + VarListIter { rest: self.0 } + } +} + +/// Iterator of variable specs. +#[derive(Debug, Clone)] +pub(super) struct VarListIter<'a> { + /// Remaining input. + rest: &'a str, +} + +impl<'a> Iterator for VarListIter<'a> { + /// A pair of the length of the varspec and the varspec itself. 
+ type Item = (usize, VarSpec<'a>); + + fn next(&mut self) -> Option<Self::Item> { + match find_split_hole(self.rest, b',') { + Some((prefix, new_rest)) => { + self.rest = new_rest; + Some((prefix.len(), VarSpec::parse_trusted(prefix))) + } + None => { + if self.rest.is_empty() { + None + } else { + Some(( + self.rest.len(), + VarSpec::parse_trusted(mem::take(&mut self.rest)), + )) + } + } + } + } +} + +/// Variable modifier. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(super) enum Modifier { + /// No modifiers. + None, + /// Max length, greater than 0 and less than 10000. + MaxLen(u16), + /// Explode the variable, e.g. the var spec has `*`. + Explode, +} + +/// Operator that is possibly reserved for future extension. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(super) enum MaybeOperator { + /// Working operator. + Operator(Operator), + /// Reserved for future extensions. + Reserved(OperatorReservedForFuture), +} + +impl MaybeOperator { + /// Returns the operator for the given character. + pub(super) fn from_byte(b: u8) -> Option<Self> { + match b { + b'+' => Some(Self::Operator(Operator::Reserved)), + b'#' => Some(Self::Operator(Operator::Fragment)), + b'.' => Some(Self::Operator(Operator::Label)), + b'/' => Some(Self::Operator(Operator::PathSegments)), + b';' => Some(Self::Operator(Operator::PathParams)), + b'?' => Some(Self::Operator(Operator::FormQuery)), + b'&' => Some(Self::Operator(Operator::FormQueryCont)), + b'=' => Some(Self::Reserved(OperatorReservedForFuture::Equals)), + b',' => Some(Self::Reserved(OperatorReservedForFuture::Comma)), + b'!' => Some(Self::Reserved(OperatorReservedForFuture::Exclamation)), + b'@' => Some(Self::Reserved(OperatorReservedForFuture::AtSign)), + b'|' => Some(Self::Reserved(OperatorReservedForFuture::Pipe)), + _ => None, + } + } +} + +/// Working operator. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(super) enum Operator { + /// No operator. String expansion. 
+ String, + /// Reserved expansion by `+`. + Reserved, + /// Fragment expansion by `#`. + Fragment, + /// Label expansion by `.`. + Label, + /// Path segments by `/`. + PathSegments, + /// Path-style parameters by `;`. + PathParams, + /// Form-style query by `?`. + FormQuery, + /// Form-style query continuation by `&`. + FormQueryCont, +} + +impl Operator { + /// Returns the operator for the given character. + #[must_use] + pub(super) fn from_byte(b: u8) -> Option<Self> { + match b { + b'+' => Some(Self::Reserved), + b'#' => Some(Self::Fragment), + b'.' => Some(Self::Label), + b'/' => Some(Self::PathSegments), + b';' => Some(Self::PathParams), + b'?' => Some(Self::FormQuery), + b'&' => Some(Self::FormQueryCont), + _ => None, + } + } + + /// Returns the string length of the operator. + #[inline] + #[must_use] + pub(super) const fn len(self) -> usize { + if matches!(self, Self::String) { + 0 + } else { + 1 + } + } +} + +/// Operator reserved for future extension. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(super) enum OperatorReservedForFuture { + /// Reserved `=` operator. + Equals, + /// Reserved `,` operator. + Comma, + /// Reserved `!` operator. + Exclamation, + /// Reserved `@` operator. + AtSign, + /// Reserved `|` operator. + Pipe, +} diff --git a/vendor/iri-string/src/template/context.rs b/vendor/iri-string/src/template/context.rs new file mode 100644 index 00000000..ea3f14bb --- /dev/null +++ b/vendor/iri-string/src/template/context.rs @@ -0,0 +1,339 @@ +//! Template expansion context. +//! +//! # Examples +//! +//! 1. Define your context type. +//! 2. Implement [`Context`] trait (and [`Context::visit`] method) for the type. +//! 1. Get variable name by [`Visitor::var_name`] method. +//! 2. Feed the corresponding value(s) by one of `Visitor::visit_*` methods. +//! +//! Note that contexts should return consistent result across multiple visits for +//! the same variable. In other words, `Context::visit` should return the same +//! 
//! result for the same `Visitor::var_name()` while the context is borrowed.
//! If this condition is violated, the URI template processor can return
//! an invalid result or, at worst, panic.
+/// +/// [`UriTemplateStr::expand`]: `crate::template::UriTemplateStr::expand` +pub trait Context: Sized { + /// Visits a variable. + /// + /// To get variable name, use [`Visitor::var_name()`]. + #[must_use] + fn visit<V: Visitor>(&self, visitor: V) -> V::Result; +} + +/// A trait for types that can behave as a dynamic (mutable) URI template expansion context. +/// +/// This type is for use with [`UriTemplateStr::expand_dynamic`] method and its +/// family. +/// +/// Note that "dynamic" here does not mean that the value of variables can +/// change during a template expansion. The value should be fixed and consistent +/// during each expansion, but the context is allowed to mutate itself if it +/// does not break this rule. +/// +/// # Exmaples +/// +/// ``` +/// # #[cfg(feature = "alloc")] +/// # extern crate alloc; +/// # use iri_string::template::Error; +/// # #[cfg(feature = "alloc")] { +/// # use alloc::string::String; +/// use iri_string::template::UriTemplateStr; +/// use iri_string::template::context::{DynamicContext, Visitor, VisitPurpose}; +/// use iri_string::spec::UriSpec; +/// +/// struct MyContext<'a> { +/// /// Target path. +/// target: &'a str, +/// /// Username. +/// username: Option<&'a str>, +/// /// A flag to remember whether the URI template +/// /// attempted to use `username` variable. +/// username_visited: bool, +/// } +/// +/// impl DynamicContext for MyContext<'_> { +/// fn on_expansion_start(&mut self) { +/// // Reset the state. +/// self.username_visited = false; +/// } +/// fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result { +/// match visitor.var_name().as_str() { +/// "target" => visitor.visit_string(self.target), +/// "username" => { +/// if visitor.purpose() == VisitPurpose::Expand { +/// // The variable `username` is being used +/// // on the template expansion. +/// // Don't care whether `username` is defined or not. 
+/// self.username_visited = true; +/// } +/// if let Some(username) = &self.username { +/// visitor.visit_string(username) +/// } else { +/// visitor.visit_undefined() +/// } +/// } +/// _ => visitor.visit_undefined(), +/// } +/// } +/// } +/// +/// let mut context = MyContext { +/// target: "/posts/1", +/// username: Some("the_admin"), +/// username_visited: false, +/// }; +/// let mut buf = String::new(); +/// +/// // No access to the variable `username`. +/// let template1 = UriTemplateStr::new("{+target}")?; +/// template1.expand_dynamic::<UriSpec, _, _>(&mut buf, &mut context)?; +/// assert_eq!(buf, "/posts/1"); +/// assert!(!context.username_visited); +/// +/// buf.clear(); +/// // Will access to the variable `username`. +/// let template2 = UriTemplateStr::new("{+target}{?username}")?; +/// template2.expand_dynamic::<UriSpec, _, _>(&mut buf, &mut context)?; +/// assert_eq!(buf, "/posts/1?username=the_admin"); +/// assert!(context.username_visited); +/// +/// buf.clear(); +/// context.username = None; +/// // Will access to the variable `username` but it is undefined. +/// template2.expand_dynamic::<UriSpec, _, _>(&mut buf, &mut context)?; +/// assert_eq!(buf, "/posts/1"); +/// assert!( +/// context.username_visited, +/// "`MyContext` can know and remember whether `visit_dynamic()` is called +/// for `username`, even if its value is undefined" +/// ); +/// # } +/// # Ok::<_, Error>(()) +/// ``` +/// +/// [`UriTemplateStr::expand_dynamic`]: `crate::template::UriTemplateStr::expand_dynamic` +pub trait DynamicContext: Sized { + /// Visits a variable. + /// + /// To get variable name, use [`Visitor::var_name()`]. + /// + /// # Restriction + /// + /// The visit results should be consistent and unchanged between the last + /// time [`on_expansion_start`][`Self::on_expansion_start`] was called and + /// the next time [`on_expansion_end`][`Self::on_expansion_end`] will be + /// called. 
If this condition is violated, template expansion will produce + /// wrong result or may panic at worst. + #[must_use] + fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result; + + /// A callback that is called before the expansion of a URI template. + #[inline] + fn on_expansion_start(&mut self) {} + + /// A callback that is called after the expansion of a URI template. + #[inline] + fn on_expansion_end(&mut self) {} +} + +impl<C: Context> DynamicContext for C { + #[inline] + fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result { + self.visit(visitor) + } +} + +/// A purpose of a visit. +/// +/// This enum is nonexhaustive since this partially exposes the internal +/// implementation of the template expansion, and thus this is subject to +/// change. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum VisitPurpose { + /// A visit for type checking. + Typecheck, + /// A visit for template expansion to retrieve the value. + Expand, +} + +/// Variable visitor. +/// +/// See [the module documentation][self] for usage. +// NOTE (internal): Visitor types **should not** be cloneable. +pub trait Visitor: Sized + private::Sealed { + /// Result of the visit. + type Result; + /// List visitor. + type ListVisitor: ListVisitor<Result = Self::Result>; + /// Associative array visitor. + type AssocVisitor: AssocVisitor<Result = Self::Result>; + + /// Returns the name of the variable to visit. + #[must_use] + fn var_name(&self) -> VarName<'_>; + /// Returns the purpose of the visit. + /// + /// The template expansion algorithm checks the types for some variables + /// depending on its usage. To get the usage count correctly, you should + /// only count visits with [`VisitPurpose::Expand`]. + /// + /// If you need to know whether the variable is accessed and does not + /// need dynamic context generation or access counts, consider using + /// [`UriTemplateStr::variables`] method to iterate the variables in the + /// URI template. 
+ /// + /// [`UriTemplateStr::variables`]: `crate::template::UriTemplateStr::variables` + #[must_use] + fn purpose(&self) -> VisitPurpose; + /// Visits an undefined variable, i.e. indicates that the requested variable is unavailable. + #[must_use] + fn visit_undefined(self) -> Self::Result; + /// Visits a string variable. + #[must_use] + fn visit_string<T: fmt::Display>(self, v: T) -> Self::Result; + /// Visits a list variable. + #[must_use] + fn visit_list(self) -> Self::ListVisitor; + /// Visits an associative array variable. + #[must_use] + fn visit_assoc(self) -> Self::AssocVisitor; +} + +/// List visitor. +/// +/// See [the module documentation][self] for usage. +// NOTE (internal): Visitor types **should not** be cloneable. +pub trait ListVisitor: Sized + private::Sealed { + /// Result of the visit. + type Result; + + /// Visits an item. + /// + /// If this returned `ControlFlow::Break(v)`, [`Context::visit`] should also + /// return this `v`. + /// + /// To feed multiple items at once, do + /// `items.into_iter().try_for_each(|item| self.visit_item(item))` for example. + #[must_use] + fn visit_item<T: fmt::Display>(&mut self, item: T) -> ControlFlow<Self::Result>; + /// Finishes visiting the list. + #[must_use] + fn finish(self) -> Self::Result; + + /// Visits items and finish. + #[must_use] + fn visit_items_and_finish<T, I>(mut self, items: I) -> Self::Result + where + T: fmt::Display, + I: IntoIterator<Item = T>, + { + match items.into_iter().try_for_each(|item| self.visit_item(item)) { + ControlFlow::Break(v) => v, + ControlFlow::Continue(()) => self.finish(), + } + } +} + +/// Associative array visitor. +/// +/// See [the module documentation][self] for usage. +// NOTE (internal): Visitor types **should not** be cloneable. +pub trait AssocVisitor: Sized + private::Sealed { + /// Result of the visit. + type Result; + + /// Visits an entry. + /// + /// If this returned `ControlFlow::Break(v)`, [`Context::visit`] should also + /// return this `v`. 
+ /// + /// To feed multiple items at once, do + /// `entries.into_iter().try_for_each(|(key, value)| self.visit_entry(key, value))` + /// for example. + #[must_use] + fn visit_entry<K: fmt::Display, V: fmt::Display>( + &mut self, + key: K, + value: V, + ) -> ControlFlow<Self::Result>; + /// Finishes visiting the associative array. + #[must_use] + fn finish(self) -> Self::Result; + + /// Visits entries and finish. + #[must_use] + fn visit_entries_and_finish<K, V, I>(mut self, entries: I) -> Self::Result + where + K: fmt::Display, + V: fmt::Display, + I: IntoIterator<Item = (K, V)>, + { + match entries + .into_iter() + .try_for_each(|(key, value)| self.visit_entry(key, value)) + { + ControlFlow::Break(v) => v, + ControlFlow::Continue(()) => self.finish(), + } + } +} + +/// Private module to put the trait to seal. +pub(super) mod private { + /// A trait for visitor types of variables in a context. + pub trait Sealed {} +} diff --git a/vendor/iri-string/src/template/error.rs b/vendor/iri-string/src/template/error.rs new file mode 100644 index 00000000..f5206a4b --- /dev/null +++ b/vendor/iri-string/src/template/error.rs @@ -0,0 +1,154 @@ +//! Errors related to URI templates. + +use core::fmt; + +#[cfg(feature = "std")] +use std::error; + +/// Template construction and expansion error kind. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum ErrorKind { + /// Cannot write to the backend. + WriteFailed, + /// Expression is not closed. + ExpressionNotClosed, + /// Invalid character. + InvalidCharacter, + /// Invalid expression. + InvalidExpression, + /// Invalid percent-encoded triplets. + InvalidPercentEncoding, + /// Invalid UTF-8 bytes. + InvalidUtf8, + /// Unexpected value type for the variable. + UnexpectedValueType, + /// Unsupported operator, including operators reserved for future. + UnsupportedOperator, +} + +impl ErrorKind { + /// Returns the error message. 
+ #[must_use] + fn as_str(self) -> &'static str { + match self { + Self::WriteFailed => "failed to write to the backend writer", + Self::ExpressionNotClosed => "expression not closed", + Self::InvalidCharacter => "invalid character", + Self::InvalidExpression => "invalid expression", + Self::InvalidPercentEncoding => "invalid percent-encoded triplets", + Self::InvalidUtf8 => "invalid utf-8 byte sequence", + Self::UnexpectedValueType => "unexpected value type for the variable", + Self::UnsupportedOperator => "unsupported operator", + } + } +} + +/// Template construction and expansion error. +/// +// Note that this type should implement `Copy` trait. +// To return additional non-`Copy` data as an error, use wrapper type +// (as `std::string::FromUtf8Error` contains `std::str::Utf8Error`). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Error { + /// Error kind. + kind: ErrorKind, + /// Location (byte position of the error). + location: usize, +} + +impl Error { + /// Creates a new `Error`. + /// + /// For internal use. + #[inline] + #[must_use] + pub(super) fn new(kind: ErrorKind, location: usize) -> Self { + Self { kind, location } + } + + /// Returns the byte position the error is detected. + /// + /// NOTE: This is not a part of the public API since the value to be + /// returned (i.e., the definition of the "position" of an error) is not + /// guaranteed to be stable. + #[cfg(test)] + pub(super) fn location(&self) -> usize { + self.location + } +} + +impl fmt::Display for Error { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "invalid URI template: {} (at {}-th byte)", + self.kind.as_str(), + self.location + ) + } +} + +#[cfg(feature = "std")] +impl error::Error for Error {} + +/// Error on conversion into a URI template type. +// TODO: Unifiable to `types::CreationError`? +#[cfg(feature = "alloc")] +pub struct CreationError<T> { + /// Soruce data. + source: T, + /// Validation error. 
+ error: Error, +} + +#[cfg(feature = "alloc")] +impl<T> CreationError<T> { + /// Returns the source data. + #[must_use] + pub fn into_source(self) -> T { + self.source + } + + /// Returns the validation error. + #[must_use] + pub fn validation_error(&self) -> Error { + self.error + } + + /// Creates a new `CreationError`. + #[must_use] + pub(crate) fn new(error: Error, source: T) -> Self { + Self { source, error } + } +} + +#[cfg(feature = "alloc")] +impl<T: fmt::Debug> fmt::Debug for CreationError<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CreationError") + .field("source", &self.source) + .field("error", &self.error) + .finish() + } +} + +#[cfg(feature = "alloc")] +impl<T: Clone> Clone for CreationError<T> { + fn clone(&self) -> Self { + Self { + source: self.source.clone(), + error: self.error, + } + } +} + +#[cfg(feature = "alloc")] +impl<T> fmt::Display for CreationError<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.error.fmt(f) + } +} + +#[cfg(feature = "std")] +impl<T: fmt::Debug> error::Error for CreationError<T> {} diff --git a/vendor/iri-string/src/template/expand.rs b/vendor/iri-string/src/template/expand.rs new file mode 100644 index 00000000..605043ab --- /dev/null +++ b/vendor/iri-string/src/template/expand.rs @@ -0,0 +1,1039 @@ +//! Expansion. 
+ +use core::fmt::{self, Write as _}; +use core::marker::PhantomData; +use core::mem; +use core::ops::ControlFlow; + +#[cfg(feature = "alloc")] +use alloc::string::{String, ToString}; + +use crate::parser::str::{find_split, find_split_hole}; +use crate::parser::str::{process_percent_encoded_best_effort, PctEncodedFragments}; +use crate::percent_encode::PercentEncoded; +use crate::spec::Spec; +use crate::template::components::{ExprBody, Modifier, Operator, VarName, VarSpec}; +use crate::template::context::{ + private::Sealed as VisitorSealed, AssocVisitor, Context, DynamicContext, ListVisitor, + VisitPurpose, Visitor, +}; +use crate::template::error::{Error, ErrorKind}; +use crate::template::{UriTemplateStr, ValueType}; +#[cfg(feature = "alloc")] +use crate::types; + +/// A chunk in a template string. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum Chunk<'a> { + /// Literal. + Literal(&'a str), + /// Expression excluding the wrapping braces. + Expr(ExprBody<'a>), +} + +/// Iterator of template chunks. +#[derive(Debug, Clone)] +pub(super) struct Chunks<'a> { + /// Template. + template: &'a str, +} + +impl<'a> Chunks<'a> { + /// Creates a new iterator. + #[inline] + #[must_use] + pub(super) fn new(template: &'a UriTemplateStr) -> Self { + Self { + template: template.as_str(), + } + } +} + +impl<'a> Iterator for Chunks<'a> { + type Item = Chunk<'a>; + + fn next(&mut self) -> Option<Self::Item> { + if self.template.is_empty() { + return None; + } + match find_split(self.template, b'{') { + Some(("", _)) => { + let (expr_body, rest) = find_split_hole(&self.template[1..], b'}') + .expect("[validity] expression inside a template must be closed"); + self.template = rest; + Some(Chunk::Expr(ExprBody::new(expr_body))) + } + Some((lit, rest)) => { + self.template = rest; + Some(Chunk::Literal(lit)) + } + None => Some(Chunk::Literal(mem::take(&mut self.template))), + } + } +} + +/// Template expansion result. 
+#[derive(Debug, Clone, Copy)] +pub struct Expanded<'a, S, C> { + /// Compiled template. + template: &'a UriTemplateStr, + /// Context. + context: &'a C, + /// Spec. + _spec: PhantomData<fn() -> S>, +} + +impl<'a, S: Spec, C: Context> Expanded<'a, S, C> { + /// Creates a new `Expanded` object. + #[inline] + pub(super) fn new(template: &'a UriTemplateStr, context: &'a C) -> Result<Self, Error> { + Self::typecheck_context(template, context)?; + Ok(Self { + template, + context, + _spec: PhantomData, + }) + } + + /// Checks if the types of variables are allowed for the corresponding expressions in the template. + fn typecheck_context(template: &UriTemplateStr, context: &C) -> Result<(), Error> { + let mut pos = 0; + for chunk in Chunks::new(template) { + let (expr_len, (op, varlist)) = match chunk { + Chunk::Expr(expr_body) => (expr_body.as_str().len(), expr_body.decompose()), + Chunk::Literal(lit) => { + pos += lit.len(); + continue; + } + }; + // +2: wrapping braces (`{` and `}`). + let chunk_end_pos = pos + expr_len + 2; + // +1: opening brace `{`. + pos += op.len() + 1; + for (varspec_len, varspec) in varlist { + let ty = context.visit(TypeVisitor::new(varspec.name())); + let modifier = varspec.modifier(); + + if matches!(modifier, Modifier::MaxLen(_)) + && matches!(ty, ValueType::List | ValueType::Assoc) + { + // > Prefix modifiers are not applicable to variables that + // > have composite values. + // + // --- [RFC 6570 Section 2.4.1. Prefix](https://www.rfc-editor.org/rfc/rfc6570.html#section-2.4.1) + return Err(Error::new(ErrorKind::UnexpectedValueType, pos)); + } + + // +1: A trailing comman (`,`) or a closing brace (`}`). 
+ pos += varspec_len + 1; + } + assert_eq!(pos, chunk_end_pos); + } + Ok(()) + } +} + +impl<S: Spec, C: Context> fmt::Display for Expanded<'_, S, C> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for chunk in Chunks::new(self.template) { + let expr = match chunk { + Chunk::Literal(lit) => { + f.write_str(lit)?; + continue; + } + Chunk::Expr(body) => body, + }; + expand::<S, _>(f, expr, self.context)?; + } + + Ok(()) + } +} + +/// Implement `TryFrom<Expanded<...>> for SomeUriStringType`. +macro_rules! impl_try_from_expanded { + ($ty_outer:ident) => { + #[cfg(feature = "alloc")] + impl<S: Spec, C: Context> TryFrom<Expanded<'_, S, C>> for types::$ty_outer<S> { + type Error = types::CreationError<String>; + + #[inline] + fn try_from(v: Expanded<'_, S, C>) -> Result<Self, Self::Error> { + Self::try_from(v.to_string()) + } + } + }; +} + +// Not implementing `TryFrom<Expand<...>>` for query and fragment strings +// since they cannot behave as a query or a fragment only by themselves. +// Query strings in practical starts with `?` prefix but `RiQueryStr{,ing}` +// strips that, and so do fragment strings (but `#` instead of `?`). +// Because of this, query and fragment string types won't be used to represent +// a relative IRIs without combining the prefix. +// +// In contrast, RFC 6570 URI Template expects that the users are constructing a +// "working" IRIs, including the necessary prefixes for syntax components. +// For example, fragment expansion `{#var}`, where `var` is "hello", expands to +// `#hello`, including the prefix `#`. This means that a URI template will be +// used to generate neither `RiQueryStr{,ing}` nor `RiFragmentStr{,ing}` strings. +impl_try_from_expanded!(RiAbsoluteString); +impl_try_from_expanded!(RiReferenceString); +impl_try_from_expanded!(RiRelativeString); +impl_try_from_expanded!(RiString); + +/// Expands the whole template with the dynamic context. 
///
/// Calls `context.on_expansion_start()` before the expansion and
/// `context.on_expansion_end()` after it; the end callback runs whether the
/// expansion returned `Ok` or `Err`.
///
/// # Errors
///
/// Returns the error produced by `expand_whole_dynamic_impl`.
pub(super) fn expand_whole_dynamic<S: Spec, W: fmt::Write, C: DynamicContext>(
    template: &UriTemplateStr,
    writer: &mut W,
    context: &mut C,
) -> Result<(), Error> {
    context.on_expansion_start();
    // Keep the result aside so that the end callback is invoked before returning.
    let result = expand_whole_dynamic_impl::<S, W, C>(template, writer, context);
    context.on_expansion_end();
    result
}

/// Expands the whole template with the dynamic context.
///
/// Note that the caller is responsible to set up or finalize the `context`.
///
/// # Errors
///
/// Returns `ErrorKind::WriteFailed` if writing a literal fails, or the error
/// returned by `expand_expr_mut` for an expression. The error location is the
/// byte offset tracked in `pos`.
fn expand_whole_dynamic_impl<S: Spec, W: fmt::Write, C: DynamicContext>(
    template: &UriTemplateStr,
    writer: &mut W,
    context: &mut C,
) -> Result<(), Error> {
    // Byte offset of the chunk being processed, used for error locations.
    let mut pos = 0;
    for chunk in Chunks::new(template) {
        let expr = match chunk {
            Chunk::Literal(lit) => {
                // Literals are copied verbatim.
                writer
                    .write_str(lit)
                    .map_err(|_| Error::new(ErrorKind::WriteFailed, pos))?;
                pos += lit.len();
                continue;
            }
            Chunk::Expr(body) => body,
        };
        // `expand_expr_mut` advances `pos` past the whole expression.
        expand_expr_mut::<S, _, _>(writer, &mut pos, expr, context)?;
    }

    Ok(())
}

/// Expands the expression using the given operator and the dynamic context.
///
/// Each variable is visited twice: first with a `TypeVisitor` to check its
/// type, then with a `ValueVisitor` to write its value. On return `*pos` has
/// been advanced past the expression, including both braces.
///
/// # Errors
///
/// * `ErrorKind::UnexpectedValueType` if a prefix (`MaxLen`) modifier is
///   applied to a list or an associative array.
/// * `ErrorKind::WriteFailed` if the value visit returns an error.
///
/// # Panics
///
/// Panics if the token returned from the context does not refer to `writer`,
/// or if the position bookkeeping does not end exactly at the end of the
/// expression.
fn expand_expr_mut<S: Spec, W: fmt::Write, C: DynamicContext>(
    writer: &mut W,
    pos: &mut usize,
    expr: ExprBody<'_>,
    context: &mut C,
) -> Result<(), Error> {
    let (op, varlist) = expr.decompose();

    // Shared with every `ValueVisitor` of this expression: the visitor that
    // emits the first value clears it.
    let mut is_first_varspec = true;
    // +2: wrapping braces (`{` and `}`).
    let chunk_end_pos = *pos + expr.as_str().len() + 2;
    // +1: opening brace `{`.
    *pos += op.len() + 1;
    for (varspec_len, varspec) in varlist {
        // Check the type before the actual expansion.
        let ty = context.visit_dynamic(TypeVisitor::new(varspec.name()));
        let modifier = varspec.modifier();

        if matches!(modifier, Modifier::MaxLen(_))
            && matches!(ty, ValueType::List | ValueType::Assoc)
        {
            // > Prefix modifiers are not applicable to variables that
            // > have composite values.
            //
            // --- [RFC 6570 Section 2.4.1. Prefix](https://www.rfc-editor.org/rfc/rfc6570.html#section-2.4.1)
            return Err(Error::new(ErrorKind::UnexpectedValueType, *pos));
        }

        // Typecheck passed. Expand.
        let visitor = ValueVisitor::<S, _>::new(writer, varspec, op, &mut is_first_varspec);
        let token = context
            .visit_dynamic(visitor)
            .map_err(|_| Error::new(ErrorKind::WriteFailed, *pos))?;
        // The token carries the visitor back; compare its writer with ours to
        // make sure the token came from the visitor created above.
        let writer_ptr = token.writer_ptr();
        if writer_ptr != writer as *mut _ {
            // Invalid `VisitDoneToken` was returned. This cannot usually happen
            // without intentional unnatural usage.
            panic!("invalid `VisitDoneToken` was returned");
        }

        // +1: A trailing comma (`,`) or a closing brace (`}`).
        *pos += varspec_len + 1;
    }
    // Sanity check of the position bookkeeping above.
    assert_eq!(*pos, chunk_end_pos);

    Ok(())
}

/// Properties of an operator.
///
/// See [RFC 6570 Appendix A](https://www.rfc-editor.org/rfc/rfc6570#appendix-A).
#[derive(Debug, Clone, Copy)]
struct OpProps {
    /// Prefix for the first element.
    first: &'static str,
    /// Separator.
    sep: &'static str,
    /// Whether or not the expansion includes the variable or key name.
    named: bool,
    /// Result string if the variable is empty.
    ifemp: &'static str,
    /// Whether or not the reserved values can be written without being encoded.
    allow_reserved: bool,
}

impl OpProps {
    /// Properties for all known operators.
    ///
    /// The order of the entries must match the indices used in `from_op`.
    const PROPS: [Self; 8] = [
        // String
        Self {
            first: "",
            sep: ",",
            named: false,
            ifemp: "",
            allow_reserved: false,
        },
        // Reserved
        Self {
            first: "",
            sep: ",",
            named: false,
            ifemp: "",
            allow_reserved: true,
        },
        // Fragment
        Self {
            first: "#",
            sep: ",",
            named: false,
            ifemp: "",
            allow_reserved: true,
        },
        // Label
        Self {
            first: ".",
            sep: ".",
            named: false,
            ifemp: "",
            allow_reserved: false,
        },
        // PathSegments
        Self {
            first: "/",
            sep: "/",
            named: false,
            ifemp: "",
            allow_reserved: false,
        },
        // PathParams
        Self {
            first: ";",
            sep: ";",
            named: true,
            ifemp: "",
            allow_reserved: false,
        },
        // FormQuery
        Self {
            first: "?",
            sep: "&",
            named: true,
            ifemp: "=",
            allow_reserved: false,
        },
        // FormQueryCont
        Self {
            first: "&",
            sep: "&",
            named: true,
            ifemp: "=",
            allow_reserved: false,
        },
    ];

    /// Returns the properties for the operator.
    #[must_use]
    #[inline]
    pub(super) fn from_op(op: Operator) -> &'static Self {
        // Index into `PROPS`; keep in sync with the order of the table.
        let index = match op {
            Operator::String => 0,
            Operator::Reserved => 1,
            Operator::Fragment => 2,
            Operator::Label => 3,
            Operator::PathSegments => 4,
            Operator::PathParams => 5,
            Operator::FormQuery => 6,
            Operator::FormQueryCont => 7,
        };
        &Self::PROPS[index]
    }
}

/// Expands the expression using the given operator.
///
/// Unlike `expand_expr_mut`, this function performs no type check and does
/// not track byte positions; it only writes the values to the formatter.
///
/// # Panics
///
/// Panics if the token returned from the context does not refer to `f`.
fn expand<S: Spec, C: Context>(
    f: &mut fmt::Formatter<'_>,
    expr: ExprBody<'_>,
    context: &C,
) -> fmt::Result {
    let (op, varlist) = expr.decompose();

    // Shared with every `ValueVisitor` of this expression.
    let mut is_first_varspec = true;
    for (_varspec_len, varspec) in varlist {
        let visitor = ValueVisitor::<S, _>::new(f, varspec, op, &mut is_first_varspec);
        let token = context.visit(visitor)?;
        // Make sure the token came from the visitor created above.
        let writer_ptr = token.writer_ptr();
        if writer_ptr != f as *mut _ {
            // Invalid `VisitDoneToken` was returned. This cannot usually happen
            // without intentional unnatural usage.
            panic!("invalid `VisitDoneToken` was returned");
        }
    }

    Ok(())
}

/// Escapes the given value and writes it.
///
/// When `allow_reserved` is true the value is processed fragment by fragment
/// via `process_percent_encoded_best_effort`; otherwise the whole value is
/// written through `PercentEncoded::unreserve`.
#[inline]
fn escape_write<S: Spec, T: fmt::Display, W: fmt::Write>(
    f: &mut W,
    v: T,
    allow_reserved: bool,
) -> fmt::Result {
    if allow_reserved {
        let result = process_percent_encoded_best_effort(v, |frag| {
            let result = match frag {
                // Written through unchanged.
                PctEncodedFragments::Char(s, _) => f.write_str(s),
                PctEncodedFragments::NoPctStr(s) => {
                    write!(f, "{}", PercentEncoded::<_, S>::characters(s))
                }
                // A `%` that does not start a triplet is written as `%25`.
                PctEncodedFragments::StrayPercent => f.write_str("%25"),
                // Written through unchanged.
                PctEncodedFragments::InvalidUtf8PctTriplets(s) => f.write_str(s),
            };
            // Stop at the first write error and carry it out of the callback.
            if result.is_err() {
                return ControlFlow::Break(result);
            }
            ControlFlow::Continue(())
        });
        // Flatten the nested result: any error, from either layer, is returned.
        match result {
            Ok(ControlFlow::Break(Ok(_)) | ControlFlow::Continue(_)) => Ok(()),
            Ok(ControlFlow::Break(Err(e))) | Err(e) => Err(e),
        }
    } else {
        /// Writer that escapes the unreserved characters and writes them.
        struct UnreservePercentEncodeWriter<'a, S, W> {
            /// Inner writer.
            writer: &'a mut W,
            /// Spec.
            _spec: PhantomData<fn() -> S>,
        }
        impl<S: Spec, W: fmt::Write> fmt::Write for UnreservePercentEncodeWriter<'_, S, W> {
            #[inline]
            fn write_str(&mut self, s: &str) -> fmt::Result {
                write!(self.writer, "{}", PercentEncoded::<_, S>::unreserve(s))
            }
        }
        // Format `v` through the adapter so that every piece written by its
        // `Display` implementation goes through `unreserve`.
        let mut writer = UnreservePercentEncodeWriter::<S, W> {
            writer: f,
            _spec: PhantomData,
        };
        write!(writer, "{v}")
    }
}

/// Truncates the given value as a string, escapes the value, and writes it.
+fn escape_write_with_maxlen<S: Spec, T: fmt::Display, W: fmt::Write>( + writer: &mut PrefixOnceWriter<'_, W>, + v: T, + allow_reserved: bool, + max_len: Option<u16>, +) -> fmt::Result { + if allow_reserved { + let mut max_len = max_len.map_or(usize::MAX, usize::from); + let result = process_percent_encoded_best_effort(v, |frag| { + if max_len == 0 { + return ControlFlow::Break(Ok(())); + } + let result = + match frag { + PctEncodedFragments::Char(s, _) => { + max_len -= 1; + writer.write_str(s) + } + PctEncodedFragments::NoPctStr(s) => { + let mut chars = s.char_indices(); + let count = + chars.by_ref().take(max_len).last().map(|(i, _)| i).expect( + "[consistency] decomposed string fragment must not be empty", + ); + let sub_len = s.len() - chars.as_str().len(); + max_len -= count; + write!( + writer, + "{}", + PercentEncoded::<_, S>::characters(&s[..sub_len]) + ) + } + PctEncodedFragments::StrayPercent => { + max_len -= 1; + writer.write_str("%25") + } + PctEncodedFragments::InvalidUtf8PctTriplets(s) => { + let count = max_len.min(s.len() / 3); + let sub_len = count * 3; + max_len -= count; + writer.write_str(&s[..sub_len]) + } + }; + if result.is_err() { + return ControlFlow::Break(result); + } + ControlFlow::Continue(()) + }); + match result { + Ok(ControlFlow::Break(Ok(_)) | ControlFlow::Continue(_)) => Ok(()), + Ok(ControlFlow::Break(Err(e))) | Err(e) => Err(e), + } + } else { + match max_len { + Some(max_len) => { + let mut writer = TruncatePercentEncodeWriter::<S, _> { + inner: writer, + rest_num_chars: usize::from(max_len), + _spec: PhantomData, + }; + write!(writer, "{v}") + } + None => write!(writer, "{}", PercentEncoded::<_, S>::unreserve(v)), + } + } +} + +/// A writer that truncates the input to the given length and writes to the backend. +struct TruncatePercentEncodeWriter<'a, S, W> { + /// Inner writer. + inner: &'a mut W, + /// Maximum number of characters to be written. + rest_num_chars: usize, + /// Spec. 
+ _spec: PhantomData<fn() -> S>, +} + +impl<S: Spec, W: fmt::Write> fmt::Write for TruncatePercentEncodeWriter<'_, S, W> { + fn write_str(&mut self, s: &str) -> fmt::Result { + if self.rest_num_chars == 0 { + return Ok(()); + } + let mut chars = s.char_indices(); + let skip_count = chars + .by_ref() + .take(self.rest_num_chars) + .last() + .map_or(0, |(i, _)| i + 1); + let len = s.len() - chars.as_str().len(); + let truncated = &s[..len]; + write!( + self.inner, + "{}", + PercentEncoded::<_, S>::unreserve(truncated) + )?; + self.rest_num_chars -= skip_count; + Ok(()) + } +} + +/// A writer that writes a prefix only once if and only if some value is written. +struct PrefixOnceWriter<'a, W> { + /// Inner writer. + inner: &'a mut W, + /// Prefix to write. + prefix: Option<&'a str>, +} + +impl<'a, W: fmt::Write> PrefixOnceWriter<'a, W> { + /// Creates a new writer with no prefix. + #[inline] + #[must_use] + fn new(inner: &'a mut W) -> Self { + Self { + inner, + prefix: None, + } + } + + /// Creates a new writer with a prefix. + #[inline] + #[must_use] + fn with_prefix(inner: &'a mut W, prefix: &'a str) -> Self { + Self { + inner, + prefix: Some(prefix), + } + } + + /// Returns true if the writer have not yet written the prefix. + #[inline] + #[must_use] + fn has_unwritten_prefix(&self) -> bool { + self.prefix.is_some() + } +} + +impl<W: fmt::Write> fmt::Write for PrefixOnceWriter<'_, W> { + #[inline] + fn write_str(&mut self, s: &str) -> fmt::Result { + if let Some(prefix) = self.prefix.take() { + self.inner.write_str(prefix)?; + } + self.inner.write_str(s) + } +} + +/// An opaque token value that proves some variable is visited. +// This should not be able to be created by any means other than `VarVisitor::visit_foo()`. +// Do not derive any traits that allows the value to be generated or cloned. +struct VisitDoneToken<'a, S, W>(ValueVisitor<'a, S, W>); + +impl<'a, S: Spec, W: fmt::Write> VisitDoneToken<'a, S, W> { + /// Creates a new token. 
+ #[inline] + #[must_use] + fn new(visitor: ValueVisitor<'a, S, W>) -> Self { + Self(visitor) + } + + /// Returns the raw pointer to the backend formatter. + #[inline] + #[must_use] + fn writer_ptr(&self) -> *const W { + self.0.writer_ptr() + } +} + +impl<S: Spec, W: fmt::Write> fmt::Debug for VisitDoneToken<'_, S, W> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("VisitDoneToken") + } +} + +/// Visitor to retrieve a variable value. +// Single `ValueVisitor` should be used for single expansion. +// Do not derive any traits that allows the value to be generated or cloned. +struct ValueVisitor<'a, S, W> { + /// Formatter. + writer: &'a mut W, + /// Varspec. + varspec: VarSpec<'a>, + /// Operator. + op: Operator, + /// Whether the variable to visit is the first one in an expression. + is_first_varspec: &'a mut bool, + /// Spec. + _spec: PhantomData<fn() -> S>, +} + +impl<'a, S: Spec, W: fmt::Write> ValueVisitor<'a, S, W> { + /// Creates a visitor. + #[inline] + #[must_use] + fn new( + f: &'a mut W, + varspec: VarSpec<'a>, + op: Operator, + is_first_varspec: &'a mut bool, + ) -> Self { + Self { + writer: f, + varspec, + op, + is_first_varspec, + _spec: PhantomData, + } + } + + /// Returns the raw pointer to the backend formatter. + #[inline] + #[must_use] + fn writer_ptr(&self) -> *const W { + self.writer as &_ as *const _ + } +} + +impl<S: Spec, W: fmt::Write> VisitorSealed for ValueVisitor<'_, S, W> {} + +impl<'a, S: Spec, W: fmt::Write> Visitor for ValueVisitor<'a, S, W> { + type Result = Result<VisitDoneToken<'a, S, W>, fmt::Error>; + type ListVisitor = ListValueVisitor<'a, S, W>; + type AssocVisitor = AssocValueVisitor<'a, S, W>; + + /// Returns the name of the variable to visit. + #[inline] + #[must_use] + fn var_name(&self) -> VarName<'a> { + self.varspec.name() + } + + #[inline] + fn purpose(&self) -> VisitPurpose { + VisitPurpose::Expand + } + + /// Visits an undefined variable, i.e. 
indicates that the requested variable is unavailable. + #[inline] + fn visit_undefined(self) -> Self::Result { + Ok(VisitDoneToken::new(self)) + } + + /// Visits a string variable. + #[inline] + fn visit_string<T: fmt::Display>(self, v: T) -> Self::Result { + let oppr = OpProps::from_op(self.op); + + if mem::replace(self.is_first_varspec, false) { + self.writer.write_str(oppr.first)?; + } else { + self.writer.write_str(oppr.sep)?; + } + let mut writer = if oppr.named { + self.writer.write_str(self.varspec.name().as_str())?; + PrefixOnceWriter::with_prefix(self.writer, "=") + } else { + PrefixOnceWriter::new(self.writer) + }; + + let max_len = match self.varspec.modifier() { + Modifier::None | Modifier::Explode => None, + Modifier::MaxLen(max_len) => Some(max_len), + }; + escape_write_with_maxlen::<S, T, W>(&mut writer, v, oppr.allow_reserved, max_len)?; + if writer.has_unwritten_prefix() { + self.writer.write_str(oppr.ifemp)?; + } + Ok(VisitDoneToken::new(self)) + } + + /// Visits a list variable. + #[inline] + #[must_use] + fn visit_list(self) -> Self::ListVisitor { + let oppr = OpProps::from_op(self.op); + ListValueVisitor { + visitor: self, + num_elems: 0, + oppr, + } + } + + /// Visits an associative array variable. + #[inline] + #[must_use] + fn visit_assoc(self) -> Self::AssocVisitor { + let oppr = OpProps::from_op(self.op); + AssocValueVisitor { + visitor: self, + num_elems: 0, + oppr, + } + } +} + +/// Visitor to retrieve value of a list variable. +// RFC 6570 section 2.3: +// +// > A variable defined as a list value is considered undefined if the +// > list contains zero members. A variable defined as an associative +// > array of (name, value) pairs is considered undefined if the array +// > contains zero members or if all member names in the array are +// > associated with undefined values. +// +// Single variable visitor should be used for single expansion. +// Do not derive any traits that allows the value to be generated or cloned. 
struct ListValueVisitor<'a, S, W> {
    /// Visitor.
    visitor: ValueVisitor<'a, S, W>,
    /// Number of already emitted elements.
    num_elems: usize,
    /// Operator props.
    oppr: &'static OpProps,
}

impl<S: Spec, W: fmt::Write> ListValueVisitor<'_, S, W> {
    /// Visits an item.
    ///
    /// Writes whatever must precede this item (the expression prefix or a
    /// separator, plus `name=` where the operator is named) and then the
    /// escaped item itself.
    ///
    /// # Panics
    ///
    /// Panics if the varspec carries a prefix (max-length) modifier.
    fn visit_item_impl<T: fmt::Display>(&mut self, item: T) -> fmt::Result {
        let modifier = self.visitor.varspec.modifier();
        let is_explode = match modifier {
            Modifier::MaxLen(_) => panic!(
                "value type changed since `UriTemplateStr::expand()`: \
                 prefix modifier is not applicable to a list"
            ),
            Modifier::None => false,
            Modifier::Explode => true,
        };

        // Write prefix for each variable.
        if self.num_elems == 0 {
            // The flag is shared across the expression: the first varspec that
            // emits anything consumes it and writes `first`, later ones write `sep`.
            if mem::replace(self.visitor.is_first_varspec, false) {
                self.visitor.writer.write_str(self.oppr.first)?;
            } else {
                self.visitor.writer.write_str(self.oppr.sep)?;
            }
            if self.oppr.named {
                // The variable name is written as is (no escaping) here.
                self.visitor
                    .writer
                    .write_str(self.visitor.varspec.name().as_str())?;
                self.visitor.writer.write_char('=')?;
            }
        } else {
            // Write prefix for the non-first item.
            match (self.oppr.named, is_explode) {
                // Without explode, items are joined with commas.
                (_, false) => self.visitor.writer.write_char(',')?,
                // Exploded, unnamed: items are joined with the operator separator.
                (false, true) => self.visitor.writer.write_str(self.oppr.sep)?,
                // Exploded, named: every item is written as `<sep>name=item`.
                (true, true) => {
                    self.visitor.writer.write_str(self.oppr.sep)?;
                    escape_write::<S, _, _>(
                        self.visitor.writer,
                        self.visitor.varspec.name().as_str(),
                        self.oppr.allow_reserved,
                    )?;
                    self.visitor.writer.write_char('=')?;
                }
            }
        }

        escape_write::<S, _, _>(self.visitor.writer, item, self.oppr.allow_reserved)?;

        self.num_elems += 1;
        Ok(())
    }
}

impl<S: Spec, W: fmt::Write> VisitorSealed for ListValueVisitor<'_, S, W> {}

impl<'a, S: Spec, W: fmt::Write> ListVisitor for ListValueVisitor<'a, S, W> {
    type Result = Result<VisitDoneToken<'a, S, W>, fmt::Error>;

    /// Visits an item.
    ///
    /// Continues on success; a write error breaks the iteration with `Err`.
    #[inline]
    fn visit_item<T: fmt::Display>(&mut self, item: T) -> ControlFlow<Self::Result> {
        match self.visit_item_impl(item) {
            Ok(_) => ControlFlow::Continue(()),
            Err(e) => ControlFlow::Break(Err(e)),
        }
    }

    /// Finishes visiting the list.
    #[inline]
    fn finish(self) -> Self::Result {
        Ok(VisitDoneToken::new(self.visitor))
    }
}

/// Visitor to retrieve entries of an associative array variable.
// RFC 6570 section 2.3:
//
// > A variable defined as a list value is considered undefined if the
// > list contains zero members. A variable defined as an associative
// > array of (name, value) pairs is considered undefined if the array
// > contains zero members or if all member names in the array are
// > associated with undefined values.
//
// Single variable visitor should be used for single expansion.
// Do not derive any traits that allows the value to be generated or cloned.
struct AssocValueVisitor<'a, S, W> {
    /// Visitor.
    visitor: ValueVisitor<'a, S, W>,
    /// Number of already emitted elements.
    num_elems: usize,
    /// Operator props.
    oppr: &'static OpProps,
}

impl<S: Spec, W: fmt::Write> AssocValueVisitor<'_, S, W> {
    /// Visits an entry.
    ///
    /// Writes whatever must precede this entry, then the escaped key followed
    /// by `=` (explode) or `,` (no explode), and finally the escaped value.
    ///
    /// # Panics
    ///
    /// Panics if the varspec carries a prefix (max-length) modifier.
    fn visit_entry_impl<K: fmt::Display, V: fmt::Display>(
        &mut self,
        key: K,
        value: V,
    ) -> fmt::Result {
        let modifier = self.visitor.varspec.modifier();
        let is_explode = match modifier {
            Modifier::MaxLen(_) => panic!(
                "value type changed since `UriTemplateStr::expand()`: \
                 prefix modifier is not applicable to an associative array"
            ),
            Modifier::None => false,
            Modifier::Explode => true,
        };

        // Write prefix for each variable.
        if self.num_elems == 0 {
            // Same first/separator handling as for any other variable type.
            if mem::replace(self.visitor.is_first_varspec, false) {
                self.visitor.writer.write_str(self.oppr.first)?;
            } else {
                self.visitor.writer.write_str(self.oppr.sep)?;
            }
            if is_explode {
                // Exploded: the entry key takes the place of the variable name.
                escape_write::<S, _, _>(self.visitor.writer, key, self.oppr.allow_reserved)?;
                self.visitor.writer.write_char('=')?;
            } else {
                // Not exploded: `name=` (named operators only), then `key,`.
                if self.oppr.named {
                    escape_write::<S, _, _>(
                        self.visitor.writer,
                        self.visitor.varspec.name().as_str(),
                        self.oppr.allow_reserved,
                    )?;
                    self.visitor.writer.write_char('=')?;
                }
                escape_write::<S, _, _>(self.visitor.writer, key, self.oppr.allow_reserved)?;
                self.visitor.writer.write_char(',')?;
            }
        } else {
            // Write prefix for the non-first item.
            match (self.oppr.named, is_explode) {
                // Not exploded: `,key,` and the value follows.
                (_, false) => {
                    self.visitor.writer.write_char(',')?;
                    escape_write::<S, _, _>(self.visitor.writer, key, self.oppr.allow_reserved)?;
                    self.visitor.writer.write_char(',')?;
                }
                // Exploded: `<sep>key=` and the value follows. The named and
                // unnamed arms below emit exactly the same output.
                (false, true) => {
                    self.visitor.writer.write_str(self.oppr.sep)?;
                    escape_write::<S, _, _>(self.visitor.writer, key, self.oppr.allow_reserved)?;
                    self.visitor.writer.write_char('=')?;
                }
                (true, true) => {
                    self.visitor.writer.write_str(self.oppr.sep)?;
                    escape_write::<S, _, _>(self.visitor.writer, key, self.oppr.allow_reserved)?;
                    self.visitor.writer.write_char('=')?;
                }
            }
        }

        escape_write::<S, _, _>(self.visitor.writer, value, self.oppr.allow_reserved)?;

        self.num_elems += 1;
        Ok(())
    }
}

impl<S: Spec, W: fmt::Write> VisitorSealed for AssocValueVisitor<'_, S, W> {}

impl<'a, S: Spec, W: fmt::Write> AssocVisitor for AssocValueVisitor<'a, S, W> {
    type Result = Result<VisitDoneToken<'a, S, W>, fmt::Error>;

    /// Visits an entry.
    ///
    /// Continues on success; a write error breaks the iteration with `Err`.
    #[inline]
    fn visit_entry<K: fmt::Display, V: fmt::Display>(
        &mut self,
        key: K,
        value: V,
    ) -> ControlFlow<Self::Result> {
        match self.visit_entry_impl(key, value) {
            Ok(_) => ControlFlow::Continue(()),
            Err(e) => ControlFlow::Break(Err(e)),
        }
    }

    /// Finishes visiting the associative array.
    #[inline]
    fn finish(self) -> Self::Result {
        Ok(VisitDoneToken::new(self.visitor))
    }
}

/// Visitor to retrieve effective type of a variable.
struct TypeVisitor<'a> {
    /// Variable name.
    var_name: VarName<'a>,
}

impl<'a> TypeVisitor<'a> {
    /// Creates a new type visitor.
    #[inline]
    #[must_use]
    fn new(var_name: VarName<'a>) -> Self {
        Self { var_name }
    }
}

impl VisitorSealed for TypeVisitor<'_> {}

// Every `visit_*` method ignores the value itself and reports only its type.
impl<'a> Visitor for TypeVisitor<'a> {
    type Result = ValueType;
    type ListVisitor = ListTypeVisitor;
    type AssocVisitor = AssocTypeVisitor;

    #[inline]
    fn var_name(&self) -> VarName<'a> {
        self.var_name
    }
    #[inline]
    fn purpose(&self) -> VisitPurpose {
        VisitPurpose::Typecheck
    }
    #[inline]
    fn visit_undefined(self) -> Self::Result {
        ValueType::undefined()
    }
    #[inline]
    fn visit_string<T: fmt::Display>(self, _: T) -> Self::Result {
        ValueType::string()
    }
    #[inline]
    fn visit_list(self) -> Self::ListVisitor {
        ListTypeVisitor
    }
    #[inline]
    fn visit_assoc(self) -> Self::AssocVisitor {
        AssocTypeVisitor
    }
}

/// Visitor to retrieve effective type of a list variable.
struct ListTypeVisitor;

impl VisitorSealed for ListTypeVisitor {}

impl ListVisitor for ListTypeVisitor {
    type Result = ValueType;

    /// Visits an item.
    ///
    /// One item is enough to know the list is non-empty, so this breaks
    /// immediately.
    #[inline]
    fn visit_item<T: fmt::Display>(&mut self, _item: T) -> ControlFlow<Self::Result> {
        ControlFlow::Break(ValueType::nonempty_list())
    }

    /// Finishes visiting the list.
    ///
    /// Reports an empty list; a visited item breaks out of `visit_item` with
    /// the non-empty type instead.
    #[inline]
    fn finish(self) -> Self::Result {
        ValueType::empty_list()
    }
}

/// Visitor to retrieve effective type of an associative array variable.
struct AssocTypeVisitor;

impl VisitorSealed for AssocTypeVisitor {}

impl AssocVisitor for AssocTypeVisitor {
    type Result = ValueType;

    /// Visits an entry.
    ///
    /// One entry is enough to know the associative array is non-empty, so
    /// this breaks immediately.
    #[inline]
    fn visit_entry<K: fmt::Display, V: fmt::Display>(
        &mut self,
        _key: K,
        _value: V,
    ) -> ControlFlow<Self::Result> {
        ControlFlow::Break(ValueType::nonempty_assoc())
    }

    /// Finishes visiting the associative array.
    ///
    /// Reports an empty associative array; a visited entry breaks out of
    /// `visit_entry` with the non-empty type instead.
    #[inline]
    fn finish(self) -> Self::Result {
        ValueType::empty_assoc()
    }
}
diff --git a/vendor/iri-string/src/template/parser.rs b/vendor/iri-string/src/template/parser.rs
new file mode 100644
index 00000000..6d5443a8
--- /dev/null
+++ b/vendor/iri-string/src/template/parser.rs
@@ -0,0 +1,6 @@
//! URI Template parser.

pub(super) mod char;
pub(super) mod validate;

pub(super) use self::validate::validate_template_str;
diff --git a/vendor/iri-string/src/template/parser/char.rs b/vendor/iri-string/src/template/parser/char.rs
new file mode 100644
index 00000000..9ad4a6d8
--- /dev/null
+++ b/vendor/iri-string/src/template/parser/char.rs
@@ -0,0 +1,190 @@
//! Characters.

/// Properties of ASCII characters.
///
/// About `'` (single quote) being considered as a literal: see
/// [Errata ID 6937](https://www.rfc-editor.org/errata/eid6937).
// The table is indexed by the ASCII code of the character. Each entry is a
// bit set tested with the `CHARS_TABLE_MASK_*` constants:
//
// * bit 0 (`1 << 0`): allowed in a literal,
// * bit 1 (`1 << 1`): allowed as the first character of a varname,
// * bit 2 (`1 << 2`): allowed as a non-first character of a varname.
//
// `%` has no bits set: percent-encoded triplets are validated by the callers.
const CHARS_TABLE: [u8; 128] = [
    0b_0000_0000, // NUL
    0b_0000_0000, // SOH
    0b_0000_0000, // STX
    0b_0000_0000, // ETX
    0b_0000_0000, // EOT
    0b_0000_0000, // ENQ
    0b_0000_0000, // ACK
    0b_0000_0000, // BEL
    0b_0000_0000, // BS
    0b_0000_0000, // HT
    0b_0000_0000, // LF
    0b_0000_0000, // VT
    0b_0000_0000, // FF
    0b_0000_0000, // CR
    0b_0000_0000, // SO
    0b_0000_0000, // SI
    0b_0000_0000, // DLE
    0b_0000_0000, // DC1
    0b_0000_0000, // DC2
    0b_0000_0000, // DC3
    0b_0000_0000, // DC4
    0b_0000_0000, // NAK
    0b_0000_0000, // SYN
    0b_0000_0000, // ETB
    0b_0000_0000, // CAN
    0b_0000_0000, // EM
    0b_0000_0000, // SUB
    0b_0000_0000, // ESC
    0b_0000_0000, // FS
    0b_0000_0000, // GS
    0b_0000_0000, // RS
    0b_0000_0000, // US
    0b_0000_0000, // SPACE
    0b_0000_0001, // !
    0b_0000_0000, // "
    0b_0000_0001, // #
    0b_0000_0001, // $
    0b_0000_0000, // %
    0b_0000_0001, // &
    0b_0000_0001, // '
    0b_0000_0001, // (
    0b_0000_0001, // )
    0b_0000_0001, // *
    0b_0000_0001, // +
    0b_0000_0001, // ,
    0b_0000_0001, // -
    0b_0000_0101, // .
    0b_0000_0001, // /
    0b_0000_0111, // 0
    0b_0000_0111, // 1
    0b_0000_0111, // 2
    0b_0000_0111, // 3
    0b_0000_0111, // 4
    0b_0000_0111, // 5
    0b_0000_0111, // 6
    0b_0000_0111, // 7
    0b_0000_0111, // 8
    0b_0000_0111, // 9
    0b_0000_0001, // :
    0b_0000_0001, // ;
    0b_0000_0000, // <
    0b_0000_0001, // =
    0b_0000_0000, // >
    0b_0000_0001, // ?
    0b_0000_0001, // @
    0b_0000_0111, // A
    0b_0000_0111, // B
    0b_0000_0111, // C
    0b_0000_0111, // D
    0b_0000_0111, // E
    0b_0000_0111, // F
    0b_0000_0111, // G
    0b_0000_0111, // H
    0b_0000_0111, // I
    0b_0000_0111, // J
    0b_0000_0111, // K
    0b_0000_0111, // L
    0b_0000_0111, // M
    0b_0000_0111, // N
    0b_0000_0111, // O
    0b_0000_0111, // P
    0b_0000_0111, // Q
    0b_0000_0111, // R
    0b_0000_0111, // S
    0b_0000_0111, // T
    0b_0000_0111, // U
    0b_0000_0111, // V
    0b_0000_0111, // W
    0b_0000_0111, // X
    0b_0000_0111, // Y
    0b_0000_0111, // Z
    0b_0000_0001, // [
    0b_0000_0000, // \
    0b_0000_0001, // ]
    0b_0000_0000, // ^
    0b_0000_0111, // _
    0b_0000_0000, // `
    0b_0000_0111, // a
    0b_0000_0111, // b
    0b_0000_0111, // c
    0b_0000_0111, // d
    0b_0000_0111, // e
    0b_0000_0111, // f
    0b_0000_0111, // g
    0b_0000_0111, // h
    0b_0000_0111, // i
    0b_0000_0111, // j
    0b_0000_0111, // k
    0b_0000_0111, // l
    0b_0000_0111, // m
    0b_0000_0111, // n
    0b_0000_0111, // o
    0b_0000_0111, // p
    0b_0000_0111, // q
    0b_0000_0111, // r
    0b_0000_0111, // s
    0b_0000_0111, // t
    0b_0000_0111, // u
    0b_0000_0111, // v
    0b_0000_0111, // w
    0b_0000_0111, // x
    0b_0000_0111, // y
    0b_0000_0111, // z
    0b_0000_0000, // {
    0b_0000_0000, // |
    0b_0000_0000, // }
    0b_0000_0001, // ~
    0b_0000_0000, // DEL
];

/// A mask to test whether the character matches `literals` rule defined in [RFC 6570].
///
/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.1
const CHARS_TABLE_MASK_LITERAL: u8 = 1 << 0;

/// A mask to test whether the character matches `varchar` rule defined in [RFC 6570].
///
/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.3
const CHARS_TABLE_MASK_VARCHAR_START: u8 = 1 << 1;

/// A mask to test whether the character matches `varchar` rule defined in [RFC 6570] or a period.
+/// +/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.3 +const CHARS_TABLE_MASK_VARCHAR_CONTINUE: u8 = 1 << 2; + +/// Returns true if the given ASCII character is allowed in a literal string. +/// +/// # Precondition +/// +/// The given byte should be an ASCII character, i.e. should be less than 128. +#[inline] +#[must_use] +pub(super) const fn is_ascii_literal_char(c: u8) -> bool { + (CHARS_TABLE[c as usize] & CHARS_TABLE_MASK_LITERAL) != 0 +} + +/// Returns true if the given ASCII character is allowed as the beginning of the `varname`. +/// +/// Note that this does not return true for `%` character. It is caller's +/// responsibility to test validity of percent-encoded triplets. +/// +/// # Precondition +/// +/// The given byte should be an ASCII character, i.e. should be less than 128. +#[inline] +#[must_use] +pub(super) const fn is_ascii_varchar_start(c: u8) -> bool { + (CHARS_TABLE[c as usize] & CHARS_TABLE_MASK_VARCHAR_START) != 0 +} + +/// Returns true if the given ASCII character is allowed as the non-beginning of the `varname`. +/// +/// Note that this does not return true for `%` character. It is caller's +/// responsibility to test validity of percent-encoded triplets. +/// +/// # Precondition +/// +/// The given byte should be an ASCII character, i.e. should be less than 128. +#[inline] +#[must_use] +pub(super) const fn is_ascii_varchar_continue(c: u8) -> bool { + (CHARS_TABLE[c as usize] & CHARS_TABLE_MASK_VARCHAR_CONTINUE) != 0 +} diff --git a/vendor/iri-string/src/template/parser/validate.rs b/vendor/iri-string/src/template/parser/validate.rs new file mode 100644 index 00000000..67ab6c01 --- /dev/null +++ b/vendor/iri-string/src/template/parser/validate.rs @@ -0,0 +1,161 @@ +//! Validating parsers. 
+ +use crate::parser::str::{ + find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded, starts_with_double_hexdigits, +}; +use crate::template::components::MaybeOperator; +use crate::template::error::{Error, ErrorKind}; + +use crate::template::parser::char::{ + is_ascii_literal_char, is_ascii_varchar_continue, is_ascii_varchar_start, +}; + +/// Returns `Ok(())` if the given string is a valid literal. +fn validate_literal(s: &str, offset: usize) -> Result<(), Error> { + match s + .chars() + .position(|c| !c.is_ascii() || !is_ascii_literal_char(c as u8)) + { + Some(pos) => Err(Error::new(ErrorKind::InvalidCharacter, offset + pos)), + None => Ok(()), + } +} + +/// Returns `Ok(())` if the given string is a valid varspec. +fn validate_varspec(s: &str, offset: usize) -> Result<(), Error> { + match find_split2_hole(s, b':', b'*') { + Some((maybe_varname, b':', maybe_len)) => { + validate_varname(maybe_varname, offset)?; + if !(1..=5).contains(&maybe_len.len()) { + return Err(Error::new( + ErrorKind::InvalidExpression, + offset + maybe_varname.len() + 2, + )); + } + if let Some(pos) = maybe_len.bytes().position(|b| !b.is_ascii_digit()) { + return Err(Error::new( + ErrorKind::InvalidExpression, + offset + maybe_varname.len() + 2 + pos, + )); + } + } + Some((maybe_varname, b'*', extra)) => { + validate_varname(maybe_varname, offset)?; + if !extra.is_empty() { + return Err(Error::new( + ErrorKind::InvalidExpression, + offset + maybe_varname.len() + 1, + )); + } + } + Some((_, sep, _)) => unreachable!("[consistency] the byte {sep:#02x} is not searched"), + None => validate_varname(s, offset)?, + } + Ok(()) +} + +/// Returns `Ok(())` if the given string is a valid varname. 
+pub(crate) fn validate_varname(s: &str, offset: usize) -> Result<(), Error> { + let rest = match s.as_bytes().first() { + Some(b'%') if starts_with_double_hexdigits(&s.as_bytes()[1..]) => &s[3..], + Some(b) if b.is_ascii() && is_ascii_varchar_start(*b) => &s[1..], + _ => return Err(Error::new(ErrorKind::InvalidExpression, offset)), + }; + let is_valid = satisfy_chars_with_pct_encoded(rest, is_ascii_varchar_continue, |_| false); + if !is_valid { + return Err(Error::new(ErrorKind::InvalidExpression, offset)); + } + Ok(()) +} + +/// Returns `Ok(())` if the given string is a valid expression. +/// +/// "Expression" here is the expression body inside `{` and `}`, but not including braces. +fn validate_expr_body(s: &str, mut offset: usize) -> Result<(), Error> { + if s.is_empty() { + return Err(Error::new(ErrorKind::InvalidExpression, offset)); + } + + // Skip the operator. + let maybe_variable_list = match MaybeOperator::from_byte(s.as_bytes()[0]) { + Some(MaybeOperator::Operator(_)) => { + offset += 1; + &s[1..] + } + Some(MaybeOperator::Reserved(_)) => { + return Err(Error::new(ErrorKind::UnsupportedOperator, offset)); + } + None => s, + }; + + // Validate varspecs. + for (spec_i, maybe_varspec) in maybe_variable_list.split(',').enumerate() { + if spec_i != 0 { + // Add the length of the leading separator `,`. + offset += 1; + } + validate_varspec(maybe_varspec, offset)?; + offset += maybe_varspec.len(); + } + + Ok(()) +} + +/// Validates whether the given string is valid as a URI template. +/// +/// Returns `Ok(())` if the given string is a valid URI template. 
+pub(in crate::template) fn validate_template_str(s: &str) -> Result<(), Error> { + let mut rest = s; + let mut offset = 0; + while !rest.is_empty() { + rest = match find_split2_hole(rest, b'%', b'{') { + Some((literal, b'%', xdigits2_and_rest)) => { + validate_literal(literal, offset)?; + + if xdigits2_and_rest.len() < 2 { + return Err(Error::new( + ErrorKind::InvalidPercentEncoding, + offset + literal.len(), + )); + } + let (xdigits2, new_rest) = xdigits2_and_rest.split_at(2); + if !xdigits2.as_bytes()[0].is_ascii_hexdigit() { + return Err(Error::new( + ErrorKind::InvalidPercentEncoding, + offset + literal.len() + 1, + )); + } + if !xdigits2.as_bytes()[1].is_ascii_hexdigit() { + return Err(Error::new( + ErrorKind::InvalidPercentEncoding, + offset + literal.len() + 2, + )); + } + new_rest + } + Some((literal, b'{', expr_and_rest)) => { + validate_literal(literal, offset)?; + + let (expr, new_rest) = match find_split_hole(expr_and_rest, b'}') { + Some(v) => v, + None => { + return Err(Error::new( + ErrorKind::ExpressionNotClosed, + offset + literal.len(), + )) + } + }; + + // +1 is `+ "{".len()`. + validate_expr_body(expr, offset + literal.len() + 1)?; + + new_rest + } + Some(_) => unreachable!("[consistency] searching only `%` and `{{`"), + None => return validate_literal(rest, offset), + }; + offset = s.len() - rest.len(); + } + + Ok(()) +} diff --git a/vendor/iri-string/src/template/simple_context.rs b/vendor/iri-string/src/template/simple_context.rs new file mode 100644 index 00000000..5c19dc79 --- /dev/null +++ b/vendor/iri-string/src/template/simple_context.rs @@ -0,0 +1,218 @@ +//! Simple general-purpose context type. + +use core::ops::ControlFlow; + +use alloc::collections::BTreeMap; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::String; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::vec::Vec; + +use crate::template::context::{Context, VarName, Visitor}; + +/// Value. 
+#[derive(Debug, Clone)] +pub enum Value { + /// Undefined (i.e. null). + Undefined, + /// String value. + String(String), + /// List. + List(Vec<String>), + /// Associative array. + Assoc(Vec<(String, String)>), +} + +impl From<&str> for Value { + #[inline] + fn from(v: &str) -> Self { + Self::String(v.into()) + } +} + +impl From<String> for Value { + #[inline] + fn from(v: String) -> Self { + Self::String(v) + } +} + +/// Simple template expansion context. +#[derive(Default, Debug, Clone)] +pub struct SimpleContext { + /// Variable values. + // Any map types (including `HashMap`) is ok, but the hash map is not provided by `alloc`. + // + // QUESTION: Should hexdigits in percent-encoded triplets in varnames be + // compared case sensitively? + variables: BTreeMap<String, Value>, +} + +impl SimpleContext { + /// Creates a new empty context. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::spec::UriSpec; + /// use iri_string::template::UriTemplateStr; + /// use iri_string::template::simple_context::SimpleContext; + /// + /// let empty_ctx = SimpleContext::new(); + /// let template = UriTemplateStr::new("{no_such_variable}")?; + /// let expanded = template.expand::<UriSpec, _>(&empty_ctx)?; + /// + /// assert_eq!( + /// expanded.to_string(), + /// "" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Inserts a variable. + /// + /// Passing [`Value::Undefined`] removes the value from the context. + /// + /// The entry will be inserted or removed even if the key is invalid as a + /// variable name. Such entries will be simply ignored on expansion. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::spec::UriSpec; + /// use iri_string::template::UriTemplateStr; + /// use iri_string::template::simple_context::SimpleContext; + /// + /// let mut context = SimpleContext::new(); + /// context.insert("username", "foo"); + /// + /// let template = UriTemplateStr::new("/users/{username}")?; + /// let expanded = template.expand::<UriSpec, _>(&context)?; + /// + /// assert_eq!( + /// expanded.to_string(), + /// "/users/foo" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// Passing [`Value::Undefined`] removes the value from the context. + /// + /// ``` + /// # use iri_string::template::Error; + /// ## [cfg(feature = "alloc")] { + /// use iri_string::spec::UriSpec; + /// use iri_string::template::UriTemplateStr; + /// use iri_string::template::simple_context::{SimpleContext, Value}; + /// + /// let mut context = SimpleContext::new(); + /// context.insert("username", "foo"); + /// context.insert("username", Value::Undefined); + /// + /// let template = UriTemplateStr::new("/users/{username}")?; + /// let expanded = template.expand::<UriSpec, _>(&context)?; + /// + /// assert_eq!( + /// expanded.to_string(), + /// "/users/" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn insert<K, V>(&mut self, key: K, value: V) -> Option<Value> + where + K: Into<String>, + V: Into<Value>, + { + let key = key.into(); + match value.into() { + Value::Undefined => self.variables.remove(&key), + value => self.variables.insert(key, value), + } + } + + /// Removes all entries in the context. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::spec::UriSpec; + /// use iri_string::template::UriTemplateStr; + /// use iri_string::template::simple_context::SimpleContext; + /// + /// let template = UriTemplateStr::new("{foo,bar}")?; + /// let mut context = SimpleContext::new(); + /// + /// context.insert("foo", "FOO"); + /// context.insert("bar", "BAR"); + /// assert_eq!( + /// template.expand::<UriSpec, _>(&context)?.to_string(), + /// "FOO,BAR" + /// ); + /// + /// context.clear(); + /// assert_eq!( + /// template.expand::<UriSpec, _>(&context)?.to_string(), + /// "" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn clear(&mut self) { + self.variables.clear(); + } + + /// Returns a reference to the value for the key. + // + // QUESTION: Should hexdigits in percent-encoded triplets in varnames be + // compared case sensitively? + #[inline] + #[must_use] + pub fn get(&self, key: VarName<'_>) -> Option<&Value> { + self.variables.get(key.as_str()) + } +} + +impl Context for SimpleContext { + fn visit<V: Visitor>(&self, visitor: V) -> V::Result { + use crate::template::context::{AssocVisitor, ListVisitor}; + + let name = visitor.var_name().as_str(); + match self.variables.get(name) { + None | Some(Value::Undefined) => visitor.visit_undefined(), + Some(Value::String(s)) => visitor.visit_string(s), + Some(Value::List(list)) => { + let mut visitor = visitor.visit_list(); + if let ControlFlow::Break(res) = + list.iter().try_for_each(|item| visitor.visit_item(item)) + { + return res; + } + visitor.finish() + } + Some(Value::Assoc(list)) => { + let mut visitor = visitor.visit_assoc(); + if let ControlFlow::Break(res) = + list.iter().try_for_each(|(k, v)| visitor.visit_entry(k, v)) + { + return res; + } + visitor.finish() + } + } + } +} diff --git a/vendor/iri-string/src/template/string.rs b/vendor/iri-string/src/template/string.rs new file mode 100644 
index 00000000..9ba53a75 --- /dev/null +++ b/vendor/iri-string/src/template/string.rs @@ -0,0 +1,647 @@ +//! Template string types. + +use core::fmt; + +#[cfg(feature = "alloc")] +use alloc::borrow::Cow; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::boxed::Box; +#[cfg(feature = "alloc")] +use alloc::rc::Rc; +#[cfg(feature = "alloc")] +use alloc::string::String; +#[cfg(feature = "alloc")] +use alloc::sync::Arc; + +use crate::spec::Spec; +use crate::template::components::{VarListIter, VarName}; +use crate::template::context::{Context, DynamicContext}; +use crate::template::error::{Error, ErrorKind}; +use crate::template::expand::{expand_whole_dynamic, Chunk, Chunks, Expanded}; +use crate::template::parser::validate_template_str; + +#[cfg(feature = "alloc")] +pub use self::owned::UriTemplateString; + +/// Implements `PartialEq` and `PartialOrd`. +macro_rules! impl_cmp { + ($ty_common:ty, $ty_lhs:ty, $ty_rhs:ty) => { + impl PartialEq<$ty_rhs> for $ty_lhs { + #[inline] + fn eq(&self, o: &$ty_rhs) -> bool { + <$ty_common as PartialEq<$ty_common>>::eq(self.as_ref(), o.as_ref()) + } + } + impl PartialEq<$ty_lhs> for $ty_rhs { + #[inline] + fn eq(&self, o: &$ty_lhs) -> bool { + <$ty_common as PartialEq<$ty_common>>::eq(self.as_ref(), o.as_ref()) + } + } + impl PartialOrd<$ty_rhs> for $ty_lhs { + #[inline] + fn partial_cmp(&self, o: &$ty_rhs) -> Option<core::cmp::Ordering> { + <$ty_common as PartialOrd<$ty_common>>::partial_cmp(self.as_ref(), o.as_ref()) + } + } + impl PartialOrd<$ty_lhs> for $ty_rhs { + #[inline] + fn partial_cmp(&self, o: &$ty_lhs) -> Option<core::cmp::Ordering> { + <$ty_common as PartialOrd<$ty_common>>::partial_cmp(self.as_ref(), o.as_ref()) + } + } + }; +} + +#[cfg(feature = "alloc")] +mod owned; + +/// A borrowed slice of a URI template. +/// +/// URI Template is defined by [RFC 6570]. +/// +/// Note that "URI Template" can also be used for IRI. 
+/// +/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html +/// +/// # Valid values +/// +/// This type can have a URI template string. +/// +/// # Applied errata +/// +/// [Errata ID 6937](https://www.rfc-editor.org/errata/eid6937) is applied, so +/// single quotes are allowed to appear in an URI template. +/// +/// ``` +/// # use iri_string::template::Error; +/// use iri_string::template::UriTemplateStr; +/// +/// let template = UriTemplateStr::new("'quoted'")?; +/// # Ok::<_, Error>(()) +/// ``` +#[cfg_attr(feature = "serde", derive(serde::Serialize))] +#[cfg_attr(feature = "serde", serde(transparent))] +#[repr(transparent)] +#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct UriTemplateStr { + /// The raw string. + inner: str, +} + +impl UriTemplateStr { + /// Creates a new string. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// use iri_string::template::UriTemplateStr; + /// + /// let template = UriTemplateStr::new("/users/{username}")?; + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn new(s: &str) -> Result<&Self, Error> { + TryFrom::try_from(s) + } + + /// Creates a new string without validation. + /// + /// This does not validate the given string, so it is caller's + /// responsibility to ensure the given string is valid. + /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. + /// If not, any use of the returned value or the call of this + /// function itself may result in undefined behavior. + #[inline] + #[must_use] + pub unsafe fn new_unchecked(s: &str) -> &Self { + // SAFETY: `new_always_unchecked` requires the same precondition + // as `new_unchecked`. + unsafe { Self::new_always_unchecked(s) } + } + + /// Creates a new string without any validation. + /// + /// This does not validate the given string at any time. + /// + /// Intended for internal use. + /// + /// # Safety + /// + /// The given string must be valid.
+ #[inline] + #[must_use] + unsafe fn new_always_unchecked(s: &str) -> &Self { + // SAFETY: the cast is safe since `Self` type has `repr(transparent)` + // attribute and the content is guaranteed as valid by the + // precondition of the function. + unsafe { &*(s as *const str as *const Self) } + } + + /// Returns the template as a plain `&str`. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// use iri_string::template::UriTemplateStr; + /// + /// let template = UriTemplateStr::new("/users/{username}")?; + /// assert_eq!(template.as_str(), "/users/{username}"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn as_str(&self) -> &str { + self.as_ref() + } + + /// Returns the template string length. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// use iri_string::template::UriTemplateStr; + /// + /// let template = UriTemplateStr::new("/users/{username}")?; + /// assert_eq!(template.len(), "/users/{username}".len()); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn len(&self) -> usize { + self.as_str().len() + } + + /// Returns whether the string is empty. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// use iri_string::template::UriTemplateStr; + /// + /// let template = UriTemplateStr::new("/users/{username}")?; + /// assert!(!template.is_empty()); + /// + /// let empty = UriTemplateStr::new("")?; + /// assert!(empty.is_empty()); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn is_empty(&self) -> bool { + self.as_str().is_empty() + } +} + +impl UriTemplateStr { + /// Expands the template with the given context. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::spec::UriSpec; + /// use iri_string::template::UriTemplateStr; + /// use iri_string::template::simple_context::SimpleContext; + /// + /// let mut context = SimpleContext::new(); + /// context.insert("username", "foo"); + /// + /// let template = UriTemplateStr::new("/users/{username}")?; + /// let expanded = template.expand::<UriSpec, _>(&context)?; + /// + /// assert_eq!( + /// expanded.to_string(), + /// "/users/foo" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// You can control allowed characters in the output by changing spec type. + /// + /// ``` + /// # use iri_string::template::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::spec::{IriSpec, UriSpec}; + /// use iri_string::template::UriTemplateStr; + /// use iri_string::template::simple_context::SimpleContext; + /// + /// let mut context = SimpleContext::new(); + /// context.insert("alpha", "\u{03B1}"); + /// + /// let template = UriTemplateStr::new("{?alpha}")?; + /// + /// assert_eq!( + /// template.expand::<UriSpec, _>(&context)?.to_string(), + /// "?alpha=%CE%B1", + /// "a URI cannot contain Unicode alpha (U+03B1), so it should be escaped" + /// ); + /// assert_eq!( + /// template.expand::<IriSpec, _>(&context)?.to_string(), + /// "?alpha=\u{03B1}", + /// "an IRI can contain Unicode alpha (U+03B1), so it written as is" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn expand<'a, S: Spec, C: Context>( + &'a self, + context: &'a C, + ) -> Result<Expanded<'a, S, C>, Error> { + Expanded::new(self, context) + } + + /// Expands the template with the given dynamic context. + /// + #[cfg_attr( + feature = "alloc", + doc = concat!( + "If you need the allocated [`String`], use", + "[`expand_dynamic_to_string`][`Self::expand_dynamic_to_string`]." 
+ ) + )] + /// + /// See the documentation for [`DynamicContext`] for usage. + pub fn expand_dynamic<S: Spec, W: fmt::Write, C: DynamicContext>( + &self, + writer: &mut W, + context: &mut C, + ) -> Result<(), Error> { + expand_whole_dynamic::<S, _, _>(self, writer, context) + } + + /// Expands the template into a string, with the given dynamic context. + /// + /// This is basically [`expand_dynamic`][`Self::expand_dynamic`] method + /// that returns an owned string instead of writing to the given writer. + /// + /// See the documentation for [`DynamicContext`] for usage. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "alloc")] + /// # extern crate alloc; + /// # use iri_string::template::Error; + /// # #[cfg(feature = "alloc")] { + /// # use alloc::string::String; + /// use iri_string::template::UriTemplateStr; + /// # use iri_string::template::context::{DynamicContext, Visitor, VisitPurpose}; + /// use iri_string::spec::UriSpec; + /// + /// struct MyContext<'a> { + /// // See the documentation for `DynamicContext`. + /// # /// Target path. + /// # target: &'a str, + /// # /// Username. + /// # username: Option<&'a str>, + /// # /// A flag to remember whether the URI template + /// # /// attempted to use `username` variable. + /// # username_visited: bool, + /// } + /// # + /// # impl DynamicContext for MyContext<'_> { + /// # fn on_expansion_start(&mut self) { + /// # // Reset the state. + /// # self.username_visited = false; + /// # } + /// # fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result { + /// # match visitor.var_name().as_str() { + /// # "target" => visitor.visit_string(self.target), + /// # "username" => { + /// # if visitor.purpose() == VisitPurpose::Expand { + /// # // The variable `username` is being used + /// # // on the template expansion. + /// # // Don't care whether `username` is defined or not. 
+ /// # self.username_visited = true; + /// # } + /// # if let Some(username) = &self.username { + /// # visitor.visit_string(username) + /// # } else { + /// # visitor.visit_undefined() + /// # } + /// # } + /// # _ => visitor.visit_undefined(), + /// # } + /// # } + /// # } + /// + /// let mut context = MyContext { + /// target: "/posts/1", + /// username: Some("the_admin"), + /// username_visited: false, + /// }; + /// + /// // The template accesses the variable `username`. + /// let template = UriTemplateStr::new("{+target}{?username}")?; + /// let s = template.expand_dynamic_to_string::<UriSpec, _>(&mut context)?; + /// assert_eq!(s, "/posts/1?username=the_admin"); + /// assert!(context.username_visited); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[cfg(feature = "alloc")] + pub fn expand_dynamic_to_string<S: Spec, C: DynamicContext>( + &self, + context: &mut C, + ) -> Result<String, Error> { + let mut buf = String::new(); + expand_whole_dynamic::<S, _, _>(self, &mut buf, context)?; + Ok(buf) + } + + /// Returns an iterator of variables in the template. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::template::Error; + /// use iri_string::template::UriTemplateStr; + /// + /// let template = UriTemplateStr::new("foo{/bar*,baz:4}{?qux}{&bar*}")?; + /// let mut vars = template.variables(); + /// assert_eq!(vars.next().map(|var| var.as_str()), Some("bar")); + /// assert_eq!(vars.next().map(|var| var.as_str()), Some("baz")); + /// assert_eq!(vars.next().map(|var| var.as_str()), Some("qux")); + /// assert_eq!(vars.next().map(|var| var.as_str()), Some("bar")); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn variables(&self) -> UriTemplateVariables<'_> { + UriTemplateVariables::new(self) + } +} + +impl fmt::Debug for UriTemplateStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("UriTemplateStr").field(&&self.inner).finish() + } +} + +impl AsRef<str> for UriTemplateStr { + #[inline] + fn as_ref(&self) -> &str { + &self.inner + } +} + +impl AsRef<UriTemplateStr> for UriTemplateStr { + #[inline] + fn as_ref(&self) -> &UriTemplateStr { + self + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<&'a UriTemplateStr> for Cow<'a, UriTemplateStr> { + #[inline] + fn from(s: &'a UriTemplateStr) -> Self { + Cow::Borrowed(s) + } +} + +#[cfg(feature = "alloc")] +impl From<&UriTemplateStr> for Arc<UriTemplateStr> { + fn from(s: &UriTemplateStr) -> Self { + let inner: &str = s.as_str(); + let buf = Arc::<str>::from(inner); + // SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so + // the memory layouts of `Arc<str>` and `Arc<UriTemplateStr>` are + // compatible. 
+ unsafe { + let raw: *const str = Arc::into_raw(buf); + Self::from_raw(raw as *const UriTemplateStr) + } + } +} + +#[cfg(feature = "alloc")] +impl From<&UriTemplateStr> for Box<UriTemplateStr> { + fn from(s: &UriTemplateStr) -> Self { + let inner: &str = s.as_str(); + let buf = Box::<str>::from(inner); + // SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so + // the memory layouts of `Box<str>` and `Box<UriTemplateStr>` are + // compatible. + unsafe { + let raw: *mut str = Box::into_raw(buf); + Self::from_raw(raw as *mut UriTemplateStr) + } + } +} + +#[cfg(feature = "alloc")] +impl From<&UriTemplateStr> for Rc<UriTemplateStr> { + fn from(s: &UriTemplateStr) -> Self { + let inner: &str = s.as_str(); + let buf = Rc::<str>::from(inner); + // SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so + // the memory layouts of `Rc<str>` and `Rc<UriTemplateStr>` are + // compatible. + unsafe { + let raw: *const str = Rc::into_raw(buf); + Self::from_raw(raw as *const UriTemplateStr) + } + } +} + +impl<'a> From<&'a UriTemplateStr> for &'a str { + #[inline] + fn from(s: &'a UriTemplateStr) -> &'a str { + s.as_ref() + } +} + +impl<'a> TryFrom<&'a str> for &'a UriTemplateStr { + type Error = Error; + + #[inline] + fn try_from(s: &'a str) -> Result<Self, Self::Error> { + match validate_template_str(s) { + // SAFETY: just checked the string is valid. + Ok(()) => Ok(unsafe { UriTemplateStr::new_always_unchecked(s) }), + Err(e) => Err(e), + } + } +} + +impl<'a> TryFrom<&'a [u8]> for &'a UriTemplateStr { + type Error = Error; + + #[inline] + fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> { + let s = core::str::from_utf8(bytes) + .map_err(|e| Error::new(ErrorKind::InvalidUtf8, e.valid_up_to()))?; + match validate_template_str(s) { + // SAFETY: just checked the string is valid. 
+ Ok(()) => Ok(unsafe { UriTemplateStr::new_always_unchecked(s) }), + Err(e) => Err(e), + } + } +} + +impl_cmp!(str, str, UriTemplateStr); +impl_cmp!(str, &str, UriTemplateStr); +impl_cmp!(str, str, &UriTemplateStr); + +impl fmt::Display for UriTemplateStr { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +/// Serde deserializer implementation. +#[cfg(feature = "serde")] +mod __serde_slice { + use super::UriTemplateStr; + + use core::fmt; + + use serde::{ + de::{self, Visitor}, + Deserialize, Deserializer, + }; + + /// Custom borrowed string visitor. + #[derive(Debug, Clone, Copy)] + struct CustomStrVisitor; + + impl<'de> Visitor<'de> for CustomStrVisitor { + type Value = &'de UriTemplateStr; + + #[inline] + fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("URI template string") + } + + #[inline] + fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E> + where + E: de::Error, + { + <&'de UriTemplateStr as TryFrom<&'de str>>::try_from(v).map_err(E::custom) + } + } + + // About `'de` and `'a`, see + // <https://serde.rs/lifetimes.html#the-deserializede-lifetime>. + impl<'a, 'de: 'a> Deserialize<'de> for &'a UriTemplateStr { + #[inline] + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + deserializer.deserialize_string(CustomStrVisitor) + } + } +} + +/// An iterator of variables in a URI template. +#[derive(Debug, Clone)] +pub struct UriTemplateVariables<'a> { + /// Chunks iterator. + chunks: Chunks<'a>, + /// Variables in the last chunk. + vars_in_chunk: Option<VarListIter<'a>>, +} + +impl<'a> UriTemplateVariables<'a> { + /// Creates a variables iterator from the URI template. 
+ #[inline] + #[must_use] + fn new(template: &'a UriTemplateStr) -> Self { + Self { + chunks: Chunks::new(template), + vars_in_chunk: None, + } + } +} + +impl<'a> Iterator for UriTemplateVariables<'a> { + type Item = VarName<'a>; + + fn next(&mut self) -> Option<Self::Item> { + loop { + if let Some(vars) = &mut self.vars_in_chunk { + match vars.next() { + Some((_len, spec)) => return Some(spec.name()), + None => self.vars_in_chunk = None, + } + } + let expr = self.chunks.find_map(|chunk| match chunk { + Chunk::Literal(_) => None, + Chunk::Expr(v) => Some(v), + }); + self.vars_in_chunk = match expr { + Some(expr) => Some(expr.decompose().1.into_iter()), + None => return None, + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::spec::IriSpec; + use crate::template::context::{AssocVisitor, ListVisitor, Visitor}; + + struct TestContext; + impl Context for TestContext { + fn visit<V: Visitor>(&self, visitor: V) -> V::Result { + match visitor.var_name().as_str() { + "str" => visitor.visit_string("string"), + "list" => visitor + .visit_list() + .visit_items_and_finish(["item0", "item1", "item2"]), + "assoc" => visitor + .visit_assoc() + .visit_entries_and_finish([("key0", "value0"), ("key1", "value1")]), + _ => visitor.visit_undefined(), + } + } + } + + #[test] + fn expand_error_pos() { + { + let e = UriTemplateStr::new("foo{list:4}") + .unwrap() + .expand::<IriSpec, _>(&TestContext) + .err() + .map(|e| e.location()); + assert_eq!(e, Some("foo{".len())); + } + + { + let e = UriTemplateStr::new("foo{/list*,list:4}") + .unwrap() + .expand::<IriSpec, _>(&TestContext) + .err() + .map(|e| e.location()); + assert_eq!(e, Some("foo{/list*,".len())); + } + + { + let e = UriTemplateStr::new("foo{/str:3,list*,assoc:4}") + .unwrap() + .expand::<IriSpec, _>(&TestContext) + .err() + .map(|e| e.location()); + assert_eq!(e, Some("foo{/str:3,list*,".len())); + } + } +} diff --git a/vendor/iri-string/src/template/string/owned.rs 
b/vendor/iri-string/src/template/string/owned.rs new file mode 100644 index 00000000..afd201b3 --- /dev/null +++ b/vendor/iri-string/src/template/string/owned.rs @@ -0,0 +1,296 @@ +//! Owned `UriTemplateString`. + +use core::fmt; + +use alloc::borrow::Cow; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::borrow::ToOwned; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::boxed::Box; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::String; + +use crate::template::error::{CreationError, Error, ErrorKind}; +use crate::template::parser::validate_template_str; +use crate::template::string::UriTemplateStr; + +/// An owned slice of a URI template. +/// +/// URI Template is defined by [RFC 6570]. +/// +/// Note that "URI Template" can also be used for IRI. +/// +/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html +/// +/// # Valid values +/// +/// This type can have a URI template string. +// Note that `From<$ty> for {Arc,Rc}<$slice>` is currently not implemented since +// this won't reuse allocated memory and hides internal memory reallocation. See +// <https://github.com/lo48576/iri-string/issues/20#issuecomment-1105207849>. +// However, this is not decided with firm belief or opinion, so there would be +// a chance that they are implemented in future. +#[cfg_attr(feature = "serde", derive(serde::Serialize))] +#[cfg_attr(feature = "serde", serde(transparent))] +#[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct UriTemplateString { + /// Inner data. + inner: String, +} + +impl UriTemplateString { + /// Creates a new string without validation. + /// + /// This does not validate the given string, so it is caller's + /// responsibility to ensure the given string is valid. + /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. 
+ /// If not, any use of the returned value or the call of this + /// function itself may result in undefined behavior. + #[inline] + #[must_use] + pub unsafe fn new_unchecked(s: alloc::string::String) -> Self { + // The construction itself can be written in safe Rust, but + // every other place including unsafe functions expects + // `self.inner` to be syntactically valid as `Self`. In order to + // make them safe, the construction should validate the value + // or at least should require users to validate the value by + // making the function `unsafe`. + Self { inner: s } + } + + /// Shrinks the capacity of the inner buffer to match its length. + #[inline] + pub fn shrink_to_fit(&mut self) { + self.inner.shrink_to_fit() + } + + /// Returns the internal buffer capacity in bytes. + #[inline] + #[must_use] + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + /// Returns the borrowed URI template string slice. + /// + /// This is equivalent to `&*self`. + #[inline] + #[must_use] + pub fn as_slice(&self) -> &UriTemplateStr { + self.as_ref() + } + + /// Appends the template string. + #[inline] + pub fn append(&mut self, other: &UriTemplateStr) { + self.inner.push_str(other.as_str()); + debug_assert!(validate_template_str(self.as_str()).is_ok()); + } +} + +impl AsRef<str> for UriTemplateString { + #[inline] + fn as_ref(&self) -> &str { + &self.inner + } +} + +impl AsRef<UriTemplateStr> for UriTemplateString { + #[inline] + fn as_ref(&self) -> &UriTemplateStr { + // SAFETY: `UriTemplateString` and `UriTemplateStr` require the same validation, + // so the content of `self: &UriTemplateString` must be valid as `UriTemplateStr`. 
+ unsafe { UriTemplateStr::new_always_unchecked(AsRef::<str>::as_ref(self)) } + } +} + +impl core::borrow::Borrow<str> for UriTemplateString { + #[inline] + fn borrow(&self) -> &str { + self.as_ref() + } +} + +impl core::borrow::Borrow<UriTemplateStr> for UriTemplateString { + #[inline] + fn borrow(&self) -> &UriTemplateStr { + self.as_ref() + } +} + +impl ToOwned for UriTemplateStr { + type Owned = UriTemplateString; + + #[inline] + fn to_owned(&self) -> Self::Owned { + self.into() + } +} + +impl From<&'_ UriTemplateStr> for UriTemplateString { + #[inline] + fn from(s: &UriTemplateStr) -> Self { + // This is safe because `s` must be valid. + Self { + inner: alloc::string::String::from(s.as_str()), + } + } +} + +impl From<UriTemplateString> for alloc::string::String { + #[inline] + fn from(s: UriTemplateString) -> Self { + s.inner + } +} + +impl<'a> From<UriTemplateString> for Cow<'a, UriTemplateStr> { + #[inline] + fn from(s: UriTemplateString) -> Cow<'a, UriTemplateStr> { + Cow::Owned(s) + } +} + +impl From<UriTemplateString> for Box<UriTemplateStr> { + #[inline] + fn from(s: UriTemplateString) -> Box<UriTemplateStr> { + let inner: String = s.into(); + let buf = Box::<str>::from(inner); + // SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so + // the memory layouts of `Box<str>` and `Box<UriTemplateStr>` are + // compatible. Additionally, `UriTemplateString` and `UriTemplateStr` + // require the same syntax. 
+ unsafe { + let raw: *mut str = Box::into_raw(buf); + Box::<UriTemplateStr>::from_raw(raw as *mut UriTemplateStr) + } + } +} + +impl TryFrom<&'_ str> for UriTemplateString { + type Error = Error; + + #[inline] + fn try_from(s: &str) -> Result<Self, Self::Error> { + <&UriTemplateStr>::try_from(s).map(Into::into) + } +} + +impl TryFrom<&'_ [u8]> for UriTemplateString { + type Error = Error; + + #[inline] + fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> { + let s = core::str::from_utf8(bytes) + .map_err(|e| Error::new(ErrorKind::InvalidUtf8, e.valid_up_to()))?; + <&UriTemplateStr>::try_from(s).map(Into::into) + } +} + +impl core::convert::TryFrom<alloc::string::String> for UriTemplateString { + type Error = CreationError<String>; + + #[inline] + fn try_from(s: alloc::string::String) -> Result<Self, Self::Error> { + match <&UriTemplateStr>::try_from(s.as_str()) { + Ok(_) => { + // This is safe because `<&UriTemplateStr>::try_from(s)?` ensures + // that the string `s` is valid. + Ok(Self { inner: s }) + } + Err(e) => Err(CreationError::new(e, s)), + } + } +} + +impl alloc::str::FromStr for UriTemplateString { + type Err = Error; + + #[inline] + fn from_str(s: &str) -> Result<Self, Self::Err> { + TryFrom::try_from(s) + } +} + +impl core::ops::Deref for UriTemplateString { + type Target = UriTemplateStr; + + #[inline] + fn deref(&self) -> &UriTemplateStr { + self.as_ref() + } +} + +impl_cmp!(str, UriTemplateStr, Cow<'_, str>); +impl_cmp!(str, &UriTemplateStr, Cow<'_, str>); + +impl_cmp!(str, str, UriTemplateString); +impl_cmp!(str, &str, UriTemplateString); +impl_cmp!(str, Cow<'_, str>, UriTemplateString); +impl_cmp!(str, String, UriTemplateString); + +impl fmt::Display for UriTemplateString { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +/// Serde deserializer implementation. 
+#[cfg(feature = "serde")] +mod __serde_owned { + use super::UriTemplateString; + + use core::fmt; + + #[cfg(all(feature = "alloc", feature = "serde", not(feature = "std")))] + use alloc::string::String; + + use serde::{ + de::{self, Visitor}, + Deserialize, Deserializer, + }; + + /// Custom owned string visitor. + #[derive(Debug, Clone, Copy)] + struct CustomStringVisitor; + + impl Visitor<'_> for CustomStringVisitor { + type Value = UriTemplateString; + + #[inline] + fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("URI template string") + } + + #[inline] + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> + where + E: de::Error, + { + <UriTemplateString as TryFrom<&str>>::try_from(v).map_err(E::custom) + } + + #[cfg(feature = "serde")] + #[inline] + fn visit_string<E>(self, v: String) -> Result<Self::Value, E> + where + E: de::Error, + { + <UriTemplateString as TryFrom<String>>::try_from(v).map_err(E::custom) + } + } + + impl<'de> Deserialize<'de> for UriTemplateString { + #[inline] + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + deserializer.deserialize_str(CustomStringVisitor) + } + } +} diff --git a/vendor/iri-string/src/types.rs b/vendor/iri-string/src/types.rs new file mode 100644 index 00000000..38e734c9 --- /dev/null +++ b/vendor/iri-string/src/types.rs @@ -0,0 +1,224 @@ +//! URI and IRI types. +//! +//! # URI and IRI +//! +//! IRIs (Internationalized Resource Identifiers) are defined in [RFC 3987], +//! and URIs (Uniform Resource Identifiers) are defined in [RFC 3986]. +//! +//! URI consists of only ASCII characters, and is a subset of IRI. +//! +//! IRIs are defined as below: +//! +//! ```text +//! IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ] +//! IRI-reference = IRI / irelative-ref +//! absolute-IRI = scheme ":" ihier-part [ "?" iquery ] +//! irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ] +//! 
(`irelative-part` is roughly same as `ihier-part`.) +//! ``` +//! +//! Definitions for URIs are almost the same, but they cannot have non-ASCII characters. +//! +//! # Types +//! +//! Types can be categorized by: +//! +//! * syntax, +//! * spec, and +//! * ownership. +//! +//! ## Syntax +//! +//! Since URIs and IRIs have almost the same syntax and share algorithms, they are implemented by +//! generic types. +//! +//! * [`RiStr`] and [`RiString`] +//! + String types for `IRI` and `URI` rules. +//! * [`RiAbsoluteStr`] and [`RiAbsoluteString`] +//! + String types for `absolute-IRI` and `absolute-URI` rules. +//! * [`RiReferenceStr`] and [`RiReferenceString`] +//! + String types for `IRI-reference` and `URI-reference` rules. +//! * [`RiRelativeStr`] and [`RiRelativeString`] +//! + String types for `irelative-ref` and `relative-ref` rules. +//! * [`RiFragmentStr`] and [`RiFragmentString`] +//! + String types for `ifragment` and `fragment` rules. +//! + Note that these types represent a substring of an IRI / URI reference. +//! They are not intended to be used directly as an IRI / URI reference. +//! +//! "Ri" stands for "Resource Identifier". +//! +//! ## Spec +//! +//! These types have a type parameter, which represents RFC specification. +//! [`IriSpec`] represents [RFC 3987] spec, and [`UriSpec`] represents [RFC 3986] spec. +//! For example, `RiAbsoluteStr<IriSpec>` can have `absolute-IRI` string value, +//! and `RiReferenceStr<UriSpec>` can have `URI-reference` string value. +//! +//! ## Ownership +//! +//! String-like types usually have two variations, borrowed and owned. +//! +//! Borrowed types (such as `str`, `Path`, `OsStr`) are unsized, and used by reference style. +//! Owned types (such as `String`, `PathBuf`, `OsString`) are sized, and require heap allocation. +//! Owned types can be coerced to a borrowed type (for example, `&String` is automatically coerced +//! to `&str` in many contexts). +//! +//! 
IRI / URI types have the same variations, `RiFooStr` and `RiFooString` +//! (`Foo` part represents syntax). +//! They are very similar to `&str` and `String`. +//! `Deref` is implemented, `RiFooStr::len()` is available, `&RiFooString` can be coerced to +//! `&RiFooStr`, `Cow<'_, RiFooStr>` and `Box<RiFooStr>` are available, and so on. +//! +//! # Hierarchy and safe conversion +//! +//! IRI syntaxes have the hierarchy below. +//! +//! ```text +//! RiReferenceStr +//! |-- RiStr +//! | `-- RiAbsoluteStr +//! `-- RiRelativeStr +//! ``` +//! +//! Therefore, the conversions below are safe and cheap: +//! +//! * `RiStr -> RiReferenceStr` +//! * `RiAbsoluteStr -> RiStr` +//! * `RiAbsoluteStr -> RiReferenceStr` +//! * `RiRelativeStr -> RiReferenceStr` +//! +//! For safely convertible types (consider `FooStr -> BarStr` is safe), traits +//! below are implemented: +//! +//! * `AsRef<BarStr> for FooStr` +//! * `AsRef<BarStr> for FooString` +//! * `From<FooString> for BarString` +//! * `PartialEq<FooStr> for BarStr`, and lots of impls like that +//! + `PartialEq` and `PartialOrd`. +//! + Slice, owned, `Cow`, reference, etc... +//! +//! ## Fallible conversions +//! +//! Fallible conversions are implemented from plain strings into IRI strings. +//! +//! * `TryFrom<&str> for &FooStr` +//! * `TryFrom<&str> for FooString` +//! * `TryFrom<String> for FooString` +//! * `FromStr for FooString` +//! +//! Some IRI string types provide more convenient methods to convert between IRI types. +//! For example, [`RiReferenceString::into_iri()`] tries to convert an IRI reference into an IRI, +//! and returns `Result<IriString, IriRelativeString>`. +//! This is because an IRI reference is valid as an IRI or a relative IRI reference. +//! Such methods are usually more efficient than using `TryFrom` for plain strings, because they +//! prevent you from losing ownership of a string, and do a conversion without extra memory +//! allocation. +//! +//! # Aliases +//! +//! 
This module contains type aliases for RFC 3986 URI types and RFC 3987 IRI types. +//! +//! `IriFooStr{,ing}` are aliases of `RiFooStr{,ing}<IriSpec>`, and `UriFooStr{,ing}` are aliases +//! of `RiFooStr{,ing}<UriSpec>`. +//! +//! # Wrapped string types +//! +//! Similar to string types in std (such as `str`, `std::path::Path`, and `std::ffi::OsStr`), +//! IRI string types in this crate provide convenient conversions to: +//! +//! * `std::boxed::Box`, +//! * `std::borrow::Cow`, +//! * `std::rc::Rc`, and +//! * `std::sync::Arc`. +//! +//! ``` +//! # use iri_string::validate::Error; +//! # #[cfg(feature = "std")] { +//! use std::borrow::Cow; +//! use std::rc::Rc; +//! use std::sync::Arc; +//! +//! use iri_string::types::IriStr; +//! +//! let iri = IriStr::new("http://example.com/")?; +//! let iri_owned = iri.to_owned(); +//! +//! // From slice. +//! let cow_1_1: Cow<'_, IriStr> = iri.into(); +//! let cow_1_2 = Cow::<'_, IriStr>::from(iri); +//! assert!(matches!(cow_1_1, Cow::Borrowed(_))); +//! assert!(matches!(cow_1_2, Cow::Borrowed(_))); +//! // From owned. +//! let cow_2_1: Cow<'_, IriStr> = iri_owned.clone().into(); +//! let cow_2_2 = Cow::<'_, IriStr>::from(iri_owned.clone()); +//! assert!(matches!(cow_2_1, Cow::Owned(_))); +//! assert!(matches!(cow_2_2, Cow::Owned(_))); +//! +//! // From slice. +//! let box_1_1: Box<IriStr> = iri.into(); +//! let box_1_2 = Box::<IriStr>::from(iri); +//! // From owned. +//! let box_2_1: Box<IriStr> = iri_owned.clone().into(); +//! let box_2_2 = Box::<IriStr>::from(iri_owned.clone()); +//! +//! // From slice. +//! let rc_1_1: Rc<IriStr> = iri.into(); +//! let rc_1_2 = Rc::<IriStr>::from(iri); +//! // From owned. +//! // Note that `From<owned> for Rc<borrowed>` is not implemented for now. +//! // Get borrowed string by `.as_slice()` and convert it. +//! let rc_2_1: Rc<IriStr> = iri_owned.clone().as_slice().into(); +//! let rc_2_2 = Rc::<IriStr>::from(iri_owned.clone().as_slice()); +//! +//! // From slice. +//! 
let arc_1_1: Arc<IriStr> = iri.into(); +//! let arc_1_2 = Arc::<IriStr>::from(iri); +//! // From owned. +//! // Note that `From<owned> for Arc<borrowed>` is not implemented for now. +//! // Get borrowed string by `.as_slice()` and convert it. +//! let arc_2_1: Arc<IriStr> = iri_owned.clone().as_slice().into(); +//! let arc_2_2 = Arc::<IriStr>::from(iri_owned.clone().as_slice()); +//! # } +//! # Ok::<_, Error>(()) +//! ``` +//! +//! [RFC 3986]: https://tools.ietf.org/html/rfc3986 +//! [RFC 3987]: https://tools.ietf.org/html/rfc3987 +//! [`RiStr`]: struct.RiStr.html +//! [`RiString`]: struct.RiString.html +//! [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html +//! [`RiAbsoluteString`]: struct.RiAbsoluteString.html +//! [`RiFragmentStr`]: struct.RiFragmentStr.html +//! [`RiFragmentString`]: struct.RiFragmentString.html +//! [`RiReferenceStr`]: struct.RiReferenceStr.html +//! [`RiReferenceString`]: struct.RiReferenceString.html +//! [`RiReferenceString::into_iri()`]: struct.RiReferenceString.html#method.into_iri +//! [`RiRelativeStr`]: struct.RiRelativeStr.html +//! [`RiRelativeString`]: struct.RiRelativeString.html +//! [`IriSpec`]: ../spec/enum.IriSpec.html +//! 
[`UriSpec`]: ../spec/enum.UriSpec.html + +#[cfg(feature = "alloc")] +pub use self::{ + generic::{ + CreationError, RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString, + RiRelativeString, RiString, + }, + iri::{ + IriAbsoluteString, IriFragmentString, IriQueryString, IriReferenceString, + IriRelativeString, IriString, + }, + uri::{ + UriAbsoluteString, UriFragmentString, UriQueryString, UriReferenceString, + UriRelativeString, UriString, + }, +}; +pub use self::{ + generic::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr}, + iri::{IriAbsoluteStr, IriFragmentStr, IriQueryStr, IriReferenceStr, IriRelativeStr, IriStr}, + uri::{UriAbsoluteStr, UriFragmentStr, UriQueryStr, UriReferenceStr, UriRelativeStr, UriStr}, +}; + +pub(crate) mod generic; +mod iri; +mod uri; diff --git a/vendor/iri-string/src/types/generic.rs b/vendor/iri-string/src/types/generic.rs new file mode 100644 index 00000000..9e631069 --- /dev/null +++ b/vendor/iri-string/src/types/generic.rs @@ -0,0 +1,57 @@ +//! Generic resource identifier types. +//! +//! ```text +//! IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ] +//! IRI-reference = IRI / irelative-ref +//! absolute-IRI = scheme ":" ihier-part [ "?" iquery ] +//! irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ] +//! (`irelative-part` is roughly same as `ihier-part`.) +//! ``` +//! +//! Hierarchy: +//! +//! ```text +//! RiReferenceStr +//! |-- RiStr +//! | `-- RiAbsoluteStr +//! `-- RiRelativeStr +//! ``` +//! +//! Therefore, the conversions below are safe and cheap: +//! +//! * `RiStr -> RiReferenceStr` +//! * `RiAbsoluteStr -> RiStr` +//! * `RiAbsoluteStr -> RiReferenceStr` +//! * `RiRelativeStr -> RiReferenceStr` +//! +//! For safely convertible types (consider `FooStr -> BarStr` is safe), traits +//! below are implemented: +//! +//! * `AsRef<BarStr> for FooStr` +//! * `AsRef<BarStr> for FooString` +//! * `From<FooString> for BarString` +//! 
* `PartialEq<FooStr> for BarStr` and lots of impls like that +//! + `PartialEq` and `PartialOrd`. +//! + Slice, owned, `Cow`, reference, etc... + +pub use self::{ + absolute::RiAbsoluteStr, fragment::RiFragmentStr, normal::RiStr, query::RiQueryStr, + reference::RiReferenceStr, relative::RiRelativeStr, +}; +#[cfg(feature = "alloc")] +pub use self::{ + absolute::RiAbsoluteString, error::CreationError, fragment::RiFragmentString, normal::RiString, + query::RiQueryString, reference::RiReferenceString, relative::RiRelativeString, +}; + +#[macro_use] +mod macros; + +mod absolute; +#[cfg(feature = "alloc")] +mod error; +mod fragment; +mod normal; +mod query; +mod reference; +mod relative; diff --git a/vendor/iri-string/src/types/generic/absolute.rs b/vendor/iri-string/src/types/generic/absolute.rs new file mode 100644 index 00000000..8e588ede --- /dev/null +++ b/vendor/iri-string/src/types/generic/absolute.rs @@ -0,0 +1,728 @@ +//! Absolute IRI (without fragment part). + +use crate::components::AuthorityComponents; +#[cfg(feature = "alloc")] +use crate::mask_password::password_range_to_hide; +use crate::mask_password::PasswordMasked; +use crate::normalize::{Error, NormalizationInput, Normalized, NormalizednessCheckMode}; +use crate::parser::trusted as trusted_parser; +use crate::spec::Spec; +use crate::types::{RiQueryStr, RiReferenceStr, RiStr}; +#[cfg(feature = "alloc")] +use crate::types::{RiReferenceString, RiString}; +use crate::validate::absolute_iri; + +define_custom_string_slice! { + /// A borrowed slice of an absolute IRI without fragment part. + /// + /// This corresponds to [`absolute-IRI` rule] in [RFC 3987] + /// (and [`absolute-URI` rule] in [RFC 3986]). + /// In other words, this is [`RiStr`] without fragment part. + /// + /// If you want to accept fragment part, use [`RiStr`]. + /// + /// # Valid values + /// + /// This type can have an absolute IRI without fragment part. 
+ /// + /// ``` + /// # use iri_string::types::IriAbsoluteStr; + /// assert!(IriAbsoluteStr::new("https://example.com/foo?bar=baz").is_ok()); + /// assert!(IriAbsoluteStr::new("foo:bar").is_ok()); + /// // Scheme `foo` and empty path. + /// assert!(IriAbsoluteStr::new("foo:").is_ok()); + /// // `foo://.../` below are all allowed. See the crate documentation for detail. + /// assert!(IriAbsoluteStr::new("foo:/").is_ok()); + /// assert!(IriAbsoluteStr::new("foo://").is_ok()); + /// assert!(IriAbsoluteStr::new("foo:///").is_ok()); + /// assert!(IriAbsoluteStr::new("foo:////").is_ok()); + /// assert!(IriAbsoluteStr::new("foo://///").is_ok()); + /// + /// ``` + /// + /// Relative IRI is not allowed. + /// + /// ``` + /// # use iri_string::types::IriAbsoluteStr; + /// // This is a relative path. + /// assert!(IriAbsoluteStr::new("foo/bar").is_err()); + /// // `/foo/bar` is an absolute path, but it is authority-relative. + /// assert!(IriAbsoluteStr::new("/foo/bar").is_err()); + /// // `//foo/bar` is termed "network-path reference", + /// // or usually called "protocol-relative reference". + /// assert!(IriAbsoluteStr::new("//foo/bar").is_err()); + /// // Empty string is not a valid absolute IRI. + /// assert!(IriAbsoluteStr::new("").is_err()); + /// ``` + /// + /// Fragment part (such as trailing `#foo`) is not allowed. + /// + /// ``` + /// # use iri_string::types::IriAbsoluteStr; + /// // Fragment part is not allowed. + /// assert!(IriAbsoluteStr::new("https://example.com/foo?bar=baz#qux").is_err()); + /// ``` + /// + /// Some characters and sequences cannot be used in an absolute IRI. + /// + /// ``` + /// # use iri_string::types::IriAbsoluteStr; + /// // `<` and `>` cannot directly appear in an absolute IRI. + /// assert!(IriAbsoluteStr::new("<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in an absolute IRI. 
+ /// assert!(IriAbsoluteStr::new("%").is_err()); + /// assert!(IriAbsoluteStr::new("%GG").is_err()); + /// ``` + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`absolute-IRI` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`absolute-URI` rule]: https://tools.ietf.org/html/rfc3986#section-4.3 + /// [`RiStr`]: struct.RiStr.html + struct RiAbsoluteStr { + validator = absolute_iri, + expecting_msg = "Absolute IRI string", + } +} + +#[cfg(feature = "alloc")] +define_custom_string_owned! { + /// An owned string of an absolute IRI without fragment part. + /// + /// This corresponds to [`absolute-IRI` rule] in [RFC 3987] + /// (and [`absolute-URI` rule] in [RFC 3986]). + /// The rule for `absolute-IRI` is `scheme ":" ihier-part [ "?" iquery ]`. + /// In other words, this is [`RiString`] without fragment part. + /// + /// If you want to accept fragment part, use [`RiString`]. + /// + /// For details, see the document for [`RiAbsoluteStr`]. + /// + /// Enabled by `alloc` or `std` feature. + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`absolute-IRI` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`absolute-URI` rule]: https://tools.ietf.org/html/rfc3986#section-4.3 + /// [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html + /// [`RiString`]: struct.RiString.html + struct RiAbsoluteString { + validator = absolute_iri, + slice = RiAbsoluteStr, + expecting_msg = "Absolute IRI string", + } +} + +impl<S: Spec> RiAbsoluteStr<S> { + /// Returns `Ok(())` if the IRI is normalizable by the RFC 3986 algorithm. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/%2e/bar/..")?; + /// assert!(iri.ensure_rfc3986_normalizable().is_ok()); + /// + /// let iri2 = IriAbsoluteStr::new("scheme:/..//bar")?; + /// // The normalization result would be `scheme://bar` according to RFC + /// // 3986, but it is unintended and should be treated as a failure. + /// // This crate automatically handles this case so that `.normalize()` won't fail. + /// assert!(iri2.ensure_rfc3986_normalizable().is_err()); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> { + NormalizationInput::from(self).ensure_rfc3986_normalizable() + } + + /// Returns `true` if the IRI is already normalized. + /// + /// This returns the same result as `self.normalize().to_string() == self`, + /// but does this more efficiently without heap allocation. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?; + /// assert!(!iri.is_normalized()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query"); + /// assert!(normalized.is_normalized()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("scheme:/.///foo")?; + /// // Already normalized. 
+ /// assert!(iri.is_normalized()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("scheme:relative/..//not-a-host")?; + /// // Default normalization algorithm assumes the path part to be NOT opaque. + /// assert!(!iri.is_normalized()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "scheme:/.//not-a-host"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn is_normalized(&self) -> bool { + trusted_parser::is_normalized::<S>(self.as_str(), NormalizednessCheckMode::Default) + } + + /// Returns `true` if the IRI is already normalized in the sense of RFC 3986. + /// + /// This returns the same result as + /// `self.ensure_rfc3986_normalizable().is_ok() && (self.normalize().to_string() == self)`, + /// but does this more efficiently without heap allocation. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?; + /// assert!(!iri.is_normalized_rfc3986()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query"); + /// assert!(normalized.is_normalized_rfc3986()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("scheme:/.///foo")?; + /// // Not normalized in the sense of RFC 3986. 
+ /// assert!(!iri.is_normalized_rfc3986()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("scheme:relative/..//not-a-host")?; + /// // RFC 3986 normalization algorithm assumes the path part to be NOT opaque. + /// assert!(!iri.is_normalized_rfc3986()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "scheme:/.//not-a-host"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn is_normalized_rfc3986(&self) -> bool { + trusted_parser::is_normalized::<S>(self.as_str(), NormalizednessCheckMode::Rfc3986) + } + + /// Returns `true` if the IRI is already normalized in the sense of + /// [`normalize_but_preserve_authorityless_relative_path`] method. + /// + /// This returns the same result as + /// `self.normalize_but_preserve_authorityless_relative_path().to_string() == self`, + /// but does this more efficiently without heap allocation. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?; + /// assert!(!iri.is_normalized_but_authorityless_relative_path_preserved()); + /// + /// let normalized = iri + /// .normalize_but_preserve_authorityless_relative_path() + /// .to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query"); + /// assert!(normalized.is_normalized()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("scheme:/.///foo")?; + /// // Already normalized in the sense of + /// // `normalize_but_preserve_authorityless_relative_path()` method. + /// assert!(iri.is_normalized_but_authorityless_relative_path_preserved()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("scheme:relative/..//not-a-host")?; + /// // Relative path is treated as opaque since the authority component is absent. 
+ /// assert!(iri.is_normalized_but_authorityless_relative_path_preserved()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// [`normalize_but_preserve_authorityless_relative_path`]: + /// `Self::normalize_but_preserve_authorityless_relative_path` + #[inline] + #[must_use] + pub fn is_normalized_but_authorityless_relative_path_preserved(&self) -> bool { + trusted_parser::is_normalized::<S>( + self.as_str(), + NormalizednessCheckMode::PreserveAuthoritylessRelativePath, + ) + } + + /// Returns the normalized IRI. + /// + /// # Notes + /// + /// For some abnormal IRIs, the normalization can produce semantically + /// incorrect string that looks syntactically valid. To avoid security + /// issues by this trap, the normalization algorithm by this crate + /// automatically applies the workaround. + /// + /// If you worry about this, test by + /// [`RiAbsoluteStr::ensure_rfc3986_normalizable`] method or + /// [`Normalized::ensure_rfc3986_normalizable`] before using the result + /// string. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?; + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn normalize(&self) -> Normalized<'_, Self> { + Normalized::from_input(NormalizationInput::from(self)).and_normalize() + } + + /// Returns the normalized IRI, but preserving dot segments in relative path + /// if the authority component is absent. + /// + /// This normalization would be similar to that of [WHATWG URL Standard] + /// while this implementation is not guaranteed to strictly follow the spec. 
+ /// + /// Note that this normalization algorithm is not compatible with RFC 3986 + /// algorithm for some inputs. + /// + /// Note that case normalization and percent-encoding normalization will + /// still be applied to any path. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?; + /// + /// let normalized = iri + /// .normalize_but_preserve_authorityless_relative_path() + /// .to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("scheme:relative/../f%6f%6f")?; + /// + /// let normalized = iri + /// .normalize_but_preserve_authorityless_relative_path() + /// .to_dedicated_string(); + /// assert_eq!(normalized, "scheme:relative/../foo"); + /// // `.normalize()` would normalize this to `scheme:/foo`. + /// # assert_eq!(iri.normalize().to_dedicated_string(), "scheme:/foo"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// [WHATWG URL Standard]: https://url.spec.whatwg.org/ + #[inline] + #[must_use] + pub fn normalize_but_preserve_authorityless_relative_path(&self) -> Normalized<'_, Self> { + Normalized::from_input(NormalizationInput::from(self)) + .and_normalize_but_preserve_authorityless_relative_path() + } + + /// Returns the proxy to the IRI with password masking feature. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("http://user:password@example.com/path?query")?; + /// let masked = iri.mask_password(); + /// assert_eq!(masked.to_dedicated_string(), "http://user:@example.com/path?query"); + /// + /// assert_eq!( + /// masked.replace_password("${password}").to_string(), + /// "http://user:${password}@example.com/path?query" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn mask_password(&self) -> PasswordMasked<'_, Self> { + PasswordMasked::new(self) + } +} + +/// Components getters. +impl<S: Spec> RiAbsoluteStr<S> { + /// Returns the scheme. + /// + /// The following colon is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?; + /// assert_eq!(iri.scheme_str(), "http"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn scheme_str(&self) -> &str { + trusted_parser::extract_scheme_absolute(self.as_str()) + } + + /// Returns the authority. + /// + /// The leading `//` is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?; + /// assert_eq!(iri.authority_str(), Some("example.com")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_str(&self) -> Option<&str> { + trusted_parser::extract_authority_absolute(self.as_str()) + } + + /// Returns the path. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?; + /// assert_eq!(iri.path_str(), "/pathpath"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.path_str(), "uuid:10db315b-fcd1-4428-aca8-15babc9a2da2"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn path_str(&self) -> &str { + trusted_parser::extract_path_absolute(self.as_str()) + } + + /// Returns the query. + /// + /// The leading question mark (`?`) is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::{IriAbsoluteStr, IriQueryStr}; + /// + /// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?; + /// let query = IriQueryStr::new("queryquery")?; + /// assert_eq!(iri.query(), Some(query)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.query(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query(&self) -> Option<&RiQueryStr<S>> { + trusted_parser::extract_query_absolute_iri(self.as_str()).map(|query| { + // SAFETY: `trusted_parser::extract_query_absolute_iri()` must return + // the query part of an IRI (excluding the leading `?` character), + // and the returned string consists of allowed characters since it + // is a substring of the source IRI. + unsafe { RiQueryStr::new_maybe_unchecked(query) } + }) + } + + /// Returns the query in a raw string slice. + /// + /// The leading question mark (`?`) is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?; + /// assert_eq!(iri.query_str(), Some("queryquery")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.query_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query_str(&self) -> Option<&str> { + trusted_parser::extract_query_absolute_iri(self.as_str()) + } + + /// Returns the authority components. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("http://user:pass@example.com:8080/pathpath?queryquery")?; + /// let authority = iri.authority_components() + /// .expect("authority is available"); + /// assert_eq!(authority.userinfo(), Some("user:pass")); + /// assert_eq!(authority.host(), "example.com"); + /// assert_eq!(authority.port(), Some("8080")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriAbsoluteStr; + /// + /// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert!(iri.authority_components().is_none()); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> { + AuthorityComponents::from_iri(self.as_ref()) + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> RiAbsoluteString<S> { + /// Removes the password completely (including separator colon) from `self` even if it is empty. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriAbsoluteString; + /// + /// let mut iri = IriAbsoluteString::try_from("http://user:password@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "http://user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// Even if the password is empty, the password and separator will be removed. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriAbsoluteString; + /// + /// let mut iri = IriAbsoluteString::try_from("http://user:@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "http://user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice().as_ref()) { + Some(v) => v, + None => return, + }; + let separator_colon = pw_range.start - 1; + // SAFETY: the IRI must still be valid after the password component and + // the leading separator colon is removed. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(separator_colon..pw_range.end); + debug_assert!( + RiAbsoluteStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component is removed" + ); + } + } + + /// Replaces the non-empty password in `self` to the empty password. + /// + /// This leaves the separator colon if the password part was available. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriAbsoluteString; + /// + /// let mut iri = IriAbsoluteString::try_from("http://user:password@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "http://user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the password is empty, it is left as is. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriAbsoluteString; + /// + /// let mut iri = IriAbsoluteString::try_from("http://user:@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "http://user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_nonempty_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice().as_ref()) { + Some(v) if !v.is_empty() => v, + _ => return, + }; + debug_assert_eq!( + self.as_str().as_bytes().get(pw_range.start - 1).copied(), + Some(b':'), + "[validity] the password component must be prefixed with a separator colon" + ); + // SAFETY: the IRI must be valid after the password is replaced with empty string. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(pw_range); + debug_assert!( + RiAbsoluteStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component is removed" + ); + } + } +} + +impl_trivial_conv_between_iri! { + from_slice: RiAbsoluteStr, + from_owned: RiAbsoluteString, + to_slice: RiStr, + to_owned: RiString, +} + +impl_trivial_conv_between_iri! { + from_slice: RiAbsoluteStr, + from_owned: RiAbsoluteString, + to_slice: RiReferenceStr, + to_owned: RiReferenceString, +} diff --git a/vendor/iri-string/src/types/generic/error.rs b/vendor/iri-string/src/types/generic/error.rs new file mode 100644 index 00000000..b11f58b2 --- /dev/null +++ b/vendor/iri-string/src/types/generic/error.rs @@ -0,0 +1,70 @@ +//! Resource identifier creation error. + +use core::fmt; + +#[cfg(feature = "std")] +use std::error; + +use crate::validate::Error; + +/// Error on conversion into an IRI type. +/// +/// Enabled by `alloc` or `std` feature. +// This type itself does not require `alloc` or `std`, but the type is used only when `alloc` +// feature is enabled. 
To avoid exporting unused stuff, the type (and the `types::generic::error` +// module) is available only when necessary. +// +// Note that all types which implement `Spec` also implement `SpecInternal`. +pub struct CreationError<T> { + /// Source data. + source: T, + /// Validation error. + error: Error, +} + +impl<T> CreationError<T> { + /// Returns the source data. + #[must_use] + pub fn into_source(self) -> T { + self.source + } + + /// Returns the validation error. + #[must_use] + pub fn validation_error(&self) -> Error { + self.error + } + + /// Creates a new `CreationError`. + #[must_use] + pub(crate) fn new(error: Error, source: T) -> Self { + Self { source, error } + } +} + +impl<T: fmt::Debug> fmt::Debug for CreationError<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CreationError") + .field("source", &self.source) + .field("error", &self.error) + .finish() + } +} + +impl<T: Clone> Clone for CreationError<T> { + fn clone(&self) -> Self { + Self { + source: self.source.clone(), + error: self.error, + } + } +} + +impl<T> fmt::Display for CreationError<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.error.fmt(f) + } +} + +#[cfg(feature = "std")] +impl<T: fmt::Debug> error::Error for CreationError<T> {} diff --git a/vendor/iri-string/src/types/generic/fragment.rs b/vendor/iri-string/src/types/generic/fragment.rs new file mode 100644 index 00000000..2fdc1613 --- /dev/null +++ b/vendor/iri-string/src/types/generic/fragment.rs @@ -0,0 +1,108 @@ +//! Fragment string. + +use crate::{ + spec::Spec, + validate::{fragment, Error}, +}; + +define_custom_string_slice! { + /// A borrowed slice of an IRI fragment (i.e. after the first `#` character). + /// + /// This corresponds to [`ifragment` rule] in [RFC 3987] (and [`fragment` rule] in [RFC 3986]). + /// The rule for `ifragment` is `*( ipchar / "/" / "?" )`. + /// + /// # Valid values + /// + /// This type can have an IRI fragment. 
+ /// Note that the IRI `foo://bar/baz#qux` has the fragment `qux`, **not** `#qux`. + /// + /// ``` + /// # use iri_string::types::IriFragmentStr; + /// assert!(IriFragmentStr::new("").is_ok()); + /// assert!(IriFragmentStr::new("foo").is_ok()); + /// assert!(IriFragmentStr::new("foo/bar").is_ok()); + /// assert!(IriFragmentStr::new("/foo/bar").is_ok()); + /// assert!(IriFragmentStr::new("//foo/bar").is_ok()); + /// assert!(IriFragmentStr::new("https://user:pass@example.com:8080").is_ok()); + /// assert!(IriFragmentStr::new("https://example.com/").is_ok()); + /// ``` + /// + /// Some characters and sequences cannot be used in a fragment. + /// + /// ``` + /// # use iri_string::types::IriFragmentStr; + /// // `<` and `>` cannot directly appear in an IRI reference. + /// assert!(IriFragmentStr::new("<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in an IRI reference. + /// assert!(IriFragmentStr::new("%").is_err()); + /// assert!(IriFragmentStr::new("%GG").is_err()); + /// // Hash sign `#` cannot appear in an IRI fragment. + /// assert!(IriFragmentStr::new("#hash").is_err()); + /// ``` + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`fragment` rule]: https://tools.ietf.org/html/rfc3986#section-3.5 + /// [`ifragment` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + struct RiFragmentStr { + validator = fragment, + expecting_msg = "IRI fragment string", + } +} + +#[cfg(feature = "alloc")] +define_custom_string_owned! { + /// An owned string of an IRI fragment (i.e. after the first `#` character). + /// + /// This corresponds to [`ifragment` rule] in [RFC 3987] (and [`fragment` rule] in [RFC 3986]). + /// The rule for `ifragment` is `*( ipchar / "/" / "?" )`. + /// + /// For details, see the documentation for [`RiFragmentStr`]. + /// + /// Enabled by `alloc` or `std` feature. 
+ /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`fragment` rule]: https://tools.ietf.org/html/rfc3986#section-3.5 + /// [`ifragment` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`RiFragmentStr`]: struct.RiFragmentStr.html + struct RiFragmentString { + validator = fragment, + slice = RiFragmentStr, + expecting_msg = "IRI fragment string", + } +} + +impl<S: Spec> RiFragmentStr<S> { + /// Creates a new `&RiFragmentStr` from the fragment part prefixed by `#`. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::types::IriFragmentStr; + /// assert!(IriFragmentStr::from_prefixed("#").is_ok()); + /// assert!(IriFragmentStr::from_prefixed("#foo").is_ok()); + /// assert!(IriFragmentStr::from_prefixed("#foo/bar").is_ok()); + /// assert!(IriFragmentStr::from_prefixed("#/foo/bar").is_ok()); + /// assert!(IriFragmentStr::from_prefixed("#//foo/bar").is_ok()); + /// assert!(IriFragmentStr::from_prefixed("#https://user:pass@example.com:8080").is_ok()); + /// assert!(IriFragmentStr::from_prefixed("#https://example.com/").is_ok()); + /// + /// // `<` and `>` cannot directly appear in an IRI. + /// assert!(IriFragmentStr::from_prefixed("#<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in an IRI. + /// assert!(IriFragmentStr::from_prefixed("#%").is_err()); + /// assert!(IriFragmentStr::from_prefixed("#%GG").is_err()); + /// // `#` prefix is expected. + /// assert!(IriFragmentStr::from_prefixed("").is_err()); + /// assert!(IriFragmentStr::from_prefixed("foo").is_err()); + /// // Hash sign `#` cannot appear in an IRI fragment. 
+ /// assert!(IriFragmentStr::from_prefixed("##hash").is_err()); + /// ``` + pub fn from_prefixed(s: &str) -> Result<&Self, Error> { + if !s.starts_with('#') { + return Err(Error::new()); + } + TryFrom::try_from(&s[1..]) + } +} diff --git a/vendor/iri-string/src/types/generic/macros.rs b/vendor/iri-string/src/types/generic/macros.rs new file mode 100644 index 00000000..7aaa82df --- /dev/null +++ b/vendor/iri-string/src/types/generic/macros.rs @@ -0,0 +1,1041 @@ +//! Macros to define resource identifier types. + +/// Implements type conversion from slice into smart pointer. +macro_rules! impl_from_slice_into_smartptr { + ( + // Generic slice type. + ty: $ty:ident, + // Smart pointer item path (without type parameter). + smartptr: $($smartptr:ident)::*, + // Pointer mutability for `into_raw` and `from_raw`. + // Use `mut` for `Box`, and `const` for `Arc` and `Rc`. + mutability: $mut:ident, + ) => { + #[cfg(feature = "alloc")] + impl<S: crate::spec::Spec> From<&$ty<S>> for $($smartptr)::* <$ty<S>> { + fn from(s: &$ty<S>) -> Self { + let inner: &str = s.as_str(); + let buf = $($smartptr)::* ::<str>::from(inner); + // SAFETY: `$ty<S>` has `repr(transparent)` attribute, so the + // memory layouts of `$smartptr<str>` and `$smartptr<$ty<S>>` + // are compatible. + unsafe { + let raw: *$mut str = $($smartptr)::* ::into_raw(buf); + $($smartptr)::* ::<$ty<S>>::from_raw(raw as *$mut $ty<S>) + } + } + } + }; +} + +/// Implements `PartialEq` and `PartialOrd`. +macro_rules! 
impl_cmp { + ($ty_common:ty, $ty_lhs:ty, $ty_rhs:ty) => { + impl<S: crate::spec::Spec> PartialEq<$ty_rhs> for $ty_lhs { + #[inline] + fn eq(&self, o: &$ty_rhs) -> bool { + <$ty_common as PartialEq<$ty_common>>::eq(self.as_ref(), o.as_ref()) + } + } + impl<S: crate::spec::Spec> PartialEq<$ty_lhs> for $ty_rhs { + #[inline] + fn eq(&self, o: &$ty_lhs) -> bool { + <$ty_common as PartialEq<$ty_common>>::eq(self.as_ref(), o.as_ref()) + } + } + impl<S: crate::spec::Spec> PartialOrd<$ty_rhs> for $ty_lhs { + #[inline] + fn partial_cmp(&self, o: &$ty_rhs) -> Option<core::cmp::Ordering> { + <$ty_common as PartialOrd<$ty_common>>::partial_cmp(self.as_ref(), o.as_ref()) + } + } + impl<S: crate::spec::Spec> PartialOrd<$ty_lhs> for $ty_rhs { + #[inline] + fn partial_cmp(&self, o: &$ty_lhs) -> Option<core::cmp::Ordering> { + <$ty_common as PartialOrd<$ty_common>>::partial_cmp(self.as_ref(), o.as_ref()) + } + } + }; +} + +/// Implements `PartialEq` and `PartialOrd` with two independent spec type parameter. +macro_rules! 
impl_cmp2 { + ($ty_common:ty, $ty_lhs:ty, $ty_rhs:ty) => { + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialEq<$ty_rhs> for $ty_lhs { + #[inline] + fn eq(&self, o: &$ty_rhs) -> bool { + <$ty_common as PartialEq<$ty_common>>::eq(self.as_ref(), o.as_ref()) + } + } + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialEq<$ty_lhs> for $ty_rhs { + #[inline] + fn eq(&self, o: &$ty_lhs) -> bool { + <$ty_common as PartialEq<$ty_common>>::eq(self.as_ref(), o.as_ref()) + } + } + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialOrd<$ty_rhs> for $ty_lhs { + #[inline] + fn partial_cmp(&self, o: &$ty_rhs) -> Option<core::cmp::Ordering> { + <$ty_common as PartialOrd<$ty_common>>::partial_cmp(self.as_ref(), o.as_ref()) + } + } + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialOrd<$ty_lhs> for $ty_rhs { + #[inline] + fn partial_cmp(&self, o: &$ty_lhs) -> Option<core::cmp::Ordering> { + <$ty_common as PartialOrd<$ty_common>>::partial_cmp(self.as_ref(), o.as_ref()) + } + } + }; +} + +/// Implements `PartialEq` and `PartialOrd` with two independent spec type parameter. +macro_rules! 
impl_cmp2_as_str { + ($ty_lhs:ty, $ty_rhs:ty) => { + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialEq<$ty_rhs> for $ty_lhs { + #[inline] + fn eq(&self, o: &$ty_rhs) -> bool { + PartialEq::eq(self.as_str(), o.as_str()) + } + } + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialEq<$ty_lhs> for $ty_rhs { + #[inline] + fn eq(&self, o: &$ty_lhs) -> bool { + PartialEq::eq(self.as_str(), o.as_str()) + } + } + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialOrd<$ty_rhs> for $ty_lhs { + #[inline] + fn partial_cmp(&self, o: &$ty_rhs) -> Option<core::cmp::Ordering> { + PartialOrd::partial_cmp(self.as_str(), o.as_str()) + } + } + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialOrd<$ty_lhs> for $ty_rhs { + #[inline] + fn partial_cmp(&self, o: &$ty_lhs) -> Option<core::cmp::Ordering> { + PartialOrd::partial_cmp(self.as_str(), o.as_str()) + } + } + }; +} + +/// Defines the custom string slice type, and implements methods and traits. +/// +/// Methods to be implemented: +/// +/// * `pub fn new()` +/// * `pub fn new_unchecked()` +/// * `pub(crate) fn new_maybe_unchecked()` +/// * `fn new_always_unchecked()` +/// * `pub fn as_str()` +/// * `pub fn len()` +/// * `pub fn is_empty()` +/// +/// Traits to be implemented: +/// +/// * fundamental +/// + `Debug for $ty` +/// + `Eq for $ty` +/// + `Ord for $ty` +/// + `Hash for $ty` +/// * type conversion +/// + `AsRef<str> for $ty` +/// + `AsRef<$ty> for $ty` +/// + `From<&$ty> for Cow<$ty>` +/// + `From<&$ty> for Arc<$ty>` +/// + `From<&$ty> for Box<$ty>` +/// + `From<&$ty> for Rc<$ty>` +/// + `From<&$ty> for &str` +/// + `TryFrom<&str> for &$ty` +/// + `TryFrom<&[u8]> for &$ty` +/// * comparison (only `PartialEq` impls are listed, but `PartialOrd` is also implemented). 
+/// + `PartialEq<$ty> for $ty` +/// + `str` and `$ty` +/// - `PartialEq<str> for $ty` +/// - `PartialEq<$ty> for str` +/// - `PartialEq<&str> for $ty` +/// - `PartialEq<$ty> for &str` +/// - `PartialEq<str> for &$ty` +/// - `PartialEq<&$ty> for str` +/// + `$ty` and `$ty` +/// - `PartialEq<&$ty> for $ty` +/// - `PartialEq<$ty> for &$ty` +/// * other +/// + `Display for $ty` +/// * serde +/// + `serde::Serialize` +/// + `serde::Deserialize` +macro_rules! define_custom_string_slice { + ( + $(#[$meta:meta])* + struct $ty:ident { + validator = $validate:ident, + expecting_msg = $expecting:expr, + } + ) => { + $(#[$meta])* + // `#[derive(..)]` cannot be used here, because it adds `S: DerivedTrait` bounds automatically. + #[repr(transparent)] + #[cfg_attr(feature = "serde", derive(serde::Serialize))] + #[cfg_attr(feature = "serde", serde(bound = "S: crate::spec::Spec"))] + #[cfg_attr(feature = "serde", serde(transparent))] + pub struct $ty<S> { + /// Spec. + #[cfg_attr(feature = "serde", serde(skip))] + _spec: core::marker::PhantomData<fn() -> S>, + /// Inner data. + inner: str, + } + + impl<S: crate::spec::Spec> $ty<S> { + /// Creates a new string. + #[inline] + pub fn new(s: &str) -> Result<&Self, crate::validate::Error> { + core::convert::TryFrom::try_from(s) + } + + /// Creates a new string without validation. + /// + /// This does not validate the given string, so it is caller's + /// responsibility to ensure the given string is valid. + /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. + /// If not, any use of the returned value or the call of this + /// function itself may result in undefined behavior. + #[inline] + #[must_use] + pub unsafe fn new_unchecked(s: &str) -> &Self { + // SAFETY: `new_always_unchecked` requires the same precondition + // as `new_unchecked`. + unsafe { Self::new_always_unchecked(s) } + } + + /// Creates a new string maybe without validation. + /// + /// This does validation on debug build. 
+ /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. + #[must_use] + pub(crate) unsafe fn new_maybe_unchecked(s: &str) -> &Self { + debug_assert_eq!($validate::<S>(s), Ok(())); + // SAFETY: `new_always_unchecked` requires the same precondition + // as `new_maybe_unchecked`. Additionally in debug build, just + // checked the content is actually valid by `$validate::<S>(s)`. + unsafe { Self::new_always_unchecked(s) } + } + + /// Creates a new string without any validation. + /// + /// This does not validate the given string at any time. + /// + /// Intended for internal use. + /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. + #[inline] + #[must_use] + unsafe fn new_always_unchecked(s: &str) -> &Self { + // SAFETY: the cast is safe since `Self` type has `repr(transparent)` + // attribute and the content is guaranteed as valid by the + // precondition of the function. + unsafe { &*(s as *const str as *const Self) } + } + + /// Returns `&str`. + #[inline] + #[must_use] + pub fn as_str(&self) -> &str { + self.as_ref() + } + + /// Returns the string length. + #[inline] + #[must_use] + pub fn len(&self) -> usize { + self.as_str().len() + } + + /// Returns whether the string is empty. 
+ #[inline] + #[must_use] + pub fn is_empty(&self) -> bool { + self.as_str().is_empty() + } + } + + impl<S: crate::spec::Spec> core::fmt::Debug for $ty<S> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple(stringify!($ty)).field(&&self.inner).finish() + } + } + + impl<S: crate::spec::Spec> PartialEq for $ty<S> { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.inner == other.inner + } + } + + impl<S: crate::spec::Spec> Eq for $ty<S> {} + + impl<S: crate::spec::Spec> PartialOrd for $ty<S> { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { + Some(self.inner.cmp(&other.inner)) + } + } + + impl<S: crate::spec::Spec> Ord for $ty<S> { + #[inline] + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.inner.cmp(&other.inner) + } + } + + impl<S: crate::spec::Spec> core::hash::Hash for $ty<S> { + #[inline] + fn hash<H: core::hash::Hasher>(&self, state: &mut H) { + self.inner.hash(state); + } + } + + impl<S: crate::spec::Spec> AsRef<str> for $ty<S> { + #[inline] + fn as_ref(&self) -> &str { + &self.inner + } + } + + impl<S: crate::spec::Spec> AsRef<$ty<S>> for $ty<S> { + #[inline] + fn as_ref(&self) -> &$ty<S> { + self + } + } + + #[cfg(feature = "alloc")] + impl<'a, S: crate::spec::Spec> From<&'a $ty<S>> for alloc::borrow::Cow<'a, $ty<S>> { + #[inline] + fn from(s: &'a $ty<S>) -> Self { + alloc::borrow::Cow::Borrowed(s) + } + } + + impl_from_slice_into_smartptr! { + ty: $ty, + smartptr: alloc::sync::Arc, + mutability: const, + } + + impl_from_slice_into_smartptr! { + ty: $ty, + smartptr: alloc::boxed::Box, + mutability: mut, + } + + impl_from_slice_into_smartptr! 
{ + ty: $ty, + smartptr: alloc::rc::Rc, + mutability: const, + } + + impl<'a, S: crate::spec::Spec> From<&'a $ty<S>> for &'a str { + #[inline] + fn from(s: &'a $ty<S>) -> &'a str { + s.as_ref() + } + } + + impl<'a, S: crate::spec::Spec> core::convert::TryFrom<&'a str> for &'a $ty<S> { + type Error = crate::validate::Error; + + #[inline] + fn try_from(s: &'a str) -> Result<Self, Self::Error> { + match $validate::<S>(s) { + // SAFETY: just checked `s` is valid as `$ty`. + Ok(()) => Ok(unsafe { $ty::new_always_unchecked(s) }), + Err(e) => Err(e), + } + } + } + + impl<'a, S: crate::spec::Spec> core::convert::TryFrom<&'a [u8]> for &'a $ty<S> { + type Error = crate::validate::Error; + + #[inline] + fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> { + let s = core::str::from_utf8(bytes).map_err(|_| crate::validate::Error::new())?; + match $validate::<S>(s) { + // SAFETY: just checked `s` is valid as `$ty`. + Ok(()) => Ok(unsafe { $ty::new_always_unchecked(s) }), + Err(e) => Err(e), + } + } + } + + impl_cmp!(str, str, $ty<S>); + impl_cmp!(str, &str, $ty<S>); + impl_cmp!(str, str, &$ty<S>); + impl_cmp2!(str, &$ty<S>, $ty<T>); + + impl<S: crate::spec::Spec> core::fmt::Display for $ty<S> { + #[inline] + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str(self.as_str()) + } + } + + /// Serde deserializer implementation. + #[cfg(feature = "serde")] + mod __serde_slice { + use super::$ty; + + use core::{convert::TryFrom, fmt, marker::PhantomData}; + + use serde::{ + de::{self, Visitor}, + Deserialize, Deserializer, + }; + + /// Custom borrowed string visitor. 
+ #[derive(Debug, Clone, Copy)] + struct CustomStrVisitor<S>(PhantomData<fn() -> S>); + + impl<'de, S: 'de + crate::spec::Spec> Visitor<'de> for CustomStrVisitor<S> { + type Value = &'de $ty<S>; + + #[inline] + fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str($expecting) + } + + #[inline] + fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E> + where + E: de::Error, + { + <&'de $ty<S> as TryFrom<&'de str>>::try_from(v).map_err(E::custom) + } + } + + // About `'de` and `'a`, see + // <https://serde.rs/lifetimes.html#the-deserializede-lifetime>. + impl<'de: 'a, 'a, S: 'de + crate::spec::Spec> Deserialize<'de> for &'a $ty<S> { + #[inline] + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + deserializer.deserialize_string(CustomStrVisitor::<S>(PhantomData)) + } + } + } + }; +} + +/// Define the custom owned string type, and implements methods and traits. +/// +/// Methods to be implemented: +/// +/// * `pub fn new_unchecked()` +/// * `pub(crate) fn new_maybe_unchecked()` +/// * `pub(crate) fn new_always_unchecked()` +/// * `pub fn shrink_to_fit()` +/// +/// Traits to be implemented: +/// +/// * fundamental +/// + `Debug for $ty` +/// + `Clone for $ty` +/// + `Eq for $ty` +/// + `Ord for $ty` +/// + `Hash for $ty` +/// * type conversion +/// + `AsRef<str> for $ty` +/// + `AsRef<$slice> for $ty` +/// + `Borrow<str> for $ty` +/// + `Borrow<$slice> for $ty` +/// + `ToOwned<Owned = $ty> for $slice` +/// + `From<&$slice> for $ty` +/// + `From<$ty> for String` +/// + `From<$ty> for Cow<'_, $slice>` +/// + `From<$ty> for Box<$slice>` +/// + `TryFrom<&str> for $ty` +/// + `TryFrom<&[u8]> for $ty` +/// + `TryFrom<String> for $ty` +/// + `FromStr for $ty` +/// + `Deref<Target = $slice> for $ty` +/// * comparison (only `PartialEq` impls are listed, but `PartialOrd` is also implemented. 
+/// + `PartialEq<$ty> for $ty` +/// + `$slice` and `str` +/// - `PartialEq<$slice> for Cow<'_, str>` +/// - `PartialEq<Cow<'_, str>> for $slice` +/// - `PartialEq<&$slice> for Cow<'_, str>` +/// - `PartialEq<Cow<'_, str>> for &$slice` +/// + `$slice` and `Cow<$slice>` +/// - `PartialEq<$slice> for Cow<'_, $slice>` +/// - `PartialEq<Cow<'_, $slice>> for $slice` +/// - `PartialEq<&$slice> for Cow<'_, $slice>` +/// - `PartialEq<Cow<'_, $slice>> for &$slice` +/// + `str` and `$ty` +/// - `PartialEq<str> for $ty` +/// - `PartialEq<$ty> for str` +/// - `PartialEq<&str> for $ty` +/// - `PartialEq<$ty> for &str` +/// - `PartialEq<Cow<'_, str>> for $ty` +/// - `PartialEq<$ty> for Cow<'_, str>` +/// + `String` and `$ty` +/// - `PartialEq<String> for $ty` +/// - `PartialEq<$ty> for String` +/// + `$slice` and `$ty` +/// - `PartialEq<$slice> for $ty` +/// - `PartialEq<$ty> for $slice` +/// - `PartialEq<&$slice> for $ty` +/// - `PartialEq<$ty> for &$slice` +/// - `PartialEq<Cow<'_, $slice>> for $ty` +/// - `PartialEq<$ty> for Cow<'_, $slice>` +/// * other +/// + `Display for $ty` +/// * serde +/// + `serde::Serialize` +/// + `serde::Deserialize` +// Note that `From<$ty> for {Arc,Rc}<$slice>` is currently not implemented since +// this won't reuse allocated memory and hides internal memory reallocation. See +// <https://github.com/lo48576/iri-string/issues/20#issuecomment-1105207849>. +// However, this is not decided with firm belief or opinion, so there would be +// a chance that they are implemented in future. +#[cfg(feature = "alloc")] +macro_rules! define_custom_string_owned { + ( + $(#[$meta:meta])* + struct $ty:ident { + validator = $validate:ident, + slice = $slice:ident, + expecting_msg = $expecting:expr, + } + ) => { + $(#[$meta])* + // `#[derive(..)]` cannot be used here, because it adds `S: DerivedTrait` bounds automatically. 
+ #[cfg(feature = "alloc")] + #[cfg_attr(all(feature = "serde", feature = "alloc"), derive(serde::Serialize))] + #[cfg_attr(all(feature = "serde", feature = "alloc"), serde(bound = "S: crate::spec::Spec"))] + #[cfg_attr(all(feature = "serde", feature = "alloc"), serde(transparent))] + pub struct $ty<S> { + /// Spec. + #[cfg_attr(all(feature = "serde", feature = "alloc"), serde(skip))] + _spec: core::marker::PhantomData<fn() -> S>, + /// Inner data. + inner: alloc::string::String, + } + + impl<S: crate::spec::Spec> $ty<S> { + /// Creates a new string without validation. + /// + /// This does not validate the given string, so it is caller's + /// responsibility to ensure the given string is valid. + /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. + /// If not, any use of the returned value or the call of this + /// function itself may result in undefined behavior. + #[inline] + #[must_use] + pub unsafe fn new_unchecked(s: alloc::string::String) -> Self { + // SAFETY: `new_unchecked` requires the same precondition + // as `new_always_unchecked`. + unsafe { Self::new_always_unchecked(s) } + } + + /// Creates a new string without any validation. + /// + /// This does not validate the given string at any time. + /// + /// Intended for internal use. + /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. + #[inline] + #[must_use] + pub(crate) unsafe fn new_always_unchecked(s: alloc::string::String) -> Self { + // The construction itself can be written in safe Rust, but + // every other place including unsafe functions expects + // `self.inner` to be syntactically valid as `Self`. In order to + // make them safe, the construction should validate the value + // or at least should require users to validate the value by + // making the function `unsafe`. + Self { + _spec: core::marker::PhantomData, + inner: s, + } + } + + /// Creates a new string maybe without validation. 
+ /// + /// This does validation on debug build. + /// + /// # Safety + /// + /// The given string must be syntactically valid as `Self` type. + #[must_use] + pub(crate) unsafe fn new_maybe_unchecked(s: alloc::string::String) -> Self { + debug_assert_eq!( + $validate::<S>(&s), + Ok(()), + "[precondition] the given string must be valid" + ); + // SAFETY: `new_maybe_unchecked` requires the same precondition + // as `new_always_unchecked`. Additionally in debug build, just + // checked the content is actually valid by `$validate::<S>(&s)`. + unsafe { Self::new_always_unchecked(s) } + } + + /// Returns a mutable reference to the inner string buffer. + /// + /// This may be useful to implement inline modification algorithm, + /// but be careful as this method itself cannot validate the new + /// content. + /// + /// # Safety + /// + /// The content after modification must be syntactically valid as + /// `Self` type. + /// If not, any use of the returned value or the call of this + /// function itself may result in undefined behavior. + #[inline] + #[must_use] + // TODO: Use wrapper type to enforce validation on finish? + pub(crate) unsafe fn as_inner_mut(&mut self) -> &mut alloc::string::String { + &mut self.inner + } + + /// Shrinks the capacity of the inner buffer to match its length. + #[inline] + pub fn shrink_to_fit(&mut self) { + self.inner.shrink_to_fit() + } + + /// Returns the internal buffer capacity in bytes. + #[inline] + #[must_use] + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + /// Returns the borrowed IRI string slice. + /// + /// This is equivalent to `&*self`. 
+ #[inline] + #[must_use] + pub fn as_slice(&self) -> &$slice<S> { + self.as_ref() + } + } + + impl<S: crate::spec::Spec> core::fmt::Debug for $ty<S> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple(stringify!($ty)).field(&&self.inner).finish() + } + } + + impl<S: crate::spec::Spec> Clone for $ty<S> { + #[inline] + fn clone(&self) -> Self { + // This is safe because `self` must be valid. + Self { + _spec: core::marker::PhantomData, + inner: self.inner.clone(), + } + } + } + + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialEq<$ty<T>> for $ty<S> { + #[inline] + fn eq(&self, other: &$ty<T>) -> bool { + self.inner == other.inner + } + } + + impl<S: crate::spec::Spec> Eq for $ty<S> {} + + impl<S: crate::spec::Spec, T: crate::spec::Spec> PartialOrd<$ty<T>> for $ty<S> { + #[inline] + fn partial_cmp(&self, other: &$ty<T>) -> Option<core::cmp::Ordering> { + self.inner.partial_cmp(&other.inner) + } + } + + impl<S: crate::spec::Spec> Ord for $ty<S> { + #[inline] + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.inner.cmp(&other.inner) + } + } + + impl<S: crate::spec::Spec> core::hash::Hash for $ty<S> { + #[inline] + fn hash<H: core::hash::Hasher>(&self, state: &mut H) { + self.inner.hash(state); + } + } + + impl<S: crate::spec::Spec> AsRef<str> for $ty<S> { + #[inline] + fn as_ref(&self) -> &str { + &self.inner + } + } + + impl<S: crate::spec::Spec> AsRef<$slice<S>> for $ty<S> { + #[inline] + fn as_ref(&self) -> &$slice<S> { + // SAFETY: `$ty<S>` and `$slice<S>` requires same validation, so + // the content of `self: &$ty<S>` must be valid as `$slice<S>`. 
+ unsafe { $slice::new_always_unchecked(AsRef::<str>::as_ref(self)) } + } + } + + impl<S: crate::spec::Spec> core::borrow::Borrow<str> for $ty<S> { + #[inline] + fn borrow(&self) -> &str { + self.as_ref() + } + } + + impl<S: crate::spec::Spec> core::borrow::Borrow<$slice<S>> for $ty<S> { + #[inline] + fn borrow(&self) -> &$slice<S> { + self.as_ref() + } + } + + impl<S: crate::spec::Spec> alloc::borrow::ToOwned for $slice<S> { + type Owned = $ty<S>; + + #[inline] + fn to_owned(&self) -> Self::Owned { + self.into() + } + } + + impl<S: crate::spec::Spec> From<&'_ $slice<S>> for $ty<S> { + #[inline] + fn from(s: &$slice<S>) -> Self { + // This is safe because `s` must be valid. + $ty { + _spec: core::marker::PhantomData, + inner: alloc::string::String::from(s.as_str()), + } + } + } + + impl<S: crate::spec::Spec> From<$ty<S>> for alloc::string::String { + #[inline] + fn from(s: $ty<S>) -> Self { + s.inner + } + } + + impl<'a, S: crate::spec::Spec> From<$ty<S>> for alloc::borrow::Cow<'a, $slice<S>> { + #[inline] + fn from(s: $ty<S>) -> alloc::borrow::Cow<'a, $slice<S>> { + alloc::borrow::Cow::Owned(s) + } + } + + impl<S: crate::spec::Spec> From<$ty<S>> for alloc::boxed::Box<$slice<S>> { + #[inline] + fn from(s: $ty<S>) -> alloc::boxed::Box<$slice<S>> { + let inner: alloc::string::String = s.into(); + let buf = alloc::boxed::Box::<str>::from(inner); + // SAFETY: `$slice<S>` has `repr(transparent)` attribute, so + // the memory layouts of `Box<str>` and `Box<$slice<S>>` are + // compatible. Additionally, `$ty<S>` and `$slice<S>` require + // the same syntax (it is the macro user's responsibility to + // guarantee). 
+ unsafe { + let raw: *mut str = alloc::boxed::Box::into_raw(buf); + alloc::boxed::Box::<$slice<S>>::from_raw(raw as *mut $slice<S>) + } + } + } + + impl<S: crate::spec::Spec> core::convert::TryFrom<&'_ str> for $ty<S> { + type Error = crate::validate::Error; + + #[inline] + fn try_from(s: &str) -> Result<Self, Self::Error> { + <&$slice<S>>::try_from(s).map(Into::into) + } + } + + impl<S: crate::spec::Spec> core::convert::TryFrom<&'_ [u8]> for $ty<S> { + type Error = crate::validate::Error; + + #[inline] + fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> { + let s = core::str::from_utf8(bytes).map_err(|_| crate::validate::Error::new())?; + <&$slice<S>>::try_from(s).map(Into::into) + } + } + + impl<S: crate::spec::Spec> core::convert::TryFrom<alloc::string::String> for $ty<S> { + type Error = crate::types::CreationError<alloc::string::String>; + + #[inline] + fn try_from(s: alloc::string::String) -> Result<Self, Self::Error> { + match <&$slice<S>>::try_from(s.as_str()) { + Ok(_) => { + // This is safe because `<&$slice<S>>::try_from(s)?` ensures + // that the string `s` is valid. 
+ Ok(Self { + _spec: core::marker::PhantomData, + inner: s, + }) + } + Err(e) => Err(crate::types::CreationError::new(e, s)), + } + } + } + + impl<S: crate::spec::Spec> alloc::str::FromStr for $ty<S> { + type Err = crate::validate::Error; + + #[inline] + fn from_str(s: &str) -> Result<Self, Self::Err> { + core::convert::TryFrom::try_from(s) + } + } + + impl<S: crate::spec::Spec> core::ops::Deref for $ty<S> { + type Target = $slice<S>; + + #[inline] + fn deref(&self) -> &$slice<S> { + self.as_ref() + } + } + + impl_cmp!(str, $slice<S>, alloc::borrow::Cow<'_, str>); + impl_cmp!(str, &$slice<S>, alloc::borrow::Cow<'_, str>); + impl_cmp2_as_str!(&$slice<S>, alloc::borrow::Cow<'_, $slice<T>>); + + impl_cmp!(str, str, $ty<S>); + impl_cmp!(str, &str, $ty<S>); + impl_cmp!(str, alloc::borrow::Cow<'_, str>, $ty<S>); + impl_cmp!(str, alloc::string::String, $ty<S>); + impl_cmp2!(str, $slice<S>, $ty<T>); + impl_cmp2!(str, &$slice<S>, $ty<T>); + impl_cmp2_as_str!(alloc::borrow::Cow<'_, $slice<S>>, $ty<T>); + + impl<S: crate::spec::Spec> core::fmt::Display for $ty<S> { + #[inline] + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str(self.as_str()) + } + } + + /// Serde deserializer implementation. + #[cfg(all(feature = "alloc", feature = "serde"))] + mod __serde_owned { + use super::$ty; + + use core::{convert::TryFrom, fmt, marker::PhantomData}; + + #[cfg(all(feature = "serde", feature = "alloc"))] + use alloc::string::String; + + use serde::{ + de::{self, Visitor}, + Deserialize, Deserializer, + }; + + /// Custom owned string visitor. 
+ #[derive(Debug, Clone, Copy)] + struct CustomStringVisitor<S>(PhantomData<fn() -> S>); + + impl<'de, S: crate::spec::Spec> Visitor<'de> for CustomStringVisitor<S> { + type Value = $ty<S>; + + #[inline] + fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str($expecting) + } + + #[inline] + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> + where + E: de::Error, + { + <$ty<S> as TryFrom<&str>>::try_from(v).map_err(E::custom) + } + + #[cfg(all(feature = "serde", feature = "alloc"))] + #[inline] + fn visit_string<E>(self, v: String) -> Result<Self::Value, E> + where + E: de::Error, + { + <$ty<S> as TryFrom<String>>::try_from(v).map_err(E::custom) + } + } + + impl<'de, S: crate::spec::Spec> Deserialize<'de> for $ty<S> { + #[inline] + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + deserializer.deserialize_str(CustomStringVisitor::<S>(PhantomData)) + } + } + } + }; +} + +/// Implements trivial conversions and other useful traits between two IRI types. +/// +/// Implemented traits: +/// +/// * type conversion +/// + `AsRef<$to_slice> for $from_slice` +/// + `AsRef<$to_slice> for $from_owned` +/// + `From<&$from_slice> for &$to_slice` +/// + `From<$from_owned> for $to_owned` +/// + `TryFrom<&$to_slice> for &$from_slice` +/// + `TryFrom<$to_owned> for $from_owned` +/// * comparison (only `PartialEq` impls are listed, but `PartialOrd` is also implemented). 
+/// + `$from_slice` and `$to_slice` +/// - `PartialEq<$from_slice> for $to_slice` +/// - `PartialEq<$to_slice> for $from_slice` +/// - `PartialEq<&$from_slice> for $to_slice` +/// - `PartialEq<$to_slice> for &$from_slice` +/// - `PartialEq<$from_slice> for &$to_slice` +/// - `PartialEq<&$to_slice> for $from_slice` +/// - `PartialEq<$from_slice> for Cow<'_, $to_slice>` +/// - `PartialEq<Cow<'_, $to_slice>> for $from_slice` +/// - `PartialEq<&$from_slice> for Cow<'_, $to_slice>` +/// - `PartialEq<Cow<'_, $to_slice>> for &$from_slice` +/// - `PartialEq<Cow<'_, $from_slice>> for $to_slice` +/// - `PartialEq<$to_slice> for Cow<'_, $from_slice>` +/// - `PartialEq<Cow<'_, $from_slice>> for &$to_slice` +/// - `PartialEq<&$to_slice> for Cow<'_, $from_slice>` +/// + `$from_slice` and `$to_owned` +/// - `PartialEq<$from_slice> for $to_owned` +/// - `PartialEq<$to_owned> for $from_slice` +/// - `PartialEq<&$from_slice> for $to_owned` +/// - `PartialEq<$to_owned> for &$from_slice` +/// - `PartialEq<Cow<'_, $from_slice>> for $to_owned` +/// - `PartialEq<$to_owned> for Cow<'_, $from_slice>` +/// + `$from_owned` and `$to_slice` +/// - `PartialEq<$from_owned> for $to_slice` +/// - `PartialEq<$to_slice> for $from_owned` +/// - `PartialEq<$from_owned> for &$to_slice` +/// - `PartialEq<&$to_slice> for $from_owned` +/// - `PartialEq<$from_owned> for Cow<'_, $to_slice>` +/// - `PartialEq<Cow<'_, $to_slice>> for $from_owned` +/// + `$from_owned` and `$to_owned` +/// - `PartialEq<$from_owned> for $to_owned` +/// - `PartialEq<$to_owned> for $from_owned` +macro_rules! impl_trivial_conv_between_iri { + ( + from_slice: $from_slice:ident, + from_owned: $from_owned:ident, + to_slice: $to_slice:ident, + to_owned: $to_owned:ident, + ) => { + impl<S: crate::spec::Spec> AsRef<$to_slice<S>> for $from_slice<S> { + #[inline] + fn as_ref(&self) -> &$to_slice<S> { + // SAFETY: `$from_slice<S>` should be a subset of `$to_slice<S>`. 
+ // The caller of `impl_trivial_conv_between_iri!` macro is + // responsible for guaranteeing that. + unsafe { <$to_slice<S>>::new_maybe_unchecked(self.as_str()) } + } + } + + #[cfg(feature = "alloc")] + impl<S: crate::spec::Spec> AsRef<$to_slice<S>> for $from_owned<S> { + #[inline] + fn as_ref(&self) -> &$to_slice<S> { + AsRef::<$from_slice<S>>::as_ref(self).as_ref() + } + } + + impl<'a, S: crate::spec::Spec> From<&'a $from_slice<S>> for &'a $to_slice<S> { + #[inline] + fn from(s: &'a $from_slice<S>) -> &'a $to_slice<S> { + s.as_ref() + } + } + + #[cfg(feature = "alloc")] + impl<S: crate::spec::Spec> From<$from_owned<S>> for $to_owned<S> { + #[inline] + fn from(s: $from_owned<S>) -> $to_owned<S> { + // SAFETY: `$from_slice<S>` should be a subset of `$to_slice<S>`. + // The caller of `impl_trivial_conv_between_iri!` macro is + // responsible for guaranteeing that. + unsafe { <$to_owned<S>>::new_maybe_unchecked(s.into()) } + } + } + + impl<'a, S: crate::spec::Spec> core::convert::TryFrom<&'a $to_slice<S>> + for &'a $from_slice<S> + { + type Error = crate::validate::Error; + + #[inline] + fn try_from(s: &'a $to_slice<S>) -> Result<Self, Self::Error> { + Self::try_from(s.as_str()) + } + } + + #[cfg(feature = "alloc")] + impl<S: crate::spec::Spec> core::convert::TryFrom<$to_owned<S>> for $from_owned<S> { + type Error = crate::types::CreationError<$to_owned<S>>; + + fn try_from(s: $to_owned<S>) -> Result<Self, Self::Error> { + match <&$from_slice<S>>::try_from(s.as_str()) { + // SAFETY: just checked `s.as_str()` is valid as `$from_slice<S>`, and it + // requires the same syntax as `$from_owned<S>`. 
+ Ok(_) => Ok(unsafe { <$from_owned<S>>::new_always_unchecked(s.into()) }), + Err(e) => Err(crate::types::CreationError::new(e, s)), + } + } + } + + impl_cmp2_as_str!($from_slice<S>, $to_slice<T>); + impl_cmp2_as_str!(&$from_slice<S>, $to_slice<T>); + impl_cmp2_as_str!($from_slice<S>, &$to_slice<T>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!($from_slice<S>, alloc::borrow::Cow<'_, $to_slice<T>>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!(&$from_slice<S>, alloc::borrow::Cow<'_, $to_slice<T>>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!(alloc::borrow::Cow<'_, $from_slice<S>>, $to_slice<T>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!(alloc::borrow::Cow<'_, $from_slice<S>>, &$to_slice<T>); + + #[cfg(feature = "alloc")] + impl_cmp2_as_str!($from_slice<S>, $to_owned<T>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!(&$from_slice<S>, $to_owned<T>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!(alloc::borrow::Cow<'_, $from_slice<S>>, $to_owned<T>); + + #[cfg(feature = "alloc")] + impl_cmp2_as_str!($from_owned<S>, $to_slice<T>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!($from_owned<S>, &$to_slice<T>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!($from_owned<S>, alloc::borrow::Cow<'_, $to_slice<T>>); + #[cfg(feature = "alloc")] + impl_cmp2_as_str!($from_owned<S>, $to_owned<T>); + }; +} diff --git a/vendor/iri-string/src/types/generic/normal.rs b/vendor/iri-string/src/types/generic/normal.rs new file mode 100644 index 00000000..641a022b --- /dev/null +++ b/vendor/iri-string/src/types/generic/normal.rs @@ -0,0 +1,944 @@ +//! Usual absolute IRI (fragment part being allowed). 
+ +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::String; + +use crate::components::AuthorityComponents; +#[cfg(feature = "alloc")] +use crate::mask_password::password_range_to_hide; +use crate::mask_password::PasswordMasked; +use crate::normalize::{Error, NormalizationInput, Normalized, NormalizednessCheckMode}; +use crate::parser::trusted as trusted_parser; +#[cfg(feature = "alloc")] +use crate::raw; +use crate::spec::Spec; +use crate::types::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr}; +#[cfg(feature = "alloc")] +use crate::types::{RiAbsoluteString, RiFragmentString, RiReferenceString}; +use crate::validate::iri; + +define_custom_string_slice! { + /// A borrowed string of an absolute IRI possibly with fragment part. + /// + /// This corresponds to [`IRI` rule] in [RFC 3987] (and [`URI` rule] in [RFC 3986]). + /// The rule for `IRI` is `scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]`. + /// In other words, this is [`RiAbsoluteStr`] with fragment part allowed. + /// + /// # Valid values + /// + /// This type can have an IRI (which is absolute, and may have fragment part). + /// + /// ``` + /// # use iri_string::types::IriStr; + /// assert!(IriStr::new("https://user:pass@example.com:8080").is_ok()); + /// assert!(IriStr::new("https://example.com/").is_ok()); + /// assert!(IriStr::new("https://example.com/foo?bar=baz").is_ok()); + /// assert!(IriStr::new("https://example.com/foo?bar=baz#qux").is_ok()); + /// assert!(IriStr::new("foo:bar").is_ok()); + /// assert!(IriStr::new("foo:").is_ok()); + /// // `foo://.../` below are all allowed. See the crate documentation for detail. + /// assert!(IriStr::new("foo:/").is_ok()); + /// assert!(IriStr::new("foo://").is_ok()); + /// assert!(IriStr::new("foo:///").is_ok()); + /// assert!(IriStr::new("foo:////").is_ok()); + /// assert!(IriStr::new("foo://///").is_ok()); + /// ``` + /// + /// Relative IRI reference is not allowed. 
+ /// + /// ``` + /// # use iri_string::types::IriStr; + /// // This is relative path. + /// assert!(IriStr::new("foo/bar").is_err()); + /// // `/foo/bar` is an absolute path, but it is authority-relative. + /// assert!(IriStr::new("/foo/bar").is_err()); + /// // `//foo/bar` is termed "network-path reference", + /// // or usually called "protocol-relative reference". + /// assert!(IriStr::new("//foo/bar").is_err()); + /// // Same-document reference is relative. + /// assert!(IriStr::new("#foo").is_err()); + /// // Empty string is not a valid absolute IRI. + /// assert!(IriStr::new("").is_err()); + /// ``` + /// + /// Some characters and sequences cannot be used in an IRI. + /// + /// ``` + /// # use iri_string::types::IriStr; + /// // `<` and `>` cannot directly appear in an IRI. + /// assert!(IriStr::new("<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in an IRI. + /// assert!(IriStr::new("%").is_err()); + /// assert!(IriStr::new("%GG").is_err()); + /// ``` + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`IRI` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`URI` rule]: https://tools.ietf.org/html/rfc3986#section-3 + /// [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html + struct RiStr { + validator = iri, + expecting_msg = "IRI string", + } +} + +#[cfg(feature = "alloc")] +define_custom_string_owned! { + /// An owned string of an absolute IRI possibly with fragment part. + /// + /// This corresponds to [`IRI` rule] in [RFC 3987] (and [`URI` rule] in [RFC 3986]). + /// The rule for `IRI` is `scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]`. + /// In other words, this is [`RiAbsoluteString`] with fragment part allowed. + /// + /// For details, see the document for [`RiStr`]. + /// + /// Enabled by `alloc` or `std` feature. 
+ /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`IRI` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`URI` rule]: https://tools.ietf.org/html/rfc3986#section-3 + /// [`RiAbsoluteString`]: struct.RiAbsoluteString.html + struct RiString { + validator = iri, + slice = RiStr, + expecting_msg = "IRI string", + } +} + +impl<S: Spec> RiStr<S> { + /// Splits the IRI into an absolute IRI part and a fragment part. + /// + /// A leading `#` character is truncated if the fragment part exists. + /// + /// # Examples + /// + /// If the IRI has a fragment part, `Some(_)` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?; + /// let (absolute, fragment) = iri.to_absolute_and_fragment(); + /// let fragment_expected = IriFragmentStr::new("corge")?; + /// assert_eq!(absolute, "foo://bar/baz?qux=quux"); + /// assert_eq!(fragment, Some(fragment_expected)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// When the fragment part exists but is empty string, `Some(_)` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux#")?; + /// let (absolute, fragment) = iri.to_absolute_and_fragment(); + /// let fragment_expected = IriFragmentStr::new("")?; + /// assert_eq!(absolute, "foo://bar/baz?qux=quux"); + /// assert_eq!(fragment, Some(fragment_expected)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the IRI has no fragment, `None` is returned. 
+ /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux")?; + /// let (absolute, fragment) = iri.to_absolute_and_fragment(); + /// assert_eq!(absolute, "foo://bar/baz?qux=quux"); + /// assert_eq!(fragment, None); + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn to_absolute_and_fragment(&self) -> (&RiAbsoluteStr<S>, Option<&RiFragmentStr<S>>) { + let (prefix, fragment) = trusted_parser::split_fragment(self.as_str()); + // SAFETY: an IRI without fragment part is also an absolute IRI. + let prefix = unsafe { RiAbsoluteStr::new_maybe_unchecked(prefix) }; + let fragment = fragment.map(|fragment| { + // SAFETY: `trusted_parser::split_fragment()` must return a valid fragment component. + unsafe { RiFragmentStr::new_maybe_unchecked(fragment) } + }); + + (prefix, fragment) + } + + /// Strips the fragment part if exists, and returns [`&RiAbsoluteStr`][`RiAbsoluteStr`]. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?; + /// assert_eq!(iri.to_absolute(), "foo://bar/baz?qux=quux"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux")?; + /// assert_eq!(iri.to_absolute(), "foo://bar/baz?qux=quux"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html + #[must_use] + pub fn to_absolute(&self) -> &RiAbsoluteStr<S> { + let prefix_len = trusted_parser::split_fragment(self.as_str()).0.len(); + // SAFETY: IRI without the fragment part (including a leading `#` character) + // is also an absolute IRI. + unsafe { RiAbsoluteStr::new_maybe_unchecked(&self.as_str()[..prefix_len]) } + } + + /// Returns Ok`(())` if the IRI is normalizable by the RFC 3986 algorithm. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("HTTP://example.COM/foo/%2e/bar/..")?; + /// assert!(iri.ensure_rfc3986_normalizable().is_ok()); + /// + /// let iri2 = IriStr::new("scheme:/..//bar")?; + /// // The normalization result would be `scheme://bar` according to RFC + /// // 3986, but it is unintended and should be treated as a failure. + /// // This crate automatically handles this case so that `.normalize()` won't fail. + /// assert!(!iri.ensure_rfc3986_normalizable().is_err()); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> { + NormalizationInput::from(self).ensure_rfc3986_normalizable() + } + + /// Returns `true` if the IRI is already normalized. + /// + /// This returns the same result as `self.normalize().to_string() == self`, + /// but does this more efficiently without heap allocation. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?; + /// assert!(!iri.is_normalized()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query#fragment"); + /// assert!(normalized.is_normalized()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("scheme:/.///foo")?; + /// // Already normalized. 
+ /// assert!(iri.is_normalized()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("scheme:relative/..//not-a-host")?; + /// // Default normalization algorithm assumes the path part to be NOT opaque. + /// assert!(!iri.is_normalized()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "scheme:/.//not-a-host"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + #[inline] + pub fn is_normalized(&self) -> bool { + trusted_parser::is_normalized::<S>(self.as_str(), NormalizednessCheckMode::Default) + } + + /// Returns `true` if the IRI is already normalized in the sense of RFC 3986. + /// + /// This returns the same result as + /// `self.ensure_rfc3986_normalizable() && (self.normalize().to_string() == self)`, + /// but does this more efficiently without heap allocation. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?; + /// assert!(!iri.is_normalized_rfc3986()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query#fragment"); + /// assert!(normalized.is_normalized_rfc3986()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("scheme:/.///foo")?; + /// // Not normalized in the sense of RFC 3986. 
+ /// assert!(!iri.is_normalized_rfc3986()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("scheme:relative/..//not-a-host")?; + /// // RFC 3986 normalization algorithm assumes the path part to be NOT opaque. + /// assert!(!iri.is_normalized_rfc3986()); + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "scheme:/.//not-a-host"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + #[inline] + pub fn is_normalized_rfc3986(&self) -> bool { + trusted_parser::is_normalized::<S>(self.as_str(), NormalizednessCheckMode::Rfc3986) + } + + /// Returns `true` if the IRI is already normalized in the sense of + /// [`normalize_but_preserve_authorityless_relative_path`] method. + /// + /// This returns the same result as + /// `self.normalize_but_preserve_authorityless_relative_path().to_string() == self`, + /// but does this more efficiently without heap allocation. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?; + /// assert!(!iri.is_normalized_but_authorityless_relative_path_preserved()); + /// + /// let normalized = iri + /// .normalize_but_preserve_authorityless_relative_path() + /// .to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query#fragment"); + /// assert!(normalized.is_normalized()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("scheme:/.///foo")?; + /// // Already normalized in the sense of + /// // `normalize_but_preserve_authorityless_relative_path()` method. + /// assert!(iri.is_normalized_but_authorityless_relative_path_preserved()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("scheme:relative/..//not-a-host")?; + /// // Relative path is treated as opaque since the authority component is absent. + /// assert!(iri.is_normalized_but_authorityless_relative_path_preserved()); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// [`normalize_but_preserve_authorityless_relative_path`]: + /// `Self::normalize_but_preserve_authorityless_relative_path` + #[must_use] + #[inline] + pub fn is_normalized_but_authorityless_relative_path_preserved(&self) -> bool { + trusted_parser::is_normalized::<S>( + self.as_str(), + NormalizednessCheckMode::PreserveAuthoritylessRelativePath, + ) + } + + /// Returns the normalized IRI. 
+ /// + /// # Notes + /// + /// For some abnormal IRIs, the normalization can produce a semantically + /// incorrect string that looks syntactically valid. To avoid security + /// issues by this trap, the normalization algorithm by this crate + /// automatically applies the workaround. + /// + /// If you worry about this, test by [`RiStr::ensure_rfc3986_normalizable`] + /// method or [`Normalized::ensure_rfc3986_normalizable`] before using the + /// result string. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?; + /// + /// let normalized = iri.normalize().to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query#fragment"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn normalize(&self) -> Normalized<'_, Self> { + Normalized::from_input(NormalizationInput::from(self)).and_normalize() + } + + /// Returns the normalized IRI, but preserving dot segments in relative path + /// if the authority component is absent. + /// + /// This normalization would be similar to that of [WHATWG URL Standard] + /// while this implementation is not guaranteed to strictly follow the spec. + /// + /// Note that this normalization algorithm is not compatible with RFC 3986 + /// algorithm for some inputs. + /// + /// Note that case normalization and percent-encoding normalization will + /// still be applied to any path. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?; + /// + /// let normalized = iri + /// .normalize_but_preserve_authorityless_relative_path() + /// .to_dedicated_string(); + /// assert_eq!(normalized, "http://example.com/baz?query#fragment"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("scheme:relative/../f%6f%6f")?; + /// + /// let normalized = iri + /// .normalize_but_preserve_authorityless_relative_path() + /// .to_dedicated_string(); + /// assert_eq!(normalized, "scheme:relative/../foo"); + /// // `.normalize()` would normalize this to `scheme:/foo`. + /// # assert_eq!(iri.normalize().to_dedicated_string(), "scheme:/foo"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// [WHATWG URL Standard]: https://url.spec.whatwg.org/ + #[inline] + #[must_use] + pub fn normalize_but_preserve_authorityless_relative_path(&self) -> Normalized<'_, Self> { + Normalized::from_input(NormalizationInput::from(self)) + .and_normalize_but_preserve_authorityless_relative_path() + } + + /// Returns the proxy to the IRI with password masking feature. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("http://user:password@example.com/path?query")?; + /// let masked = iri.mask_password(); + /// assert_eq!(masked.to_dedicated_string(), "http://user:@example.com/path?query"); + /// + /// assert_eq!( + /// masked.replace_password("${password}").to_string(), + /// "http://user:${password}@example.com/path?query" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn mask_password(&self) -> PasswordMasked<'_, Self> { + PasswordMasked::new(self) + } +} + +/// Components getters. +impl<S: Spec> RiStr<S> { + /// Returns the scheme. + /// + /// The following colon is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.scheme_str(), "http"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn scheme_str(&self) -> &str { + trusted_parser::extract_scheme_absolute(self.as_str()) + } + + /// Returns the authority. + /// + /// The leading `//` is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.authority_str(), Some("example.com")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_str(&self) -> Option<&str> { + trusted_parser::extract_authority_absolute(self.as_str()) + } + + /// Returns the path. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.path_str(), "/pathpath"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.path_str(), "uuid:10db315b-fcd1-4428-aca8-15babc9a2da2"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn path_str(&self) -> &str { + trusted_parser::extract_path_absolute(self.as_str()) + } + + /// Returns the query. + /// + /// The leading question mark (`?`) is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::{IriQueryStr, IriStr}; + /// + /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// let query = IriQueryStr::new("queryquery")?; + /// assert_eq!(iri.query(), Some(query)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.query(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query(&self) -> Option<&RiQueryStr<S>> { + AsRef::<RiReferenceStr<S>>::as_ref(self).query() + } + + /// Returns the query in a raw string slice. + /// + /// The leading question mark (`?`) is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.query_str(), Some("queryquery")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.query_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query_str(&self) -> Option<&str> { + trusted_parser::extract_query(self.as_str()) + } + + /// Returns the fragment part if exists. + /// + /// A leading `#` character is truncated if the fragment part exists. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?; + /// let fragment = IriFragmentStr::new("corge")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux#")?; + /// let fragment = IriFragmentStr::new("")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux")?; + /// assert_eq!(iri.fragment(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn fragment(&self) -> Option<&RiFragmentStr<S>> { + AsRef::<RiReferenceStr<S>>::as_ref(self).fragment() + } + + /// Returns the fragment part as a raw string slice if exists. + /// + /// A leading `#` character is truncated if the fragment part exists. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?; + /// assert_eq!(iri.fragment_str(), Some("corge")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux#")?; + /// assert_eq!(iri.fragment_str(), Some("")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error}; + /// let iri = IriStr::new("foo://bar/baz?qux=quux")?; + /// assert_eq!(iri.fragment_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn fragment_str(&self) -> Option<&str> { + AsRef::<RiReferenceStr<S>>::as_ref(self).fragment_str() + } + + /// Returns the authority components. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("http://user:pass@example.com:8080/pathpath?queryquery")?; + /// let authority = iri.authority_components() + /// .expect("authority is available"); + /// assert_eq!(authority.userinfo(), Some("user:pass")); + /// assert_eq!(authority.host(), "example.com"); + /// assert_eq!(authority.port(), Some("8080")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriStr; + /// + /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert!(iri.authority_components().is_none()); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> { + AuthorityComponents::from_iri(self.as_ref()) + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> RiString<S> { + /// Splits the IRI into an absolute IRI part and a fragment part. 
+ /// + /// A leading `#` character is truncated if the fragment part exists. + /// + /// # Examples + /// + /// ``` + /// use std::convert::TryFrom; + /// # use iri_string::{spec::IriSpec, types::{IriFragmentString, IriString}, validate::Error}; + /// let iri = "foo://bar/baz?qux=quux#corge".parse::<IriString>()?; + /// let (absolute, fragment) = iri.into_absolute_and_fragment(); + /// let fragment_expected = IriFragmentString::try_from("corge".to_owned()) + /// .map_err(|e| e.validation_error())?; + /// assert_eq!(absolute, "foo://bar/baz?qux=quux"); + /// assert_eq!(fragment, Some(fragment_expected)); + /// # Ok::<_, Error>(()) + /// + /// ``` + /// + /// ``` + /// use std::convert::TryFrom; + /// # use iri_string::{spec::IriSpec, types::{IriFragmentString, IriString}, validate::Error}; + /// let iri = "foo://bar/baz?qux=quux#".parse::<IriString>()?; + /// let (absolute, fragment) = iri.into_absolute_and_fragment(); + /// let fragment_expected = IriFragmentString::try_from("".to_owned()) + /// .map_err(|e| e.validation_error())?; + /// assert_eq!(absolute, "foo://bar/baz?qux=quux"); + /// assert_eq!(fragment, Some(fragment_expected)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// use std::convert::TryFrom; + /// # use iri_string::{spec::IriSpec, types::IriString, validate::Error}; + /// let iri = "foo://bar/baz?qux=quux".parse::<IriString>()?; + /// let (absolute, fragment) = iri.into_absolute_and_fragment(); + /// assert_eq!(absolute, "foo://bar/baz?qux=quux"); + /// assert_eq!(fragment, None); + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn into_absolute_and_fragment(self) -> (RiAbsoluteString<S>, Option<RiFragmentString<S>>) { + let (prefix, fragment) = raw::split_fragment_owned(self.into()); + // SAFETY: an IRI without fragment part is also an absolute IRI. 
+ let prefix = unsafe { RiAbsoluteString::new_maybe_unchecked(prefix) }; + let fragment = fragment.map(|fragment| { + // SAFETY: the string returned by `raw::split_fragment_owned()` must + // be the fragment part, and must also be a substring of the source IRI. + unsafe { RiFragmentString::new_maybe_unchecked(fragment) } + }); + + (prefix, fragment) + } + + /// Strips the fragment part if exists, and returns an [`RiAbsoluteString`]. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriString, validate::Error}; + /// let iri = "foo://bar/baz?qux=quux#corge".parse::<IriString>()?; + /// assert_eq!(iri.into_absolute(), "foo://bar/baz?qux=quux"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriString, validate::Error}; + /// let iri = "foo://bar/baz?qux=quux".parse::<IriString>()?; + /// assert_eq!(iri.into_absolute(), "foo://bar/baz?qux=quux"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// [`RiAbsoluteString`]: struct.RiAbsoluteString.html + #[must_use] + pub fn into_absolute(self) -> RiAbsoluteString<S> { + let mut s: String = self.into(); + raw::remove_fragment(&mut s); + // SAFETY: an IRI without fragment part is also an absolute IRI. + unsafe { RiAbsoluteString::new_maybe_unchecked(s) } + } + + /// Sets the fragment part to the given string. + /// + /// Removes the fragment part (and the leading `#` character) if `None` is given. + pub fn set_fragment(&mut self, fragment: Option<&RiFragmentStr<S>>) { + raw::set_fragment(&mut self.inner, fragment.map(AsRef::as_ref)); + debug_assert!(iri::<S>(&self.inner).is_ok()); + } + + /// Removes the password completely (including separator colon) from `self` even if it is empty. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriString; + /// + /// let mut iri = IriString::try_from("http://user:password@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "http://user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// Even if the password is empty, the password and separator will be removed. + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriString; + /// + /// let mut iri = IriString::try_from("http://user:@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "http://user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice().as_ref()) { + Some(v) => v, + None => return, + }; + let separator_colon = pw_range.start - 1; + // SAFETY: the IRI must still be valid after the password component and + // the leading separator colon is removed. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(separator_colon..pw_range.end); + debug_assert!( + RiStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component is removed" + ); + } + } + + /// Replaces the non-empty password in `self` to the empty password. + /// + /// This leaves the separator colon if the password part was available. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriString; + /// + /// let mut iri = IriString::try_from("http://user:password@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "http://user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the password is empty, it is left as is. + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriString; + /// + /// let mut iri = IriString::try_from("http://user:@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "http://user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_nonempty_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice().as_ref()) { + Some(v) if !v.is_empty() => v, + _ => return, + }; + debug_assert_eq!( + self.as_str().as_bytes().get(pw_range.start - 1).copied(), + Some(b':'), + "[validity] the password component must be prefixed with a separator colon" + ); + // SAFETY: the IRI must still be valid if the password is replaced with + // empty string. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(pw_range); + debug_assert!( + RiStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component is removed" + ); + } + } +} + +impl_trivial_conv_between_iri! { + from_slice: RiStr, + from_owned: RiString, + to_slice: RiReferenceStr, + to_owned: RiReferenceString, +} diff --git a/vendor/iri-string/src/types/generic/query.rs b/vendor/iri-string/src/types/generic/query.rs new file mode 100644 index 00000000..40057f23 --- /dev/null +++ b/vendor/iri-string/src/types/generic/query.rs @@ -0,0 +1,135 @@ +//! Query string. + +use crate::{ + spec::Spec, + validate::{query, Error}, +}; + +define_custom_string_slice! 
{ + /// A borrowed slice of an IRI query (i.e. after the first `?` and before the first `#`). + /// + /// This corresponds to [`iquery` rule] in [RFC 3987] (and [`query` rule] in [RFC 3986]). + /// The rule for `iquery` is `*( ipchar / iprivate / "/" / "?" )`. + /// + /// # Valid values + /// + /// This type can have an IRI query. + /// Note that the IRI `foo://bar/baz?qux=quux#corge` has the query `qux=quux`, **not** `?qux=quux`. + /// + /// ``` + /// use iri_string::types::IriQueryStr; + /// assert!(IriQueryStr::new("").is_ok()); + /// assert!(IriQueryStr::new("foo").is_ok()); + /// assert!(IriQueryStr::new("foo/bar").is_ok()); + /// assert!(IriQueryStr::new("/foo/bar").is_ok()); + /// assert!(IriQueryStr::new("//foo/bar").is_ok()); + /// assert!(IriQueryStr::new("https://user:pass@example.com:8080").is_ok()); + /// assert!(IriQueryStr::new("https://example.com/").is_ok()); + /// // Question sign `?` can appear in an IRI query. 
+ /// assert!(IriQueryStr::new("query?again").is_ok()); + /// ``` + /// + /// Some characters and sequences cannot be used in a query. + /// + /// ``` + /// use iri_string::types::IriQueryStr; + /// // `<` and `>` cannot directly appear in an IRI reference. + /// assert!(IriQueryStr::new("<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in an IRI reference. + /// assert!(IriQueryStr::new("%").is_err()); + /// assert!(IriQueryStr::new("%GG").is_err()); + /// // Hash sign `#` cannot appear in an IRI query. + /// assert!(IriQueryStr::new("#hash").is_err()); + /// ``` + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`query` rule]: https://tools.ietf.org/html/rfc3986#section-3.4 + /// [`iquery` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + struct RiQueryStr { + validator = query, + expecting_msg = "IRI query string", + } +} + +#[cfg(feature = "alloc")] +define_custom_string_owned! { + /// An owned string of an IRI query (i.e. after the first `?` and before the first `#`). + /// + /// This corresponds to [`iquery` rule] in [RFC 3987] (and [`query` rule] in [RFC 3986]). + /// The rule for `iquery` is `*( ipchar / iprivate / "/" / "?" )`. + /// + /// For details, see the documentation for [`RiQueryStr`]. + /// + /// Enabled by `alloc` or `std` feature. + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`query` rule]: https://tools.ietf.org/html/rfc3986#section-3.4 + /// [`iquery` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`RiQueryStr`]: struct.RiQueryStr.html + struct RiQueryString { + validator = query, + slice = RiQueryStr, + expecting_msg = "IRI query string", + } +} + +impl<S: Spec> RiQueryStr<S> { + /// Creates a new `&RiQueryStr` from the query part prefixed by `?`. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::types::IriQueryStr; + /// assert!(IriQueryStr::from_prefixed("?").is_ok()); + /// assert!(IriQueryStr::from_prefixed("?foo").is_ok()); + /// assert!(IriQueryStr::from_prefixed("?foo/bar").is_ok()); + /// assert!(IriQueryStr::from_prefixed("?/foo/bar").is_ok()); + /// assert!(IriQueryStr::from_prefixed("?//foo/bar").is_ok()); + /// assert!(IriQueryStr::from_prefixed("?https://user:pass@example.com:8080").is_ok()); + /// assert!(IriQueryStr::from_prefixed("?https://example.com/").is_ok()); + /// // Question sign `?` can appear in an IRI query. + /// assert!(IriQueryStr::from_prefixed("?query?again").is_ok()); + /// + /// // `<` and `>` cannot directly appear in an IRI. + /// assert!(IriQueryStr::from_prefixed("?<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in an IRI. + /// assert!(IriQueryStr::from_prefixed("?%").is_err()); + /// assert!(IriQueryStr::from_prefixed("?%GG").is_err()); + /// // `?` prefix is expected. + /// assert!(IriQueryStr::from_prefixed("").is_err()); + /// assert!(IriQueryStr::from_prefixed("foo").is_err()); + /// // Hash sign `#` cannot appear in an IRI query. + /// assert!(IriQueryStr::from_prefixed("?#hash").is_err()); + /// ``` + pub fn from_prefixed(s: &str) -> Result<&Self, Error> { + if !s.starts_with('?') { + return Err(Error::new()); + } + TryFrom::try_from(&s[1..]) + } +} diff --git a/vendor/iri-string/src/types/generic/reference.rs b/vendor/iri-string/src/types/generic/reference.rs new file mode 100644 index 00000000..9ac62281 --- /dev/null +++ b/vendor/iri-string/src/types/generic/reference.rs @@ -0,0 +1,697 @@ +//! IRI reference. 
 + +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::String; + +use crate::components::AuthorityComponents; +#[cfg(feature = "alloc")] +use crate::mask_password::password_range_to_hide; +use crate::mask_password::PasswordMasked; +use crate::normalize::Normalized; +use crate::parser::trusted as trusted_parser; +#[cfg(feature = "alloc")] +use crate::raw; +use crate::resolve::FixedBaseResolver; +use crate::spec::Spec; +use crate::types::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiRelativeStr, RiStr}; +#[cfg(feature = "alloc")] +use crate::types::{RiRelativeString, RiString}; +#[cfg(feature = "alloc")] +use crate::validate::iri; +use crate::validate::iri_reference; + +define_custom_string_slice! { + /// A borrowed string of an IRI reference. + /// + /// This corresponds to [`IRI-reference` rule] in [RFC 3987] + /// (and [`URI-reference` rule] in [RFC 3986]). + /// The rule for `IRI-reference` is `IRI / irelative-ref`. + /// In other words, this is union of [`RiStr`] and [`RiRelativeStr`]. + /// + /// # Valid values + /// + /// This type can have an IRI reference (which can be absolute or relative). + /// + /// ``` + /// # use iri_string::types::IriReferenceStr; + /// assert!(IriReferenceStr::new("https://user:pass@example.com:8080").is_ok()); + /// assert!(IriReferenceStr::new("https://example.com/").is_ok()); + /// assert!(IriReferenceStr::new("https://example.com/foo?bar=baz").is_ok()); + /// assert!(IriReferenceStr::new("https://example.com/foo?bar=baz#qux").is_ok()); + /// assert!(IriReferenceStr::new("foo:bar").is_ok()); + /// assert!(IriReferenceStr::new("foo:").is_ok()); + /// // `foo://.../` below are all allowed. See the crate documentation for detail. 
+ /// assert!(IriReferenceStr::new("foo:/").is_ok()); + /// assert!(IriReferenceStr::new("foo://").is_ok()); + /// assert!(IriReferenceStr::new("foo:///").is_ok()); + /// assert!(IriReferenceStr::new("foo:////").is_ok()); + /// assert!(IriReferenceStr::new("foo://///").is_ok()); + /// assert!(IriReferenceStr::new("foo/bar").is_ok()); + /// assert!(IriReferenceStr::new("/foo/bar").is_ok()); + /// assert!(IriReferenceStr::new("//foo/bar").is_ok()); + /// assert!(IriReferenceStr::new("#foo").is_ok()); + /// ``` + /// + /// Some characters and sequences cannot be used in an IRI reference. + /// + /// ``` + /// # use iri_string::types::IriReferenceStr; + /// // `<` and `>` cannot directly appear in an IRI reference. + /// assert!(IriReferenceStr::new("<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in an IRI reference. + /// assert!(IriReferenceStr::new("%").is_err()); + /// assert!(IriReferenceStr::new("%GG").is_err()); + /// ``` + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`IRI-reference` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`URI-reference` rule]: https://tools.ietf.org/html/rfc3986#section-4.1 + /// [`RiRelativeStr`]: struct.RiRelativeStr.html + /// [`RiStr`]: struct.RiStr.html + struct RiReferenceStr { + validator = iri_reference, + expecting_msg = "IRI reference string", + } +} + +#[cfg(feature = "alloc")] +define_custom_string_owned! { + /// An owned string of an IRI reference. + /// + /// This corresponds to [`IRI-reference` rule] in [RFC 3987] + /// (and [`URI-reference` rule] in [RFC 3986]). + /// The rule for `IRI-reference` is `IRI / irelative-ref`. + /// In other words, this is union of [`RiString`] and [`RiRelativeString`]. + /// + /// For details, see the document for [`RiReferenceStr`]. + /// + /// Enabled by `alloc` or `std` feature. 
+ /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`IRI-reference` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`URI-reference` rule]: https://tools.ietf.org/html/rfc3986#section-4.1 + /// [`RiReferenceStr`]: struct.RiReferenceStr.html + /// [`RiRelativeString`]: struct.RiRelativeString.html + /// [`RiString`]: struct.RiString.html + struct RiReferenceString { + validator = iri_reference, + slice = RiReferenceStr, + expecting_msg = "IRI reference string", + } +} + +impl<S: Spec> RiReferenceStr<S> { + /// Returns the string as [`&RiStr`][`RiStr`], if it is valid as an IRI. + /// + /// If it is not an IRI, then [`&RiRelativeStr`][`RiRelativeStr`] is returned as `Err(_)`. + /// + /// [`RiRelativeStr`]: struct.RiRelativeStr.html + /// [`RiStr`]: struct.RiStr.html + pub fn to_iri(&self) -> Result<&RiStr<S>, &RiRelativeStr<S>> { + // Check with `IRI` rule first, because the syntax rule for `IRI-reference` is + // `IRI / irelative-ref`. + // + // > Some productions are ambiguous. The "first-match-wins" (a.k.a. + // > "greedy") algorithm applies. For details, see [RFC3986]. + // > + // > --- <https://tools.ietf.org/html/rfc3987#section-2.2>. + + <&RiStr<S>>::try_from(self.as_str()).map_err(|_| { + // SAFETY: if an IRI reference is not an IRI, then it is a relative IRI. + // See the RFC 3987 syntax rule `IRI-reference = IRI / irelative-ref`. + unsafe { RiRelativeStr::new_maybe_unchecked(self.as_str()) } + }) + } + + /// Returns the string as [`&RiRelativeStr`][`RiRelativeStr`], if it is valid as a relative IRI reference. + /// + /// If it is not a relative IRI reference (i.e. it is an IRI), then [`&RiStr`][`RiStr`] is returned as `Err(_)`. 
+ /// + /// [`RiRelativeStr`]: struct.RiRelativeStr.html + /// [`RiStr`]: struct.RiStr.html + pub fn to_relative_iri(&self) -> Result<&RiRelativeStr<S>, &RiStr<S>> { + match self.to_iri() { + Ok(iri) => Err(iri), + Err(relative) => Ok(relative), + } + } + + /// Returns the IRI resolved against the given base IRI. + /// + /// For IRI reference resolution output examples, see [RFC 3986 section 5.4]. + /// + /// If you are going to resolve multiple references against the common base, + /// consider using [`FixedBaseResolver`]. + /// + /// # Strictness + /// + /// The IRI parsers provided by this crate are strict (e.g. `http:g` is + /// always interpreted as a composition of the scheme `http` and the path + /// `g`), so backward compatible parsing and resolution are not provided. + /// About parser and resolver strictness, see [RFC 3986 section 5.4.2]: + /// + /// > Some parsers allow the scheme name to be present in a relative + /// > reference if it is the same as the base URI scheme. This is considered + /// > to be a loophole in prior specifications of partial URI + /// > [RFC1630](https://tools.ietf.org/html/rfc1630). Its use should be + /// > avoided but is allowed for backward compatibility. + /// > + /// > --- <https://tools.ietf.org/html/rfc3986#section-5.4.2> + /// + /// # Failures + /// + /// This method itself does not fail, but IRI resolution without WHATWG URL + /// Standard serialization can fail in some minor cases. + /// + /// To see examples of such unresolvable IRIs, visit the documentation + /// for the [`normalize`][`crate::normalize`] module. + /// + /// [RFC 3986 section 5.4]: https://tools.ietf.org/html/rfc3986#section-5.4 + /// [RFC 3986 section 5.4.2]: https://tools.ietf.org/html/rfc3986#section-5.4.2 + pub fn resolve_against<'a>(&'a self, base: &'a RiAbsoluteStr<S>) -> Normalized<'a, RiStr<S>> { + FixedBaseResolver::new(base).resolve(self.as_ref()) + } + + /// Returns the proxy to the IRI with password masking feature. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://user:password@example.com/path?query")?; + /// let masked = iri.mask_password(); + /// assert_eq!(masked.to_dedicated_string(), "http://user:@example.com/path?query"); + /// + /// assert_eq!( + /// masked.replace_password("${password}").to_string(), + /// "http://user:${password}@example.com/path?query" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn mask_password(&self) -> PasswordMasked<'_, Self> { + PasswordMasked::new(self) + } +} + +/// Components getters. +impl<S: Spec> RiReferenceStr<S> { + /// Returns the scheme. + /// + /// The following colon is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.scheme_str(), Some("http")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("foo/bar:baz")?; + /// assert_eq!(iri.scheme_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn scheme_str(&self) -> Option<&str> { + trusted_parser::extract_scheme(self.as_str()) + } + + /// Returns the authority. + /// + /// The leading `//` is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.authority_str(), Some("example.com")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("foo/bar:baz")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_str(&self) -> Option<&str> { + trusted_parser::extract_authority(self.as_str()) + } + + /// Returns the path. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.path_str(), "/pathpath"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.path_str(), "uuid:10db315b-fcd1-4428-aca8-15babc9a2da2"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("foo/bar:baz")?; + /// assert_eq!(iri.path_str(), "foo/bar:baz"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn path_str(&self) -> &str { + trusted_parser::extract_path(self.as_str()) + } + + /// Returns the 
query. + /// + /// The leading question mark (`?`) is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::{IriQueryStr, IriReferenceStr}; + /// + /// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// let query = IriQueryStr::new("queryquery")?; + /// assert_eq!(iri.query(), Some(query)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.query(), None); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::{IriQueryStr, IriReferenceStr}; + /// + /// let iri = IriReferenceStr::new("foo/bar:baz?")?; + /// let query = IriQueryStr::new("")?; + /// assert_eq!(iri.query(), Some(query)); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query(&self) -> Option<&RiQueryStr<S>> { + trusted_parser::extract_query(self.as_str()).map(|query| { + // SAFETY: `extract_query` returns the query part of an IRI, and the + // returned string should have only valid characters since is the + // substring of the source IRI. + unsafe { RiQueryStr::new_maybe_unchecked(query) } + }) + } + + /// Returns the query as a raw string slice. + /// + /// The leading question mark (`?`) is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.query_str(), Some("queryquery")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?; + /// assert_eq!(iri.query_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("foo/bar:baz?")?; + /// assert_eq!(iri.query_str(), Some("")); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query_str(&self) -> Option<&str> { + trusted_parser::extract_query(self.as_str()) + } + + /// Returns the fragment part if exists. + /// + /// A leading `#` character is truncated if the fragment part exists. + /// + /// # Examples + /// + /// If the IRI has a fragment part, `Some(_)` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error}; + /// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#corge")?; + /// let fragment = IriFragmentStr::new("corge")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error}; + /// let iri = IriReferenceStr::new("#foo")?; + /// let fragment = IriFragmentStr::new("foo")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// When the fragment part exists but is empty string, `Some(_)` is returned. 
+ /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error}; + /// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#")?; + /// let fragment = IriFragmentStr::new("")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error}; + /// let iri = IriReferenceStr::new("#")?; + /// let fragment = IriFragmentStr::new("")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the IRI has no fragment, `None` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux")?; + /// assert_eq!(iri.fragment(), None); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("")?; + /// assert_eq!(iri.fragment(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn fragment(&self) -> Option<&RiFragmentStr<S>> { + trusted_parser::extract_fragment(self.as_str()).map(|fragment| { + // SAFETY: `extract_fragment` returns the fragment part of an IRI, + // and the returned string should have only valid characters since + // is the substring of the source IRI. + unsafe { RiFragmentStr::new_maybe_unchecked(fragment) } + }) + } + + /// Returns the fragment part as a raw string slice if exists. + /// + /// A leading `#` character is truncated if the fragment part exists. + /// + /// # Examples + /// + /// If the IRI has a fragment part, `Some(_)` is returned. 
+ /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#corge")?; + /// assert_eq!(iri.fragment_str(), Some("corge")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("#foo")?; + /// assert_eq!(iri.fragment_str(), Some("foo")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// When the fragment part exists but is empty string, `Some(_)` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#")?; + /// assert_eq!(iri.fragment_str(), Some("")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("#")?; + /// assert_eq!(iri.fragment_str(), Some("")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the IRI has no fragment, `None` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux")?; + /// assert_eq!(iri.fragment(), None); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error}; + /// let iri = IriReferenceStr::new("")?; + /// assert_eq!(iri.fragment(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn fragment_str(&self) -> Option<&str> { + trusted_parser::extract_fragment(self.as_str()) + } + + /// Returns the authority components. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("http://user:pass@example.com:8080/pathpath?queryquery")?; + /// let authority = iri.authority_components() + /// .expect("authority is available"); + /// assert_eq!(authority.userinfo(), Some("user:pass")); + /// assert_eq!(authority.host(), "example.com"); + /// assert_eq!(authority.port(), Some("8080")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriReferenceStr; + /// + /// let iri = IriReferenceStr::new("foo//bar:baz")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> { + AuthorityComponents::from_iri(self) + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> RiReferenceString<S> { + /// Returns the string as [`RiString`], if it is valid as an IRI. + /// + /// If it is not an IRI, then [`RiRelativeString`] is returned as `Err(_)`. + /// + /// [`RiRelativeString`]: struct.RiRelativeString.html + /// [`RiString`]: struct.RiString.html + pub fn into_iri(self) -> Result<RiString<S>, RiRelativeString<S>> { + let s: String = self.into(); + // Check with `IRI` rule first, because of the syntax. + // + // > Some productions are ambiguous. The "first-match-wins" (a.k.a. + // > "greedy") algorithm applies. For details, see [RFC3986]. + // > + // > --- <https://tools.ietf.org/html/rfc3987#section-2.2>. + if iri::<S>(&s).is_ok() { + // SAFETY: just checked `s` is valid as an IRI. + Ok(unsafe { RiString::new_always_unchecked(s) }) + } else { + // SAFETY: if an IRI reference is not an IRI, then it is a relative IRI. + // See the RFC 3987 syntax rule `IRI-reference = IRI / irelative-ref`. 
+ Err(unsafe { RiRelativeString::new_maybe_unchecked(s) }) + } + } + + /// Returns the string as [`RiRelativeString`], if it is valid as a relative IRI reference. + /// + /// If it is not a relative IRI reference (i.e. it is an IRI), then [`RiString`] is returned as `Err(_)`. + /// + /// [`RiRelativeString`]: struct.RiRelativeString.html + /// [`RiString`]: struct.RiString.html + pub fn into_relative_iri(self) -> Result<RiRelativeString<S>, RiString<S>> { + match self.into_iri() { + Ok(iri) => Err(iri), + Err(relative) => Ok(relative), + } + } + + /// Sets the fragment part to the given string. + /// + /// Removes the fragment part (and the leading `#` character) if `None` is given. + pub fn set_fragment(&mut self, fragment: Option<&RiFragmentStr<S>>) { + raw::set_fragment(&mut self.inner, fragment.map(AsRef::as_ref)); + debug_assert!(iri_reference::<S>(&self.inner).is_ok()); + } + + /// Removes the password completely (including separator colon) from `self` even if it is empty. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriReferenceString; + /// + /// let mut iri = IriReferenceString::try_from("http://user:password@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "http://user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// Even if the password is empty, the password and separator will be removed. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriReferenceString; + /// + /// let mut iri = IriReferenceString::try_from("http://user:@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "http://user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice()) { + Some(v) => v, + None => return, + }; + let separator_colon = pw_range.start - 1; + // SAFETY: the IRI must be valid after the password component and + // the leading separator colon is removed. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(separator_colon..pw_range.end); + debug_assert!( + RiReferenceStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component is removed" + ); + } + } + + /// Replaces the non-empty password in `self` to the empty password. + /// + /// This leaves the separator colon if the password part was available. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriReferenceString; + /// + /// let mut iri = IriReferenceString::try_from("http://user:password@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "http://user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the password is empty, it is left as is. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriReferenceString; + /// + /// let mut iri = IriReferenceString::try_from("http://user:@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "http://user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_nonempty_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice()) { + Some(v) if !v.is_empty() => v, + _ => return, + }; + debug_assert_eq!( + self.as_str().as_bytes().get(pw_range.start - 1).copied(), + Some(b':'), + "[validity] the password component must be prefixed with a separator colon" + ); + // SAFETY: the IRI must be valid after the password component is + // replaced with the empty password. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(pw_range); + debug_assert!( + RiReferenceStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component \ + is replaced with the empty password" + ); + } + } +} diff --git a/vendor/iri-string/src/types/generic/relative.rs b/vendor/iri-string/src/types/generic/relative.rs new file mode 100644 index 00000000..2c1618b5 --- /dev/null +++ b/vendor/iri-string/src/types/generic/relative.rs @@ -0,0 +1,571 @@ +//! Relative IRI reference. + +use crate::components::AuthorityComponents; +#[cfg(feature = "alloc")] +use crate::mask_password::password_range_to_hide; +use crate::mask_password::PasswordMasked; +use crate::normalize::Normalized; +use crate::parser::trusted as trusted_parser; +#[cfg(feature = "alloc")] +use crate::raw; +use crate::resolve::FixedBaseResolver; +use crate::spec::Spec; +#[cfg(feature = "alloc")] +use crate::types::RiReferenceString; +use crate::types::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiStr}; +use crate::validate::relative_ref; + +define_custom_string_slice! { + /// A borrowed slice of a relative IRI reference. 
+ /// + /// This corresponds to [`irelative-ref` rule] in [RFC 3987] + /// (and [`relative-ref` rule] in [RFC 3986]). + /// The rule for `irelative-ref` is `irelative-part [ "?" iquery ] [ "#" ifragment ]`. + /// + /// # Valid values + /// + /// This type can have a relative IRI reference. + /// + /// ``` + /// # use iri_string::types::IriRelativeStr; + /// assert!(IriRelativeStr::new("foo").is_ok()); + /// assert!(IriRelativeStr::new("foo/bar").is_ok()); + /// assert!(IriRelativeStr::new("/foo").is_ok()); + /// assert!(IriRelativeStr::new("//foo/bar").is_ok()); + /// assert!(IriRelativeStr::new("?foo").is_ok()); + /// assert!(IriRelativeStr::new("#foo").is_ok()); + /// assert!(IriRelativeStr::new("foo/bar?baz#qux").is_ok()); + /// // The first path component can have a colon if the path is absolute. + /// assert!(IriRelativeStr::new("/foo:bar/").is_ok()); + /// // Second or following path components can have a colon. + /// assert!(IriRelativeStr::new("foo/bar://baz/").is_ok()); + /// assert!(IriRelativeStr::new("./foo://bar").is_ok()); + /// ``` + /// + /// Absolute form of a reference is not allowed. + /// + /// ``` + /// # use iri_string::types::IriRelativeStr; + /// assert!(IriRelativeStr::new("https://example.com/").is_err()); + /// // The first path component cannot have a colon, if the path is not absolute. + /// assert!(IriRelativeStr::new("foo:bar").is_err()); + /// assert!(IriRelativeStr::new("foo:").is_err()); + /// assert!(IriRelativeStr::new("foo:/").is_err()); + /// assert!(IriRelativeStr::new("foo://").is_err()); + /// assert!(IriRelativeStr::new("foo:///").is_err()); + /// assert!(IriRelativeStr::new("foo:////").is_err()); + /// assert!(IriRelativeStr::new("foo://///").is_err()); + /// ``` + /// + /// Some characters and sequences cannot be used in a relative IRI reference. + /// + /// ``` + /// # use iri_string::types::IriRelativeStr; + /// // `<` and `>` cannot directly appear in a relative IRI reference. 
+ /// assert!(IriRelativeStr::new("<not allowed>").is_err()); + /// // Broken percent encoding cannot appear in a relative IRI reference. + /// assert!(IriRelativeStr::new("%").is_err()); + /// assert!(IriRelativeStr::new("%GG").is_err()); + /// ``` + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`irelative-ref` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`relative-ref` rule]: https://tools.ietf.org/html/rfc3986#section-4.2 + struct RiRelativeStr { + validator = relative_ref, + expecting_msg = "Relative IRI reference string", + } +} + +#[cfg(feature = "alloc")] +define_custom_string_owned! { + /// An owned string of a relative IRI reference. + /// + /// This corresponds to [`irelative-ref` rule] in [RFC 3987] + /// (and [`relative-ref` rule] in [RFC 3986]). + /// The rule for `irelative-ref` is `irelative-part [ "?" iquery ] [ "#" ifragment ]`. + /// + /// For details, see the documentation for [`RiRelativeStr`]. + /// + /// Enabled by `alloc` or `std` feature. + /// + /// [RFC 3986]: https://tools.ietf.org/html/rfc3986 + /// [RFC 3987]: https://tools.ietf.org/html/rfc3987 + /// [`irelative-ref` rule]: https://tools.ietf.org/html/rfc3987#section-2.2 + /// [`relative-ref` rule]: https://tools.ietf.org/html/rfc3986#section-4.2 + /// [`RiRelativeStr`]: struct.RiRelativeStr.html + struct RiRelativeString { + validator = relative_ref, + slice = RiRelativeStr, + expecting_msg = "Relative IRI reference string", + } +} + +impl<S: Spec> RiRelativeStr<S> { + /// Returns the IRI resolved against the given base IRI. + /// + /// For IRI reference resolution output examples, see [RFC 3986 section 5.4]. + /// + /// If you are going to resolve multiple references against the common base, + /// consider using [`FixedBaseResolver`]. + /// + /// # Strictness + /// + /// The IRI parsers provided by this crate are strict (e.g. 
`http:g` is + /// always interpreted as a composition of the scheme `http` and the path + /// `g`), so backward compatible parsing and resolution are not provided. + /// About parser and resolver strictness, see [RFC 3986 section 5.4.2]: + /// + /// > Some parsers allow the scheme name to be present in a relative + /// > reference if it is the same as the base URI scheme. This is considered + /// > to be a loophole in prior specifications of partial URI + /// > [RFC1630](https://tools.ietf.org/html/rfc1630). Its use should be + /// > avoided but is allowed for backward compatibility. + /// > + /// > --- <https://tools.ietf.org/html/rfc3986#section-5.4.2> + /// + /// # Failures + /// + /// This method itself does not fail, but IRI resolution without WHATWG URL + /// Standard serialization can fail in some minor cases. + /// + /// To see examples of such unresolvable IRIs, visit the documentation + /// for [`normalize`][`crate::normalize`] module. + /// + /// [RFC 3986 section 5.4]: https://tools.ietf.org/html/rfc3986#section-5.4 + /// [RFC 3986 section 5.4.2]: https://tools.ietf.org/html/rfc3986#section-5.4.2 + pub fn resolve_against<'a>(&'a self, base: &'a RiAbsoluteStr<S>) -> Normalized<'a, RiStr<S>> { + FixedBaseResolver::new(base).resolve(self.as_ref()) + } + + /// Returns the proxy to the IRI with password masking feature. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::format::ToDedicatedString; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("//user:password@example.com/path?query")?; + /// let masked = iri.mask_password(); + /// assert_eq!(masked.to_dedicated_string(), "//user:@example.com/path?query"); + /// + /// assert_eq!( + /// masked.replace_password("${password}").to_string(), + /// "//user:${password}@example.com/path?query" + /// ); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn mask_password(&self) -> PasswordMasked<'_, Self> { + PasswordMasked::new(self) + } +} + +/// Components getters. +impl<S: Spec> RiRelativeStr<S> { + /// Returns the authority. + /// + /// The leading `//` is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.authority_str(), Some("example.com")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("foo//bar:baz")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_str(&self) -> Option<&str> { + trusted_parser::extract_authority_relative(self.as_str()) + } + + /// Returns the path. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.path_str(), "/pathpath"); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("foo//bar:baz")?; + /// assert_eq!(iri.path_str(), "foo//bar:baz"); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn path_str(&self) -> &str { + trusted_parser::extract_path_relative(self.as_str()) + } + + /// Returns the query. + /// + /// The leading question mark (`?`) is truncated. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::{IriQueryStr, IriRelativeStr}; + /// + /// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?; + /// let query = IriQueryStr::new("queryquery")?; + /// assert_eq!(iri.query(), Some(query)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::{IriQueryStr, IriRelativeStr}; + /// + /// let iri = IriRelativeStr::new("foo//bar:baz?")?; + /// let query = IriQueryStr::new("")?; + /// assert_eq!(iri.query(), Some(query)); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query(&self) -> Option<&RiQueryStr<S>> { + trusted_parser::extract_query(self.as_str()).map(|query| { + // SAFETY: `extract_query` returns the query part of an IRI, and the + // returned string should have only valid characters since is the + // substring of the source IRI. + unsafe { RiQueryStr::new_maybe_unchecked(query) } + }) + } + + /// Returns the query in a raw string slice. + /// + /// The leading question mark (`?`) is truncated. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?; + /// assert_eq!(iri.query_str(), Some("queryquery")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("foo//bar:baz?")?; + /// assert_eq!(iri.query_str(), Some("")); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn query_str(&self) -> Option<&str> { + trusted_parser::extract_query(self.as_str()) + } + + /// Returns the fragment part if exists. + /// + /// A leading `#` character is truncated if the fragment part exists. + /// + /// # Examples + /// + /// If the IRI has a fragment part, `Some(_)` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriRelativeStr}, validate::Error}; + /// let iri = IriRelativeStr::new("?foo#bar")?; + /// let fragment = IriFragmentStr::new("bar")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriRelativeStr}, validate::Error}; + /// let iri = IriRelativeStr::new("#foo")?; + /// let fragment = IriFragmentStr::new("foo")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// When the fragment part exists but is empty string, `Some(_)` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriRelativeStr}, validate::Error}; + /// let iri = IriRelativeStr::new("#")?; + /// let fragment = IriFragmentStr::new("")?; + /// assert_eq!(iri.fragment(), Some(fragment)); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the IRI has no fragment, `None` is returned. 
+ /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error}; + /// let iri = IriRelativeStr::new("")?; + /// assert_eq!(iri.fragment(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn fragment(&self) -> Option<&RiFragmentStr<S>> { + AsRef::<RiReferenceStr<S>>::as_ref(self).fragment() + } + + /// Returns the fragment part as a raw string slice if exists. + /// + /// A leading `#` character is truncated if the fragment part exists. + /// + /// # Examples + /// + /// If the IRI has a fragment part, `Some(_)` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error}; + /// let iri = IriRelativeStr::new("?foo#bar")?; + /// assert_eq!(iri.fragment_str(), Some("bar")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error}; + /// let iri = IriRelativeStr::new("#foo")?; + /// assert_eq!(iri.fragment_str(), Some("foo")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// When the fragment part exists but is empty string, `Some(_)` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error}; + /// let iri = IriRelativeStr::new("#")?; + /// assert_eq!(iri.fragment_str(), Some("")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the IRI has no fragment, `None` is returned. + /// + /// ``` + /// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error}; + /// let iri = IriRelativeStr::new("")?; + /// assert_eq!(iri.fragment(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn fragment_str(&self) -> Option<&str> { + AsRef::<RiReferenceStr<S>>::as_ref(self).fragment_str() + } + + /// Returns the authority components. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("//user:pass@example.com:8080/pathpath?queryquery")?; + /// let authority = iri.authority_components() + /// .expect("authority is available"); + /// assert_eq!(authority.userinfo(), Some("user:pass")); + /// assert_eq!(authority.host(), "example.com"); + /// assert_eq!(authority.port(), Some("8080")); + /// # Ok::<_, Error>(()) + /// ``` + /// + /// ``` + /// # use iri_string::validate::Error; + /// use iri_string::types::IriRelativeStr; + /// + /// let iri = IriRelativeStr::new("foo//bar:baz")?; + /// assert_eq!(iri.authority_str(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> { + AuthorityComponents::from_iri(self.as_ref()) + } +} + +#[cfg(feature = "alloc")] +impl<S: Spec> RiRelativeString<S> { + /// Sets the fragment part to the given string. + /// + /// Removes fragment part (and the preceding `#` character) if `None` is given. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::{IriFragmentStr, IriRelativeString}; + /// + /// let mut iri = IriRelativeString::try_from("//user:password@example.com/path?query#frag.old")?; + /// assert_eq!(iri.fragment_str(), Some("frag.old")); + /// + /// iri.set_fragment(None); + /// assert_eq!(iri.fragment(), None); + /// + /// let frag_new = IriFragmentStr::new("frag-new")?; + /// iri.set_fragment(Some(frag_new)); + /// assert_eq!(iri.fragment_str(), Some("frag-new")); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// Fragment can be empty, and it is distinguished from the absence of a fragment. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriRelativeString; + /// + /// let mut iri = IriRelativeString::try_from("/path#")?; + /// assert_eq!(iri, "/path#"); + /// assert_eq!(iri.fragment_str(), Some(""), "Fragment is present and empty"); + /// + /// iri.set_fragment(None); + /// assert_eq!(iri, "/path", "Note that # is now removed"); + /// assert_eq!(iri.fragment_str(), None, "Fragment is absent"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn set_fragment(&mut self, fragment: Option<&RiFragmentStr<S>>) { + raw::set_fragment(&mut self.inner, fragment.map(AsRef::as_ref)); + debug_assert!(relative_ref::<S>(&self.inner).is_ok()); + } + + /// Removes the password completely (including separator colon) from `self` even if it is empty. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriRelativeString; + /// + /// let mut iri = IriRelativeString::try_from("//user:password@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "//user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// Even if the password is empty, the password and separator will be removed. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriRelativeString; + /// + /// let mut iri = IriRelativeString::try_from("//user:@example.com/path?query")?; + /// iri.remove_password_inline(); + /// assert_eq!(iri, "//user@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice().as_ref()) { + Some(v) => v, + None => return, + }; + let separator_colon = pw_range.start - 1; + // SAFETY: removing password component and the leading colon preserves + // the IRI still syntactically valid. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(separator_colon..pw_range.end); + debug_assert!( + RiRelativeStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component is removed" + ); + } + } + + /// Replaces the non-empty password in `self` to the empty password. + /// + /// This leaves the separator colon if the password part was available. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriRelativeString; + /// + /// let mut iri = IriRelativeString::try_from("//user:password@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "//user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + /// + /// If the password is empty, it is left as is. 
+ /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + /// use iri_string::types::IriRelativeString; + /// + /// let mut iri = IriRelativeString::try_from("//user:@example.com/path?query")?; + /// iri.remove_nonempty_password_inline(); + /// assert_eq!(iri, "//user:@example.com/path?query"); + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn remove_nonempty_password_inline(&mut self) { + let pw_range = match password_range_to_hide(self.as_slice().as_ref()) { + Some(v) if !v.is_empty() => v, + _ => return, + }; + debug_assert_eq!( + self.as_str().as_bytes().get(pw_range.start - 1).copied(), + Some(b':'), + "[validity] the password component must be prefixed with a separator colon" + ); + // SAFETY: the IRI must be valid after the password component is + // replaced with the empty password. + unsafe { + let buf = self.as_inner_mut(); + buf.drain(pw_range); + debug_assert!( + RiRelativeStr::<S>::new(buf).is_ok(), + "[validity] the IRI must be valid after the password component \ + is replaced with the empty password" + ); + } + } +} + +impl_trivial_conv_between_iri! { + from_slice: RiRelativeStr, + from_owned: RiRelativeString, + to_slice: RiReferenceStr, + to_owned: RiReferenceString, +} diff --git a/vendor/iri-string/src/types/iri.rs b/vendor/iri-string/src/types/iri.rs new file mode 100644 index 00000000..f89be7c7 --- /dev/null +++ b/vendor/iri-string/src/types/iri.rs @@ -0,0 +1,382 @@ +//! IRI-specific implementations. 
+ +#[cfg(feature = "alloc")] +use alloc::collections::TryReserveError; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::String; + +#[cfg(feature = "alloc")] +use crate::convert::try_percent_encode_iri_inline; +use crate::convert::MappedToUri; +use crate::spec::IriSpec; +use crate::types::{ + RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr, +}; +#[cfg(feature = "alloc")] +use crate::types::{ + RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString, RiRelativeString, + RiString, +}; +use crate::types::{ + UriAbsoluteStr, UriFragmentStr, UriQueryStr, UriReferenceStr, UriRelativeStr, UriStr, +}; +#[cfg(feature = "alloc")] +use crate::types::{ + UriAbsoluteString, UriFragmentString, UriQueryString, UriReferenceString, UriRelativeString, + UriString, +}; + +/// A type alias for [`RiAbsoluteStr`]`<`[`IriSpec`]`>`. +pub type IriAbsoluteStr = RiAbsoluteStr<IriSpec>; + +/// A type alias for [`RiAbsoluteString`]`<`[`IriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type IriAbsoluteString = RiAbsoluteString<IriSpec>; + +/// A type alias for [`RiFragmentStr`]`<`[`IriSpec`]`>`. +pub type IriFragmentStr = RiFragmentStr<IriSpec>; + +/// A type alias for [`RiFragmentString`]`<`[`IriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type IriFragmentString = RiFragmentString<IriSpec>; + +/// A type alias for [`RiStr`]`<`[`IriSpec`]`>`. +pub type IriStr = RiStr<IriSpec>; + +/// A type alias for [`RiString`]`<`[`IriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type IriString = RiString<IriSpec>; + +/// A type alias for [`RiReferenceStr`]`<`[`IriSpec`]`>`. +pub type IriReferenceStr = RiReferenceStr<IriSpec>; + +/// A type alias for [`RiReferenceString`]`<`[`IriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type IriReferenceString = RiReferenceString<IriSpec>; + +/// A type alias for [`RiRelativeStr`]`<`[`IriSpec`]`>`. +pub type IriRelativeStr = RiRelativeStr<IriSpec>; + +/// A type alias for [`RiRelativeString`]`<`[`IriSpec`]`>`. 
+#[cfg(feature = "alloc")] +pub type IriRelativeString = RiRelativeString<IriSpec>; + +/// A type alias for [`RiQueryStr`]`<`[`IriSpec`]`>`. +pub type IriQueryStr = RiQueryStr<IriSpec>; + +/// A type alias for [`RiQueryString`]`<`[`IriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type IriQueryString = RiQueryString<IriSpec>; + +/// Implements the conversion from an IRI into a URI. +macro_rules! impl_conversion_between_uri { + ( + $ty_owned_iri:ident, + $ty_owned_uri:ident, + $ty_borrowed_iri:ident, + $ty_borrowed_uri:ident, + $example_iri:expr, + $example_uri:expr + ) => { + /// Conversion from an IRI into a URI. + impl $ty_borrowed_iri { + /// Percent-encodes the IRI into a valid URI that identifies the equivalent resource. + /// + /// If you need more precise control over memory allocation and buffer + /// handling, use [`MappedToUri`][`crate::convert::MappedToUri`] type. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// # #[cfg(feature = "alloc")] { + #[doc = concat!("use iri_string::format::ToDedicatedString;")] + #[doc = concat!("use iri_string::types::{", stringify!($ty_borrowed_iri), ", ", stringify!($ty_owned_uri), "};")] + /// + #[doc = concat!("let iri = ", stringify!($ty_borrowed_iri), "::new(", stringify!($example_iri), ")?;")] + /// // Type annotation here is not necessary. + #[doc = concat!("let uri: ", stringify!($ty_owned_uri), " = iri.encode_to_uri().to_dedicated_string();")] + #[doc = concat!("assert_eq!(uri, ", stringify!($example_uri), ");")] + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn encode_to_uri(&self) -> MappedToUri<'_, Self> { + MappedToUri::from(self) + } + + /// Converts an IRI into a URI without modification, if possible. 
+ /// + /// This is semantically equivalent to + #[doc = concat!("`", stringify!($ty_borrowed_uri), "::new(self.as_str()).ok()`.")] + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + #[doc = concat!("use iri_string::types::{", stringify!($ty_borrowed_iri), ", ", stringify!($ty_borrowed_uri), "};")] + /// + #[doc = concat!("let ascii_iri = ", stringify!($ty_borrowed_iri), "::new(", stringify!($example_uri), ")?;")] + /// assert_eq!( + /// ascii_iri.as_uri().map(AsRef::as_ref), + #[doc = concat!(" Some(", stringify!($example_uri), ")")] + /// ); + /// + #[doc = concat!("let nonascii_iri = ", stringify!($ty_borrowed_iri), "::new(", stringify!($example_iri), ")?;")] + /// assert_eq!(nonascii_iri.as_uri(), None); + /// # Ok::<_, Error>(()) + /// ``` + #[must_use] + pub fn as_uri(&self) -> Option<&$ty_borrowed_uri> { + if !self.as_str().is_ascii() { + return None; + } + debug_assert!( + <$ty_borrowed_uri>::new(self.as_str()).is_ok(), + "[consistency] the ASCII-only IRI must also be a valid URI" + ); + // SAFETY: An ASCII-only IRI is a URI. + // URI (by `UriSpec`) is a subset of IRI (by `IriSpec`), + // and the difference is that URIs can only have ASCII characters. + let uri = unsafe { <$ty_borrowed_uri>::new_maybe_unchecked(self.as_str()) }; + Some(uri) + } + } + + /// Conversion from an IRI into a URI. + #[cfg(feature = "alloc")] + impl $ty_owned_iri { + /// Percent-encodes the IRI into a valid URI that identifies the equivalent resource. + /// + /// After the encode, the IRI is also a valid URI. + /// + /// If you want a new URI string rather than modifying the IRI + /// string, or if you need more precise control over memory + /// allocation and buffer handling, use + #[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")] + /// method. + /// + /// # Panics + /// + /// Panics if the memory allocation failed. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// #[cfg(feature = "alloc")] { + #[doc = concat!("use iri_string::types::", stringify!($ty_owned_iri), ";")] + /// + #[doc = concat!("let mut iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")] + /// iri.encode_to_uri_inline(); + #[doc = concat!("assert_eq!(iri, ", stringify!($example_uri), ");")] + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn encode_to_uri_inline(&mut self) { + self.try_encode_to_uri_inline() + .expect("failed to allocate memory"); + } + + /// Percent-encodes the IRI into a valid URI that identifies the equivalent resource. + /// + /// After the encode, the IRI is also a valid URI. + /// + /// If you want a new URI string rather than modifying the IRI + /// string, or if you need more precise control over memory + /// allocation and buffer handling, use + #[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")] + /// method. + /// + // TODO: This seems true as of this writing, but is this guaranteed? See + // <https://users.rust-lang.org/t/does-try-reserve-guarantees-that-the-content-is-preserved-on-allocation-failure/77446>. + // /// If the memory allocation failed, the content is preserved without modification. + // /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// #[cfg(feature = "alloc")] { + #[doc = concat!("use iri_string::types::", stringify!($ty_owned_iri), ";")] + /// + #[doc = concat!("let mut iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")] + /// iri.try_encode_to_uri_inline() + /// .expect("failed to allocate memory"); + #[doc = concat!("assert_eq!(iri, ", stringify!($example_uri), ");")] + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + pub fn try_encode_to_uri_inline(&mut self) -> Result<(), TryReserveError> { + // SAFETY: IRI is valid after it is encoded to URI (by percent encoding). 
+ unsafe { + let buf = self.as_inner_mut(); + try_percent_encode_iri_inline(buf)?; + } + debug_assert!( + <$ty_borrowed_iri>::new(self.as_str()).is_ok(), + "[consistency] the content must be valid at any time" + ); + Ok(()) + } + + /// Percent-encodes the IRI into a valid URI that identifies the equivalent resource. + /// + /// If you want a new URI string rather than modifying the IRI + /// string, or if you need more precise control over memory + /// allocation and buffer handling, use + #[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")] + /// method. + /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// #[cfg(feature = "alloc")] { + #[doc = concat!("use iri_string::types::{", stringify!($ty_owned_iri), ", ", stringify!($ty_owned_uri), "};")] + /// + #[doc = concat!("let iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")] + /// // Type annotation here is not necessary. + #[doc = concat!("let uri: ", stringify!($ty_owned_uri), " = iri.encode_into_uri();")] + #[doc = concat!("assert_eq!(uri, ", stringify!($example_uri), ");")] + /// # } + /// # Ok::<_, Error>(()) + /// ``` + #[inline] + #[must_use] + pub fn encode_into_uri(self) -> $ty_owned_uri { + self.try_encode_into_uri() + .expect("failed to allocate memory") + } + + /// Percent-encodes the IRI into a valid URI that identifies the equivalent resource. + /// + /// If you want a new URI string rather than modifying the IRI + /// string, or if you need more precise control over memory + /// allocation and buffer handling, use + #[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")] + /// method. + /// + // TODO: This seems true as of this writing, but is this guaranteed? See + // <https://users.rust-lang.org/t/does-try-reserve-guarantees-that-the-content-is-preserved-on-allocation-failure/77446>. 
+ // /// If the memory allocation failed, the content is preserved without modification. + // /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + /// #[cfg(feature = "alloc")] { + #[doc = concat!("use iri_string::types::{", stringify!($ty_owned_iri), ", ", stringify!($ty_owned_uri), "};")] + /// + #[doc = concat!("let iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")] + /// // Type annotation here is not necessary. + #[doc = concat!("let uri: ", stringify!($ty_owned_uri), " = iri.try_encode_into_uri()")] + /// .expect("failed to allocate memory"); + #[doc = concat!("assert_eq!(uri, ", stringify!($example_uri), ");")] + /// # } + /// # Ok::<_, Error>(()) + /// ``` + pub fn try_encode_into_uri(mut self) -> Result<$ty_owned_uri, TryReserveError> { + self.try_encode_to_uri_inline()?; + let s: String = self.into(); + debug_assert!( + <$ty_borrowed_uri>::new(s.as_str()).is_ok(), + "[consistency] the encoded IRI must also be a valid URI" + ); + // SAFETY: An ASCII-only IRI is a URI. + // URI (by `UriSpec`) is a subset of IRI (by `IriSpec`), + // and the difference is that URIs can only have ASCII characters. + let uri = unsafe { <$ty_owned_uri>::new_maybe_unchecked(s) }; + Ok(uri) + } + + /// Converts an IRI into a URI without modification, if possible. 
+ /// + /// # Examples + /// + /// ``` + /// # use iri_string::validate::Error; + #[doc = concat!("use iri_string::types::{", stringify!($ty_owned_iri), ", ", stringify!($ty_owned_uri), "};")] + /// + #[doc = concat!("let ascii_iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_uri), ")?;")] + /// assert_eq!( + /// ascii_iri.try_into_uri().map(|uri| uri.to_string()), + #[doc = concat!(" Ok(", stringify!($example_uri), ".to_string())")] + /// ); + /// + #[doc = concat!("let nonascii_iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")] + /// assert_eq!( + /// nonascii_iri.try_into_uri().map_err(|iri| iri.to_string()), + #[doc = concat!(" Err(", stringify!($example_iri), ".to_string())")] + /// ); + /// # Ok::<_, Error>(()) + /// ``` + pub fn try_into_uri(self) -> Result<$ty_owned_uri, $ty_owned_iri> { + if !self.as_str().is_ascii() { + return Err(self); + } + let s: String = self.into(); + debug_assert!( + <$ty_borrowed_uri>::new(s.as_str()).is_ok(), + "[consistency] the ASCII-only IRI must also be a valid URI" + ); + // SAFETY: An ASCII-only IRI is a URI. + // URI (by `UriSpec`) is a subset of IRI (by `IriSpec`), + // and the difference is that URIs can only have ASCII characters. 
+ let uri = unsafe { <$ty_owned_uri>::new_maybe_unchecked(s) }; + Ok(uri) + } + } + }; +} + +impl_conversion_between_uri!( + IriAbsoluteString, + UriAbsoluteString, + IriAbsoluteStr, + UriAbsoluteStr, + "http://example.com/?alpha=\u{03B1}", + "http://example.com/?alpha=%CE%B1" +); +impl_conversion_between_uri!( + IriReferenceString, + UriReferenceString, + IriReferenceStr, + UriReferenceStr, + "http://example.com/?alpha=\u{03B1}", + "http://example.com/?alpha=%CE%B1" +); +impl_conversion_between_uri!( + IriRelativeString, + UriRelativeString, + IriRelativeStr, + UriRelativeStr, + "../?alpha=\u{03B1}", + "../?alpha=%CE%B1" +); +impl_conversion_between_uri!( + IriString, + UriString, + IriStr, + UriStr, + "http://example.com/?alpha=\u{03B1}", + "http://example.com/?alpha=%CE%B1" +); +impl_conversion_between_uri!( + IriQueryString, + UriQueryString, + IriQueryStr, + UriQueryStr, + "alpha-is-\u{03B1}", + "alpha-is-%CE%B1" +); +impl_conversion_between_uri!( + IriFragmentString, + UriFragmentString, + IriFragmentStr, + UriFragmentStr, + "alpha-is-\u{03B1}", + "alpha-is-%CE%B1" +); diff --git a/vendor/iri-string/src/types/uri.rs b/vendor/iri-string/src/types/uri.rs new file mode 100644 index 00000000..682a971c --- /dev/null +++ b/vendor/iri-string/src/types/uri.rs @@ -0,0 +1,115 @@ +//! URI-specific implementations. + +use crate::spec::UriSpec; +use crate::types::{ + IriAbsoluteStr, IriFragmentStr, IriQueryStr, IriReferenceStr, IriRelativeStr, IriStr, + RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr, +}; +#[cfg(feature = "alloc")] +use crate::types::{ + IriAbsoluteString, IriFragmentString, IriQueryString, IriReferenceString, IriRelativeString, + IriString, RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString, + RiRelativeString, RiString, +}; + +/// A type alias for [`RiAbsoluteStr`]`<`[`UriSpec`]`>`. +pub type UriAbsoluteStr = RiAbsoluteStr<UriSpec>; + +/// A type alias for [`RiAbsoluteString`]`<`[`UriSpec`]`>`. 
+#[cfg(feature = "alloc")] +pub type UriAbsoluteString = RiAbsoluteString<UriSpec>; + +/// A type alias for [`RiFragmentStr`]`<`[`UriSpec`]`>`. +pub type UriFragmentStr = RiFragmentStr<UriSpec>; + +/// A type alias for [`RiFragmentString`]`<`[`UriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type UriFragmentString = RiFragmentString<UriSpec>; + +/// A type alias for [`RiStr`]`<`[`UriSpec`]`>`. +pub type UriStr = RiStr<UriSpec>; + +/// A type alias for [`RiString`]`<`[`UriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type UriString = RiString<UriSpec>; + +/// A type alias for [`RiReferenceStr`]`<`[`UriSpec`]`>`. +pub type UriReferenceStr = RiReferenceStr<UriSpec>; + +/// A type alias for [`RiReferenceString`]`<`[`UriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type UriReferenceString = RiReferenceString<UriSpec>; + +/// A type alias for [`RiRelativeStr`]`<`[`UriSpec`]`>`. +pub type UriRelativeStr = RiRelativeStr<UriSpec>; + +/// A type alias for [`RiRelativeString`]`<`[`UriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type UriRelativeString = RiRelativeString<UriSpec>; + +/// A type alias for [`RiQueryStr`]`<`[`UriSpec`]`>`. +pub type UriQueryStr = RiQueryStr<UriSpec>; + +/// A type alias for [`RiQueryString`]`<`[`UriSpec`]`>`. +#[cfg(feature = "alloc")] +pub type UriQueryString = RiQueryString<UriSpec>; + +/// Implements the trivial conversions between a URI and an IRI. +macro_rules! impl_conversions_between_iri { + ( + $borrowed_uri:ident, + $owned_uri:ident, + $borrowed_iri:ident, + $owned_iri:ident, + ) => { + impl AsRef<$borrowed_iri> for $borrowed_uri { + fn as_ref(&self) -> &$borrowed_iri { + // SAFETY: A valid URI is also a valid IRI. + unsafe { <$borrowed_iri>::new_maybe_unchecked(self.as_str()) } + } + } + + #[cfg(feature = "alloc")] + impl From<$owned_uri> for $owned_iri { + #[inline] + fn from(uri: $owned_uri) -> Self { + // SAFETY: A valid URI is also a valid IRI. 
+ unsafe { Self::new_maybe_unchecked(uri.into()) } + } + } + + #[cfg(feature = "alloc")] + impl AsRef<$borrowed_iri> for $owned_uri { + fn as_ref(&self) -> &$borrowed_iri { + AsRef::<$borrowed_uri>::as_ref(self).as_ref() + } + } + }; +} + +impl_conversions_between_iri!( + UriAbsoluteStr, + UriAbsoluteString, + IriAbsoluteStr, + IriAbsoluteString, +); +impl_conversions_between_iri!( + UriReferenceStr, + UriReferenceString, + IriReferenceStr, + IriReferenceString, +); +impl_conversions_between_iri!( + UriRelativeStr, + UriRelativeString, + IriRelativeStr, + IriRelativeString, +); +impl_conversions_between_iri!(UriStr, UriString, IriStr, IriString,); +impl_conversions_between_iri!(UriQueryStr, UriQueryString, IriQueryStr, IriQueryString,); +impl_conversions_between_iri!( + UriFragmentStr, + UriFragmentString, + IriFragmentStr, + IriFragmentString, +); diff --git a/vendor/iri-string/src/validate.rs b/vendor/iri-string/src/validate.rs new file mode 100644 index 00000000..efaa7efd --- /dev/null +++ b/vendor/iri-string/src/validate.rs @@ -0,0 +1,358 @@ +//! Validators. + +use core::fmt; + +#[cfg(feature = "std")] +use std::error; + +use crate::parser::validate as parser; +use crate::spec::Spec; + +/// Resource identifier validation error. +// Note that this type should implement `Copy` trait. +// To return additional non-`Copy` data as an error, use wrapper type +// (as `std::string::FromUtf8Error` contains `std::str::Utf8Error`). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Error(()); + +impl Error { + /// Creates a new `Error`. + /// + /// For internal use. + #[inline] + #[must_use] + pub(crate) fn new() -> Self { + Error(()) + } +} + +impl fmt::Display for Error { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("Invalid IRI") + } +} + +#[cfg(feature = "std")] +impl error::Error for Error {} + +/// Validates [IRI][uri]. +/// +/// This validator corresponds to [`RiStr`] and [`RiString`] types. 
+/// +/// # Examples +/// +/// This type can have an IRI (which is absolute, and may have fragment part). +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::iri}; +/// assert!(iri::<UriSpec>("https://user:pass@example.com:8080").is_ok()); +/// assert!(iri::<UriSpec>("https://example.com/").is_ok()); +/// assert!(iri::<UriSpec>("https://example.com/foo?bar=baz").is_ok()); +/// assert!(iri::<UriSpec>("https://example.com/foo?bar=baz#qux").is_ok()); +/// assert!(iri::<UriSpec>("foo:bar").is_ok()); +/// assert!(iri::<UriSpec>("foo:").is_ok()); +/// // `foo://.../` below are all allowed. See the crate documentation for detail. +/// assert!(iri::<UriSpec>("foo:/").is_ok()); +/// assert!(iri::<UriSpec>("foo://").is_ok()); +/// assert!(iri::<UriSpec>("foo:///").is_ok()); +/// assert!(iri::<UriSpec>("foo:////").is_ok()); +/// assert!(iri::<UriSpec>("foo://///").is_ok()); +/// ``` +/// +/// Relative IRI reference is not allowed. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::iri}; +/// // This is relative path. +/// assert!(iri::<UriSpec>("foo/bar").is_err()); +/// // `/foo/bar` is an absolute path, but it is authority-relative. +/// assert!(iri::<UriSpec>("/foo/bar").is_err()); +/// // `//foo/bar` is termed "network-path reference", +/// // or usually called "protocol-relative reference". +/// assert!(iri::<UriSpec>("//foo/bar").is_err()); +/// // Same-document reference is relative. +/// assert!(iri::<UriSpec>("#foo").is_err()); +/// // Empty string is not a valid absolute IRI. +/// assert!(iri::<UriSpec>("").is_err()); +/// ``` +/// +/// Some characters and sequences cannot be used in an IRI. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::iri}; +/// // `<` and `>` cannot directly appear in an IRI. +/// assert!(iri::<UriSpec>("<not allowed>").is_err()); +/// // Broken percent encoding cannot appear in an IRI. 
+/// assert!(iri::<UriSpec>("%").is_err()); +/// assert!(iri::<UriSpec>("%GG").is_err()); +/// ``` +/// +/// [uri]: https://tools.ietf.org/html/rfc3986#section-3 +/// [`RiStr`]: ../types/struct.RiStr.html +/// [`RiString`]: ../types/struct.RiString.html +pub fn iri<S: Spec>(s: &str) -> Result<(), Error> { + parser::validate_uri::<S>(s) +} + +/// Validates [IRI reference][uri-reference]. +/// +/// This validator corresponds to [`RiReferenceStr`] and [`RiReferenceString`] types. +/// +/// # Examples +/// +/// This type can have an IRI reference (which can be absolute or relative). +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::iri_reference}; +/// assert!(iri_reference::<UriSpec>("https://user:pass@example.com:8080").is_ok()); +/// assert!(iri_reference::<UriSpec>("https://example.com/").is_ok()); +/// assert!(iri_reference::<UriSpec>("https://example.com/foo?bar=baz").is_ok()); +/// assert!(iri_reference::<UriSpec>("https://example.com/foo?bar=baz#qux").is_ok()); +/// assert!(iri_reference::<UriSpec>("foo:bar").is_ok()); +/// assert!(iri_reference::<UriSpec>("foo:").is_ok()); +/// // `foo://.../` below are all allowed. See the crate documentation for detail. +/// assert!(iri_reference::<UriSpec>("foo:/").is_ok()); +/// assert!(iri_reference::<UriSpec>("foo://").is_ok()); +/// assert!(iri_reference::<UriSpec>("foo:///").is_ok()); +/// assert!(iri_reference::<UriSpec>("foo:////").is_ok()); +/// assert!(iri_reference::<UriSpec>("foo://///").is_ok()); +/// assert!(iri_reference::<UriSpec>("foo/bar").is_ok()); +/// assert!(iri_reference::<UriSpec>("/foo/bar").is_ok()); +/// assert!(iri_reference::<UriSpec>("//foo/bar").is_ok()); +/// assert!(iri_reference::<UriSpec>("#foo").is_ok()); +/// ``` +/// +/// Some characters and sequences cannot be used in an IRI reference. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::iri_reference}; +/// // `<` and `>` cannot directly appear in an IRI reference. 
+/// assert!(iri_reference::<UriSpec>("<not allowed>").is_err()); +/// // Broken percent encoding cannot appear in an IRI reference. +/// assert!(iri_reference::<UriSpec>("%").is_err()); +/// assert!(iri_reference::<UriSpec>("%GG").is_err()); +/// ``` +/// +/// [uri-reference]: https://tools.ietf.org/html/rfc3986#section-4.1 +/// [`RiReferenceStr`]: ../types/struct.RiReferenceStr.html +/// [`RiReferenceString`]: ../types/struct.RiReferenceString.html +pub fn iri_reference<S: Spec>(s: &str) -> Result<(), Error> { + parser::validate_uri_reference::<S>(s) +} + +/// Validates [absolute IRI][absolute-uri]. +/// +/// This validator corresponds to [`RiAbsoluteStr`] and [`RiAbsoluteString`] types. +/// +/// # Examples +/// +/// This type can have an absolute IRI without fragment part. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::absolute_iri}; +/// assert!(absolute_iri::<UriSpec>("https://example.com/foo?bar=baz").is_ok()); +/// assert!(absolute_iri::<UriSpec>("foo:bar").is_ok()); +/// // Scheme `foo` and empty path. +/// assert!(absolute_iri::<UriSpec>("foo:").is_ok()); +/// // `foo://.../` below are all allowed. See the crate documentation for detail. +/// assert!(absolute_iri::<UriSpec>("foo:/").is_ok()); +/// assert!(absolute_iri::<UriSpec>("foo://").is_ok()); +/// assert!(absolute_iri::<UriSpec>("foo:///").is_ok()); +/// assert!(absolute_iri::<UriSpec>("foo:////").is_ok()); +/// assert!(absolute_iri::<UriSpec>("foo://///").is_ok()); +/// +/// ``` +/// +/// Relative IRI is not allowed. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::absolute_iri}; +/// // This is relative path. +/// assert!(absolute_iri::<UriSpec>("foo/bar").is_err()); +/// // `/foo/bar` is an absolute path, but it is authority-relative. +/// assert!(absolute_iri::<UriSpec>("/foo/bar").is_err()); +/// // `//foo/bar` is termed "network-path reference", +/// // or usually called "protocol-relative reference". 
+/// assert!(absolute_iri::<UriSpec>("//foo/bar").is_err()); +/// // Empty string is not a valid absolute IRI. +/// assert!(absolute_iri::<UriSpec>("").is_err()); +/// ``` +/// +/// Fragment part (such as trailing `#foo`) is not allowed. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::absolute_iri}; +/// // Fragment part is not allowed. +/// assert!(absolute_iri::<UriSpec>("https://example.com/foo?bar=baz#qux").is_err()); +/// ``` +/// +/// Some characters and sequences cannot be used in an absolute IRI. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::absolute_iri}; +/// // `<` and `>` cannot directly appear in an absolute IRI. +/// assert!(absolute_iri::<UriSpec>("<not allowed>").is_err()); +/// // Broken percent encoding cannot appear in an absolute IRI. +/// assert!(absolute_iri::<UriSpec>("%").is_err()); +/// assert!(absolute_iri::<UriSpec>("%GG").is_err()); +/// ``` +/// +/// [absolute-uri]: https://tools.ietf.org/html/rfc3986#section-4.3 +/// [`RiAbsoluteStr`]: ../types/struct.RiAbsoluteStr.html +/// [`RiAbsoluteString`]: ../types/struct.RiAbsoluteString.html +pub fn absolute_iri<S: Spec>(s: &str) -> Result<(), Error> { + parser::validate_absolute_uri::<S>(s) +} + +/// Validates [relative reference][relative-ref]. +/// +/// This validator corresponds to [`RiRelativeStr`] and [`RiRelativeString`] types. +/// +/// # Valid values +/// +/// This type can have a relative IRI reference. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::relative_ref}; +/// assert!(relative_ref::<UriSpec>("foo").is_ok()); +/// assert!(relative_ref::<UriSpec>("foo/bar").is_ok()); +/// assert!(relative_ref::<UriSpec>("/foo").is_ok()); +/// assert!(relative_ref::<UriSpec>("//foo/bar").is_ok()); +/// assert!(relative_ref::<UriSpec>("?foo").is_ok()); +/// assert!(relative_ref::<UriSpec>("#foo").is_ok()); +/// assert!(relative_ref::<UriSpec>("foo/bar?baz#qux").is_ok()); +/// // The first path component can have colon if the path is absolute. 
+/// assert!(relative_ref::<UriSpec>("/foo:bar/").is_ok()); +/// // Second or following path components can have colon. +/// assert!(relative_ref::<UriSpec>("foo/bar://baz/").is_ok()); +/// assert!(relative_ref::<UriSpec>("./foo://bar").is_ok()); +/// ``` +/// +/// Absolute form of a reference is not allowed. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::relative_ref}; +/// assert!(relative_ref::<UriSpec>("https://example.com/").is_err()); +/// // The first path component cannot have colon, if the path is not absolute. +/// assert!(relative_ref::<UriSpec>("foo:bar").is_err()); +/// assert!(relative_ref::<UriSpec>("foo:").is_err()); +/// assert!(relative_ref::<UriSpec>("foo:/").is_err()); +/// assert!(relative_ref::<UriSpec>("foo://").is_err()); +/// assert!(relative_ref::<UriSpec>("foo:///").is_err()); +/// assert!(relative_ref::<UriSpec>("foo:////").is_err()); +/// assert!(relative_ref::<UriSpec>("foo://///").is_err()); +/// ``` +/// +/// Some characters and sequences cannot be used in a relative IRI reference. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::relative_ref}; +/// // `<` and `>` cannot directly appear in a relative IRI reference. +/// assert!(relative_ref::<UriSpec>("<not allowed>").is_err()); +/// // Broken percent encoding cannot appear in a relative IRI reference. +/// assert!(relative_ref::<UriSpec>("%").is_err()); +/// assert!(relative_ref::<UriSpec>("%GG").is_err()); +/// ``` +/// +/// [relative-ref]: https://tools.ietf.org/html/rfc3986#section-4.2 +/// [`RiRelativeStr`]: ../types/struct.RiRelativeStr.html +/// [`RiRelativeString`]: ../types/struct.RiRelativeString.html +pub fn relative_ref<S: Spec>(s: &str) -> Result<(), Error> { + parser::validate_relative_ref::<S>(s) +} + +/// Validates [IRI path][path]. +/// +/// [path]: https://tools.ietf.org/html/rfc3986#section-3.3 +pub fn path<S: Spec>(s: &str) -> Result<(), Error> { + parser::validate_path::<S>(s) +} + +/// Validates [IRI query][query]. 
+/// +/// This validator corresponds to [`RiQueryStr`] and [`RiQueryString`] types. +/// +/// Note that the first `?` character in an IRI is not a part of a query. +/// For example, `https://example.com/?foo#bar` has a query `foo`, **not** `?foo`. +/// +/// # Examples +/// +/// This type can have an IRI query. +/// Note that the IRI `foo://bar/baz?qux#quux` has the query `qux`, **not** `?qux`. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::query}; +/// assert!(query::<UriSpec>("").is_ok()); +/// assert!(query::<UriSpec>("foo").is_ok()); +/// assert!(query::<UriSpec>("foo/bar").is_ok()); +/// assert!(query::<UriSpec>("/foo/bar").is_ok()); +/// assert!(query::<UriSpec>("//foo/bar").is_ok()); +/// assert!(query::<UriSpec>("https://user:pass@example.com:8080").is_ok()); +/// assert!(query::<UriSpec>("https://example.com/").is_ok()); +/// // Question sign `?` can appear in an IRI query. +/// assert!(query::<UriSpec>("query?again").is_ok()); +/// ``` +/// +/// Some characters and sequences cannot be used in a query. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::query}; +/// // `<` and `>` cannot directly appear in an IRI query. +/// assert!(query::<UriSpec>("<not allowed>").is_err()); +/// // Broken percent encoding cannot appear in an IRI query. +/// assert!(query::<UriSpec>("%").is_err()); +/// assert!(query::<UriSpec>("%GG").is_err()); +/// // Hash sign `#` cannot appear in an IRI query. +/// assert!(query::<UriSpec>("#hash").is_err()); +/// ``` +/// +/// [query]: https://tools.ietf.org/html/rfc3986#section-3.4 +/// [`RiQueryStr`]: ../types/struct.RiQueryStr.html +/// [`RiQueryString`]: ../types/struct.RiQueryString.html +pub fn query<S: Spec>(s: &str) -> Result<(), Error> { + parser::validate_query::<S>(s) +} + +/// Validates [IRI fragment][fragment]. +/// +/// This validator corresponds to [`RiFragmentStr`] and [`RiFragmentString`] types. +/// +/// Note that the first `#` character in an IRI is not a part of a fragment. 
+/// For example, `https://example.com/#foo` has a fragment `foo`, **not** `#foo`. +/// +/// # Examples +/// +/// This type can have an IRI fragment. +/// Note that the IRI `foo://bar/baz#qux` has the fragment `qux`, **not** `#qux`. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::fragment}; +/// assert!(fragment::<UriSpec>("").is_ok()); +/// assert!(fragment::<UriSpec>("foo").is_ok()); +/// assert!(fragment::<UriSpec>("foo/bar").is_ok()); +/// assert!(fragment::<UriSpec>("/foo/bar").is_ok()); +/// assert!(fragment::<UriSpec>("//foo/bar").is_ok()); +/// assert!(fragment::<UriSpec>("https://user:pass@example.com:8080").is_ok()); +/// assert!(fragment::<UriSpec>("https://example.com/").is_ok()); +/// ``` +/// +/// Some characters and sequences cannot be used in a fragment. +/// +/// ``` +/// use iri_string::{spec::UriSpec, validate::fragment}; +/// // `<` and `>` cannot directly appear in an IRI fragment. +/// assert!(fragment::<UriSpec>("<not allowed>").is_err()); +/// // Broken percent encoding cannot appear in an IRI fragment. +/// assert!(fragment::<UriSpec>("%").is_err()); +/// assert!(fragment::<UriSpec>("%GG").is_err()); +/// // Hash sign `#` cannot appear in an IRI fragment. +/// assert!(fragment::<UriSpec>("#hash").is_err()); +/// ``` +/// +/// [fragment]: https://tools.ietf.org/html/rfc3986#section-3.5 +/// [`RiFragmentStr`]: ../types/struct.RiFragmentStr.html +/// [`RiFragmentString`]: ../types/struct.RiFragmentString.html +pub fn fragment<S: Spec>(s: &str) -> Result<(), Error> { + parser::validate_fragment::<S>(s) +} diff --git a/vendor/iri-string/tests/build.rs b/vendor/iri-string/tests/build.rs new file mode 100644 index 00000000..b1f3883c --- /dev/null +++ b/vendor/iri-string/tests/build.rs @@ -0,0 +1,576 @@ +//! Tests for builder. 
+ +mod components; +#[macro_use] +mod utils; + +use iri_string::build::Builder; +use iri_string::format::write_to_slice; +use iri_string::types::*; + +use self::components::{Components, TestCase, TEST_CASES}; + +/// Pairs of components and composed IRI should be consistent. +/// +/// This also (implicitly) tests that build-and-decompose and decompose-and-build +/// operations are identity conversions. +#[test] +fn consistent_components_and_composed() { + for case in TEST_CASES.iter().copied() { + let mut builder = Builder::new(); + case.components.feed_builder(&mut builder, false); + + // components -> composed. + let built = builder + .build::<IriReferenceStr>() + .expect("should be valid IRI reference"); + assert_eq_display!(built, case.composed); + + // composed -> components. + let composed = IriReferenceStr::new(case.composed).expect("should be valid IRI reference"); + let scheme = composed.scheme_str(); + let (user, password, host, port) = match composed.authority_components() { + None => (None, None, None, None), + Some(authority) => { + let (user, password) = match authority.userinfo() { + None => (None, None), + Some(userinfo) => match userinfo.find(':').map(|pos| userinfo.split_at(pos)) { + // `split_at` leaves the colon at the head of the second half, so skip it. + Some((user, password)) => (Some(user), Some(&password[1..])), + None => (Some(userinfo), None), + }, + }; + (user, password, Some(authority.host()), authority.port()) + } + }; + let path = composed.path_str(); + let query = composed.query().map(|s| s.as_str()); + let fragment = composed.fragment().map(|s| s.as_str()); + + let roundtrip_result = Components { + scheme, + user, + password, + host, + port, + path, + query, + fragment, + }; + assert_eq!(roundtrip_result, case.components, "case={case:#?}"); + } +} + +fn assert_builds_for_case(case: &TestCase<'_>, builder: &Builder<'_>) { + if case.is_iri_class() { + { + let built = builder + .clone() + .build::<IriReferenceStr>() + .expect("should be valid IRI reference"); + assert_eq_display!(built, case.composed); + } + { 
+ let built = builder.clone().build::<IriStr>(); + if case.is_absolute() { + let built = built.expect("should be valid IRI"); + assert_eq_display!(built, case.composed); + } else { + assert!(built.is_err(), "should be invalid as IRI"); + } + } + { + let built = builder.clone().build::<IriAbsoluteStr>(); + if case.is_absolute_without_fragment() { + let built = built.expect("should be valid absolute IRI"); + assert_eq_display!(built, case.composed); + } else { + assert!(built.is_err(), "should be invalid as absolute IRI"); + } + } + { + let built = builder.clone().build::<IriRelativeStr>(); + if case.is_relative() { + let built = built.expect("should be valid relative IRI reference"); + assert_eq_display!(built, case.composed); + } else { + assert!( + built.is_err(), + "should be invalid as relative IRI reference" + ); + } + } + } + if case.is_uri_class() { + { + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!(built, case.composed); + } + { + let built = builder.clone().build::<UriStr>(); + if case.is_absolute() { + let built = built.expect("should be valid URI"); + assert_eq_display!(built, case.composed); + } else { + assert!(built.is_err(), "should be invalid as URI"); + } + } + { + let built = builder.clone().build::<UriAbsoluteStr>(); + if case.is_absolute_without_fragment() { + let built = built.expect("should be valid absolute URI"); + assert_eq_display!(built, case.composed); + } else { + assert!(built.is_err(), "should be invalid as absolute URI"); + } + } + { + let built = builder.clone().build::<UriRelativeStr>(); + if case.is_relative() { + let built = built.expect("should be valid relative URI reference"); + assert_eq_display!(built, case.composed); + } else { + assert!( + built.is_err(), + "should be invalid as relative URI reference" + ); + } + } + } +} + +/// Build should succeed or fail, depending on the target syntax and the source string. 
+#[test] +fn build_simple() { + for case in TEST_CASES.iter() { + let mut builder = Builder::new(); + case.components.feed_builder(&mut builder, false); + + assert_builds_for_case(case, &builder); + } +} + +/// Fields of a builder can be unset. +#[test] +fn reuse_dirty_builder() { + let dirty = { + let mut b = Builder::new(); + b.scheme("scheme"); + b.userinfo(("user", "password")); + b.host("host"); + b.port("90127"); + b.path("/path/path-again"); + b.query("query"); + b.fragment("fragment"); + b + }; + for case in TEST_CASES.iter() { + let mut builder = dirty.clone(); + case.components.feed_builder(&mut builder, true); + + assert_builds_for_case(case, &builder); + } +} + +/// Builder can normalize absolute IRIs. +#[test] +fn build_normalized_absolute() { + for case in TEST_CASES.iter().filter(|case| case.is_absolute()) { + assert!( + !case.is_relative(), + "every IRI is absolute or relative, but not both" + ); + + let mut builder = Builder::new(); + case.components.feed_builder(&mut builder, false); + builder.normalize(); + + let built_iri = builder + .clone() + .build::<IriStr>() + .expect("should be valid IRI reference"); + assert_eq_display!(built_iri, case.normalized_iri, "case={case:#?}"); + + if case.is_uri_class() { + let built_uri = builder + .build::<UriStr>() + .expect("should be valid URI reference"); + assert_eq_display!(built_uri, case.normalized_uri, "case={case:#?}"); + } + } +} + +/// Builder can normalize relative IRIs. 
+#[test] +fn build_normalized_relative() { + for case in TEST_CASES.iter().filter(|case| case.is_relative()) { + assert!( + !case.is_absolute(), + "every IRI is absolute or relative, but not both" + ); + + let mut builder = Builder::new(); + case.components.feed_builder(&mut builder, false); + builder.normalize(); + + let built = builder + .clone() + .build::<IriRelativeStr>() + .expect("should be valid relative IRI reference"); + assert_eq_display!(built, case.normalized_iri, "case={case:#?}"); + + if case.is_uri_class() { + let built_uri = builder + .build::<UriReferenceStr>() + .expect("should be valid relative URI reference"); + assert_eq_display!(built_uri, case.normalized_uri, "case={case:#?}"); + } + } +} + +/// Build result can judge RFC3986-normalizedness correctly. +#[test] +fn build_normalizedness() { + for case in TEST_CASES.iter().filter(|case| case.is_absolute()) { + let mut builder = Builder::new(); + case.components.feed_builder(&mut builder, false); + builder.normalize(); + + let built = builder + .clone() + .build::<IriStr>() + .expect("should be valid IRI reference"); + let built_judge = built.ensure_rfc3986_normalizable().is_ok(); + assert_eq!( + built_judge, + case.is_rfc3986_normalizable(), + "RFC3986-normalizedness should be correctly judged: case={case:#?}" + ); + + let mut buf = [0_u8; 512]; + let s = write_to_slice(&mut buf, &built).expect("not enough buffer"); + let built_slice = IriStr::new(s).expect("should be valid IRI reference"); + assert!( + built_slice.is_normalized_but_authorityless_relative_path_preserved(), + "should be normalized" + ); + let slice_judge = built_slice.is_normalized_rfc3986(); + + assert_eq!( + slice_judge, built_judge, + "RFC3986-normalizedness should be consistently judged: case={case:#?}" + ); + } +} + +/// `Builder::port` should accept `u8` value. 
+#[test] +fn set_port_u8() { + let mut builder = Builder::new(); + builder.port(8_u8); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!(built, "//:8", "should accept `u8`"); +} + +/// `Builder::port` should accept `u16` value. +#[test] +fn set_port_u16() { + let mut builder = Builder::new(); + builder.port(65535_u16); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!(built, "//:65535", "should accept `u16`"); +} + +/// `Builder::port` should accept `&str` value. +#[test] +fn set_port_str() { + let mut builder = Builder::new(); + builder.port("8080"); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!(built, "//:8080", "should accept `&str`"); +} + +/// `Builder::port` should accept `&str` value even if it is quite large. +#[test] +fn set_port_str_large() { + let mut builder = Builder::new(); + builder.port("12345678901234567890"); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!( + built, + "//:12345678901234567890", + "should accept `&str` even it is quite large" + ); +} + +/// `Builder::ip_address` should accept `std::net::Ipv4Addr` value. +#[test] +#[cfg(feature = "std")] +fn set_ip_address_ipv4addr() { + let mut builder = Builder::new(); + builder.ip_address(std::net::Ipv4Addr::new(192, 0, 2, 0)); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!(built, "//192.0.2.0", "should accept `std::net::Ipv4Addr`"); +} + +/// `Builder::ip_address` should accept `std::net::Ipv6Addr` value. 
+#[test] +#[cfg(feature = "std")] +fn set_ip_address_ipv6addr() { + let mut builder = Builder::new(); + builder.ip_address(std::net::Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 1)); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!( + built, + "//[2001:db8::1]", + "should accept `std::net::Ipv6Addr`" + ); +} + +/// `Builder::ip_address` should accept `std::net::IpAddr` value. +#[test] +#[cfg(feature = "std")] +fn set_ip_address_ipaddr() { + let mut builder = Builder::new(); + builder.ip_address(std::net::IpAddr::V4(std::net::Ipv4Addr::new(192, 0, 2, 0))); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!(built, "//192.0.2.0", "should accept `std::net::IpAddr`"); +} + +/// `Builder::userinfo` should accept `&str`. +#[test] +fn set_userinfo_str() { + let mut builder = Builder::new(); + { + builder.userinfo("user:password"); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!(built, "//user:password@", "should accept `&str`"); + } + { + builder.userinfo("arbitrary-valid-string"); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!(built, "//arbitrary-valid-string@", "should accept `&str`"); + } + { + builder.userinfo("arbitrary:valid:string"); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!(built, "//arbitrary:valid:string@", "should accept `&str`"); + } +} + +/// `Builder::userinfo` should accept `(&str, &str)`. 
+#[test] +fn set_userinfo_pair_str_str() { + let mut builder = Builder::new(); + { + builder.userinfo(("user", "password")); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!(built, "//user:password@", "should accept `&str`"); + } + { + builder.userinfo(("", "")); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!(built, "//:@", "empty user and password should be preserved"); + } +} + +/// `Builder::userinfo` should accept `(&str, Option<&str>)`. +#[test] +fn set_userinfo_pair_str_optstr() { + let mut builder = Builder::new(); + { + builder.userinfo(("user", Some("password"))); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!( + built, + "//user:password@", + "should accept `(&str, Option<&str>)`" + ); + } + { + builder.userinfo(("", Some(""))); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!(built, "//:@", "empty user and password should be preserved"); + } + { + builder.userinfo(("user", None)); + let built = builder + .clone() + .build::<UriReferenceStr>() + .expect("should be valid URI reference"); + assert_eq_display!( + built, + "//user@", + "password given as `None` should be absent" + ); + } +} + +/// Builder should reject a colon in user. +#[test] +fn user_with_colon() { + let mut builder = Builder::new(); + builder.userinfo(("us:er", Some("password"))); + let result = builder.clone().build::<UriReferenceStr>(); + assert!(result.is_err(), "`user` part cannot have a colon"); +} + +/// Builder should be able to build a normalized IRI even when it requires +/// edge case handling of RFC 3986 normalization. 
+#[test] +fn normalize_double_slash_prefix() { + let mut builder = Builder::new(); + builder.scheme("scheme"); + builder.path("/..//bar"); + builder.normalize(); + let built = builder + .build::<IriStr>() + .expect("normalizable by `/.` path prefix"); + // Naive application of RFC 3986 normalization/resolution algorithm + // results in `scheme://bar`, but this is unintentional. `bar` should be + // the second path segment, not a host. So this should be rejected. + assert!( + built.ensure_rfc3986_normalizable().is_err(), + "not normalizable by RFC 3986 algorithm" + ); + // In contrast to RFC 3986, WHATWG URL Standard defines serialization + // algorithm and handles this case specially. In this case, the result + // is `scheme:/.//bar`, this won't be considered fully normalized from + // the RFC 3986 point of view, but more normalization would be + // impossible and this would practically work in most situations. + assert_eq_display!(built, "scheme:/.//bar"); +} + +/// Builder should be able to build a normalized IRI even when it requires +/// edge case handling of RFC 3986 normalization. +#[test] +fn absolute_double_slash_path_without_authority() { + let mut builder = Builder::new(); + builder.scheme("scheme"); + builder.path("//bar"); + + // Should fail without normalization. + { + let result = builder.clone().build::<IriStr>(); + assert!( + result.is_err(), + "`scheme://bar` is unintended so the build should fail" + ); + } + + // With normalization, the build succeeds. + builder.normalize(); + let built = builder + .build::<IriStr>() + .expect("normalizable by `/.` path prefix"); + // Naive application of RFC 3986 normalization/resolution algorithm + // results in `scheme://bar`, but this is unintentional. `bar` should be + // the second path segment, not a host. So this should be rejected. 
+ assert!( + built.ensure_rfc3986_normalizable().is_err(), + "not normalizable by RFC 3986 algorithm" + ); + // In contrast to RFC 3986, WHATWG URL Standard defines serialization + // algorithm and handles this case specially. In this case, the result + // is `scheme:/.//bar`, this won't be considered fully normalized from + // the RFC 3986 point of view, but more normalization would be + // impossible and this would practically work in most situations. + assert_eq_display!(built, "scheme:/.//bar"); +} + +/// Authority requires the path to be empty or absolute (without normalization enabled). +#[test] +fn authority_and_relative_path() { + let mut builder = Builder::new(); + builder.host("example.com"); + builder.path("relative/path"); + assert!( + builder.clone().build::<IriReferenceStr>().is_err(), + "authority requires the path to be empty or absolute" + ); + + // Even if normalization is enabled, the relative path is unacceptable. + builder.normalize(); + assert!( + builder.build::<IriReferenceStr>().is_err(), + "authority requires the path to be empty or absolute" + ); +} + +#[test] +fn no_authority_and_double_slash_prefix_without_normalization() { + let mut builder = Builder::new(); + // This would be interpreted as "network-path reference" (see RFC 3986 + // section 4.2), so this should be rejected. + builder.path("//double-slash"); + assert!(builder.build::<IriReferenceStr>().is_err()); +} + +#[test] +fn no_authority_and_double_slash_prefix_with_normalization() { + let mut builder = Builder::new(); + builder.path("//double-slash"); + builder.normalize(); + let built = builder + .build::<IriReferenceStr>() + .expect("normalizable by `/.` path prefix"); + assert_eq_display!(built, "/.//double-slash"); + assert!(built.ensure_rfc3986_normalizable().is_err()); +} + +#[test] +fn no_authority_and_relative_first_segment_colon() { + let mut builder = Builder::new(); + // This would be interpreted as scheme `foo` and host `bar`, + // so this should be rejected. 
+ builder.path("foo:bar"); + assert!(builder.clone().build::<IriReferenceStr>().is_err()); + + // Normalization does not change the situation. + builder.normalize(); + assert!(builder.build::<IriReferenceStr>().is_err()); +} diff --git a/vendor/iri-string/tests/components/mod.rs b/vendor/iri-string/tests/components/mod.rs new file mode 100644 index 00000000..2d44d858 --- /dev/null +++ b/vendor/iri-string/tests/components/mod.rs @@ -0,0 +1,1771 @@ +//! Components. +#![allow(dead_code)] + +use core::fmt; + +use iri_string::build::Builder; + +/// Test case. +#[derive(Debug, Clone, Copy)] +pub struct TestCase<'a> { + /// Test case name. + pub name: Option<&'a str>, + /// Test case description. + pub description: Option<&'a str>, + /// Composed string. + pub composed: &'a str, + /// Components. + pub components: Components<'a>, + /// Normalized string as URI. + pub normalized_uri: &'a str, + /// Normalized string as IRI. + pub normalized_iri: &'a str, + /// Normalized (by WHATWG-like algorithm) string as URI. + pub normalized_uri_whatwg_like: Option<&'a str>, + /// Normalized (by WHATWG-like algorithm) string as IRI. + pub normalized_iri_whatwg_like: Option<&'a str>, + /// Different IRIs. 
+ pub different_iris: &'a [&'a str], +} + +impl TestCase<'_> { + #[inline] + #[must_use] + pub fn is_uri_class(&self) -> bool { + self.composed.is_ascii() + } + + #[inline] + #[must_use] + pub const fn is_iri_class(&self) -> bool { + true + } + + #[inline] + #[must_use] + pub const fn is_absolute(&self) -> bool { + self.components.is_absolute() + } + + #[inline] + #[must_use] + pub const fn is_absolute_without_fragment(&self) -> bool { + self.components.is_absolute_without_fragment() + } + + #[inline] + #[must_use] + pub const fn is_relative(&self) -> bool { + self.components.is_relative() + } + + #[inline] + #[must_use] + pub fn is_rfc3986_normalizable(&self) -> bool { + match self.normalized_iri.find('/') { + Some(pos) => !self.normalized_iri[(pos + 1)..].starts_with("./"), + None => true, + } + } +} + +/// Components. +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct Components<'a> { + /// `scheme`. + pub scheme: Option<&'a str>, + /// User part (string before the first colon) of `userinfo`. + /// + /// Note that `host` should also be `Some(_)` if this is `Some(_)`. + pub user: Option<&'a str>, + /// Password part (string after the first colon) of `userinfo`. + /// + /// Note that `host` should also be `Some(_)` if this is `Some(_)`. + pub password: Option<&'a str>, + /// `host`. + pub host: Option<&'a str>, + /// `port`. + /// + /// Note that `host` should also be `Some(_)` if this is `Some(_)`. + pub port: Option<&'a str>, + /// `path`. + pub path: &'a str, + /// `query`. + pub query: Option<&'a str>, + /// `fragment`. 
+ pub fragment: Option<&'a str>, +} + +impl<'a> Components<'a> { + #[inline] + #[must_use] + const fn const_default() -> Self { + Self { + scheme: None, + user: None, + password: None, + host: None, + port: None, + path: "", + query: None, + fragment: None, + } + } + + pub fn feed_builder(&self, builder: &mut Builder<'a>, clean: bool) { + if let Some(scheme) = self.scheme { + builder.scheme(scheme); + } else if clean { + builder.unset_scheme(); + } + + if let Some(host) = self.host { + if self.user.is_some() || self.password.is_some() { + builder.userinfo((self.user.unwrap_or(""), self.password)); + } else if clean { + builder.unset_userinfo(); + } + + builder.host(host); + + if let Some(port) = self.port { + builder.port(port); + } else if clean { + builder.unset_port(); + } + } else if clean { + builder.unset_authority(); + } + + builder.path(self.path); + + if let Some(query) = self.query { + builder.query(query); + } else if clean { + builder.unset_query(); + } + + if let Some(fragment) = self.fragment { + builder.fragment(fragment); + } else if clean { + builder.unset_fragment(); + } + } + + #[inline] + #[must_use] + pub const fn is_absolute(&self) -> bool { + self.scheme.is_some() + } + + #[inline] + #[must_use] + pub const fn is_absolute_without_fragment(&self) -> bool { + self.scheme.is_some() && self.fragment.is_none() + } + + #[inline] + #[must_use] + pub const fn is_relative(&self) -> bool { + self.scheme.is_none() + } +} + +/// Recomposes the components into an IRI reference string: +/// `[scheme ":"] ["//" [userinfo "@"] host [":" port]] path ["?" query] ["#" fragment]`. +/// +/// # Panics +/// +/// Panics if `user`, `password`, or `port` is set while `host` is absent. +impl fmt::Display for Components<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(scheme) = self.scheme { + write!(f, "{scheme}:")?; + } + + assert!( + self.host.is_some() + || (self.user.is_none() && self.password.is_none() && self.port.is_none()), + "`user`, `password`, and `port` requires `host` to be present" + ); + if let Some(host) = self.host { + // The authority component is introduced by `//`. + f.write_str("//")?; + if let Some(user) = self.user { + f.write_str(user)?; + } + if let Some(password) = self.password { + write!(f, ":{password}")?; + } + if 
self.user.is_some() || self.password.is_some() { + write!(f, "@")?; + } + f.write_str(host)?; + if let Some(port) = self.port { + write!(f, ":{port}")?; + } + } + + f.write_str(self.path)?; + + // The query is delimited by `?`; `#` introduces the fragment only. + if let Some(query) = self.query { + write!(f, "?{query}")?; + } + + if let Some(fragment) = self.fragment { + write!(f, "#{fragment}")?; + } + + Ok(()) + } +} + +macro_rules! components { + () => { + Components::default() + }; + ($($field:ident: $expr:expr),* $(,)?) => { + Components { + $( $field: components!(@field; $field: $expr) ),*, + .. Components::const_default() + } + }; + (@field; path: $expr:expr) => { + $expr + }; + (@field; $field:ident: None) => { + None + }; + (@field; $field:ident: $expr:expr) => { + Some($expr) + }; +} + +macro_rules! test_case { + // Name. + (@field=name; name: $value:expr, $($rest:tt)*) => { + $value + }; + // Description. + (@field=description; description: $value:expr, $($rest:tt)*) => { + Some($value) + }; + (@field=description;) => { + None + }; + // Composed. + (@field=composed; composed: $value:expr, $($rest:tt)*) => { + $value + }; + // Components. + (@field=components; components: { $($toks:tt)* }, $($rest:tt)*) => { + components! { $($toks)* } + }; + // Normalized URI. + (@field=normalized_uri; normalized_uri: $value:expr, $($rest:tt)*) => { + $value + }; + // Normalized IRI. + (@field=normalized_iri; normalized_iri: $value:expr, $($rest:tt)*) => { + $value + }; + // Normalized URI (WHATWG-like). + (@field=normalized_uri_whatwg_like; normalized_uri_whatwg_like: $value:expr, $($rest:tt)*) => { + Some($value) + }; + (@field=normalized_uri_whatwg_like;) => { + None + }; + // Normalized IRI (WHATWG-like). + (@field=normalized_iri_whatwg_like; normalized_iri_whatwg_like: $value:expr, $($rest:tt)*) => { + Some($value) + }; + (@field=normalized_iri_whatwg_like;) => { + None + }; + // Different IRIs. 
+ (@field=different_iris; different_iris: $value:expr, $($rest:tt)*) => { + $value + }; + (@field=different_iris;) => { + &[] + }; + // Fallback. + (@field=$name:ident; $field:ident: { $($toks:tt)* }, $($rest:tt)*) => { + test_case!(@field=$name; $($rest)*) + }; + // Fallback. + (@field=$name:ident; $field:ident: $value:expr, $($rest:tt)*) => { + test_case!(@field=$name; $($rest)*) + }; + ($($args:tt)*) => { + TestCase { + name: Some(test_case!(@field=name; $($args)*)), + description: test_case!(@field=description; $($args)*), + composed: test_case!(@field=composed; $($args)*), + components: test_case!(@field=components; $($args)*), + normalized_uri: test_case!(@field=normalized_uri; $($args)*), + normalized_iri: test_case!(@field=normalized_iri; $($args)*), + normalized_uri_whatwg_like: test_case!(@field=normalized_uri_whatwg_like; $($args)*), + normalized_iri_whatwg_like: test_case!(@field=normalized_iri_whatwg_like; $($args)*), + different_iris: test_case!(@field=different_iris; $($args)*), + } + }; +} + +macro_rules! test_cases { + ($({$($toks:tt)*}),* $(,)?) => { + &[ $( test_case! { $($toks)* } ),* ] + } +} + +#[allow(clippy::needless_update)] // For `components!` macro. 
+pub static TEST_CASES: &[TestCase<'static>] = test_cases![ + { + name: "typical example URI", + composed: "http://example.com/", + components: { + scheme: "http", + host: "example.com", + path: "/", + }, + normalized_uri: "http://example.com/", + normalized_iri: "http://example.com/", + }, + { + name: "typical example URI with user and password", + composed: "http://user:password@example.com/", + components: { + scheme: "http", + user: "user", + password: "password", + host: "example.com", + path: "/", + }, + normalized_uri: "http://user:password@example.com/", + normalized_iri: "http://user:password@example.com/", + }, + { + name: "URI with ASCII-only hostname with capital letters", + description: "ASCII-only hostname should be normalized to lower letters", + composed: "http://usER:passWORD@eXAMPLe.CoM/", + components: { + scheme: "http", + user: "usER", + password: "passWORD", + host: "eXAMPLe.CoM", + path: "/", + }, + normalized_uri: "http://usER:passWORD@example.com/", + normalized_iri: "http://usER:passWORD@example.com/", + }, + { + name: "IRI with non-ASCII hostname with capital letters", + description: "hostname with non-ASCII characters should not be normalized to lower letters", + composed: "http://usER:passWORD@\u{03B1}.CoM/", + components: { + scheme: "http", + user: "usER", + password: "passWORD", + host: "\u{03B1}.CoM", + path: "/", + }, + // The RFC 3986 (not 3987) spec is ambiguous: if the host contains percent-encoded + // non-ASCII characters, should other part of the host be lowercased? + // In this crate for now, the operations is implemented based on RFC 3987, i.e. + // even URI type internally checks whether the percent-encoded characters + // would be decoded to ASCII or not. 
+ normalized_uri: "http://usER:passWORD@%CE%B1.CoM/", + normalized_iri: "http://usER:passWORD@\u{03B1}.CoM/", + }, + { + name: "URI with all components set", + composed: "http://user:password@example.com:80/path/to/somewhere?query#fragment", + components: { + scheme: "http", + user: "user", + password: "password", + host: "example.com", + port: "80", + path: "/path/to/somewhere", + query: "query", + fragment: "fragment", + }, + normalized_uri: "http://user:password@example.com:80/path/to/somewhere?query#fragment", + normalized_iri: "http://user:password@example.com:80/path/to/somewhere?query#fragment", + }, + { + name: "URI that cannot be normalized by pure RFC 3986", + composed: "scheme:/.//not-a-host", + components: { + scheme: "scheme", + path: "/.//not-a-host", + }, + normalized_uri: "scheme:/.//not-a-host", + normalized_iri: "scheme:/.//not-a-host", + }, + { + name: "URI that cannot be normalized by pure RFC 3986", + composed: "scheme:..///not-a-host", + components: { + scheme: "scheme", + path: "..///not-a-host", + }, + normalized_uri: "scheme:/.//not-a-host", + normalized_iri: "scheme:/.//not-a-host", + normalized_uri_whatwg_like: "scheme:..///not-a-host", + normalized_iri_whatwg_like: "scheme:..///not-a-host", + }, + { + name: "Relative URI reference as a relative path `..`", + description: "Relative path without scheme and authority should not be normalized", + composed: "..", + components: { + path: "..", + }, + normalized_uri: "..", + normalized_iri: "..", + }, + { + name: "Relative URI reference as a relative path", + description: "Relative path without scheme and authority should not be normalized", + composed: "../foo/..", + components: { + path: "../foo/..", + }, + normalized_uri: "../foo/..", + normalized_iri: "../foo/..", + }, + { + name: "Relative URI reference as a relative path", + description: "Relative path without scheme and authority should not be normalized", + composed: "foo/../p%61th", + components: { + path: "foo/../p%61th", + }, + 
normalized_uri: "foo/../path", + normalized_iri: "foo/../path", + }, + { + name: "Relative path in an absolute URI", + composed: "scheme:foo/../p%61th", + components: { + scheme: "scheme", + path: "foo/../p%61th", + }, + normalized_uri: "scheme:/path", + normalized_iri: "scheme:/path", + normalized_uri_whatwg_like: "scheme:foo/../path", + normalized_iri_whatwg_like: "scheme:foo/../path", + }, + { + name: "Non-normalized URI", + composed: "HTTPs://EXaMPLE.COM/pA/Th?Query#Frag", + components: { + scheme: "HTTPs", + host: "EXaMPLE.COM", + path: "/pA/Th", + query: "Query", + fragment: "Frag", + }, + normalized_uri: "https://example.com/pA/Th?Query#Frag", + normalized_iri: "https://example.com/pA/Th?Query#Frag", + different_iris: &[ + "https://example.com/pa/th?Query#Frag", + "https://example.com/pA/Th?query#Frag", + "https://example.com/pA/Th?Query#frag", + ], + }, + { + name: "UUID URN", + composed: "urn:uuid:7f1450df-6678-465b-a881-188f9b6ec822", + components: { + scheme: "urn", + path: "uuid:7f1450df-6678-465b-a881-188f9b6ec822", + }, + normalized_uri: "urn:uuid:7f1450df-6678-465b-a881-188f9b6ec822", + normalized_iri: "urn:uuid:7f1450df-6678-465b-a881-188f9b6ec822", + different_iris: &[ + "urn:UUID:7f1450df-6678-465b-a881-188f9b6ec822", + "urn:uuid:7F1450DF-6678-465B-A881-188F9B6EC822", + ], + }, + { + name: "UUID URN", + composed: "URN:uuid:7f1450df-6678-465b-a881-188f9b6ec822", + components: { + scheme: "URN", + path: "uuid:7f1450df-6678-465b-a881-188f9b6ec822", + }, + normalized_uri: "urn:uuid:7f1450df-6678-465b-a881-188f9b6ec822", + normalized_iri: "urn:uuid:7f1450df-6678-465b-a881-188f9b6ec822", + different_iris: &[ + "urn:UUID:7f1450df-6678-465b-a881-188f9b6ec822", + "urn:uuid:7F1450DF-6678-465B-A881-188F9B6EC822", + ], + }, + { + name: "UUID URN", + composed: "URN:uuid:7f1450df-6678-465b-a881-188f9b6ec822", + components: { + scheme: "URN", + path: "uuid:7f1450df-6678-465b-a881-188f9b6ec822", + }, + normalized_uri: 
"urn:uuid:7f1450df-6678-465b-a881-188f9b6ec822", + normalized_iri: "urn:uuid:7f1450df-6678-465b-a881-188f9b6ec822", + different_iris: &[ + "urn:UUID:7f1450df-6678-465b-a881-188f9b6ec822", + "urn:uuid:7F1450DF-6678-465B-A881-188F9B6EC822", + ], + }, + { + name: "IRI with percent-encoded unreserved characters and non-valid UTF-8 bytes", + composed: "http://example.com/?a=%CE%B1&b=%CE%CE%B1%B1", + components: { + scheme: "http", + host: "example.com", + path: "/", + query: "a=%CE%B1&b=%CE%CE%B1%B1", + }, + normalized_uri: "http://example.com/?a=%CE%B1&b=%CE%CE%B1%B1", + normalized_iri: "http://example.com/?a=\u{03B1}&b=%CE\u{03B1}%B1", + }, + { + name: "not ASCII-only host", + composed: "SCHEME://Alpha%ce%b1/", + components: { + scheme: "SCHEME", + host: "Alpha%ce%b1", + path: "/", + }, + normalized_uri: "scheme://Alpha%CE%B1/", + normalized_iri: "scheme://Alpha\u{03B1}/", + }, + { + name: "URI with percent-encoded unreserevd and reserved characters", + description: "Tilde character (0x7e) is unreserved and bang (0x21) is reserved", + composed: "http://example.com/%7E%41%73%63%69%69%21", + components: { + scheme: "http", + host: "example.com", + path: "/%7E%41%73%63%69%69%21", + }, + normalized_uri: "http://example.com/~Ascii%21", + normalized_iri: "http://example.com/~Ascii%21", + }, + { + name: "not ASCII-only host", + description: "Plus character (0x2B) is reserved (sub-delim), so it should not be decoded in host part", + composed: "SCHEME://PLUS%2bPLUS/", + components: { + scheme: "SCHEME", + host: "PLUS%2bPLUS", + path: "/", + }, + normalized_uri: "scheme://plus%2Bplus/", + normalized_iri: "scheme://plus%2Bplus/", + }, + { + name: "empty port", + // <https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.3>: + // + // > URI producers and normalizers should omit the port component + // > and its ":" delimiter if port is empty or if its value would + // > be the same as that of the scheme's default. 
+ description: "According to RFC 3986 section 3.2.3, empty port should be omitted by normalization", + composed: "https://example.com:/", + components: { + scheme: "https", + host: "example.com", + port: "", + path: "/", + }, + normalized_uri: "https://example.com/", + normalized_iri: "https://example.com/", + }, + { + name: "URI with a dot-dot segment", + composed: "http://example.com/a/b/c/%2e%2e/d/e", + components: { + scheme: "http", + host: "example.com", + path: "/a/b/c/%2e%2e/d/e", + }, + normalized_uri: "http://example.com/a/b/d/e", + normalized_iri: "http://example.com/a/b/d/e", + }, + { + name: "URI with a dot-dot segment", + composed: "http://example.com/a/b/c/%2E%2E/d/e", + components: { + scheme: "http", + host: "example.com", + path: "/a/b/c/%2E%2E/d/e", + }, + normalized_uri: "http://example.com/a/b/d/e", + normalized_iri: "http://example.com/a/b/d/e", + }, + { + name: "URI with a dot-dot segment", + composed: "http://example.com/a/b/c/../d/e", + components: { + scheme: "http", + host: "example.com", + path: "/a/b/c/../d/e", + }, + normalized_uri: "http://example.com/a/b/d/e", + normalized_iri: "http://example.com/a/b/d/e", + }, + { + name: "URI with a dot-dot segment", + composed: "http://example.com/a/b/c/.%2e/d/e", + components: { + scheme: "http", + host: "example.com", + path: "/a/b/c/.%2e/d/e", + }, + normalized_uri: "http://example.com/a/b/d/e", + normalized_iri: "http://example.com/a/b/d/e", + }, + { + name: "URI with dot segments", + composed: "http://example.com/a/./././././b/c/.%2e/d/e", + components: { + scheme: "http", + host: "example.com", + path: "/a/./././././b/c/.%2e/d/e", + }, + normalized_uri: "http://example.com/a/b/d/e", + normalized_iri: "http://example.com/a/b/d/e", + }, + // START: Combination. 
+ { + name: "Empty relative IRI", + composed: "", + components: { + path: "", + }, + normalized_uri: "", + normalized_iri: "", + }, + { + name: "Combination: fragment", + composed: "#fragment", + components: { + fragment: "fragment", + }, + normalized_uri: "#fragment", + normalized_iri: "#fragment", + }, + { + name: "Combination: query", + composed: "?query", + components: { + query: "query", + }, + normalized_uri: "?query", + normalized_iri: "?query", + }, + { + name: "Combination: query+fragment", + composed: "?query#fragment", + components: { + query: "query", + fragment: "fragment", + }, + normalized_uri: "?query#fragment", + normalized_iri: "?query#fragment", + }, + { + name: "Combination: path", + composed: "/pa/th", + components: { + path: "/pa/th", + }, + normalized_uri: "/pa/th", + normalized_iri: "/pa/th", + }, + { + name: "Combination: path+fragment", + composed: "/pa/th#fragment", + components: { + path: "/pa/th", + fragment: "fragment", + }, + normalized_uri: "/pa/th#fragment", + normalized_iri: "/pa/th#fragment", + }, + { + name: "Combination: path+query", + composed: "/pa/th?query", + components: { + path: "/pa/th", + query: "query", + }, + normalized_uri: "/pa/th?query", + normalized_iri: "/pa/th?query", + }, + { + name: "Combination: path+query+fragment", + composed: "/pa/th?query#fragment", + components: { + path: "/pa/th", + query: "query", + fragment: "fragment", + }, + normalized_uri: "/pa/th?query#fragment", + normalized_iri: "/pa/th?query#fragment", + }, + { + name: "Combination: authority", + composed: "//authority", + components: { + host: "authority", + }, + normalized_uri: "//authority", + normalized_iri: "//authority", + }, + { + name: "Combination: authority+fragment", + composed: "//authority#fragment", + components: { + host: "authority", + fragment: "fragment", + }, + normalized_uri: "//authority#fragment", + normalized_iri: "//authority#fragment", + }, + { + name: "Combination: authority+query", + composed: "//authority?query", + 
components: { + host: "authority", + query: "query", + }, + normalized_uri: "//authority?query", + normalized_iri: "//authority?query", + }, + { + name: "Combination: authority+query+fragment", + composed: "//authority?query#fragment", + components: { + host: "authority", + query: "query", + fragment: "fragment", + }, + normalized_uri: "//authority?query#fragment", + normalized_iri: "//authority?query#fragment", + }, + { + name: "Combination: authority+path", + composed: "//authority/pa/th", + components: { + host: "authority", + path: "/pa/th", + }, + normalized_uri: "//authority/pa/th", + normalized_iri: "//authority/pa/th", + }, + { + name: "Combination: authority+path+fragment", + composed: "//authority/pa/th#fragment", + components: { + host: "authority", + path: "/pa/th", + fragment: "fragment", + }, + normalized_uri: "//authority/pa/th#fragment", + normalized_iri: "//authority/pa/th#fragment", + }, + { + name: "Combination: authority+path+query", + composed: "//authority/pa/th?query", + components: { + host: "authority", + path: "/pa/th", + query: "query", + }, + normalized_uri: "//authority/pa/th?query", + normalized_iri: "//authority/pa/th?query", + }, + { + name: "Combination: authority+path+query+fragment", + composed: "//authority/pa/th?query#fragment", + components: { + host: "authority", + path: "/pa/th", + query: "query", + fragment: "fragment", + }, + normalized_uri: "//authority/pa/th?query#fragment", + normalized_iri: "//authority/pa/th?query#fragment", + }, + { + name: "Combination: scheme", + composed: "scheme:", + components: { + scheme: "scheme", + }, + normalized_uri: "scheme:", + normalized_iri: "scheme:", + }, + { + name: "Combination: scheme+fragment", + composed: "scheme:#fragment", + components: { + scheme: "scheme", + fragment: "fragment", + }, + normalized_uri: "scheme:#fragment", + normalized_iri: "scheme:#fragment", + }, + { + name: "Combination: scheme+query", + composed: "scheme:?query", + components: { + scheme: "scheme", + query: 
"query", + }, + normalized_uri: "scheme:?query", + normalized_iri: "scheme:?query", + }, + { + name: "Combination: scheme+query+fragment", + composed: "scheme:?query#fragment", + components: { + scheme: "scheme", + query: "query", + fragment: "fragment", + }, + normalized_uri: "scheme:?query#fragment", + normalized_iri: "scheme:?query#fragment", + }, + { + name: "Combination: scheme+path", + composed: "scheme:/pa/th", + components: { + scheme: "scheme", + path: "/pa/th", + }, + normalized_uri: "scheme:/pa/th", + normalized_iri: "scheme:/pa/th", + }, + { + name: "Combination: scheme+path+fragment", + composed: "scheme:/pa/th#fragment", + components: { + scheme: "scheme", + path: "/pa/th", + fragment: "fragment", + }, + normalized_uri: "scheme:/pa/th#fragment", + normalized_iri: "scheme:/pa/th#fragment", + }, + { + name: "Combination: scheme+path+query", + composed: "scheme:/pa/th?query", + components: { + scheme: "scheme", + path: "/pa/th", + query: "query", + }, + normalized_uri: "scheme:/pa/th?query", + normalized_iri: "scheme:/pa/th?query", + }, + { + name: "Combination: scheme+path+query+fragment", + composed: "scheme:/pa/th?query#fragment", + components: { + scheme: "scheme", + path: "/pa/th", + query: "query", + fragment: "fragment", + }, + normalized_uri: "scheme:/pa/th?query#fragment", + normalized_iri: "scheme:/pa/th?query#fragment", + }, + { + name: "Combination: scheme+authority", + composed: "scheme://authority", + components: { + scheme: "scheme", + host: "authority", + }, + normalized_uri: "scheme://authority", + normalized_iri: "scheme://authority", + }, + { + name: "Combination: scheme+authority+fragment", + composed: "scheme://authority#fragment", + components: { + scheme: "scheme", + host: "authority", + fragment: "fragment", + }, + normalized_uri: "scheme://authority#fragment", + normalized_iri: "scheme://authority#fragment", + }, + { + name: "Combination: scheme+authority+query", + composed: "scheme://authority?query", + components: { + scheme: 
"scheme", + host: "authority", + query: "query", + }, + normalized_uri: "scheme://authority?query", + normalized_iri: "scheme://authority?query", + }, + { + name: "Combination: scheme+authority+query+fragment", + composed: "scheme://authority?query#fragment", + components: { + scheme: "scheme", + host: "authority", + query: "query", + fragment: "fragment", + }, + normalized_uri: "scheme://authority?query#fragment", + normalized_iri: "scheme://authority?query#fragment", + }, + { + name: "Combination: scheme+authority+path", + composed: "scheme://authority/pa/th", + components: { + scheme: "scheme", + host: "authority", + path: "/pa/th", + }, + normalized_uri: "scheme://authority/pa/th", + normalized_iri: "scheme://authority/pa/th", + }, + { + name: "Combination: scheme+authority+path+fragment", + composed: "scheme://authority/pa/th#fragment", + components: { + scheme: "scheme", + host: "authority", + path: "/pa/th", + fragment: "fragment", + }, + normalized_uri: "scheme://authority/pa/th#fragment", + normalized_iri: "scheme://authority/pa/th#fragment", + }, + { + name: "Combination: scheme+authority+path+query", + composed: "scheme://authority/pa/th?query", + components: { + scheme: "scheme", + host: "authority", + path: "/pa/th", + query: "query", + }, + normalized_uri: "scheme://authority/pa/th?query", + normalized_iri: "scheme://authority/pa/th?query", + }, + { + name: "Combination: scheme+authority+path+query+fragment", + composed: "scheme://authority/pa/th?query#fragment", + components: { + scheme: "scheme", + host: "authority", + path: "/pa/th", + query: "query", + fragment: "fragment", + }, + normalized_uri: "scheme://authority/pa/th?query#fragment", + normalized_iri: "scheme://authority/pa/th?query#fragment", + }, + // END: Combination. 
+ { + name: "1 slash following to the scheme", + composed: "scheme:/", + components: { + scheme: "scheme", + path: "/", + }, + normalized_uri: "scheme:/", + normalized_iri: "scheme:/", + }, + { + name: "2 slashes following to the scheme", + composed: "scheme://", + components: { + scheme: "scheme", + host: "", + }, + normalized_uri: "scheme://", + normalized_iri: "scheme://", + }, + { + name: "3 slashes following to the scheme", + composed: "scheme:///", + components: { + scheme: "scheme", + host: "", + path: "/", + }, + normalized_uri: "scheme:///", + normalized_iri: "scheme:///", + }, + { + name: "4 slashes following to the scheme", + composed: "scheme:////", + components: { + scheme: "scheme", + host: "", + path: "//", + }, + normalized_uri: "scheme:////", + normalized_iri: "scheme:////", + }, + { + name: "5 slashes following to the scheme", + composed: "scheme://///", + components: { + scheme: "scheme", + host: "", + path: "///", + }, + normalized_uri: "scheme://///", + normalized_iri: "scheme://///", + }, + { + name: "1 slash", + composed: "/", + components: { + path: "/", + }, + normalized_uri: "/", + normalized_iri: "/", + }, + { + name: "2 slash", + composed: "//", + components: { + host: "", + }, + normalized_uri: "//", + normalized_iri: "//", + }, + { + name: "3 slash", + composed: "///", + components: { + host: "", + path: "/", + }, + normalized_uri: "///", + normalized_iri: "///", + }, + { + name: "4 slash", + composed: "////", + components: { + host: "", + path: "//", + }, + normalized_uri: "////", + normalized_iri: "////", + }, + { + name: "5 slash", + composed: "/////", + components: { + host: "", + path: "///", + }, + normalized_uri: "/////", + normalized_iri: "/////", + }, + { + name: "IPv4 address", + composed: "//192.0.2.0", + components: { + host: "192.0.2.0", + }, + normalized_uri: "//192.0.2.0", + normalized_iri: "//192.0.2.0", + }, + { + name: "IPv4 address with port", + composed: "//192.0.2.0:80", + components: { + host: "192.0.2.0", + port: 
"80", + }, + normalized_uri: "//192.0.2.0:80", + normalized_iri: "//192.0.2.0:80", + }, + { + name: "IPv4 address", + composed: "//255.255.255.255", + components: { + host: "255.255.255.255", + }, + normalized_uri: "//255.255.255.255", + normalized_iri: "//255.255.255.255", + }, + { + name: "IPv4 address with port", + composed: "//255.255.255.255:65536", + components: { + host: "255.255.255.255", + port: "65536", + }, + normalized_uri: "//255.255.255.255:65536", + normalized_iri: "//255.255.255.255:65536", + }, + { + name: "IPv4 address", + composed: "//0.0.0.0", + components: { + host: "0.0.0.0", + }, + normalized_uri: "//0.0.0.0", + normalized_iri: "//0.0.0.0", + }, + { + name: "IPv4 address with port", + composed: "//0.0.0.0:0", + components: { + host: "0.0.0.0", + port: "0", + }, + normalized_uri: "//0.0.0.0:0", + normalized_iri: "//0.0.0.0:0", + }, + { + name: "IPv6 address", + composed: "//[2001:db8::]", + components: { + host: "[2001:db8::]", + }, + normalized_uri: "//[2001:db8::]", + normalized_iri: "//[2001:db8::]", + }, + { + name: "IPv6 address with port", + composed: "//[2001:db8::]:80", + components: { + host: "[2001:db8::]", + port: "80", + }, + normalized_uri: "//[2001:db8::]:80", + normalized_iri: "//[2001:db8::]:80", + }, + { + name: "IPv6 address", + composed: "//[2001:0db8::]", + components: { + host: "[2001:0db8::]", + }, + normalized_uri: "//[2001:0db8::]", + normalized_iri: "//[2001:0db8::]", + }, + { + name: "IPv6 address with port", + composed: "//[2001:0db8::]:80", + components: { + host: "[2001:0db8::]", + port: "80", + }, + normalized_uri: "//[2001:0db8::]:80", + normalized_iri: "//[2001:0db8::]:80", + }, + { + name: "IPv6 address", + composed: "//[2001:0db8:0:0:0:0:0:ffff]", + components: { + host: "[2001:0db8:0:0:0:0:0:ffff]", + }, + normalized_uri: "//[2001:0db8:0:0:0:0:0:ffff]", + normalized_iri: "//[2001:0db8:0:0:0:0:0:ffff]", + }, + { + name: "IPv6 address with port", + composed: "//[2001:0db8:0:0:0:0:0:ffff]:80", + components: { + 
host: "[2001:0db8:0:0:0:0:0:ffff]", + port: "80", + }, + normalized_uri: "//[2001:0db8:0:0:0:0:0:ffff]:80", + normalized_iri: "//[2001:0db8:0:0:0:0:0:ffff]:80", + }, + { + name: "IPv6 address", + composed: "//[2001:0DB8:0000:0000:0000:000A:BCDE:FFFF]", + components: { + host: "[2001:0DB8:0000:0000:0000:000A:BCDE:FFFF]", + }, + normalized_uri: "//[2001:0db8:0000:0000:0000:000a:bcde:ffff]", + normalized_iri: "//[2001:0db8:0000:0000:0000:000a:bcde:ffff]", + }, + { + name: "IPv6 address with port", + composed: "//[2001:0DB8:0000:0000:0000:000A:BCDE:FFFF]:80", + components: { + host: "[2001:0DB8:0000:0000:0000:000A:BCDE:FFFF]", + port: "80", + }, + normalized_uri: "//[2001:0db8:0000:0000:0000:000a:bcde:ffff]:80", + normalized_iri: "//[2001:0db8:0000:0000:0000:000a:bcde:ffff]:80", + }, + { + name: "IPv6 address", + composed: "//[2001:0db8::]", + components: { + host: "[2001:0db8::]", + }, + normalized_uri: "//[2001:0db8::]", + normalized_iri: "//[2001:0db8::]", + }, + { + name: "IPv6 address with port", + composed: "//[2001:0db8::]:80", + components: { + host: "[2001:0db8::]", + port: "80", + }, + normalized_uri: "//[2001:0db8::]:80", + normalized_iri: "//[2001:0db8::]:80", + }, + { + name: "IPv6 address", + composed: "//[2001:0DB8:0:0:0:0::1]", + components: { + host: "[2001:0DB8:0:0:0:0::1]", + }, + normalized_uri: "//[2001:0db8:0:0:0:0::1]", + normalized_iri: "//[2001:0db8:0:0:0:0::1]", + }, + { + name: "IPv6 address with port", + composed: "//[2001:0DB8:0:0:0:0::1]:80", + components: { + host: "[2001:0DB8:0:0:0:0::1]", + port: "80", + }, + normalized_uri: "//[2001:0db8:0:0:0:0::1]:80", + normalized_iri: "//[2001:0db8:0:0:0:0::1]:80", + }, + { + name: "IPv6 address", + composed: "//[2001:0db8::89ab:cdef:89AB:CDEF]", + components: { + host: "[2001:0db8::89ab:cdef:89AB:CDEF]", + }, + normalized_uri: "//[2001:0db8::89ab:cdef:89ab:cdef]", + normalized_iri: "//[2001:0db8::89ab:cdef:89ab:cdef]", + }, + { + name: "IPv6 address with port", + composed: 
"//[2001:0db8::89ab:cdef:89AB:CDEF]:80", + components: { + host: "[2001:0db8::89ab:cdef:89AB:CDEF]", + port: "80", + }, + normalized_uri: "//[2001:0db8::89ab:cdef:89ab:cdef]:80", + normalized_iri: "//[2001:0db8::89ab:cdef:89ab:cdef]:80", + }, + { + name: "IPv6 address", + composed: "//[2001:0db8::1]", + components: { + host: "[2001:0db8::1]", + }, + normalized_uri: "//[2001:0db8::1]", + normalized_iri: "//[2001:0db8::1]", + }, + { + name: "IPv6 address with port", + composed: "//[2001:0db8::1]:80", + components: { + host: "[2001:0db8::1]", + port: "80", + }, + normalized_uri: "//[2001:0db8::1]:80", + normalized_iri: "//[2001:0db8::1]:80", + }, + { + name: "IPv6 address", + composed: "//[2001:0db8:0::1]", + components: { + host: "[2001:0db8:0::1]", + }, + normalized_uri: "//[2001:0db8:0::1]", + normalized_iri: "//[2001:0db8:0::1]", + }, + { + name: "IPv6 address with port", + composed: "//[2001:0db8:0::1]:80", + components: { + host: "[2001:0db8:0::1]", + port: "80", + }, + normalized_uri: "//[2001:0db8:0::1]:80", + normalized_iri: "//[2001:0db8:0::1]:80", + }, + { + name: "IPv6 address", + composed: "//[2001:0db8:0:0::1]", + components: { + host: "[2001:0db8:0:0::1]", + }, + normalized_uri: "//[2001:0db8:0:0::1]", + normalized_iri: "//[2001:0db8:0:0::1]", + }, + { + name: "IPv6 address with port", + composed: "//[2001:0db8:0:0::1]:80", + components: { + host: "[2001:0db8:0:0::1]", + port: "80", + }, + normalized_uri: "//[2001:0db8:0:0::1]:80", + normalized_iri: "//[2001:0db8:0:0::1]:80", + }, + { + name: "IPv6 address", + composed: "//[2001:0db8:0:0:0::1]", + components: { + host: "[2001:0db8:0:0:0::1]", + }, + normalized_uri: "//[2001:0db8:0:0:0::1]", + normalized_iri: "//[2001:0db8:0:0:0::1]", + }, + { + name: "IPv6 address with port", + composed: "//[2001:0db8:0:0:0::1]:80", + components: { + host: "[2001:0db8:0:0:0::1]", + port: "80", + }, + normalized_uri: "//[2001:0db8:0:0:0::1]:80", + normalized_iri: "//[2001:0db8:0:0:0::1]:80", + }, + { + name: "IPv6 
address", + composed: "//[2001:0db8:0:0:0:0::1]", + components: { + host: "[2001:0db8:0:0:0:0::1]", + }, + normalized_uri: "//[2001:0db8:0:0:0:0::1]", + normalized_iri: "//[2001:0db8:0:0:0:0::1]", + }, + { + name: "IPv6 address with port", + composed: "//[2001:0db8:0:0:0:0::1]:80", + components: { + host: "[2001:0db8:0:0:0:0::1]", + port: "80", + }, + normalized_uri: "//[2001:0db8:0:0:0:0::1]:80", + normalized_iri: "//[2001:0db8:0:0:0:0::1]:80", + }, + { + name: "IPv6 address", + composed: "//[2001:0db8::0:1]", + components: { + host: "[2001:0db8::0:1]", + }, + normalized_uri: "//[2001:0db8::0:1]", + normalized_iri: "//[2001:0db8::0:1]", + }, + { + name: "IPv6 address with port", + composed: "//[2001:0db8::0:1]:80", + components: { + host: "[2001:0db8::0:1]", + port: "80", + }, + normalized_uri: "//[2001:0db8::0:1]:80", + normalized_iri: "//[2001:0db8::0:1]:80", + }, + { + name: "IPv6 address", + composed: "//[2001:0db8::0:0:1]", + components: { + host: "[2001:0db8::0:0:1]", + }, + normalized_uri: "//[2001:0db8::0:0:1]", + normalized_iri: "//[2001:0db8::0:0:1]", + }, + { + name: "IPv6 address with port", + composed: "//[2001:0db8::0:0:1]:80", + components: { + host: "[2001:0db8::0:0:1]", + port: "80", + }, + normalized_uri: "//[2001:0db8::0:0:1]:80", + normalized_iri: "//[2001:0db8::0:0:1]:80", + }, + { + name: "IPv6 address", + composed: "//[2001:0db8::0:0:0:1]", + components: { + host: "[2001:0db8::0:0:0:1]", + }, + normalized_uri: "//[2001:0db8::0:0:0:1]", + normalized_iri: "//[2001:0db8::0:0:0:1]", + }, + { + name: "IPv6 address with port", + composed: "//[2001:0db8::0:0:0:1]:80", + components: { + host: "[2001:0db8::0:0:0:1]", + port: "80", + }, + normalized_uri: "//[2001:0db8::0:0:0:1]:80", + normalized_iri: "//[2001:0db8::0:0:0:1]:80", + }, + { + name: "IPv6 address", + composed: "//[2001:0db8::0:0:0:0:1]", + components: { + host: "[2001:0db8::0:0:0:0:1]", + }, + normalized_uri: "//[2001:0db8::0:0:0:0:1]", + normalized_iri: "//[2001:0db8::0:0:0:0:1]", + }, + 
{ + name: "IPv6 address with port", + composed: "//[2001:0db8::0:0:0:0:1]:80", + components: { + host: "[2001:0db8::0:0:0:0:1]", + port: "80", + }, + normalized_uri: "//[2001:0db8::0:0:0:0:1]:80", + normalized_iri: "//[2001:0db8::0:0:0:0:1]:80", + }, + { + name: "IPvFuture address", + composed: "//[v9999.this-is-future-version-of-ip-address:::::::::]", + components: { + host: "[v9999.this-is-future-version-of-ip-address:::::::::]", + }, + normalized_uri: "//[v9999.this-is-future-version-of-ip-address:::::::::]", + normalized_iri: "//[v9999.this-is-future-version-of-ip-address:::::::::]", + }, + { + name: "IPvFuture address with port", + composed: "//[v9999.this-is-future-version-of-ip-address:::::::::]:80", + components: { + host: "[v9999.this-is-future-version-of-ip-address:::::::::]", + port: "80", + }, + normalized_uri: "//[v9999.this-is-future-version-of-ip-address:::::::::]:80", + normalized_iri: "//[v9999.this-is-future-version-of-ip-address:::::::::]:80", + }, + { + name: "Too large port", + description: "RFC 3986 accepts `*DIGIT` as `port` component", + composed: "//localhost:999999999", + components: { + host: "localhost", + port: "999999999", + }, + normalized_uri: "//localhost:999999999", + normalized_iri: "//localhost:999999999", + }, + { + name: "Port only", + description: "`host` can be empty", + composed: "//:999999999", + components: { + host: "", + port: "999999999", + }, + normalized_uri: "//:999999999", + normalized_iri: "//:999999999", + }, + { + name: "Trailing slash should remain after normalization", + description: "Trailing slash should remain after normalization if the path ends with slash", + composed: "https://example.com/../../", + components: { + scheme: "https", + host: "example.com", + path: "/../../", + }, + normalized_uri: "https://example.com/", + normalized_iri: "https://example.com/", + }, + { + name: "Slash should remain", + description: "Slash should remain after normalization if the IRI ends with a dot segment", + composed: 
"https://example.com/..", + components: { + scheme: "https", + host: "example.com", + path: "/..", + }, + normalized_uri: "https://example.com/", + normalized_iri: "https://example.com/", + }, + { + name: "Slash should remain", + description: "Slash should remain after normalization if the IRI ends with a dot segment", + composed: "https://example.com/.", + components: { + scheme: "https", + host: "example.com", + path: "/.", + }, + normalized_uri: "https://example.com/", + normalized_iri: "https://example.com/", + }, + { + name: "WHATWG URL Standard serialization", + composed: "scheme:/a/b/../..//c", + components: { + scheme: "scheme", + path: "/a/b/../..//c", + }, + normalized_uri: "scheme:/.//c", + normalized_iri: "scheme:/.//c", + }, + { + name: "WHATWG URL Standard serialization", + composed: "scheme:/a/b/../..//c", + components: { + scheme: "scheme", + path: "/a/b/../..//c", + }, + normalized_uri: "scheme:/.//c", + normalized_iri: "scheme:/.//c", + }, + { + name: "redundant UTF-8 encoding (1 byte inflated to 2 bytes)", + composed: "scheme:/%C0%AE", + components: { + scheme: "scheme", + path: "/%C0%AE", + }, + normalized_uri: "scheme:/%C0%AE", + normalized_iri: "scheme:/%C0%AE", + }, + { + name: "redundant UTF-8 encoding (1 byte inflated to 3 bytes)", + composed: "scheme:/%E0%80%AE", + components: { + scheme: "scheme", + path: "/%E0%80%AE", + }, + normalized_uri: "scheme:/%E0%80%AE", + normalized_iri: "scheme:/%E0%80%AE", + }, + { + name: "redundant UTF-8 encoding (1 byte inflated to 4 bytes)", + composed: "scheme:/%F0%80%80%AE", + components: { + scheme: "scheme", + path: "/%F0%80%80%AE", + }, + normalized_uri: "scheme:/%F0%80%80%AE", + normalized_iri: "scheme:/%F0%80%80%AE", + }, + { + name: "redundant UTF-8 encoding (2 byte inflated to 3 bytes)", + composed: "scheme:/%E0%8E%B1", + components: { + scheme: "scheme", + path: "/%E0%8E%B1", + }, + normalized_uri: "scheme:/%E0%8E%B1", + normalized_iri: "scheme:/%E0%8E%B1", + }, + { + name: "redundant UTF-8 
encoding (2 byte inflated to 4 bytes)", + composed: "scheme:/%F0%80%8E%B1", + components: { + scheme: "scheme", + path: "/%F0%80%8E%B1", + }, + normalized_uri: "scheme:/%F0%80%8E%B1", + normalized_iri: "scheme:/%F0%80%8E%B1", + }, + { + name: "redundant UTF-8 encoding (3 byte inflated to 4 bytes)", + composed: "scheme:/%F0%83%82%A4", + components: { + scheme: "scheme", + path: "/%F0%83%82%A4", + }, + normalized_uri: "scheme:/%F0%83%82%A4", + normalized_iri: "scheme:/%F0%83%82%A4", + }, + { + name: "non-UTF-8 percent encoding (starts with invaild byte)", + composed: "scheme:/%FF", + components: { + scheme: "scheme", + path: "/%FF", + }, + normalized_uri: "scheme:/%FF", + normalized_iri: "scheme:/%FF", + }, + { + name: "non-UTF-8 percent encoding (starts with continue byte)", + composed: "scheme:/%BF%BF", + components: { + scheme: "scheme", + path: "/%BF%BF", + }, + normalized_uri: "scheme:/%BF%BF", + normalized_iri: "scheme:/%BF%BF", + }, + { + name: "non-UTF-8 percent encoding (expected 2 bytes, invalid at 2nd byte)", + composed: "scheme:/%CE%FF", + components: { + scheme: "scheme", + path: "/%CE%FF", + }, + normalized_uri: "scheme:/%CE%FF", + normalized_iri: "scheme:/%CE%FF", + }, + { + name: "non-UTF-8 percent encoding (expected 2 bytes, starts again at 2nd byte)", + composed: "scheme:/%CE%CE%B1", + components: { + scheme: "scheme", + path: "/%CE%CE%B1", + }, + normalized_uri: "scheme:/%CE%CE%B1", + normalized_iri: "scheme:/%CE\u{03B1}", + }, + { + name: "non-UTF-8 percent encoding (expected 3 bytes, invalid at 2nd byte)", + composed: "scheme:/%E3%FF%A4", + components: { + scheme: "scheme", + path: "/%E3%FF%A4", + }, + normalized_uri: "scheme:/%E3%FF%A4", + normalized_iri: "scheme:/%E3%FF%A4", + }, + { + name: "non-UTF-8 percent encoding (expected 3 bytes, starts again at 2nd byte)", + composed: "scheme:/%E3%E3%82%A4", + components: { + scheme: "scheme", + path: "/%E3%E3%82%A4", + }, + normalized_uri: "scheme:/%E3%E3%82%A4", + normalized_iri: 
"scheme:/%E3\u{30A4}", + }, + { + name: "non-UTF-8 percent encoding (expected 3 bytes, invalid at 3rd byte)", + composed: "scheme:/%E3%82%FF", + components: { + scheme: "scheme", + path: "/%E3%82%FF", + }, + normalized_uri: "scheme:/%E3%82%FF", + normalized_iri: "scheme:/%E3%82%FF", + }, + { + name: "non-UTF-8 percent encoding (expected 3 bytes, starts again at 3rd byte)", + composed: "scheme:/%E3%82%E3%82%A4", + components: { + scheme: "scheme", + path: "/%E3%82%E3%82%A4", + }, + normalized_uri: "scheme:/%E3%82%E3%82%A4", + normalized_iri: "scheme:/%E3%82\u{30A4}", + }, + { + name: "non-UTF-8 percent encoding (expected 4 bytes, invalid at 2nd byte)", + composed: "scheme:/%F0%FF%8D%A3", + components: { + scheme: "scheme", + path: "/%F0%FF%8D%A3", + }, + normalized_uri: "scheme:/%F0%FF%8D%A3", + normalized_iri: "scheme:/%F0%FF%8D%A3", + }, + { + name: "non-UTF-8 percent encoding (expected 4 bytes, starts again at 2nd byte)", + composed: "scheme:/%F0%F0%9F%8D%A3", + components: { + scheme: "scheme", + path: "/%F0%F0%9F%8D%A3", + }, + normalized_uri: "scheme:/%F0%F0%9F%8D%A3", + normalized_iri: "scheme:/%F0\u{1F363}", + }, + { + name: "non-UTF-8 percent encoding (expected 4 bytes, invalid at 3rd byte)", + composed: "scheme:/%F0%9F%FF%A3", + components: { + scheme: "scheme", + path: "/%F0%9F%FF%A3", + }, + normalized_uri: "scheme:/%F0%9F%FF%A3", + normalized_iri: "scheme:/%F0%9F%FF%A3", + }, + { + name: "non-UTF-8 percent encoding (expected 4 bytes, starts again at 3rd byte)", + composed: "scheme:/%F0%9F%F0%9F%8D%A3", + components: { + scheme: "scheme", + path: "/%F0%9F%F0%9F%8D%A3", + }, + normalized_uri: "scheme:/%F0%9F%F0%9F%8D%A3", + normalized_iri: "scheme:/%F0%9F\u{1F363}", + }, + { + name: "non-UTF-8 percent encoding (expected 4 bytes, invalid at 4th byte)", + composed: "scheme:/%F0%9F%8D%FF", + components: { + scheme: "scheme", + path: "/%F0%9F%8D%FF", + }, + normalized_uri: "scheme:/%F0%9F%8D%FF", + normalized_iri: "scheme:/%F0%9F%8D%FF", + }, + { + name: 
"non-UTF-8 percent encoding (expected 4 bytes, starts again at 4th byte)", + composed: "scheme:/%F0%9F%8D%F0%9F%8D%A3", + components: { + scheme: "scheme", + path: "/%F0%9F%8D%F0%9F%8D%A3", + }, + normalized_uri: "scheme:/%F0%9F%8D%F0%9F%8D%A3", + normalized_iri: "scheme:/%F0%9F%8D\u{1F363}", + }, + { + name: "non-UTF-8 percent encoding (high-surrogate)", + composed: "scheme:/%ED%A0%A0", + components: { + scheme: "scheme", + path: "/%ED%A0%A0", + }, + normalized_uri: "scheme:/%ED%A0%A0", + normalized_iri: "scheme:/%ED%A0%A0", + }, + { + name: "non-UTF-8 percent encoding (out of range, larger than U+10FFFF)", + composed: "scheme:/%F4%90%80%80", + components: { + scheme: "scheme", + path: "/%F4%90%80%80", + }, + normalized_uri: "scheme:/%F4%90%80%80", + normalized_iri: "scheme:/%F4%90%80%80", + }, + { + name: "non-UTF-8 percent encoding, followed by valid pct encoding", + composed: "scheme:/%CE%2E%2E", + components: { + scheme: "scheme", + path: "/%CE%2E%2E", + }, + normalized_uri: "scheme:/%CE..", + normalized_iri: "scheme:/%CE..", + }, + { + name: "non-UTF-8 percent encoding, followed by valid pct encoding", + composed: "scheme:/%CE%FF%2E", + components: { + scheme: "scheme", + path: "/%CE%FF%2E", + }, + normalized_uri: "scheme:/%CE%FF.", + normalized_iri: "scheme:/%CE%FF.", + }, +]; diff --git a/vendor/iri-string/tests/gh-issues.rs b/vendor/iri-string/tests/gh-issues.rs new file mode 100644 index 00000000..6556d7a6 --- /dev/null +++ b/vendor/iri-string/tests/gh-issues.rs @@ -0,0 +1,197 @@ +//! Test cases for issues reported on GitHub. 
+ +#[macro_use] +mod utils; + +use iri_string::types::UriReferenceStr; + +mod issue_17 { + use super::*; + + #[test] + fn ipv6_literal_authority_host() { + let uri = UriReferenceStr::new("//[::1]").expect("valid relative URI"); + let authority = uri + .authority_components() + .expect("the URI has authority `[::1]`"); + assert_eq!(authority.host(), "[::1]"); + } + + #[test] + fn extra_trailing_colon_in_ipv6_literal() { + assert!(UriReferenceStr::new("//[::1:]").is_err()); + } + + #[test] + fn ipvfuture_literal_capital_v() { + assert!(UriReferenceStr::new("//[v0.0]").is_ok()); + assert!(UriReferenceStr::new("//[V0.0]").is_ok()); + } + + #[test] + fn ipvfuture_empty_part() { + assert!( + UriReferenceStr::new("//[v0.]").is_err(), + "address should not be empty" + ); + assert!( + UriReferenceStr::new("//[v.0]").is_err(), + "version should not be empty" + ); + assert!( + UriReferenceStr::new("//[v.]").is_err(), + "neither address nor version should be empty" + ); + } +} + +mod issue_36 { + use super::*; + + #[cfg(feature = "alloc")] + use iri_string::format::ToDedicatedString; + use iri_string::types::UriAbsoluteStr; + + // "/.//.".resolve_against("a:/") + // => "a:" + remove_dot_segments("/.//.") + // + // STEP OUTPUT BUFFER INPUT BUFFER + // 1 : /.//. + // 2B: //. + // 2E: / /. + // 2B: / / + // 2E: // + // (see RFC 3986 section 5.2.4 for this notation.) + // + // => "a://" + // + // However, this is invalid since it should be semantically + // `<scheme="a">:<path="//">` but this string will be parsed as + // `<scheme="a">://<path="">`. So, `./` should be inserted to break + // `//` at the beginning of the path part. 
+ #[test] + fn abnormal_resolution() { + let base = UriAbsoluteStr::new("a:/").expect("valid absolute URI"); + { + let relative = UriReferenceStr::new("/.//.").expect("valid relative URI"); + let result = relative.resolve_against(base); + + assert!( + result.ensure_rfc3986_normalizable().is_err(), + "strict RFC 3986 resolution should fail for base={:?}, ref={:?}", + base, + relative + ); + assert_eq_display!( + result, + "a:/.//", + "resolution result will be modified using serialization by WHATWG URL Standard" + ); + } + { + let relative = UriReferenceStr::new(".//.").expect("valid relative URI"); + let result = relative.resolve_against(base); + + assert!( + result.ensure_rfc3986_normalizable().is_err(), + "strict RFC 3986 resolution should fail for base={:?}, ref={:?}", + base, + relative + ); + assert_eq_display!( + result, + "a:/.//", + "resolution result will be modified using serialization by WHATWG URL Standard" + ); + } + } + + #[test] + fn abnormal_normalization() { + let uri = UriAbsoluteStr::new("a:/.//.").expect("valid absolute URI"); + + let normalized = uri.normalize(); + assert!( + normalized.ensure_rfc3986_normalizable().is_err(), + "strict RFC 3986 normalization should fail for uri={:?}", + uri + ); + assert_eq_display!( + normalized, + "a:/.//", + "normalization result will be modified using serialization by WHATWG URL Standard" + ); + + #[cfg(feature = "alloc")] + { + assert!( + !normalized.to_dedicated_string().is_normalized_rfc3986(), + "not normalizable by strict RFC 3986 algorithm" + ); + } + } + + #[test] + fn abnormal_normalization2() { + { + let uri = UriAbsoluteStr::new("a:/bar//.").expect("valid absolute URI"); + assert_eq_display!(uri.normalize(), "a:/bar//"); + } + { + let uri = UriAbsoluteStr::new("a:/bar/..//.").expect("valid absolute URI"); + assert_eq_display!( + uri.normalize(), + "a:/.//", + "normalization result will be modified using serialization by WHATWG URL Standard" + ); + } + { + let uri = 
UriAbsoluteStr::new("a:/.//bar/.").expect("valid absolute URI"); + assert_eq_display!( + uri.normalize(), + "a:/.//bar/", + "normalization result will be modified using serialization by WHATWG URL Standard" + ); + } + { + let uri = UriAbsoluteStr::new("a:/././././././foo/./.././././././././././/.") + .expect("valid absolute URI"); + assert_eq_display!( + uri.normalize(), + "a:/.//", + "normalization result will be modified using serialization by WHATWG URL Standard" + ); + } + } + + #[test] + fn normalization_pct_triplet_loss() { + let uri = UriAbsoluteStr::new("a://%92%99").expect("valid absolute URI"); + assert_eq_display!(uri.normalize(), "a://%92%99"); + // Other problems are found during fixing this bug. The test cases for + // them have been added to generic test case data source. + } +} + +/// <https://github.com/lo48576/iri-string/pull/46> +#[cfg(feature = "alloc")] +mod issue_46 { + use iri_string::types::{UriFragmentStr, UriRelativeString}; + + #[test] + fn set_fragment_to_relative() { + let mut uri = + UriRelativeString::try_from("//user:password@example.com/path?query#frag.old") + .expect("valid relative URI"); + assert_eq!(uri, "//user:password@example.com/path?query#frag.old"); + assert_eq!(uri.fragment_str(), Some("frag.old")); + + uri.set_fragment(None); + assert_eq!(uri, "//user:password@example.com/path?query"); + assert_eq!(uri.fragment(), None); + + let frag_new = UriFragmentStr::new("frag-new").expect("valid URI fragment"); + uri.set_fragment(Some(frag_new)); + assert_eq!(uri.fragment_str(), Some("frag-new")); + } +} diff --git a/vendor/iri-string/tests/iri.rs b/vendor/iri-string/tests/iri.rs new file mode 100644 index 00000000..3da68ee0 --- /dev/null +++ b/vendor/iri-string/tests/iri.rs @@ -0,0 +1,95 @@ +//! Tests specific to IRIs (not URIs). 
+ +#[macro_use] +mod utils; + +use iri_string::format::write_to_slice; +#[cfg(feature = "alloc")] +use iri_string::format::ToDedicatedString; +#[cfg(feature = "alloc")] +use iri_string::types::IriReferenceString; +use iri_string::types::{IriReferenceStr, UriReferenceStr}; + +#[derive(Debug, Clone, Copy)] +struct TestCase { + iri: &'static str, + uri: &'static str, +} + +// `[(iri, uri)]`. +const CASES: &[TestCase] = &[ + TestCase { + iri: "?alpha=\u{03B1}", + uri: "?alpha=%CE%B1", + }, + TestCase { + iri: "?katakana-letter-i=\u{30A4}", + uri: "?katakana-letter-i=%E3%82%A4", + }, + TestCase { + iri: "?sushi=\u{1f363}", + uri: "?sushi=%F0%9F%8D%A3", + }, +]; + +#[test] +fn iri_to_uri() { + let mut buf = [0_u8; 256]; + let mut buf2 = [0_u8; 256]; + + for case in CASES.iter().copied() { + let expected = UriReferenceStr::new(case.uri).expect("should be valid URI reference"); + + let iri = IriReferenceStr::new(case.iri).expect("should be valid URI reference"); + let encoded = iri.encode_to_uri(); + assert_eq_display!(encoded, expected); + let encoded_uri = write_to_slice(&mut buf, &encoded).expect("not enough buffer"); + let encoded_uri = UriReferenceStr::new(encoded_uri).expect("should be valid URI reference"); + assert_eq!(encoded_uri, expected); + + let encoded_again = AsRef::<IriReferenceStr>::as_ref(encoded_uri).encode_to_uri(); + assert_eq_display!(encoded_again, expected); + let encoded_again_uri = + write_to_slice(&mut buf2, &encoded_again).expect("not enough buffer"); + let encoded_again_uri = + UriReferenceStr::new(encoded_again_uri).expect("should be valid URI reference"); + assert_eq!(encoded_again_uri, expected); + } +} + +#[cfg(feature = "alloc")] +#[test] +fn iri_to_uri_allocated() { + for case in CASES.iter().copied() { + let expected = UriReferenceStr::new(case.uri).expect("should be valid URI reference"); + + let iri = IriReferenceStr::new(case.iri).expect("should be valid URI reference"); + let encoded = iri.encode_to_uri().to_dedicated_string(); + 
assert_eq!(encoded, expected); + + let encoded_again = AsRef::<IriReferenceStr>::as_ref(&encoded) + .encode_to_uri() + .to_dedicated_string(); + assert_eq!(encoded_again, expected); + } +} + +#[cfg(feature = "alloc")] +#[test] +fn iri_to_uri_inline() { + for case in CASES.iter().copied() { + let expected = UriReferenceStr::new(case.uri).expect("should be valid URI reference"); + + let mut iri = + IriReferenceString::try_from(case.iri).expect("should be valid URI reference"); + + iri.encode_to_uri_inline(); + assert_eq!(iri, expected); + + iri.encode_to_uri_inline(); + assert_eq!( + iri, expected, + "``encode_to_uri_inline()` method should be idempotent" + ); + } +} diff --git a/vendor/iri-string/tests/normalize.rs b/vendor/iri-string/tests/normalize.rs new file mode 100644 index 00000000..12911af8 --- /dev/null +++ b/vendor/iri-string/tests/normalize.rs @@ -0,0 +1,218 @@ +//! Tests for normalization. + +mod components; +#[macro_use] +mod utils; + +#[cfg(feature = "alloc")] +use iri_string::format::ToDedicatedString; +use iri_string::types::*; + +use self::components::TEST_CASES; + +/// Semantically different IRIs should not be normalized into the same IRI. +#[test] +fn different_iris() { + for case in TEST_CASES + .iter() + .filter(|case| !case.different_iris.is_empty()) + { + let normalized = IriStr::new(case.normalized_iri).expect("should be valid IRI reference"); + for other in case.different_iris.iter().copied() { + let other = IriStr::new(other).expect("should be valid IRI reference"); + assert_ne!( + normalized, other, + "<{}> should not be normalized to <{other}>, case={case:#?}", + case.composed + ); + } + } +} + +/// Normalization should work for IRI. 
+#[test] +fn normalize_uri() { + for case in TEST_CASES + .iter() + .filter(|case| case.is_uri_class() && case.is_absolute()) + { + let source = UriStr::new(case.composed).expect("should be valid URI"); + let normalized = source.normalize(); + let expected = UriStr::new(case.normalized_uri).expect("should be valid URI"); + + assert_eq_display!(normalized, expected, "case={case:#?}"); + #[cfg(feature = "alloc")] + assert_eq!(normalized.to_string(), expected.as_str(), "case={case:#?}"); + #[cfg(feature = "alloc")] + assert_eq!(normalized.to_dedicated_string(), expected, "case={case:#?}"); + + assert_eq!( + case.is_rfc3986_normalizable(), + normalized.ensure_rfc3986_normalizable().is_ok(), + "case={case:#?}" + ); + } +} + +/// Normalization should work for IRI. +#[test] +fn normalize_iri() { + for case in TEST_CASES + .iter() + .filter(|case| case.is_iri_class() && case.is_absolute()) + { + let source = IriStr::new(case.composed).expect("should be valid IRI"); + let normalized = source.normalize(); + let expected = IriStr::new(case.normalized_iri).expect("should be valid IRI"); + + assert_eq_display!(normalized, expected, "case={case:#?}"); + #[cfg(feature = "alloc")] + assert_eq!(normalized.to_string(), expected.as_str(), "case={case:#?}"); + #[cfg(feature = "alloc")] + assert_eq!(normalized.to_dedicated_string(), expected, "case={case:#?}"); + + assert_eq!( + case.is_rfc3986_normalizable(), + normalized.ensure_rfc3986_normalizable().is_ok(), + "case={case:#?}" + ); + } +} + +/// WHATWG-like normalization should work for IRI. 
+#[test] +fn normalize_uri_whatwg_like() { + for case in TEST_CASES + .iter() + .filter(|case| case.is_uri_class() && case.is_absolute()) + { + let source = UriStr::new(case.composed).expect("should be valid URI"); + let normalized = source.normalize_but_preserve_authorityless_relative_path(); + let expected = UriStr::new( + case.normalized_uri_whatwg_like + .unwrap_or(case.normalized_uri), + ) + .expect("should be valid URI"); + + assert_eq_display!(normalized, expected, "case={case:#?}"); + #[cfg(feature = "alloc")] + assert_eq!(normalized.to_string(), expected.as_str(), "case={case:#?}"); + #[cfg(feature = "alloc")] + assert_eq!(normalized.to_dedicated_string(), expected, "case={case:#?}"); + + assert_eq!( + case.is_rfc3986_normalizable(), + normalized.ensure_rfc3986_normalizable().is_ok(), + "case={case:#?}" + ); + } +} + +/// WHATWG-like normalization should work for IRI. +#[test] +fn normalize_iri_whatwg_like() { + for case in TEST_CASES + .iter() + .filter(|case| case.is_iri_class() && case.is_absolute()) + { + let source = IriStr::new(case.composed).expect("should be valid IRI"); + let normalized = source.normalize_but_preserve_authorityless_relative_path(); + let expected = IriStr::new( + case.normalized_iri_whatwg_like + .unwrap_or(case.normalized_iri), + ) + .expect("should be valid IRI"); + + assert_eq_display!(normalized, expected, "case={case:#?}"); + #[cfg(feature = "alloc")] + assert_eq!(normalized.to_string(), expected.as_str(), "case={case:#?}"); + #[cfg(feature = "alloc")] + assert_eq!(normalized.to_dedicated_string(), expected, "case={case:#?}"); + + assert_eq!( + case.is_rfc3986_normalizable(), + normalized.ensure_rfc3986_normalizable().is_ok(), + "case={case:#?}" + ); + } +} + +/// Normalization should be idempotent. 
+#[test] +fn normalize_idempotent() { + let mut buf = [0_u8; 512]; + + for case in TEST_CASES + .iter() + .filter(|case| case.is_iri_class() && case.is_absolute()) + { + let source = IriStr::new(case.composed).expect("should be valid IRI"); + let normalized = source.normalize(); + let expected = IriStr::new(case.normalized_iri).expect("should be valid IRI"); + + let normalized_s = + iri_string::format::write_to_slice(&mut buf, &normalized).expect("not enough buffer"); + let normalized_s = IriStr::new(normalized_s).expect("should be valid IRI reference"); + + // Normalize again. + let normalized_again = normalized_s.normalize(); + assert_eq_display!(normalized_again, expected, "case={case:#?}"); + } +} + +/// Normalizedness checks. +#[test] +fn normalizedness() { + #[derive(Debug, Clone, Copy)] + struct Case { + iri: &'static str, + is_normalized_default: bool, + is_normalized_rfc3986: bool, + is_normalized_whatwg_like: bool, + } + const CASES: &[Case] = &[ + Case { + iri: "scheme:/.//foo", + is_normalized_default: true, + is_normalized_rfc3986: false, + is_normalized_whatwg_like: true, + }, + Case { + iri: "scheme:.///foo", + is_normalized_default: false, + is_normalized_rfc3986: false, + is_normalized_whatwg_like: true, + }, + Case { + iri: "scheme://authority/.//foo", + is_normalized_default: false, + is_normalized_rfc3986: false, + is_normalized_whatwg_like: false, + }, + Case { + iri: "scheme:relative/..//foo", + is_normalized_default: false, + is_normalized_rfc3986: false, + is_normalized_whatwg_like: true, + }, + ]; + + for case in CASES { + let iri = IriStr::new(case.iri).expect("should be valid IRI"); + assert_eq!( + iri.is_normalized(), + case.is_normalized_default, + "case={case:?}" + ); + assert_eq!( + iri.is_normalized_rfc3986(), + case.is_normalized_rfc3986, + "case={case:?}" + ); + assert_eq!( + iri.is_normalized_but_authorityless_relative_path_preserved(), + case.is_normalized_whatwg_like, + "case={case:?}" + ); + } +} diff --git 
a/vendor/iri-string/tests/percent_encode.rs b/vendor/iri-string/tests/percent_encode.rs new file mode 100644 index 00000000..96c8f0e8 --- /dev/null +++ b/vendor/iri-string/tests/percent_encode.rs @@ -0,0 +1,174 @@ +//! Tests for percent encoding. + +#[cfg(feature = "alloc")] +extern crate alloc; + +#[macro_use] +mod utils; + +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::string::ToString; + +use iri_string::percent_encode::{PercentEncodedForIri, PercentEncodedForUri}; + +#[test] +fn regname_uri() { + let encoded = PercentEncodedForUri::from_reg_name("alpha.\u{03B1}.reg.name"); + let expected = "alpha.%CE%B1.reg.name"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn regname_iri() { + let encoded = PercentEncodedForIri::from_reg_name("alpha.\u{03B1}.reg.name"); + let expected = "alpha.\u{03B1}.reg.name"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn path_segment_uri() { + let encoded = PercentEncodedForUri::from_path_segment("\u{03B1}/<alpha>?#"); + let expected = "%CE%B1%2F%3Calpha%3E%3F%23"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn path_segment_iri() { + let encoded = PercentEncodedForIri::from_path_segment("\u{03B1}/<alpha>?#"); + let expected = "\u{03B1}%2F%3Calpha%3E%3F%23"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn path_uri() { + let encoded = PercentEncodedForUri::from_path("\u{03B1}/<alpha>?#"); + let expected = "%CE%B1/%3Calpha%3E%3F%23"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn path_iri() { + let encoded = PercentEncodedForIri::from_path("\u{03B1}/<alpha>?#"); + let expected = 
"\u{03B1}/%3Calpha%3E%3F%23"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn query_uri() { + let encoded = PercentEncodedForUri::from_query("\u{03B1}/<alpha>?#"); + let expected = "%CE%B1/%3Calpha%3E?%23"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn query_iri() { + let encoded = PercentEncodedForIri::from_query("\u{03B1}/<alpha>?#"); + let expected = "\u{03B1}/%3Calpha%3E?%23"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn fragment_uri() { + let encoded = PercentEncodedForUri::from_fragment("\u{03B1}/<alpha>?#"); + let expected = "%CE%B1/%3Calpha%3E?%23"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn fragment_iri() { + let encoded = PercentEncodedForIri::from_fragment("\u{03B1}/<alpha>?#"); + let expected = "\u{03B1}/%3Calpha%3E?%23"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn unreserve_uri_unreserved() { + let encoded = PercentEncodedForUri::unreserve("%a0-._~\u{03B1}"); + let expected = "%25a0-._~%CE%B1"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn unreserve_iri_unreserved() { + let encoded = PercentEncodedForIri::unreserve("%a0-._~\u{03B1}"); + let expected = "%25a0-._~\u{03B1}"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn unreserve_uri_reserved() { + let encoded = PercentEncodedForUri::unreserve(":/?#[]@ !$&'()*+,;="); + let expected = "%3A%2F%3F%23%5B%5D%40%20%21%24%26%27%28%29%2A%2B%2C%3B%3D"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] 
+ assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn unreserve_iri_reserved() { + let encoded = PercentEncodedForIri::unreserve(":/?#[]@ !$&'()*+,;="); + let expected = "%3A%2F%3F%23%5B%5D%40%20%21%24%26%27%28%29%2A%2B%2C%3B%3D"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn characters_uri_unreserved() { + let encoded = PercentEncodedForUri::characters("%a0-._~\u{03B1}"); + let expected = "%25a0-._~%CE%B1"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn characters_iri_unreserved() { + let encoded = PercentEncodedForIri::characters("%a0-._~\u{03B1}"); + let expected = "%25a0-._~\u{03B1}"; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn characters_uri_reserved() { + let encoded = PercentEncodedForUri::characters(":/?#[]@ !$&'()*+,;="); + let expected = ":/?#[]@%20!$&'()*+,;="; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} + +#[test] +fn characters_iri_reserved() { + let encoded = PercentEncodedForIri::characters(":/?#[]@ !$&'()*+,;="); + let expected = ":/?#[]@%20!$&'()*+,;="; + assert_eq_display!(encoded, expected); + #[cfg(feature = "alloc")] + assert_eq!(encoded.to_string(), expected); +} diff --git a/vendor/iri-string/tests/resolve.rs b/vendor/iri-string/tests/resolve.rs new file mode 100644 index 00000000..2bb8569f --- /dev/null +++ b/vendor/iri-string/tests/resolve.rs @@ -0,0 +1,473 @@ +//! Tests for IRI resolution. 
+ +mod components; +#[macro_use] +mod utils; +mod resolve_refimpl; + +use iri_string::format::write_to_slice; +#[cfg(feature = "alloc")] +use iri_string::format::ToDedicatedString; +use iri_string::resolve::FixedBaseResolver; +use iri_string::types::*; + +#[cfg(feature = "alloc")] +use self::resolve_refimpl::resolve as resolve_refimpl; + +/// Test cases for strict resolvers. +// [(base, [(reference, output, Option<output_normalized>)])] +#[allow(clippy::type_complexity)] +const TEST_CASES: &[(&str, &[(&str, &str, Option<&str>)])] = &[ + // RFC 3986, section 5.2.4. + ("scheme:///a/b/c/./../../", &[("g", "scheme:///a/g", None)]), + ("scheme:///a/b/c/./../", &[("../g", "scheme:///a/g", None)]), + ("scheme:///a/b/c/./", &[("../../g", "scheme:///a/g", None)]), + ("scheme:///a/b/c/", &[("./../../g", "scheme:///a/g", None)]), + ("scheme:///a/b/", &[("c/./../../g", "scheme:///a/g", None)]), + ("scheme:///a/", &[("b/c/./../../g", "scheme:///a/g", None)]), + ("scheme:///", &[("a/b/c/./../../g", "scheme:///a/g", None)]), + ("scheme:mid/content=5/../", &[("6", "scheme:mid/6", None)]), + ("scheme:mid/content=5/", &[("../6", "scheme:mid/6", None)]), + ("scheme:mid/", &[("content=5/../6", "scheme:mid/6", None)]), + ("scheme:", &[("mid/content=5/../6", "scheme:mid/6", None)]), + // RFC 3986, section 5.4.1. 
+ ( + "http://a/b/c/d;p?q", + &[ + ("g:h", "g:h", None), + ("g", "http://a/b/c/g", None), + ("./g", "http://a/b/c/g", None), + ("g/", "http://a/b/c/g/", None), + ("/g", "http://a/g", None), + ("//g", "http://g", None), + ("?y", "http://a/b/c/d;p?y", None), + ("g?y", "http://a/b/c/g?y", None), + ("#s", "http://a/b/c/d;p?q#s", None), + ("g#s", "http://a/b/c/g#s", None), + ("g?y#s", "http://a/b/c/g?y#s", None), + (";x", "http://a/b/c/;x", None), + ("g;x", "http://a/b/c/g;x", None), + ("g;x?y#s", "http://a/b/c/g;x?y#s", None), + ("", "http://a/b/c/d;p?q", None), + (".", "http://a/b/c/", None), + ("./", "http://a/b/c/", None), + ("..", "http://a/b/", None), + ("../", "http://a/b/", None), + ("../g", "http://a/b/g", None), + ("../..", "http://a/", None), + ("../../", "http://a/", None), + ("../../g", "http://a/g", None), + ], + ), + // RFC 3986, section 5.4.2. + ( + "http://a/b/c/d;p?q", + &[ + ("../../../g", "http://a/g", None), + ("../../../../g", "http://a/g", None), + ("/./g", "http://a/g", None), + ("/../g", "http://a/g", None), + ("g.", "http://a/b/c/g.", None), + (".g", "http://a/b/c/.g", None), + ("g..", "http://a/b/c/g..", None), + ("..g", "http://a/b/c/..g", None), + ("./../g", "http://a/b/g", None), + ("./g/.", "http://a/b/c/g/", None), + ("g/./h", "http://a/b/c/g/h", None), + ("g/../h", "http://a/b/c/h", None), + ("g;x=1/./y", "http://a/b/c/g;x=1/y", None), + ("g;x=1/../y", "http://a/b/c/y", None), + ("g?y/./x", "http://a/b/c/g?y/./x", None), + ("g?y/../x", "http://a/b/c/g?y/../x", None), + ("g#s/./x", "http://a/b/c/g#s/./x", None), + ("g#s/../x", "http://a/b/c/g#s/../x", None), + ("http:g", "http:g", None), + ], + ), + // Custom cases. + ( + "http://a/b/c/d/e/../..", + &[ + // `/a/b/c/d/e/../..` but without dot segments removal. 
+ ("", "http://a/b/c/d/e/../..", Some("http://a/b/c/")), + // `/a/b/c/d/e/../..` + ("..", "http://a/b/c/", None), + // `/a/b/c/d/e/../../` + ("../", "http://a/b/c/", None), + // `/a/b/c/d/e/../.` + (".", "http://a/b/c/d/", None), + // `/a/b/c/d/e/.././` + ("./", "http://a/b/c/d/", None), + // `/a/b/c/d/e/../..?query` but without dot segments removal. + ( + "?query", + "http://a/b/c/d/e/../..?query", + Some("http://a/b/c/?query"), + ), + // `/a/b/c/d/e/../..#frag` but without dot segments removal. + ( + "#frag", + "http://a/b/c/d/e/../..#frag", + Some("http://a/b/c/#frag"), + ), + // If the authority is specified, paths won't be merged. + ("http://example.com", "http://example.com", None), + ("http://example.com/", "http://example.com/", None), + // If the path of the reference is not empty, remove_dot_segments is applied. + ("http://example.com/..", "http://example.com/", None), + // If the scheme is specified, paths won't be merged. + ("scheme:", "scheme:", None), + ("scheme:foo#frag", "scheme:foo#frag", None), + ], + ), + // Custom cases. + ( + "https://a/b/c", + &[ + ("", "https://a/b/c", None), + ("x/", "https://a/b/x/", None), + ("x//", "https://a/b/x//", None), + ("x///", "https://a/b/x///", None), + ("x//y", "https://a/b/x//y", None), + ("x//y/", "https://a/b/x//y/", None), + ("x//y//", "https://a/b/x//y//", None), + // `/b/x//..//y//`. + // STEP OUTPUT BUFFER INPUT BUFFER + // 1 : /b/x//..//y// + // 2E: /b /x//..//y// + // 2E: /b/x //..//y// + // 2E: /b/x/ /..//y// + // 2C: /b/x //y// + // 2E: /b/x/ /y// + // 2E: /b/x//y // + // 2E: /b/x//y/ / + // 2E: /b/x//y// + ("x//..//y//", "https://a/b/x//y//", None), + ], + ), + // Custom cases. + ( + "scheme:a/b/c", + &[ + ("", "scheme:a/b/c", None), + ("x/", "scheme:a/b/x/", None), + ("x//", "scheme:a/b/x//", None), + ("x///", "scheme:a/b/x///", None), + ("x//y", "scheme:a/b/x//y", None), + ("x//y/", "scheme:a/b/x//y/", None), + // `a/b/x//..//y//`. 
+ // STEP OUTPUT BUFFER INPUT BUFFER + // 1 : a/b/x//..//y// + // 2E: a /b/x//..//y// + // 2E: a/b /x//..//y// + // 2E: a/b/x //..//y// + // 2E: a/b/x/ /..//y// + // 2C: a/b/x //y// + // 2E: a/b/x/ /y// + // 2E: a/b/x//y // + // 2E: a/b/x//y/ / + // 2E: a/b/x//y// + ("x//..//y//", "scheme:a/b/x//y//", None), + ], + ), + // Custom cases. + ( + "scheme:a", + &[ + // `x/../..`. + // STEP OUTPUT BUFFER INPUT BUFFER + // 1 : x/../.. + // 2E: x /../.. + // 2C: /.. + // 2C: / + // 2E: / + ("x/../..", "scheme:/", None), + // `x/../../y`. + // STEP OUTPUT BUFFER INPUT BUFFER + // 1 : x/../../y + // 2E: x /../../y + // 2C: /../y + // 2C: /y + // 2E: /y + ("x/../../y", "scheme:/y", None), + ], + ), + // Custom cases. + // Empty base path should be considered as `/` when the base authority is present. + ( + "scheme://host", + &[ + ("", "scheme://host", None), + (".", "scheme://host/", None), + ("..", "scheme://host/", None), + ("foo", "scheme://host/foo", None), + ], + ), + // Custom cases. + ( + "HTTP://USER:PASS@EXAMPLE.COM:80/1/2/3/4/.././5/../6/?QUERY", + &[( + "A/b/c/d/e/f/g/h/i/../../../j/k/l/../../../../m/n/./o", + "HTTP://USER:PASS@EXAMPLE.COM:80/1/2/3/6/A/b/c/d/e/m/n/o", + Some("http://USER:PASS@example.com:80/1/2/3/6/A/b/c/d/e/m/n/o"), + )], + ), + ( + "HTTP://USER:PASS@EXAMPLE.COM:/%7e/2/beta=%CE%B2/4/.././5/../6/", + &[( + "a/b/alpha=%CE%B1/d/e/f/g/h/i/../../../j/k/l/../../../../%3c/%7e/./%3e?query#fragment", + "HTTP://USER:PASS@EXAMPLE.COM:/%7e/2/beta=%CE%B2/6/a/b/alpha=%CE%B1/d/e/%3c/%7e/%3e?query#fragment", + Some("http://USER:PASS@example.com/~/2/beta=\u{03B2}/6/a/b/alpha=\u{03B1}/d/e/%3C/~/%3E?query#fragment") + )], + ), + ( + "http://user:pass@example.com:/%7e/2/beta=%ce%b2/4/.././5/../6/", + &[( + "a/b/alpha=%ce%b1/d/e/f/g/h/i/../../../j/k/l/../../../../%3c/%7e/./%3e?query#fragment", + "http://user:pass@example.com:/%7e/2/beta=%ce%b2/6/a/b/alpha=%ce%b1/d/e/%3c/%7e/%3e?query#fragment", + 
Some("http://user:pass@example.com/~/2/beta=\u{03B2}/6/a/b/alpha=\u{03B1}/d/e/%3C/~/%3E?query#fragment") + )], + ), +]; + +#[test] +fn resolve() { + for (base, pairs) in TEST_CASES { + let base = IriAbsoluteStr::new(base).expect("should be valid base IRI"); + + for (target, expected, _normalized_expected) in *pairs { + let target = IriReferenceStr::new(target).expect("should be valid IRI reference"); + let resolved = target.resolve_against(base); + assert_eq_display!(resolved, expected, "base={base:?}, target={target:?}"); + + #[cfg(feature = "alloc")] + assert_eq!( + resolved.to_dedicated_string().as_str(), + *expected, + "base={base:?}, target={target:?}" + ); + } + } +} + +#[test] +fn resolve_normalize() { + for (base, pairs) in TEST_CASES { + let base = IriAbsoluteStr::new(base).expect("should be valid base IRI"); + + for (target, expected, expected_normalized) in *pairs { + let target = IriReferenceStr::new(target).expect("should be valid IRI reference"); + let resolved_normalized = target.resolve_against(base).and_normalize(); + let expected = expected_normalized.unwrap_or(*expected); + assert_eq_display!( + resolved_normalized, + expected, + "base={base:?}, target={target:?}" + ); + + #[cfg(feature = "alloc")] + assert_eq!( + resolved_normalized.to_dedicated_string().as_str(), + expected, + "base={base:?}, target={target:?}" + ); + } + } +} + +#[test] +fn fixed_base_resolver() { + for (base, pairs) in TEST_CASES { + let base = IriAbsoluteStr::new(base).expect("should be valid base IRI"); + let resolver = FixedBaseResolver::new(base); + + for (target, expected, _normalized_expected) in *pairs { + let target = IriReferenceStr::new(target).expect("should be valid IRI reference"); + let resolved = resolver.resolve(target); + + assert_eq_display!(resolved, expected, "base={base:?}, target={target:?}"); + #[cfg(feature = "alloc")] + assert_eq!( + resolved.to_dedicated_string().as_str(), + *expected, + "base={base:?}, target={target:?}" + ); + } + } +} + 
+#[cfg(feature = "alloc")] +#[test] +fn same_result_as_reference_impl() { + for (base, pairs) in TEST_CASES { + let base = IriAbsoluteStr::new(base).expect("should be valid base IRI"); + + for (target, expected, _normalized_expected) in *pairs { + let target = IriReferenceStr::new(target).expect("should be valid IRI reference"); + let resolved = target.resolve_against(base).to_dedicated_string(); + + let expected_refimpl = resolve_refimpl(target, base); + assert_eq!( + *expected, expected_refimpl, + "base={base:?}, target={target:?}" + ); + assert_eq!( + resolved, expected_refimpl, + "base={base:?}, target={target:?}" + ); + } + } +} + +#[test] +fn percent_encoded_dots() { + // [(base, ref, result)] + const TEST_CASES: &[(&str, &str, &str)] = &[ + ("scheme:", ".", "scheme:"), + ("scheme:", "%2e", "scheme:"), + ("scheme:", "%2E", "scheme:"), + ("scheme://a", ".", "scheme://a/"), + ("scheme://a", "%2e", "scheme://a/"), + ("scheme://a", "%2E", "scheme://a/"), + ("scheme://a/b/c", ".", "scheme://a/b/"), + ("scheme://a/b/c", "%2e", "scheme://a/b/"), + ("scheme://a/b/c", "%2E", "scheme://a/b/"), + ("scheme://a/b/c", "./g", "scheme://a/b/g"), + ("scheme://a/b/c", "%2e/g", "scheme://a/b/g"), + ("scheme://a/b/c", "%2E/g", "scheme://a/b/g"), + ("scheme://a/b/c/d/e/f", "../../../g", "scheme://a/b/g"), + ( + "scheme://a/b/c/d/e/f", + "%2E%2E/%2E%2e/%2E./g", + "scheme://a/b/g", + ), + ( + "scheme://a/b/c/d/e/f", + "%2e%2E/%2e%2e/%2e./g", + "scheme://a/b/g", + ), + ("scheme://a/b/c/d/e/f", ".%2E/.%2e/../g", "scheme://a/b/g"), + ]; + + for (base, reference, expected) in TEST_CASES { + let base = IriAbsoluteStr::new(base).expect("should be valid base IRI"); + let reference = IriReferenceStr::new(reference).expect("should be valid IRI reference"); + + let resolved = reference.resolve_against(base); + assert_eq_display!(resolved, *expected); + #[cfg(feature = "alloc")] + assert_eq!(resolved.to_dedicated_string(), *expected); + } +} + +#[test] +fn 
write_to_slice_dont_require_extra_capacity() { + let mut buf = [0_u8; 128]; + + for (base, pairs) in TEST_CASES { + let base = IriAbsoluteStr::new(base).expect("should be valid base IRI"); + let resolver = FixedBaseResolver::new(base); + + for (target, expected, _normalized_expected) in *pairs { + if expected.is_empty() { + continue; + } + + let target = IriReferenceStr::new(target).expect("should be valid IRI reference"); + let resolved = resolver.resolve(target); + + let result_small = write_to_slice(&mut buf[..expected.len() - 1], &resolved); + assert!(result_small.is_err(), "should fail due to too small buffer"); + + let result_enough = write_to_slice(&mut buf[..expected.len()], &resolved); + assert!(result_enough.is_ok(), "buffer should have enough size"); + assert_eq!( + result_enough.unwrap(), + *expected, + "correct result should be written" + ); + } + } +} + +#[test] +fn resolution_result_live_longer_than_fixed_base_resolver() { + let mut buf = [0_u8; 128]; + + let base = IriAbsoluteStr::new("http://example.com/").expect("should be valid base IRI"); + let reference = IriReferenceStr::new("foo/bar").expect("should be valid IRI reference"); + + let resolved = { + let resolver = FixedBaseResolver::new(base); + resolver.resolve(reference) + }; + // Note that `the result of `resolver.resolve()` is still alive here. 
+ let result = write_to_slice(&mut buf, &resolved).expect("`buf` should have enough capacity"); + assert_eq!(result, "http://example.com/foo/bar"); +} + +#[test] +fn uri_resolution_against_self_with_normalization() { + for case in components::TEST_CASES + .iter() + .filter(|case| case.is_uri_class() && case.is_absolute()) + { + let reference = UriStr::new(case.composed).expect("should be valid URI"); + let resolved_normalized = AsRef::<UriReferenceStr>::as_ref(reference) + .resolve_against(reference.to_absolute()) + .and_normalize(); + + assert_eq_display!(resolved_normalized, case.normalized_uri, "case={case:#?}"); + #[cfg(feature = "alloc")] + assert_eq!( + resolved_normalized.to_string(), + case.normalized_uri, + "case={case:#?}" + ); + #[cfg(feature = "alloc")] + assert_eq!( + resolved_normalized.to_dedicated_string(), + case.normalized_uri, + "case={case:#?}" + ); + + assert_eq!( + case.is_rfc3986_normalizable(), + resolved_normalized.ensure_rfc3986_normalizable().is_ok(), + "case={case:#?}" + ); + } +} + +#[test] +fn iri_resolution_against_self_with_normalization() { + for case in components::TEST_CASES + .iter() + .filter(|case| case.is_iri_class() && case.is_absolute()) + { + let reference = IriStr::new(case.composed).expect("should be valid IRI"); + let resolved_normalized = AsRef::<IriReferenceStr>::as_ref(reference) + .resolve_against(reference.to_absolute()) + .and_normalize(); + + assert_eq_display!(resolved_normalized, case.normalized_iri, "case={case:#?}"); + #[cfg(feature = "alloc")] + assert_eq!( + resolved_normalized.to_string(), + case.normalized_iri, + "case={case:#?}" + ); + #[cfg(feature = "alloc")] + assert_eq!( + resolved_normalized.to_dedicated_string(), + case.normalized_iri, + "case={case:#?}" + ); + + assert_eq!( + case.is_rfc3986_normalizable(), + resolved_normalized.ensure_rfc3986_normalizable().is_ok(), + "case={case:#?}" + ); + } +} diff --git a/vendor/iri-string/tests/resolve_refimpl/mod.rs 
b/vendor/iri-string/tests/resolve_refimpl/mod.rs new file mode 100644 index 00000000..b84db693 --- /dev/null +++ b/vendor/iri-string/tests/resolve_refimpl/mod.rs @@ -0,0 +1,179 @@ +//! Reference implementation based on RFC 3986 section 5. +#![cfg(feature = "alloc")] + +extern crate alloc; + +use alloc::format; +#[cfg(not(feature = "std"))] +use alloc::string::String; + +use iri_string::spec::Spec; +use iri_string::types::{RiAbsoluteStr, RiReferenceStr, RiString}; + +fn to_major_components<S: Spec>( + s: &RiReferenceStr<S>, +) -> (Option<&str>, Option<&str>, &str, Option<&str>, Option<&str>) { + ( + s.scheme_str(), + s.authority_str(), + s.path_str(), + s.query().map(|s| s.as_str()), + s.fragment().map(|s| s.as_str()), + ) +} + +/// Resolves the relative IRI. +/// +/// See <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.2.2>. +pub(super) fn resolve<S: Spec>( + reference: &RiReferenceStr<S>, + base: &RiAbsoluteStr<S>, +) -> RiString<S> { + let (r_scheme, r_authority, r_path, r_query, r_fragment) = to_major_components(reference); + let (b_scheme, b_authority, b_path, b_query, _) = to_major_components(base.as_ref()); + + let t_scheme: &str; + let t_authority: Option<&str>; + let t_path: String; + let t_query: Option<&str>; + + if let Some(r_scheme) = r_scheme { + t_scheme = r_scheme; + t_authority = r_authority; + t_path = remove_dot_segments(r_path.into()); + t_query = r_query; + } else { + if r_authority.is_some() { + t_authority = r_authority; + t_path = remove_dot_segments(r_path.into()); + t_query = r_query; + } else { + if r_path.is_empty() { + t_path = b_path.into(); + if r_query.is_some() { + t_query = r_query; + } else { + t_query = b_query; + } + } else { + if r_path.starts_with('/') { + t_path = remove_dot_segments(r_path.into()); + } else { + t_path = remove_dot_segments(merge(b_path, r_path, b_authority.is_some())); + } + t_query = r_query; + } + t_authority = b_authority; + } + t_scheme = b_scheme.expect("non-relative IRI must have a scheme"); + } 
/// Merges a relative-path reference with the path of the base.
///
/// `base_authority_defined` tells whether the base has an authority
/// component; together with an empty base path this selects the
/// "prepend a single slash" rule.
///
/// See <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.2.3>.
fn merge(base_path: &str, ref_path: &str, base_authority_defined: bool) -> String {
    let kept_prefix = if base_authority_defined && base_path.is_empty() {
        "/"
    } else {
        // Everything up to and including the last slash of the base path,
        // or nothing at all when the base path has no slash.
        match base_path.rfind('/') {
            Some(last_slash) => &base_path[..=last_slash],
            None => "",
        }
    };

    let mut merged = String::with_capacity(kept_prefix.len() + ref_path.len());
    merged.push_str(kept_prefix);
    merged.push_str(ref_path);
    merged
}

/// Removes dot segments (`.` and `..`) from the path.
///
/// The step labels in the comments (2A to 2E) are those of the RFC algorithm.
///
/// See <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.2.4>.
fn remove_dot_segments(input: String) -> String {
    let mut output = String::new();
    // Not-yet-processed tail of the input buffer.
    let mut rest: &str = &input;

    while !rest.is_empty() {
        if let Some(tail) = rest.strip_prefix("../").or_else(|| rest.strip_prefix("./")) {
            // 2A: drop a leading `../` or `./`.
            rest = tail;
        } else if rest.starts_with("/./") {
            // 2B: `/./` becomes `/` (keep the second slash).
            rest = &rest[2..];
        } else if rest == "/." {
            // 2B: a trailing `/.` becomes `/`.
            rest = "/";
        } else if rest.starts_with("/../") {
            // 2C: `/../` becomes `/` and the last output segment goes away.
            rest = &rest[3..];
            remove_last_segment_and_preceding_slash(&mut output);
        } else if rest == "/.." {
            // 2C: a trailing `/..` becomes `/` and the last output segment goes away.
            rest = "/";
            remove_last_segment_and_preceding_slash(&mut output);
        } else if rest == "." || rest == ".." {
            // 2D: a lone `.` or `..` is discarded.
            rest = "";
        } else {
            // 2E: move the first segment (with its leading slash, if any) to the output.
            let segment_end = match rest.strip_prefix('/') {
                // `+ 1` accounts for the leading slash that was stripped.
                Some(after_slash) => after_slash.find('/').map_or(rest.len(), |pos| pos + 1),
                None => rest.find('/').unwrap_or(rest.len()),
            };
            let (segment, tail) = rest.split_at(segment_end);
            output.push_str(segment);
            rest = tail;
        }
    }

    output
}

/// Removes the last path segment and the preceding slash if any.
///
/// When the buffer holds no slash, it is emptied entirely.
///
/// See <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.2.4>,
/// step 2C.
fn remove_last_segment_and_preceding_slash(output: &mut String) {
    let kept_len = output.rfind('/').unwrap_or(0);
    output.truncate(kept_len);
}

/// Recomposes the components into a single string.
///
/// The scheme and the path are always written; the authority, query and
/// fragment are written (with their `//`, `?` and `#` delimiters) only when
/// they are `Some`.
///
/// See <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.3>.
fn recompose(
    scheme: &str,
    authority: Option<&str>,
    path: &str,
    query: Option<&str>,
    fragment: Option<&str>,
) -> String {
    let mut result = format!("{}:", scheme);

    // `(delimiter, component)` pairs in serialization order.
    let parts = [
        ("//", authority),
        ("", Some(path)),
        ("?", query),
        ("#", fragment),
    ];
    for (delimiter, component) in parts {
        if let Some(component) = component {
            result.push_str(delimiter);
            result.push_str(component);
        }
    }

    result
}
/// Asserts that `source` converts to `&T` and that the converted value
/// compares equal to the original string.
///
/// Panics with the source and the conversion error otherwise.
fn assert_convertible<T>(source: &str)
where
    T: ?Sized + PartialEq<str> + core::fmt::Debug,
    for<'a> &'a T: TryFrom<&'a str>,
    for<'a> <&'a T as TryFrom<&'a str>>::Error: core::fmt::Debug,
{
    let parsed = <&T>::try_from(source)
        .unwrap_or_else(|e| panic!("should be convertible: source={:?}: {:?}", source, e));
    assert_eq!(parsed, source);
}

/// Asserts that `source` is rejected when converted to `&T`.
///
/// Panics with the source and the unexpectedly parsed value otherwise.
fn assert_non_convertible<T>(source: &str)
where
    T: ?Sized + PartialEq<str> + core::fmt::Debug,
    for<'a> &'a T: TryFrom<&'a str>,
    for<'a> <&'a T as TryFrom<&'a str>>::Error: core::fmt::Debug,
{
    match <&T>::try_from(source) {
        Err(_) => {}
        Ok(parsed) => panic!(
            "should not be convertible: source={:?}, parsed={:?}",
            source, parsed
        ),
    }
}
+ "https://tools.ietf.org/html/rfc3986", + "https://datatracker.ietf.org/doc/html/rfc3986", + // RFC 3986 section 1.1.2. + "ftp://ftp.is.co.za/rfc/rfc1808.txt", + "http://www.ietf.org/rfc/rfc2396.txt", + "ldap://[2001:db8::7]/c=GB?objectClass?one", + "mailto:John.Doe@example.com", + "news:comp.infosystems.www.servers.unix", + "tel:+1-816-555-1212", + "telnet://192.0.2.16:80/", + "urn:oasis:names:specification:docbook:dtd:xml:4.1.2", + // RFC 3986 section 3. + "urn:example:animal:ferret:nose", + // RFC 3986 section 3.3. + "mailto:fred@example.com", + "foo://info.example.com?fred", + // RFC 3986 section 5.4. + "http://a/b/c/d;p?q", + // RFC 3986 section 5.4.1. + "g:h", + "http://a/b/c/g", + "http://a/b/c/g/", + "http://a/g", + "http://g", + "http://a/b/c/d;p?y", + "http://a/b/c/g?y", + "http://a/b/c/;x", + "http://a/b/c/g;x", + "http://a/b/c/d;p?q", + "http://a/b/c/", + "http://a/b/", + "http://a/b/g", + "http://a/", + // RFC 3986 section 5.4.2. + "http://a/b/c/g.", + "http://a/b/c/.g", + "http://a/b/c/g..", + "http://a/b/c/..g", + "http://a/b/c/g/h", + "http://a/b/c/h", + "http://a/b/c/g;x=1/y", + "http://a/b/c/y", + "http://a/b/c/g?y/./x", + "http://a/b/c/g?y/../x", + // RFC 3986 section 6.2.2. + "example://a/b/c/%7Bfoo%7D", + "eXAMPLE://a/./b/../b/%63/%7bfoo%7d", + // RFC 3986 section 6.2.2.1. + "HTTP://www.EXAMPLE.com/", + "http://www.example.com/", + // RFC 3986 section 6.2.3. + "http://example.com", + "http://example.com/", + "http://example.com:/", + "http://example.com:80/", + "http://example.com/?", + "mailto:Joe@Example.COM", + "mailto:Joe@example.com", + // RFC 3986 section 6.2.4. + "http://example.com/data", + "http://example.com/data/", + "ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm", + // RFC 3986 section Appendix C. + "http://www.w3.org/Addressing/", + "ftp://foo.example.com/rfc/", + // RFC 3987 itself. + "https://tools.ietf.org/html/rfc3987", + "https://datatracker.ietf.org/doc/html/rfc3987", + // RFC 3987 section 3.1. 
+ "http://xn--rsum-bpad.example.org", + "http://r%C3%A9sum%C3%A9.example.org", + // RFC 3987 section 3.2. + "http://example.com/%F0%90%8C%80%F0%90%8C%81%F0%90%8C%82", + // RFC 3987 section 3.2.1. + "http://www.example.org/r%C3%A9sum%C3%A9.html", + "http://www.example.org/r%E9sum%E9.html", + "http://www.example.org/D%C3%BCrst", + "http://www.example.org/D%FCrst", + "http://xn--99zt52a.example.org/%e2%80%ae", + "http://xn--99zt52a.example.org/%E2%80%AE", + // RFC 3987 section 4.4. + "http://ab.CDEFGH.ij/kl/mn/op.html", + "http://ab.CDE.FGH/ij/kl/mn/op.html", + "http://AB.CD.ef/gh/IJ/KL.html", + "http://ab.cd.EF/GH/ij/kl.html", + "http://ab.CD.EF/GH/IJ/kl.html", + "http://ab.CDE123FGH.ij/kl/mn/op.html", + "http://ab.cd.ef/GH1/2IJ/KL.html", + "http://ab.cd.ef/GH%31/%32IJ/KL.html", + "http://ab.CDEFGH.123/kl/mn/op.html", + // RFC 3987 section 5.3.2. + "eXAMPLE://a/./b/../b/%63/%7bfoo%7d/ros%C3%A9", + // RFC 3987 section 5.3.2.1. + "HTTP://www.EXAMPLE.com/", + "http://www.example.com/", + // RFC 3987 section 5.3.2.3. + "http://example.org/~user", + "http://example.org/%7euser", + "http://example.org/%7Euser", + // RFC 3987 section 5.3.3. + "http://example.com", + "http://example.com/", + "http://example.com:/", + "http://example.com:80/", + //"http://xn--rsum-bpad.example.org", // duplicate + // RFC 3987 section 5.3.4. + "http://example.com/data", + "http://example.com/data/", + // RFC 3987 section 6.4. 
/// URIs from the RFCs that carry a fragment.
///
/// Each must be accepted by the reference and normal (`IriStr` / `UriStr`)
/// types, and rejected by the absolute and relative types.
#[test]
fn rfc3986_uris_absolute_with_fragment() {
    const URIS: &[&str] = &[
        // RFC 3986 section 3.
        "foo://example.com:8042/over/there?name=ferret#nose",
        // RFC 3986 section 5.4.1.
        "http://a/b/c/d;p?q#s",
        "http://a/b/c/g#s",
        "http://a/b/c/g?y#s",
        "http://a/b/c/g;x?y#s",
        // RFC 3986 section 5.4.2.
        "http://a/b/c/g#s/./x",
        "http://a/b/c/g#s/../x",
        // RFC 3986 section Appendix B.
        "http://www.ics.uci.edu/pub/ietf/uri/#Related",
        // RFC 3986 section Appendix C.
        "http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING",
        // RFC 3987 section 3.1.
        "http://www.example.org/red%09ros%C3%A9#red",
        // RFC 3987 section 4.4.
        "http://AB.CD.EF/GH/IJ/KL?MN=OP;QR=ST#UV",
    ];

    URIS.iter().copied().for_each(|uri| {
        assert_convertible::<IriReferenceStr>(uri);
        assert_convertible::<UriReferenceStr>(uri);
        assert_convertible::<IriStr>(uri);
        assert_convertible::<UriStr>(uri);
        assert_non_convertible::<IriAbsoluteStr>(uri);
        assert_non_convertible::<UriAbsoluteStr>(uri);
        assert_non_convertible::<IriRelativeStr>(uri);
        assert_non_convertible::<UriRelativeStr>(uri);
    });
}
/// IRIs from RFC 3987 that contain non-ASCII characters and no fragment.
///
/// Each must be accepted by the IRI reference, normal and absolute types,
/// and rejected by every URI type and by the relative IRI type.
#[test]
fn rfc3987_iris_absolute_without_fragment() {
    const URIS: &[&str] = &[
        // RFC 3987 section 3.1.
        "http://r\u{E9}sum\u{E9}.example.org",
        // RFC 3987 section 3.2.
        "http://example.com/\u{10300}\u{10301}\u{10302}",
        "http://www.example.org/D\u{FC}rst",
        "http://\u{7D0D}\u{8C46}.example.org/%E2%80%AE",
        // RFC 3987 section 5.2.
        "http://example.org/ros\u{E9}",
        // RFC 3987 section 5.3.2.
        "example://a/b/c/%7Bfoo%7D/ros\u{E9}",
        // RFC 3987 section 5.3.2.2.
        "http://www.example.org/r\u{E9}sum\u{E9}.html",
        "http://www.example.org/re\u{301}sume\u{301}.html",
        // RFC 3987 section 5.3.3.
        //"http://r\u{E9}sum\u{E9}.example.org", // duplicate
        // RFC 3987 section 6.4.
        //"http://www.example.org/r\u{E9}sum\u{E9}.html", // duplicate
    ];

    URIS.iter().copied().for_each(|uri| {
        assert_convertible::<IriReferenceStr>(uri);
        assert_non_convertible::<UriReferenceStr>(uri);
        assert_convertible::<IriStr>(uri);
        assert_non_convertible::<UriStr>(uri);
        assert_convertible::<IriAbsoluteStr>(uri);
        assert_non_convertible::<UriAbsoluteStr>(uri);
        assert_non_convertible::<IriRelativeStr>(uri);
        assert_non_convertible::<UriRelativeStr>(uri);
    });
}
+ "http://www.example.org/r%E9sum%E9.xml#r\u{E9}sum\u{E9}", + ]; + for uri in URIS { + assert_convertible::<IriReferenceStr>(uri); + assert_non_convertible::<UriReferenceStr>(uri); + assert_convertible::<IriStr>(uri); + assert_non_convertible::<UriStr>(uri); + assert_non_convertible::<IriAbsoluteStr>(uri); + assert_non_convertible::<UriAbsoluteStr>(uri); + assert_non_convertible::<IriRelativeStr>(uri); + assert_non_convertible::<UriRelativeStr>(uri); + } +} + +#[test] +fn test_invalid_char() { + const URIS: &[&str] = &[ + "##", // Fragment cannot have `#`. + "<", // `<` cannot appear in an IRI reference. + ">", // `>` cannot appear in an IRI reference. + ]; + for uri in URIS { + assert_non_convertible::<IriReferenceStr>(uri); + assert_non_convertible::<UriReferenceStr>(uri); + assert_non_convertible::<IriStr>(uri); + assert_non_convertible::<UriStr>(uri); + assert_non_convertible::<IriAbsoluteStr>(uri); + assert_non_convertible::<UriAbsoluteStr>(uri); + assert_non_convertible::<IriRelativeStr>(uri); + assert_non_convertible::<UriRelativeStr>(uri); + } +} + +#[test] +fn invalid_percent_encoding() { + const URIS: &[&str] = &["%", "%0", "%0g", "%f", "%fg", "%g", "%g0", "%gf", "%gg"]; + for uri in URIS { + assert_non_convertible::<IriReferenceStr>(uri); + assert_non_convertible::<UriReferenceStr>(uri); + assert_non_convertible::<IriStr>(uri); + assert_non_convertible::<UriStr>(uri); + assert_non_convertible::<IriAbsoluteStr>(uri); + assert_non_convertible::<UriAbsoluteStr>(uri); + assert_non_convertible::<IriRelativeStr>(uri); + assert_non_convertible::<UriRelativeStr>(uri); + } +} + +#[test] +fn compare_different_types() +where + UriAbsoluteStr: PartialEq<IriReferenceStr>, + IriReferenceStr: PartialEq<UriAbsoluteStr>, + IriAbsoluteStr: PartialEq<UriReferenceStr>, + UriReferenceStr: PartialEq<IriAbsoluteStr>, +{ +} diff --git a/vendor/iri-string/tests/template.rs b/vendor/iri-string/tests/template.rs new file mode 100644 index 00000000..3c3e109d --- /dev/null +++ 
b/vendor/iri-string/tests/template.rs @@ -0,0 +1,404 @@ +//! Tests for URI template. +#![cfg(feature = "alloc")] + +#[macro_use] +mod utils; + +use std::cell::Cell; + +use iri_string::spec::UriSpec; +use iri_string::template::context::{Context, DynamicContext, Visitor}; +use iri_string::template::simple_context::{SimpleContext, Value}; +use iri_string::template::UriTemplateStr; + +/// Returns the context used by examples in RFC 6570 section 3.2. +fn rfc6570_context() -> SimpleContext { + let mut ctx = SimpleContext::new(); + ctx.insert( + "count", + Value::List(vec!["one".to_owned(), "two".to_owned(), "three".to_owned()]), + ); + ctx.insert( + "dom", + Value::List(vec!["example".to_owned(), "com".to_owned()]), + ); + ctx.insert("dub", Value::String("me/too".to_owned())); + ctx.insert("hello", Value::String("Hello World!".to_owned())); + ctx.insert("half", Value::String("50%".to_owned())); + ctx.insert("var", Value::String("value".to_owned())); + ctx.insert("who", Value::String("fred".to_owned())); + ctx.insert("base", Value::String("http://example.com/home/".to_owned())); + ctx.insert("path", Value::String("/foo/bar".to_owned())); + ctx.insert( + "list", + Value::List(vec![ + "red".to_owned(), + "green".to_owned(), + "blue".to_owned(), + ]), + ); + ctx.insert( + "keys", + Value::Assoc(vec![ + ("semi".to_owned(), ";".to_owned()), + ("dot".to_owned(), ".".to_owned()), + ("comma".to_owned(), ",".to_owned()), + ]), + ); + ctx.insert("v", Value::String("6".to_owned())); + ctx.insert("x", Value::String("1024".to_owned())); + ctx.insert("y", Value::String("768".to_owned())); + ctx.insert("empty", Value::String("".to_owned())); + ctx.insert("empty_keys", Value::Assoc(vec![])); + ctx.insert("undef", Value::Undefined); + + ctx +} + +/// Expression and expected expansion. +const SUCCESS_CASES: &[(&str, &str)] = &[ + // Section 3.2.1. Variable Expansion. 
+ ("{count}", "one,two,three"), + ("{count*}", "one,two,three"), + ("{/count}", "/one,two,three"), + ("{/count*}", "/one/two/three"), + ("{;count}", ";count=one,two,three"), + ("{;count*}", ";count=one;count=two;count=three"), + ("{?count}", "?count=one,two,three"), + ("{?count*}", "?count=one&count=two&count=three"), + ("{&count*}", "&count=one&count=two&count=three"), + // Section 3.2.2. Simple String Expansion. + ("{var}", "value"), + ("{hello}", "Hello%20World%21"), + ("{half}", "50%25"), + ("O{empty}X", "OX"), + ("O{undef}X", "OX"), + ("{x,y}", "1024,768"), + ("{x,hello,y}", "1024,Hello%20World%21,768"), + ("?{x,empty}", "?1024,"), + ("?{x,undef}", "?1024"), + ("?{undef,y}", "?768"), + ("{var:3}", "val"), + ("{var:30}", "value"), + ("{list}", "red,green,blue"), + ("{list*}", "red,green,blue"), + ("{keys}", "semi,%3B,dot,.,comma,%2C"), + ("{keys*}", "semi=%3B,dot=.,comma=%2C"), + // Section 3.2.3. Reserved Expansion. + ("{+var}", "value"), + ("{+hello}", "Hello%20World!"), + ("{+half}", "50%25"), + ("{base}index", "http%3A%2F%2Fexample.com%2Fhome%2Findex"), + ("{+base}index", "http://example.com/home/index"), + ("O{+empty}X", "OX"), + ("O{+undef}X", "OX"), + ("{+path}/here", "/foo/bar/here"), + ("here?ref={+path}", "here?ref=/foo/bar"), + ("up{+path}{var}/here", "up/foo/barvalue/here"), + ("{+x,hello,y}", "1024,Hello%20World!,768"), + ("{+path,x}/here", "/foo/bar,1024/here"), + ("{+path:6}/here", "/foo/b/here"), + ("{+list}", "red,green,blue"), + ("{+list*}", "red,green,blue"), + ("{+keys}", "semi,;,dot,.,comma,,"), + ("{+keys*}", "semi=;,dot=.,comma=,"), + // Section 3.2.4. Fragment Expansion. 
+ ("{#var}", "#value"), + ("{#hello}", "#Hello%20World!"), + ("{#half}", "#50%25"), + ("foo{#empty}", "foo#"), + ("foo{#undef}", "foo"), + ("{#x,hello,y}", "#1024,Hello%20World!,768"), + ("{#path,x}/here", "#/foo/bar,1024/here"), + ("{#path:6}/here", "#/foo/b/here"), + ("{#list}", "#red,green,blue"), + ("{#list*}", "#red,green,blue"), + ("{#keys}", "#semi,;,dot,.,comma,,"), + ("{#keys*}", "#semi=;,dot=.,comma=,"), + // Section 3.2.5. Label Expansion with Dot-Prefix. + ("{.who}", ".fred"), + ("{.who,who}", ".fred.fred"), + ("{.half,who}", ".50%25.fred"), + ("www{.dom*}", "www.example.com"), + ("X{.var}", "X.value"), + ("X{.empty}", "X."), + ("X{.undef}", "X"), + ("X{.var:3}", "X.val"), + ("X{.list}", "X.red,green,blue"), + ("X{.list*}", "X.red.green.blue"), + ("X{.keys}", "X.semi,%3B,dot,.,comma,%2C"), + ("X{.keys*}", "X.semi=%3B.dot=..comma=%2C"), + ("X{.empty_keys}", "X"), + ("X{.empty_keys*}", "X"), + // Section 3.2.6. Path Segment Expansion. + ("{/who}", "/fred"), + ("{/who,who}", "/fred/fred"), + ("{/half,who}", "/50%25/fred"), + ("{/who,dub}", "/fred/me%2Ftoo"), + ("{/var}", "/value"), + ("{/var,empty}", "/value/"), + ("{/var,undef}", "/value"), + ("{/var,x}/here", "/value/1024/here"), + ("{/var:1,var}", "/v/value"), + ("{/list}", "/red,green,blue"), + ("{/list*}", "/red/green/blue"), + ("{/list*,path:4}", "/red/green/blue/%2Ffoo"), + ("{/keys}", "/semi,%3B,dot,.,comma,%2C"), + ("{/keys*}", "/semi=%3B/dot=./comma=%2C"), + // Section 3.2.7. Path-Style Parameter Expansion. 
+ ("{;who}", ";who=fred"), + ("{;half}", ";half=50%25"), + ("{;empty}", ";empty"), + ("{;v,empty,who}", ";v=6;empty;who=fred"), + ("{;v,bar,who}", ";v=6;who=fred"), + ("{;x,y}", ";x=1024;y=768"), + ("{;x,y,empty}", ";x=1024;y=768;empty"), + ("{;x,y,undef}", ";x=1024;y=768"), + ("{;hello:5}", ";hello=Hello"), + ("{;list}", ";list=red,green,blue"), + ("{;list*}", ";list=red;list=green;list=blue"), + ("{;keys}", ";keys=semi,%3B,dot,.,comma,%2C"), + ("{;keys*}", ";semi=%3B;dot=.;comma=%2C"), + // Section 3.2.8. Form-Style Query Expansion. + ("{?who}", "?who=fred"), + ("{?half}", "?half=50%25"), + ("{?x,y}", "?x=1024&y=768"), + ("{?x,y,empty}", "?x=1024&y=768&empty="), + ("{?x,y,undef}", "?x=1024&y=768"), + ("{?var:3}", "?var=val"), + ("{?list}", "?list=red,green,blue"), + ("{?list*}", "?list=red&list=green&list=blue"), + ("{?keys}", "?keys=semi,%3B,dot,.,comma,%2C"), + ("{?keys*}", "?semi=%3B&dot=.&comma=%2C"), + // Section 3.2.9. Form-Style Query Continuation. + ("{&who}", "&who=fred"), + ("{&half}", "&half=50%25"), + ("?fixed=yes{&x}", "?fixed=yes&x=1024"), + ("{&x,y,empty}", "&x=1024&y=768&empty="), + ("{&x,y,undef}", "&x=1024&y=768"), + ("{&var:3}", "&var=val"), + ("{&list}", "&list=red,green,blue"), + ("{&list*}", "&list=red&list=green&list=blue"), + ("{&keys}", "&keys=semi,%3B,dot,.,comma,%2C"), + ("{&keys*}", "&semi=%3B&dot=.&comma=%2C"), +]; + +/// Tests for examples in RFC 6570 section 3.2. 
/// Tests for examples in RFC 6570 section 3.2.
///
/// Every `(template, expected)` pair of `SUCCESS_CASES` is expanded with the
/// RFC context and compared both through `Display` and through `to_string()`.
#[test]
fn rfc6570_section3_2() {
    let context = rfc6570_context();

    for &(raw_template, expected) in SUCCESS_CASES {
        let template = UriTemplateStr::new(raw_template).expect("must be valid template");
        let expanded = template
            .expand::<UriSpec, _>(&context)
            .expect("must not have variable type error");
        assert_eq_display!(expanded, expected, "template={template:?}");
        assert_eq!(expanded.to_string(), expected, "template={template:?}");
    }
}
context.insert("incomplete2", "%ce%b1%c"); + context.insert("incomplete3", "%ce%b1%ce"); + + // `&[(template, expected)]`. + const CASES: &[(&str, &str)] = &[ + ("{incomplete1:1}", "%25"), + ("{incomplete1:2}", "%25c"), + ("{incomplete1:3}", "%25ce"), + ("{incomplete1:4}", "%25ce%25"), + ("{+incomplete1:1}", "%ce%b1"), + ("{+incomplete1:2}", "%ce%b1%25"), + ("{+incomplete2:1}", "%ce%b1"), + ("{+incomplete2:2}", "%ce%b1%25"), + ("{+incomplete2:3}", "%ce%b1%25c"), + ("{+incomplete3:1}", "%ce%b1"), + ("{+incomplete3:2}", "%ce%b1%ce"), + ("{+incomplete3:3}", "%ce%b1%ce"), + ]; + + for (template, expected) in CASES { + let template = UriTemplateStr::new(template).expect("must be valid template"); + let expanded = template + .expand::<UriSpec, _>(&context) + .expect("must not have variable type error"); + assert_eq_display!(expanded, *expected, "template={template:?}"); + assert_eq!(expanded.to_string(), *expected, "template={template:?}"); + + let expanded_dynamic = template + .expand_dynamic_to_string::<UriSpec, _>(&mut context.clone()) + .expect("must not have variable type error"); + assert_eq!( + expanded_dynamic, *expected, + "dynamic, template={template:?}" + ); + } +} + +#[test] +fn fragmented_write() { + use core::fmt; + + #[derive(Clone)] + enum Foo { + Incomplete1, + Incomplete2, + Incomplete3, + } + impl fmt::Display for Foo { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use core::fmt::Write; + + f.write_char('%')?; + f.write_char('c')?; + f.write_char('e')?; + f.write_char('%')?; + f.write_char('b')?; + f.write_char('1')?; + f.write_char('%')?; + match self { + Foo::Incomplete1 => {} + Foo::Incomplete2 => { + f.write_char('c')?; + } + Foo::Incomplete3 => { + f.write_char('c')?; + f.write_char('e')?; + } + } + Ok(()) + } + } + + #[derive(Clone)] + struct MyContext { + incomplete1: Foo, + incomplete2: Foo, + incomplete3: Foo, + } + impl Context for MyContext { + fn visit<V: Visitor>(&self, visitor: V) -> V::Result { + let name = 
visitor.var_name().as_str(); + match name { + "incomplete1" => visitor.visit_string(&self.incomplete1), + "incomplete2" => visitor.visit_string(&self.incomplete2), + "incomplete3" => visitor.visit_string(&self.incomplete3), + _ => visitor.visit_undefined(), + } + } + } + + let context = MyContext { + incomplete1: Foo::Incomplete1, + incomplete2: Foo::Incomplete2, + incomplete3: Foo::Incomplete3, + }; + + // `&[(template, expected)]`. + const CASES: &[(&str, &str)] = &[ + ("{incomplete1:1}", "%25"), + ("{incomplete1:2}", "%25c"), + ("{incomplete1:3}", "%25ce"), + ("{incomplete1:4}", "%25ce%25"), + ("{+incomplete1:1}", "%ce%b1"), + ("{+incomplete1:2}", "%ce%b1%25"), + ("{+incomplete2:1}", "%ce%b1"), + ("{+incomplete2:2}", "%ce%b1%25"), + ("{+incomplete2:3}", "%ce%b1%25c"), + ("{+incomplete3:1}", "%ce%b1"), + ("{+incomplete3:2}", "%ce%b1%ce"), + ("{+incomplete3:3}", "%ce%b1%ce"), + ]; + + for (template, expected) in CASES { + let template = UriTemplateStr::new(template).expect("must be valid template"); + let expanded = template + .expand::<UriSpec, _>(&context) + .expect("must not have variable type error"); + assert_eq_display!(expanded, *expected, "template={template:?}"); + assert_eq!(expanded.to_string(), *expected, "template={template:?}"); + + let expanded_dynamic = template + .expand_dynamic_to_string::<UriSpec, _>(&mut context.clone()) + .expect("must not have variable type error"); + assert_eq!( + expanded_dynamic, *expected, + "dynamic, template={template:?}" + ); + } +} + +#[test] +fn github_issue_39() { + #[derive(Default)] + struct MyContext { + on_expansion_start_called: Cell<bool>, + on_expansion_end_called: Cell<bool>, + } + impl DynamicContext for MyContext { + fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result { + visitor.visit_undefined() + } + fn on_expansion_start(&mut self) { + self.on_expansion_start_called.set(true); + } + fn on_expansion_end(&mut self) { + self.on_expansion_end_called.set(true); + } + } + + let mut dyctx = 
use core::fmt;

use RawKind::*;

/// Raw kind (exclusive).
///
/// Each test string is tagged with exactly one of these; the broader
/// `Spec` / `Kind` categories are derived from it by [`RawKind::is`].
#[derive(Clone, Copy, PartialEq, Eq)]
enum RawKind {
    /// Invalid string.
    Invalid,
    /// IRI.
    Iri,
    /// Absolute IRI.
    IriAbsolute,
    /// Relative IRI.
    IriRelative,
    /// URI.
    Uri,
    /// Absolute URI.
    UriAbsolute,
    /// Relative URI.
    UriRelative,
}

impl RawKind {
    /// Returns true if a string of this raw kind is valid for the given spec.
    ///
    /// Every URI kind is also accepted for `Spec::Iri`.
    fn spec_is(self, spec: Spec) -> bool {
        match spec {
            Spec::Uri => matches!(self, Self::Uri | Self::UriAbsolute | Self::UriRelative),
            Spec::Iri => self != Self::Invalid,
        }
    }

    /// Returns true if a string of this raw kind belongs to the given kind.
    ///
    /// Absolute strings also count as `Kind::Normal`, and everything that is
    /// not `Invalid` counts as `Kind::Reference`.
    fn kind_is(self, kind: Kind) -> bool {
        match kind {
            Kind::Absolute => matches!(self, Self::UriAbsolute | Self::IriAbsolute),
            Kind::Normal => matches!(
                self,
                Self::UriAbsolute | Self::Uri | Self::IriAbsolute | Self::Iri
            ),
            Kind::Reference => self != Self::Invalid,
            Kind::Relative => matches!(self, Self::UriRelative | Self::IriRelative),
        }
    }

    /// Returns true if both the spec and the kind match.
    fn is(self, spec: Spec, kind: Kind) -> bool {
        self.spec_is(spec) && self.kind_is(kind)
    }
}

/// Strings.
///
/// Test inputs tagged with their most specific raw kind. The invalid entries
/// cover `<` / `>` (which cannot directly appear in an IRI reference), a
/// second `#` in the fragment, and broken percent encoding.
const STRINGS: &[(RawKind, &str)] = &[
    (UriAbsolute, "https://user:pass@example.com:8080"),
    (UriAbsolute, "https://example.com/"),
    (UriAbsolute, "https://example.com/foo?bar=baz"),
    (Uri, "https://example.com/foo?bar=baz#qux"),
    (UriAbsolute, "foo:bar"),
    (UriAbsolute, "foo:"),
    (UriAbsolute, "foo:/"),
    (UriAbsolute, "foo://"),
    (UriAbsolute, "foo:///"),
    (UriAbsolute, "foo:////"),
    (UriAbsolute, "foo://///"),
    (UriRelative, "foo"),
    (UriRelative, "foo/bar"),
    (UriRelative, "foo//bar"),
    (UriRelative, "/"),
    (UriRelative, "/foo"),
    (UriRelative, "/foo/bar"),
    (UriRelative, "//foo/bar"),
    (UriRelative, "/foo//bar"),
    (UriRelative, "?"),
    (UriRelative, "???"),
    (UriRelative, "?foo"),
    (UriRelative, "#"),
    (UriRelative, "#foo"),
    (Invalid, "##"),
    (Invalid, "fragment#cannot#have#hash#char"),
    // `<` cannot appear in an IRI reference.
    (Invalid, "<"),
    // `>` cannot appear in an IRI reference.
    (Invalid, ">"),
    // `<` and `>` cannot appear in an IRI reference.
    (Invalid, "lt<and-gt>not-allowed"),
    // Incomplete percent encoding.
    (Invalid, "%"),
    (Invalid, "%0"),
    (Invalid, "%f"),
    (Invalid, "%F"),
    // Invalid percent encoding.
    (Invalid, "%0g"),
    (Invalid, "%0G"),
    (Invalid, "%GG"),
    (Invalid, "%G0"),
];

/// Spec.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Spec {
    /// URI.
    Uri,
    /// IRI and URI.
    Iri,
}

/// Kind.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Kind {
    /// Absolute IRI / URI.
    Absolute,
    /// IRI / URI.
    Normal,
    /// IRI / URI reference.
    Reference,
    /// Relative IRI / URI reference.
    Relative,
}

/// Returns the test strings that are valid for the given spec and kind.
pub fn positive(spec: Spec, kind: Kind) -> impl Iterator<Item = &'static str> {
    STRINGS
        .iter()
        .filter(move |(raw_kind, _)| raw_kind.is(spec, kind))
        .map(|(_, s)| *s)
}

/// Returns the test strings that are NOT valid for the given spec and kind.
pub fn negative(spec: Spec, kind: Kind) -> impl Iterator<Item = &'static str> {
    STRINGS
        .iter()
        .filter(move |(raw_kind, _)| !raw_kind.is(spec, kind))
        .map(|(_, s)| *s)
}

/// Returns true if the two equals after they are converted to strings.
///
/// The comparison is done while formatting, without building an intermediate
/// string, so it also works without the `alloc` feature.
pub(crate) fn eq_display_str<T>(d: &T, s: &str) -> bool
where
    T: ?Sized + fmt::Display,
{
    use core::fmt::Write as _;

    /// Dummy writer to compare the formatted object to the given string.
    ///
    /// Holds the part of the expected string that has not been matched yet.
    struct CmpWriter<'a>(&'a str);
    impl fmt::Write for CmpWriter<'_> {
        fn write_str(&mut self, s: &str) -> fmt::Result {
            // `strip_prefix` reports a mismatch as `None` even when `s.len()`
            // is not a char boundary of the expected string; slicing at that
            // offset (e.g. with `split_at`) would panic instead.
            self.0 = self.0.strip_prefix(s).ok_or(fmt::Error)?;
            Ok(())
        }
    }

    let mut writer = CmpWriter(s);
    let succeeded = write!(writer, "{}", d).is_ok();
    // Equal only if every written chunk matched AND nothing expected is left over.
    succeeded && writer.0.is_empty()
}

/// Asserts that the `Display` output of the left operand equals the right
/// string, with an optional custom message like `assert_eq!`.
///
/// With the `alloc` feature, the operands are additionally compared after
/// `to_string()`.
#[allow(unused_macros)]
macro_rules! assert_eq_display {
    ($left:expr, $right:expr $(,)?) => {{
        match (&$left, &$right) {
            (left, right) => {
                assert!(
                    utils::eq_display_str(left, right.as_ref()),
                    "`eq_display_str(left, right)`\n left: `{left}`,\n right: `{right}`",
                );
                #[cfg(feature = "alloc")]
                {
                    let left = left.to_string();
                    let right = right.to_string();
                    assert_eq!(left, right);
                }
            }
        }
    }};
    ($left:expr, $right:expr, $($args:tt)*) => {{
        match (&$left, &$right) {
            (left, right) => {
                assert!(
                    utils::eq_display_str(left, right.as_ref()),
                    "{}",
                    format_args!(
                        "{}: {}",
                        format_args!(
                            "`eq_display_str(left, right)`\n left: `{left}`,\n right: `{right}`",
                        ),
                        format_args!($($args)*)
                    )
                );
                #[cfg(feature = "alloc")]
                {
                    let left = left.to_string();
                    let right = right.to_string();
                    assert_eq!(left, right, $($args)*);
                }
            }
        }
    }};
}
