summaryrefslogtreecommitdiff
path: root/vendor/zerotrie
diff options
context:
space:
mode:
authormo khan <mo@mokhan.ca>2025-07-10 13:11:11 -0600
committermo khan <mo@mokhan.ca>2025-07-10 13:11:11 -0600
commit01959b16a21b22b5df5f16569c2a8e8f92beecef (patch)
tree32afa5d747c5466345c59ec52161a7cba3d6d755 /vendor/zerotrie
parentff30574117a996df332e23d1fb6f65259b316b5b (diff)
chore: vendor dependencies
Diffstat (limited to 'vendor/zerotrie')
-rw-r--r--vendor/zerotrie/.cargo-checksum.json1
-rw-r--r--vendor/zerotrie/Cargo.lock945
-rw-r--r--vendor/zerotrie/Cargo.toml183
-rw-r--r--vendor/zerotrie/LICENSE46
-rw-r--r--vendor/zerotrie/README.md41
-rw-r--r--vendor/zerotrie/benches/overview.rs198
-rw-r--r--vendor/zerotrie/examples/first_weekday_for_region.rs219
-rw-r--r--vendor/zerotrie/src/builder/branch_meta.rs29
-rw-r--r--vendor/zerotrie/src/builder/bytestr.rs121
-rw-r--r--vendor/zerotrie/src/builder/konst/builder.rs330
-rw-r--r--vendor/zerotrie/src/builder/konst/mod.rs9
-rw-r--r--vendor/zerotrie/src/builder/konst/store.rs341
-rw-r--r--vendor/zerotrie/src/builder/litemap.rs54
-rw-r--r--vendor/zerotrie/src/builder/mod.rs298
-rw-r--r--vendor/zerotrie/src/builder/nonconst/builder.rs418
-rw-r--r--vendor/zerotrie/src/builder/nonconst/mod.rs9
-rw-r--r--vendor/zerotrie/src/builder/nonconst/store.rs185
-rw-r--r--vendor/zerotrie/src/byte_phf/builder.rs208
-rw-r--r--vendor/zerotrie/src/byte_phf/cached_owned.rs39
-rw-r--r--vendor/zerotrie/src/byte_phf/mod.rs485
-rw-r--r--vendor/zerotrie/src/cursor.rs491
-rw-r--r--vendor/zerotrie/src/error.rs25
-rw-r--r--vendor/zerotrie/src/helpers.rs119
-rw-r--r--vendor/zerotrie/src/lib.rs87
-rw-r--r--vendor/zerotrie/src/options.rs153
-rw-r--r--vendor/zerotrie/src/reader.rs727
-rw-r--r--vendor/zerotrie/src/serde.rs642
-rw-r--r--vendor/zerotrie/src/varint.rs497
-rw-r--r--vendor/zerotrie/src/zerotrie.rs886
-rw-r--r--vendor/zerotrie/tests/asciitrie_test.rs73
-rw-r--r--vendor/zerotrie/tests/builder_test.rs855
-rw-r--r--vendor/zerotrie/tests/data/data.rs2210
-rw-r--r--vendor/zerotrie/tests/derive_test.rs138
-rw-r--r--vendor/zerotrie/tests/ignorecase_test.rs46
-rw-r--r--vendor/zerotrie/tests/locale_aux_test.rs168
35 files changed, 11276 insertions, 0 deletions
diff --git a/vendor/zerotrie/.cargo-checksum.json b/vendor/zerotrie/.cargo-checksum.json
new file mode 100644
index 00000000..8ec9800c
--- /dev/null
+++ b/vendor/zerotrie/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.lock":"4ab073038a47fdfe9881a370284f32f38622a2087c6d15d68871c2bb4add1da8","Cargo.toml":"51b4e43e670ff1a0552f6973efdf26eba7063a2f5e91e3b734501e41b2bcd6cb","LICENSE":"f367c1b8e1aa262435251e442901da4607b4650e0e63a026f5044473ecfb90f2","README.md":"21c5a3a4999c937473283001d787b27b7f92942a4cc9df99e63466346205e777","benches/overview.rs":"f1207ef0c53c743fbf61e1ba886dda70cdba338fb357e1f61ca9285cc6c5f49e","examples/first_weekday_for_region.rs":"2ee02fb39378be77ea897172c47e7cb2749a5a127a4efae101b30e6bb762ddfb","src/builder/branch_meta.rs":"31b306f321f10655a2efc2e395a2a65e6df6da8b1b71963772ef4407061a7eda","src/builder/bytestr.rs":"c4f40fb9b93a7735ddd181e1807f8573d6a7dbf66e1d84f596e8d318d755c910","src/builder/konst/builder.rs":"073a54b2879dc07c458bfb68ea838279b07bd23cd3c6e4ac7da243d88c760639","src/builder/konst/mod.rs":"160d6d2b0da65101e87371aadcf30aaa96f5851a58a23794b11d596f384b635c","src/builder/konst/store.rs":"88cd8668676dd0d0e4ff0817359d6ae4c57f66924fe43de2216f69d2dfcc054f","src/builder/litemap.rs":"229605a3516919a29d3207195cdacf458767d6f02e434595c5b3b577ada87e1e","src/builder/mod.rs":"5ae439e4511da028fec4b95dfa8ff65337e3c4e83ae594645a46eccc3e8b2446","src/builder/nonconst/builder.rs":"c8b89dcaf01385447d7aeae400a0912c875fe7282e664e700b980615e14293d7","src/builder/nonconst/mod.rs":"9f02f6a7ad30458d056568399b8640ce8332cedd8f88649b9c4e60254ba025a2","src/builder/nonconst/store.rs":"b1fef6e738241817682eb7f10b8b5ea713af7ec527f0da0f3e8443825f54adf5","src/byte_phf/builder.rs":"b0a89d69adf28f8bbd0be2a8fdd92105267fbc8cbc7625943952acf50770f441","src/byte_phf/cached_owned.rs":"2999ced75fba7aa8685fb1b1ee4c1dd47b6e36b55c9d5d6a10ed3aebc5a54b6b","src/byte_phf/mod.rs":"5324d52566afc48d365c4df61f89a5e0a03522d66d99c4e2781489cb5a880bdd","src/cursor.rs":"44eade91f6c39ed8aa670e8cc1be75f876db864b96675b90169c458ba188a830","src/error.rs":"487cb8807ee4fecffa0bc4c87ad94eb9b609590a2af3c98df548e6cad1dd18d8","src/helpers.rs":"9053fccf6b17180418b9f4d813aafd242e302d608e0a6bf0969dc228f16142
8f","src/lib.rs":"ff64bb55462571c7530e9d45fb3f4f34ea4cf62b74bb32a245ffb19af9595f6b","src/options.rs":"aa980fa7c58a10cf128151d15f1a6186a356ce9aba9c75c02108ca268680fdc4","src/reader.rs":"be554a75a94bd8ed4b6bdd0e7acd0936b4ee4b6936c265fc62e5c36f96dc1768","src/serde.rs":"b06ac3d970c5d80fb662cbf985c25b7516c0c71231cebb3e433a1f778c23a5c5","src/varint.rs":"8712fc408b5bdab4864c82f9c5bad39864dbac3a87d237dbbd2de04157252c93","src/zerotrie.rs":"ab44ea2e96987598c1ad643451008ea5421b9fa37bb5e071cf585fd442747a50","tests/asciitrie_test.rs":"0a4de1314e5ea805bc583262dcd7c3a19f8be9cc2c87c1b475da1392f621fbc3","tests/builder_test.rs":"916143542d4086e97b8a10243671cba50ad2daca7cbe9ecb7db89420dae5f033","tests/data/data.rs":"f8a7afcf64e8bd0f902a1d2eb829deb96c0a0fddd78f0aa80657764a91460515","tests/derive_test.rs":"786037168f4198268634e37b3a8496c805f1c5cdbc469ddf77cc9c326cb8661e","tests/ignorecase_test.rs":"28dce3e892ddf0b2ccf254b60e0d915872d74b9eab0f80522d4e7e7fefbbdc7e","tests/locale_aux_test.rs":"a64d439b08355bf8f98ddd8f3979774b20c2b6c56562e340e8e0e2a09689d2d8"},"package":"36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595"} \ No newline at end of file
diff --git a/vendor/zerotrie/Cargo.lock b/vendor/zerotrie/Cargo.lock
new file mode 100644
index 00000000..0dcaa32b
--- /dev/null
+++ b/vendor/zerotrie/Cargo.lock
@@ -0,0 +1,945 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "anes"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
+
+[[package]]
+name = "anstyle"
+version = "1.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
+
+[[package]]
+name = "autocfg"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
+
+[[package]]
+name = "bincode"
+version = "1.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "bitflags"
+version = "2.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
+
+[[package]]
+name = "bumpalo"
+version = "3.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
+
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
+name = "cast"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "ciborium"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
+dependencies = [
+ "ciborium-io",
+ "ciborium-ll",
+ "serde",
+]
+
+[[package]]
+name = "ciborium-io"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
+
+[[package]]
+name = "ciborium-ll"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
+dependencies = [
+ "ciborium-io",
+ "half",
+]
+
+[[package]]
+name = "clap"
+version = "4.4.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c"
+dependencies = [
+ "clap_builder",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.4.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7"
+dependencies = [
+ "anstyle",
+ "clap_lex",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
+
+[[package]]
+name = "cobs"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15"
+
+[[package]]
+name = "criterion"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
+dependencies = [
+ "anes",
+ "cast",
+ "ciborium",
+ "clap",
+ "criterion-plot",
+ "is-terminal",
+ "itertools",
+ "num-traits",
+ "once_cell",
+ "oorandom",
+ "plotters",
+ "rayon",
+ "regex",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "tinytemplate",
+ "walkdir",
+]
+
+[[package]]
+name = "criterion-plot"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
+dependencies = [
+ "cast",
+ "itertools",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "crunchy"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929"
+
+[[package]]
+name = "databake"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff6ee9e2d2afb173bcdeee45934c89ec341ab26f91c9933774fc15c2b58f83ef"
+dependencies = [
+ "databake-derive",
+ "proc-macro2",
+ "quote",
+]
+
+[[package]]
+name = "databake-derive"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6834770958c7b84223607e49758ec0dde273c4df915e734aad50f62968a4c134"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
+[[package]]
+name = "displaydoc"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "either"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+
+[[package]]
+name = "embedded-io"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced"
+
+[[package]]
+name = "embedded-io"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
+
+[[package]]
+name = "getrandom"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasi",
+]
+
+[[package]]
+name = "half"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888"
+dependencies = [
+ "cfg-if",
+ "crunchy",
+]
+
+[[package]]
+name = "hermit-abi"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f154ce46856750ed433c8649605bf7ed2de3bc35fd9d2a9f30cddd873c80cb08"
+
+[[package]]
+name = "icu_locale_core"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a"
+dependencies = [
+ "displaydoc",
+ "litemap",
+ "tinystr",
+ "writeable",
+]
+
+[[package]]
+name = "is-terminal"
+version = "0.4.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "windows-sys",
+]
+
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
+
+[[package]]
+name = "js-sys"
+version = "0.3.77"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
+dependencies = [
+ "once_cell",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.172"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
+
+[[package]]
+name = "litemap"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "log"
+version = "0.4.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
+
+[[package]]
+name = "memchr"
+version = "2.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
+
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.21.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
+
+[[package]]
+name = "oorandom"
+version = "11.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
+
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
+[[package]]
+name = "plotters"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
+dependencies = [
+ "num-traits",
+ "plotters-backend",
+ "plotters-svg",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "plotters-backend"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
+
+[[package]]
+name = "plotters-svg"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
+dependencies = [
+ "plotters-backend",
+]
+
+[[package]]
+name = "postcard"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "170a2601f67cc9dba8edd8c4870b15f71a6a2dc196daec8c83f72b59dff628a8"
+dependencies = [
+ "cobs",
+ "embedded-io 0.4.0",
+ "embedded-io 0.6.1",
+ "serde",
+]
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+dependencies = [
+ "zerocopy",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.95"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "r-efi"
+version = "5.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5"
+
+[[package]]
+name = "rand"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
+dependencies = [
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "rand_pcg"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b48ac3f7ffaab7fac4d2376632268aa5f89abdb55f7ebf8f4d11fffccb2320f7"
+dependencies = [
+ "rand_core",
+]
+
+[[package]]
+name = "rayon"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "regex"
+version = "1.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
+
+[[package]]
+name = "rmp"
+version = "0.8.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4"
+dependencies = [
+ "byteorder",
+ "num-traits",
+ "paste",
+]
+
+[[package]]
+name = "rmp-serde"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db"
+dependencies = [
+ "byteorder",
+ "rmp",
+ "serde",
+]
+
+[[package]]
+name = "rustversion"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2"
+
+[[package]]
+name = "ryu"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
+
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "serde"
+version = "1.0.219"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.219"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.140"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
+dependencies = [
+ "itoa",
+ "memchr",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
+
+[[package]]
+name = "syn"
+version = "2.0.101"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "synstructure"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "tinystr"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b"
+dependencies = [
+ "displaydoc",
+ "zerovec",
+]
+
+[[package]]
+name = "tinytemplate"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
+
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "wasi"
+version = "0.14.2+wasi-0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
+dependencies = [
+ "wit-bindgen-rt",
+]
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.100"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "rustversion",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.100"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
+dependencies = [
+ "bumpalo",
+ "log",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.100"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.100"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.100"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "web-sys"
+version = "0.3.77"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "winapi-util"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.59.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
+dependencies = [
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "wit-bindgen-rt"
+version = "0.39.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "writeable"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb"
+
+[[package]]
+name = "yoke"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc"
+dependencies = [
+ "stable_deref_trait",
+ "yoke-derive",
+ "zerofrom",
+]
+
+[[package]]
+name = "yoke-derive"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
+[[package]]
+name = "zerocopy"
+version = "0.8.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "zerofrom"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5"
+dependencies = [
+ "zerofrom-derive",
+]
+
+[[package]]
+name = "zerofrom-derive"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
+[[package]]
+name = "zerotrie"
+version = "0.2.2"
+dependencies = [
+ "bincode",
+ "criterion",
+ "databake",
+ "displaydoc",
+ "icu_locale_core",
+ "litemap",
+ "postcard",
+ "rand",
+ "rand_pcg",
+ "rmp-serde",
+ "serde",
+ "serde_json",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "zerovec"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428"
+dependencies = [
+ "databake",
+ "serde",
+ "zerofrom",
+]
diff --git a/vendor/zerotrie/Cargo.toml b/vendor/zerotrie/Cargo.toml
new file mode 100644
index 00000000..bb9b940a
--- /dev/null
+++ b/vendor/zerotrie/Cargo.toml
@@ -0,0 +1,183 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+rust-version = "1.82"
+name = "zerotrie"
+version = "0.2.2"
+authors = ["The ICU4X Project Developers"]
+build = false
+include = [
+ "data/**/*",
+ "src/**/*",
+ "examples/**/*",
+ "benches/**/*",
+ "tests/**/*",
+ "Cargo.toml",
+ "LICENSE",
+ "README.md",
+ "build.rs",
+]
+autolib = false
+autobins = false
+autoexamples = false
+autotests = false
+autobenches = false
+description = "A data structure that efficiently maps strings to integers"
+homepage = "https://icu4x.unicode.org"
+readme = "README.md"
+categories = ["internationalization"]
+license = "Unicode-3.0"
+repository = "https://github.com/unicode-org/icu4x"
+
+[package.metadata.docs.rs]
+all-features = true
+
+[features]
+alloc = []
+databake = [
+ "dep:databake",
+ "zerovec?/databake",
+]
+default = []
+litemap = [
+ "dep:litemap",
+ "alloc",
+]
+serde = [
+ "dep:serde",
+ "dep:litemap",
+ "alloc",
+ "litemap/serde",
+ "zerovec?/serde",
+]
+yoke = ["dep:yoke"]
+zerofrom = ["dep:zerofrom"]
+
+[lib]
+name = "zerotrie"
+path = "src/lib.rs"
+bench = false
+
+[[example]]
+name = "first_weekday_for_region"
+path = "examples/first_weekday_for_region.rs"
+
+[[test]]
+name = "asciitrie_test"
+path = "tests/asciitrie_test.rs"
+required-features = [
+ "alloc",
+ "litemap",
+]
+
+[[test]]
+name = "builder_test"
+path = "tests/builder_test.rs"
+required-features = [
+ "alloc",
+ "litemap",
+]
+
+[[test]]
+name = "derive_test"
+path = "tests/derive_test.rs"
+
+[[test]]
+name = "ignorecase_test"
+path = "tests/ignorecase_test.rs"
+
+[[test]]
+name = "locale_aux_test"
+path = "tests/locale_aux_test.rs"
+required-features = [
+ "alloc",
+ "litemap",
+]
+
+[[bench]]
+name = "overview"
+path = "benches/overview.rs"
+harness = false
+required-features = [
+ "alloc",
+ "litemap",
+]
+
+[dependencies.databake]
+version = "0.2.0"
+features = ["derive"]
+optional = true
+default-features = false
+
+[dependencies.displaydoc]
+version = "0.2.3"
+default-features = false
+
+[dependencies.litemap]
+version = "0.8.0"
+features = ["alloc"]
+optional = true
+default-features = false
+
+[dependencies.serde]
+version = "1.0.110"
+optional = true
+default-features = false
+
+[dependencies.yoke]
+version = "0.8.0"
+features = ["derive"]
+optional = true
+default-features = false
+
+[dependencies.zerofrom]
+version = "0.1.3"
+optional = true
+default-features = false
+
+[dependencies.zerovec]
+version = "0.11.1"
+optional = true
+default-features = false
+
+[dev-dependencies.bincode]
+version = "1.3.1"
+
+[dev-dependencies.icu_locale_core]
+version = "2.0.0"
+features = ["alloc"]
+default-features = false
+
+[dev-dependencies.postcard]
+version = "1.0.3"
+features = ["alloc"]
+default-features = false
+
+[dev-dependencies.rand]
+version = "0.9"
+
+[dev-dependencies.rand_pcg]
+version = "0.9"
+
+[dev-dependencies.rmp-serde]
+version = "1.2.0"
+
+[dev-dependencies.serde]
+version = "1.0.110"
+default-features = false
+
+[dev-dependencies.serde_json]
+version = "1.0.45"
+
+[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies.criterion]
+version = "0.5.0"
diff --git a/vendor/zerotrie/LICENSE b/vendor/zerotrie/LICENSE
new file mode 100644
index 00000000..c9be6012
--- /dev/null
+++ b/vendor/zerotrie/LICENSE
@@ -0,0 +1,46 @@
+UNICODE LICENSE V3
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 2020-2024 Unicode, Inc.
+
+NOTICE TO USER: Carefully read the following legal agreement. BY
+DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
+SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
+DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of data files and any associated documentation (the "Data Files") or
+software and any associated documentation (the "Software") to deal in the
+Data Files or Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, and/or sell
+copies of the Data Files or Software, and to permit persons to whom the
+Data Files or Software are furnished to do so, provided that either (a)
+this copyright and permission notice appear with all copies of the Data
+Files or Software, or (b) this copyright and permission notice appear in
+associated Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
+THIRD PARTY RIGHTS.
+
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
+FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall
+not be used in advertising or otherwise to promote the sale, use or other
+dealings in these Data Files or Software without prior written
+authorization of the copyright holder.
+
+SPDX-License-Identifier: Unicode-3.0
+
+—
+
+Portions of ICU4X may have been adapted from ICU4C and/or ICU4J.
+ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others.
diff --git a/vendor/zerotrie/README.md b/vendor/zerotrie/README.md
new file mode 100644
index 00000000..1a58da76
--- /dev/null
+++ b/vendor/zerotrie/README.md
@@ -0,0 +1,41 @@
+# zerotrie [![crates.io](https://img.shields.io/crates/v/zerotrie)](https://crates.io/crates/zerotrie)
+
+<!-- cargo-rdme start -->
+
+A data structure offering zero-copy storage and retrieval of byte strings, with a focus
+on the efficient storage of ASCII strings. Strings are mapped to `usize` values.
+
+[`ZeroTrie`] does not support mutation because doing so would require recomputing the entire
+data structure. Instead, it supports conversion to and from [`LiteMap`] and [`BTreeMap`].
+
+There are multiple variants of [`ZeroTrie`] optimized for different use cases.
+
+## Examples
+
+```rust
+use zerotrie::ZeroTrie;
+
+let data: &[(&str, usize)] = &[("abc", 11), ("xyz", 22), ("axyb", 33)];
+
+let trie: ZeroTrie<Vec<u8>> = data.iter().copied().collect();
+
+assert_eq!(trie.get("axyb"), Some(33));
+assert_eq!(trie.byte_len(), 18);
+```
+
+## Internal Structure
+
+To read about the internal structure of [`ZeroTrie`], build the docs with private modules:
+
+```bash
+cargo doc --document-private-items --all-features --no-deps --open
+```
+
+[`LiteMap`]: litemap::LiteMap
+[`BTreeMap`]: alloc::collections::BTreeMap
+
+<!-- cargo-rdme end -->
+
+## More Information
+
+For more information on development, authorship, contributing, etc., please visit the [`ICU4X home page`](https://github.com/unicode-org/icu4x).
diff --git a/vendor/zerotrie/benches/overview.rs b/vendor/zerotrie/benches/overview.rs
new file mode 100644
index 00000000..be17e60f
--- /dev/null
+++ b/vendor/zerotrie/benches/overview.rs
@@ -0,0 +1,198 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use litemap::LiteMap;
+use std::collections::HashMap;
+use zerotrie::ZeroTrieExtendedCapacity;
+use zerotrie::ZeroTriePerfectHash;
+use zerotrie::ZeroTrieSimpleAscii;
+use zerovec::ZeroHashMap;
+use zerovec::ZeroMap;
+
+// Shared fixtures: the test data file is textually included here so the bench
+// target can reach its items as `testdata::…`.
+mod testdata {
+ include!("../tests/data/data.rs");
+}
+
+/// Benchmarks key lookup on the `basic` fixture for each ZeroTrie variant and for
+/// several baseline maps (`ZeroMap`, `HashMap`, `ZeroHashMap`).
+///
+/// Every benchmark iterates over all `(key, expected)` pairs and asserts that the
+/// lookup returns the expected value, so a wrong result fails the bench run.
+fn get_basic_bench(c: &mut Criterion) {
+ let mut g = c.benchmark_group("get/basic");
+
+ // NOTE: All the trie data are the same for basic data
+ let trie = testdata::basic::TRIE_ASCII;
+ let data = testdata::basic::DATA_ASCII;
+
+ g.bench_function("SimpleAscii", |b| {
+ let trie = ZeroTrieSimpleAscii::from_bytes(trie);
+ b.iter(|| {
+ for (key, expected) in black_box(data) {
+ let actual = black_box(&trie).get(key);
+ assert_eq!(Some(*expected), actual);
+ }
+ });
+ });
+
+ g.bench_function("PerfectHash", |b| {
+ let trie = ZeroTriePerfectHash::from_bytes(trie);
+ b.iter(|| {
+ for (key, expected) in black_box(data) {
+ let actual = black_box(&trie).get(key);
+ assert_eq!(Some(*expected), actual);
+ }
+ });
+ });
+
+ g.bench_function("ExtendedCapacity", |b| {
+ let trie = ZeroTrieExtendedCapacity::from_bytes(trie);
+ b.iter(|| {
+ for (key, expected) in black_box(data) {
+ let actual = black_box(&trie).get(key);
+ assert_eq!(Some(*expected), actual);
+ }
+ });
+ });
+
+ // Baseline: the values are narrowed with `as u32` on both the stored and expected side.
+ g.bench_function("ZeroMap/u32", |b| {
+ let zm: ZeroMap<[u8], u32> = data.iter().map(|(a, b)| (*a, *b as u32)).collect();
+ b.iter(|| {
+ for (key, expected) in black_box(data) {
+ let actual = black_box(&zm).get_copied(key);
+ assert_eq!(Some(*expected as u32), actual);
+ }
+ });
+ });
+
+ // Baseline: the values are narrowed with `as u8` on both the stored and expected side.
+ g.bench_function("ZeroMap/u8", |b| {
+ let zm: ZeroMap<[u8], u8> = data.iter().map(|(k, v)| (*k, *v as u8)).collect();
+ b.iter(|| {
+ for (key, expected) in black_box(data) {
+ let actual = black_box(&zm).get_copied(key);
+ assert_eq!(Some(*expected as u8), actual);
+ }
+ });
+ });
+
+ g.bench_function("HashMap", |b| {
+ let hm: HashMap<&[u8], usize> = data.iter().copied().collect();
+ b.iter(|| {
+ for (key, expected) in black_box(data) {
+ let actual = black_box(&hm).get(key);
+ assert_eq!(Some(expected), actual);
+ }
+ });
+ });
+
+ g.bench_function("ZeroHashMap/u8", |b| {
+ let zhm: ZeroHashMap<[u8], u8> = data.iter().map(|(k, v)| (*k, *v as u8)).collect();
+ b.iter(|| {
+ for (key, expected) in black_box(data) {
+ let actual = black_box(&zhm).get(key).copied();
+ assert_eq!(Some(*expected as u8), actual);
+ }
+ });
+ });
+
+ // Finish the group explicitly, matching `get_subtags_bench_helper`, instead of
+ // relying on the group being dropped at the end of the function.
+ g.finish();
+}
+
+/// Runs the shared subtag lookup benchmarks on the `short_subtags_10pct` fixture
+/// under the group name `get/subtags_10pct`.
+fn get_subtags_bench_medium(c: &mut Criterion) {
+ let g = c.benchmark_group("get/subtags_10pct");
+
+ let strings = testdata::short_subtags_10pct::STRINGS;
+ let litemap = testdata::strings_to_litemap(strings);
+
+ // The helper takes ownership of the group and finishes it.
+ get_subtags_bench_helper(g, strings, litemap);
+}
+
+/// Runs the shared subtag lookup benchmarks on the full `short_subtags` fixture
+/// under the group name `get/subtags_full`.
+fn get_subtags_bench_large(c: &mut Criterion) {
+ let g = c.benchmark_group("get/subtags_full");
+
+ let strings = testdata::short_subtags::STRINGS;
+ let litemap = testdata::strings_to_litemap(strings);
+
+ // The helper takes ownership of the group and finishes it.
+ get_subtags_bench_helper(g, strings, litemap);
+}
+
+/// Registers one lookup benchmark per container type on the given group, then
+/// finishes the group.
+///
+/// Each benchmark looks up every entry of `strings` and asserts that the result
+/// equals the entry's index in `strings`.
+// NOTE(review): this presumes `litemap` maps each string to its index in `strings`
+// (as built by `testdata::strings_to_litemap`) — confirm against that helper.
+fn get_subtags_bench_helper<M: criterion::measurement::Measurement>(
+ mut g: criterion::BenchmarkGroup<M>,
+ strings: &[&str],
+ litemap: LiteMap<&[u8], usize>,
+) {
+ g.bench_function("SimpleAscii", |b| {
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();
+ b.iter(|| {
+ for (i, key) in black_box(strings).iter().enumerate() {
+ let actual = black_box(&trie).get(key);
+ assert_eq!(Some(i), actual);
+ }
+ });
+ });
+
+ g.bench_function("PerfectHash", |b| {
+ let trie = ZeroTriePerfectHash::try_from(&litemap).unwrap();
+ b.iter(|| {
+ for (i, key) in black_box(strings).iter().enumerate() {
+ let actual = black_box(&trie).get(key);
+ assert_eq!(Some(i), actual);
+ }
+ });
+ });
+
+ g.bench_function("ExtendedCapacity", |b| {
+ let trie = ZeroTrieExtendedCapacity::try_from(&litemap).unwrap();
+ b.iter(|| {
+ for (i, key) in black_box(strings).iter().enumerate() {
+ let actual = black_box(&trie).get(key);
+ assert_eq!(Some(i), actual);
+ }
+ });
+ });
+
+ g.bench_function("ZeroMap/u32", |b| {
+ let zm: ZeroMap<[u8], u32> = litemap.iter().map(|(a, b)| (*a, *b as u32)).collect();
+ b.iter(|| {
+ for (i, key) in black_box(strings).iter().enumerate() {
+ let actual = black_box(&zm).get_copied(key.as_bytes());
+ assert_eq!(Some(i as u32), actual);
+ }
+ });
+ });
+
+ // Both the stored value and the expected index are cast with `as u8`, so the
+ // comparison is made on the truncated values.
+ g.bench_function("ZeroMap/u8", |b| {
+ let zm: ZeroMap<[u8], u8> = litemap.iter().map(|(k, v)| (*k, *v as u8)).collect();
+ b.iter(|| {
+ for (i, key) in black_box(strings).iter().enumerate() {
+ let actual = black_box(&zm).get_copied(key.as_bytes());
+ assert_eq!(Some(i as u8), actual);
+ }
+ });
+ });
+
+ g.bench_function("HashMap", |b| {
+ let hm: HashMap<&[u8], u32> = litemap.iter().map(|(a, b)| (*a, *b as u32)).collect();
+ b.iter(|| {
+ for (i, key) in black_box(strings).iter().enumerate() {
+ let actual = black_box(&hm).get(key.as_bytes());
+ assert_eq!(Some(i as u32), actual.copied());
+ }
+ });
+ });
+
+ // Same `as u8` truncation on both sides as in the `ZeroMap/u8` case.
+ g.bench_function("ZeroHashMap/u8", |b| {
+ let zhm: ZeroHashMap<[u8], u8> = litemap.iter().map(|(k, v)| (*k, *v as u8)).collect();
+ b.iter(|| {
+ for (i, key) in black_box(strings).iter().enumerate() {
+ let actual = black_box(&zhm).get(key.as_bytes()).copied();
+ assert_eq!(Some(i as u8), actual);
+ }
+ });
+ });
+
+ g.finish();
+}
+
+// Registers the three benchmark functions above as the `benches` group and
+// hands that group to criterion's generated entry point.
+criterion_group!(
+ benches,
+ get_basic_bench,
+ get_subtags_bench_medium,
+ get_subtags_bench_large
+);
+criterion_main!(benches);
diff --git a/vendor/zerotrie/examples/first_weekday_for_region.rs b/vendor/zerotrie/examples/first_weekday_for_region.rs
new file mode 100644
index 00000000..25a025b7
--- /dev/null
+++ b/vendor/zerotrie/examples/first_weekday_for_region.rs
@@ -0,0 +1,219 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+// This example demonstrates the use of ZeroTrieSimpleAscii and ZeroTriePerfectHash to look up data based on a region code.
+
+#![allow(dead_code)]
+#![no_main] // https://github.com/unicode-org/icu4x/issues/395
+icu_benchmark_macros::instrument!();
+
+use zerotrie::ZeroTriePerfectHash;
+use zerotrie::ZeroTrieSimpleAscii;
+
+// Weekday codes stored as the trie values. Only the four weekdays that occur in
+// `DATA` are defined.
+mod weekday {
+ pub const MON: usize = 1;
+ pub const FRI: usize = 5;
+ pub const SAT: usize = 6;
+ pub const SUN: usize = 7;
+}
+
+// This data originated from CLDR 41.
+static DATA: &[(&str, usize)] = &[
+ ("001", weekday::MON),
+ ("AD", weekday::MON),
+ ("AE", weekday::SAT),
+ ("AF", weekday::SAT),
+ ("AG", weekday::SUN),
+ ("AI", weekday::MON),
+ ("AL", weekday::MON),
+ ("AM", weekday::MON),
+ ("AN", weekday::MON),
+ ("AR", weekday::MON),
+ ("AS", weekday::SUN),
+ ("AT", weekday::MON),
+ ("AU", weekday::MON),
+ ("AX", weekday::MON),
+ ("AZ", weekday::MON),
+ ("BA", weekday::MON),
+ ("BD", weekday::SUN),
+ ("BE", weekday::MON),
+ ("BG", weekday::MON),
+ ("BH", weekday::SAT),
+ ("BM", weekday::MON),
+ ("BN", weekday::MON),
+ ("BR", weekday::SUN),
+ ("BS", weekday::SUN),
+ ("BT", weekday::SUN),
+ ("BW", weekday::SUN),
+ ("BY", weekday::MON),
+ ("BZ", weekday::SUN),
+ ("CA", weekday::SUN),
+ ("CH", weekday::MON),
+ ("CL", weekday::MON),
+ ("CM", weekday::MON),
+ ("CN", weekday::SUN),
+ ("CO", weekday::SUN),
+ ("CR", weekday::MON),
+ ("CY", weekday::MON),
+ ("CZ", weekday::MON),
+ ("DE", weekday::MON),
+ ("DJ", weekday::SAT),
+ ("DK", weekday::MON),
+ ("DM", weekday::SUN),
+ ("DO", weekday::SUN),
+ ("DZ", weekday::SAT),
+ ("EC", weekday::MON),
+ ("EE", weekday::MON),
+ ("EG", weekday::SAT),
+ ("ES", weekday::MON),
+ ("ET", weekday::SUN),
+ ("FI", weekday::MON),
+ ("FJ", weekday::MON),
+ ("FO", weekday::MON),
+ ("FR", weekday::MON),
+ ("GB", weekday::MON),
+ ("GB-alt-variant", weekday::SUN),
+ ("GE", weekday::MON),
+ ("GF", weekday::MON),
+ ("GP", weekday::MON),
+ ("GR", weekday::MON),
+ ("GT", weekday::SUN),
+ ("GU", weekday::SUN),
+ ("HK", weekday::SUN),
+ ("HN", weekday::SUN),
+ ("HR", weekday::MON),
+ ("HU", weekday::MON),
+ ("ID", weekday::SUN),
+ ("IE", weekday::MON),
+ ("IL", weekday::SUN),
+ ("IN", weekday::SUN),
+ ("IQ", weekday::SAT),
+ ("IR", weekday::SAT),
+ ("IS", weekday::MON),
+ ("IT", weekday::MON),
+ ("JM", weekday::SUN),
+ ("JO", weekday::SAT),
+ ("JP", weekday::SUN),
+ ("KE", weekday::SUN),
+ ("KG", weekday::MON),
+ ("KH", weekday::SUN),
+ ("KR", weekday::SUN),
+ ("KW", weekday::SAT),
+ ("KZ", weekday::MON),
+ ("LA", weekday::SUN),
+ ("LB", weekday::MON),
+ ("LI", weekday::MON),
+ ("LK", weekday::MON),
+ ("LT", weekday::MON),
+ ("LU", weekday::MON),
+ ("LV", weekday::MON),
+ ("LY", weekday::SAT),
+ ("MC", weekday::MON),
+ ("MD", weekday::MON),
+ ("ME", weekday::MON),
+ ("MH", weekday::SUN),
+ ("MK", weekday::MON),
+ ("MM", weekday::SUN),
+ ("MN", weekday::MON),
+ ("MO", weekday::SUN),
+ ("MQ", weekday::MON),
+ ("MT", weekday::SUN),
+ ("MV", weekday::FRI),
+ ("MX", weekday::SUN),
+ ("MY", weekday::MON),
+ ("MZ", weekday::SUN),
+ ("NI", weekday::SUN),
+ ("NL", weekday::MON),
+ ("NO", weekday::MON),
+ ("NP", weekday::SUN),
+ ("NZ", weekday::MON),
+ ("OM", weekday::SAT),
+ ("PA", weekday::SUN),
+ ("PE", weekday::SUN),
+ ("PH", weekday::SUN),
+ ("PK", weekday::SUN),
+ ("PL", weekday::MON),
+ ("PR", weekday::SUN),
+ ("PT", weekday::SUN),
+ ("PY", weekday::SUN),
+ ("QA", weekday::SAT),
+ ("RE", weekday::MON),
+ ("RO", weekday::MON),
+ ("RS", weekday::MON),
+ ("RU", weekday::MON),
+ ("SA", weekday::SUN),
+ ("SD", weekday::SAT),
+ ("SE", weekday::MON),
+ ("SG", weekday::SUN),
+ ("SI", weekday::MON),
+ ("SK", weekday::MON),
+ ("SM", weekday::MON),
+ ("SV", weekday::SUN),
+ ("SY", weekday::SAT),
+ ("TH", weekday::SUN),
+ ("TJ", weekday::MON),
+ ("TM", weekday::MON),
+ ("TR", weekday::MON),
+ ("TT", weekday::SUN),
+ ("TW", weekday::SUN),
+ ("UA", weekday::MON),
+ ("UM", weekday::SUN),
+ ("US", weekday::SUN),
+ ("UY", weekday::MON),
+ ("UZ", weekday::MON),
+ ("VA", weekday::MON),
+ ("VE", weekday::SUN),
+ ("VI", weekday::SUN),
+ ("VN", weekday::MON),
+ ("WS", weekday::SUN),
+ ("XK", weekday::MON),
+ ("YE", weekday::SUN),
+ ("ZA", weekday::SUN),
+ ("ZW", weekday::SUN),
+];
+// Trie over `DATA`, built by the constructor in the `static` initializer (i.e. at compile time).
+// NOTE(review): `539` is presumably the exact byte length of the built trie — confirm.
+static TRIE: ZeroTrieSimpleAscii<[u8; 539]> = ZeroTrieSimpleAscii::from_sorted_str_tuples(DATA);
+
+static TRIE_PHF: ZeroTriePerfectHash<[u8; 567]> = ZeroTriePerfectHash::from_store([
+ 225, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 0, 15, 0,
+ 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 79, 65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 74, 48, 76,
+ 78, 77, 80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
+ 14, 41, 59, 74, 86, 88, 90, 92, 98, 100, 142, 181, 208, 226, 241, 253, 31, 43, 67, 85, 94, 97,
+ 121, 136, 178, 65, 134, 196, 69, 79, 83, 85, 1, 2, 3, 129, 129, 129, 129, 201, 65, 68, 69, 71,
+ 73, 75, 77, 86, 89, 1, 2, 3, 4, 5, 6, 7, 8, 135, 134, 129, 135, 129, 129, 129, 135, 134, 198,
+ 72, 74, 77, 82, 84, 87, 1, 2, 3, 4, 5, 135, 129, 129, 129, 135, 135, 197, 65, 77, 83, 89, 90,
+ 1, 2, 3, 4, 129, 135, 135, 129, 129, 196, 65, 69, 73, 78, 1, 2, 3, 129, 135, 135, 129, 83, 135,
+ 75, 129, 69, 135, 194, 65, 87, 1, 135, 135, 77, 134, 206, 68, 69, 70, 71, 73, 76, 77, 78, 82,
+ 83, 84, 85, 88, 90, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 129, 134, 134, 135, 129, 129,
+ 129, 129, 129, 135, 129, 129, 129, 129, 205, 65, 68, 69, 71, 72, 77, 78, 82, 83, 84, 87, 89,
+ 90, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 129, 135, 129, 129, 134, 129, 129, 135, 135, 135,
+ 135, 129, 135, 201, 65, 72, 76, 77, 78, 79, 82, 89, 90, 1, 2, 3, 4, 5, 6, 7, 8, 135, 129, 129,
+ 129, 135, 135, 129, 129, 129, 198, 69, 74, 75, 77, 79, 90, 1, 2, 3, 4, 5, 129, 134, 129, 135,
+ 135, 134, 197, 67, 69, 71, 83, 84, 1, 2, 3, 4, 129, 129, 134, 129, 135, 196, 73, 74, 79, 82, 1,
+ 2, 3, 129, 129, 129, 129, 199, 66, 69, 70, 80, 82, 84, 85, 14, 15, 16, 17, 18, 19, 129, 45, 97,
+ 108, 116, 45, 118, 97, 114, 105, 97, 110, 116, 135, 129, 129, 129, 129, 135, 135, 196, 75, 78,
+ 82, 85, 1, 2, 3, 135, 135, 129, 129, 200, 68, 69, 76, 78, 81, 82, 83, 84, 1, 2, 3, 4, 5, 6, 7,
+ 135, 129, 135, 135, 134, 134, 129, 129, 198, 69, 71, 72, 82, 87, 90, 1, 2, 3, 4, 5, 135, 129,
+ 135, 135, 134, 129, 195, 77, 79, 80, 1, 2, 135, 134, 135, 48, 49, 129, 200, 65, 66, 73, 75, 84,
+ 85, 86, 89, 1, 2, 3, 4, 5, 6, 7, 135, 129, 129, 129, 129, 129, 129, 134, 197, 73, 76, 79, 80,
+ 90, 1, 2, 3, 4, 135, 129, 129, 135, 129, 206, 67, 68, 69, 72, 75, 77, 78, 79, 81, 84, 86, 88,
+ 89, 90, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 129, 129, 129, 135, 129, 135, 129, 135, 129,
+ 135, 133, 135, 129, 135, 200, 65, 69, 72, 75, 76, 82, 84, 89, 1, 2, 3, 4, 5, 6, 7, 135, 135,
+ 135, 135, 129, 135, 135, 135,
+]);
+
+/// Returns `dummy` unchanged after routing it through a volatile read; intended
+/// to keep the optimizer from reasoning about the value.
+// NOTE(review): `std::hint::black_box` looks like a safe replacement for this
+// helper — confirm before switching.
+fn black_box<T>(dummy: T) -> T {
+ unsafe {
+ // SAFETY: `&dummy` is a valid, aligned reference to an initialized `T`, so it
+ // may be read; the read yields a bitwise copy of the value.
+ let ret = std::ptr::read_volatile(&dummy);
+ // Forget the original so that only the copy in `ret` is ever dropped.
+ std::mem::forget(dummy);
+ ret
+ }
+}
+
+/// Looks up the region `MV` in the perfect-hash trie and checks that it maps to Friday.
+fn main() {
+ // Un-comment to re-generate the bytes (printed to the terminal)
+ // let trie_phf = DATA.iter().copied().collect::<ZeroTriePerfectHash<Vec<_>>>();
+ // assert_eq!(trie_phf.as_bytes(), TRIE_PHF.as_bytes());
+
+ assert_eq!(black_box(TRIE_PHF).get(b"MV"), Some(weekday::FRI));
+}
diff --git a/vendor/zerotrie/src/builder/branch_meta.rs b/vendor/zerotrie/src/builder/branch_meta.rs
new file mode 100644
index 00000000..20db3f22
--- /dev/null
+++ b/vendor/zerotrie/src/builder/branch_meta.rs
@@ -0,0 +1,29 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+/// Intermediate metadata for a branch node under construction.
+///
+/// The builder keeps one of these per branch target on its lengths stack until
+/// the whole branch node can be written.
+#[derive(Debug, Clone, Copy)]
+pub(crate) struct BranchMeta {
+ /// The lead byte for this branch. Formerly it was required to be an ASCII byte, but now
+ /// it can be any byte (the field name is historical).
+ pub ascii: u8,
+ /// The size in bytes of the trie data reachable from this branch.
+ pub local_length: usize,
+ /// The size in bytes of this and all later sibling branches.
+ pub cumulative_length: usize,
+ /// The number of sibling branches from this one onward, i.e. this branch plus
+ /// all later siblings.
+ pub count: usize,
+}
+
+impl BranchMeta {
+ /// Creates a new empty [`BranchMeta`] with every field set to zero.
+ ///
+ /// This is an inherent `const fn` rather than a `Default` impl.
+ // NOTE(review): presumably so that it can be called in const contexts — confirm.
+ pub const fn default() -> Self {
+ BranchMeta {
+ ascii: 0,
+ cumulative_length: 0,
+ local_length: 0,
+ count: 0,
+ }
+ }
+}
diff --git a/vendor/zerotrie/src/builder/bytestr.rs b/vendor/zerotrie/src/builder/bytestr.rs
new file mode 100644
index 00000000..9910efd7
--- /dev/null
+++ b/vendor/zerotrie/src/builder/bytestr.rs
@@ -0,0 +1,121 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::borrow::Borrow;
+
+#[cfg(feature = "serde")]
+use alloc::boxed::Box;
+
+/// A struct transparent over `[u8]` with convenient helper functions.
+///
+/// Ordering and equality are derived, so they are those of the underlying byte slice.
+#[repr(transparent)]
+#[derive(PartialEq, Eq, PartialOrd, Ord)]
+pub(crate) struct ByteStr([u8]);
+
+impl ByteStr {
+ /// Reinterprets a slice of `(&[u8], usize)` pairs as `(&ByteStr, usize)` pairs
+ /// without copying.
+ // NOTE(review): this transmutes between slices of two different tuple types, so it
+ // relies on both tuples being laid out identically — confirm that this is guaranteed.
+ pub const fn from_byte_slice_with_value<'a, 'l>(
+ input: &'l [(&'a [u8], usize)],
+ ) -> &'l [(&'a ByteStr, usize)] {
+ // Safety: [u8] and ByteStr have the same layout and invariants
+ unsafe { core::mem::transmute(input) }
+ }
+
+ /// Reinterprets a slice of `(&str, usize)` pairs as `(&ByteStr, usize)` pairs
+ /// without copying.
+ // NOTE(review): same tuple-layout assumption as `from_byte_slice_with_value` — confirm.
+ pub const fn from_str_slice_with_value<'a, 'l>(
+ input: &'l [(&'a str, usize)],
+ ) -> &'l [(&'a ByteStr, usize)] {
+ // Safety: str and ByteStr have the same layout, and ByteStr is less restrictive
+ unsafe { core::mem::transmute(input) }
+ }
+
+ /// Borrows a byte slice as a [`ByteStr`].
+ pub fn from_bytes(input: &[u8]) -> &Self {
+ // Safety: [u8] and ByteStr have the same layout and invariants
+ unsafe { core::mem::transmute(input) }
+ }
+
+ /// Converts an owned boxed byte slice into a boxed [`ByteStr`].
+ #[cfg(feature = "serde")]
+ pub fn from_boxed_bytes(input: Box<[u8]>) -> Box<Self> {
+ // Safety: [u8] and ByteStr have the same layout and invariants
+ unsafe { core::mem::transmute(input) }
+ }
+
+ /// Borrows the bytes of a string slice as a [`ByteStr`].
+ #[allow(dead_code)] // may want this in the future
+ pub fn from_str(input: &str) -> &Self {
+ Self::from_bytes(input.as_bytes())
+ }
+
+ /// Returns a [`ByteStr`] of length zero.
+ #[allow(dead_code)] // may want this in the future
+ pub fn empty() -> &'static Self {
+ Self::from_bytes(&[])
+ }
+
+ /// Returns the underlying bytes.
+ #[allow(dead_code)] // not used in all features
+ pub const fn as_bytes(&self) -> &[u8] {
+ &self.0
+ }
+
+ /// Returns the number of bytes.
+ pub const fn len(&self) -> usize {
+ self.0.len()
+ }
+
+ /// Returns whether every byte is ASCII (`true` for an empty string).
+ #[allow(dead_code)] // not used in all features
+ pub fn is_all_ascii(&self) -> bool {
+ for byte in self.0.iter() {
+ if !byte.is_ascii() {
+ return false;
+ }
+ }
+ true
+ }
+
+ /// Returns the byte at the given index, or `None` if out of bounds.
+ #[allow(dead_code)] // may want this in the future
+ pub(crate) fn byte_at(&self, index: usize) -> Option<u8> {
+ self.0.get(index).copied()
+ }
+
+ /// Returns the byte at the given index, panicking if out of bounds.
+ pub(crate) const fn byte_at_or_panic(&self, index: usize) -> u8 {
+ self.0[index]
+ }
+
+ /// Const function to evaluate `self < other`.
+ ///
+ /// Bytes are compared position by position; if one string is a prefix of the
+ /// other, the shorter one is the smaller. Equal strings return `false`.
+ /// (The name is spelled "then" rather than "than".)
+ pub(crate) const fn is_less_then(&self, other: &Self) -> bool {
+ let mut i = 0;
+ while i < self.len() && i < other.len() {
+ if self.0[i] < other.0[i] {
+ return true;
+ }
+ if self.0[i] > other.0[i] {
+ return false;
+ }
+ i += 1;
+ }
+ self.len() < other.len()
+ }
+
+ /// Const function to evaluate `self[..prefix_len] == other[..prefix_len]`
+ ///
+ /// Panics if `prefix_len` exceeds the length of either string.
+ pub(crate) const fn prefix_eq(&self, other: &ByteStr, prefix_len: usize) -> bool {
+ assert!(prefix_len <= self.len());
+ assert!(prefix_len <= other.len());
+ let mut i = 0;
+ while i < prefix_len {
+ if self.0[i] != other.0[i] {
+ return false;
+ }
+ i += 1;
+ }
+ true
+ }
+}
+
+// Lets a `ByteStr` be borrowed as its raw bytes.
+impl Borrow<[u8]> for ByteStr {
+ fn borrow(&self) -> &[u8] {
+ self.as_bytes()
+ }
+}
+
+// Same as the impl for `ByteStr`, for the boxed form; only available with `alloc`.
+#[cfg(feature = "alloc")]
+impl Borrow<[u8]> for alloc::boxed::Box<ByteStr> {
+ fn borrow(&self) -> &[u8] {
+ self.as_bytes()
+ }
+}
diff --git a/vendor/zerotrie/src/builder/konst/builder.rs b/vendor/zerotrie/src/builder/konst/builder.rs
new file mode 100644
index 00000000..ecfd9514
--- /dev/null
+++ b/vendor/zerotrie/src/builder/konst/builder.rs
@@ -0,0 +1,330 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::super::branch_meta::BranchMeta;
+use super::super::bytestr::ByteStr;
+use super::store::const_for_each;
+use super::store::ConstArrayBuilder;
+use super::store::ConstLengthsStack;
+use super::store::ConstSlice;
+use crate::error::ZeroTrieBuildError;
+use crate::varint;
+
+/// A low-level builder for ZeroTrieSimpleAscii. Works in const contexts.
+///
+/// `N` is the size in bytes of the backing buffer. The trie is assembled by
+/// prepending nodes to the front of that buffer.
+pub(crate) struct ZeroTrieBuilderConst<const N: usize> {
+ data: ConstArrayBuilder<N, u8>,
+}
+
+impl<const N: usize> ZeroTrieBuilderConst<N> {
+ /// Non-const function that returns the current trie data as a slice.
+ ///
+ /// Only compiled when the `litemap` feature is enabled.
+ #[cfg(feature = "litemap")]
+ pub fn as_bytes(&self) -> &[u8] {
+ self.data.as_const_slice().as_slice()
+ }
+
+ /// Consumes the builder and returns the trie data, panicking if the buffer is
+ /// the wrong size.
+ pub const fn build_or_panic(self) -> [u8; N] {
+ self.data.const_build_or_panic()
+ }
+
+ /// Creates a new empty builder backed by a zero-filled buffer of `N` bytes.
+ // NOTE(review): the second argument `N` is presumably the initial cursor position
+ // (the end of the buffer), since nodes are pushed onto the front — confirm in
+ // `ConstArrayBuilder::new_empty`.
+ pub const fn new() -> Self {
+ Self {
+ data: ConstArrayBuilder::new_empty([0; N], N),
+ }
+ }
+
+ /// Prepends an ASCII node to the front of the builder. Returns the new builder
+ /// and the delta in length, which is always 1.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `ascii` is 128 or greater.
+ #[must_use]
+ const fn prepend_ascii(self, ascii: u8) -> (Self, usize) {
+ if ascii >= 128 {
+ panic!("Non-ASCII not supported in ZeroTrieSimpleAscii");
+ }
+ let data = self.data.const_push_front_or_panic(ascii);
+ (Self { data }, 1)
+ }
+
+ /// Prepends a value node to the front of the builder. Returns the new builder
+ /// and the delta in length, which depends on the size of the varint.
+ #[must_use]
+ const fn prepend_value(self, value: usize) -> (Self, usize) {
+ let mut data = self.data;
+ let varint_array = varint::write_varint_meta3(value);
+ data = data.const_extend_front_or_panic(varint_array.as_const_slice());
+ // Tag the first byte of the freshly written varint with the `0b10` lead bits.
+ data = data.const_bitor_assign(0, 0b10000000);
+ (Self { data }, varint_array.len())
+ }
+
+ /// Prepends a branch node to the front of the builder. Returns the new builder
+ /// and the delta in length, which depends on the size of the varint.
+ ///
+ /// Only the branch head is written here; the caller writes the lookup and
+ /// offset tables that follow it.
+ #[must_use]
+ const fn prepend_branch(self, value: usize) -> (Self, usize) {
+ let mut data = self.data;
+ let varint_array = varint::write_varint_meta2(value);
+ data = data.const_extend_front_or_panic(varint_array.as_const_slice());
+ // Tag the first byte of the freshly written varint with the `0b11` lead bits.
+ data = data.const_bitor_assign(0, 0b11000000);
+ (Self { data }, varint_array.len())
+ }
+
+ /// Prepends multiple arbitrary bytes to the front of the builder. Returns the new builder
+ /// and the delta in length, which is the length of the slice.
+ #[must_use]
+ const fn prepend_slice(self, s: ConstSlice<u8>) -> (Self, usize) {
+ let mut data = self.data;
+ // Push from the last byte to the first so that the bytes end up in their
+ // original order at the front of the buffer.
+ let mut i = s.len();
+ while i > 0 {
+ data = data.const_push_front_or_panic(*s.get_or_panic(i - 1));
+ i -= 1;
+ }
+ (Self { data }, s.len())
+ }
+
+ /// Prepends `n` zero bytes to the front of the builder. Returns the new builder.
+ ///
+ /// Unlike the other `prepend_*` functions, this does not return the length
+ /// delta; the caller accounts for `n` itself.
+ #[must_use]
+ const fn prepend_n_zeros(self, n: usize) -> Self {
+ let mut data = self.data;
+ let mut i = 0;
+ while i < n {
+ data = data.const_push_front_or_panic(0);
+ i += 1;
+ }
+ Self { data }
+ }
+
+ /// Performs the operation `self[index] |= bits` and returns the updated builder.
+ const fn bitor_assign_at(self, index: usize, bits: u8) -> Self {
+ let mut data = self.data;
+ data = data.const_bitor_assign(index, bits);
+ Self { data }
+ }
+
+ /// Creates a new builder containing the elements in the given slice of key/value pairs.
+ ///
+ /// `K` is the stack size of the lengths stack. If you get an error such as
+ /// "AsciiTrie Builder: Need more stack", try increasing `K`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the items are not sorted. The check requires each key to be
+ /// strictly greater than the previous one, so duplicate keys also panic.
+ pub const fn from_tuple_slice<'a, const K: usize>(
+ items: &[(&'a ByteStr, usize)],
+ ) -> Result<Self, ZeroTrieBuildError> {
+ let items = ConstSlice::from_slice(items);
+ // Validate the ordering up front; the build itself assumes sorted input.
+ let mut prev: Option<&'a ByteStr> = None;
+ const_for_each!(items, (ascii_str, _), {
+ match prev {
+ None => (),
+ Some(prev) => {
+ if !prev.is_less_then(ascii_str) {
+ panic!("Strings in ByteStr constructor are not sorted");
+ }
+ }
+ };
+ prev = Some(ascii_str)
+ });
+ Self::from_sorted_const_tuple_slice::<K>(items)
+ }
+
+ /// Creates a new builder containing the elements in the given slice of key/value pairs.
+ ///
+ /// Assumes that the items are sorted. If they are not, unexpected behavior may occur.
+ ///
+ /// `K` is the stack size of the lengths stack. If you get an error such as
+ /// "AsciiTrie Builder: Need more stack", try increasing `K`.
+ ///
+ /// As written, this function only ever returns `Ok`; failures inside the build
+ /// step surface as panics instead.
+ pub const fn from_sorted_const_tuple_slice<const K: usize>(
+ items: ConstSlice<(&ByteStr, usize)>,
+ ) -> Result<Self, ZeroTrieBuildError> {
+ let mut result = Self::new();
+ let total_size;
+ (result, total_size) = result.create_or_panic::<K>(items);
+ // The length reported by the build must match what was actually written.
+ debug_assert!(total_size == result.data.len());
+ Ok(result)
+ }
+
+ /// The actual builder algorithm. For an explanation, see [`crate::builder`].
+ ///
+ /// Walks the sorted items from the last one to the first, prepending nodes to
+ /// the buffer as it goes. Returns the builder together with the number of bytes
+ /// written. `K` is the capacity of the stack of pending branch metadata.
+ ///
+ /// # Panics
+ ///
+ /// Panics on non-ASCII key bytes, when the buffer or the lengths stack is too
+ /// small, or when the trie capacity is exceeded.
+ #[must_use]
+ const fn create_or_panic<const K: usize>(
+ mut self,
+ all_items: ConstSlice<(&ByteStr, usize)>,
+ ) -> (Self, usize) {
+ let mut prefix_len = match all_items.last() {
+ Some(x) => x.0.len(),
+ // Empty slice:
+ None => return (Self::new(), 0),
+ };
+ // Initialize the main loop to point at the last string.
+ // `i..j` is the range of items currently being processed; within the loop,
+ // the first and last item of that range are asserted to share `prefix_len` bytes.
+ let mut lengths_stack = ConstLengthsStack::<K>::new();
+ let mut i = all_items.len() - 1;
+ let mut j = all_items.len();
+ let mut current_len = 0;
+ // Start the main loop.
+ loop {
+ let item_i = all_items.get_or_panic(i);
+ let item_j = all_items.get_or_panic(j - 1);
+ debug_assert!(item_i.0.prefix_eq(item_j.0, prefix_len));
+ // Check if we need to add a value node here.
+ if item_i.0.len() == prefix_len {
+ let len;
+ (self, len) = self.prepend_value(item_i.1);
+ current_len += len;
+ }
+ if prefix_len == 0 {
+ // All done! Leave the main loop.
+ break;
+ }
+ // Reduce the prefix length by 1 and recalculate i and j.
+ prefix_len -= 1;
+ let mut new_i = i;
+ let mut new_j = j;
+ let mut ascii_i = item_i.0.byte_at_or_panic(prefix_len);
+ let mut ascii_j = item_j.0.byte_at_or_panic(prefix_len);
+ debug_assert!(ascii_i == ascii_j);
+ let key_ascii = ascii_i;
+ // Extend the range backward over earlier items that share the shorter prefix,
+ // tracking in `ascii_i` the byte at `prefix_len` of the earliest one seen.
+ loop {
+ if new_i == 0 {
+ break;
+ }
+ let candidate = all_items.get_or_panic(new_i - 1).0;
+ if candidate.len() < prefix_len {
+ // Too short
+ break;
+ }
+ if item_i.0.prefix_eq(candidate, prefix_len) {
+ new_i -= 1;
+ } else {
+ break;
+ }
+ if candidate.len() == prefix_len {
+ // A string that equals the prefix does not take part in the branch node.
+ break;
+ }
+ let candidate = candidate.byte_at_or_panic(prefix_len);
+ if candidate != ascii_i {
+ ascii_i = candidate;
+ }
+ }
+ // Extend the range forward over later items that share the shorter prefix,
+ // tracking in `ascii_j` the byte at `prefix_len` of the latest one seen.
+ loop {
+ if new_j == all_items.len() {
+ break;
+ }
+ let candidate = all_items.get_or_panic(new_j).0;
+ if candidate.len() < prefix_len {
+ // Too short
+ break;
+ }
+ if item_j.0.prefix_eq(candidate, prefix_len) {
+ new_j += 1;
+ } else {
+ break;
+ }
+ if candidate.len() == prefix_len {
+ panic!("A shorter string should be earlier in the sequence");
+ }
+ let candidate = candidate.byte_at_or_panic(prefix_len);
+ if candidate != ascii_j {
+ ascii_j = candidate;
+ }
+ }
+ // If there are no different bytes at this prefix level, we can add an ASCII or Span
+ // node and then continue to the next iteration of the main loop.
+ if ascii_i == key_ascii && ascii_j == key_ascii {
+ let len;
+ (self, len) = self.prepend_ascii(ascii_i);
+ current_len += len;
+ debug_assert!(i == new_i || i == new_i + 1);
+ i = new_i;
+ debug_assert!(j == new_j);
+ continue;
+ }
+ // If i and j changed, we are a target of a branch node.
+ if ascii_j == key_ascii {
+ // We are the _last_ target of a branch node.
+ lengths_stack = lengths_stack.push_or_panic(BranchMeta {
+ ascii: key_ascii,
+ cumulative_length: current_len,
+ local_length: current_len,
+ count: 1,
+ });
+ } else {
+ // We are _not the last_ target of a branch node: accumulate on top of
+ // the metadata of the sibling that was pushed before us.
+ let BranchMeta {
+ cumulative_length,
+ count,
+ ..
+ } = lengths_stack.peek_or_panic();
+ lengths_stack = lengths_stack.push_or_panic(BranchMeta {
+ ascii: key_ascii,
+ cumulative_length: cumulative_length + current_len,
+ local_length: current_len,
+ count: count + 1,
+ });
+ }
+ if ascii_i != key_ascii {
+ // We are _not the first_ target of a branch node.
+ // Set the cursor to the previous string and continue the loop.
+ j = i;
+ i -= 1;
+ prefix_len = all_items.get_or_panic(i).0.len();
+ current_len = 0;
+ continue;
+ }
+ // Branch (first)
+ // The top of the stack now holds the totals for the whole branch node.
+ let (total_length, total_count) = {
+ let BranchMeta {
+ cumulative_length,
+ count,
+ ..
+ } = lengths_stack.peek_or_panic();
+ (cumulative_length, count)
+ };
+ let branch_metas;
+ (lengths_stack, branch_metas) = lengths_stack.pop_many_or_panic(total_count);
+ let original_keys = branch_metas.map_to_ascii_bytes();
+ // Write out the offset table
+ current_len = total_length;
+ const USIZE_BITS: usize = core::mem::size_of::<usize>() * 8;
+ // `w` is the index of the highest non-zero byte of `total_length`, so each
+ // offset is written using `w + 1` bytes.
+ let w = (USIZE_BITS - (total_length.leading_zeros() as usize) - 1) / 8;
+ if w > 3 {
+ panic!("ZeroTrie capacity exceeded");
+ }
+ // One pass per offset byte: pass `k` writes byte `k` of every offset.
+ let mut k = 0;
+ while k <= w {
+ self = self.prepend_n_zeros(total_count - 1);
+ current_len += total_count - 1;
+ let mut l = 0;
+ let mut length_to_write = 0;
+ while l < total_count {
+ let BranchMeta { local_length, .. } = *branch_metas
+ .as_const_slice()
+ .get_or_panic(total_count - l - 1);
+ // Shift right by `k` bytes to select the byte written in this pass.
+ let mut adjusted_length = length_to_write;
+ let mut m = 0;
+ while m < k {
+ adjusted_length >>= 8;
+ m += 1;
+ }
+ // The first target gets no table entry, hence the `l > 0` guard
+ // and the `total_count - 1` slots reserved above.
+ if l > 0 {
+ self = self.bitor_assign_at(l - 1, adjusted_length as u8);
+ }
+ l += 1;
+ length_to_write += local_length;
+ }
+ k += 1;
+ }
+ // Write out the lookup table
+ assert!(0 < total_count && total_count <= 256);
+ // Pack the offset width above the low 8 bits of the count (256 wraps to 0).
+ let branch_value = (w << 8) + (total_count & 0xff);
+ let slice_len;
+ (self, slice_len) = self.prepend_slice(original_keys.as_const_slice());
+ let branch_len;
+ (self, branch_len) = self.prepend_branch(branch_value);
+ current_len += slice_len + branch_len;
+ i = new_i;
+ j = new_j;
+ }
+ // Every pending branch must have been written out by now.
+ assert!(lengths_stack.is_empty());
+ (self, current_len)
+ }
+}
diff --git a/vendor/zerotrie/src/builder/konst/mod.rs b/vendor/zerotrie/src/builder/konst/mod.rs
new file mode 100644
index 00000000..275af4bf
--- /dev/null
+++ b/vendor/zerotrie/src/builder/konst/mod.rs
@@ -0,0 +1,9 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod builder;
+mod store;
+
+pub(crate) use builder::*;
+pub(crate) use store::ConstArrayBuilder;
diff --git a/vendor/zerotrie/src/builder/konst/store.rs b/vendor/zerotrie/src/builder/konst/store.rs
new file mode 100644
index 00000000..f5885177
--- /dev/null
+++ b/vendor/zerotrie/src/builder/konst/store.rs
@@ -0,0 +1,341 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! This module contains internal collections for the const builder.
+
+use super::super::branch_meta::BranchMeta;
+
+/// A const-friendly slice type. It is backed by a full slice but is primarily intended
+/// to represent subslices of the full slice. We need this only because we can't take
+/// subslices in const Rust.
+///
+/// The represented window is `full_slice[start..limit]`. Note that
+/// [`ConstSlice::from_manual_slice`] stores `start` and `limit` without validating them,
+/// so callers are expected to pass `start <= limit <= full_slice.len()`.
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct ConstSlice<'a, T> {
+ /// The full slice.
+ full_slice: &'a [T],
+ /// The start index of the slice represented by this [`ConstSlice`].
+ start: usize,
+ /// The non-inclusive end index of the slice represented by this [`ConstSlice`].
+ limit: usize,
+}
+
+impl<'a, T> ConstSlice<'a, T> {
+ /// Creates a [`ConstSlice`] representing an entire slice.
+ pub const fn from_slice(other: &'a [T]) -> Self {
+ ConstSlice {
+ full_slice: other,
+ start: 0,
+ limit: other.len(),
+ }
+ }
+
+ /// Creates a [`ConstSlice`] with the given start and limit.
+ ///
+ /// The bounds are stored as given: no check is made here that `start <= limit` or
+ /// that `limit` lies within `full_slice`.
+ pub const fn from_manual_slice(full_slice: &'a [T], start: usize, limit: usize) -> Self {
+ ConstSlice {
+ full_slice,
+ start,
+ limit,
+ }
+ }
+
+ /// Returns the length of the [`ConstSlice`].
+ pub const fn len(&self) -> usize {
+ self.limit - self.start
+ }
+
+ /// Gets the element at `index`, panicking if not present.
+ ///
+ /// `index` is relative to the start of this [`ConstSlice`]. The only bounds check is
+ /// the one performed by indexing the backing slice; `index` is not compared against
+ /// `limit`, so an index past the window but inside the backing slice does not panic.
+ pub const fn get_or_panic(&self, index: usize) -> &T {
+ &self.full_slice[index + self.start]
+ }
+
+ /// Gets the first element or `None` if empty.
+ #[cfg(test)]
+ pub const fn first(&self) -> Option<&T> {
+ if self.len() == 0 {
+ None
+ } else {
+ Some(self.get_or_panic(0))
+ }
+ }
+
+ /// Gets the last element or `None` if empty.
+ pub const fn last(&self) -> Option<&T> {
+ if self.len() == 0 {
+ None
+ } else {
+ Some(self.get_or_panic(self.len() - 1))
+ }
+ }
+
+ /// Gets a subslice of this slice.
+ ///
+ /// `new_start` and `new_limit` are relative to this [`ConstSlice`], not to the backing
+ /// slice.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `new_start > new_limit` or if `new_limit > self.len()`.
+ #[cfg(test)]
+ pub const fn get_subslice_or_panic(
+ &self,
+ new_start: usize,
+ new_limit: usize,
+ ) -> ConstSlice<'a, T> {
+ assert!(new_start <= new_limit);
+ assert!(new_limit <= self.len());
+ ConstSlice {
+ full_slice: self.full_slice,
+ start: self.start + new_start,
+ limit: self.start + new_limit,
+ }
+ }
+
+ /// Non-const function that returns this [`ConstSlice`] as a regular slice.
+ #[cfg(any(test, feature = "alloc"))]
+ pub fn as_slice(&self) -> &'a [T] {
+ &self.full_slice[self.start..self.limit]
+ }
+}
+
+/// Wraps an entire regular slice; delegates to [`ConstSlice::from_slice`].
+impl<'a, T> From<&'a [T]> for ConstSlice<'a, T> {
+ fn from(other: &'a [T]) -> Self {
+ Self::from_slice(other)
+ }
+}
+
+/// A const-friendly mutable data structure backed by an array.
+///
+/// The initialized elements are `full_array[start..limit]`.
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct ConstArrayBuilder<const N: usize, T> {
+ /// Backing storage of fixed capacity `N`.
+ full_array: [T; N],
+ /// Index of the first initialized element; prepend operations decrement it.
+ start: usize,
+ /// Non-inclusive end index of the initialized elements.
+ limit: usize,
+}
+
+/// Builds an empty [`ConstArrayBuilder`] whose backing array is filled with default values.
+///
+/// The cursor is placed at index 0, so `start == limit == 0`.
+impl<const N: usize, T: Default> Default for ConstArrayBuilder<N, T> {
+ fn default() -> Self {
+ // Mapping over `[(); N]` produces a `[T; N]` without requiring `T: Copy`.
+ Self::new_empty([(); N].map(|_| Default::default()), 0)
+ }
+}
+
+impl<const N: usize, T> ConstArrayBuilder<N, T> {
+ /// Creates a new, empty builder of the given size. `cursor` indicates where in the
+ /// array new elements will be inserted first. Since we use a lot of prepend operations,
+ /// it is common to set `cursor` to `N`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `cursor > N`.
+ pub const fn new_empty(full_array: [T; N], cursor: usize) -> Self {
+ assert!(cursor <= N);
+ Self {
+ full_array,
+ start: cursor,
+ limit: cursor,
+ }
+ }
+
+ /// Creates a new builder with some initial content in `[start, limit)`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `start > limit` or if `limit > N`.
+ pub const fn from_manual_slice(full_array: [T; N], start: usize, limit: usize) -> Self {
+ assert!(start <= limit);
+ assert!(limit <= N);
+ Self {
+ full_array,
+ start,
+ limit,
+ }
+ }
+
+ /// Returns the number of initialized elements in the builder.
+ pub const fn len(&self) -> usize {
+ self.limit - self.start
+ }
+
+ /// Whether there are no initialized elements in the builder.
+ #[allow(dead_code)]
+ pub const fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// Returns the initialized elements as a [`ConstSlice`].
+ ///
+ /// The returned [`ConstSlice`] borrows the whole backing array and carries this
+ /// builder's `start` and `limit` as its window.
+ pub const fn as_const_slice(&self) -> ConstSlice<T> {
+ ConstSlice::from_manual_slice(&self.full_array, self.start, self.limit)
+ }
+
+ /// Non-const function that returns a slice of the initialized elements.
+ #[cfg(any(test, feature = "alloc"))]
+ pub fn as_slice(&self) -> &[T] {
+ &self.full_array[self.start..self.limit]
+ }
+}
+
+// Certain functions that involve dropping `T` require that it be `Copy`
+impl<const N: usize, T: Copy> ConstArrayBuilder<N, T> {
+ /// Takes a fully initialized builder as an array. Panics if the builder is not
+ /// fully initialized.
+ ///
+ /// # Panics
+ ///
+ /// Panics unless `start == 0` and `limit == N`. The panic message always begins with
+ /// "Buffer too large. Size needed: " and is built from the current number of
+ /// initialized elements, which lets callers read off the exact `N` to use.
+ pub const fn const_build_or_panic(self) -> [T; N] {
+ if self.start != 0 || self.limit != N {
+ let actual_len = self.limit - self.start;
+ const PREFIX: &[u8; 31] = b"Buffer too large. Size needed: ";
+ // NOTE(review): `const_fmt_int` presumably appends the decimal digits of
+ // `actual_len` after the prefix; confirm in `crate::helpers`.
+ let len_bytes: [u8; PREFIX.len() + crate::helpers::MAX_USIZE_LEN_AS_DIGITS] =
+ crate::helpers::const_fmt_int(*PREFIX, actual_len);
+ let Ok(len_str) = core::str::from_utf8(&len_bytes) else {
+ unreachable!()
+ };
+ panic!("{}", len_str);
+ }
+ self.full_array
+ }
+
+ /// Prepends an element to the front of the builder, panicking if there is no room.
+ ///
+ /// # Panics
+ ///
+ /// Panics with "Buffer too small" if `start` is already 0.
+ pub const fn const_push_front_or_panic(mut self, value: T) -> Self {
+ if self.start == 0 {
+ panic!("Buffer too small");
+ }
+ self.start -= 1;
+ self.full_array[self.start] = value;
+ self
+ }
+
+ /// Prepends multiple elements to the front of the builder, panicking if there is no room.
+ ///
+ /// The elements of `other` keep their relative order in the builder.
+ ///
+ /// # Panics
+ ///
+ /// Panics with "Buffer too small" if fewer than `other.len()` slots are free before `start`.
+ pub const fn const_extend_front_or_panic(mut self, other: ConstSlice<T>) -> Self {
+ if self.start < other.len() {
+ panic!("Buffer too small");
+ }
+ self.start -= other.len();
+ // Write cursor into the backing array. `const_for_each!` declares its own loop
+ // counter, which macro hygiene keeps distinct from this `i`.
+ let mut i = self.start;
+ const_for_each!(other, byte, {
+ self.full_array[i] = *byte;
+ i += 1;
+ });
+ self
+ }
+}
+
+impl<const N: usize> ConstArrayBuilder<N, u8> {
+ /// Specialized function that performs `self[index] |= bits`
+ ///
+ /// `index` is relative to the first initialized element (`start`). The only bounds
+ /// check is the one performed by indexing the backing array; `index` is not compared
+ /// against `limit`.
+ pub const fn const_bitor_assign(mut self, index: usize, bits: u8) -> Self {
+ self.full_array[self.start + index] |= bits;
+ self
+ }
+}
+
+impl<const N: usize, T: Copy> ConstArrayBuilder<N, T> {
+ /// Swaps the elements at positions `i` and `j`.
+ ///
+ /// Both positions are relative to the first initialized element (`start`). Panics if
+ /// either resulting index is outside the backing array.
+ #[cfg(feature = "alloc")]
+ pub fn swap_or_panic(mut self, i: usize, j: usize) -> Self {
+ self.full_array.swap(self.start + i, self.start + j);
+ self
+ }
+}
+
+/// Evaluates a block over each element of a const slice. Takes three arguments:
+///
+/// 1. Expression that resolves to the [`ConstSlice`].
+/// 2. Token that will be assigned the value of the element.
+/// 3. Block to evaluate for each element.
+///
+/// The element is bound to the reference returned by `get_or_panic`. The slice
+/// expression is expanded in both the loop condition and the element access, so it is
+/// re-evaluated on every iteration and should be a cheap expression without side effects.
+macro_rules! const_for_each {
+ ($safe_const_slice:expr, $item:tt, $inner:expr) => {{
+ let mut i = 0;
+ while i < $safe_const_slice.len() {
+ let $item = $safe_const_slice.get_or_panic(i);
+ $inner;
+ i += 1;
+ }
+ }};
+}
+
+// Re-exporting the macro by path makes it usable crate-wide, including from code that
+// appears earlier in this file than the `macro_rules!` definition.
+pub(crate) use const_for_each;
+
+/// A data structure that holds up to K [`BranchMeta`] items.
+///
+/// Note: It should be possible to store the required data in the builder buffer itself,
+/// which would eliminate the need for this helper struct and the limit it imposes.
+pub(crate) struct ConstLengthsStack<const K: usize> {
+ /// Backing storage; slots `0..idx` hold `Some`, with the top of the stack at `idx - 1`.
+ data: [Option<BranchMeta>; K],
+ /// Number of items currently on the stack.
+ idx: usize,
+}
+
+/// Formats only the occupied portion of the stack (`data[0..idx]`), bottom first.
+impl<const K: usize> core::fmt::Debug for ConstLengthsStack<K> {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ self.as_slice().fmt(f)
+ }
+}
+
+impl<const K: usize> ConstLengthsStack<K> {
+ /// Creates a new empty [`ConstLengthsStack`].
+ pub const fn new() -> Self {
+ Self {
+ data: [None; K],
+ idx: 0,
+ }
+ }
+
+ /// Returns whether the stack is empty.
+ pub const fn is_empty(&self) -> bool {
+ self.idx == 0
+ }
+
+ /// Adds a [`BranchMeta`] to the stack, panicking if there is no room.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the stack already holds `K` items.
+ #[must_use]
+ pub const fn push_or_panic(mut self, meta: BranchMeta) -> Self {
+ if self.idx >= K {
+ // NOTE: `stringify!(K)` expands to the literal text `K`, not to the numeric
+ // value of the const parameter, so the message reads "(max K)".
+ panic!(concat!(
+ "AsciiTrie Builder: Need more stack (max ",
+ stringify!(K),
+ ")"
+ ));
+ }
+ self.data[self.idx] = Some(meta);
+ self.idx += 1;
+ self
+ }
+
+ /// Returns a copy of the [`BranchMeta`] on the top of the stack, panicking if
+ /// the stack is empty.
+ pub const fn peek_or_panic(&self) -> BranchMeta {
+ if self.idx == 0 {
+ panic!("AsciiTrie Builder: Attempted to peek from an empty stack");
+ }
+ self.get_or_panic(0)
+ }
+
+ /// Returns a copy of the [`BranchMeta`] at the specified index.
+ ///
+ /// `index` counts down from the top of the stack: 0 is the most recently pushed item.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `index` is not smaller than the number of items on the stack.
+ const fn get_or_panic(&self, index: usize) -> BranchMeta {
+ if self.idx <= index {
+ panic!("AsciiTrie Builder: Attempted to get too deep in a stack");
+ }
+ match self.data[self.idx - index - 1] {
+ Some(x) => x,
+ None => unreachable!(),
+ }
+ }
+
+ /// Removes many [`BranchMeta`]s from the stack, returning them in a [`ConstArrayBuilder`].
+ ///
+ /// Items are taken from the top of the stack downward and each one is prepended to
+ /// the result, so the returned builder lists them in the order they were pushed
+ /// (the top of the stack ends up last).
+ ///
+ /// # Panics
+ ///
+ /// Panics with "Not enough items in the ConstLengthsStack" if fewer than `len` items
+ /// are on the stack, and panics if `len` exceeds the 256-element result capacity.
+ pub const fn pop_many_or_panic(
+ mut self,
+ len: usize,
+ ) -> (Self, ConstArrayBuilder<256, BranchMeta>) {
+ debug_assert!(len <= 256);
+ // Check up front so that an underfull stack reports the intended message instead
+ // of failing on the subtraction `self.idx - ix - 1` below.
+ if len > self.idx {
+ panic!("Not enough items in the ConstLengthsStack");
+ }
+ let mut result = ConstArrayBuilder::new_empty([BranchMeta::default(); 256], 256);
+ let mut ix = 0;
+ while ix < len {
+ let i = self.idx - ix - 1;
+ result = result.const_push_front_or_panic(match self.data[i] {
+ Some(x) => x,
+ None => panic!("Not enough items in the ConstLengthsStack"),
+ });
+ ix += 1;
+ }
+ self.idx -= len;
+ (self, result)
+ }
+
+ /// Non-const function that returns the initialized elements as a slice.
+ fn as_slice(&self) -> &[Option<BranchMeta>] {
+ &self.data[0..self.idx]
+ }
+}
+
+impl<const K: usize> ConstArrayBuilder<K, BranchMeta> {
+ /// Converts this builder-array of [`BranchMeta`] to one of the `ascii` fields.
+ ///
+ /// Each `ascii` byte is prepended to the result while iterating front to back, so the
+ /// bytes in the returned builder are in the reverse order of the items in `self`.
+ pub const fn map_to_ascii_bytes(&self) -> ConstArrayBuilder<K, u8> {
+ let mut result = ConstArrayBuilder::new_empty([0; K], K);
+ let self_as_slice = self.as_const_slice();
+ const_for_each!(self_as_slice, value, {
+ result = result.const_push_front_or_panic(value.ascii);
+ });
+ result
+ }
+}
diff --git a/vendor/zerotrie/src/builder/litemap.rs b/vendor/zerotrie/src/builder/litemap.rs
new file mode 100644
index 00000000..530d5999
--- /dev/null
+++ b/vendor/zerotrie/src/builder/litemap.rs
@@ -0,0 +1,54 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Impls for functions gated on the "litemap" feature.
+
+use super::konst::*;
+use crate::builder::bytestr::ByteStr;
+use crate::error::ZeroTrieBuildError;
+use crate::zerotrie::ZeroTrieSimpleAscii;
+use crate::ZeroTrie;
+use alloc::borrow::Borrow;
+use alloc::vec::Vec;
+use litemap::LiteMap;
+
+impl ZeroTrieSimpleAscii<Vec<u8>> {
+ /// Builds a [`ZeroTrieSimpleAscii`] from a [`LiteMap`] by running the const builder at
+ /// runtime and copying the resulting bytes into a `Vec<u8>`.
+ ///
+ /// NOTE(review): the builder is instantiated as `ZeroTrieBuilderConst::<10000>` with a
+ /// second const parameter of `100`; presumably these are the byte capacity and the
+ /// lengths-stack size, which would bound the inputs this function can handle. Confirm
+ /// against the const builder.
+ ///
+ /// # Errors
+ ///
+ /// Returns the [`ZeroTrieBuildError`] reported by the const builder.
+ #[doc(hidden)]
+ pub fn try_from_litemap_with_const_builder<'a, S>(
+ items: &LiteMap<&'a [u8], usize, S>,
+ ) -> Result<Self, ZeroTrieBuildError>
+ where
+ S: litemap::store::StoreSlice<&'a [u8], usize, Slice = [(&'a [u8], usize)]>,
+ {
+ let tuples = items.as_slice();
+ let byte_str_slice = ByteStr::from_byte_slice_with_value(tuples);
+ ZeroTrieBuilderConst::<10000>::from_sorted_const_tuple_slice::<100>(byte_str_slice.into())
+ .map(|s| Self {
+ store: s.as_bytes().to_vec(),
+ })
+ }
+}
+
+/// Builds a [`ZeroTrie`] from a [`LiteMap`] whose keys borrow as `[u8]`.
+impl<K, S> TryFrom<&LiteMap<K, usize, S>> for ZeroTrie<Vec<u8>>
+where
+ // Borrow, not AsRef, because we rely on Ord being the same. Unfortunately
+ // this means `LiteMap<&str, usize>` does not work.
+ K: Borrow<[u8]>,
+ S: litemap::store::StoreSlice<K, usize, Slice = [(K, usize)]>,
+{
+ type Error = ZeroTrieBuildError;
+ fn try_from(items: &LiteMap<K, usize, S>) -> Result<Self, ZeroTrieBuildError> {
+ // Re-key the map by the borrowed `[u8]` form of each key so that the entries can be
+ // viewed as a slice of byte-string/value tuples.
+ let byte_litemap = items.to_borrowed_keys::<[u8], Vec<_>>();
+ let byte_slice = byte_litemap.as_slice();
+ let byte_str_slice = ByteStr::from_byte_slice_with_value(byte_slice);
+ Self::try_from_tuple_slice(byte_str_slice)
+ }
+}
+
+// TODO(MSRV 1.83): Make this more infallible by calculating the required length,
+// heap-allocating the required capacity, and pointing ConstAsciiTrieBuilderStore
+// to the heap buffer.
+// ```
+// const fn write_to_mut_buffer(buf: &mut [u8]) { buf[0] = 0; }
+// ```
diff --git a/vendor/zerotrie/src/builder/mod.rs b/vendor/zerotrie/src/builder/mod.rs
new file mode 100644
index 00000000..c756c128
--- /dev/null
+++ b/vendor/zerotrie/src/builder/mod.rs
@@ -0,0 +1,298 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! # ZeroTrie Builder
+//!
+//! There are two implementations of the ZeroTrie Builder:
+//!
+//! - [konst::ZeroTrieBuilderConst] allows for human-readable const construction
+//! - [nonconst::ZeroTrieBuilder] has the full feature set but requires `alloc`
+//!
+//! The two builders follow the same algorithm but have different capabilities.
+//!
+//! ## Builder Algorithm Overview
+//!
+//! The tries are built backwards, from the last node to the first node. The key step of the
+//! algorithm is **determining what is the next node to prepend.**
+//!
+//! In the simple case of [`ZeroTrieSimpleAscii`], all nodes are binary-search, so if the input
+//! strings are provided in lexicographic order, there is a simple, deterministic method for
+//! identifying the next node. This insight is what enables us to make the const builder.
+//!
+//! The builder works with the following intermediate state variables:
+//!
+//! - `prefix_len` indicates the byte index we are currently processing.
+//! - `i` and `j` bracket a window of strings in the input that share the same prefix.
+//! - `current_len` is the length in bytes of the current self-contained trie.
+//! - `lengths_stack` contains metadata for branch nodes.
+//!
+//! What follows is a verbal explanation of the build steps for a trie containing:
+//!
+//! - "" → 11
+//! - "ad" → 22
+//! - "adef" → 33
+//! - "adghk" → 44
+//!
+//! When a node is prepended, it is shown in **boldface**.
+//!
+//! 1. Initialize the builder by setting `i=3`, `j=4`, `prefix_len=5` (the last string),
+//! `current_len=0`, and `lengths_stack` empty. Start the main loop.
+//! 2. Top of loop. The string at `i` is equal in length to `prefix_len`, so we prepend
+//! our first node: a **value node 44**, which requires a 2-byte varint. Increase
+//! `current_len` to 2.
+//! 3. Reduce `prefix_len` to 4, read our `key_ascii="k"`, and recalculate `i` and `j`
+//! _(this calculation is a long chunk of code in the builder impls)_. Since there is no
+//! other string with the prefix "adgh", `i` and `j` stay the same, we prepend an
+//! **ASCII node "k"**, increase `current_len` to 3, and continue the main loop.
+//! 4. Top of loop. The string at `i` is of length 5, but `prefix_len` is 4, so there is
+//! no value node to prepend.
+//! 5. Reduce `prefix_len` to 3, read our `key_ascii="h"`, and recalculate `i` and `j`.
+//! There are no other strings sharing the prefix "adg", so we prepend an
+//! **ASCII node "h"**, increase `current_len` to 4, and continue the main loop.
+//! 6. Top of loop. There is still no value node to prepend.
+//! 7. Reduce `prefix_len` to 2, read our `key_ascii="g"`, and recalculate `i` and `j`.
+//! We find that `i=1` and `j=4`, the range of strings sharing the prefix "ad". Since
+//! `i` or `j` changed, proceed to evaluate the branch node.
+//! 8. The last branch byte `ascii_j` for this prefix is "g", which is the same as `key_ascii`,
+//! so we are the _last_ target of a branch node. Push an entry onto `lengths_stack`:
+//! `BranchMeta { ascii: "g", cumulative_length: 4, local_length: 4, count: 1 }`.
+//! 9. The first branch byte `ascii_i` for this prefix is "e", which is NOT equal to `key_ascii`,
+//! so we are _not the first_ target of a branch node. We therefore start evaluating the
+//! string preceding where we were at the top of the current loop. We set `i=2`, `j=3`,
+//! `prefix_len=4` (length of the string at `i`), and continue the main loop.
+//! 10. Top of loop. Since the string at `i` is equal in length to `prefix_len`, we prepend a
+//! **value node 33** (which requires a 2-byte varint) and increase `current_len` to 2.
+//! 11. Reduce `prefix_len` to 3, read our `key_ascii="f"`, and recalculate `i` and `j`.
+//! They stay the same, so we prepend an **ASCII node "f"**, increase `current_len` to 3,
+//! and continue the main loop.
+//! 12. Top of loop. No value node this time.
+//! 13. Reduce `prefix_len` to 2, read our `key_ascii="e"`, and recalculate `i` and `j`.
+//! They go back to `i=1` and `j=4`.
+//! 14. The last branch byte `ascii_j` for this prefix is "g", which is NOT equal to `key_ascii`,
+//! so we are _not the last_ target of a branch node. We peek at the entry at the front of
+//! the lengths stack and use it to push another entry onto the stack:
+//! `BranchMeta { ascii: "e", cumulative_length: 7, local_length: 3, count: 2 }`
+//! 15. The first branch byte `ascii_i` for this prefix is "e", which is the same as `key_ascii`,
+//! so we are the _first_ target of a branch node. We can therefore proceed to prepend the
+//! metadata for the branch node. We peek at the top of the stack and find that there are 2
+//! tries reachable from this branch and they have a total byte length of 7. We then pull off
+//! 2 entries from the stack into a local variable `branch_metas`. From here, we write out
+//! the **offset table**, **lookup table**, and **branch head node**, which are determined
+//! from the metadata entries. We set `current_len` to the length of the two tries plus the
+//! metadata, which happens to be 11. Then we return to the top of the main loop.
+//! 16. Top of loop. The string at `i` is length 2, which is the same as `prefix_len`, so we
+//! prepend a **value node 22** (2-byte varint) and increase `current_len` to 13.
+//! 17. Reduce `prefix_len` to 1, read our `key_ascii="d"`, and recalculate `i` and `j`.
+//! They stay the same, so we prepend an **ASCII node "d"**, increase `current_len` to 14,
+//! and continue the main loop.
+//! 18. Top of loop. No value node this time.
+//! 19. Reduce `prefix_len` to 0, read our `key_ascii="a"`, and recalculate `i` and `j`.
+//! They change to `i=0` and `j=4`, since all strings have the empty string as a prefix.
+//! However, `ascii_i` and `ascii_j` both equal `key_ascii`, so we prepend **ASCII node "a"**,
+//! increase `current_len` to 15, and continue the main loop.
+//! 20. Top of loop. The string at `i` is length 0, which is the same as `prefix_len`, so we
+//! prepend a **value node 11** and increase `current_len` to 16.
+//! 21. We can no longer reduce `prefix_len`, so our trie is complete.
+//!
+//! ## Perfect Hash Reordering
+//!
+//! When the PHF is added to the mix, the main change is that the strings are no longer in sorted
+//! order when they are in the trie. To resolve this issue, when adding a branch node, the target
+//! tries are rearranged in-place in the buffer to be in the correct order for the PHF.
+//!
+//! ## Example
+//!
+//! Here is the output of the trie described above.
+//!
+//! ```
+//! use zerotrie::ZeroTrieSimpleAscii;
+//!
+//! const DATA: [(&str, usize); 4] =
+//! [("", 11), ("ad", 22), ("adef", 33), ("adghk", 44)];
+//!
+//! // As demonstrated above, the required capacity for this trie is 16 bytes
+//! const TRIE: ZeroTrieSimpleAscii<[u8; 16]> =
+//! ZeroTrieSimpleAscii::from_sorted_str_tuples(&DATA);
+//!
+//! assert_eq!(
+//! TRIE.as_bytes(),
+//! &[
+//! 0x8B, // value node 11
+//! b'a', // ASCII node 'a'
+//! b'd', // ASCII node 'd'
+//! 0x90, // value node 22 lead byte
+//! 0x06, // value node 22 trail byte
+//! 0xC2, // branch node 2
+//! b'e', // first target of branch
+//! b'g', // second target of branch
+//! 3, // offset
+//! b'f', // ASCII node 'f'
+//! 0x90, // value node 33 lead byte
+//! 0x11, // value node 33 trail byte
+//! b'h', // ASCII node 'h'
+//! b'k', // ASCII node 'k'
+//! 0x90, // value node 44 lead byte
+//! 0x1C, // value node 44 trail byte
+//! ]
+//! );
+//!
+//! assert_eq!(TRIE.get(b""), Some(11));
+//! assert_eq!(TRIE.get(b"ad"), Some(22));
+//! assert_eq!(TRIE.get(b"adef"), Some(33));
+//! assert_eq!(TRIE.get(b"adghk"), Some(44));
+//! assert_eq!(TRIE.get(b"unknown"), None);
+//! ```
+
+mod branch_meta;
+pub(crate) mod bytestr;
+pub(crate) mod konst;
+#[cfg(feature = "litemap")]
+mod litemap;
+#[cfg(feature = "alloc")]
+pub(crate) mod nonconst;
+
+use bytestr::ByteStr;
+
+use super::ZeroTrieSimpleAscii;
+
+impl<const N: usize> ZeroTrieSimpleAscii<[u8; N]> {
+ /// **Const Constructor:** Creates a [`ZeroTrieSimpleAscii`] from a sorted slice of keys and values.
+ ///
+ /// This function needs to know the exact length of the resulting trie at compile time. To
+ /// figure out `N`, first set `N` to be too large (say 0xFFFF), then look at the resulting
+ /// compile error which will tell you how to set `N`, like this:
+ ///
+ /// > the evaluated program panicked at 'Buffer too large. Size needed: 17'
+ ///
+ /// That error message says you need to set `N` to 17.
+ ///
+ /// Also see [`Self::from_sorted_str_tuples`].
+ ///
+ /// # Panics
+ ///
+ /// Panics if `tuples` is not sorted or if `N` is not correct.
+ ///
+ /// # Examples
+ ///
+ /// Create a `const` ZeroTrieSimpleAscii at compile time:
+ ///
+ /// ```
+ /// use zerotrie::ZeroTrieSimpleAscii;
+ ///
+ /// // The required capacity for this trie happens to be 17 bytes
+ /// const TRIE: ZeroTrieSimpleAscii<[u8; 17]> =
+ /// ZeroTrieSimpleAscii::from_sorted_u8_tuples(&[
+ /// (b"bar", 2),
+ /// (b"bazzoo", 3),
+ /// (b"foo", 1),
+ /// ]);
+ ///
+ /// assert_eq!(TRIE.get(b"foo"), Some(1));
+ /// assert_eq!(TRIE.get(b"bar"), Some(2));
+ /// assert_eq!(TRIE.get(b"bazzoo"), Some(3));
+ /// assert_eq!(TRIE.get(b"unknown"), None);
+ /// ```
+ ///
+ /// Panics if strings are not sorted:
+ ///
+ /// ```compile_fail
+ /// # use zerotrie::ZeroTrieSimpleAscii;
+ /// const TRIE: ZeroTrieSimpleAscii<[u8; 17]> = ZeroTrieSimpleAscii::from_sorted_u8_tuples(&[
+ /// (b"foo", 1),
+ /// (b"bar", 2),
+ /// (b"bazzoo", 3),
+ /// ]);
+ /// ```
+ ///
+ /// Panics if capacity is too small:
+ ///
+ /// ```compile_fail
+ /// # use zerotrie::ZeroTrieSimpleAscii;
+ /// const TRIE: ZeroTrieSimpleAscii<[u8; 15]> = ZeroTrieSimpleAscii::from_sorted_u8_tuples(&[
+ /// (b"bar", 2),
+ /// (b"bazzoo", 3),
+ /// (b"foo", 1),
+ /// ]);
+ /// ```
+ ///
+ /// Panics if capacity is too large:
+ ///
+ /// ```compile_fail
+ /// # use zerotrie::ZeroTrieSimpleAscii;
+ /// const TRIE: ZeroTrieSimpleAscii<[u8; 20]> = ZeroTrieSimpleAscii::from_sorted_u8_tuples(&[
+ /// (b"bar", 2),
+ /// (b"bazzoo", 3),
+ /// (b"foo", 1),
+ /// ]);
+ /// ```
+ pub const fn from_sorted_u8_tuples(tuples: &[(&[u8], usize)]) -> Self {
+ use konst::*;
+ let byte_str_slice = ByteStr::from_byte_slice_with_value(tuples);
+ // 100 is the value of `K`, the size of the lengths stack. If compile errors are
+ // encountered, this number may need to be increased.
+ let result = ZeroTrieBuilderConst::<N>::from_tuple_slice::<100>(byte_str_slice);
+ match result {
+ Ok(s) => Self::from_store(s.build_or_panic()),
+ Err(_) => panic!("Failed to build ZeroTrie"),
+ }
+ }
+
+ /// **Const Constructor:** Creates a [`ZeroTrieSimpleAscii`] from a sorted slice of keys and values.
+ ///
+ /// This function needs to know the exact length of the resulting trie at compile time. To
+ /// figure out `N`, first set `N` to be too large (say 0xFFFF), then look at the resulting
+ /// compile error which will tell you how to set `N`, like this:
+ ///
+ /// > the evaluated program panicked at 'Buffer too large. Size needed: 17'
+ ///
+ /// That error message says you need to set `N` to 17.
+ ///
+ /// Also see [`Self::from_sorted_u8_tuples`].
+ ///
+ /// # Panics
+ ///
+ /// Panics if `tuples` is not sorted, if `N` is not correct, or if any of the strings contain
+ /// non-ASCII characters.
+ ///
+ /// # Examples
+ ///
+ /// Create a `const` ZeroTrieSimpleAscii at compile time:
+ ///
+ /// ```
+ /// use zerotrie::ZeroTrieSimpleAscii;
+ ///
+ /// // The required capacity for this trie happens to be 17 bytes
+ /// const TRIE: ZeroTrieSimpleAscii<[u8; 17]> =
+ /// ZeroTrieSimpleAscii::from_sorted_str_tuples(&[
+ /// ("bar", 2),
+ /// ("bazzoo", 3),
+ /// ("foo", 1),
+ /// ]);
+ ///
+ /// assert_eq!(TRIE.get(b"foo"), Some(1));
+ /// assert_eq!(TRIE.get(b"bar"), Some(2));
+ /// assert_eq!(TRIE.get(b"bazzoo"), Some(3));
+ /// assert_eq!(TRIE.get(b"unknown"), None);
+ /// ```
+ ///
+ /// Panics if the strings are not ASCII:
+ ///
+ /// ```compile_fail
+ /// # use zerotrie::ZeroTrieSimpleAscii;
+ /// const TRIE: ZeroTrieSimpleAscii<[u8; 100]> = ZeroTrieSimpleAscii::from_sorted_str_tuples(&[
+ /// ("bár", 2),
+ /// ("båzzöo", 3),
+ /// ("foo", 1),
+ /// ]);
+ /// ```
+ pub const fn from_sorted_str_tuples(tuples: &[(&str, usize)]) -> Self {
+ use konst::*;
+ let byte_str_slice = ByteStr::from_str_slice_with_value(tuples);
+ // 100 is the value of `K`, the size of the lengths stack. If compile errors are
+ // encountered, this number may need to be increased.
+ let result = ZeroTrieBuilderConst::<N>::from_tuple_slice::<100>(byte_str_slice);
+ match result {
+ Ok(s) => Self::from_store(s.build_or_panic()),
+ Err(_) => panic!("Failed to build ZeroTrie"),
+ }
+ }
+}
diff --git a/vendor/zerotrie/src/builder/nonconst/builder.rs b/vendor/zerotrie/src/builder/nonconst/builder.rs
new file mode 100644
index 00000000..b42ce80e
--- /dev/null
+++ b/vendor/zerotrie/src/builder/nonconst/builder.rs
@@ -0,0 +1,418 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::cmp::Ordering;
+
+use super::super::branch_meta::BranchMeta;
+use super::store::NonConstLengthsStack;
+use super::store::TrieBuilderStore;
+use crate::builder::bytestr::ByteStr;
+use crate::byte_phf::PerfectByteHashMapCacheOwned;
+use crate::error::ZeroTrieBuildError;
+use crate::options::*;
+use crate::varint;
+use alloc::borrow::Cow;
+use alloc::vec::Vec;
+
+/// A low-level builder for ZeroTrie. Supports all options.
+pub(crate) struct ZeroTrieBuilder<S> {
+ data: S, // backing store that nodes are prepended into; read out by `to_bytes`
+ phf_cache: PerfectByteHashMapCacheOwned, // cache of perfect hash maps looked up while building branch nodes
+ options: ZeroTrieBuilderOptions, // build options consulted by the builder (ASCII, case, PHF and capacity modes)
+}
+
+impl<S: TrieBuilderStore> ZeroTrieBuilder<S> {
+ /// Returns the trie data as a `Vec<u8>`, obtained from the backing store via `atbs_to_bytes`. The builder is only borrowed.
+ pub fn to_bytes(&self) -> Vec<u8> {
+ self.data.atbs_to_bytes()
+ }
+
+    /// Prepends a single key byte to the front of the builder.
+    ///
+    /// * A byte in the ASCII range (`<= 127`) is stored as a one-byte ASCII node.
+    /// * Any other byte is only accepted in [`AsciiMode::BinarySpans`] mode: if the store
+    ///   already starts with a span node, the byte is added to that span and the span's
+    ///   length varint is rewritten; otherwise a new span node is created for it.
+    ///
+    /// Returns the delta in length of the store.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ZeroTrieBuildError::NonAsciiError`] for a non-ASCII byte when the
+    /// builder is not in [`AsciiMode::BinarySpans`] mode.
+    fn prepend_ascii(&mut self, ascii: u8) -> Result<usize, ZeroTrieBuildError> {
+        if ascii <= 127 {
+            self.data.atbs_push_front(ascii);
+            return Ok(1);
+        }
+        if !matches!(self.options.ascii_mode, AsciiMode::BinarySpans) {
+            return Err(ZeroTrieBuildError::NonAsciiError);
+        }
+        match self.data.atbs_pop_front() {
+            Some(lead) if lead & 0b11100000 == 0b10100000 => {
+                // The store starts with a span node: extend it.
+                // `+ 1` accounts for the lead byte that was just popped.
+                let len_before = self.data.atbs_len() + 1;
+                // Unwrap OK: there is a varint at this location in the buffer
+                #[allow(clippy::unwrap_used)]
+                let span_len =
+                    varint::try_read_varint_meta3_from_tstore(lead, &mut self.data).unwrap();
+                self.data.atbs_push_front(ascii);
+                let header = varint::write_varint_meta3(span_len + 1);
+                self.data.atbs_extend_front(header.as_slice());
+                self.data.atbs_bitor_assign(0, 0b10100000);
+                return Ok(self.data.atbs_len() - len_before);
+            }
+            // Not a span node: put the popped byte back untouched.
+            Some(lead) => self.data.atbs_push_front(lead),
+            None => {}
+        }
+        // Create a new span node holding just this byte.
+        self.data.atbs_push_front(ascii);
+        self.data.atbs_push_front(0b10100001);
+        Ok(2)
+    }
+
+    /// Prepends a value node for `value`: its `meta3` varint encoding with the
+    /// `0b10000000` marker OR-ed into the first byte.
+    ///
+    /// Returns the delta in length, i.e. the size of the varint.
+    #[must_use]
+    fn prepend_value(&mut self, value: usize) -> usize {
+        let encoded = varint::write_varint_meta3(value);
+        let written = encoded.len();
+        self.data.atbs_extend_front(encoded.as_slice());
+        self.data.atbs_bitor_assign(0, 0b10000000);
+        written
+    }
+
+    /// Prepends a branch node header for `value`: its `meta2` varint encoding with the
+    /// `0b11000000` marker OR-ed into the first byte.
+    ///
+    /// Returns the delta in length, i.e. the size of the varint.
+    #[must_use]
+    fn prepend_branch(&mut self, value: usize) -> usize {
+        let encoded = varint::write_varint_meta2(value);
+        let written = encoded.len();
+        self.data.atbs_extend_front(encoded.as_slice());
+        self.data.atbs_bitor_assign(0, 0b11000000);
+        written
+    }
+
+    /// Prepends the bytes of `s`, unmodified, to the front of the builder.
+    ///
+    /// Returns the delta in length, which is `s.len()`.
+    #[must_use]
+    fn prepend_slice(&mut self, s: &[u8]) -> usize {
+        let added = s.len();
+        self.data.atbs_extend_front(s);
+        added
+    }
+
+    /// Builds a ZeroTrie from an iterator of `(key, value)` pairs in any order.
+    ///
+    /// The pairs are collected, sorted with `cmp_keys_values` according to `options`,
+    /// and then passed to the internal constructor that expects sorted input.
+    pub fn from_bytes_iter<K: AsRef<[u8]>, I: IntoIterator<Item = (K, usize)>>(
+        iter: I,
+        options: ZeroTrieBuilderOptions,
+    ) -> Result<Self, ZeroTrieBuildError> {
+        // Own the keys first so that they can be borrowed as byte slices below.
+        let owned: Vec<(K, usize)> = iter.into_iter().collect();
+        let mut pairs: Vec<(&[u8], usize)> = owned
+            .iter()
+            .map(|(key, value)| (key.as_ref(), *value))
+            .collect();
+        pairs.sort_by(|lhs, rhs| cmp_keys_values(options, *lhs, *rhs));
+        let byte_strs = ByteStr::from_byte_slice_with_value(pairs.as_slice());
+        Self::from_sorted_tuple_slice_impl(byte_strs, options)
+    }
+
+    /// Builds a ZeroTrie from `items` using the given `options`.
+    ///
+    /// The items are expected to be sorted already. The one exception is a
+    /// case-insensitive trie, for which a copy of the items is re-sorted with the
+    /// case-folding comparator before building.
+    ///
+    /// # Panics
+    ///
+    /// May panic if the items are not sorted.
+    pub fn from_sorted_tuple_slice(
+        items: &[(&ByteStr, usize)],
+        options: ZeroTrieBuilderOptions,
+    ) -> Result<Self, ZeroTrieBuildError> {
+        let mut items = Cow::Borrowed(items);
+        if let CaseSensitivity::IgnoreCase = options.case_sensitivity {
+            // `to_mut` clones the borrowed slice so that it can be re-sorted with our
+            // custom comparator; the caller's slice is left as is.
+            items.to_mut().sort_by(|lhs, rhs| {
+                let lhs = (lhs.0.as_bytes(), lhs.1);
+                let rhs = (rhs.0.as_bytes(), rhs.1);
+                cmp_keys_values(options, lhs, rhs)
+            });
+        }
+        Self::from_sorted_tuple_slice_impl(&items, options)
+    }
+
+ /// Internal constructor that does not re-sort the items. It debug-asserts that `items` is strictly increasing under `cmp_keys_values`, then runs `create` on a new empty store.
+ fn from_sorted_tuple_slice_impl(
+ items: &[(&ByteStr, usize)],
+ options: ZeroTrieBuilderOptions,
+ ) -> Result<Self, ZeroTrieBuildError> {
+ for ab in items.windows(2) {
+ debug_assert!(cmp_keys_values(
+ options,
+ (ab[0].0.as_bytes(), ab[0].1),
+ (ab[1].0.as_bytes(), ab[1].1)
+ )
+ .is_lt()); // strictly less: an exact duplicate `(key, value)` pair fails this check too
+ }
+ let mut result = Self {
+ data: S::atbs_new_empty(),
+ phf_cache: PerfectByteHashMapCacheOwned::new_empty(),
+ options,
+ };
+ let total_size = result.create(items)?;
+ debug_assert!(total_size == result.data.atbs_len()); // the length reported by `create` must match the store
+ Ok(result)
+ }
+
+ /// The actual builder algorithm. For an explanation, see [`crate::builder`]. Walks `all_items` from the last string towards the first, prepending nodes to the store, and returns the number of bytes written (0 for an empty slice).
+ #[allow(clippy::unwrap_used)] // lots of indexing, but all indexes should be in range
+ fn create(&mut self, all_items: &[(&ByteStr, usize)]) -> Result<usize, ZeroTrieBuildError> {
+ let mut prefix_len = match all_items.last() {
+ Some(x) => x.0.len(),
+ // Empty slice:
+ None => return Ok(0),
+ };
+ // Initialize the main loop to point at the last string.
+ let mut lengths_stack = NonConstLengthsStack::new(); // branch targets seen so far whose branch node is not yet written
+ let mut i = all_items.len() - 1;
+ let mut j = all_items.len();
+ let mut current_len = 0; // running byte count; reset to 0 whenever a new branch target is started
+ // Start the main loop.
+ loop {
+ let item_i = all_items.get(i).unwrap();
+ let item_j = all_items.get(j - 1).unwrap();
+ debug_assert!(item_i.0.prefix_eq(item_j.0, prefix_len));
+ // Check if we need to add a value node here.
+ if item_i.0.len() == prefix_len {
+ let len = self.prepend_value(item_i.1);
+ current_len += len;
+ }
+ if prefix_len == 0 {
+ // All done! Leave the main loop.
+ break;
+ }
+ // Reduce the prefix length by 1 and recalculate i and j.
+ prefix_len -= 1;
+ let mut new_i = i;
+ let mut new_j = j;
+ let mut ascii_i = item_i.0.byte_at_or_panic(prefix_len);
+ let mut ascii_j = item_j.0.byte_at_or_panic(prefix_len);
+ debug_assert_eq!(ascii_i, ascii_j);
+ let key_ascii = ascii_i; // byte at `prefix_len` of the current string; `ascii_i`/`ascii_j` may diverge from it below
+ loop { // extend `new_i` backwards over earlier strings that share the prefix
+ if new_i == 0 {
+ break;
+ }
+ let candidate = all_items.get(new_i - 1).unwrap().0;
+ if candidate.len() < prefix_len {
+ // Too short
+ break;
+ }
+ if item_i.0.prefix_eq(candidate, prefix_len) {
+ new_i -= 1;
+ } else {
+ break;
+ }
+ if candidate.len() == prefix_len {
+ // A string that equals the prefix does not take part in the branch node.
+ break;
+ }
+ let candidate = candidate.byte_at_or_panic(prefix_len);
+ if candidate != ascii_i {
+ ascii_i = candidate;
+ }
+ }
+ loop { // extend `new_j` forwards over later strings that share the prefix
+ if new_j == all_items.len() {
+ break;
+ }
+ let candidate = all_items.get(new_j).unwrap().0;
+ if candidate.len() < prefix_len {
+ // Too short
+ break;
+ }
+ if item_j.0.prefix_eq(candidate, prefix_len) {
+ new_j += 1;
+ } else {
+ break;
+ }
+ if candidate.len() == prefix_len {
+ panic!("A shorter string should be earlier in the sequence");
+ }
+ let candidate = candidate.byte_at_or_panic(prefix_len);
+ if candidate != ascii_j {
+ ascii_j = candidate;
+ }
+ }
+ // If there are no different bytes at this prefix level, we can add an ASCII or Span
+ // node and then continue to the next iteration of the main loop.
+ if ascii_i == key_ascii && ascii_j == key_ascii {
+ let len = self.prepend_ascii(key_ascii)?;
+ current_len += len;
+ if matches!(self.options.case_sensitivity, CaseSensitivity::IgnoreCase)
+ && i == new_i + 2
+ {
+ // This can happen if two strings were picked up, each with a different case
+ return Err(ZeroTrieBuildError::MixedCase);
+ }
+ debug_assert!(
+ i == new_i || i == new_i + 1,
+ "only the exact prefix string can be picked up at this level: {}",
+ key_ascii
+ );
+ i = new_i;
+ debug_assert_eq!(j, new_j);
+ continue;
+ }
+ // If i and j changed, we are a target of a branch node.
+ if ascii_j == key_ascii {
+ // We are the _last_ target of a branch node.
+ lengths_stack.push(BranchMeta {
+ ascii: key_ascii,
+ cumulative_length: current_len,
+ local_length: current_len,
+ count: 1,
+ });
+ } else {
+ // We are _not the last_ target of a branch node.
+ let BranchMeta {
+ cumulative_length,
+ count,
+ ..
+ } = lengths_stack.peek_or_panic();
+ lengths_stack.push(BranchMeta {
+ ascii: key_ascii,
+ cumulative_length: cumulative_length + current_len,
+ local_length: current_len,
+ count: count + 1,
+ });
+ }
+ if ascii_i != key_ascii {
+ // We are _not the first_ target of a branch node.
+ // Set the cursor to the previous string and continue the loop.
+ j = i;
+ i -= 1;
+ prefix_len = all_items.get(i).unwrap().0.len();
+ current_len = 0;
+ continue;
+ }
+ // Branch (first)
+ // The entry on top of the stack carries the totals (length and count) for this branch.
+ let (total_length, total_count) = {
+ let BranchMeta {
+ cumulative_length,
+ count,
+ ..
+ } = lengths_stack.peek_or_panic();
+ (cumulative_length, count)
+ };
+ let mut branch_metas = lengths_stack.pop_many_or_panic(total_count);
+ let original_keys = branch_metas.map_to_ascii_bytes();
+ if matches!(self.options.case_sensitivity, CaseSensitivity::IgnoreCase) {
+ // Check to see if we have the same letter in two different cases
+ let mut seen_ascii_alpha = [false; 26];
+ for c in original_keys.as_const_slice().as_slice() {
+ if c.is_ascii_alphabetic() {
+ let i = (c.to_ascii_lowercase() - b'a') as usize;
+ if seen_ascii_alpha[i] {
+ return Err(ZeroTrieBuildError::MixedCase);
+ } else {
+ seen_ascii_alpha[i] = true;
+ }
+ }
+ }
+ }
+ let use_phf = matches!(self.options.phf_mode, PhfMode::UsePhf);
+ let opt_phf_vec = if total_count > 15 && use_phf {
+ let phf_vec = self
+ .phf_cache
+ .try_get_or_insert(original_keys.as_const_slice().as_slice().to_vec())?;
+ // Put everything in order via bubble sort
+ // Note: branch_metas is stored in reverse order (0 = last element)
+ loop {
+ let mut l = total_count - 1;
+ let mut changes = 0;
+ let mut start = 0;
+ while l > 0 {
+ let a = *branch_metas.as_const_slice().get_or_panic(l);
+ let b = *branch_metas.as_const_slice().get_or_panic(l - 1);
+ let a_idx = phf_vec.keys().iter().position(|x| x == &a.ascii).unwrap();
+ let b_idx = phf_vec.keys().iter().position(|x| x == &b.ascii).unwrap();
+ if a_idx > b_idx {
+ // Out of PHF key order: swap the two adjacent byte ranges in the store and the two metas.
+ self.data.atbs_swap_ranges(
+ start,
+ start + a.local_length,
+ start + a.local_length + b.local_length,
+ );
+ branch_metas = branch_metas.swap_or_panic(l - 1, l);
+ start += b.local_length;
+ changes += 1;
+ // FIXME: fix the `length` field
+ } else {
+ start += a.local_length;
+ }
+ l -= 1;
+ }
+ if changes == 0 {
+ break;
+ }
+ }
+ Some(phf_vec)
+ } else {
+ None
+ };
+ // Write out the offset table
+ current_len = total_length;
+ const USIZE_BITS: usize = core::mem::size_of::<usize>() * 8;
+ let w = (USIZE_BITS - (total_length.leading_zeros() as usize) - 1) / 8; // index of the highest nonzero byte of `total_length`; the loop below runs `w + 1` times
+ if w > 3 && matches!(self.options.capacity_mode, CapacityMode::Normal) {
+ return Err(ZeroTrieBuildError::CapacityExceeded);
+ }
+ let mut k = 0;
+ while k <= w {
+ self.data.atbs_prepend_n_zeros(total_count - 1);
+ current_len += total_count - 1;
+ let mut l = 0;
+ let mut length_to_write = 0;
+ while l < total_count {
+ let BranchMeta { local_length, .. } = *branch_metas
+ .as_const_slice()
+ .get_or_panic(total_count - l - 1);
+ let mut adjusted_length = length_to_write;
+ let mut m = 0;
+ while m < k {
+ adjusted_length >>= 8;
+ m += 1;
+ }
+ if l > 0 {
+ self.data.atbs_bitor_assign(l - 1, adjusted_length as u8);
+ }
+ l += 1;
+ length_to_write += local_length;
+ }
+ k += 1;
+ }
+ // Write out the lookup table
+ assert!(0 < total_count && total_count <= 256);
+ let branch_value = (w << 8) + (total_count & 0xff); // `w` above the low byte, count in the low byte (a count of 256 masks to 0)
+ if let Some(phf_vec) = opt_phf_vec {
+ self.data.atbs_extend_front(phf_vec.as_bytes());
+ let phf_len = phf_vec.as_bytes().len();
+ let branch_len = self.prepend_branch(branch_value);
+ current_len += phf_len + branch_len;
+ } else {
+ let search_len = self.prepend_slice(original_keys.as_slice());
+ let branch_len = self.prepend_branch(branch_value);
+ current_len += search_len + branch_len;
+ }
+ i = new_i;
+ j = new_j;
+ }
+ assert!(lengths_stack.is_empty());
+ Ok(current_len)
+ }
+}
+
+/// Orders two `(key, value)` pairs for the builder.
+///
+/// Keys are compared first: bytewise for a case-sensitive trie, otherwise after mapping
+/// each byte through `to_ascii_lowercase`. Pairs with equal keys are ordered by value.
+fn cmp_keys_values(
+    options: ZeroTrieBuilderOptions,
+    a: (&[u8], usize),
+    b: (&[u8], usize),
+) -> Ordering {
+    let by_key = match options.case_sensitivity {
+        CaseSensitivity::Sensitive => a.0.cmp(b.0),
+        _ => Iterator::cmp(
+            a.0.iter().map(u8::to_ascii_lowercase),
+            b.0.iter().map(u8::to_ascii_lowercase),
+        ),
+    };
+    by_key.then_with(|| a.1.cmp(&b.1))
+}
diff --git a/vendor/zerotrie/src/builder/nonconst/mod.rs b/vendor/zerotrie/src/builder/nonconst/mod.rs
new file mode 100644
index 00000000..6ed78d71
--- /dev/null
+++ b/vendor/zerotrie/src/builder/nonconst/mod.rs
@@ -0,0 +1,9 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+mod builder;
+mod store;
+
+pub(crate) use builder::*;
+pub(crate) use store::TrieBuilderStore;
diff --git a/vendor/zerotrie/src/builder/nonconst/store.rs b/vendor/zerotrie/src/builder/nonconst/store.rs
new file mode 100644
index 00000000..741d5fef
--- /dev/null
+++ b/vendor/zerotrie/src/builder/nonconst/store.rs
@@ -0,0 +1,185 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! This module contains internal collections for the non-const builder.
+
+use super::super::branch_meta::BranchMeta;
+use super::super::konst::ConstArrayBuilder;
+use alloc::collections::VecDeque;
+use alloc::vec::Vec;
+
+/// Byte-buffer operations that the non-const ZeroTrie builder requires of its
+/// backing store. The builder grows the store from the front.
+pub(crate) trait TrieBuilderStore {
+    /// Creates a store containing no bytes.
+    fn atbs_new_empty() -> Self;
+
+    /// Returns the number of bytes currently in the store.
+    fn atbs_len(&self) -> usize;
+
+    /// Inserts `byte` at the front of the store.
+    fn atbs_push_front(&mut self, byte: u8);
+
+    /// Inserts all of `other` at the front of the store.
+    fn atbs_extend_front(&mut self, other: &[u8]);
+
+    /// Copies the contents of the store into a `Vec<u8>`.
+    fn atbs_to_bytes(&self) -> Vec<u8>;
+
+    /// Performs the operation `self[index] |= bits`.
+    fn atbs_bitor_assign(&mut self, index: usize, bits: u8);
+
+    /// Exchanges the adjacent ranges `self[start..mid]` and `self[mid..limit]`.
+    fn atbs_swap_ranges(&mut self, start: usize, mid: usize, limit: usize);
+
+    /// Removes the first byte of the store and returns it, or returns `None` if the
+    /// store is empty.
+    fn atbs_pop_front(&mut self) -> Option<u8>;
+
+    /// Inserts `n` zero bytes at the front of the store, one push at a time.
+    fn atbs_prepend_n_zeros(&mut self, n: usize) {
+        for _ in 0..n {
+            self.atbs_push_front(0);
+        }
+    }
+}
+
+impl TrieBuilderStore for VecDeque<u8> {
+    fn atbs_new_empty() -> Self {
+        Self::new()
+    }
+    fn atbs_len(&self) -> usize {
+        self.len()
+    }
+    fn atbs_push_front(&mut self, byte: u8) {
+        self.push_front(byte);
+    }
+    fn atbs_extend_front(&mut self, other: &[u8]) {
+        // TODO: No extend_front on VecDeque?
+        // Reserve once, then push back-to-front so that `other` keeps its order.
+        self.reserve(other.len());
+        other.iter().rev().for_each(|&byte| self.push_front(byte));
+    }
+    fn atbs_to_bytes(&self) -> Vec<u8> {
+        self.iter().copied().collect()
+    }
+    fn atbs_bitor_assign(&mut self, index: usize, bits: u8) {
+        let slot = &mut self[index];
+        *slot |= bits;
+    }
+    fn atbs_swap_ranges(&mut self, mut start: usize, mid: usize, mut limit: usize) {
+        assert!(
+            start <= mid && mid <= limit,
+            "Invalid args to atbs_swap_ranges(): start > mid || mid > limit"
+        );
+        assert!(
+            limit <= self.len(),
+            "Invalid args to atbs_swap_ranges(): limit out of range: {limit} > {}",
+            self.len()
+        );
+        // In-place swap of two adjacent ranges of potentially different lengths.
+        // Each round exchanges the head of the left range with the tail of the right
+        // range, over the length of the shorter one. That puts the shorter range into
+        // its final place; the window then shrinks to the part still to be exchanged.
+        // `mid` never moves.
+        while start != mid && mid != limit {
+            let left_len = mid - start;
+            let right_len = limit - mid;
+            let shorter = core::cmp::min(left_len, right_len);
+            for offset in 0..shorter {
+                self.swap(start + offset, limit - shorter + offset);
+            }
+            if left_len < right_len {
+                limit -= left_len;
+            } else {
+                start += right_len;
+            }
+        }
+    }
+    fn atbs_pop_front(&mut self) -> Option<u8> {
+        self.pop_front()
+    }
+}
+
+/// A data structure that holds any number of [`BranchMeta`] items, used as a stack backed by a `Vec`.
+pub(crate) struct NonConstLengthsStack {
+ data: Vec<BranchMeta>, // the last element is the top of the stack
+}
+
+/// Formats the stack as the slice of its elements.
+impl core::fmt::Debug for NonConstLengthsStack {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        core::fmt::Debug::fmt(self.as_slice(), f)
+    }
+}
+
+impl NonConstLengthsStack {
+    /// Creates a new empty [`NonConstLengthsStack`].
+    pub const fn new() -> Self {
+        Self { data: Vec::new() }
+    }
+
+    /// Returns whether the stack is empty.
+    pub fn is_empty(&self) -> bool {
+        self.data.is_empty()
+    }
+
+    /// Adds a [`BranchMeta`] to the top of the stack.
+    pub fn push(&mut self, meta: BranchMeta) {
+        self.data.push(meta);
+    }
+
+    /// Returns a copy of the [`BranchMeta`] on the top of the stack.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the stack is empty.
+    pub fn peek_or_panic(&self) -> BranchMeta {
+        *self.data.last().unwrap()
+    }
+
+    /// Removes the top `len` [`BranchMeta`]s from the stack, returning them in a
+    /// [`ConstArrayBuilder`] in the order in which they were pushed.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the stack holds fewer than `len` items. `len` must not exceed 256,
+    /// the capacity of the returned builder.
+    pub fn pop_many_or_panic(&mut self, len: usize) -> ConstArrayBuilder<256, BranchMeta> {
+        debug_assert!(len <= 256);
+        // Validate the request up front: computing indices by plain subtraction would
+        // overflow for an oversized `len` instead of reporting the real problem.
+        let new_len = match self.data.len().checked_sub(len) {
+            Some(new_len) => new_len,
+            None => panic!("Not enough items in the NonConstLengthsStack"),
+        };
+        let mut result = ConstArrayBuilder::new_empty([BranchMeta::default(); 256], 256);
+        // Take items from the top of the stack downwards and push each one to the
+        // *front* of the result, so the result ends up in push order.
+        for meta in self.data.drain(new_len..).rev() {
+            // Won't panic because len <= 256
+            result = result.const_push_front_or_panic(meta);
+        }
+        result
+    }
+
+    /// Non-const function that returns the initialized elements as a slice.
+    fn as_slice(&self) -> &[BranchMeta] {
+        &self.data
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Swapping `abcde` (indices 2..7) with `fghijkl` (indices 7..14) must exchange the
+    /// two runs and leave the bytes outside of them untouched.
+    #[test]
+    fn test_swap_ranges() {
+        let mut deque: VecDeque<u8> = b"..abcdefghijkl=".iter().copied().collect();
+        deque.atbs_swap_ranges(2, 7, 14);
+        assert_eq!(deque.atbs_to_bytes(), b"..fghijklabcde=");
+    }
+}
diff --git a/vendor/zerotrie/src/byte_phf/builder.rs b/vendor/zerotrie/src/byte_phf/builder.rs
new file mode 100644
index 00000000..5c9330ee
--- /dev/null
+++ b/vendor/zerotrie/src/byte_phf/builder.rs
@@ -0,0 +1,208 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use crate::error::ZeroTrieBuildError;
+use alloc::vec;
+use alloc::vec::Vec;
+
+/// To speed up the search algorithm, we limit the number of times the level-2 parameter (q)
+/// can hit its max value (initially Q_FAST_MAX) before we try the next level-1 parameter (p).
+/// In practice, this has a small impact on the resulting perfect hash, resulting in about
+/// 1 in 10000 hash maps that fall back to the slow path. Compared against `num_max_q` in [`find`].
+const MAX_L2_SEARCH_MISSES: usize = 24;
+
+/// Directly compute the perfect hash function.
+///
+/// Returns `(p, [q_0, q_1, ..., q_(N-1)])`, or an error if the PHF could not be computed.
+#[allow(unused_labels)] // for readability
+pub fn find(bytes: &[u8]) -> Result<(u8, Vec<u8>), ZeroTrieBuildError> {
+ let n_usize = bytes.len();
+
+ let mut p = 0u8;
+ let mut qq = vec![0u8; n_usize];
+
+ let mut bqs = vec![0u8; n_usize]; // `q` per bucket, indexed by position in the sorted `buckets` list
+ let mut seen = vec![false; n_usize]; // level-2 slots that are already taken
+ let max_allowable_p = P_FAST_MAX;
+ let mut max_allowable_q = Q_FAST_MAX;
+
+ #[allow(non_snake_case)]
+ let N = if n_usize > 0 && n_usize < 256 {
+ n_usize as u8
+ } else {
+ debug_assert!(n_usize == 0 || n_usize == 256);
+ return Ok((p, qq)); // no search is performed here; `p` and every `q` are still 0
+ };
+
+ 'p_loop: loop {
+ let mut buckets: Vec<(usize, Vec<u8>)> = (0..n_usize).map(|i| (i, vec![])).collect();
+ for byte in bytes {
+ let l1 = f1(*byte, p, N) as usize;
+ buckets[l1].1.push(*byte);
+ }
+ buckets.sort_by_key(|(_, v)| -(v.len() as isize)); // largest buckets first
+ // println!("New P: p={p:?}, buckets={buckets:?}");
+ let mut i = 0;
+ let mut num_max_q = 0;
+ bqs.fill(0);
+ seen.fill(false);
+ 'q_loop: loop {
+ if i == buckets.len() {
+ for (local_j, real_j) in buckets.iter().map(|(j, _)| *j).enumerate() {
+ qq[real_j] = bqs[local_j];
+ }
+ // println!("Success: p={p:?}, num_max_q={num_max_q:?}, bqs={bqs:?}, qq={qq:?}");
+ // if num_max_q > 0 {
+ // println!("num_max_q={num_max_q:?}");
+ // }
+ return Ok((p, qq));
+ }
+ let mut bucket = buckets[i].1.as_slice();
+ 'byte_loop: for (j, byte) in bucket.iter().enumerate() {
+ let l2 = f2(*byte, bqs[i], N) as usize;
+ if seen[l2] {
+ // println!("Skipping Q: p={p:?}, i={i:?}, byte={byte:}, q={i:?}, l2={:?}", f2(*byte, bqs[i], N));
+ for k_byte in &bucket[0..j] { // release the slots claimed by the earlier bytes of this bucket
+ let l2 = f2(*k_byte, bqs[i], N) as usize;
+ assert!(seen[l2]);
+ seen[l2] = false;
+ }
+ 'reset_loop: loop {
+ if bqs[i] < max_allowable_q {
+ bqs[i] += 1;
+ continue 'q_loop;
+ }
+ num_max_q += 1;
+ bqs[i] = 0;
+ if i == 0 || num_max_q > MAX_L2_SEARCH_MISSES { // no earlier bucket to backtrack to, or miss budget for this `p` used up
+ if p == max_allowable_p && max_allowable_q != Q_REAL_MAX {
+ // println!("Could not solve fast function: trying again: {bytes:?}");
+ max_allowable_q = Q_REAL_MAX;
+ p = 0;
+ continue 'p_loop;
+ } else if p == max_allowable_p {
+ // If a fallback algorithm for `p` is added, relax this assertion
+ // and re-run the loop with a higher `max_allowable_p`.
+ debug_assert_eq!(max_allowable_p, P_REAL_MAX);
+ // println!("Could not solve PHF function");
+ return Err(ZeroTrieBuildError::CouldNotSolvePerfectHash);
+ } else {
+ p += 1;
+ continue 'p_loop;
+ }
+ }
+ i -= 1; // backtrack: release the previous bucket's slots, then try its next `q`
+ bucket = buckets[i].1.as_slice();
+ for byte in bucket {
+ let l2 = f2(*byte, bqs[i], N) as usize;
+ assert!(seen[l2]);
+ seen[l2] = false;
+ }
+ }
+ } else {
+ // println!("Marking as seen: i={i:?}, byte={byte:}, l2={:?}", f2(*byte, bqs[i], N));
+ let l2 = f2(*byte, bqs[i], N) as usize;
+ seen[l2] = true;
+ }
+ }
+ // println!("Found Q: i={i:?}, q={:?}", bqs[i]);
+ i += 1;
+ }
+ }
+}
+
+impl PerfectByteHashMap<Vec<u8>> {
+    /// Computes a new [`PerfectByteHashMap`] for the given keys.
+    ///
+    /// The resulting buffer consists of `p`, followed by the `q` parameters, followed
+    /// by the keys placed at the positions that the hash function assigns to them.
+    ///
+    /// (this is a doc-hidden API)
+    pub fn try_new(keys: &[u8]) -> Result<Self, ZeroTrieBuildError> {
+        let count = keys.len();
+        // NOTE(review): truncating cast, so a length of 256 becomes 0. Presumably
+        // `f1`/`f2` account for that; confirm against their definitions.
+        let n = count as u8;
+        let (p, qq) = find(keys)?;
+        let mut slots = vec![0; count];
+        for &key in keys {
+            let bucket = f1(key, p, n) as usize;
+            let slot = f2(key, qq[bucket], n) as usize;
+            slots[slot] = key;
+        }
+        let mut bytes = Vec::with_capacity(count * 2 + 1);
+        bytes.push(p);
+        bytes.extend(qq);
+        bytes.extend(slots);
+        Ok(Self(bytes))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ extern crate std;
+ use std::print;
+ use std::println;
+
+ fn print_byte_to_stdout(byte: u8) { // ASCII alphanumerics are printed quoted, everything else as hex
+ let c = char::from(byte);
+ if c.is_ascii_alphanumeric() {
+ print!("'{c}'");
+ } else {
+ print!("0x{byte:X}");
+ }
+ }
+
+ fn random_alphanums(seed: u64, len: usize) -> Vec<u8> { // seeded pick of `len` bytes from the alphanumeric alphabet below
+ use rand::seq::SliceRandom;
+ use rand::SeedableRng;
+ let mut bytes: Vec<u8> =
+ b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789".into();
+ let mut rng = rand_pcg::Lcg64Xsh32::seed_from_u64(seed);
+ bytes.partial_shuffle(&mut rng, len).0.into()
+ }
+
+ #[test]
+ fn test_random_distributions() {
+ let mut p_distr = vec![0; 256]; // histogram of the `p` values returned by `find`
+ let mut q_distr = vec![0; 256]; // histogram of the `q` values returned by `find`
+ for len in 0..50 { // 50 lengths x 50 seeds = 2500 calls to `find`
+ for seed in 0..50 {
+ let bytes = random_alphanums(seed, len);
+ let (p, qq) = find(bytes.as_slice()).unwrap();
+ p_distr[p as usize] += 1;
+ for q in qq {
+ q_distr[q as usize] += 1;
+ }
+ }
+ }
+ println!("p_distr: {p_distr:?}");
+ println!("q_distr: {q_distr:?}");
+
+ let fast_p = p_distr[0..=P_FAST_MAX as usize].iter().sum::<usize>();
+ let slow_p = p_distr[(P_FAST_MAX + 1) as usize..].iter().sum::<usize>();
+ let fast_q = q_distr[0..=Q_FAST_MAX as usize].iter().sum::<usize>();
+ let slow_q = q_distr[(Q_FAST_MAX + 1) as usize..].iter().sum::<usize>();
+
+ assert_eq!(2500, fast_p); // every run stayed at or below `P_FAST_MAX`
+ assert_eq!(0, slow_p);
+ assert_eq!(61243, fast_q); // exact counts for the seeded inputs generated above
+ assert_eq!(7, slow_q);
+
+ let bytes = random_alphanums(0, 16);
+
+ #[allow(non_snake_case)]
+ let N = u8::try_from(bytes.len()).unwrap();
+
+ let (p, qq) = find(bytes.as_slice()).unwrap();
+
+ println!("Results:");
+ for byte in bytes.iter() { // print how each byte maps through `f1` and `f2`
+ print_byte_to_stdout(*byte);
+ let l1 = f1(*byte, p, N) as usize;
+ let q = qq[l1];
+ let l2 = f2(*byte, q, N) as usize;
+ println!(" => l1 {l1} => q {q} => l2 {l2}");
+ }
+ }
+}
diff --git a/vendor/zerotrie/src/byte_phf/cached_owned.rs b/vendor/zerotrie/src/byte_phf/cached_owned.rs
new file mode 100644
index 00000000..bcb1b0be
--- /dev/null
+++ b/vendor/zerotrie/src/byte_phf/cached_owned.rs
@@ -0,0 +1,39 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use super::*;
+use crate::error::ZeroTrieBuildError;
+use alloc::collections::btree_map::Entry;
+use alloc::collections::BTreeMap;
+use alloc::vec::Vec;
+
+/// Helper type for caching the results of multiple [`PerfectByteHashMap`] calculations, keyed by the key bytes.
+pub struct PerfectByteHashMapCacheOwned {
+ // Note: This should probably be a HashMap but that isn't in `alloc`
+ data: BTreeMap<Vec<u8>, PerfectByteHashMap<Vec<u8>>>, // key bytes -> the hash map computed for them
+}
+
+impl PerfectByteHashMapCacheOwned {
+    /// Creates a cache with no entries.
+    pub fn new_empty() -> Self {
+        let data = BTreeMap::new();
+        Self { data }
+    }
+
+    /// Looks up the [`PerfectByteHashMap`] for `keys`, computing and storing it first
+    /// if it is not in the cache yet.
+    ///
+    /// # Errors
+    ///
+    /// Propagates the error from [`PerfectByteHashMap::try_new`]; in that case nothing
+    /// is inserted into the cache.
+    pub fn try_get_or_insert(
+        &mut self,
+        keys: Vec<u8>,
+    ) -> Result<&PerfectByteHashMap<[u8]>, ZeroTrieBuildError> {
+        let cached = match self.data.entry(keys) {
+            Entry::Occupied(hit) => hit.into_mut(),
+            Entry::Vacant(miss) => {
+                let computed = PerfectByteHashMap::try_new(miss.key())?;
+                miss.insert(computed)
+            }
+        };
+        Ok(cached.as_borrowed())
+    }
+}
diff --git a/vendor/zerotrie/src/byte_phf/mod.rs b/vendor/zerotrie/src/byte_phf/mod.rs
new file mode 100644
index 00000000..4d82fed7
--- /dev/null
+++ b/vendor/zerotrie/src/byte_phf/mod.rs
@@ -0,0 +1,485 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+#![allow(rustdoc::private_intra_doc_links)] // doc(hidden) module
+
+//! # Byte Perfect Hash Function Internals
+//!
+//! This module contains a perfect hash function (PHF) designed for a fast, compact perfect
+//! hash over 1 to 256 nodes (bytes).
+//!
+//! The PHF uses the following variables:
+//!
+//! 1. A single parameter `p`, which is 0 in about 98% of cases.
+//! 2. A list of `N` parameters `q_t`, one per _bucket_
+//! 3. The `N` keys in an arbitrary order determined by the PHF
+//!
+//! Reading a `key` from the PHF uses the following algorithm:
+//!
+//! 1. Let `t`, the bucket index, be `f1(key, p)`.
+//! 2. Let `i`, the key index, be `f2(key, q_t)`.
+//! 3. If `key == k_i`, return `Some(i)`; else return `None`.
+//!
+//! The functions [`f1`] and [`f2`] are internal to the PHF but should remain stable across
+//! serialization versions of `ZeroTrie`. They are very fast, constant-time operations as long
+//! as `p` <= [`P_FAST_MAX`] and `q` <= [`Q_FAST_MAX`]. In practice, nearly 100% of parameter
+//! values are in the fast range.
+//!
+//! ```
+//! use zerotrie::_internal::PerfectByteHashMap;
+//!
+//! let phf_example_bytes = [
+ //!     // `p` parameter
+ //!     1,
+ //!     // `q` parameters, one for each of the N buckets
+ //!     0, 0, 1, 1,
+ //!     // Exact keys to be compared with the input
+ //!     b'e', b'a', b'c', b'g',
+//! ];
+//!
+//! let phf = PerfectByteHashMap::from_bytes(&phf_example_bytes);
+//!
+//! // The PHF returns the index of the key or `None` if not found.
+//! assert_eq!(phf.get(b'a'), Some(1));
+//! assert_eq!(phf.get(b'b'), None);
+//! assert_eq!(phf.get(b'c'), Some(2));
+//! assert_eq!(phf.get(b'd'), None);
+//! assert_eq!(phf.get(b'e'), Some(0));
+//! assert_eq!(phf.get(b'f'), None);
+//! assert_eq!(phf.get(b'g'), Some(3));
+//! ```
+
+use crate::helpers::*;
+
+#[cfg(feature = "alloc")]
+mod builder;
+#[cfg(feature = "alloc")]
+mod cached_owned;
+
+#[cfg(feature = "alloc")]
+pub use cached_owned::PerfectByteHashMapCacheOwned;
+
+ /// The cutoff for the fast version of [`f1`].
+ ///
+ /// As written, [`f1`] applies the same constant-time formula for every non-zero `p`.
+ #[cfg(feature = "alloc")] // used in the builder code
+ const P_FAST_MAX: u8 = 95;
+
+ /// The cutoff for the fast version of [`f2`].
+ ///
+ /// For `q` above this value, [`f2`] runs one extra mixing round per unit of excess.
+ const Q_FAST_MAX: u8 = 95;
+
+ /// The maximum allowable value of `p`. This could be raised if found to be necessary.
+ /// Values exceeding [`P_FAST_MAX`] could use a different `p` algorithm by modifying [`f1`].
+ #[cfg(feature = "alloc")] // used in the builder code
+ const P_REAL_MAX: u8 = P_FAST_MAX;
+
+ /// The maximum allowable value of `q`. This could be raised if found to be necessary.
+ ///
+ /// This is above [`Q_FAST_MAX`], so the largest allowed `q` values use the slower path of [`f2`].
+ #[cfg(feature = "alloc")] // used in the builder code
+ const Q_REAL_MAX: u8 = 127;
+
+/// Calculates the function `f1` for the PHF. For the exact formula, please read the code.
+///
+/// When `p == 0`, the operation is a simple modulus.
+///
+/// The argument `n` is used only for taking the modulus so that the return value is
+/// in the range `[0, n)`.
+///
+/// # Examples
+///
+/// ```
+/// use zerotrie::_internal::f1;
+/// const N: u8 = 10;
+///
+/// // With p = 0:
+/// assert_eq!(0, f1(0, 0, N));
+/// assert_eq!(1, f1(1, 0, N));
+/// assert_eq!(2, f1(2, 0, N));
+/// assert_eq!(9, f1(9, 0, N));
+/// assert_eq!(0, f1(10, 0, N));
+/// assert_eq!(1, f1(11, 0, N));
+/// assert_eq!(2, f1(12, 0, N));
+/// assert_eq!(9, f1(19, 0, N));
+///
+/// // With p = 1:
+/// assert_eq!(1, f1(0, 1, N));
+/// assert_eq!(0, f1(1, 1, N));
+/// assert_eq!(2, f1(2, 1, N));
+/// assert_eq!(2, f1(9, 1, N));
+/// assert_eq!(4, f1(10, 1, N));
+/// assert_eq!(5, f1(11, 1, N));
+/// assert_eq!(1, f1(12, 1, N));
+/// assert_eq!(7, f1(19, 1, N));
+/// ```
+ #[inline]
+ pub fn f1(byte: u8, p: u8, n: u8) -> u8 {
+     // With `n == 0` no modulus is taken and the byte is returned unchanged.
+     if n == 0 {
+         return byte;
+     }
+     let mixed = match p {
+         // `p == 0` is the plain-modulus case.
+         0 => byte,
+         // Every non-zero `p` uses this constant-time mix. If difficult cases ever
+         // require it, a different operation could be added for `p > P_FAST_MAX`.
+         _ => byte ^ p ^ byte.wrapping_shr(u32::from(p)),
+     };
+     mixed % n
+ }
+
+/// Calculates the function `f2` for the PHF. For the exact formula, please read the code.
+///
+/// When `q == 0`, the operation is a simple modulus.
+///
+/// The argument `n` is used only for taking the modulus so that the return value is
+/// in the range `[0, n)`.
+///
+/// # Examples
+///
+/// ```
+/// use zerotrie::_internal::f2;
+/// const N: u8 = 10;
+///
+/// // With q = 0:
+/// assert_eq!(0, f2(0, 0, N));
+/// assert_eq!(1, f2(1, 0, N));
+/// assert_eq!(2, f2(2, 0, N));
+/// assert_eq!(9, f2(9, 0, N));
+/// assert_eq!(0, f2(10, 0, N));
+/// assert_eq!(1, f2(11, 0, N));
+/// assert_eq!(2, f2(12, 0, N));
+/// assert_eq!(9, f2(19, 0, N));
+///
+/// // With q = 1:
+/// assert_eq!(1, f2(0, 1, N));
+/// assert_eq!(0, f2(1, 1, N));
+/// assert_eq!(3, f2(2, 1, N));
+/// assert_eq!(8, f2(9, 1, N));
+/// assert_eq!(1, f2(10, 1, N));
+/// assert_eq!(0, f2(11, 1, N));
+/// assert_eq!(3, f2(12, 1, N));
+/// assert_eq!(8, f2(19, 1, N));
+/// ```
+ #[inline]
+ pub fn f2(byte: u8, q: u8, n: u8) -> u8 {
+     // With `n == 0` no modulus is taken and the byte is returned unchanged.
+     if n == 0 {
+         return byte;
+     }
+     // In almost all cases the single XOR below is the whole hash. To crack a few
+     // difficult cases, each unit of `q` above `Q_FAST_MAX` adds one linear-time
+     // mixing round on top of it.
+     let extra_rounds = q.saturating_sub(Q_FAST_MAX);
+     let mixed = (0..extra_rounds).fold(byte ^ q, |acc, _| acc ^ (acc << 1) ^ (acc >> 1));
+     mixed % n
+ }
+
+ /// An immutable, constant-time map from bytes to unique indices.
+ ///
+ /// Lookups go through a perfect hash function (see the module-level documentation).
+ ///
+ /// Standard layout of the store: `p`, then N bytes of `q`, then N bytes of expected keys.
+ #[derive(Debug, PartialEq, Eq)]
+ #[repr(transparent)]
+ pub struct PerfectByteHashMap<Store: ?Sized>(Store);
+
+ impl<Store> PerfectByteHashMap<Store> {
+     /// Wraps a pre-existing store without inspecting it. See [`Self::as_bytes`].
+     #[inline]
+     pub fn from_store(store: Store) -> Self {
+         PerfectByteHashMap(store)
+     }
+ }
+
+ impl<Store> PerfectByteHashMap<Store>
+ where
+     Store: AsRef<[u8]> + ?Sized,
+ {
+     /// Gets the index of the given byte, or `None` if it is not in the map.
+     ///
+     /// A store that is empty or holds only the `p` byte contains no keys and
+     /// always yields `None`.
+     pub fn get(&self, key: u8) -> Option<usize> {
+         let (&p, buffer) = self.0.as_ref().split_first()?;
+         // Note: there are N buckets followed by N keys
+         let n_usize = buffer.len() / 2;
+         if n_usize == 0 {
+             return None;
+         }
+         // Truncating cast: a 256-item map yields `n == 0`, for which `f1` and `f2`
+         // return the byte without taking a modulus.
+         let n = n_usize as u8;
+         let (qq, eks) = buffer.debug_split_at(n_usize);
+         debug_assert_eq!(qq.len(), eks.len());
+         let l1 = usize::from(f1(key, p, n));
+         let &q = debug_unwrap!(qq.get(l1), return None);
+         let l2 = usize::from(f2(key, q, n));
+         let &ek = debug_unwrap!(eks.get(l2), return None);
+         // The hash only nominates a slot; the stored key decides membership.
+         (ek == key).then_some(l2)
+     }
+
+     /// Returns the number of keys in the map.
+     ///
+     /// This is called `num_items` because `len` is ambiguous: it could refer
+     /// to the number of items or the number of bytes.
+     pub fn num_items(&self) -> usize {
+         // A standard store is `1 + 2N` bytes long; integer division drops the `p` byte.
+         self.0.as_ref().len() / 2
+     }
+
+     /// Returns the keys as a slice, in the order in which they are stored in the map.
+     ///
+     /// An empty store yields an empty slice.
+     pub fn keys(&self) -> &[u8] {
+         let n = self.num_items();
+         // `1 + n` is in range for every non-empty store, so the fallback is taken
+         // only when the store lacks even the `p` byte.
+         self.0.as_ref().get(1 + n..).unwrap_or(&[])
+     }
+
+     /// Diagnostic function that returns `p` and the maximum value of `q`,
+     /// or `None` if the map holds no items.
+     #[cfg(test)]
+     pub fn p_qmax(&self) -> Option<(u8, u8)> {
+         let (&p, buffer) = self.0.as_ref().split_first()?;
+         let n = buffer.len() / 2;
+         if n == 0 {
+             return None;
+         }
+         let (qq, _) = buffer.debug_split_at(n);
+         // `qq` holds `n >= 1` bytes here, so `max` always finds a value.
+         let qmax = qq.iter().copied().max()?;
+         Some((p, qmax))
+     }
+
+     /// Returns the map as bytes. The map can be recovered with [`Self::from_store`]
+     /// or [`Self::from_bytes`].
+     pub fn as_bytes(&self) -> &[u8] {
+         self.0.as_ref()
+     }
+
+     /// Test helper: checks all 256 byte values against the map.
+     ///
+     /// Every stored key must resolve to an index that no other key resolves to,
+     /// and every byte that is not a stored key must resolve to `None`. On failure,
+     /// returns a short description together with the offending byte.
+     #[cfg(all(feature = "alloc", test))]
+     pub(crate) fn check(&self) -> Result<(), (&'static str, u8)> {
+         use alloc::vec;
+         let keys = self.keys();
+         let mut seen = vec![false; self.num_items()];
+         for b in u8::MIN..=u8::MAX {
+             match (keys.contains(&b), self.get(b)) {
+                 (true, None) => return Err(("expected to find", b)),
+                 (true, Some(i)) => {
+                     // Mark the slot and fail if another key already claimed it.
+                     if core::mem::replace(&mut seen[i], true) {
+                         return Err(("seen", b));
+                     }
+                 }
+                 (false, Some(_)) => return Err(("did not expect to find", b)),
+                 (false, None) => {}
+             }
+         }
+         Ok(())
+     }
+ }
+
+ impl PerfectByteHashMap<[u8]> {
+     /// Reinterprets pre-existing bytes as a borrowed map without copying them.
+     /// See [`Self::as_bytes`].
+     #[inline]
+     pub fn from_bytes(bytes: &[u8]) -> &Self {
+         let ptr = bytes as *const [u8] as *const Self;
+         // SAFETY: `Self` is `repr(transparent)` over `[u8]`, so the two pointer types
+         // share layout and metadata, and `ptr` was derived from a live reference whose
+         // lifetime the returned reference inherits.
+         unsafe { &*ptr }
+     }
+ }
+
+ impl<Store> PerfectByteHashMap<Store>
+ where
+     Store: AsRef<[u8]> + ?Sized,
+ {
+     /// Borrows any `PerfectByteHashMap<AsRef<[u8]>>` as a `&PerfectByteHashMap<[u8]>`.
+     #[inline]
+     pub fn as_borrowed(&self) -> &PerfectByteHashMap<[u8]> {
+         let bytes: &[u8] = self.0.as_ref();
+         PerfectByteHashMap::from_bytes(bytes)
+     }
+ }
+
+#[cfg(all(test, feature = "alloc"))]
+mod tests {
+ use super::*;
+ use alloc::vec::Vec;
+ extern crate std;
+
+ /// Picks `len` distinct ASCII alphanumerics, shuffled by a PRNG seeded with `seed`.
+ fn random_alphanums(seed: u64, len: usize) -> Vec<u8> {
+     use rand::seq::SliceRandom;
+     use rand::SeedableRng;
+
+     let mut rng = rand_pcg::Lcg64Xsh32::seed_from_u64(seed);
+     let mut pool: Vec<u8> =
+         b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789".to_vec();
+     // Only the shuffled part of the pool is returned.
+     let (chosen, _rest) = pool.partial_shuffle(&mut rng, len);
+     chosen.to_vec()
+ }
+
+ /// Solves many small key sets (1 to 15 keys) and checks that nearly all of them
+ /// stay within the fast range of `q`.
+ #[test]
+ fn test_smaller() {
+     let mut count_by_p = [0; 256];
+     let mut count_by_qmax = [0usize; 256];
+     for len in 1..16 {
+         for seed in 0..150 {
+             let keys = random_alphanums(seed, len);
+             let keys_str = core::str::from_utf8(&keys).unwrap();
+             let computed = PerfectByteHashMap::try_new(&keys).expect(keys_str);
+             // On failure, report the offending key set.
+             assert!(computed.check().is_ok(), "{}", keys_str);
+             let (p, qmax) = computed.p_qmax().unwrap();
+             count_by_p[usize::from(p)] += 1;
+             count_by_qmax[usize::from(qmax)] += 1;
+         }
+     }
+     std::println!("count_by_p (smaller): {count_by_p:?}");
+     std::println!("count_by_qmax (smaller): {count_by_qmax:?}");
+     // Everything up to and including Q_FAST_MAX counts as fast.
+     let (fast, slow) = count_by_qmax.split_at(Q_FAST_MAX as usize + 1);
+     let count_fastq: usize = fast.iter().sum();
+     let count_slowq: usize = slow.iter().sum();
+     std::println!("fastq/slowq: {count_fastq}/{count_slowq}");
+     // Assert that 99% of cases resolve to the fast hash
+     assert!(count_fastq >= count_slowq * 100);
+ }
+
+ /// Solves many larger key sets (16 to 59 keys) and checks that nearly all of them
+ /// stay within the fast range of `q`.
+ #[test]
+ fn test_larger() {
+     let mut count_by_p = [0; 256];
+     let mut count_by_qmax = [0usize; 256];
+     for len in 16..60 {
+         for seed in 0..75 {
+             let keys = random_alphanums(seed, len);
+             let keys_str = core::str::from_utf8(&keys).unwrap();
+             let computed = PerfectByteHashMap::try_new(&keys).expect(keys_str);
+             // On failure, report the offending key set.
+             assert!(computed.check().is_ok(), "{}", keys_str);
+             let (p, qmax) = computed.p_qmax().unwrap();
+             count_by_p[usize::from(p)] += 1;
+             count_by_qmax[usize::from(qmax)] += 1;
+         }
+     }
+     std::println!("count_by_p (larger): {count_by_p:?}");
+     std::println!("count_by_qmax (larger): {count_by_qmax:?}");
+     // Everything up to and including Q_FAST_MAX counts as fast.
+     let (fast, slow) = count_by_qmax.split_at(Q_FAST_MAX as usize + 1);
+     let count_fastq: usize = fast.iter().sum();
+     let count_slowq: usize = slow.iter().sum();
+     std::println!("fastq/slowq: {count_fastq}/{count_slowq}");
+     // Assert that 99% of cases resolve to the fast hash
+     assert!(count_fastq >= count_slowq * 100);
+ }
+
+ /// Pins the parameters found for a key set that needs a large `p`.
+ #[test]
+ fn test_hard_cases() {
+     // The bytes 0 through 126, followed by 195 and 196.
+     let keys: Vec<u8> = (0u8..=126).chain([195, 196]).collect();
+
+     let computed = PerfectByteHashMap::try_new(&keys).unwrap();
+     let (p, qmax) = computed.p_qmax().unwrap();
+     assert_eq!(p, 69);
+     assert_eq!(qmax, 67);
+ }
+
+ // Builds a map for each key set below and compares the serialized bytes and the
+ // stored key order against pinned values, then runs the exhaustive `check()`.
+ #[test]
+ fn test_build_read_small() {
+ // `expected` is the full serialized map: `p`, then one `q` per bucket, then the keys
+ // in stored order. `reordered_keys` repeats that trailing key section as a string.
+ #[derive(Debug)]
+ struct TestCase<'a> {
+ keys: &'a str,
+ expected: &'a [u8],
+ reordered_keys: &'a str,
+ }
+ let cases = [
+ TestCase {
+ keys: "ab",
+ expected: &[0, 0, 0, b'b', b'a'],
+ reordered_keys: "ba",
+ },
+ TestCase {
+ keys: "abc",
+ expected: &[0, 0, 0, 0, b'c', b'a', b'b'],
+ reordered_keys: "cab",
+ },
+ TestCase {
+ // Note: splitting "a" and "c" into different buckets requires the heavier hash
+ // function because the difference between "a" and "c" is the period (2).
+ keys: "ac",
+ expected: &[1, 0, 1, b'c', b'a'],
+ reordered_keys: "ca",
+ },
+ TestCase {
+ keys: "aceg",
+ expected: &[1, 0, 0, 1, 1, b'e', b'a', b'c', b'g'],
+ reordered_keys: "eacg",
+ },
+ TestCase {
+ keys: "abd",
+ expected: &[0, 0, 1, 3, b'a', b'b', b'd'],
+ reordered_keys: "abd",
+ },
+ TestCase {
+ keys: "def",
+ expected: &[0, 0, 0, 0, b'f', b'd', b'e'],
+ reordered_keys: "fde",
+ },
+ TestCase {
+ keys: "fi",
+ expected: &[0, 0, 0, b'f', b'i'],
+ reordered_keys: "fi",
+ },
+ TestCase {
+ keys: "gh",
+ expected: &[0, 0, 0, b'h', b'g'],
+ reordered_keys: "hg",
+ },
+ TestCase {
+ keys: "lm",
+ expected: &[0, 0, 0, b'l', b'm'],
+ reordered_keys: "lm",
+ },
+ TestCase {
+ // Note: "a" and "q" (0x61 and 0x71) are very hard to split; only a handful of
+ // hash function crates can get them into separate buckets.
+ keys: "aq",
+ expected: &[4, 0, 1, b'a', b'q'],
+ reordered_keys: "aq",
+ },
+ TestCase {
+ keys: "xy",
+ expected: &[0, 0, 0, b'x', b'y'],
+ reordered_keys: "xy",
+ },
+ TestCase {
+ keys: "xyz",
+ expected: &[0, 0, 0, 0, b'x', b'y', b'z'],
+ reordered_keys: "xyz",
+ },
+ TestCase {
+ keys: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
+ expected: &[
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 10, 12, 16, 4, 4, 4, 4, 4, 4, 8, 4, 4, 4, 16,
+ 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ 2, 0, 7, 104, 105, 106, 107, 108, 109, 110, 111, 112, 117, 118, 119, 68, 69,
+ 70, 113, 114, 65, 66, 67, 120, 121, 122, 115, 72, 73, 74, 71, 80, 81, 82, 83,
+ 84, 85, 86, 87, 88, 89, 90, 75, 76, 77, 78, 79, 103, 97, 98, 99, 116, 100, 102,
+ 101,
+ ],
+ reordered_keys: "hijklmnopuvwDEFqrABCxyzsHIJGPQRSTUVWXYZKLMNOgabctdfe",
+ },
+ TestCase {
+ keys: "abcdefghij",
+ expected: &[
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, 101, 102, 103, 104, 105, 106, 97, 98, 99,
+ ],
+ reordered_keys: "defghijabc",
+ },
+ TestCase {
+ // This is a small case that resolves to the slow hasher
+ keys: "Jbej",
+ expected: &[2, 0, 0, 102, 0, b'j', b'e', b'b', b'J'],
+ reordered_keys: "jebJ",
+ },
+ TestCase {
+ // This is another small case that resolves to the slow hasher
+ keys: "JFNv",
+ expected: &[1, 98, 0, 2, 0, b'J', b'F', b'N', b'v'],
+ reordered_keys: "JFNv",
+ },
+ ];
+ // Each case must reproduce the pinned bytes exactly and pass the exhaustive check.
+ for cas in cases {
+ let computed = PerfectByteHashMap::try_new(cas.keys.as_bytes()).expect(cas.keys);
+ assert_eq!(computed.as_bytes(), cas.expected, "{:?}", cas);
+ assert_eq!(computed.keys(), cas.reordered_keys.as_bytes(), "{:?}", cas);
+ computed.check().expect(cas.keys);
+ }
+ }
+}
diff --git a/vendor/zerotrie/src/cursor.rs b/vendor/zerotrie/src/cursor.rs
new file mode 100644
index 00000000..ebcb97b0
--- /dev/null
+++ b/vendor/zerotrie/src/cursor.rs
@@ -0,0 +1,491 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Types for walking stepwise through a trie.
+//!
+//! For examples, see the `.cursor()` functions
+//! and the `Cursor` types in this module.
+
+use crate::reader;
+use crate::ZeroAsciiIgnoreCaseTrie;
+use crate::ZeroTrieSimpleAscii;
+
+use core::fmt;
+
+impl<Store> ZeroTrieSimpleAscii<Store>
+where
+ Store: AsRef<[u8]> + ?Sized,
+{
+ /// Gets a cursor into the current trie.
+ ///
+ /// Useful to query a trie with data that is not a slice.
+ ///
+ /// This is currently supported only on [`ZeroTrieSimpleAscii`]
+ /// and [`ZeroAsciiIgnoreCaseTrie`].
+ ///
+ /// # Examples
+ ///
+ /// Get a value out of a trie by [writing](fmt::Write) it to the cursor:
+ ///
+ /// ```
+ /// use core::fmt::Write;
+ /// use zerotrie::ZeroTrieSimpleAscii;
+ ///
+ /// // A trie with two values: "abc" and "abcdef"
+ /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+ ///
+ /// // Get out the value for "abc"
+ /// let mut cursor = trie.cursor();
+ /// write!(&mut cursor, "abc");
+ /// assert_eq!(cursor.take_value(), Some(0));
+ /// ```
+ ///
+ /// Find the longest prefix match:
+ ///
+ /// ```
+ /// use zerotrie::ZeroTrieSimpleAscii;
+ ///
+ /// // A trie with two values: "abc" and "abcdef"
+ /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+ ///
+ /// // Find the longest prefix of the string "abcdxy":
+ /// let query = b"abcdxy";
+ /// let mut longest_prefix = 0;
+ /// let mut cursor = trie.cursor();
+ /// for (i, b) in query.iter().enumerate() {
+ /// // Checking is_empty() is not required, but it is
+ /// // good for efficiency
+ /// if cursor.is_empty() {
+ /// break;
+ /// }
+ /// if cursor.take_value().is_some() {
+ /// longest_prefix = i;
+ /// }
+ /// cursor.step(*b);
+ /// }
+ ///
+ /// // The longest prefix is "abc" which is length 3:
+ /// assert_eq!(longest_prefix, 3);
+ /// ```
+ #[inline]
+ pub fn cursor(&self) -> ZeroTrieSimpleAsciiCursor {
+     // The cursor works on a borrowed slice view of this trie.
+     let trie = self.as_borrowed_slice();
+     ZeroTrieSimpleAsciiCursor { trie }
+ }
+}
+
+impl<Store> ZeroAsciiIgnoreCaseTrie<Store>
+where
+ Store: AsRef<[u8]> + ?Sized,
+{
+ /// Gets a cursor into the current trie.
+ ///
+ /// Useful to query a trie with data that is not a slice.
+ ///
+ /// This is currently supported only on [`ZeroTrieSimpleAscii`]
+ /// and [`ZeroAsciiIgnoreCaseTrie`].
+ ///
+ /// # Examples
+ ///
+ /// Get a value out of a trie by [writing](fmt::Write) it to the cursor:
+ ///
+ /// ```
+ /// use core::fmt::Write;
+ /// use zerotrie::ZeroAsciiIgnoreCaseTrie;
+ ///
+ /// // A trie with two values: "aBc" and "aBcdEf"
+ /// let trie = ZeroAsciiIgnoreCaseTrie::from_bytes(b"aBc\x80dEf\x81");
+ ///
+ /// // Get out the value for "abc" (case-insensitive!)
+ /// let mut cursor = trie.cursor();
+ /// write!(&mut cursor, "abc");
+ /// assert_eq!(cursor.take_value(), Some(0));
+ /// ```
+ ///
+ /// For more examples, see [`ZeroTrieSimpleAscii::cursor`].
+ #[inline]
+ pub fn cursor(&self) -> ZeroAsciiIgnoreCaseTrieCursor {
+     // The cursor works on a borrowed slice view of this trie.
+     let trie = self.as_borrowed_slice();
+     ZeroAsciiIgnoreCaseTrieCursor { trie }
+ }
+}
+
+ impl<'a> ZeroTrieSimpleAscii<&'a [u8]> {
+     /// Like [`ZeroTrieSimpleAscii::cursor()`], but consumes `self` so that the trie
+     /// does not have to be doubly anchored to the stack.
+     #[inline]
+     pub fn into_cursor(self) -> ZeroTrieSimpleAsciiCursor<'a> {
+         let trie = self;
+         ZeroTrieSimpleAsciiCursor { trie }
+     }
+ }
+
+ impl<'a> ZeroAsciiIgnoreCaseTrie<&'a [u8]> {
+     /// Like [`ZeroAsciiIgnoreCaseTrie::cursor()`], but consumes `self` so that the trie
+     /// does not have to be doubly anchored to the stack.
+     #[inline]
+     pub fn into_cursor(self) -> ZeroAsciiIgnoreCaseTrieCursor<'a> {
+         let trie = self;
+         ZeroAsciiIgnoreCaseTrieCursor { trie }
+     }
+ }
+
+ /// A cursor into a [`ZeroTrieSimpleAscii`], useful for stepwise lookup.
+ ///
+ /// For examples, see [`ZeroTrieSimpleAscii::cursor()`].
+ // Clone but not Copy: <https://stackoverflow.com/q/32324251/1407170>
+ #[derive(Debug, Clone)]
+ pub struct ZeroTrieSimpleAsciiCursor<'a> {
+ // Borrowed trie view; the cursor methods hand `trie.store` to the `reader`
+ // functions by mutable reference.
+ trie: ZeroTrieSimpleAscii<&'a [u8]>,
+ }
+
+ /// A cursor into a [`ZeroAsciiIgnoreCaseTrie`], useful for stepwise lookup.
+ ///
+ /// For examples, see [`ZeroAsciiIgnoreCaseTrie::cursor()`].
+ // Clone but not Copy: <https://stackoverflow.com/q/32324251/1407170>
+ #[derive(Debug, Clone)]
+ pub struct ZeroAsciiIgnoreCaseTrieCursor<'a> {
+ // Borrowed trie view; the cursor methods hand `trie.store` to the `reader`
+ // functions by mutable reference.
+ trie: ZeroAsciiIgnoreCaseTrie<&'a [u8]>,
+ }
+
+ /// Information about a probed edge.
+ ///
+ /// Returned by the cursors' `probe` functions.
+ #[derive(Debug, Copy, Clone, PartialEq, Eq)]
+ #[non_exhaustive] // no need to destructure or construct this in userland
+ pub struct AsciiProbeResult {
+ /// The character's byte value between this node and its parent.
+ pub byte: u8,
+ /// The number of siblings of this node, _including itself_.
+ pub total_siblings: u8,
+ }
+
+impl ZeroTrieSimpleAsciiCursor<'_> {
+ /// Steps the cursor one character into the trie based on the character's byte value.
+ ///
+ /// # Examples
+ ///
+ /// Unrolled loop checking for string presence at every step:
+ ///
+ /// ```
+ /// use zerotrie::ZeroTrieSimpleAscii;
+ ///
+ /// // A trie with two values: "abc" and "abcdef"
+ /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+ ///
+ /// // Search the trie for the string "abcdxy"
+ /// let mut cursor = trie.cursor();
+ /// assert_eq!(cursor.take_value(), None); // ""
+ /// cursor.step(b'a');
+ /// assert_eq!(cursor.take_value(), None); // "a"
+ /// cursor.step(b'b');
+ /// assert_eq!(cursor.take_value(), None); // "ab"
+ /// cursor.step(b'c');
+ /// assert_eq!(cursor.take_value(), Some(0)); // "abc"
+ /// cursor.step(b'd');
+ /// assert_eq!(cursor.take_value(), None); // "abcd"
+ /// assert!(!cursor.is_empty());
+ /// cursor.step(b'x'); // no strings have the prefix "abcdx"
+ /// assert!(cursor.is_empty());
+ /// assert_eq!(cursor.take_value(), None); // "abcdx"
+ /// cursor.step(b'y');
+ /// assert_eq!(cursor.take_value(), None); // "abcdxy"
+ /// ```
+ ///
+ /// If the byte is not ASCII, the cursor will become empty:
+ ///
+ /// ```
+ /// use zerotrie::ZeroTrieSimpleAscii;
+ ///
+ /// // A trie with two values: "abc" and "abcdef"
+ /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+ ///
+ /// let mut cursor = trie.cursor();
+ /// assert_eq!(cursor.take_value(), None); // ""
+ /// cursor.step(b'a');
+ /// assert_eq!(cursor.take_value(), None); // "a"
+ /// cursor.step(b'b');
+ /// assert_eq!(cursor.take_value(), None); // "ab"
+ /// cursor.step(b'\xFF');
+ /// assert!(cursor.is_empty());
+ /// assert_eq!(cursor.take_value(), None);
+ /// ```
+ #[inline]
+ pub fn step(&mut self, byte: u8) {
+     let store = &mut self.trie.store;
+     // Whatever the reader returns is intentionally discarded here.
+     reader::step_parameterized::<ZeroTrieSimpleAscii<[u8]>>(store, byte);
+ }
+
+ /// Takes the value at the current position.
+ ///
+ /// Calling this function on a new cursor is equivalent to calling `.get()`
+ /// with the empty string (except that it can only be called once).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use zerotrie::ZeroTrieSimpleAscii;
+ ///
+ /// // A trie with two values: "" and "abc"
+ /// let trie = ZeroTrieSimpleAscii::from_bytes(b"\x80abc\x81");
+ ///
+ /// assert_eq!(Some(0), trie.get(""));
+ /// let mut cursor = trie.cursor();
+ /// assert_eq!(Some(0), cursor.take_value());
+ /// assert_eq!(None, cursor.take_value());
+ /// ```
+ #[inline]
+ pub fn take_value(&mut self) -> Option<usize> {
+     let store = &mut self.trie.store;
+     reader::take_value(store)
+ }
+
+ /// Steps the cursor one character into the trie based on an edge index,
+ /// returning the corresponding character as a byte.
+ ///
+ /// This function is similar to [`Self::step()`], but it takes an index instead of a char.
+ /// This enables stepwise iteration over the contents of the trie.
+ ///
+ /// If there are multiple possibilities for the next byte, the `index` argument allows
+ /// visiting them in order. Since this function steps the cursor, the cursor must be
+ /// cloned (a cheap operation) in order to visit multiple children.
+ ///
+ /// # Examples
+ ///
+ /// Continually query index 0 to extract the first item from a trie:
+ ///
+ /// ```
+ /// use zerotrie::ZeroTrieSimpleAscii;
+ ///
+ /// let data: &[(String, usize)] = &[
+ /// ("ab".to_string(), 111),
+ /// ("abcxyz".to_string(), 22),
+ /// ("abde".to_string(), 333),
+ /// ("afg".to_string(), 44),
+ /// ];
+ ///
+ /// let trie: ZeroTrieSimpleAscii<Vec<u8>> =
+ /// data.iter().map(|(s, v)| (s.as_str(), *v)).collect();
+ ///
+ /// let mut cursor = trie.cursor();
+ /// let mut key = String::new();
+ /// let value = loop {
+ /// if let Some(value) = cursor.take_value() {
+ /// break value;
+ /// }
+ /// let probe_result = cursor.probe(0).unwrap();
+ /// key.push(char::from(probe_result.byte));
+ /// };
+ ///
+ /// assert_eq!(key, "ab");
+ /// assert_eq!(value, 111);
+ /// ```
+ ///
+ /// Stepwise iterate over all entries in the trie:
+ ///
+ /// ```
+ /// # use zerotrie::ZeroTrieSimpleAscii;
+ /// # let data: &[(String, usize)] = &[
+ /// # ("ab".to_string(), 111),
+ /// # ("abcxyz".to_string(), 22),
+ /// # ("abde".to_string(), 333),
+ /// # ("afg".to_string(), 44)
+ /// # ];
+ /// # let trie: ZeroTrieSimpleAscii<Vec<u8>> = data
+ /// # .iter()
+ /// # .map(|(s, v)| (s.as_str(), *v))
+ /// # .collect();
+ /// // (trie built as in previous example)
+ ///
+ /// // Initialize the iteration at the first child of the trie.
+ /// let mut stack = Vec::from([(trie.cursor(), 0, 0)]);
+ /// let mut key = Vec::new();
+ /// let mut results = Vec::new();
+ /// loop {
+ /// let Some((mut cursor, index, suffix_len)) = stack.pop() else {
+ /// // Nothing left in the trie.
+ /// break;
+ /// };
+ /// // Check to see if there is a value at the current node.
+ /// if let Some(value) = cursor.take_value() {
+ /// results.push((String::from_utf8(key.clone()).unwrap(), value));
+ /// }
+ /// // Now check for children of the current node.
+ /// let mut sub_cursor = cursor.clone();
+ /// if let Some(probe_result) = sub_cursor.probe(index) {
+ /// // Found a child. Add the current byte edge to the key.
+ /// key.push(probe_result.byte);
+ /// // Add the child to the stack, and also add back the current
+ /// // node if there are more siblings to visit.
+ /// if index + 1 < probe_result.total_siblings as usize {
+ /// stack.push((cursor, index + 1, suffix_len));
+ /// stack.push((sub_cursor, 0, 1));
+ /// } else {
+ /// stack.push((sub_cursor, 0, suffix_len + 1));
+ /// }
+ /// } else {
+ /// // No more children. Pop this node's bytes from the key.
+ /// for _ in 0..suffix_len {
+ /// key.pop();
+ /// }
+ /// }
+ /// }
+ ///
+ /// assert_eq!(&results, data);
+ /// ```
+ pub fn probe(&mut self, index: usize) -> Option<AsciiProbeResult> {
+     let store = &mut self.trie.store;
+     reader::probe_parameterized::<ZeroTrieSimpleAscii<[u8]>>(store, index)
+ }
+
+ /// Checks whether the cursor points to an empty trie.
+ ///
+ /// Use this to determine when to stop iterating.
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+     let Self { trie } = self;
+     trie.is_empty()
+ }
+}
+
+impl ZeroAsciiIgnoreCaseTrieCursor<'_> {
+ /// Steps the cursor one byte into the trie.
+ ///
+ /// Returns the byte if matched, which may be a different case than the input byte.
+ /// If this function returns `None`, any lookup loops can be terminated.
+ ///
+ /// # Examples
+ ///
+ /// Normalize the case of a value by stepping through an ignore-case trie:
+ ///
+ /// ```
+ /// use std::borrow::Cow;
+ /// use zerotrie::ZeroAsciiIgnoreCaseTrie;
+ ///
+ /// // A trie with two values: "aBc" and "aBcdEf"
+ /// let trie = ZeroAsciiIgnoreCaseTrie::from_bytes(b"aBc\x80dEf\x81");
+ ///
+ /// // Get out the value for "abc" and normalize the key string
+ /// let mut cursor = trie.cursor();
+ /// let mut key_str = Cow::Borrowed("abc".as_bytes());
+ /// let mut i = 0;
+ /// let value = loop {
+ /// let Some(&input_byte) = key_str.get(i) else {
+ /// break cursor.take_value();
+ /// };
+ /// let Some(matched_byte) = cursor.step(input_byte) else {
+ /// break None;
+ /// };
+ /// if matched_byte != input_byte {
+ /// key_str.to_mut()[i] = matched_byte;
+ /// }
+ /// i += 1;
+ /// };
+ ///
+ /// assert_eq!(value, Some(0));
+ /// assert_eq!(&*key_str, "aBc".as_bytes());
+ /// ```
+ ///
+ /// For more examples, see [`ZeroTrieSimpleAsciiCursor::step`].
+ #[inline]
+ pub fn step(&mut self, byte: u8) -> Option<u8> {
+     let store = &mut self.trie.store;
+     reader::step_parameterized::<ZeroAsciiIgnoreCaseTrie<[u8]>>(store, byte)
+ }
+
+ /// Takes the value at the current position.
+ ///
+ /// For more details, see [`ZeroTrieSimpleAsciiCursor::take_value`].
+ #[inline]
+ pub fn take_value(&mut self) -> Option<usize> {
+     let store = &mut self.trie.store;
+     reader::take_value(store)
+ }
+
+ /// Probes the next byte in the cursor.
+ ///
+ /// For more details, see [`ZeroTrieSimpleAsciiCursor::probe`].
+ pub fn probe(&mut self, index: usize) -> Option<AsciiProbeResult> {
+     let store = &mut self.trie.store;
+     reader::probe_parameterized::<ZeroAsciiIgnoreCaseTrie<[u8]>>(store, index)
+ }
+
+ /// Checks whether the cursor points to an empty trie.
+ ///
+ /// For more details, see [`ZeroTrieSimpleAsciiCursor::is_empty`].
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+     let Self { trie } = self;
+     trie.is_empty()
+ }
+}
+
+impl fmt::Write for ZeroTrieSimpleAsciiCursor<'_> {
+ /// Steps the cursor through each ASCII byte of the string.
+ ///
+ /// If the string contains non-ASCII chars, an error is returned.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use core::fmt::Write;
+ /// use zerotrie::ZeroTrieSimpleAscii;
+ ///
+ /// // A trie with two values: "abc" and "abcdef"
+ /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+ ///
+ /// let mut cursor = trie.cursor();
+ /// cursor.write_str("abcdxy").expect("all ASCII");
+ /// cursor.write_str("🚂").expect_err("non-ASCII");
+ /// ```
+ fn write_str(&mut self, s: &str) -> fmt::Result {
+     // Bytes before the first non-ASCII byte have already been stepped when the
+     // error is returned.
+     s.bytes().try_for_each(|b| {
+         if b.is_ascii() {
+             self.step(b);
+             Ok(())
+         } else {
+             Err(fmt::Error)
+         }
+     })
+ }
+
+ /// Equivalent to [`ZeroTrieSimpleAsciiCursor::step()`], except returns
+ /// an error if the char is non-ASCII.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use core::fmt::Write;
+ /// use zerotrie::ZeroTrieSimpleAscii;
+ ///
+ /// // A trie with two values: "abc" and "abcdef"
+ /// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+ ///
+ /// let mut cursor = trie.cursor();
+ /// cursor.write_char('a').expect("ASCII");
+ /// cursor.write_char('x').expect("ASCII");
+ /// cursor.write_char('🚂').expect_err("non-ASCII");
+ /// ```
+ fn write_char(&mut self, c: char) -> fmt::Result {
+     match u8::try_from(c) {
+         // Only single-byte, ASCII-range chars are stepped.
+         Ok(b) if b.is_ascii() => {
+             self.step(b);
+             Ok(())
+         }
+         _ => Err(fmt::Error),
+     }
+ }
+}
+
+ impl fmt::Write for ZeroAsciiIgnoreCaseTrieCursor<'_> {
+     /// Feeds each ASCII byte of the string to the cursor, in order.
+     ///
+     /// Returns an error at the first non-ASCII byte; the bytes before it have
+     /// already been stepped at that point.
+     fn write_str(&mut self, s: &str) -> fmt::Result {
+         s.bytes().try_for_each(|b| {
+             if b.is_ascii() {
+                 // The matched byte reported by `step` is not needed here.
+                 self.step(b);
+                 Ok(())
+             } else {
+                 Err(fmt::Error)
+             }
+         })
+     }
+
+     /// Like [`ZeroAsciiIgnoreCaseTrieCursor::step()`], but reports an error
+     /// instead of stepping when the char is non-ASCII.
+     fn write_char(&mut self, c: char) -> fmt::Result {
+         match u8::try_from(c) {
+             Ok(b) if b.is_ascii() => {
+                 self.step(b);
+                 Ok(())
+             }
+             _ => Err(fmt::Error),
+         }
+     }
+ }
diff --git a/vendor/zerotrie/src/error.rs b/vendor/zerotrie/src/error.rs
new file mode 100644
index 00000000..9ef5b86e
--- /dev/null
+++ b/vendor/zerotrie/src/error.rs
@@ -0,0 +1,25 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use displaydoc::Display;
+
+ /// Error types for the `zerotrie` crate.
+ ///
+ /// The `Display` text of each variant is taken from its `displaydoc` attribute.
+ #[derive(Debug, Copy, Clone, PartialEq, Eq, Display)]
+ #[non_exhaustive]
+ pub enum ZeroTrieBuildError {
+ /// Non-ASCII data was added to an ASCII-only trie.
+ #[displaydoc("Non-ASCII cannot be added to an ASCII-only trie")]
+ NonAsciiError,
+ /// The trie reached its maximum supported capacity.
+ #[displaydoc("Reached maximum capacity of trie")]
+ CapacityExceeded,
+ /// The builder could not solve the perfect hash function.
+ #[displaydoc("Failed to solve the perfect hash function. This is rare! Please report your case to the ICU4X team.")]
+ CouldNotSolvePerfectHash,
+ /// Mixed-case data was added to a case-insensitive trie.
+ #[displaydoc("Mixed-case data added to case-insensitive trie")]
+ MixedCase,
+ }
+
+ // Marker impl: no methods of the `Error` trait are overridden.
+ impl core::error::Error for ZeroTrieBuildError {}
diff --git a/vendor/zerotrie/src/helpers.rs b/vendor/zerotrie/src/helpers.rs
new file mode 100644
index 00000000..33440b97
--- /dev/null
+++ b/vendor/zerotrie/src/helpers.rs
@@ -0,0 +1,119 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+pub(crate) trait MaybeSplitAt<T> {
+ /// Like slice::split_at but debug-panics and returns an empty second slice
+ /// if the index is out of range.
+ fn debug_split_at(&self, mid: usize) -> (&Self, &Self);
+}
+
+impl<T> MaybeSplitAt<T> for [T] {
+ #[inline]
+ fn debug_split_at(&self, mid: usize) -> (&Self, &Self) {
+ self.split_at_checked(mid).unwrap_or_else(|| {
+ debug_assert!(false, "debug_split_at: {mid} expected to be in range");
+ (self, &[])
+ })
+ }
+}
+
+pub(crate) trait DebugUnwrapOr<T> {
+ /// Unwraps the option. On `None`, panics if debug assertions are enabled, else returns `gigo_value`.
+ fn debug_unwrap_or(self, gigo_value: T) -> T;
+}
+
+impl<T> DebugUnwrapOr<T> for Option<T> {
+ #[inline]
+ fn debug_unwrap_or(self, gigo_value: T) -> T {
+ match self {
+ Some(x) => x,
+ None => {
+ debug_assert!(false, "debug_unwrap_or called on a None value");
+ gigo_value
+ }
+ }
+ }
+}
+
+macro_rules! debug_unwrap {
+ ($expr:expr, return $retval:expr, $($arg:tt)+) => {
+ match $expr {
+ Some(x) => x,
+ None => {
+ debug_assert!(false, $($arg)*);
+ return $retval;
+ }
+ }
+ };
+ ($expr:expr, return $retval:expr) => {
+ debug_unwrap!($expr, return $retval, "invalid trie")
+ };
+ ($expr:expr, break, $($arg:tt)+) => {
+ match $expr {
+ Some(x) => x,
+ None => {
+ debug_assert!(false, $($arg)*);
+ break;
+ }
+ }
+ };
+ ($expr:expr, break) => {
+ debug_unwrap!($expr, break, "invalid trie")
+ };
+ ($expr:expr, $($arg:tt)+) => {
+ debug_unwrap!($expr, return (), $($arg)*)
+ };
+ ($expr:expr) => {
+ debug_unwrap!($expr, return ())
+ };
+}
+
+pub(crate) use debug_unwrap;
+
+/// The maximum number of base-10 digits required for rendering a usize.
+/// Note: 24/10 is an approximation of 8*log10(2)
+pub(crate) const MAX_USIZE_LEN_AS_DIGITS: usize = core::mem::size_of::<usize>() * 24 / 10 + 1;
+
+/// Formats a usize as a string of length N, padded with spaces,
+/// with the given prefix.
+///
+/// If the output array is too short for the prefix plus the digits, the function
+/// may panic. To prevent this, N should be at least M + MAX_USIZE_LEN_AS_DIGITS.
+pub(crate) const fn const_fmt_int<const M: usize, const N: usize>(
+ prefix: [u8; M],
+ value: usize,
+) -> [u8; N] {
+ let mut output = [b' '; N];
+ let mut i = 0;
+ while i < M {
+ output[i] = prefix[i];
+ i += 1;
+ }
+ let mut int_only = [b' '; MAX_USIZE_LEN_AS_DIGITS];
+ let mut value = value;
+ let mut i = MAX_USIZE_LEN_AS_DIGITS - 1;
+ loop {
+ let x = (value % 10) as u8;
+ int_only[i] = x + b'0';
+ value /= 10;
+ if value == 0 {
+ break;
+ }
+ i -= 1;
+ }
+ let mut j = M;
+ while i < MAX_USIZE_LEN_AS_DIGITS {
+ output[j] = int_only[i];
+ j += 1;
+ i += 1;
+ }
+ output
+}
+
+#[test]
+fn test_const_fmt_int() {
+ assert_eq!(*b"123", const_fmt_int::<0, 3>(*b"", 123));
+ assert_eq!(*b"123 ", const_fmt_int::<0, 6>(*b"", 123));
+ assert_eq!(*b"abc123", const_fmt_int::<3, 6>(*b"abc", 123));
+}
diff --git a/vendor/zerotrie/src/lib.rs b/vendor/zerotrie/src/lib.rs
new file mode 100644
index 00000000..a5c9e3d5
--- /dev/null
+++ b/vendor/zerotrie/src/lib.rs
@@ -0,0 +1,87 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! A data structure offering zero-copy storage and retrieval of byte strings, with a focus
+//! on the efficient storage of ASCII strings. Strings are mapped to `usize` values.
+//!
+//! [`ZeroTrie`] does not support mutation because doing so would require recomputing the entire
+//! data structure. Instead, it supports conversion to and from [`LiteMap`] and [`BTreeMap`].
+//!
+//! There are multiple variants of [`ZeroTrie`] optimized for different use cases.
+//!
+//! # Examples
+//!
+//! ```
+//! use zerotrie::ZeroTrie;
+//!
+//! let data: &[(&str, usize)] = &[("abc", 11), ("xyz", 22), ("axyb", 33)];
+//!
+//! let trie: ZeroTrie<Vec<u8>> = data.iter().copied().collect();
+//!
+//! assert_eq!(trie.get("axyb"), Some(33));
+//! assert_eq!(trie.byte_len(), 18);
+//! ```
+//!
+//! # Internal Structure
+//!
+//! To read about the internal structure of [`ZeroTrie`], build the docs with private modules:
+//!
+//! ```bash
+//! cargo doc --document-private-items --all-features --no-deps --open
+//! ```
+//!
+//! [`LiteMap`]: litemap::LiteMap
+//! [`BTreeMap`]: alloc::collections::BTreeMap
+
+// https://github.com/unicode-org/icu4x/blob/main/documents/process/boilerplate.md#library-annotations
+#![cfg_attr(not(any(test, doc)), no_std)]
+#![cfg_attr(
+ not(test),
+ deny(
+ // TODO(#4034): Enable the rest of these lints.
+ // clippy::indexing_slicing,
+ // clippy::unwrap_used,
+ clippy::expect_used,
+ // clippy::panic,
+ clippy::exhaustive_structs,
+ clippy::exhaustive_enums, clippy::trivially_copy_pass_by_ref,
+ missing_debug_implementations,
+ )
+)]
+#![warn(missing_docs)]
+
+#[cfg(feature = "alloc")]
+extern crate alloc;
+
+mod builder;
+mod byte_phf;
+pub mod cursor;
+mod error;
+#[macro_use]
+mod helpers;
+mod options;
+mod reader;
+#[cfg(feature = "serde")]
+mod serde;
+mod varint;
+mod zerotrie;
+
+pub use crate::zerotrie::ZeroAsciiIgnoreCaseTrie;
+pub use crate::zerotrie::ZeroTrie;
+pub use crate::zerotrie::ZeroTrieExtendedCapacity;
+pub use crate::zerotrie::ZeroTriePerfectHash;
+pub use crate::zerotrie::ZeroTrieSimpleAscii;
+pub use error::ZeroTrieBuildError;
+
+#[cfg(feature = "alloc")]
+pub use crate::zerotrie::ZeroTrieStringIterator;
+#[cfg(feature = "alloc")]
+pub use reader::ZeroTrieIterator;
+
+#[doc(hidden)]
+pub mod _internal {
+ pub use crate::byte_phf::f1;
+ pub use crate::byte_phf::f2;
+ pub use crate::byte_phf::PerfectByteHashMap;
+}
diff --git a/vendor/zerotrie/src/options.rs b/vendor/zerotrie/src/options.rs
new file mode 100644
index 00000000..58a72bbc
--- /dev/null
+++ b/vendor/zerotrie/src/options.rs
@@ -0,0 +1,153 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Options for building and reading from a ZeroTrie.
+//!
+//! These options are internal to the crate. A small selection of options
+//! are exported by way of the different public types on this crate.
+
+/// Whether to use the perfect hash function in the ZeroTrie.
+#[derive(Copy, Clone)]
+pub(crate) enum PhfMode {
+ /// Use binary search for all branch nodes.
+ BinaryOnly,
+ /// Use the perfect hash function for large branch nodes.
+ UsePhf,
+}
+
+impl PhfMode {
+ #[cfg(feature = "serde")]
+ const fn to_u8_flag(self) -> u8 {
+ match self {
+ Self::BinaryOnly => 0,
+ Self::UsePhf => 0x1,
+ }
+ }
+}
+
+/// Whether to support non-ASCII data in the ZeroTrie.
+#[derive(Copy, Clone)]
+pub(crate) enum AsciiMode {
+ /// Support only ASCII, returning an error if non-ASCII is found.
+ AsciiOnly,
+ /// Support all data, creating span nodes for non-ASCII bytes.
+ BinarySpans,
+}
+
+impl AsciiMode {
+ #[cfg(feature = "serde")]
+ const fn to_u8_flag(self) -> u8 {
+ match self {
+ Self::AsciiOnly => 0,
+ Self::BinarySpans => 0x2,
+ }
+ }
+}
+
+/// Whether to enforce a limit to the capacity of the ZeroTrie.
+#[derive(Copy, Clone)]
+pub(crate) enum CapacityMode {
+ /// Return an error if the trie requires a branch of more than 2^32 bytes.
+ Normal,
+ /// Construct the trie without returning an error.
+ Extended,
+}
+
+impl CapacityMode {
+ #[cfg(feature = "serde")]
+ const fn to_u8_flag(self) -> u8 {
+ match self {
+ Self::Normal => 0,
+ Self::Extended => 0x4,
+ }
+ }
+}
+
+/// How to handle strings with mixed ASCII case at a node, such as "abc" and "Abc"
+#[derive(Copy, Clone)]
+pub(crate) enum CaseSensitivity {
+ /// Allow all strings and sort them by byte value.
+ Sensitive,
+ /// Reject strings with different case and sort them as if `to_ascii_lowercase` is called.
+ IgnoreCase,
+}
+
+impl CaseSensitivity {
+ #[cfg(feature = "serde")]
+ const fn to_u8_flag(self) -> u8 {
+ match self {
+ Self::Sensitive => 0,
+ Self::IgnoreCase => 0x8,
+ }
+ }
+}
+
+#[derive(Copy, Clone)]
+pub(crate) struct ZeroTrieBuilderOptions {
+ pub phf_mode: PhfMode,
+ pub ascii_mode: AsciiMode,
+ pub capacity_mode: CapacityMode,
+ pub case_sensitivity: CaseSensitivity,
+}
+
+impl ZeroTrieBuilderOptions {
+ #[cfg(feature = "serde")]
+ pub(crate) const fn to_u8_flags(self) -> u8 {
+ self.phf_mode.to_u8_flag()
+ | self.ascii_mode.to_u8_flag()
+ | self.capacity_mode.to_u8_flag()
+ | self.case_sensitivity.to_u8_flag()
+ }
+}
+
+pub(crate) trait ZeroTrieWithOptions {
+ const OPTIONS: ZeroTrieBuilderOptions;
+}
+
+/// All branch nodes are binary search
+/// and there are no span nodes.
+impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieSimpleAscii<S> {
+ const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
+ phf_mode: PhfMode::BinaryOnly,
+ ascii_mode: AsciiMode::AsciiOnly,
+ capacity_mode: CapacityMode::Normal,
+ case_sensitivity: CaseSensitivity::Sensitive,
+ };
+}
+
+impl<S: ?Sized> crate::ZeroTrieSimpleAscii<S> {
+ #[cfg(feature = "serde")]
+ pub(crate) const FLAGS: u8 = Self::OPTIONS.to_u8_flags();
+}
+
+/// All branch nodes are binary search
+/// and nodes use case-insensitive matching.
+impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroAsciiIgnoreCaseTrie<S> {
+ const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
+ phf_mode: PhfMode::BinaryOnly,
+ ascii_mode: AsciiMode::AsciiOnly,
+ capacity_mode: CapacityMode::Normal,
+ case_sensitivity: CaseSensitivity::IgnoreCase,
+ };
+}
+
+/// Branch nodes could be either binary search or PHF.
+impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTriePerfectHash<S> {
+ const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
+ phf_mode: PhfMode::UsePhf,
+ ascii_mode: AsciiMode::BinarySpans,
+ capacity_mode: CapacityMode::Normal,
+ case_sensitivity: CaseSensitivity::Sensitive,
+ };
+}
+
+/// No limited capacity assertion.
+impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieExtendedCapacity<S> {
+ const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
+ phf_mode: PhfMode::UsePhf,
+ ascii_mode: AsciiMode::BinarySpans,
+ capacity_mode: CapacityMode::Extended,
+ case_sensitivity: CaseSensitivity::Sensitive,
+ };
+}
diff --git a/vendor/zerotrie/src/reader.rs b/vendor/zerotrie/src/reader.rs
new file mode 100644
index 00000000..4d3dd005
--- /dev/null
+++ b/vendor/zerotrie/src/reader.rs
@@ -0,0 +1,727 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! # Internal layout of ZeroTrie
+//!
+//! A ZeroTrie is composed of a series of nodes stored in sequence in a byte slice.
+//!
+//! There are 4 types of nodes:
+//!
+//! 1. ASCII (`0xxxxxxx`): matches a literal ASCII byte.
+//! 2. Span (`101xxxxx`): matches a span of non-ASCII bytes.
+//! 3. Value (`100xxxxx`): associates a value with a string
+//! 4. Branch (`11xxxxxx`): matches one of a set of bytes.
+//!
+//! Span, Value, and Branch nodes contain a varint, which has different semantics for each:
+//!
+//! - Span varint: length of the span
+//! - Value varint: value associated with the string
+//! - Branch varint: number of edges in the branch and width of the offset table
+//!
+//! If reading an ASCII, Span, or Branch node, one or more bytes are consumed from the input
+//! string. If the next byte(s) in the input string do not match the node, we return `None`.
+//! If reading a Value node, if the string is empty, return `Some(value)`; otherwise, we skip
+//! the Value node and continue on to the next node.
+//!
+//! When a node is consumed, a shorter, well-formed ZeroTrie remains.
+//!
+//! ### Basic Example
+//!
+//! Here is an example ZeroTrie without branch nodes:
+//!
+//! ```
+//! use zerotrie::ZeroTriePerfectHash;
+//!
+//! let bytes = [
+//! b'a', // ASCII literal
+//! 0b10001010, // value 10
+//! b'b', // ASCII literal
+//! 0b10100011, // span of 3
+//! 0x81, // first byte in span
+//! 0x91, // second byte in span
+//! 0xA1, // third and final byte in span
+//! 0b10000100, // value 4
+//! ];
+//!
+//! let trie = ZeroTriePerfectHash::from_bytes(&bytes);
+//!
+//! // First value: "a" → 10
+//! assert_eq!(trie.get(b"a"), Some(10));
+//!
+//! // Second value: "ab\x81\x91\xA1" → 4
+//! assert_eq!(trie.get(b"ab\x81\x91\xA1"), Some(4));
+//!
+//! // A few examples of strings that do NOT have values in the trie:
+//! assert_eq!(trie.get(b"ab"), None);
+//! assert_eq!(trie.get(b"b"), None);
+//! assert_eq!(trie.get(b"b\x81\x91\xA1"), None);
+//! ```
+//!
+//! ## Branch Nodes
+//!
+//! There are two types of branch nodes: binary search and perfect hash. `ZeroTrieSimpleAscii`
+//! contains only binary search nodes, whereas `ZeroTriePerfectHash` can contain either.
+//!
+//! The head node of the branch has a varint that encodes two things:
+//!
+//! - Bottom 8 bits: number of edges in the branch (`N`); if N = 0, set N to 256
+//! - Bits 9 and 10: width of the offset table (`W`)
+//!
+//! Note that N is always in the range [1, 256]. There can't be more than 256 edges because
+//! there are only 256 unique u8 values.
+//!
+//! A few examples of the head node of the branch:
+//!
+//! - `0b11000000`: varint bits `0`: N = 0 which means N = 256; W = 0
+//! - `0b11000110`: varint bits `110`: N = 6; W = 0
+//! - `0b11100000 0b00000101`: varint bits `1000101`: N = 69; W = 0
+//! - `0b11100010 0b00000000`: varint bits `101000000`: N = 64; W = 1
+//!
+//! In `ZeroTriePerfectHash`, if N <= 15, the branch is assumed to be a binary search, and if
+//! N > 15, the branch is assumed to be a perfect hash.
+//!
+//! ### Binary Search Branch Nodes
+//!
+//! A binary search branch node is used when:
+//!
+//! 1. The trie is a `ZeroTrieSimpleAscii`, OR
+//! 2. There are 15 or fewer items in the branch.
+//!
+//! The head branch node is followed by N sorted bytes. When evaluating a branch node, one byte
+//! is consumed from the input. If it is one of the N sorted bytes (scanned using binary search),
+//! the index `i` of the byte within the list is used to index into the offset table (described
+//! below). If the byte is not in the list, the string is not in the trie, so return `None`.
+//!
+//! ### Perfect Hash Branch Nodes
+//!
+//! A perfect hash branch node is used when:
+//!
+//! 1. The trie is NOT a `ZeroTrieSimpleAscii`, AND
+//! 2. There are 16 or more items in the branch.
+//!
+//! The head branch node is followed by 1 byte containing parameter `p`, N bytes containing
+//! parameters `q`, and N bytes containing the bytes to match. From these parameters, either an
+//! index within the hash table `i` is resolved and used as input to index into the offset
+//! table (described below), or the value is determined to not be present and `None` is
+//! returned. For more detail on resolving the perfect hash function, see [`crate::byte_phf`].
+//!
+//! ### Offset Tables
+//!
+//! The _offset table_ encodes the range of the remaining buffer containing the trie reachable
+//! from the byte matched in the branch node. Both types of branch nodes include an offset
+//! table following the key lookup. Given the index `i` from the first step, the range
+//! `[s_i, s_(i+1))` brackets the next step in the trie.
+//!
+//! Offset tables utilize the `W` parameter stored in the branch head node. The special case
+//! when `W == 0`, with `N - 1` bytes, is easiest to understand:
+//!
+//! **Offset table, W = 0:** `[s_1, s_2, ..., s_(N-1)]`
+//!
+//! Note that `s_0` is always 0 and `s_N` is always the length of the remaining slice, so those
+//! values are not explicitly included in the offset table.
+//!
+//! When W > 0, the high and low bits of the offsets are in separate bytes, arranged as follows:
+//!
+//! **Generalized offset table:** `[a_1, a_2, ..., a_(N-1), b_1, b_2, ..., b_(N-1), c_1, ...]`
+//!
+//! where `s_i = (((a_i << 8) + b_i) << 8) + c_i ...` (high bits first, low bits last)
+//!
+//! ### Advanced Example
+//!
+//! The following trie encodes the following map. It has multiple varints and branch nodes, which
+//! are all binary search with W = 0. Note that there is a value for the empty string.
+//!
+//! - "" → 0
+//! - "axb" → 100
+//! - "ayc" → 2
+//! - "azd" → 3
+//! - "bxe" → 4
+//! - "bxefg" → 500
+//! - "bxefh" → 6
+//! - "bxei" → 7
+//! - "bxeikl" → 8
+//!
+//! ```
+//! use zerotrie::ZeroTrieSimpleAscii;
+//!
+//! let bytes = [
+//! 0b10000000, // value 0
+//! 0b11000010, // branch of 2
+//! b'a', //
+//! b'b', //
+//! 13, //
+//! 0b11000011, // start of 'a' subtree: branch of 3
+//! b'x', //
+//! b'y', //
+//! b'z', //
+//! 3, //
+//! 5, //
+//! b'b', //
+//! 0b10010000, // value 100 (lead)
+//! 0x54, // value 100 (trail)
+//! b'c', //
+//! 0b10000010, // value 2
+//! b'd', //
+//! 0b10000011, // value 3
+//! b'x', // start of 'b' subtree
+//! b'e', //
+//! 0b10000100, // value 4
+//! 0b11000010, // branch of 2
+//! b'f', //
+//! b'i', //
+//! 7, //
+//! 0b11000010, // branch of 2
+//! b'g', //
+//! b'h', //
+//! 2, //
+//! 0b10010011, // value 500 (lead)
+//! 0x64, // value 500 (trail)
+//! 0b10000110, // value 6
+//! 0b10000111, // value 7
+//! b'k', //
+//! b'l', //
+//! 0b10001000, // value 8
+//! ];
+//!
+//! let trie = ZeroTrieSimpleAscii::from_bytes(&bytes);
+//!
+//! // Assert that the specified items are in the map
+//! assert_eq!(trie.get(b""), Some(0));
+//! assert_eq!(trie.get(b"axb"), Some(100));
+//! assert_eq!(trie.get(b"ayc"), Some(2));
+//! assert_eq!(trie.get(b"azd"), Some(3));
+//! assert_eq!(trie.get(b"bxe"), Some(4));
+//! assert_eq!(trie.get(b"bxefg"), Some(500));
+//! assert_eq!(trie.get(b"bxefh"), Some(6));
+//! assert_eq!(trie.get(b"bxei"), Some(7));
+//! assert_eq!(trie.get(b"bxeikl"), Some(8));
+//!
+//! // Assert that some other items are not in the map
+//! assert_eq!(trie.get(b"a"), None);
+//! assert_eq!(trie.get(b"bx"), None);
+//! assert_eq!(trie.get(b"xba"), None);
+//! ```
+
+use crate::byte_phf::PerfectByteHashMap;
+use crate::cursor::AsciiProbeResult;
+use crate::helpers::*;
+use crate::options::*;
+use crate::varint::read_varint_meta2;
+use crate::varint::read_varint_meta3;
+
+#[cfg(feature = "alloc")]
+use alloc::string::String;
+
+/// Given a slice starting with an offset table, returns the trie for the given index.
+///
+/// Arguments:
+/// - `trie` = a trie pointing at an offset table (after the branch node and search table)
+/// - `i` = the desired index within the offset table
+/// - `n` = the number of items in the offset table
+/// - `w` = the width of the offset table items minus one
+#[inline]
+fn get_branch(mut trie: &[u8], i: usize, n: usize, mut w: usize) -> &[u8] {
+ let mut p = 0usize;
+ let mut q = 0usize;
+ loop {
+ let indices;
+ (indices, trie) = trie.debug_split_at(n - 1);
+ p = (p << 8)
+ + if i == 0 {
+ 0
+ } else {
+ *indices.get(i - 1).debug_unwrap_or(&0) as usize
+ };
+ q = match indices.get(i) {
+ Some(x) => (q << 8) + *x as usize,
+ None => trie.len(),
+ };
+ if w == 0 {
+ break;
+ }
+ w -= 1;
+ }
+ trie.get(p..q).debug_unwrap_or(&[])
+}
+
+/// Version of [`get_branch()`] specialized for the case `w == 0` for performance
+#[inline]
+fn get_branch_w0(mut trie: &[u8], i: usize, n: usize) -> &[u8] {
+ let indices;
+ (indices, trie) = trie.debug_split_at(n - 1);
+ let p = if i == 0 {
+ 0
+ } else {
+ *indices.get(i - 1).debug_unwrap_or(&0) as usize
+ };
+ let q = match indices.get(i) {
+ Some(x) => *x as usize,
+ None => trie.len(),
+ };
+ trie.get(p..q).debug_unwrap_or(&[])
+}
+
+/// The node type. See the module-level docs for more explanation of the four node types.
+enum NodeType {
+ /// An ASCII node. Contains a single literal ASCII byte and no varint.
+ Ascii,
+ /// A span node. Contains a varint indicating how big the span is.
+ Span,
+ /// A value node. Contains a varint representing the value.
+ Value,
+ /// A branch node. Contains a varint of the number of output nodes, plus W in the high bits.
+ Branch,
+}
+
+impl core::fmt::Debug for NodeType {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ use NodeType::*;
+ f.write_str(match *self {
+ Ascii => "a",
+ Span => "s",
+ Value => "v",
+ Branch => "m",
+ })
+ }
+}
+
+#[inline]
+fn byte_type(b: u8) -> NodeType {
+ match b & 0b11100000 {
+ 0b10000000 => NodeType::Value,
+ 0b10100000 => NodeType::Span,
+ 0b11000000 => NodeType::Branch,
+ 0b11100000 => NodeType::Branch,
+ _ => NodeType::Ascii,
+ }
+}
+
+#[inline]
+pub(crate) fn get_parameterized<T: ZeroTrieWithOptions + ?Sized>(
+ mut trie: &[u8],
+ mut ascii: &[u8],
+) -> Option<usize> {
+ loop {
+ let (b, x, i, search);
+ (b, trie) = trie.split_first()?;
+ let byte_type = byte_type(*b);
+ (x, trie) = match byte_type {
+ NodeType::Ascii => (0, trie),
+ NodeType::Span => {
+ if matches!(T::OPTIONS.ascii_mode, AsciiMode::BinarySpans) {
+ read_varint_meta3(*b, trie)
+ } else {
+ debug_assert!(false, "Span node found in ASCII trie!");
+ return None;
+ }
+ }
+ NodeType::Value => read_varint_meta3(*b, trie),
+ NodeType::Branch => read_varint_meta2(*b, trie),
+ };
+ if let Some((c, temp)) = ascii.split_first() {
+ if matches!(byte_type, NodeType::Ascii) {
+ let is_match = if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase)
+ {
+ b.eq_ignore_ascii_case(c)
+ } else {
+ b == c
+ };
+ if is_match {
+ // Matched a byte
+ ascii = temp;
+ continue;
+ } else {
+ // Byte that doesn't match
+ return None;
+ }
+ }
+ if matches!(byte_type, NodeType::Value) {
+ // Value node, but not at end of string
+ continue;
+ }
+ if matches!(T::OPTIONS.ascii_mode, AsciiMode::BinarySpans)
+ && matches!(byte_type, NodeType::Span)
+ {
+ let (trie_span, ascii_span);
+ (trie_span, trie) = trie.debug_split_at(x);
+ (ascii_span, ascii) = ascii.split_at_checked(x)?;
+ if trie_span == ascii_span {
+ // Matched a byte span
+ continue;
+ } else {
+ // Byte span that doesn't match
+ return None;
+ }
+ }
+ // Branch node
+ let (x, w) = if x >= 256 { (x & 0xff, x >> 8) } else { (x, 0) };
+ let w = if matches!(T::OPTIONS.capacity_mode, CapacityMode::Extended) {
+ w
+ } else {
+ // See the table below regarding this assertion
+ debug_assert!(w <= 3, "get: w > 3 but we assume w <= 3");
+ w & 0x3
+ };
+ let x = if x == 0 { 256 } else { x };
+ if matches!(T::OPTIONS.phf_mode, PhfMode::BinaryOnly) || x < 16 {
+ // binary search
+ (search, trie) = trie.debug_split_at(x);
+ let bsearch_result =
+ if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase) {
+ search.binary_search_by_key(&c.to_ascii_lowercase(), |x| {
+ x.to_ascii_lowercase()
+ })
+ } else {
+ search.binary_search(c)
+ };
+ i = bsearch_result.ok()?;
+ } else {
+ // phf
+ (search, trie) = trie.debug_split_at(x * 2 + 1);
+ i = PerfectByteHashMap::from_store(search).get(*c)?;
+ }
+ trie = if w == 0 {
+ get_branch_w0(trie, i, x)
+ } else {
+ get_branch(trie, i, x, w)
+ };
+ ascii = temp;
+ continue;
+ } else {
+ if matches!(byte_type, NodeType::Value) {
+ // Value node at end of string
+ return Some(x);
+ }
+ return None;
+ }
+ }
+}
+
+// DISCUSS: This function is 7% faster *on aarch64* if we assert a max on w.
+//
+// | Bench | No Assert, x86_64 | No Assert, aarch64 | Assertion, x86_64 | Assertion, aarch64 |
+// |---------------|-------------------|--------------------|-------------------|--------------------|
+// | basic | ~187.51 ns | ~97.586 ns | ~199.11 ns | ~99.236 ns |
+// | subtags_10pct | ~9.5557 µs | ~4.8696 µs | ~9.5779 µs | ~4.5649 µs |
+// | subtags_full | ~137.75 µs | ~76.016 µs | ~142.02 µs | ~70.254 µs |
+
+/// Steps one node into the trie assuming all branch nodes are binary search and that
+/// there are no span nodes.
+///
+/// The input-output argument `trie` starts at the original trie and ends pointing to
+/// the sub-trie reachable by `c`.
+#[inline]
+pub(crate) fn step_parameterized<T: ZeroTrieWithOptions + ?Sized>(
+ trie: &mut &[u8],
+ c: u8,
+) -> Option<u8> {
+ // Currently, the only option `step_parameterized` supports is `CaseSensitivity::IgnoreCase`.
+ // `AsciiMode::BinarySpans` is tricky because the state can no longer be simply a trie.
+ // If a span node is encountered, `None` is returned later in this function.
+ debug_assert!(
+ matches!(T::OPTIONS.ascii_mode, AsciiMode::AsciiOnly),
+ "Spans not yet implemented in step function"
+ );
+ // PHF can be easily implemented but the code is not yet reachable
+ debug_assert!(
+ matches!(T::OPTIONS.phf_mode, PhfMode::BinaryOnly),
+ "PHF not yet implemented in step function"
+ );
+ // Extended Capacity can be easily implemented but the code is not yet reachable
+ debug_assert!(
+ matches!(T::OPTIONS.capacity_mode, CapacityMode::Normal),
+ "Extended capacity not yet implemented in step function"
+ );
+ let (mut b, x, search);
+ loop {
+ (b, *trie) = match trie.split_first() {
+ Some(v) => v,
+ None => {
+ // Empty trie or only a value node
+ return None;
+ }
+ };
+ match byte_type(*b) {
+ NodeType::Ascii => {
+ let is_match = if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase)
+ {
+ b.eq_ignore_ascii_case(&c)
+ } else {
+ *b == c
+ };
+ if is_match {
+ // Matched a byte
+ return Some(*b);
+ } else {
+ // Byte that doesn't match
+ *trie = &[];
+ return None;
+ }
+ }
+ NodeType::Branch => {
+ // Proceed to the branch node logic below
+ (x, *trie) = read_varint_meta2(*b, trie);
+ break;
+ }
+ NodeType::Span => {
+ // Question: Should we put the trie back into a valid state?
+ // Currently this code is unreachable so let's not worry about it.
+ debug_assert!(false, "Span node found in ASCII trie!");
+ return None;
+ }
+ NodeType::Value => {
+ // Skip the value node and go to the next node
+ (_, *trie) = read_varint_meta3(*b, trie);
+ continue;
+ }
+ };
+ }
+ // Branch node
+ let (x, w) = if x >= 256 { (x & 0xff, x >> 8) } else { (x, 0) };
+ // See comment above regarding this assertion
+ debug_assert!(w <= 3, "get: w > 3 but we assume w <= 3");
+ let w = w & 0x3;
+ let x = if x == 0 { 256 } else { x };
+ // Always use binary search
+ (search, *trie) = trie.debug_split_at(x);
+ let bsearch_result = if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase) {
+ search.binary_search_by_key(&c.to_ascii_lowercase(), |x| x.to_ascii_lowercase())
+ } else {
+ search.binary_search(&c)
+ };
+ match bsearch_result {
+ Ok(i) => {
+ // Matched a byte
+ *trie = if w == 0 {
+ get_branch_w0(trie, i, x)
+ } else {
+ get_branch(trie, i, x, w)
+ };
+ Some(search[i])
+ }
+ Err(_) => {
+ // Byte that doesn't match
+ *trie = &[];
+ None
+ }
+ }
+}
+
+/// Steps one node into the trie, assuming all branch nodes are binary search and that
+/// there are no span nodes, using an index.
+///
+/// The input-output argument `trie` starts at the original trie and ends pointing to
+/// the sub-trie indexed by `index`.
+#[inline]
+pub(crate) fn probe_parameterized<T: ZeroTrieWithOptions + ?Sized>(
+ trie: &mut &[u8],
+ index: usize,
+) -> Option<AsciiProbeResult> {
+ // Currently, the only option `probe_parameterized` supports is `CaseSensitivity::IgnoreCase`.
+ // `AsciiMode::BinarySpans` is tricky because the state can no longer be simply a trie.
+ // If a span node is encountered, `None` is returned later in this function.
+ debug_assert!(
+ matches!(T::OPTIONS.ascii_mode, AsciiMode::AsciiOnly),
+ "Spans not yet implemented in step function"
+ );
+ // PHF can be easily implemented but the code is not yet reachable
+ debug_assert!(
+ matches!(T::OPTIONS.phf_mode, PhfMode::BinaryOnly),
+ "PHF not yet implemented in step function"
+ );
+ // Extended Capacity can be easily implemented but the code is not yet reachable
+ debug_assert!(
+ matches!(T::OPTIONS.capacity_mode, CapacityMode::Normal),
+ "Extended capacity not yet implemented in step function"
+ );
+ let (mut b, x, search);
+ loop {
+ (b, *trie) = match trie.split_first() {
+ Some(v) => v,
+ None => {
+ // Empty trie or only a value node
+ return None;
+ }
+ };
+ match byte_type(*b) {
+ NodeType::Ascii => {
+ if index > 0 {
+ *trie = &[];
+ return None;
+ }
+ return Some(AsciiProbeResult {
+ byte: *b,
+ total_siblings: 1,
+ });
+ }
+ NodeType::Branch => {
+ // Proceed to the branch node logic below
+ (x, *trie) = read_varint_meta2(*b, trie);
+ break;
+ }
+ NodeType::Span => {
+ // Question: Should we put the trie back into a valid state?
+ // Currently this code is unreachable so let's not worry about it.
+ debug_assert!(false, "Span node found in ASCII trie!");
+ return None;
+ }
+ NodeType::Value => {
+ // Skip the value node and go to the next node
+ (_, *trie) = read_varint_meta3(*b, trie);
+ continue;
+ }
+ };
+ }
+ // Branch node
+ let (x, w) = if x >= 256 { (x & 0xff, x >> 8) } else { (x, 0) };
+ debug_assert!(u8::try_from(x).is_ok());
+ let total_siblings = x as u8;
+ // See comment above regarding this assertion
+ debug_assert!(w <= 3, "get: w > 3 but we assume w <= 3");
+ let w = w & 0x3;
+ let x = if x == 0 { 256 } else { x };
+ if index >= x {
+ *trie = &[];
+ return None;
+ }
+ (search, *trie) = trie.debug_split_at(x);
+ *trie = if w == 0 {
+ get_branch_w0(trie, index, x)
+ } else {
+ get_branch(trie, index, x, w)
+ };
+ Some(AsciiProbeResult {
+ byte: search[index],
+ total_siblings,
+ })
+}
+
+/// Steps one node into the trie if the head node is a value node, returning the value.
+/// If the head node is not a value node, no change is made.
+///
+/// The input-output argument `trie` starts at the original trie and ends pointing to
+/// the sub-trie with the value node removed.
+pub(crate) fn take_value(trie: &mut &[u8]) -> Option<usize> {
+ let (b, new_trie) = trie.split_first()?;
+ match byte_type(*b) {
+ NodeType::Ascii | NodeType::Span | NodeType::Branch => None,
+ NodeType::Value => {
+ let x;
+ (x, *trie) = read_varint_meta3(*b, new_trie);
+ Some(x)
+ }
+ }
+}
+
+#[cfg(feature = "alloc")]
+use alloc::vec::Vec;
+
+/// Iterator type for walking the byte sequences contained in a ZeroTrie.
+#[cfg(feature = "alloc")]
+#[derive(Debug)]
+pub struct ZeroTrieIterator<'a> {
+ /// Whether the PHF is enabled on this trie.
+ use_phf: bool,
+ /// Intermediate state during iteration:
+ /// 1. A trie (usually a slice of the original, bigger trie)
+ /// 2. The string that leads to the trie
+ /// 3. If the trie's lead node is a branch node, the current index being evaluated
+ state: Vec<(&'a [u8], Vec<u8>, usize)>,
+}
+
+#[cfg(feature = "alloc")]
+impl<'a> ZeroTrieIterator<'a> {
+ pub(crate) fn new<S: AsRef<[u8]> + ?Sized>(store: &'a S, use_phf: bool) -> Self {
+ ZeroTrieIterator {
+ use_phf,
+ state: alloc::vec![(store.as_ref(), alloc::vec![], 0)],
+ }
+ }
+}
+
+#[cfg(feature = "alloc")]
+impl Iterator for ZeroTrieIterator<'_> {
+ type Item = (Vec<u8>, usize);
+ fn next(&mut self) -> Option<Self::Item> {
+ let (mut trie, mut string, mut branch_idx);
+ (trie, string, branch_idx) = self.state.pop()?;
+ loop {
+ let (b, x, span, search);
+ let return_trie = trie;
+ (b, trie) = match trie.split_first() {
+ Some(tpl) => tpl,
+ None => {
+ // At end of current branch; step back to the branch node.
+ // If there are no more branches, we are finished.
+ (trie, string, branch_idx) = self.state.pop()?;
+ continue;
+ }
+ };
+ let byte_type = byte_type(*b);
+ if matches!(byte_type, NodeType::Ascii) {
+ string.push(*b);
+ continue;
+ }
+ (x, trie) = match byte_type {
+ NodeType::Ascii => (0, trie),
+ NodeType::Span | NodeType::Value => read_varint_meta3(*b, trie),
+ NodeType::Branch => read_varint_meta2(*b, trie),
+ };
+ if matches!(byte_type, NodeType::Span) {
+ (span, trie) = trie.debug_split_at(x);
+ string.extend(span);
+ continue;
+ }
+ if matches!(byte_type, NodeType::Value) {
+ let retval = string.clone();
+ // Return to this position on the next step
+ self.state.push((trie, string, 0));
+ return Some((retval, x));
+ }
+ // Branch node
+ let (x, w) = if x >= 256 { (x & 0xff, x >> 8) } else { (x, 0) };
+ let x = if x == 0 { 256 } else { x };
+ if branch_idx + 1 < x {
+ // Return to this branch node at the next index
+ self.state
+ .push((return_trie, string.clone(), branch_idx + 1));
+ }
+ let byte = if x < 16 || !self.use_phf {
+ // binary search
+ (search, trie) = trie.debug_split_at(x);
+ debug_unwrap!(search.get(branch_idx), return None)
+ } else {
+ // phf
+ (search, trie) = trie.debug_split_at(x * 2 + 1);
+ debug_unwrap!(search.get(branch_idx + x + 1), return None)
+ };
+ string.push(*byte);
+ trie = if w == 0 {
+ get_branch_w0(trie, branch_idx, x)
+ } else {
+ get_branch(trie, branch_idx, x, w)
+ };
+ branch_idx = 0;
+ }
+ }
+}
+
+#[cfg(feature = "alloc")]
+pub(crate) fn get_iter_phf<S: AsRef<[u8]> + ?Sized>(store: &S) -> ZeroTrieIterator<'_> {
+ ZeroTrieIterator::new(store, true)
+}
+
+/// # Panics
+/// Panics if the trie contains non-ASCII items.
+#[cfg(feature = "alloc")]
+#[allow(clippy::type_complexity)]
+pub(crate) fn get_iter_ascii_or_panic<S: AsRef<[u8]> + ?Sized>(
+ store: &S,
+) -> core::iter::Map<ZeroTrieIterator<'_>, fn((Vec<u8>, usize)) -> (String, usize)> {
+ ZeroTrieIterator::new(store, false).map(|(k, v)| {
+ #[allow(clippy::unwrap_used)] // in signature of function
+ let ascii_str = String::from_utf8(k).unwrap();
+ (ascii_str, v)
+ })
+}
diff --git a/vendor/zerotrie/src/serde.rs b/vendor/zerotrie/src/serde.rs
new file mode 100644
index 00000000..78837192
--- /dev/null
+++ b/vendor/zerotrie/src/serde.rs
@@ -0,0 +1,642 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::builder::bytestr::ByteStr;
+use crate::options::ZeroTrieWithOptions;
+use crate::zerotrie::ZeroTrieFlavor;
+use crate::ZeroAsciiIgnoreCaseTrie;
+use crate::ZeroTrie;
+use crate::ZeroTrieExtendedCapacity;
+use crate::ZeroTriePerfectHash;
+use crate::ZeroTrieSimpleAscii;
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use core::fmt;
+use litemap::LiteMap;
+use serde::de::Error;
+use serde::de::Visitor;
+use serde::Deserialize;
+use serde::Deserializer;
+use serde::Serialize;
+use serde::Serializer;
+
+/// Serde visitor that collects a byte string into an owned `Box<[u8]>`.
+///
+/// Accepts a byte slice, a string (stored as its UTF-8 bytes), or a sequence of `u8`.
+struct ByteStrVisitor;
+impl<'de> Visitor<'de> for ByteStrVisitor {
+    type Value = Box<[u8]>;
+
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("a slice of borrowed bytes or a string")
+    }
+
+    /// Copies the visited bytes into a new box.
+    fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E> {
+        Ok(v.into())
+    }
+
+    /// Copies the UTF-8 bytes of the visited string into a new box.
+    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> {
+        Ok(v.as_bytes().into())
+    }
+
+    /// Drains a sequence of `u8` elements, propagating the first element error.
+    fn visit_seq<A>(self, mut v: A) -> Result<Self::Value, A::Error>
+    where
+        A: serde::de::SeqAccess<'de>,
+    {
+        // Pre-size from the access' hint when it provides one.
+        let capacity = v.size_hint().unwrap_or(0);
+        let mut bytes = Vec::with_capacity(capacity);
+        while let Some(byte) = v.next_element::<u8>()? {
+            bytes.push(byte);
+        }
+        Ok(bytes.into_boxed_slice())
+    }
+}
+
+impl<'data, 'de: 'data> Deserialize<'de> for &'data ByteStr {
+    /// Deserializes a borrowed byte slice and reinterprets it as a `ByteStr` reference.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        <&'data [u8]>::deserialize(deserializer).map(|bytes| ByteStr::from_bytes(bytes))
+    }
+}
+
+impl<'de> Deserialize<'de> for Box<ByteStr> {
+    /// Deserializes an owned byte string.
+    ///
+    /// Human-readable formats go through `ByteStrVisitor` via `deserialize_any`;
+    /// other formats are read as a `Vec<u8>`.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let boxed: Box<[u8]> = if deserializer.is_human_readable() {
+            deserializer.deserialize_any(ByteStrVisitor)?
+        } else {
+            Vec::<u8>::deserialize(deserializer)?.into_boxed_slice()
+        };
+        Ok(ByteStr::from_boxed_bytes(boxed))
+    }
+}
+
+impl Serialize for &ByteStr {
+    /// Serializes the byte string.
+    ///
+    /// Human-readable formats get a string when the bytes are valid UTF-8; in every
+    /// other case the raw bytes are emitted with `serialize_bytes`.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let bytes = self.as_bytes();
+        if !serializer.is_human_readable() {
+            return serializer.serialize_bytes(bytes);
+        }
+        if let Ok(text) = core::str::from_utf8(bytes) {
+            serializer.serialize_str(text)
+        } else {
+            serializer.serialize_bytes(bytes)
+        }
+    }
+}
+
+impl<'data, 'de: 'data, Store> Deserialize<'de> for ZeroTrieSimpleAscii<Store>
+where
+    // DISCUSS: There are several possibilities for the bounds here that would
+    // get the job done. I could look for Deserialize, but this would require
+    // creating a custom Deserializer for the map case. I also considered
+    // introducing a new trait instead of relying on From.
+    Store: From<&'data [u8]> + From<Vec<u8>> + 'data,
+{
+    /// Human-readable formats: reads a map from byte-string keys to `usize` values and
+    /// converts it with `try_from_serde_litemap`. Other formats: reads a `(flags, bytes)`
+    /// pair, rejects a flags byte that differs from this type's options, and wraps the
+    /// borrowed bytes as the store.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        if !deserializer.is_human_readable() {
+            // Note: `impl Deserialize for &[u8]` uses visit_borrowed_bytes
+            let (flags, trie_bytes) = <(u8, &[u8])>::deserialize(deserializer)?;
+            return if flags == Self::OPTIONS.to_u8_flags() {
+                Ok(ZeroTrieSimpleAscii::from_store(Store::from(trie_bytes)))
+            } else {
+                Err(D::Error::custom("invalid ZeroTrie tag"))
+            };
+        }
+        let entries = LiteMap::<Box<ByteStr>, usize>::deserialize(deserializer)?;
+        let built = ZeroTrieSimpleAscii::try_from_serde_litemap(&entries)
+            .map_err(D::Error::custom)?;
+        Ok(built.convert_store())
+    }
+}
+
+impl<Store> Serialize for ZeroTrieSimpleAscii<Store>
+where
+    Store: AsRef<[u8]>,
+{
+    /// Human-readable formats: serializes the trie as the map produced by `to_litemap`.
+    /// Other formats: serializes a `(flags, bytes)` pair, where `flags` is the same
+    /// `Self::OPTIONS.to_u8_flags()` value that the `Deserialize` impl checks for.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if serializer.is_human_readable() {
+            let lm = self.to_litemap();
+            lm.serialize(serializer)
+        } else {
+            // Note: `impl Serialize for ByteStr` uses `serialize_bytes`
+            (
+                Self::OPTIONS.to_u8_flags(),
+                ByteStr::from_bytes(self.as_bytes()),
+            )
+                .serialize(serializer)
+        }
+    }
+}
+
+impl<'de, 'data, Store> Deserialize<'de> for ZeroAsciiIgnoreCaseTrie<Store>
+where
+    'de: 'data,
+    // DISCUSS: There are several possibilities for the bounds here that would
+    // get the job done. I could look for Deserialize, but this would require
+    // creating a custom Deserializer for the map case. I also considered
+    // introducing a new trait instead of relying on From.
+    Store: From<&'data [u8]> + From<Vec<u8>> + 'data,
+{
+    /// Human-readable formats: reads a map from byte-string keys to `usize` values and
+    /// converts it with `try_from_serde_litemap`. Other formats: reads a `(flags, bytes)`
+    /// pair, rejects a flags byte that differs from this type's options, and wraps the
+    /// borrowed bytes as the store.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        if !deserializer.is_human_readable() {
+            // Note: `impl Deserialize for &[u8]` uses visit_borrowed_bytes
+            let (flags, trie_bytes) = <(u8, &[u8])>::deserialize(deserializer)?;
+            return if flags == Self::OPTIONS.to_u8_flags() {
+                Ok(ZeroAsciiIgnoreCaseTrie::from_store(Store::from(trie_bytes)))
+            } else {
+                Err(D::Error::custom("invalid ZeroTrie tag"))
+            };
+        }
+        let entries = LiteMap::<Box<ByteStr>, usize>::deserialize(deserializer)?;
+        let built = ZeroAsciiIgnoreCaseTrie::try_from_serde_litemap(&entries)
+            .map_err(D::Error::custom)?;
+        Ok(built.convert_store())
+    }
+}
+
+impl<Store> Serialize for ZeroAsciiIgnoreCaseTrie<Store>
+where
+    Store: AsRef<[u8]>,
+{
+    /// Human-readable formats: serializes the map produced by `to_litemap`.
+    /// Other formats: serializes a `(flags, bytes)` pair.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if !serializer.is_human_readable() {
+            // Note: `impl Serialize for ByteStr` uses `serialize_bytes`
+            let tagged = (Self::OPTIONS.to_u8_flags(), ByteStr::from_bytes(self.as_bytes()));
+            return tagged.serialize(serializer);
+        }
+        self.to_litemap().serialize(serializer)
+    }
+}
+
+impl<'de, 'data, Store> Deserialize<'de> for ZeroTriePerfectHash<Store>
+where
+    'de: 'data,
+    Store: From<&'data [u8]> + From<Vec<u8>> + 'data,
+{
+    /// Human-readable formats: reads a map from byte-string keys to `usize` values and
+    /// converts it with `try_from_serde_litemap`. Other formats: reads a `(flags, bytes)`
+    /// pair, rejects a flags byte that differs from this type's options, and wraps the
+    /// borrowed bytes as the store.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        if !deserializer.is_human_readable() {
+            // Note: `impl Deserialize for &[u8]` uses visit_borrowed_bytes
+            let (flags, trie_bytes) = <(u8, &[u8])>::deserialize(deserializer)?;
+            return if flags == Self::OPTIONS.to_u8_flags() {
+                Ok(ZeroTriePerfectHash::from_store(Store::from(trie_bytes)))
+            } else {
+                Err(D::Error::custom("invalid ZeroTrie tag"))
+            };
+        }
+        let entries = LiteMap::<Box<ByteStr>, usize>::deserialize(deserializer)?;
+        let built = ZeroTriePerfectHash::try_from_serde_litemap(&entries)
+            .map_err(D::Error::custom)?;
+        Ok(built.convert_store())
+    }
+}
+
+impl<Store> Serialize for ZeroTriePerfectHash<Store>
+where
+    Store: AsRef<[u8]>,
+{
+    /// Human-readable formats: serializes a map whose keys are re-wrapped as `ByteStr`,
+    /// so each key goes through the `ByteStr` string-or-bytes serialization.
+    /// Other formats: serializes a `(flags, bytes)` pair.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if !serializer.is_human_readable() {
+            // Note: `impl Serialize for ByteStr` uses `serialize_bytes`
+            let tagged = (Self::OPTIONS.to_u8_flags(), ByteStr::from_bytes(self.as_bytes()));
+            return tagged.serialize(serializer);
+        }
+        let raw_entries = self.to_litemap();
+        let readable: LiteMap<_, _> = raw_entries
+            .iter()
+            .map(|(key, value)| (ByteStr::from_bytes(key), value))
+            .collect();
+        readable.serialize(serializer)
+    }
+}
+
+impl<'de, 'data, Store> Deserialize<'de> for ZeroTrieExtendedCapacity<Store>
+where
+    'de: 'data,
+    Store: From<&'data [u8]> + From<Vec<u8>> + 'data,
+{
+    /// Human-readable formats: reads a map from byte-string keys to `usize` values and
+    /// converts it with `try_from_serde_litemap`. Other formats: reads a `(flags, bytes)`
+    /// pair, rejects a flags byte that differs from this type's options, and wraps the
+    /// borrowed bytes as the store.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        if !deserializer.is_human_readable() {
+            // Note: `impl Deserialize for &[u8]` uses visit_borrowed_bytes
+            let (flags, trie_bytes) = <(u8, &[u8])>::deserialize(deserializer)?;
+            return if flags == Self::OPTIONS.to_u8_flags() {
+                Ok(ZeroTrieExtendedCapacity::from_store(Store::from(trie_bytes)))
+            } else {
+                Err(D::Error::custom("invalid ZeroTrie tag"))
+            };
+        }
+        let entries = LiteMap::<Box<ByteStr>, usize>::deserialize(deserializer)?;
+        let built = ZeroTrieExtendedCapacity::try_from_serde_litemap(&entries)
+            .map_err(D::Error::custom)?;
+        Ok(built.convert_store())
+    }
+}
+
+impl<Store> Serialize for ZeroTrieExtendedCapacity<Store>
+where
+    Store: AsRef<[u8]>,
+{
+    /// Human-readable formats: serializes a map whose keys are re-wrapped as `ByteStr`,
+    /// so each key goes through the `ByteStr` string-or-bytes serialization.
+    /// Other formats: serializes a `(flags, bytes)` pair.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if !serializer.is_human_readable() {
+            // Note: `impl Serialize for ByteStr` uses `serialize_bytes`
+            let tagged = (Self::OPTIONS.to_u8_flags(), ByteStr::from_bytes(self.as_bytes()));
+            return tagged.serialize(serializer);
+        }
+        let raw_entries = self.to_litemap();
+        let readable: LiteMap<_, _> = raw_entries
+            .iter()
+            .map(|(key, value)| (ByteStr::from_bytes(key), value))
+            .collect();
+        readable.serialize(serializer)
+    }
+}
+
+impl<'de, 'data, Store> Deserialize<'de> for ZeroTrie<Store>
+where
+    'de: 'data,
+    Store: From<&'data [u8]> + From<Vec<u8>> + 'data,
+{
+    /// Human-readable formats: reads a map from byte-string keys to `usize` values and
+    /// converts it with `ZeroTrie::<Vec<u8>>::try_from`.
+    ///
+    /// Other formats: reads one borrowed byte slice whose first byte is the flavor tag
+    /// and whose remaining bytes become the store. Fails if the slice is empty or the
+    /// tag matches none of the SimpleAscii, PerfectHash or ExtendedCapacity flags.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        if deserializer.is_human_readable() {
+            let lm = LiteMap::<Box<ByteStr>, usize>::deserialize(deserializer)?;
+            ZeroTrie::<Vec<u8>>::try_from(&lm)
+                .map_err(D::Error::custom)
+                .map(|trie| trie.convert_store())
+        } else {
+            // Note: `impl Deserialize for &[u8]` uses visit_borrowed_bytes
+            let bytes = <&[u8]>::deserialize(deserializer)?;
+            // Build the error lazily so the success path does not construct it.
+            let (tag, trie_bytes) = bytes
+                .split_first()
+                .ok_or_else(|| D::Error::custom("expected at least 1 byte for ZeroTrie"))?;
+            let store = Store::from(trie_bytes);
+            let zerotrie = if *tag == ZeroTrieSimpleAscii::<u8>::OPTIONS.to_u8_flags() {
+                ZeroTrieSimpleAscii::from_store(store).into_zerotrie()
+            } else if *tag == ZeroTriePerfectHash::<u8>::OPTIONS.to_u8_flags() {
+                ZeroTriePerfectHash::from_store(store).into_zerotrie()
+            } else if *tag == ZeroTrieExtendedCapacity::<u8>::OPTIONS.to_u8_flags() {
+                ZeroTrieExtendedCapacity::from_store(store).into_zerotrie()
+            } else {
+                return Err(D::Error::custom("invalid ZeroTrie tag"));
+            };
+            Ok(zerotrie)
+        }
+    }
+}
+
+impl<Store> Serialize for ZeroTrie<Store>
+where
+    Store: AsRef<[u8]>,
+{
+    /// Human-readable formats: serializes a map whose keys are re-wrapped as `ByteStr`.
+    ///
+    /// Other formats: serializes a single byte buffer made of the flavor tag followed
+    /// by the trie bytes, which is the layout the `Deserialize` impl splits apart.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if serializer.is_human_readable() {
+            let raw_entries = self.to_litemap();
+            let readable: LiteMap<_, _> = raw_entries
+                .iter()
+                .map(|(key, value)| (ByteStr::from_bytes(key), value))
+                .collect();
+            return readable.serialize(serializer);
+        }
+        // Pick the tag that identifies the wrapped flavor.
+        let (tag, bytes) = match &self.0 {
+            ZeroTrieFlavor::SimpleAscii(inner) => (
+                ZeroTrieSimpleAscii::<u8>::OPTIONS.to_u8_flags(),
+                inner.as_bytes(),
+            ),
+            ZeroTrieFlavor::PerfectHash(inner) => (
+                ZeroTriePerfectHash::<u8>::OPTIONS.to_u8_flags(),
+                inner.as_bytes(),
+            ),
+            ZeroTrieFlavor::ExtendedCapacity(inner) => (
+                ZeroTrieExtendedCapacity::<u8>::OPTIONS.to_u8_flags(),
+                inner.as_bytes(),
+            ),
+        };
+        let mut tagged = Vec::with_capacity(1 + bytes.len());
+        tagged.push(tag);
+        tagged.extend(bytes);
+        serializer.serialize_bytes(&tagged)
+    }
+}
+
+// Test fixtures included textually from the integration-test data file; the tests below
+// read trie bytes and expected serializations from `testdata::basic`.
+#[cfg(test)]
+mod testdata {
+ include!("../tests/data/data.rs");
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use alloc::borrow::Cow;
+
+ /// Test wrapper holding a `ZeroTrieSimpleAscii` backed by `Cow<[u8]>`, so the tests
+ /// can check whether deserialization borrowed or allocated.
+ #[derive(Serialize, Deserialize)]
+ pub struct ZeroTrieSimpleAsciiCow<'a> {
+ #[serde(borrow)]
+ trie: ZeroTrieSimpleAscii<Cow<'a, [u8]>>,
+ }
+
+ /// Round-trips a `ZeroTrieSimpleAscii<Cow<[u8]>>` through JSON, bincode and
+ /// MessagePack (`rmp_serde`), checking the exact serialized output and that JSON
+ /// yields an owned store while bincode borrows.
+ #[test]
+ pub fn test_serde_simpleascii_cow() {
+ let trie = ZeroTrieSimpleAscii::from_store(Cow::from(testdata::basic::TRIE_ASCII));
+ let original = ZeroTrieSimpleAsciiCow { trie };
+ let json_str = serde_json::to_string(&original).unwrap();
+ let bincode_bytes = bincode::serialize(&original).unwrap();
+ let rmp_bytes = rmp_serde::to_vec(&original).unwrap();
+
+ assert_eq!(json_str, testdata::basic::JSON_STR_ASCII);
+ // Header: flags byte 0, then what looks like an 8-byte little-endian length of 26
+ // (presumably bincode's length prefix for the byte slice; confirm).
+ assert_eq!(&bincode_bytes[0..9], &[0, 26, 0, 0, 0, 0, 0, 0, 0]);
+ assert_eq!(&bincode_bytes[9..], testdata::basic::BINCODE_BYTES_ASCII);
+ // The MessagePack payload after its 5-byte header is the same trie bytes.
+ assert_eq!(&rmp_bytes[0..5], &[145, 146, 0, 196, 26]);
+ assert_eq!(&rmp_bytes[5..], testdata::basic::BINCODE_BYTES_ASCII);
+
+ let json_recovered: ZeroTrieSimpleAsciiCow = serde_json::from_str(&json_str).unwrap();
+ let bincode_recovered: ZeroTrieSimpleAsciiCow =
+ bincode::deserialize(&bincode_bytes).unwrap();
+ let rmp_recovered: ZeroTrieSimpleAsciiCow = rmp_serde::from_slice(&rmp_bytes).unwrap();
+
+ assert_eq!(original.trie, json_recovered.trie);
+ assert_eq!(original.trie, bincode_recovered.trie);
+ assert_eq!(original.trie, rmp_recovered.trie);
+
+ // JSON rebuilds the trie (owned); bincode hands back a borrow of the input buffer.
+ assert!(matches!(json_recovered.trie.into_store(), Cow::Owned(_)));
+ assert!(matches!(
+ bincode_recovered.trie.into_store(),
+ Cow::Borrowed(_)
+ ));
+ }
+
+ /// Test wrapper holding a `ZeroAsciiIgnoreCaseTrie` backed by `Cow<[u8]>`.
+ #[derive(Serialize, Deserialize)]
+ pub struct ZeroAsciiIgnoreCaseTrieCow<'a> {
+ #[serde(borrow)]
+ trie: ZeroAsciiIgnoreCaseTrie<Cow<'a, [u8]>>,
+ }
+
+ /// Round-trips a `ZeroAsciiIgnoreCaseTrie<Cow<[u8]>>` through JSON and bincode,
+ /// checking the serialized output and the owned/borrowed state of the result.
+ #[test]
+ pub fn test_serde_asciiignorecase_cow() {
+ let trie = ZeroAsciiIgnoreCaseTrie::from_store(Cow::from(testdata::basic::TRIE_ASCII));
+ let original = ZeroAsciiIgnoreCaseTrieCow { trie };
+ let json_str = serde_json::to_string(&original).unwrap();
+ let bincode_bytes = bincode::serialize(&original).unwrap();
+
+ assert_eq!(json_str, testdata::basic::JSON_STR_ASCII);
+ // Header: flags byte 8 for this trie type, then the 8-byte length (26).
+ assert_eq!(&bincode_bytes[0..9], &[8, 26, 0, 0, 0, 0, 0, 0, 0]);
+ assert_eq!(&bincode_bytes[9..], testdata::basic::BINCODE_BYTES_ASCII);
+
+ let json_recovered: ZeroAsciiIgnoreCaseTrieCow = serde_json::from_str(&json_str).unwrap();
+ let bincode_recovered: ZeroAsciiIgnoreCaseTrieCow =
+ bincode::deserialize(&bincode_bytes).unwrap();
+
+ assert_eq!(original.trie, json_recovered.trie);
+ assert_eq!(original.trie, bincode_recovered.trie);
+
+ assert!(matches!(json_recovered.trie.into_store(), Cow::Owned(_)));
+ assert!(matches!(
+ bincode_recovered.trie.into_store(),
+ Cow::Borrowed(_)
+ ));
+ }
+
+ /// Test wrapper holding a `ZeroTriePerfectHash` backed by `Cow<[u8]>`.
+ #[derive(Serialize, Deserialize)]
+ pub struct ZeroTriePerfectHashCow<'a> {
+ #[serde(borrow)]
+ trie: ZeroTriePerfectHash<Cow<'a, [u8]>>,
+ }
+
+ /// Round-trips a `ZeroTriePerfectHash<Cow<[u8]>>` built from the ASCII fixture
+ /// through JSON and bincode.
+ #[test]
+ pub fn test_serde_perfecthash_cow() {
+ let trie = ZeroTriePerfectHash::from_store(Cow::from(testdata::basic::TRIE_ASCII));
+ let original = ZeroTriePerfectHashCow { trie };
+ let json_str = serde_json::to_string(&original).unwrap();
+ let bincode_bytes = bincode::serialize(&original).unwrap();
+
+ assert_eq!(json_str, testdata::basic::JSON_STR_ASCII);
+ // Header: flags byte 3 for this trie type, then the 8-byte length (26).
+ assert_eq!(&bincode_bytes[0..9], &[3, 26, 0, 0, 0, 0, 0, 0, 0]);
+ assert_eq!(&bincode_bytes[9..], testdata::basic::BINCODE_BYTES_ASCII);
+
+ let json_recovered: ZeroTriePerfectHashCow = serde_json::from_str(&json_str).unwrap();
+ let bincode_recovered: ZeroTriePerfectHashCow =
+ bincode::deserialize(&bincode_bytes).unwrap();
+
+ assert_eq!(original.trie, json_recovered.trie);
+ assert_eq!(original.trie, bincode_recovered.trie);
+
+ assert!(matches!(json_recovered.trie.into_store(), Cow::Owned(_)));
+ assert!(matches!(
+ bincode_recovered.trie.into_store(),
+ Cow::Borrowed(_)
+ ));
+ }
+
+ /// Same as `test_serde_perfecthash_cow`, but using the Unicode fixture.
+ #[test]
+ pub fn test_serde_perfecthash_cow_u() {
+ let trie = ZeroTriePerfectHash::from_store(Cow::from(testdata::basic::TRIE_UNICODE));
+ let original = ZeroTriePerfectHashCow { trie };
+ let json_str = serde_json::to_string(&original).unwrap();
+ let bincode_bytes = bincode::serialize(&original).unwrap();
+
+ assert_eq!(json_str, testdata::basic::JSON_STR_UNICODE);
+ // Header: flags byte 3, then the 8-byte length (39 for this fixture).
+ assert_eq!(&bincode_bytes[0..9], &[3, 39, 0, 0, 0, 0, 0, 0, 0]);
+ assert_eq!(&bincode_bytes[9..], testdata::basic::BINCODE_BYTES_UNICODE);
+
+ let json_recovered: ZeroTriePerfectHashCow = serde_json::from_str(&json_str).unwrap();
+ let bincode_recovered: ZeroTriePerfectHashCow =
+ bincode::deserialize(&bincode_bytes).unwrap();
+
+ assert_eq!(original.trie, json_recovered.trie);
+ assert_eq!(original.trie, bincode_recovered.trie);
+
+ assert!(matches!(json_recovered.trie.into_store(), Cow::Owned(_)));
+ assert!(matches!(
+ bincode_recovered.trie.into_store(),
+ Cow::Borrowed(_)
+ ));
+ }
+
+ /// Same as `test_serde_perfecthash_cow`, but using the binary fixture.
+ #[test]
+ pub fn test_serde_perfecthash_cow_bin() {
+ let trie = ZeroTriePerfectHash::from_store(Cow::from(testdata::basic::TRIE_BINARY));
+ let original = ZeroTriePerfectHashCow { trie };
+ let json_str = serde_json::to_string(&original).unwrap();
+ let bincode_bytes = bincode::serialize(&original).unwrap();
+
+ assert_eq!(json_str, testdata::basic::JSON_STR_BINARY);
+ // Header: flags byte 3, then the 8-byte length (26).
+ assert_eq!(&bincode_bytes[0..9], &[3, 26, 0, 0, 0, 0, 0, 0, 0]);
+ assert_eq!(&bincode_bytes[9..], testdata::basic::BINCODE_BYTES_BINARY);
+
+ let json_recovered: ZeroTriePerfectHashCow = serde_json::from_str(&json_str).unwrap();
+ let bincode_recovered: ZeroTriePerfectHashCow =
+ bincode::deserialize(&bincode_bytes).unwrap();
+
+ assert_eq!(original.trie, json_recovered.trie);
+ assert_eq!(original.trie, bincode_recovered.trie);
+
+ assert!(matches!(json_recovered.trie.into_store(), Cow::Owned(_)));
+ assert!(matches!(
+ bincode_recovered.trie.into_store(),
+ Cow::Borrowed(_)
+ ));
+ }
+
+ /// Test wrapper holding the flavor-erased `ZeroTrie` backed by `Cow<[u8]>`.
+ #[derive(Serialize, Deserialize)]
+ pub struct ZeroTrieAnyCow<'a> {
+ #[serde(borrow)]
+ trie: ZeroTrie<Cow<'a, [u8]>>,
+ }
+
+ /// Round-trips a `ZeroTrie` wrapping the SimpleAscii flavor through JSON and bincode.
+ #[test]
+ pub fn test_serde_any_cow() {
+ let trie =
+ ZeroTrieSimpleAscii::from_store(Cow::from(testdata::basic::TRIE_ASCII)).into_zerotrie();
+ let original = ZeroTrieAnyCow { trie };
+ let json_str = serde_json::to_string(&original).unwrap();
+ let bincode_bytes = bincode::serialize(&original).unwrap();
+
+ assert_eq!(json_str, testdata::basic::JSON_STR_ASCII);
+ // `ZeroTrie` serializes one buffer of tag + trie bytes, so the 8-byte length comes
+ // first (27 = 1 tag byte + 26 trie bytes) and the tag byte (0) is the ninth byte.
+ assert_eq!(&bincode_bytes[0..9], &[27, 0, 0, 0, 0, 0, 0, 0, 0]);
+ assert_eq!(&bincode_bytes[9..], testdata::basic::BINCODE_BYTES_ASCII);
+
+ let json_recovered: ZeroTrieAnyCow = serde_json::from_str(&json_str).unwrap();
+ let bincode_recovered: ZeroTrieAnyCow = bincode::deserialize(&bincode_bytes).unwrap();
+
+ assert_eq!(original.trie, json_recovered.trie);
+ assert_eq!(original.trie, bincode_recovered.trie);
+
+ assert!(matches!(json_recovered.trie.into_store(), Cow::Owned(_)));
+ assert!(matches!(
+ bincode_recovered.trie.into_store(),
+ Cow::Borrowed(_)
+ ));
+ }
+
+ /// Round-trips a `ZeroTrie` wrapping the PerfectHash flavor (Unicode fixture)
+ /// through JSON and bincode.
+ #[test]
+ pub fn test_serde_any_cow_u() {
+ let trie = ZeroTriePerfectHash::from_store(Cow::from(testdata::basic::TRIE_UNICODE))
+ .into_zerotrie();
+ let original = ZeroTrieAnyCow { trie };
+ let json_str = serde_json::to_string(&original).unwrap();
+ let bincode_bytes = bincode::serialize(&original).unwrap();
+
+ assert_eq!(json_str, testdata::basic::JSON_STR_UNICODE);
+ // 8-byte length first (40 = 1 tag byte + 39 trie bytes), then the tag byte (3).
+ assert_eq!(&bincode_bytes[0..9], &[40, 0, 0, 0, 0, 0, 0, 0, 3]);
+ assert_eq!(&bincode_bytes[9..], testdata::basic::BINCODE_BYTES_UNICODE);
+
+ let json_recovered: ZeroTrieAnyCow = serde_json::from_str(&json_str).unwrap();
+ let bincode_recovered: ZeroTrieAnyCow = bincode::deserialize(&bincode_bytes).unwrap();
+
+ assert_eq!(original.trie, json_recovered.trie);
+ assert_eq!(original.trie, bincode_recovered.trie);
+
+ assert!(matches!(json_recovered.trie.into_store(), Cow::Owned(_)));
+ assert!(matches!(
+ bincode_recovered.trie.into_store(),
+ Cow::Borrowed(_)
+ ));
+ }
+}
+
+#[cfg(test)]
+#[cfg(feature = "zerovec")]
+mod tests_zerovec {
+ use super::*;
+ use zerovec::ZeroVec;
+
+ /// Test wrapper holding a `ZeroTrieSimpleAscii` backed by `ZeroVec<u8>`.
+ #[derive(Serialize, Deserialize)]
+ pub struct ZeroTrieSimpleAsciiZeroVec<'a> {
+ #[serde(borrow)]
+ trie: ZeroTrieSimpleAscii<ZeroVec<'a, u8>>,
+ }
+
+ /// Round-trips a `ZeroTrieSimpleAscii<ZeroVec<u8>>` through JSON and bincode and
+ /// checks that JSON yields an owned `ZeroVec` while bincode yields a borrowed one.
+ #[test]
+ pub fn test_serde_simpleascii_zerovec() {
+ let trie =
+ ZeroTrieSimpleAscii::from_store(ZeroVec::new_borrowed(testdata::basic::TRIE_ASCII));
+ let original = ZeroTrieSimpleAsciiZeroVec { trie };
+ let json_str = serde_json::to_string(&original).unwrap();
+ let bincode_bytes = bincode::serialize(&original).unwrap();
+
+ assert_eq!(json_str, testdata::basic::JSON_STR_ASCII);
+ // Header: flags byte 0, then the 8-byte length (26).
+ assert_eq!(&bincode_bytes[0..9], &[0, 26, 0, 0, 0, 0, 0, 0, 0]);
+ assert_eq!(&bincode_bytes[9..], testdata::basic::BINCODE_BYTES_ASCII);
+
+ let json_recovered: ZeroTrieSimpleAsciiZeroVec = serde_json::from_str(&json_str).unwrap();
+ let bincode_recovered: ZeroTrieSimpleAsciiZeroVec =
+ bincode::deserialize(&bincode_bytes).unwrap();
+
+ assert_eq!(original.trie, json_recovered.trie);
+ assert_eq!(original.trie, bincode_recovered.trie);
+
+ assert!(json_recovered.trie.into_store().is_owned());
+ assert!(!bincode_recovered.trie.into_store().is_owned());
+ }
+
+ /// Test wrapper holding a `ZeroTriePerfectHash` backed by `ZeroVec<u8>`.
+ #[derive(Serialize, Deserialize)]
+ pub struct ZeroTriePerfectHashZeroVec<'a> {
+ #[serde(borrow)]
+ trie: ZeroTriePerfectHash<ZeroVec<'a, u8>>,
+ }
+
+ /// Round-trips a `ZeroTriePerfectHash<ZeroVec<u8>>` through JSON and bincode and
+ /// checks that JSON yields an owned `ZeroVec` while bincode yields a borrowed one.
+ #[test]
+ pub fn test_serde_perfecthash_zerovec() {
+ let trie =
+ ZeroTriePerfectHash::from_store(ZeroVec::new_borrowed(testdata::basic::TRIE_ASCII));
+ let original = ZeroTriePerfectHashZeroVec { trie };
+ let json_str = serde_json::to_string(&original).unwrap();
+ let bincode_bytes = bincode::serialize(&original).unwrap();
+
+ assert_eq!(json_str, testdata::basic::JSON_STR_ASCII);
+ // Header: flags byte 3, then the 8-byte length (26).
+ assert_eq!(&bincode_bytes[0..9], &[3, 26, 0, 0, 0, 0, 0, 0, 0]);
+ assert_eq!(&bincode_bytes[9..], testdata::basic::BINCODE_BYTES_ASCII);
+
+ let json_recovered: ZeroTriePerfectHashZeroVec = serde_json::from_str(&json_str).unwrap();
+ let bincode_recovered: ZeroTriePerfectHashZeroVec =
+ bincode::deserialize(&bincode_bytes).unwrap();
+
+ assert_eq!(original.trie, json_recovered.trie);
+ assert_eq!(original.trie, bincode_recovered.trie);
+
+ assert!(json_recovered.trie.into_store().is_owned());
+ assert!(!bincode_recovered.trie.into_store().is_owned());
+ }
+}
diff --git a/vendor/zerotrie/src/varint.rs b/vendor/zerotrie/src/varint.rs
new file mode 100644
index 00000000..5af0ea66
--- /dev/null
+++ b/vendor/zerotrie/src/varint.rs
@@ -0,0 +1,497 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+//! Varint spec for ZeroTrie:
+//!
+//! - Lead byte: top M (2 or 3) bits are metadata; next is varint extender; rest is value
+//! - Trail bytes: top bit is varint extender; rest are low bits of value
+//! - Guaranteed uniqueness of varint by adding "latent value" for each extender byte
+//! - No maximum, but high bits will be dropped if they don't fit in the platform's `usize`
+//!
+//! This is best shown by examples.
+//!
+//! ```txt
+//! xxx0'1010 = 10
+//! xxx0'1111 = 15 (largest single-byte value with M=3)
+//! xxx1'0000 0000'0000 must be 16 (smallest two-byte value with M=3)
+//! xxx1'0000 0000'0001 = 17
+//! xxx1'1111 0111'1111 = 2063 (largest two-byte value with M=3)
+//! xxx1'0000 1000'0000 0000'0000 must be 2064 (smallest three-byte value with M=3)
+//! xxx1'0000 1000'0000 0000'0001 = 2065
+//! ```
+//!
+//! The latent values by number of bytes for M=3 are:
+//!
+//! - 1 byte: 0
+//! - 2 bytes: 16 = 0x10 = 0b10000
+//! - 3 bytes: 2064 = 0x810 = 0b100000010000
+//! - 4 bytes: 264208 = 0x40810 = 0b1000000100000010000
+//! - 5 bytes: 33818640 = 0x2040810 = 0b10000001000000100000010000
+//! - …
+//!
+//! For M=2, the latent values are:
+//!
+//! - 1 byte: 0
+//! - 2 bytes: 32 = 0x20 = 0b100000
+//! - 3 bytes: 4128 = 0x1020 = 0b1000000100000
+//! - 4 bytes: 528416 = 0x81020 = 0b10000001000000100000
+//! - 5 bytes: 67637280 = 0x4081020 = 0b100000010000001000000100000
+//! - …
+
+use crate::builder::konst::ConstArrayBuilder;
+
+#[cfg(feature = "alloc")]
+use crate::builder::nonconst::TrieBuilderStore;
+
+/// Reads a varint with 2 bits of metadata in the lead byte.
+///
+/// `start` is the lead byte: its low 5 bits seed the value and bit 5 (`0b00100000`)
+/// is the extender flag. The top 2 metadata bits are masked off and ignored here.
+/// `remainder` holds the bytes that follow the lead byte.
+///
+/// Returns the varint value and a subslice of `remainder` with the varint bytes removed.
+///
+/// If the varint spills off the end of the slice, a debug assertion will fail,
+/// and the function will return the value up to that point.
+pub const fn read_varint_meta2(start: u8, remainder: &[u8]) -> (usize, &[u8]) {
+ let mut value = (start & 0b00011111) as usize;
+ let mut remainder = remainder;
+ if (start & 0b00100000) != 0 {
+ loop {
+ let next;
+ (next, remainder) = debug_unwrap!(remainder.split_first(), break, "invalid varint");
+ // Note: value << 7 could drop high bits. The first addition can't overflow.
+ // The second addition could overflow; in such a case we just inform the
+ // developer via the debug assertion.
+ // The trailing `+ 32` is the latent value added once per trail byte for M=2.
+ value = (value << 7) + ((*next & 0b01111111) as usize) + 32;
+ // A clear top bit marks the final trail byte.
+ if (*next & 0b10000000) == 0 {
+ break;
+ }
+ }
+ }
+ (value, remainder)
+}
+
+/// Reads a varint with 3 bits of metadata in the lead byte.
+///
+/// `start` is the lead byte: its low 4 bits seed the value and bit 4 (`0b00010000`)
+/// is the extender flag. The top 3 metadata bits are masked off and ignored here.
+/// `remainder` holds the bytes that follow the lead byte.
+///
+/// Returns the varint value and a subslice of `remainder` with the varint bytes removed.
+///
+/// If the varint spills off the end of the slice, a debug assertion will fail,
+/// and the function will return the value up to that point.
+pub const fn read_varint_meta3(start: u8, remainder: &[u8]) -> (usize, &[u8]) {
+ let mut value = (start & 0b00001111) as usize;
+ let mut remainder = remainder;
+ if (start & 0b00010000) != 0 {
+ loop {
+ let next;
+ (next, remainder) = debug_unwrap!(remainder.split_first(), break, "invalid varint");
+ // Note: value << 7 could drop high bits. The first addition can't overflow.
+ // The second addition could overflow; in such a case we just inform the
+ // developer via the debug assertion.
+ // The trailing `+ 16` is the latent value added once per trail byte for M=3.
+ value = (value << 7) + ((*next & 0b01111111) as usize) + 16;
+ // A clear top bit marks the final trail byte.
+ if (*next & 0b10000000) == 0 {
+ break;
+ }
+ }
+ }
+ (value, remainder)
+}
+
+/// Reads and removes a varint with 3 bits of metadata from a [`TrieBuilderStore`].
+///
+/// `start` is the lead byte, already taken by the caller; trail bytes are popped from
+/// the front of `remainder`.
+///
+/// Returns the varint value, or `None` if the store runs out of bytes before the final
+/// trail byte is seen.
+#[cfg(feature = "alloc")]
+pub(crate) fn try_read_varint_meta3_from_tstore<S: TrieBuilderStore>(
+    start: u8,
+    remainder: &mut S,
+) -> Option<usize> {
+    let mut value = (start & 0b00001111) as usize;
+    // No extender bit: the lead byte is the entire varint.
+    if (start & 0b00010000) == 0 {
+        return Some(value);
+    }
+    loop {
+        let next = remainder.atbs_pop_front()?;
+        // Note: value << 7 could drop high bits. The first addition can't overflow.
+        // The second addition could overflow; it uses plain `+`, so that is only
+        // reported in builds that have overflow checks enabled.
+        value = (value << 7) + ((next & 0b01111111) as usize) + 16;
+        // A clear top bit marks the final trail byte.
+        if (next & 0b10000000) == 0 {
+            return Some(value);
+        }
+    }
+}
+
+// Largest value exercised by the tests: the maximum `usize` of the platform.
+#[cfg(test)]
+const MAX_VARINT: usize = usize::MAX;
+
+// *Upper Bound:* Each trail byte stores 7 bits of data, plus the latent value.
+// Add an extra 1 since the lead byte holds only 5 bits of data (4 bits when M=3).
+// This is the buffer size used by the `write_varint_*` functions below.
+const MAX_VARINT_LENGTH: usize = 1 + core::mem::size_of::<usize>() * 8 / 7;
+
+/// Returns a new [`ConstArrayBuilder`] containing a varint with 2 bits of metadata.
+///
+/// The bytes are produced from the end of a `MAX_VARINT_LENGTH` buffer toward the
+/// front, and the builder is created over the occupied tail `i..MAX_VARINT_LENGTH`.
+/// The two metadata bits of the lead byte are left as zero.
+pub(crate) const fn write_varint_meta2(value: usize) -> ConstArrayBuilder<MAX_VARINT_LENGTH, u8> {
+ let mut result = [0; MAX_VARINT_LENGTH];
+ let mut i = MAX_VARINT_LENGTH - 1;
+ let mut value = value;
+ // `last` is true only while the final (rightmost) byte is being written.
+ let mut last = true;
+ loop {
+ if value < 32 {
+ // What remains fits in the 5 value bits of the lead byte.
+ result[i] = value as u8;
+ if !last {
+ // At least one trail byte follows: set the extender bit.
+ result[i] |= 0b00100000;
+ }
+ break;
+ }
+ // Remove the latent value contributed by this trail byte.
+ value -= 32;
+ result[i] = (value as u8) & 0b01111111;
+ if !last {
+ // Every trail byte except the final one carries the continuation bit.
+ result[i] |= 0b10000000;
+ } else {
+ last = false;
+ }
+ value >>= 7;
+ i -= 1;
+ }
+ // The bytes are from i to the end.
+ ConstArrayBuilder::from_manual_slice(result, i, MAX_VARINT_LENGTH)
+}
+
+/// Returns a new [`ConstArrayBuilder`] containing a varint with 3 bits of metadata.
+///
+/// The bytes are produced from the end of a `MAX_VARINT_LENGTH` buffer toward the
+/// front, and the builder is created over the occupied tail `i..MAX_VARINT_LENGTH`.
+/// The three metadata bits of the lead byte are left as zero.
+pub(crate) const fn write_varint_meta3(value: usize) -> ConstArrayBuilder<MAX_VARINT_LENGTH, u8> {
+ let mut result = [0; MAX_VARINT_LENGTH];
+ let mut i = MAX_VARINT_LENGTH - 1;
+ let mut value = value;
+ // `last` is true only while the final (rightmost) byte is being written.
+ let mut last = true;
+ loop {
+ if value < 16 {
+ // What remains fits in the 4 value bits of the lead byte.
+ result[i] = value as u8;
+ if !last {
+ // At least one trail byte follows: set the extender bit.
+ result[i] |= 0b00010000;
+ }
+ break;
+ }
+ // Remove the latent value contributed by this trail byte.
+ value -= 16;
+ result[i] = (value as u8) & 0b01111111;
+ if !last {
+ // Every trail byte except the final one carries the continuation bit.
+ result[i] |= 0b10000000;
+ } else {
+ last = false;
+ }
+ value >>= 7;
+ i -= 1;
+ }
+ // The bytes are from i to the end.
+ ConstArrayBuilder::from_manual_slice(result, i, MAX_VARINT_LENGTH)
+}
+
+/// A secondary implementation that separates the latent value while computing the varint.
+///
+/// Uses the M=2 constants (5 value bits in the lead byte, latent value 32), so its
+/// output is comparable with `write_varint_meta2`. It first finds the byte count and
+/// the total latent value for that count, subtracts the latent value once, and then
+/// writes the remaining bits from the last byte back to the first.
+#[cfg(test)]
+pub(crate) const fn write_varint_reference(
+ value: usize,
+) -> ConstArrayBuilder<MAX_VARINT_LENGTH, u8> {
+ let mut result = [0; MAX_VARINT_LENGTH];
+ if value < 32 {
+ // Single-byte case: the value is the lead byte.
+ result[0] = value as u8;
+ return ConstArrayBuilder::from_manual_slice(result, 0, 1);
+ }
+ // Multi-byte case: start with the extender bit set in the lead byte.
+ result[0] = 32;
+ let mut latent = 32;
+ let mut steps = 2;
+ loop {
+ // Latent value of a varint that is one byte longer.
+ let next_latent = (latent << 7) + 32;
+ // Stop once the value fits, or once the latent value stops changing.
+ if value < next_latent || next_latent == latent {
+ break;
+ }
+ latent = next_latent;
+ steps += 1;
+ }
+ let mut value = value - latent;
+ let mut i = steps;
+ while i > 0 {
+ i -= 1;
+ result[i] |= (value as u8) & 0b01111111;
+ value >>= 7;
+ // Continuation bit on every trail byte except the final one; byte 0 is the lead.
+ if i > 0 && i < steps - 1 {
+ result[i] |= 0b10000000;
+ }
+ }
+ // The bytes are from 0 to `steps`.
+ ConstArrayBuilder::from_manual_slice(result, 0, steps)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ /// One decode expectation for `read_varint_meta2`.
+ #[derive(Debug)]
+ struct TestCase<'a> {
+ // Input: lead byte followed by the bytes after it.
+ bytes: &'a [u8],
+ // Bytes expected to be left over after the varint has been consumed.
+ remainder: &'a [u8],
+ // Expected decoded value.
+ value: usize,
+ }
+ // M=2 encodings paired with their decoded values, concentrated around the points
+ // where the encoding grows by one byte.
+ static CASES: &[TestCase] = &[
+ TestCase {
+ bytes: &[0b00000000],
+ remainder: &[],
+ value: 0,
+ },
+ TestCase {
+ bytes: &[0b00001010],
+ remainder: &[],
+ value: 10,
+ },
+ TestCase {
+ bytes: &[0b00011111],
+ remainder: &[],
+ value: 31,
+ },
+ TestCase {
+ bytes: &[0b00011111, 0b10101010],
+ remainder: &[0b10101010],
+ value: 31,
+ },
+ TestCase {
+ bytes: &[0b00100000, 0b00000000],
+ remainder: &[],
+ value: 32,
+ },
+ TestCase {
+ bytes: &[0b00100000, 0b00000001],
+ remainder: &[],
+ value: 33,
+ },
+ TestCase {
+ bytes: &[0b00100000, 0b00100000],
+ remainder: &[],
+ value: 64,
+ },
+ TestCase {
+ bytes: &[0x20, 0x44],
+ remainder: &[],
+ value: 100,
+ },
+ TestCase {
+ bytes: &[0b00100000, 0b01111111],
+ remainder: &[],
+ value: 159,
+ },
+ TestCase {
+ bytes: &[0b00100001, 0b00000000],
+ remainder: &[],
+ value: 160,
+ },
+ TestCase {
+ bytes: &[0b00100001, 0b00000001],
+ remainder: &[],
+ value: 161,
+ },
+ TestCase {
+ bytes: &[0x23, 0x54],
+ remainder: &[],
+ value: 500,
+ },
+ TestCase {
+ bytes: &[0b00111111, 0b01111111],
+ remainder: &[],
+ value: 4127, // 32 + (1 << 12) - 1
+ },
+ TestCase {
+ bytes: &[0b00100000, 0b10000000, 0b00000000],
+ remainder: &[],
+ value: 4128, // 32 + (1 << 12)
+ },
+ TestCase {
+ bytes: &[0b00100000, 0b10000000, 0b00000001],
+ remainder: &[],
+ value: 4129, // 32 + (1 << 12) + 1
+ },
+ TestCase {
+ bytes: &[0b00100000, 0b10000000, 0b01111111],
+ remainder: &[],
+ value: 4255, // 32 + (1 << 12) + 127
+ },
+ TestCase {
+ bytes: &[0b00100000, 0b10000001, 0b00000000],
+ remainder: &[],
+ value: 4256, // 32 + (1 << 12) + 128
+ },
+ TestCase {
+ bytes: &[0b00100000, 0b10000001, 0b00000001],
+ remainder: &[],
+ value: 4257, // 32 + (1 << 12) + 129
+ },
+ TestCase {
+ bytes: &[0x20, 0x86, 0x68],
+ remainder: &[],
+ value: 5000,
+ },
+ TestCase {
+ bytes: &[0b00100000, 0b11111111, 0b01111111],
+ remainder: &[],
+ value: 20511, // 32 + (1 << 12) + (1 << 14) - 1
+ },
+ TestCase {
+ bytes: &[0b00100001, 0b10000000, 0b00000000],
+ remainder: &[],
+ value: 20512, // 32 + (1 << 12) + (1 << 14)
+ },
+ TestCase {
+ bytes: &[0b00111111, 0b11111111, 0b01111111],
+ remainder: &[],
+ value: 528415, // 32 + (1 << 12) + (1 << 19) - 1
+ },
+ TestCase {
+ bytes: &[0b00100000, 0b10000000, 0b10000000, 0b00000000],
+ remainder: &[],
+ value: 528416, // 32 + (1 << 12) + (1 << 19)
+ },
+ TestCase {
+ bytes: &[0b00100000, 0b10000000, 0b10000000, 0b00000001],
+ remainder: &[],
+ value: 528417, // 32 + (1 << 12) + (1 << 19) + 1
+ },
+ TestCase {
+ bytes: &[0b00111111, 0b11111111, 0b11111111, 0b01111111],
+ remainder: &[],
+ value: 67637279, // 32 + (1 << 12) + (1 << 19) + (1 << 26) - 1
+ },
+ TestCase {
+ bytes: &[0b00100000, 0b10000000, 0b10000000, 0b10000000, 0b00000000],
+ remainder: &[],
+ value: 67637280, // 32 + (1 << 12) + (1 << 19) + (1 << 26)
+ },
+ ];
+
+    /// Decodes every entry of `CASES` with `read_varint_meta2` and checks both the
+    /// value and the leftover bytes.
+    #[test]
+    fn test_read() {
+        CASES.iter().for_each(|case| {
+            let (lead, trail) = (case.bytes[0], &case.bytes[1..]);
+            let decoded = read_varint_meta2(lead, trail);
+            assert_eq!(decoded, (case.value, case.remainder), "{:?}", case);
+        });
+    }
+
+ /// For every entry of `CASES`, checks that the reference writer, the production
+ /// M=2 writer and the reader all agree with the expected bytes and value.
+ #[test]
+ fn test_read_write() {
+ for cas in CASES {
+ let reference_bytes = write_varint_reference(cas.value);
+ // The encoding must cover exactly the bytes that are not part of the remainder.
+ assert_eq!(
+ reference_bytes.len(),
+ cas.bytes.len() - cas.remainder.len(),
+ "{:?}",
+ cas
+ );
+ assert_eq!(
+ reference_bytes.as_slice(),
+ &cas.bytes[0..reference_bytes.len()],
+ "{:?}",
+ cas
+ );
+ let recovered = read_varint_meta2(cas.bytes[0], &cas.bytes[1..]);
+ assert_eq!(recovered, (cas.value, cas.remainder), "{:?}", cas);
+ // The production writer must produce the same bytes as the reference writer.
+ let write_bytes = write_varint_meta2(cas.value);
+ assert_eq!(
+ reference_bytes.as_slice(),
+ write_bytes.as_slice(),
+ "{:?}",
+ cas
+ );
+ }
+ }
+
+    /// Writes and re-reads M=2 varints for 0, 1, 3, 7, … (all-ones bit patterns of
+    /// growing width below `MAX_VARINT`) and checks that the value survives.
+    #[test]
+    fn test_roundtrip() {
+        let mut probe = 0usize;
+        while probe < MAX_VARINT {
+            let encoded = write_varint_meta2(probe);
+            let lead = encoded.as_slice()[0];
+            let trail = &encoded.as_slice()[1..];
+            let (decoded, _) = read_varint_meta2(lead, trail);
+            assert_eq!(probe, decoded, "{:?}", encoded.as_slice());
+            // Shift in another one bit: 0b1, 0b11, 0b111, …
+            probe = (probe << 1) + 1;
+        }
+    }
+
+    /// Writes and re-reads M=3 varints for 0, 1, 3, 7, … (all-ones bit patterns of
+    /// growing width below `MAX_VARINT`) and checks that the value survives.
+    #[test]
+    fn test_extended_roundtrip() {
+        let mut probe = 0usize;
+        while probe < MAX_VARINT {
+            let encoded = write_varint_meta3(probe);
+            let lead = encoded.as_slice()[0];
+            let trail = &encoded.as_slice()[1..];
+            let (decoded, _) = read_varint_meta3(lead, trail);
+            assert_eq!(probe, decoded, "{:?}", encoded.as_slice());
+            // Shift in another one bit: 0b1, 0b11, 0b111, …
+            probe = (probe << 1) + 1;
+        }
+    }
+
+ /// Checks the M=2 encoding of `MAX_VARINT`: both writers agree, the encoding uses
+ /// the full `MAX_VARINT_LENGTH`, it reads back exactly, and the bytes match the
+ /// expected literal (which assumes a 64-bit `usize`).
+ #[test]
+ fn test_max() {
+ let reference_bytes = write_varint_reference(MAX_VARINT);
+ let write_bytes = write_varint_meta2(MAX_VARINT);
+ assert_eq!(reference_bytes.len(), MAX_VARINT_LENGTH);
+ assert_eq!(reference_bytes.as_slice(), write_bytes.as_slice());
+ // Everything after the lead byte.
+ let subarray = write_bytes
+ .as_const_slice()
+ .get_subslice_or_panic(1, write_bytes.len());
+ let (recovered_value, remainder) = read_varint_meta2(
+ *write_bytes.as_const_slice().first().unwrap(),
+ subarray.as_slice(),
+ );
+ assert!(remainder.is_empty());
+ assert_eq!(recovered_value, MAX_VARINT);
+ assert_eq!(
+ write_bytes.as_slice(),
+ &[
+ 0b00100001, //
+ 0b11011111, //
+ 0b11011111, //
+ 0b11011111, //
+ 0b11011111, //
+ 0b11011111, //
+ 0b11011111, //
+ 0b11011111, //
+ 0b11011111, //
+ 0b01011111, //
+ ]
+ );
+ }
+
+ /// Checks the M=3 encoding of `MAX_VARINT`: it uses the full `MAX_VARINT_LENGTH`,
+ /// reads back exactly, and matches the expected literal (which assumes a 64-bit
+ /// `usize`).
+ // NOTE(review): the name looks like a typo for `test_extended_max`.
+ #[test]
+ fn text_extended_max() {
+ let write_bytes = write_varint_meta3(MAX_VARINT);
+ assert_eq!(write_bytes.len(), MAX_VARINT_LENGTH);
+ let (lead, trailing) = write_bytes.as_slice().split_first().unwrap();
+ let (recovered_value, remainder) = read_varint_meta3(*lead, trailing);
+ assert!(remainder.is_empty());
+ assert_eq!(recovered_value, MAX_VARINT);
+ assert_eq!(
+ write_bytes.as_slice(),
+ &[
+ 0b00010001, //
+ 0b11101111, //
+ 0b11101111, //
+ 0b11101111, //
+ 0b11101111, //
+ 0b11101111, //
+ 0b11101111, //
+ 0b11101111, //
+ 0b11101111, //
+ 0b01101111, //
+ ]
+ );
+ }
+
+ /// Decodes the smallest encoding of each length (all value bits zero) and checks
+ /// that the result equals the latent value listed in the module docs.
+ #[test]
+ fn test_latent_values() {
+ // Same values documented in the module docs: M=2
+ let m2 = read_varint_meta2;
+ assert_eq!(m2(0, &[]).0, 0);
+ assert_eq!(m2(0x20, &[0x00]).0, 32);
+ assert_eq!(m2(0x20, &[0x80, 0x00]).0, 4128);
+ assert_eq!(m2(0x20, &[0x80, 0x80, 0x00]).0, 528416);
+ assert_eq!(m2(0x20, &[0x80, 0x80, 0x80, 0x00]).0, 67637280);
+
+ // Same values documented in the module docs: M=3
+ let m3 = read_varint_meta3;
+ assert_eq!(m3(0, &[]).0, 0);
+ assert_eq!(m3(0x10, &[0x00]).0, 16);
+ assert_eq!(m3(0x10, &[0x80, 0x00]).0, 2064);
+ assert_eq!(m3(0x10, &[0x80, 0x80, 0x00]).0, 264208);
+ assert_eq!(m3(0x10, &[0x80, 0x80, 0x80, 0x00]).0, 33818640);
+ }
+}
diff --git a/vendor/zerotrie/src/zerotrie.rs b/vendor/zerotrie/src/zerotrie.rs
new file mode 100644
index 00000000..a0320f9f
--- /dev/null
+++ b/vendor/zerotrie/src/zerotrie.rs
@@ -0,0 +1,886 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use crate::reader;
+
+use core::borrow::Borrow;
+
+#[cfg(feature = "alloc")]
+use crate::{
+ builder::bytestr::ByteStr, builder::nonconst::ZeroTrieBuilder, error::ZeroTrieBuildError,
+};
+#[cfg(feature = "alloc")]
+use alloc::{boxed::Box, collections::BTreeMap, collections::VecDeque, string::String, vec::Vec};
+#[cfg(feature = "litemap")]
+use litemap::LiteMap;
+
+/// A data structure that compactly maps from byte sequences to integers.
+///
+/// There are several variants of `ZeroTrie` which are very similar but are optimized
+/// for different use cases:
+///
+/// - [`ZeroTrieSimpleAscii`] is the most compact structure. Very fast for small data.
+/// Only stores ASCII-encoded strings. Can be const-constructed!
+/// - [`ZeroTriePerfectHash`] is also compact, but it also supports arbitrary binary
+/// strings. It also scales better to large data. Cannot be const-constructed.
+/// - [`ZeroTrieExtendedCapacity`] can be used if more than 2^32 bytes are required.
+///
+/// You can create a `ZeroTrie` directly, in which case the most appropriate
+/// backing implementation will be chosen.
+///
+/// # Backing Store
+///
+/// The data structure has a flexible backing data store. The only requirement for most
+/// functionality is that it implement `AsRef<[u8]>`. All of the following are valid
+/// ZeroTrie types:
+///
+/// - `ZeroTrie<[u8]>` (dynamically sized type: must be stored in a reference or Box)
+/// - `ZeroTrie<&[u8]>` (borrows its data from a u8 buffer)
+/// - `ZeroTrie<Vec<u8>>` (fully owned data)
+/// - `ZeroTrie<ZeroVec<u8>>` (the recommended borrowed-or-owned signature)
+/// - `Cow<ZeroTrie<[u8]>>` (another borrowed-or-owned signature)
+/// - `ZeroTrie<Cow<[u8]>>` (another borrowed-or-owned signature)
+///
+/// # Examples
+///
+/// ```
+/// use litemap::LiteMap;
+/// use zerotrie::ZeroTrie;
+///
+/// let mut map = LiteMap::<&[u8], usize>::new_vec();
+/// map.insert("foo".as_bytes(), 1);
+/// map.insert("bar".as_bytes(), 2);
+/// map.insert("bazzoo".as_bytes(), 3);
+///
+/// let trie = ZeroTrie::try_from(&map)?;
+///
+/// assert_eq!(trie.get("foo"), Some(1));
+/// assert_eq!(trie.get("bar"), Some(2));
+/// assert_eq!(trie.get("bazzoo"), Some(3));
+/// assert_eq!(trie.get("unknown"), None);
+///
+/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
+/// ```
+ #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+ // Note: no test in this crate fails if the following derive is removed (it is not covered by tests here)
+ #[cfg_attr(feature = "yoke", derive(yoke::Yokeable))]
+ pub struct ZeroTrie<Store>(pub(crate) ZeroTrieFlavor<Store>); // thin wrapper; the flavor enum itself stays crate-private
+
+ #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+ pub(crate) enum ZeroTrieFlavor<Store> { // records which concrete trie type backs a `ZeroTrie`
+ SimpleAscii(ZeroTrieSimpleAscii<Store>), // picked by `ZeroTrie::try_from_tuple_slice` for all-ASCII input with fewer than 512 items
+ PerfectHash(ZeroTriePerfectHash<Store>), // picked by `ZeroTrie::try_from_tuple_slice` in every other case
+ ExtendedCapacity(ZeroTrieExtendedCapacity<Store>), // never selected by `ZeroTrie::try_from_tuple_slice`
+ }
+
+/// A data structure that compactly maps from ASCII strings to integers.
+///
+/// For more information, see [`ZeroTrie`].
+///
+/// # Examples
+///
+/// ```
+/// use litemap::LiteMap;
+/// use zerotrie::ZeroTrieSimpleAscii;
+///
+/// let mut map = LiteMap::new_vec();
+/// map.insert(&b"foo"[..], 1);
+/// map.insert(b"bar", 2);
+/// map.insert(b"bazzoo", 3);
+///
+/// let trie = ZeroTrieSimpleAscii::try_from(&map)?;
+///
+/// assert_eq!(trie.get(b"foo"), Some(1));
+/// assert_eq!(trie.get(b"bar"), Some(2));
+/// assert_eq!(trie.get(b"bazzoo"), Some(3));
+/// assert_eq!(trie.get(b"unknown"), None);
+///
+/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
+/// ```
+///
+/// The trie can only store ASCII bytes; a string with non-ASCII always returns None:
+///
+/// ```
+/// use zerotrie::ZeroTrieSimpleAscii;
+///
+/// // A trie with two values: "abc" and "abcdef"
+/// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
+///
+/// assert!(matches!(trie.get(b"ab\xFF"), None));
+/// ```
+ #[repr(transparent)] // `transparent_ref_from_store` relies on `Self` being transparent over `Store`
+ #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
+ #[cfg_attr(feature = "databake", derive(databake::Bake))]
+ #[cfg_attr(feature = "databake", databake(path = zerotrie))]
+ #[allow(clippy::exhaustive_structs)] // databake hidden fields
+ pub struct ZeroTrieSimpleAscii<Store: ?Sized> {
+ #[doc(hidden)] // public only for databake; the field carries no invariants
+ pub store: Store, // backing container; most methods only need `Store: AsRef<[u8]>`
+ }
+
+ impl<Store: ?Sized> ZeroTrieSimpleAscii<Store> {
+     /// Reinterprets a reference to a store as a reference to the trie wrapping it.
+     fn transparent_ref_from_store(s: &Store) -> &Self {
+         let wrapped = s as *const Store as *const Self;
+         // SAFETY: `Self` is `repr(transparent)` over `Store`, so the pointee layout is
+         // identical, and the returned reference keeps the lifetime of `s`.
+         unsafe { &*wrapped }
+     }
+ }
+
+ impl<Store> ZeroTrieSimpleAscii<Store> {
+ /// Wraps this `ZeroTrieSimpleAscii` in a [`ZeroTrie`], tagged as the `SimpleAscii` flavor.
+ #[inline]
+ pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
+ ZeroTrie(ZeroTrieFlavor::SimpleAscii(self))
+ }
+ }
+
+/// A data structure that compactly maps from ASCII strings to integers
+/// in a case-insensitive way.
+///
+/// # Examples
+///
+/// ```
+/// use litemap::LiteMap;
+/// use zerotrie::ZeroAsciiIgnoreCaseTrie;
+///
+/// let mut map = LiteMap::new_vec();
+/// map.insert(&b"foo"[..], 1);
+/// map.insert(b"Bar", 2);
+/// map.insert(b"Bazzoo", 3);
+///
+/// let trie = ZeroAsciiIgnoreCaseTrie::try_from(&map)?;
+///
+/// assert_eq!(trie.get(b"foo"), Some(1));
+/// assert_eq!(trie.get(b"bar"), Some(2));
+/// assert_eq!(trie.get(b"BAR"), Some(2));
+/// assert_eq!(trie.get(b"bazzoo"), Some(3));
+/// assert_eq!(trie.get(b"unknown"), None);
+///
+/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
+/// ```
+///
+/// Strings with different cases of the same character at the same offset are not allowed:
+///
+/// ```
+/// use litemap::LiteMap;
+/// use zerotrie::ZeroAsciiIgnoreCaseTrie;
+///
+/// let mut map = LiteMap::new_vec();
+/// map.insert(&b"bar"[..], 1);
+/// // OK: 'r' and 'Z' are different letters
+/// map.insert(b"baZ", 2);
+/// // Bad: we already inserted 'r' so we cannot also insert 'R' at the same position
+/// map.insert(b"baR", 2);
+///
+/// ZeroAsciiIgnoreCaseTrie::try_from(&map).expect_err("mixed-case strings!");
+/// ```
+ #[repr(transparent)] // `transparent_ref_from_store` relies on `Self` being transparent over `Store`
+ #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
+ #[cfg_attr(feature = "databake", derive(databake::Bake))]
+ #[cfg_attr(feature = "databake", databake(path = zerotrie))]
+ #[allow(clippy::exhaustive_structs)] // databake hidden fields
+ pub struct ZeroAsciiIgnoreCaseTrie<Store: ?Sized> {
+ #[doc(hidden)] // public only for databake; the field carries no invariants
+ pub store: Store, // backing container; most methods only need `Store: AsRef<[u8]>`
+ }
+
+ impl<Store: ?Sized> ZeroAsciiIgnoreCaseTrie<Store> {
+     /// Reinterprets a reference to a store as a reference to the trie wrapping it.
+     fn transparent_ref_from_store(s: &Store) -> &Self {
+         let wrapped = s as *const Store as *const Self;
+         // SAFETY: `Self` is `repr(transparent)` over `Store`, so the pointee layout is
+         // identical, and the returned reference keeps the lifetime of `s`.
+         unsafe { &*wrapped }
+     }
+ }
+
+// Note: ZeroAsciiIgnoreCaseTrie is not a variant of ZeroTrie so there is no `into_zerotrie`
+
+/// A data structure that compactly maps from byte strings to integers.
+///
+/// For more information, see [`ZeroTrie`].
+///
+/// # Examples
+///
+/// ```
+/// use litemap::LiteMap;
+/// use zerotrie::ZeroTriePerfectHash;
+///
+/// let mut map = LiteMap::<&[u8], usize>::new_vec();
+/// map.insert("foo".as_bytes(), 1);
+/// map.insert("bår".as_bytes(), 2);
+/// map.insert("båzzøø".as_bytes(), 3);
+///
+/// let trie = ZeroTriePerfectHash::try_from(&map)?;
+///
+/// assert_eq!(trie.get("foo".as_bytes()), Some(1));
+/// assert_eq!(trie.get("bår".as_bytes()), Some(2));
+/// assert_eq!(trie.get("båzzøø".as_bytes()), Some(3));
+/// assert_eq!(trie.get("bazzoo".as_bytes()), None);
+///
+/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
+/// ```
+ #[repr(transparent)] // `transparent_ref_from_store` relies on `Self` being transparent over `Store`
+ #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
+ #[cfg_attr(feature = "databake", derive(databake::Bake))]
+ #[cfg_attr(feature = "databake", databake(path = zerotrie))]
+ #[allow(clippy::exhaustive_structs)] // databake hidden fields
+ pub struct ZeroTriePerfectHash<Store: ?Sized> {
+ #[doc(hidden)] // public only for databake; the field carries no invariants
+ pub store: Store, // backing container; most methods only need `Store: AsRef<[u8]>`
+ }
+
+ impl<Store: ?Sized> ZeroTriePerfectHash<Store> {
+     /// Reinterprets a reference to a store as a reference to the trie wrapping it.
+     fn transparent_ref_from_store(s: &Store) -> &Self {
+         let wrapped = s as *const Store as *const Self;
+         // SAFETY: `Self` is `repr(transparent)` over `Store`, so the pointee layout is
+         // identical, and the returned reference keeps the lifetime of `s`.
+         unsafe { &*wrapped }
+     }
+ }
+
+ impl<Store> ZeroTriePerfectHash<Store> {
+ /// Wraps this `ZeroTriePerfectHash` in a [`ZeroTrie`], tagged as the `PerfectHash` flavor.
+ #[inline]
+ pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
+ ZeroTrie(ZeroTrieFlavor::PerfectHash(self))
+ }
+ }
+
+/// A data structure that maps from a large number of byte strings to integers.
+///
+/// For more information, see [`ZeroTrie`].
+ #[repr(transparent)] // `transparent_ref_from_store` relies on `Self` being transparent over `Store`
+ #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
+ #[cfg_attr(feature = "databake", derive(databake::Bake))]
+ #[cfg_attr(feature = "databake", databake(path = zerotrie))]
+ #[allow(clippy::exhaustive_structs)] // databake hidden fields
+ pub struct ZeroTrieExtendedCapacity<Store: ?Sized> {
+ #[doc(hidden)] // public only for databake; the field carries no invariants
+ pub store: Store, // backing container; most methods only need `Store: AsRef<[u8]>`
+ }
+
+ impl<Store: ?Sized> ZeroTrieExtendedCapacity<Store> {
+     /// Reinterprets a reference to a store as a reference to the trie wrapping it.
+     fn transparent_ref_from_store(s: &Store) -> &Self {
+         let wrapped = s as *const Store as *const Self;
+         // SAFETY: `Self` is `repr(transparent)` over `Store`, so the pointee layout is
+         // identical, and the returned reference keeps the lifetime of `s`.
+         unsafe { &*wrapped }
+     }
+ }
+
+ impl<Store> ZeroTrieExtendedCapacity<Store> {
+ /// Wraps this `ZeroTrieExtendedCapacity` in a [`ZeroTrie`], tagged as the `ExtendedCapacity` flavor.
+ #[inline]
+ pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
+ ZeroTrie(ZeroTrieFlavor::ExtendedCapacity(self))
+ }
+ }
+
+ // Generates the shared API surface for one concrete trie type.
+ //
+ // Arguments:
+ // - `$name`: the trie struct to implement the API on
+ // - `$iter_element`: key type yielded by `iter()` and stored by `to_btreemap()`/`to_litemap()`
+ // - `$iter_fn`: function that builds the iterator from the trie bytes
+ // - `$iter_ty`: return type of `iter()`
+ // - `$cnv_fn`: function converting an iterator key into `Box<[u8]>`
+ macro_rules! impl_zerotrie_subtype {
+     ($name:ident, $iter_element:ty, $iter_fn:path, $iter_ty:ty, $cnv_fn:path) => {
+         impl<Store> $name<Store> {
+             /// Create a trie directly from a store.
+             ///
+             /// If the store does not contain valid bytes, unexpected behavior may occur.
+             #[inline]
+             pub const fn from_store(store: Store) -> Self {
+                 Self { store }
+             }
+             /// Takes the byte store from this trie.
+             #[inline]
+             pub fn into_store(self) -> Store {
+                 self.store
+             }
+             /// Converts this trie's store to a different store implementing the `From` trait.
+             ///
+             #[doc = concat!("For example, use this to change `", stringify!($name), "<Vec<u8>>` to `", stringify!($name), "<Cow<[u8]>>`.")]
+             ///
+             /// # Examples
+             ///
+             /// ```
+             /// use std::borrow::Cow;
+             #[doc = concat!("use zerotrie::", stringify!($name), ";")]
+             ///
+             #[doc = concat!("let trie: ", stringify!($name), "<Vec<u8>> = ", stringify!($name), "::from_bytes(b\"abc\\x85\").to_owned();")]
+             #[doc = concat!("let cow: ", stringify!($name), "<Cow<[u8]>> = trie.convert_store();")]
+             ///
+             /// assert_eq!(cow.get(b"abc"), Some(5));
+             /// ```
+             pub fn convert_store<X: From<Store>>(self) -> $name<X> {
+                 $name::<X>::from_store(X::from(self.store))
+             }
+         }
+         impl<Store> $name<Store>
+         where
+             Store: AsRef<[u8]> + ?Sized,
+         {
+             /// Queries the trie for a string.
+             pub fn get<K>(&self, key: K) -> Option<usize> where K: AsRef<[u8]> {
+                 // TODO: Should this be AsRef or Borrow?
+                 reader::get_parameterized::<Self>(self.store.as_ref(), key.as_ref())
+             }
+             /// Returns `true` if the trie is empty.
+             #[inline]
+             pub fn is_empty(&self) -> bool {
+                 self.store.as_ref().is_empty()
+             }
+             /// Returns the size of the trie in number of bytes.
+             ///
+             /// To get the number of keys in the trie, use `.iter().count()`:
+             ///
+             /// ```
+             #[doc = concat!("use zerotrie::", stringify!($name), ";")]
+             ///
+             /// // A trie with two values: "abc" and "abcdef"
+             #[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x80def\\x81\");")]
+             ///
+             /// assert_eq!(8, trie.byte_len());
+             /// assert_eq!(2, trie.iter().count());
+             /// ```
+             #[inline]
+             pub fn byte_len(&self) -> usize {
+                 self.store.as_ref().len()
+             }
+             /// Returns the bytes contained in the underlying store.
+             #[inline]
+             pub fn as_bytes(&self) -> &[u8] {
+                 self.store.as_ref()
+             }
+             /// Returns this trie as a reference transparent over a byte slice.
+             #[inline]
+             pub fn as_borrowed(&self) -> &$name<[u8]> {
+                 $name::from_bytes(self.store.as_ref())
+             }
+             /// Returns a trie with a store borrowing from this trie.
+             #[inline]
+             pub fn as_borrowed_slice(&self) -> $name<&[u8]> {
+                 $name::from_store(self.store.as_ref())
+             }
+         }
+         impl<Store> AsRef<$name<[u8]>> for $name<Store>
+         where
+             Store: AsRef<[u8]> + ?Sized,
+         {
+             #[inline]
+             fn as_ref(&self) -> &$name<[u8]> {
+                 self.as_borrowed()
+             }
+         }
+         #[cfg(feature = "alloc")]
+         impl<Store> $name<Store>
+         where
+             Store: AsRef<[u8]> + ?Sized,
+         {
+             // Metavariables are not substituted inside `///` comments, so the type
+             // name is spliced in with `concat!`/`stringify!`.
+             #[doc = concat!("Converts a possibly-borrowed `", stringify!($name), "` to an owned one.")]
+             ///
+             /// ✨ *Enabled with the `alloc` Cargo feature.*
+             ///
+             /// # Examples
+             ///
+             /// ```
+             #[doc = concat!("use zerotrie::", stringify!($name), ";")]
+             ///
+             #[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x85\");")]
+             #[doc = concat!("let owned: ", stringify!($name), "<Vec<u8>> = trie.to_owned();")]
+             ///
+             /// assert_eq!(trie.get(b"abc"), Some(5));
+             /// assert_eq!(owned.get(b"abc"), Some(5));
+             /// ```
+             #[inline]
+             pub fn to_owned(&self) -> $name<Vec<u8>> {
+                 $name::from_store(
+                     Vec::from(self.store.as_ref()),
+                 )
+             }
+             /// Returns an iterator over the key/value pairs in this trie.
+             ///
+             /// ✨ *Enabled with the `alloc` Cargo feature.*
+             ///
+             /// # Examples
+             ///
+             /// ```
+             #[doc = concat!("use zerotrie::", stringify!($name), ";")]
+             ///
+             /// // A trie with two values: "abc" and "abcdef"
+             #[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x80def\\x81\");")]
+             ///
+             /// let mut it = trie.iter();
+             /// assert_eq!(it.next(), Some(("abc".into(), 0)));
+             /// assert_eq!(it.next(), Some(("abcdef".into(), 1)));
+             /// assert_eq!(it.next(), None);
+             /// ```
+             #[inline]
+             #[allow(clippy::type_complexity)]
+             pub fn iter(&self) -> $iter_ty {
+                 $iter_fn(self.as_bytes())
+             }
+         }
+         impl $name<[u8]> {
+             /// Casts from a byte slice to a reference to a trie with the same lifetime.
+             ///
+             /// If the bytes are not a valid trie, unexpected behavior may occur.
+             #[inline]
+             pub fn from_bytes(trie: &[u8]) -> &Self {
+                 Self::transparent_ref_from_store(trie)
+             }
+         }
+         #[cfg(feature = "alloc")]
+         impl $name<Vec<u8>> {
+             // Builds the trie from a slice that the builder expects to be sorted already
+             // (it is handed to `from_sorted_tuple_slice`).
+             pub(crate) fn try_from_tuple_slice(items: &[(&ByteStr, usize)]) -> Result<Self, ZeroTrieBuildError> {
+                 use crate::options::ZeroTrieWithOptions;
+                 ZeroTrieBuilder::<VecDeque<u8>>::from_sorted_tuple_slice(
+                     items,
+                     Self::OPTIONS,
+                 )
+                 .map(|s| Self {
+                     store: s.to_bytes(),
+                 })
+             }
+         }
+         #[cfg(feature = "alloc")]
+         impl<K> FromIterator<(K, usize)> for $name<Vec<u8>>
+         where
+             K: AsRef<[u8]>
+         {
+             // Panics if the builder rejects the input; `FromIterator` has no error channel.
+             fn from_iter<T: IntoIterator<Item = (K, usize)>>(iter: T) -> Self {
+                 use crate::options::ZeroTrieWithOptions;
+                 #[allow(clippy::unwrap_used)] // FromIterator is panicky
+                 ZeroTrieBuilder::<VecDeque<u8>>::from_bytes_iter(
+                     iter,
+                     Self::OPTIONS
+                 )
+                 .map(|s| Self {
+                     store: s.to_bytes(),
+                 })
+                 .unwrap()
+             }
+         }
+         #[cfg(feature = "alloc")]
+         impl<'a, K> TryFrom<&'a BTreeMap<K, usize>> for $name<Vec<u8>>
+         where
+             K: Borrow<[u8]>
+         {
+             type Error = crate::error::ZeroTrieBuildError;
+             fn try_from(map: &'a BTreeMap<K, usize>) -> Result<Self, Self::Error> {
+                 let tuples: Vec<(&[u8], usize)> = map
+                     .iter()
+                     .map(|(k, v)| (k.borrow(), *v))
+                     .collect();
+                 let byte_str_slice = ByteStr::from_byte_slice_with_value(&tuples);
+                 Self::try_from_tuple_slice(byte_str_slice)
+             }
+         }
+         #[cfg(feature = "alloc")]
+         impl<Store> $name<Store>
+         where
+             Store: AsRef<[u8]> + ?Sized
+         {
+             /// Exports the data from this ZeroTrie type into a BTreeMap.
+             ///
+             /// ✨ *Enabled with the `alloc` Cargo feature.*
+             ///
+             /// # Examples
+             ///
+             /// ```
+             #[doc = concat!("use zerotrie::", stringify!($name), ";")]
+             /// use std::collections::BTreeMap;
+             ///
+             #[doc = concat!("let trie = ", stringify!($name), "::from_bytes(b\"abc\\x81def\\x82\");")]
+             /// let items = trie.to_btreemap();
+             ///
+             /// assert_eq!(items.len(), 2);
+             ///
+             #[doc = concat!("let recovered_trie: ", stringify!($name), "<Vec<u8>> = items")]
+             ///     .into_iter()
+             ///     .collect();
+             /// assert_eq!(trie.as_bytes(), recovered_trie.as_bytes());
+             /// ```
+             pub fn to_btreemap(&self) -> BTreeMap<$iter_element, usize> {
+                 self.iter().collect()
+             }
+             #[allow(dead_code)] // not needed for ZeroAsciiIgnoreCaseTrie
+             pub(crate) fn to_btreemap_bytes(&self) -> BTreeMap<Box<[u8]>, usize> {
+                 self.iter().map(|(k, v)| ($cnv_fn(k), v)).collect()
+             }
+         }
+         #[cfg(feature = "alloc")]
+         impl<Store> From<&$name<Store>> for BTreeMap<$iter_element, usize>
+         where
+             Store: AsRef<[u8]> + ?Sized,
+         {
+             #[inline]
+             fn from(other: &$name<Store>) -> Self {
+                 other.to_btreemap()
+             }
+         }
+         #[cfg(feature = "litemap")]
+         impl<'a, K, S> TryFrom<&'a LiteMap<K, usize, S>> for $name<Vec<u8>>
+         where
+             K: Borrow<[u8]>,
+             S: litemap::store::StoreIterable<'a, K, usize>,
+         {
+             type Error = crate::error::ZeroTrieBuildError;
+             fn try_from(map: &'a LiteMap<K, usize, S>) -> Result<Self, Self::Error> {
+                 let tuples: Vec<(&[u8], usize)> = map
+                     .iter()
+                     .map(|(k, v)| (k.borrow(), *v))
+                     .collect();
+                 let byte_str_slice = ByteStr::from_byte_slice_with_value(&tuples);
+                 Self::try_from_tuple_slice(byte_str_slice)
+             }
+         }
+         #[cfg(feature = "litemap")]
+         impl<Store> $name<Store>
+         where
+             Store: AsRef<[u8]> + ?Sized,
+         {
+             /// Exports the data from this ZeroTrie type into a LiteMap.
+             ///
+             /// ✨ *Enabled with the `litemap` Cargo feature.*
+             ///
+             /// # Examples
+             ///
+             /// ```
+             #[doc = concat!("use zerotrie::", stringify!($name), ";")]
+             /// use litemap::LiteMap;
+             ///
+             #[doc = concat!("let trie = ", stringify!($name), "::from_bytes(b\"abc\\x81def\\x82\");")]
+             ///
+             /// let items = trie.to_litemap();
+             /// assert_eq!(items.len(), 2);
+             ///
+             #[doc = concat!("let recovered_trie: ", stringify!($name), "<Vec<u8>> = items")]
+             ///     .iter()
+             ///     .map(|(k, v)| (k, *v))
+             ///     .collect();
+             /// assert_eq!(trie.as_bytes(), recovered_trie.as_bytes());
+             /// ```
+             pub fn to_litemap(&self) -> LiteMap<$iter_element, usize> {
+                 self.iter().collect()
+             }
+             #[allow(dead_code)] // not needed for ZeroAsciiIgnoreCaseTrie
+             pub(crate) fn to_litemap_bytes(&self) -> LiteMap<Box<[u8]>, usize> {
+                 self.iter().map(|(k, v)| ($cnv_fn(k), v)).collect()
+             }
+         }
+         #[cfg(feature = "litemap")]
+         impl<Store> From<&$name<Store>> for LiteMap<$iter_element, usize>
+         where
+             Store: AsRef<[u8]> + ?Sized,
+         {
+             #[inline]
+             fn from(other: &$name<Store>) -> Self {
+                 other.to_litemap()
+             }
+         }
+         #[cfg(feature = "litemap")]
+         impl $name<Vec<u8>>
+         {
+             #[cfg(feature = "serde")]
+             pub(crate) fn try_from_serde_litemap(items: &LiteMap<Box<ByteStr>, usize>) -> Result<Self, ZeroTrieBuildError> {
+                 let lm_borrowed: LiteMap<&ByteStr, usize> = items.to_borrowed_keys();
+                 Self::try_from_tuple_slice(lm_borrowed.as_slice())
+             }
+         }
+         // Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
+         impl Borrow<$name<[u8]>> for $name<&[u8]> {
+             #[inline]
+             fn borrow(&self) -> &$name<[u8]> {
+                 self.as_borrowed()
+             }
+         }
+         // Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
+         #[cfg(feature = "alloc")]
+         impl Borrow<$name<[u8]>> for $name<Box<[u8]>> {
+             #[inline]
+             fn borrow(&self) -> &$name<[u8]> {
+                 self.as_borrowed()
+             }
+         }
+         // Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
+         #[cfg(feature = "alloc")]
+         impl Borrow<$name<[u8]>> for $name<Vec<u8>> {
+             #[inline]
+             fn borrow(&self) -> &$name<[u8]> {
+                 self.as_borrowed()
+             }
+         }
+         #[cfg(feature = "alloc")]
+         impl alloc::borrow::ToOwned for $name<[u8]> {
+             type Owned = $name<Box<[u8]>>;
+             #[doc = concat!("This impl allows [`", stringify!($name), "`] to be used inside of a [`Cow`](alloc::borrow::Cow).")]
+             ///
+             #[doc = concat!("Note that it is also possible to use `", stringify!($name), "<ZeroVec<u8>>` for a similar result.")]
+             ///
+             /// ✨ *Enabled with the `alloc` Cargo feature.*
+             ///
+             /// # Examples
+             ///
+             /// ```
+             /// use std::borrow::Cow;
+             #[doc = concat!("use zerotrie::", stringify!($name), ";")]
+             ///
+             #[doc = concat!("let trie: Cow<", stringify!($name), "<[u8]>> = Cow::Borrowed(", stringify!($name), "::from_bytes(b\"abc\\x85\"));")]
+             /// assert_eq!(trie.get(b"abc"), Some(5));
+             /// ```
+             fn to_owned(&self) -> Self::Owned {
+                 let bytes: &[u8] = self.store.as_ref();
+                 $name::from_store(
+                     Vec::from(bytes).into_boxed_slice(),
+                 )
+             }
+         }
+         // TODO(#2778): Auto-derive these impls based on the repr(transparent).
+         //
+         // Safety (based on the safety checklist on the VarULE trait):
+         //  1. `$name` does not include any uninitialized or padding bytes as it is `repr(transparent)`
+         //     over a `VarULE` type, `Store`, as evidenced by the existence of `transparent_ref_from_store()`
+         //  2. `$name` is aligned to 1 byte for the same reason
+         //  3. The impl of `validate_bytes()` returns an error if any byte is not valid (passed down to `VarULE` impl of `Store`)
+         //  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety (passed down to `VarULE` impl of `Store`)
+         //  5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
+         //  6. `parse_bytes()` is left to its default impl
+         //  7. byte equality is semantic equality
+         #[cfg(feature = "zerovec")]
+         unsafe impl<Store> zerovec::ule::VarULE for $name<Store>
+         where
+             Store: zerovec::ule::VarULE,
+         {
+             #[inline]
+             fn validate_bytes(bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
+                 Store::validate_bytes(bytes)
+             }
+             #[inline]
+             unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
+                 // Safety: we can pass down the validity invariant to Store
+                 Self::transparent_ref_from_store(Store::from_bytes_unchecked(bytes))
+             }
+         }
+         #[cfg(feature = "zerofrom")]
+         impl<'zf, Store1, Store2> zerofrom::ZeroFrom<'zf, $name<Store1>> for $name<Store2>
+         where
+             Store2: zerofrom::ZeroFrom<'zf, Store1>,
+         {
+             #[inline]
+             fn zero_from(other: &'zf $name<Store1>) -> Self {
+                 $name::from_store(zerofrom::ZeroFrom::zero_from(&other.store))
+             }
+         }
+     };
+ }
+
+ #[cfg(feature = "alloc")]
+ fn string_to_box_u8(input: String) -> Box<[u8]> {
+     // Take the string's bytes, then turn the vector into a boxed slice.
+     input.into_bytes().into_boxed_slice()
+ }
+
+ #[doc(hidden)] // subject to change
+ #[cfg(feature = "alloc")]
+ pub type ZeroTrieStringIterator<'a> = // return type of `iter()` for the two ASCII trie types instantiated below
+ core::iter::Map<reader::ZeroTrieIterator<'a>, fn((Vec<u8>, usize)) -> (String, usize)>; // maps each byte-vector key to a `String`
+
+ impl_zerotrie_subtype!(
+ ZeroTrieSimpleAscii,
+ String, // keys are yielded as `String`
+ reader::get_iter_ascii_or_panic,
+ ZeroTrieStringIterator,
+ string_to_box_u8 // turns the `String` key into `Box<[u8]>` for the `*_bytes` exports
+ );
+ impl_zerotrie_subtype!(
+ ZeroAsciiIgnoreCaseTrie,
+ String, // keys are yielded as `String`
+ reader::get_iter_ascii_or_panic,
+ ZeroTrieStringIterator,
+ string_to_box_u8 // turns the `String` key into `Box<[u8]>` for the `*_bytes` exports
+ );
+ impl_zerotrie_subtype!(
+ ZeroTriePerfectHash,
+ Vec<u8>, // keys are yielded as raw byte vectors
+ reader::get_iter_phf,
+ reader::ZeroTrieIterator<'_>,
+ Vec::into_boxed_slice // turns the `Vec<u8>` key into `Box<[u8]>` for the `*_bytes` exports
+ );
+ impl_zerotrie_subtype!(
+ ZeroTrieExtendedCapacity,
+ Vec<u8>, // keys are yielded as raw byte vectors
+ reader::get_iter_phf,
+ reader::ZeroTrieIterator<'_>,
+ Vec::into_boxed_slice // turns the `Vec<u8>` key into `Box<[u8]>` for the `*_bytes` exports
+ );
+
+ macro_rules! impl_dispatch { // forwards a call on a `ZeroTrie` to whichever flavor it wraps
+ ($self:ident, $inner_fn:ident()) => { // by value, no arguments
+ match $self.0 {
+ ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn(),
+ ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn(),
+ ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn(),
+ }
+ };
+ ($self:ident, $inner_fn:ident().into_zerotrie()) => { // by value; re-wraps each result with `into_zerotrie()`
+ match $self.0 {
+ ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn().into_zerotrie(),
+ ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn().into_zerotrie(),
+ ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn().into_zerotrie(),
+ }
+ };
+ (&$self:ident, $inner_fn:ident()) => { // by reference, no arguments
+ match &$self.0 {
+ ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn(),
+ ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn(),
+ ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn(),
+ }
+ };
+ ($self:ident, $inner_fn:ident($arg:ident)) => { // by value, one argument
+ match $self.0 {
+ ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn($arg),
+ ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn($arg),
+ ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn($arg),
+ }
+ };
+ (&$self:ident, $inner_fn:ident($arg:ident)) => { // by reference, one argument
+ match &$self.0 {
+ ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn($arg),
+ ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn($arg),
+ ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn($arg),
+ }
+ };
+ (&$self:ident, $trait:ident::$inner_fn:ident()) => { // by reference; calls `$trait::$inner_fn(subtype)` and rebuilds the same flavor
+ match &$self.0 {
+ ZeroTrieFlavor::SimpleAscii(subtype) => {
+ ZeroTrie(ZeroTrieFlavor::SimpleAscii($trait::$inner_fn(subtype)))
+ }
+ ZeroTrieFlavor::PerfectHash(subtype) => {
+ ZeroTrie(ZeroTrieFlavor::PerfectHash($trait::$inner_fn(subtype)))
+ }
+ ZeroTrieFlavor::ExtendedCapacity(subtype) => {
+ ZeroTrie(ZeroTrieFlavor::ExtendedCapacity($trait::$inner_fn(subtype)))
+ }
+ }
+ };
+ }
+
+ impl<Store> ZeroTrie<Store> {
+     /// Consumes the trie and returns its byte store, whatever the flavor.
+     pub fn into_store(self) -> Store {
+         match self.0 {
+             ZeroTrieFlavor::SimpleAscii(trie) => trie.into_store(),
+             ZeroTrieFlavor::PerfectHash(trie) => trie.into_store(),
+             ZeroTrieFlavor::ExtendedCapacity(trie) => trie.into_store(),
+         }
+     }
+     /// Rebuilds this trie on top of a different store type obtained through `From`.
+     ///
+     /// For example, use this to change `ZeroTrie<Vec<u8>>` to `ZeroTrie<Cow<[u8]>>`.
+     /// The flavor of the trie is kept.
+     pub fn convert_store<NewStore>(self) -> ZeroTrie<NewStore>
+     where
+         NewStore: From<Store>,
+     {
+         match self.0 {
+             ZeroTrieFlavor::SimpleAscii(trie) => trie.convert_store().into_zerotrie(),
+             ZeroTrieFlavor::PerfectHash(trie) => trie.convert_store().into_zerotrie(),
+             ZeroTrieFlavor::ExtendedCapacity(trie) => trie.convert_store().into_zerotrie(),
+         }
+     }
+ }
+
+ impl<Store> ZeroTrie<Store>
+ where
+     Store: AsRef<[u8]>,
+ {
+     /// Looks up `key` in the trie, returning its value if present.
+     pub fn get<K>(&self, key: K) -> Option<usize>
+     where
+         K: AsRef<[u8]>,
+     {
+         match &self.0 {
+             ZeroTrieFlavor::SimpleAscii(trie) => trie.get(key),
+             ZeroTrieFlavor::PerfectHash(trie) => trie.get(key),
+             ZeroTrieFlavor::ExtendedCapacity(trie) => trie.get(key),
+         }
+     }
+     /// Returns `true` if the trie is empty.
+     pub fn is_empty(&self) -> bool {
+         match &self.0 {
+             ZeroTrieFlavor::SimpleAscii(trie) => trie.is_empty(),
+             ZeroTrieFlavor::PerfectHash(trie) => trie.is_empty(),
+             ZeroTrieFlavor::ExtendedCapacity(trie) => trie.is_empty(),
+         }
+     }
+     /// Returns the size of the trie in number of bytes.
+     ///
+     /// To get the number of keys in the trie, use `.iter().count()`.
+     pub fn byte_len(&self) -> usize {
+         match &self.0 {
+             ZeroTrieFlavor::SimpleAscii(trie) => trie.byte_len(),
+             ZeroTrieFlavor::PerfectHash(trie) => trie.byte_len(),
+             ZeroTrieFlavor::ExtendedCapacity(trie) => trie.byte_len(),
+         }
+     }
+ }
+
+ #[cfg(feature = "alloc")]
+ impl<Store> ZeroTrie<Store>
+ where
+     Store: AsRef<[u8]>,
+ {
+     /// Copies every key/value pair of this ZeroTrie into a `BTreeMap` keyed by boxed bytes.
+     pub fn to_btreemap(&self) -> BTreeMap<Box<[u8]>, usize> {
+         match &self.0 {
+             ZeroTrieFlavor::SimpleAscii(trie) => trie.to_btreemap_bytes(),
+             ZeroTrieFlavor::PerfectHash(trie) => trie.to_btreemap_bytes(),
+             ZeroTrieFlavor::ExtendedCapacity(trie) => trie.to_btreemap_bytes(),
+         }
+     }
+ }
+
+ #[cfg(feature = "litemap")]
+ impl<Store> ZeroTrie<Store>
+ where
+     Store: AsRef<[u8]>,
+ {
+     /// Copies every key/value pair of this ZeroTrie into a `LiteMap` keyed by boxed bytes.
+     pub fn to_litemap(&self) -> LiteMap<Box<[u8]>, usize> {
+         match &self.0 {
+             ZeroTrieFlavor::SimpleAscii(trie) => trie.to_litemap_bytes(),
+             ZeroTrieFlavor::PerfectHash(trie) => trie.to_litemap_bytes(),
+             ZeroTrieFlavor::ExtendedCapacity(trie) => trie.to_litemap_bytes(),
+         }
+     }
+ }
+
+ #[cfg(feature = "alloc")]
+ impl ZeroTrie<Vec<u8>> {
+     // Builds a trie from key/value tuples, choosing the flavor from the input:
+     // `ZeroTrieSimpleAscii` when every key is ASCII and there are fewer than 512
+     // items, `ZeroTriePerfectHash` otherwise.
+     pub(crate) fn try_from_tuple_slice(
+         items: &[(&ByteStr, usize)],
+     ) -> Result<Self, ZeroTrieBuildError> {
+         let ascii_only = items.iter().all(|(key, _)| key.is_all_ascii());
+         let small_enough = items.len() < 512;
+         if ascii_only && small_enough {
+             ZeroTrieSimpleAscii::try_from_tuple_slice(items)
+                 .map(ZeroTrieSimpleAscii::into_zerotrie)
+         } else {
+             ZeroTriePerfectHash::try_from_tuple_slice(items)
+                 .map(ZeroTriePerfectHash::into_zerotrie)
+         }
+     }
+ }
+
+ #[cfg(feature = "alloc")]
+ impl<K> FromIterator<(K, usize)> for ZeroTrie<Vec<u8>>
+ where
+     K: AsRef<[u8]>,
+ {
+     // Collects, sorts and builds; panics if the build fails.
+     fn from_iter<T: IntoIterator<Item = (K, usize)>>(iter: T) -> Self {
+         // Two vectors are required: `owned` keeps every `K` alive while `borrowed`
+         // holds the byte slices pointing into them.
+         let owned: Vec<(K, usize)> = iter.into_iter().collect();
+         let mut borrowed: Vec<(&[u8], usize)> = owned
+             .iter()
+             .map(|(key, value)| (key.as_ref(), *value))
+             .collect();
+         borrowed.sort();
+         #[allow(clippy::unwrap_used)] // FromIterator is panicky
+         Self::try_from_tuple_slice(ByteStr::from_byte_slice_with_value(&borrowed)).unwrap()
+     }
+ }
+
+ #[cfg(feature = "databake")]
+ impl<Store> databake::Bake for ZeroTrie<Store>
+ where
+ Store: databake::Bake,
+ {
+ fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
+ use databake::*;
+ let inner = impl_dispatch!(&self, bake(env)); // bake the wrapped flavor-specific trie
+ quote! { #inner.into_zerotrie() } // the emitted expression re-wraps it via `into_zerotrie()`
+ }
+ }
+
+ #[cfg(feature = "databake")]
+ impl<Store> databake::BakeSize for ZeroTrie<Store>
+ where
+ Store: databake::BakeSize,
+ {
+ fn borrows_size(&self) -> usize {
+ impl_dispatch!(&self, borrows_size()) // forwarded unchanged to the wrapped flavor
+ }
+ }
+
+ #[cfg(feature = "zerofrom")]
+ impl<'zf, Store1, Store2> zerofrom::ZeroFrom<'zf, ZeroTrie<Store1>> for ZeroTrie<Store2>
+ where
+ Store2: zerofrom::ZeroFrom<'zf, Store1>,
+ {
+ fn zero_from(other: &'zf ZeroTrie<Store1>) -> Self {
+ use zerofrom::ZeroFrom; // needed so the macro arm can name `ZeroFrom::zero_from`
+ impl_dispatch!(&other, ZeroFrom::zero_from()) // converts the inner trie and keeps the same flavor
+ }
+ }
diff --git a/vendor/zerotrie/tests/asciitrie_test.rs b/vendor/zerotrie/tests/asciitrie_test.rs
new file mode 100644
index 00000000..cc6d5fbe
--- /dev/null
+++ b/vendor/zerotrie/tests/asciitrie_test.rs
@@ -0,0 +1,73 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use postcard::ser_flavors::{AllocVec, Flavor};
+use serde::Serialize;
+use zerotrie::ZeroTriePerfectHash;
+use zerotrie::ZeroTrieSimpleAscii;
+use zerovec::ZeroMap;
+
+ mod testdata { // test fixtures, pulled in textually from the data file
+ include!("data/data.rs");
+ }
+
+ #[test]
+ fn test_basic() {
+     // The ASCII fixture is read through both trie types.
+     let bytes_ascii = testdata::basic::TRIE_ASCII;
+     let data_ascii = testdata::basic::DATA_ASCII;
+     let trie_ascii = ZeroTrieSimpleAscii::from_bytes(bytes_ascii);
+     let trie_phf_ascii = ZeroTriePerfectHash::from_bytes(bytes_ascii);
+
+     let bytes_unicode = testdata::basic::TRIE_UNICODE;
+     let data_unicode = testdata::basic::DATA_UNICODE;
+     let trie_phf_unicode = ZeroTriePerfectHash::from_bytes(bytes_unicode);
+
+     let bytes_binary = testdata::basic::TRIE_BINARY;
+     let data_binary = testdata::basic::DATA_BINARY;
+     let trie_phf_binary = ZeroTriePerfectHash::from_bytes(bytes_binary);
+
+     // Check that the getter works
+     for (key, expected) in data_ascii {
+         let from_simple = trie_ascii
+             .get(key)
+             .unwrap_or_else(|| panic!("value should be in trie: {:?} => {}", key, expected));
+         assert_eq!(*expected, from_simple);
+         let from_phf = trie_phf_ascii
+             .get(key)
+             .unwrap_or_else(|| panic!("value should be in trie6: {:?} => {}", key, expected));
+         assert_eq!(*expected, from_phf);
+     }
+
+     for (key, expected) in data_unicode {
+         let from_unicode = trie_phf_unicode
+             .get(key)
+             .unwrap_or_else(|| panic!("value should be in trie6: {:?} => {}", key, expected));
+         assert_eq!(*expected, from_unicode);
+     }
+
+     for (key, expected) in data_binary {
+         let from_binary = trie_phf_binary
+             .get(key)
+             .unwrap_or_else(|| panic!("value should be in trie6: {:?} => {}", key, expected));
+         assert_eq!(*expected, from_binary);
+     }
+
+     // Compare the size to a postcard ZeroMap
+     let zm: ZeroMap<[u8], u32> = data_ascii.iter().map(|(a, b)| (*a, *b as u32)).collect();
+     let mut serializer = postcard::Serializer {
+         output: AllocVec::new(),
+     };
+     zm.serialize(&mut serializer).unwrap();
+     let zeromap_bytes = serializer
+         .output
+         .finalize()
+         .expect("Failed to finalize serializer output");
+
+     assert_eq!(26, bytes_ascii.len());
+     assert_eq!(77, zeromap_bytes.len());
+ }
diff --git a/vendor/zerotrie/tests/builder_test.rs b/vendor/zerotrie/tests/builder_test.rs
new file mode 100644
index 00000000..263169e0
--- /dev/null
+++ b/vendor/zerotrie/tests/builder_test.rs
@@ -0,0 +1,855 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use litemap::LiteMap;
+use zerotrie::ZeroTriePerfectHash;
+use zerotrie::ZeroTrieSimpleAscii;
+
+mod testdata { // Test fixtures, textually included from data/data.rs next to this file.
+ include!("data/data.rs");
+}
+
+use testdata::strings_to_litemap;
+
+const NON_EXISTENT_STRINGS: &[&str] = &[ // Keys for which the check_* helpers in this file assert `get` returns `None`.
+ "a9PS", "ahsY", "ahBO", "a8IN", "xk8o", "xv1l", "xI2S", "618y", "d6My", "uszy",
+];
+
+macro_rules! assert_bytes_eq { // Asserts that `$a` has length `$len` and that `$a == $b`.
+ ($len:literal, $a:expr, $b:expr) => {
+ assert_eq!($len, $a.len()); // length is checked first, so a size change fails on its own assertion
+ assert_eq!($a, $b); // then the full contents are compared
+ };
+}
+
+fn check_simple_ascii_trie<S>(items: &LiteMap<&[u8], usize>, trie: &ZeroTrieSimpleAscii<S>)
+where
+ S: AsRef<[u8]> + ?Sized,
+{ // Cross-checks `trie` against `items`: hits, misses, iteration order, and the const builder. Panics on mismatch.
+ // Every (key, value) pair of `items` must be retrievable from the trie
+ for (k, v) in items.iter() {
+ assert_eq!(trie.get(k), Some(*v));
+ }
+ // None of the NON_EXISTENT_STRINGS keys may be found in the trie
+ for s in NON_EXISTENT_STRINGS.iter() {
+ assert_eq!(trie.get(s.as_bytes()), None);
+ }
+ // The trie iterator must yield the same (String, value) pairs, in the same order, as the LiteMap
+ assert!(items
+ .iter()
+ .map(|(s, v)| (String::from_utf8(s.to_vec()).unwrap(), *v)) // keys must be valid UTF-8, otherwise this unwrap panics
+ .eq(trie.iter()));
+ // The const builder must produce exactly the same bytes as `trie`
+ let const_trie = ZeroTrieSimpleAscii::try_from_litemap_with_const_builder(items).unwrap();
+ assert_eq!(trie.as_bytes(), const_trie.as_bytes());
+}
+
+fn check_phf_ascii_trie<S>(items: &LiteMap<&[u8], usize>, trie: &ZeroTriePerfectHash<S>)
+where
+ S: AsRef<[u8]> + ?Sized,
+{ // Cross-checks a perfect-hash `trie` against `items`: hits, misses, and iterated contents. Panics on mismatch.
+ // Every (key, value) pair of `items` must be retrievable from the trie
+ for (k, v) in items.iter() {
+ assert_eq!(trie.get(k), Some(*v));
+ }
+ // None of the NON_EXISTENT_STRINGS keys may be found in the trie
+ for s in NON_EXISTENT_STRINGS.iter() {
+ assert_eq!(trie.get(s.as_bytes()), None);
+ }
+ // The trie iterator must yield exactly the contents of the LiteMap.
+ // Note: the trie may iterate in a different order, so the items are collected into a new LiteMap before comparing
+ let recovered_items: LiteMap<_, _> = trie.iter().collect();
+ assert_eq!(
+ items.to_borrowed_keys_values::<[u8], usize, Vec<_>>(),
+ recovered_items.to_borrowed_keys_values()
+ );
+}
+
+fn check_phf_bytes_trie<S>(items: &LiteMap<&[u8], usize>, trie: &ZeroTriePerfectHash<S>)
+where
+ S: AsRef<[u8]> + ?Sized,
+{ // Same checks as check_phf_ascii_trie, but each lookup assertion also prints the offending key on failure.
+ // Every (key, value) pair of `items` must be retrievable from the trie
+ for (k, v) in items.iter() {
+ assert_eq!(trie.get(k), Some(*v), "{k:?}");
+ }
+ // None of the NON_EXISTENT_STRINGS keys may be found in the trie
+ for s in NON_EXISTENT_STRINGS.iter() {
+ assert_eq!(trie.get(s.as_bytes()), None, "{s:?}");
+ }
+ // The trie iterator must yield exactly the contents of the LiteMap.
+ // Note: the trie may iterate in a different order, so the items are collected into a new LiteMap before comparing
+ let recovered_items: LiteMap<_, _> = trie.iter().collect();
+ assert_eq!(
+ items.to_borrowed_keys_values::<[u8], usize, Vec<_>>(),
+ recovered_items.to_borrowed_keys_values()
+ );
+}
+
+#[test]
+fn test_basic() { // Builds tries from the `testdata::basic` data sets and compares them with the stored fixture bytes.
+ let lm1a: LiteMap<&[u8], usize> = testdata::basic::DATA_ASCII.iter().copied().collect();
+ let lm1b: LiteMap<&[u8], usize> = lm1a.to_borrowed_keys(); // same entries as lm1a; used as the perfect-hash builder input
+ let lm2: LiteMap<&[u8], usize> = testdata::basic::DATA_UNICODE.iter().copied().collect();
+ let lm3: LiteMap<&[u8], usize> = testdata::basic::DATA_BINARY.iter().copied().collect();
+
+ let expected_bytes = testdata::basic::TRIE_ASCII;
+ let trie = ZeroTrieSimpleAscii::try_from(&lm1a).unwrap();
+ assert_bytes_eq!(26, trie.as_bytes(), expected_bytes);
+ check_simple_ascii_trie(&lm1a, &trie);
+
+ let trie = ZeroTriePerfectHash::try_from(&lm1b).unwrap();
+ assert_bytes_eq!(26, trie.as_bytes(), expected_bytes); // still TRIE_ASCII: both trie types must serialize this data identically
+ check_phf_ascii_trie(&lm1a, &trie);
+
+ let expected_bytes = testdata::basic::TRIE_UNICODE;
+ let trie = ZeroTriePerfectHash::try_from(&lm2).unwrap();
+ assert_bytes_eq!(39, trie.as_bytes(), expected_bytes);
+ check_phf_bytes_trie(&lm2, &trie);
+
+ let expected_bytes = testdata::basic::TRIE_BINARY;
+ let trie = ZeroTriePerfectHash::try_from(&lm3).unwrap();
+ assert_bytes_eq!(26, trie.as_bytes(), expected_bytes);
+ check_phf_bytes_trie(&lm3, &trie);
+}
+
+#[test]
+fn test_empty() { // A trie built from an empty LiteMap must be zero bytes long and contain nothing.
+ let trie = ZeroTrieSimpleAscii::try_from(&LiteMap::<&[u8], usize>::new_vec()).unwrap();
+ assert_eq!(trie.byte_len(), 0);
+ assert!(trie.is_empty());
+ assert_eq!(trie.get(b""), None); // even the empty key is absent
+ assert_eq!(trie.as_bytes(), &[]);
+}
+
+#[test]
+fn test_single_empty_value() { // A map holding only the empty key must serialize to a single value byte.
+ let litemap: LiteMap<&[u8], usize> = [
+ (&b""[..], 10), // the only entry: empty key -> 10
+ ]
+ .into_iter()
+ .collect();
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
+ assert_eq!(trie.get(b""), Some(10));
+ assert_eq!(trie.get(b"x"), None);
+ let expected_bytes = &[0b10001010]; // 0x80 | 10
+ assert_eq!(trie.as_bytes(), expected_bytes);
+
+ let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
+ let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
+ assert_bytes_eq!(1, trie_phf.as_bytes(), expected_bytes); // the perfect-hash trie must produce the same byte
+ check_phf_ascii_trie(&litemap, &trie_phf);
+}
+
+#[test]
+fn test_single_byte_string() { // A map holding only the one-byte key "x" must serialize to the key byte plus a value byte.
+ let litemap: LiteMap<&[u8], usize> = [
+ (&b"x"[..], 10), // the only entry: "x" -> 10
+ ]
+ .into_iter()
+ .collect();
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
+ assert_eq!(trie.get(b""), None); // a prefix of the key is not a key
+ assert_eq!(trie.get(b"xy"), None); // neither is an extension of it
+ check_simple_ascii_trie(&litemap, &trie);
+ let expected_bytes = &[b'x', 0b10001010]; // 'x', then 0x80 | 10
+ assert_bytes_eq!(2, trie.as_bytes(), expected_bytes);
+
+ let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
+ let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
+ assert_bytes_eq!(2, trie_phf.as_bytes(), expected_bytes); // the perfect-hash trie must produce the same bytes
+ check_phf_ascii_trie(&litemap, &trie_phf);
+}
+
+#[test]
+fn test_single_string() { // A map holding only the key "xyz" must serialize to the three key bytes plus a value byte.
+ let litemap: LiteMap<&[u8], usize> = [
+ (&b"xyz"[..], 10), // the only entry: "xyz" -> 10
+ ]
+ .into_iter()
+ .collect();
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
+ assert_eq!(trie.get(b""), None); // proper prefixes of the key are absent...
+ assert_eq!(trie.get(b"x"), None);
+ assert_eq!(trie.get(b"xy"), None);
+ assert_eq!(trie.get(b"xyzz"), None); // ...and so is an extension of it
+ check_simple_ascii_trie(&litemap, &trie);
+ let expected_bytes = &[b'x', b'y', b'z', 0b10001010]; // "xyz", then 0x80 | 10
+ assert_bytes_eq!(4, trie.as_bytes(), expected_bytes);
+
+ let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
+ let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
+ assert_bytes_eq!(4, trie_phf.as_bytes(), expected_bytes); // the perfect-hash trie must produce the same bytes
+ check_phf_ascii_trie(&litemap, &trie_phf);
+}
+
+#[test]
+fn test_prefix_strings() { // Two keys where one ("x") is a prefix of the other ("xy").
+ let litemap: LiteMap<&[u8], usize> = [(&b"x"[..], 0), (b"xy", 1)].into_iter().collect();
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
+ assert_eq!(trie.get(b""), None);
+ assert_eq!(trie.get(b"xyz"), None);
+ check_simple_ascii_trie(&litemap, &trie);
+ let expected_bytes = &[b'x', 0b10000000, b'y', 0b10000001]; // 'x', 0x80 | 0, 'y', 0x80 | 1
+ assert_bytes_eq!(4, trie.as_bytes(), expected_bytes);
+
+ let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
+ let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
+ assert_bytes_eq!(4, trie_phf.as_bytes(), expected_bytes); // the perfect-hash trie must produce the same bytes
+ check_phf_ascii_trie(&litemap, &trie_phf);
+}
+
+#[test]
+fn test_single_byte_branch() { // Two one-byte keys ("x", "y") that differ in their first byte.
+ let litemap: LiteMap<&[u8], usize> = [(&b"x"[..], 0), (b"y", 1)].into_iter().collect();
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
+ assert_eq!(trie.get(b""), None);
+ assert_eq!(trie.get(b"xy"), None);
+ check_simple_ascii_trie(&litemap, &trie);
+ let expected_bytes = &[0b11000010, b'x', b'y', 1, 0b10000000, 0b10000001]; // 0xC0 | 2, the two key bytes, the byte 1 (presumably an offset), then 0x80 | 0 and 0x80 | 1
+ assert_bytes_eq!(6, trie.as_bytes(), expected_bytes);
+
+ let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
+ let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
+ assert_bytes_eq!(6, trie_phf.as_bytes(), expected_bytes); // the perfect-hash trie must produce the same bytes
+ check_phf_ascii_trie(&litemap, &trie_phf);
+}
+
+#[test]
+fn test_multi_byte_branch() { // Two three-byte keys ("axb", "ayc") sharing the first byte and differing in the second.
+ let litemap: LiteMap<&[u8], usize> = [(&b"axb"[..], 0), (b"ayc", 1)].into_iter().collect();
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
+ assert_eq!(trie.get(b""), None); // no proper prefix of either key is itself a key
+ assert_eq!(trie.get(b"a"), None);
+ assert_eq!(trie.get(b"ax"), None);
+ assert_eq!(trie.get(b"ay"), None);
+ check_simple_ascii_trie(&litemap, &trie);
+ let expected_bytes = &[ // shared 'a', then 0xC0 | 2, 'x' 'y', the byte 2 (presumably an offset), then 'b' + 0x80 | 0 and 'c' + 0x80 | 1
+ b'a', 0b11000010, b'x', b'y', 2, b'b', 0b10000000, b'c', 0b10000001,
+ ];
+ assert_bytes_eq!(9, trie.as_bytes(), expected_bytes);
+
+ let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
+ let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
+ assert_bytes_eq!(9, trie_phf.as_bytes(), expected_bytes); // the perfect-hash trie must produce the same bytes
+ check_phf_ascii_trie(&litemap, &trie_phf);
+}
+
+#[test]
+fn test_linear_varint_values() { // Nested-prefix keys whose values (100, 500, 5000) are too large for a single value byte.
+ let litemap: LiteMap<&[u8], usize> = [(&b""[..], 100), (b"x", 500), (b"xyz", 5000)]
+ .into_iter()
+ .collect();
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
+ assert_eq!(trie.get(b"xy"), None);
+ assert_eq!(trie.get(b"xz"), None);
+ assert_eq!(trie.get(b"xyzz"), None);
+ check_simple_ascii_trie(&litemap, &trie);
+ let expected_bytes = &[0x90, 0x54, b'x', 0x93, 0x64, b'y', b'z', 0x90, 0x96, 0x78]; // presumably: 100 (2 bytes), 'x', 500 (2 bytes), "yz", 5000 (3 bytes)
+ assert_bytes_eq!(10, trie.as_bytes(), expected_bytes);
+
+ let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
+ let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
+ assert_bytes_eq!(10, trie_phf.as_bytes(), expected_bytes); // the perfect-hash trie must produce the same bytes
+ check_phf_ascii_trie(&litemap, &trie_phf);
+}
+
+#[test]
+fn test_bug() { // Checks the nested prefixes "abc" / "abcd" / "abcde"; NOTE(review): the bug this guards against is not described here.
+ let litemap: LiteMap<&[u8], usize> = [(&b"abc"[..], 100), (b"abcd", 500), (b"abcde", 5000)]
+ .into_iter()
+ .collect();
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
+ assert_eq!(trie.get(b"ab"), None);
+ assert_eq!(trie.get(b"abd"), None);
+ assert_eq!(trie.get(b"abCD"), None); // differs from the key "abcd" only by letter case
+ check_simple_ascii_trie(&litemap, &trie);
+
+ let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
+ let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
+ check_phf_ascii_trie(&litemap, &trie_phf); // no exact byte expectations in this test, only lookups and iteration
+}
+
+#[test]
+fn test_varint_branch() { // One branch over 52 single-letter keys (A-Z, a-z), each mapped to its index in `chars`.
+ let chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+ let litemap: LiteMap<&[u8], usize> = (0..chars.len())
+ .map(|i| (chars.get(i..i + 1).unwrap().as_bytes(), i)) // key = i-th letter as a one-byte slice, value = i
+ .collect();
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
+ assert_eq!(trie.get(b""), None);
+ assert_eq!(trie.get(b"ax"), None);
+ assert_eq!(trie.get(b"ay"), None);
+ check_simple_ascii_trie(&litemap, &trie);
+ #[rustfmt::skip]
+ let expected_bytes = &[ // expected bytes for the simple ASCII trie
+ 0b11100000, // branch varint lead
+ 0x14, // branch varint trail
+ // search array:
+ b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J',
+ b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T',
+ b'U', b'V', b'W', b'X', b'Y', b'Z',
+ b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
+ b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
+ b'u', b'v', b'w', b'x', b'y', b'z',
+ // offset array:
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20,
+ 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52,
+ 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84,
+ 86,
+ // single-byte values:
+ 0x80, (0x80 | 1), (0x80 | 2), (0x80 | 3), (0x80 | 4),
+ (0x80 | 5), (0x80 | 6), (0x80 | 7), (0x80 | 8), (0x80 | 9),
+ (0x80 | 10), (0x80 | 11), (0x80 | 12), (0x80 | 13), (0x80 | 14),
+ (0x80 | 15),
+ // multi-byte values:
+ 0x90, 0, 0x90, 1, 0x90, 2, 0x90, 3, 0x90, 4, 0x90, 5,
+ 0x90, 6, 0x90, 7, 0x90, 8, 0x90, 9, 0x90, 10, 0x90, 11,
+ 0x90, 12, 0x90, 13, 0x90, 14, 0x90, 15, 0x90, 16, 0x90, 17,
+ 0x90, 18, 0x90, 19, 0x90, 20, 0x90, 21, 0x90, 22, 0x90, 23,
+ 0x90, 24, 0x90, 25, 0x90, 26, 0x90, 27, 0x90, 28, 0x90, 29,
+ 0x90, 30, 0x90, 31, 0x90, 32, 0x90, 33, 0x90, 34, 0x90, 35,
+ ];
+ assert_bytes_eq!(193, trie.as_bytes(), expected_bytes); // 2 head + 52 search + 51 offsets + 16 one-byte values + 36 two-byte values
+
+ #[rustfmt::skip]
+ let expected_bytes = &[ // expected bytes for the perfect-hash trie (shadows the array above)
+ 0b11100000, // branch varint lead
+ 0x14, // branch varint trail
+ // PHF metadata:
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 10, 12, 16, 4, 4, 4, 4, 4, 4, 8,
+ 4, 4, 4, 16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 7,
+ // search array:
+ b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
+ b'p', b'u', b'v', b'w', b'D', b'E', b'F', b'q',
+ b'r', b'A', b'B', b'C', b'x', b'y', b'z', b's',
+ b'H', b'I', b'J', b'G', b'P', b'Q', b'R', b'S',
+ b'T', b'U', b'V', b'W', b'X', b'Y', b'Z', b'K',
+ b'L', b'M', b'N', b'O', b'g', b'a', b'b', b'c',
+ b't', b'd', b'f', b'e',
+ // offset array:
+ 2, 4, 6, 8, 10, 12, 14,
+ 16, 18, 20, 22, 24, 25, 26, 27,
+ 29, 31, 32, 33, 34, 36, 38, 40,
+ 42, 43, 44, 45, 46, 47, 49, 51,
+ 53, 55, 57, 59, 61, 63, 65, 67,
+ 68, 69, 70, 71, 72, 74, 76, 78,
+ 80, 82, 84, 86,
+ // values:
+ 0x90, 17, 0x90, 18, 0x90, 19, 0x90, 20, 0x90, 21, 0x90, 22, 0x90, 23,
+ 0x90, 24, 0x90, 25, 0x90, 30, 0x90, 31, 0x90, 32, 0x80 | 3, 0x80 | 4,
+ 0x80 | 5, 0x90, 26, 0x90, 27, 0x80, 0x80 | 1, 0x80 | 2, 0x90, 33,
+ 0x90, 34, 0x90, 35, 0x90, 28, 0x80 | 7, 0x80 | 8, 0x80 | 9, 0x80 | 6,
+ 0x80 | 15, 0x90, 0, 0x90, 1, 0x90, 2, 0x90, 3, 0x90, 4, 0x90, 5,
+ 0x90, 6, 0x90, 7, 0x90, 8, 0x90, 9, 0x80 | 10, 0x80 | 11, 0x80 | 12,
+ 0x80 | 13, 0x80 | 14, 0x90, 16, 0x90, 10, 0x90, 11, 0x90, 12, 0x90, 29,
+ 0x90, 13, 0x90, 15, 0x90, 14,
+ ];
+ let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
+ let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
+ assert_bytes_eq!(246, trie_phf.as_bytes(), expected_bytes); // 53 bytes longer than the simple trie: the PHF metadata
+ check_phf_ascii_trie(&litemap, &trie_phf);
+}
+
+#[test]
+fn test_below_wide() { // Ten-way branch whose offsets are still expected as one byte each.
+ let litemap: LiteMap<&[u8], usize> = [
+ (&b"abcdefghijklmnopqrstuvwxyz"[..], 1),
+ (b"bcdefghijklmnopqrstuvwxyza", 2),
+ (b"cdefghijklmnopqrstuvwxyzab", 3),
+ (b"defghijklmnopqrstuvwxyzabc", 4),
+ (b"efghijklmnopqrstuvwxyzabcd", 5),
+ (b"fghijklmnopqrstuvwxyzabcde", 6),
+ (b"ghijklmnopqrstuvwxyzabcdef", 7),
+ (b"hijklmnopqrstuvwxyzabcdefg", 8),
+ (b"ijklmnopqrstuvwxyzabcdefgh", 9),
+ (b"jklmnopqrstuvwxyzabcd", 10), // 21 bytes; the nine keys above are 26 bytes each
+ ]
+ .into_iter()
+ .collect();
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
+ assert_eq!(trie.get(b""), None);
+ assert_eq!(trie.get(b"abc"), None);
+ check_simple_ascii_trie(&litemap, &trie);
+ #[rustfmt::skip]
+ let expected_bytes = &[
+ 0b11001010, // branch
+ // search array:
+ b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
+ // offset array:
+ 26, 52, 78, 104, 130, 156, 182, 208, 234,
+ // offset data:
+ b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n',
+ b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z',
+ 0x81,
+ b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
+ b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a',
+ 0x82,
+ b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p',
+ b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b',
+ 0x83,
+ b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q',
+ b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c',
+ 0x84,
+ b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r',
+ b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd',
+ 0x85,
+ b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's',
+ b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e',
+ 0x86,
+ b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
+ b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f',
+ 0x87,
+ b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u',
+ b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
+ 0x88,
+ b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v',
+ b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h',
+ 0x89,
+ b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
+ b'x', b'y', b'z', b'a', b'b', b'c', b'd',
+ 0x8A,
+ ];
+ assert_bytes_eq!(275, trie.as_bytes(), expected_bytes); // 1 head + 10 search + 9 offsets + 9 * 26 + 21 bytes of child data (255 in total)
+}
+
+#[test]
+fn test_at_wide() { // Same keys as test_below_wide except the tenth is one byte longer; offsets are now expected as two bytes each.
+ let litemap: LiteMap<&[u8], usize> = [
+ (&b"abcdefghijklmnopqrstuvwxyz"[..], 1),
+ (b"bcdefghijklmnopqrstuvwxyza", 2),
+ (b"cdefghijklmnopqrstuvwxyzab", 3),
+ (b"defghijklmnopqrstuvwxyzabc", 4),
+ (b"efghijklmnopqrstuvwxyzabcd", 5),
+ (b"fghijklmnopqrstuvwxyzabcde", 6),
+ (b"ghijklmnopqrstuvwxyzabcdef", 7),
+ (b"hijklmnopqrstuvwxyzabcdefg", 8),
+ (b"ijklmnopqrstuvwxyzabcdefgh", 9),
+ (b"jklmnopqrstuvwxyzabcde", 10), // 22 bytes; the nine keys above are 26 bytes each
+ ]
+ .into_iter()
+ .collect();
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
+ assert_eq!(trie.get(b""), None);
+ assert_eq!(trie.get(b"abc"), None);
+ check_simple_ascii_trie(&litemap, &trie);
+ #[rustfmt::skip]
+ let expected_bytes = &[
+ 0b11100001, // branch lead
+ 0x6A, // branch trail
+ // search array:
+ b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
+ // offset array (wide):
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 26, 52, 78, 104, 130, 156, 182, 208, 234,
+ // offset data:
+ b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n',
+ b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z',
+ 0x81,
+ b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
+ b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a',
+ 0x82,
+ b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p',
+ b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b',
+ 0x83,
+ b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q',
+ b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c',
+ 0x84,
+ b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r',
+ b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd',
+ 0x85,
+ b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's',
+ b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e',
+ 0x86,
+ b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
+ b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f',
+ 0x87,
+ b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u',
+ b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
+ 0x88,
+ b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v',
+ b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h',
+ 0x89,
+ b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
+ b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e',
+ 0x8A,
+ ];
+ assert_bytes_eq!(286, trie.as_bytes(), expected_bytes); // 2 head + 10 search + 18 offset bytes + 9 * 26 + 22 bytes of child data (256 in total)
+}
+
+#[test]
+fn test_at_wide_plus() { // Same shape as test_at_wide with the tenth key one byte longer again; two-byte offsets are still expected.
+ let litemap: LiteMap<&[u8], usize> = [
+ (&b"abcdefghijklmnopqrstuvwxyz"[..], 1),
+ (b"bcdefghijklmnopqrstuvwxyza", 2),
+ (b"cdefghijklmnopqrstuvwxyzab", 3),
+ (b"defghijklmnopqrstuvwxyzabc", 4),
+ (b"efghijklmnopqrstuvwxyzabcd", 5),
+ (b"fghijklmnopqrstuvwxyzabcde", 6),
+ (b"ghijklmnopqrstuvwxyzabcdef", 7),
+ (b"hijklmnopqrstuvwxyzabcdefg", 8),
+ (b"ijklmnopqrstuvwxyzabcdefgh", 9),
+ (b"jklmnopqrstuvwxyzabcdef", 10), // 23 bytes; the nine keys above are 26 bytes each
+ ]
+ .into_iter()
+ .collect();
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
+ assert_eq!(trie.get(b""), None);
+ assert_eq!(trie.get(b"abc"), None);
+ check_simple_ascii_trie(&litemap, &trie);
+ #[rustfmt::skip]
+ let expected_bytes = &[
+ 0b11100001, // branch lead
+ 0x6A, // branch trail
+ // search array:
+ b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
+ // offset array (wide):
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 26, 52, 78, 104, 130, 156, 182, 208, 234,
+ // offset data:
+ b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n',
+ b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z',
+ 0x81,
+ b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
+ b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a',
+ 0x82,
+ b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p',
+ b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b',
+ 0x83,
+ b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q',
+ b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c',
+ 0x84,
+ b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r',
+ b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd',
+ 0x85,
+ b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's',
+ b't', b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e',
+ 0x86,
+ b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
+ b'u', b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f',
+ 0x87,
+ b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u',
+ b'v', b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
+ 0x88,
+ b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v',
+ b'w', b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h',
+ 0x89,
+ b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
+ b'x', b'y', b'z', b'a', b'b', b'c', b'd', b'e', b'f',
+ 0x8A,
+ ];
+ assert_bytes_eq!(287, trie.as_bytes(), expected_bytes); // 2 head + 10 search + 18 offset bytes + 9 * 26 + 23 bytes of child data (257 in total)
+}
+
+#[test]
+fn test_everything() { // Mixed data set (empty key, nested branches, prefixes, multi-byte values), plus size comparisons with zerovec maps.
+ let litemap: LiteMap<&[u8], usize> = [
+ (&b""[..], 0),
+ (b"axb", 100),
+ (b"ayc", 2),
+ (b"azd", 3),
+ (b"bxe", 4),
+ (b"bxefg", 500),
+ (b"bxefh", 6),
+ (b"bxei", 7),
+ (b"bxeikl", 8),
+ ]
+ .into_iter()
+ .collect();
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap();
+ assert_eq!(trie.get(b""), Some(0));
+ assert_eq!(trie.get(b"a"), None);
+ assert_eq!(trie.get(b"ax"), None);
+ assert_eq!(trie.get(b"ay"), None);
+ check_simple_ascii_trie(&litemap, &trie);
+ let expected_bytes = &[ // expected bytes for the simple ASCII trie
+ 0b10000000, // value 0
+ 0b11000010, // branch of 2
+ b'a', //
+ b'b', //
+ 13, //
+ 0b11000011, // branch of 3
+ b'x', //
+ b'y', //
+ b'z', //
+ 3, //
+ 5, //
+ b'b', //
+ 0b10010000, // value 100 (lead)
+ 0x54, // value 100 (trail)
+ b'c', //
+ 0b10000010, // value 2
+ b'd', //
+ 0b10000011, // value 3
+ b'x', //
+ b'e', //
+ 0b10000100, // value 4
+ 0b11000010, // branch of 2
+ b'f', //
+ b'i', //
+ 7, //
+ 0b11000010, // branch of 2
+ b'g', //
+ b'h', //
+ 2, //
+ 0b10010011, // value 500 (lead)
+ 0x64, // value 500 (trail)
+ 0b10000110, // value 6
+ 0b10000111, // value 7
+ b'k', //
+ b'l', //
+ 0b10001000, // value 8
+ ];
+ assert_bytes_eq!(36, trie.as_bytes(), expected_bytes);
+
+ #[rustfmt::skip]
+ let expected_bytes = &[ // expected bytes for the perfect-hash trie; same byte values as the array above
+ 0b10000000, // value 0
+ 0b11000010, // branch of 2
+ b'a', //
+ b'b', //
+ 13, //
+ 0b11000011, // start of 'a' subtree: branch of 3
+ b'x', //
+ b'y', //
+ b'z', //
+ 3, //
+ 5, //
+ b'b', //
+ 0b10010000, // value 100 (lead)
+ 0x54, // value 100 (trail)
+ b'c', //
+ 0b10000010, // value 2
+ b'd', //
+ 0b10000011, // value 3
+ b'x', // start of 'b' subtree
+ b'e', //
+ 0b10000100, // value 4
+ 0b11000010, // branch of 2
+ b'f', //
+ b'i', //
+ 7, //
+ 0b11000010, // branch of 2
+ b'g', //
+ b'h', //
+ 2, //
+ 0b10010011, // value 500 (lead)
+ 0x64, // value 500 (trail)
+ 0b10000110, // value 6
+ 0b10000111, // value 7
+ b'k', //
+ b'l', //
+ 0b10001000, // value 8
+ ];
+ let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
+ let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
+ assert_bytes_eq!(36, trie_phf.as_bytes(), expected_bytes);
+ check_phf_ascii_trie(&litemap, &trie_phf);
+
+ let zhm: zerovec::ZeroMap<[u8], u32> = litemap.iter().map(|(a, b)| (*a, *b as u32)).collect(); // from here on: postcard sizes of the same data in zerovec maps
+ let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
+ assert_eq!(zhm_buf.len(), 88);
+
+ let zhm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); // `as u8` truncates 500; only the serialized size is checked
+ let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
+ assert_eq!(zhm_buf.len(), 61);
+
+ let zhm: zerovec::ZeroHashMap<[u8], u32> =
+ litemap.iter().map(|(a, b)| (*a, *b as u32)).collect();
+ let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
+ assert_eq!(zhm_buf.len(), 161);
+
+ let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); // `as u8` truncates 500; only the serialized size is checked
+ let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
+ assert_eq!(zhm_buf.len(), 134);
+}
+
+macro_rules! utf8_byte { // Evaluates to byte number `$i` (0-based) of the UTF-8 encoding of `$ch`.
+ ($ch:expr, $i:literal) => {{
+ let mut utf8_encoder_buf = [0u8; 4]; // zero-filled, so an index past the encoded length yields 0
+ $ch.encode_utf8(&mut utf8_encoder_buf);
+ utf8_encoder_buf[$i]
+ }};
+}
+
+#[test]
+fn test_non_ascii() { // Perfect-hash trie over keys that include non-ASCII (Greek) text.
+ let litemap: LiteMap<&[u8], usize> = [
+ ("".as_bytes(), 0),
+ ("axb".as_bytes(), 100),
+ ("ayc".as_bytes(), 2),
+ ("azd".as_bytes(), 3),
+ ("bxe".as_bytes(), 4),
+ ("bxefg".as_bytes(), 500),
+ ("bxefh".as_bytes(), 6),
+ ("bxei".as_bytes(), 7),
+ ("bxeikl".as_bytes(), 8),
+ ("bxeiklmΚαλημέρααα".as_bytes(), 9),
+ ("bxeiklmαnλo".as_bytes(), 10),
+ ("bxeiklmη".as_bytes(), 11),
+ ]
+ .into_iter()
+ .collect();
+
+ #[rustfmt::skip]
+ let expected_bytes = &[
+ 0b10000000, // value 0
+ 0b11000010, // branch of 2
+ b'a', //
+ b'b', //
+ 13, //
+ 0b11000011, // start of 'a' subtree: branch of 3
+ b'x', //
+ b'y', //
+ b'z', //
+ 3, //
+ 5, //
+ b'b', //
+ 0b10010000, // value 100 (lead)
+ 0x54, // value 100 (trail)
+ b'c', //
+ 0b10000010, // value 2
+ b'd', //
+ 0b10000011, // value 3
+ b'x', // start of 'b' subtree
+ b'e', //
+ 0b10000100, // value 4
+ 0b11000010, // branch of 2
+ b'f', //
+ b'i', //
+ 7, //
+ 0b11000010, // branch of 2
+ b'g', //
+ b'h', //
+ 2, //
+ 0b10010011, // value 500 (lead)
+ 0x64, // value 500 (trail)
+ 0b10000110, // value 6
+ 0b10000111, // value 7
+ b'k', //
+ b'l', //
+ 0b10001000, // value 8
+ b'm', //
+ 0b10100001, // span of length 1
+ utf8_byte!('Κ', 0), // NOTE: all three letters have the same lead byte
+ 0b11000011, // branch of 3
+ utf8_byte!('Κ', 1),
+ utf8_byte!('α', 1),
+ utf8_byte!('η', 1),
+ 21,
+ 27,
+ 0b10110000, // span of length 18 (lead)
+ 0b00000010, // span of length 18 (trail)
+ utf8_byte!('α', 0),
+ utf8_byte!('α', 1),
+ utf8_byte!('λ', 0),
+ utf8_byte!('λ', 1),
+ utf8_byte!('η', 0),
+ utf8_byte!('η', 1),
+ utf8_byte!('μ', 0),
+ utf8_byte!('μ', 1),
+ utf8_byte!('έ', 0),
+ utf8_byte!('έ', 1),
+ utf8_byte!('ρ', 0),
+ utf8_byte!('ρ', 1),
+ utf8_byte!('α', 0),
+ utf8_byte!('α', 1),
+ utf8_byte!('α', 0),
+ utf8_byte!('α', 1),
+ utf8_byte!('α', 0),
+ utf8_byte!('α', 1),
+ 0b10001001, // value 9
+ b'n',
+ 0b10100010, // span of length 2
+ utf8_byte!('λ', 0),
+ utf8_byte!('λ', 1),
+ b'o',
+ 0b10001010, // value 10
+ 0b10001011, // value 11
+ ];
+ let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); // only the perfect-hash trie is built in this test
+ assert_bytes_eq!(73, trie_phf.as_bytes(), expected_bytes);
+ check_phf_bytes_trie(&litemap, &trie_phf);
+}
+
+#[test]
+fn test_max_branch() { // Exercises branches over every possible byte value, at the root and under a `\0` prefix.
+ // Evaluate a branch with all 256 possible children
+ let mut litemap: LiteMap<&[u8], usize> = LiteMap::new_vec();
+ let all_bytes: Vec<u8> = (u8::MIN..=u8::MAX).collect();
+ assert_eq!(all_bytes.len(), 256);
+ let all_bytes_prefixed: Vec<[u8; 2]> = (u8::MIN..=u8::MAX).map(|x| [b'\0', x]).collect(); // two-byte keys: a zero byte followed by each byte value
+ for b in all_bytes.iter() {
+ litemap.insert(core::slice::from_ref(b), *b as usize); // one-byte key [b] -> b
+ }
+ for s in all_bytes_prefixed.iter() {
+ litemap.insert(s, s[1] as usize); // key [0, x] -> x
+ }
+ let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap();
+ assert_eq!(trie_phf.byte_len(), 3042);
+ check_phf_bytes_trie(&litemap, &trie_phf);
+}
+
+#[test]
+fn test_short_subtags_10pct() { // Pins serialized sizes for the `short_subtags_10pct` string set across tries and zerovec maps.
+ let litemap = strings_to_litemap(testdata::short_subtags_10pct::STRINGS);
+
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();
+ assert_eq!(trie.byte_len(), 1050);
+ check_simple_ascii_trie(&litemap, &trie);
+
+ let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
+ let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
+ assert_eq!(trie_phf.byte_len(), 1100);
+ check_phf_ascii_trie(&litemap, &trie_phf);
+
+ let zhm: zerovec::ZeroMap<[u8], u32> = litemap.iter().map(|(a, b)| (*a, *b as u32)).collect(); // from here on: postcard sizes of the same data in zerovec maps
+ let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
+ assert_eq!(zhm_buf.len(), 1890);
+
+ let zhm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); // `as u8` may truncate values; only the serialized size is checked
+ let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
+ assert_eq!(zhm_buf.len(), 1326);
+
+ let zhm: zerovec::ZeroHashMap<[u8], u32> =
+ litemap.iter().map(|(a, b)| (*a, *b as u32)).collect();
+ let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
+ assert_eq!(zhm_buf.len(), 3396);
+
+ let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); // `as u8` may truncate values; only the serialized size is checked
+ let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
+ assert_eq!(zhm_buf.len(), 2832);
+}
+
+#[test]
+fn test_short_subtags() { // Pins serialized sizes for the `short_subtags` string set across tries and zerovec maps.
+ let litemap = strings_to_litemap(testdata::short_subtags::STRINGS);
+
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();
+ assert_eq!(trie.byte_len(), 8793);
+ check_simple_ascii_trie(&litemap, &trie);
+
+ let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>();
+ let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap();
+ assert_eq!(trie_phf.byte_len(), 9400);
+ check_phf_ascii_trie(&litemap, &trie_phf);
+
+ let zm: zerovec::ZeroMap<[u8], u32> = litemap.iter().map(|(a, b)| (*a, *b as u32)).collect(); // from here on: postcard sizes of the same data in zerovec maps
+ let zhm_buf = postcard::to_allocvec(&zm).unwrap();
+ assert_eq!(zhm_buf.len(), 18931);
+
+ let zm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); // `as u8` may truncate values; only the serialized size is checked
+ let zhm_buf = postcard::to_allocvec(&zm).unwrap();
+ assert_eq!(zhm_buf.len(), 13300);
+
+ let zhm: zerovec::ZeroHashMap<[u8], u32> =
+ litemap.iter().map(|(a, b)| (*a, *b as u32)).collect();
+ let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
+ assert_eq!(zhm_buf.len(), 33949);
+
+ let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); // `as u8` may truncate values; only the serialized size is checked
+ let zhm_buf = postcard::to_allocvec(&zhm).unwrap();
+ assert_eq!(zhm_buf.len(), 28318);
+}
diff --git a/vendor/zerotrie/tests/data/data.rs b/vendor/zerotrie/tests/data/data.rs
new file mode 100644
index 00000000..6dd483b7
--- /dev/null
+++ b/vendor/zerotrie/tests/data/data.rs
@@ -0,0 +1,2210 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use litemap::LiteMap;
+
+const fn single_byte_intermediate_value(x: u8) -> u8 { // returns `x` with the top bit (0b1000_0000) set
+ debug_assert!(x <= 0b00001111); // debug builds only: `x` must fit in the low four bits
+ x | 0b10000000
+}
+
+use single_byte_intermediate_value as single_byte_short_value;
+
+const fn single_byte_branch_equal(x: u8) -> u8 { // returns `x` with the top two bits (0b1100_0000) set
+ debug_assert!(x <= 0b00001111); // debug builds only: `x` must fit in the low four bits
+ x | 0b11000000
+}
+
+use single_byte_branch_equal as single_byte_short_match;
+
+#[allow(dead_code)] // NOTE(review): presumably not every test that includes this file calls it - confirm
+pub fn strings_to_litemap<'a>(strings: &[&'a str]) -> LiteMap<&'a [u8], usize> { // key: string bytes, value: index
+ strings
+ .iter()
+ .copied() // &&str -> &str, so the keys borrow for 'a rather than from the slice
+ .map(|x| x.as_bytes())
+ .enumerate() // pairs each key with its position in `strings`
+ .map(|(i, s)| (s, i)) // flip to (key, value) order for collect()
+ .collect()
+}
+
+#[allow(dead_code)]
+pub mod basic { // small fixtures: key/value tables plus byte and JSON forms for ASCII, Unicode and binary keys
+ use super::*;
+ pub static TRIE_ASCII: &[u8] = &[ // hand-written bytes whose layout mirrors the entries of DATA_ASCII
+ b'a',
+ b'b',
+ single_byte_short_value(1),
+ b'c',
+ single_byte_short_value(2),
+ // Begin Match Node
+ single_byte_short_match(3), // three-way branch over the bytes d, e, f listed next
+ b'd',
+ b'e',
+ b'f',
+ 5, // matches the "subslice @ 5" marker below (the 'e' arm)
+ 8, // matches the "subslice @ 8" marker below (the 'f' arm)
+ // End Match Node
+ // subslice @ 0
+ single_byte_short_value(3),
+ b'g',
+ b'h',
+ b'i',
+ single_byte_short_value(4),
+ // subslice @ 5
+ b'j',
+ b'k',
+ single_byte_short_value(5),
+ // subslice @ 8
+ // Begin Match Node
+ single_byte_short_match(2), // two-way branch over the bytes l, m listed next
+ b'l',
+ b'm',
+ 1, // matches the "subsubslice @ 1" marker below (the 'm' arm)
+ // End Match Node
+ // subsubslice @ 0
+ single_byte_short_value(6),
+ // subsubslice @ 1
+ b'n',
+ single_byte_short_value(7),
+ ];
+ pub static DATA_ASCII: &[(&[u8], usize)] = &[ // (key, value) pairs laid out in TRIE_ASCII above
+ (b"ab", 1),
+ (b"abc", 2),
+ (b"abcd", 3),
+ (b"abcdghi", 4),
+ (b"abcejk", 5),
+ (b"abcfl", 6),
+ (b"abcfmn", 7),
+ ];
+
+ pub static TRIE_UNICODE: &[u8] = &[ // opens with 196 (0b1100_0100) then 100, 102, 103, 107 = b'd', b'f', b'g', b'k'
+ 196, 100, 102, 103, 107, 12, 17, 23, 195, 97, 101, 105, 2, 4, 115, 129, 114, 130, 101, 131,
+ 162, 195, 188, 114, 132, 114, 111, 162, 195, 159, 133, 162, 195, 182, 110, 110, 101, 110,
+ 134,
+ ];
+ pub static DATA_UNICODE: &[(&[u8], usize)] = &[ // keys are the UTF-8 bytes of each word
+ ("das".as_bytes(), 1),
+ ("der".as_bytes(), 2),
+ ("die".as_bytes(), 3),
+ ("für".as_bytes(), 4),
+ ("groß".as_bytes(), 5),
+ ("können".as_bytes(), 6),
+ ];
+
+ pub static TRIE_BINARY: &[u8] = &[ // opens with 196 then 0, 129, 144, 240 = the first bytes of the DATA_BINARY keys
+ 196, 0, 129, 144, 240, 3, 9, 12, 161, 144, 131, 194, 130, 131, 1, 129, 130, 161, 144, 132,
+ 161, 255, 133, 161, 255, 134,
+ ];
+ pub static DATA_BINARY: &[(&[u8], usize)] = &[ // listed in ascending key-byte order; values are not sequential
+ (b"\0\x90", 3),
+ (b"\x81\x82", 1),
+ (b"\x81\x83", 2),
+ (b"\x90\x90", 4),
+ (b"\xF0\xFF", 5),
+ (b"\xF0\xFF\xFF", 6),
+ ];
+
+ // Note: Cow and ZeroVec have the same serialized form
+ pub static JSON_STR_ASCII: &str = "{\"trie\":{\"ab\":1,\"abc\":2,\"abcd\":3,\"abcdghi\":4,\"abcejk\":5,\"abcfl\":6,\"abcfmn\":7}}";
+ pub static JSON_STR_UNICODE: &str =
+ "{\"trie\":{\"das\":1,\"der\":2,\"die\":3,\"für\":4,\"groß\":5,\"können\":6}}";
+ pub static JSON_STR_BINARY: &str = "{\"trie\":[[[0,144],3],[[129,130],1],[[129,131],2],[[144,144],4],[[240,255],5],[[240,255,255],6]]}"; // non-UTF-8 keys: array of [key bytes, value] pairs instead of an object
+ pub static BINCODE_BYTES_ASCII: &[u8] = &[ // same byte values as TRIE_ASCII, written as numeric literals
+ 97, 98, 129, 99, 130, 195, 100, 101, 102, 5, 8, 131, 103, 104,
+ 105, 132, 106, 107, 133, 194, 108, 109, 1, 134, 110, 135,
+ ];
+ pub static BINCODE_BYTES_UNICODE: &[u8] = &[ // same byte values as TRIE_UNICODE
+ 196, 100, 102, 103, 107, 12, 17, 23, 195, 97, 101, 105, 2, 4, 115,
+ 129, 114, 130, 101, 131, 162, 195, 188, 114, 132, 114, 111, 162, 195, 159, 133, 162, 195,
+ 182, 110, 110, 101, 110, 134,
+ ];
+ pub static BINCODE_BYTES_BINARY: &[u8] = &[ // same byte values as TRIE_BINARY
+ 196, 0, 129, 144, 240, 3, 9, 12, 161, 144, 131, 194, 130, 131, 1,
+ 129, 130, 161, 144, 132, 161, 255, 133, 161, 255, 134,
+ ];
+}
+
+#[allow(dead_code)]
+pub mod short_subtags {
+ pub static STRINGS: &[&str] = &[
+ "aa",
+ "aai",
+ "aak",
+ "aau",
+ "ab",
+ "abi",
+ "abq",
+ "abr",
+ "abt",
+ "aby",
+ "acd",
+ "ace",
+ "ach",
+ "ada",
+ "ade",
+ "adj",
+ "adp",
+ "ady",
+ "adz",
+ "ae",
+ "aeb",
+ "aey",
+ "af",
+ "agc",
+ "agd",
+ "agg",
+ "agm",
+ "ago",
+ "agq",
+ "aha",
+ "ahl",
+ "aho",
+ "ajg",
+ "ak",
+ "akk",
+ "ala",
+ "ali",
+ "aln",
+ "alt",
+ "am",
+ "amm",
+ "amn",
+ "amo",
+ "amp",
+ "an",
+ "anc",
+ "ank",
+ "ann",
+ "any",
+ "aoj",
+ "aom",
+ "aoz",
+ "apc",
+ "apd",
+ "ape",
+ "apr",
+ "aps",
+ "apz",
+ "ar",
+ "arc",
+ "arc-Nbat",
+ "arc-Palm",
+ "arh",
+ "arn",
+ "aro",
+ "arq",
+ "ars",
+ "ary",
+ "arz",
+ "as",
+ "asa",
+ "ase",
+ "asg",
+ "aso",
+ "ast",
+ "ata",
+ "atg",
+ "atj",
+ "auy",
+ "av",
+ "avl",
+ "avn",
+ "avt",
+ "avu",
+ "awa",
+ "awb",
+ "awo",
+ "awx",
+ "ay",
+ "ayb",
+ "az",
+ "az-Arab",
+ "az-IQ",
+ "az-IR",
+ "az-RU",
+ "ba",
+ "bal",
+ "ban",
+ "bap",
+ "bar",
+ "bas",
+ "bav",
+ "bax",
+ "bba",
+ "bbb",
+ "bbc",
+ "bbd",
+ "bbj",
+ "bbp",
+ "bbr",
+ "bcf",
+ "bch",
+ "bci",
+ "bcm",
+ "bcn",
+ "bco",
+ "bcq",
+ "bcu",
+ "bdd",
+ "be",
+ "bef",
+ "beh",
+ "bej",
+ "bem",
+ "bet",
+ "bew",
+ "bex",
+ "bez",
+ "bfd",
+ "bfq",
+ "bft",
+ "bfy",
+ "bg",
+ "bgc",
+ "bgn",
+ "bgx",
+ "bhb",
+ "bhg",
+ "bhi",
+ "bhl",
+ "bho",
+ "bhy",
+ "bi",
+ "bib",
+ "big",
+ "bik",
+ "bim",
+ "bin",
+ "bio",
+ "biq",
+ "bjh",
+ "bji",
+ "bjj",
+ "bjn",
+ "bjo",
+ "bjr",
+ "bjt",
+ "bjz",
+ "bkc",
+ "bkm",
+ "bkq",
+ "bku",
+ "bkv",
+ "bla",
+ "blg",
+ "blt",
+ "bm",
+ "bmh",
+ "bmk",
+ "bmq",
+ "bmu",
+ "bn",
+ "bng",
+ "bnm",
+ "bnp",
+ "bo",
+ "boj",
+ "bom",
+ "bon",
+ "bpy",
+ "bqc",
+ "bqi",
+ "bqp",
+ "bqv",
+ "br",
+ "bra",
+ "brh",
+ "brx",
+ "brz",
+ "bs",
+ "bsj",
+ "bsq",
+ "bss",
+ "bst",
+ "bto",
+ "btt",
+ "btv",
+ "bua",
+ "buc",
+ "bud",
+ "bug",
+ "buk",
+ "bum",
+ "buo",
+ "bus",
+ "buu",
+ "bvb",
+ "bwd",
+ "bwr",
+ "bxh",
+ "bye",
+ "byn",
+ "byr",
+ "bys",
+ "byv",
+ "byx",
+ "bza",
+ "bze",
+ "bzf",
+ "bzh",
+ "bzw",
+ "ca",
+ "cad",
+ "can",
+ "cbj",
+ "cch",
+ "ccp",
+ "ce",
+ "ceb",
+ "cfa",
+ "cgg",
+ "ch",
+ "chk",
+ "chm",
+ "cho",
+ "chp",
+ "chr",
+ "cic",
+ "cja",
+ "cjm",
+ "cjv",
+ "ckb",
+ "ckl",
+ "cko",
+ "cky",
+ "cla",
+ "clc",
+ "cme",
+ "cmg",
+ "co",
+ "cop",
+ "cps",
+ "cr",
+ "crg",
+ "crh",
+ "crk",
+ "crl",
+ "crs",
+ "cs",
+ "csb",
+ "csw",
+ "ctd",
+ "cu",
+ "cu-Glag",
+ "cv",
+ "cy",
+ "da",
+ "dad",
+ "daf",
+ "dag",
+ "dah",
+ "dak",
+ "dar",
+ "dav",
+ "dbd",
+ "dbq",
+ "dcc",
+ "ddn",
+ "de",
+ "ded",
+ "den",
+ "dga",
+ "dgh",
+ "dgi",
+ "dgl",
+ "dgr",
+ "dgz",
+ "dia",
+ "dje",
+ "dmf",
+ "dnj",
+ "dob",
+ "doi",
+ "dop",
+ "dow",
+ "drh",
+ "dri",
+ "drs",
+ "dsb",
+ "dtm",
+ "dtp",
+ "dts",
+ "dty",
+ "dua",
+ "duc",
+ "dud",
+ "dug",
+ "dv",
+ "dva",
+ "dww",
+ "dyo",
+ "dyu",
+ "dz",
+ "dzg",
+ "ebu",
+ "ee",
+ "efi",
+ "egl",
+ "egy",
+ "eka",
+ "eky",
+ "el",
+ "ema",
+ "emi",
+ "en",
+ "en-Shaw",
+ "enn",
+ "enq",
+ "eo",
+ "eri",
+ "es",
+ "esg",
+ "esu",
+ "et",
+ "etr",
+ "ett",
+ "etu",
+ "etx",
+ "eu",
+ "ewo",
+ "ext",
+ "eza",
+ "fa",
+ "faa",
+ "fab",
+ "fag",
+ "fai",
+ "fan",
+ "ff",
+ "ff-Adlm",
+ "ffi",
+ "ffm",
+ "fi",
+ "fia",
+ "fil",
+ "fit",
+ "fj",
+ "flr",
+ "fmp",
+ "fo",
+ "fod",
+ "fon",
+ "for",
+ "fpe",
+ "fqs",
+ "fr",
+ "frc",
+ "frp",
+ "frr",
+ "frs",
+ "fub",
+ "fud",
+ "fue",
+ "fuf",
+ "fuh",
+ "fuq",
+ "fur",
+ "fuv",
+ "fuy",
+ "fvr",
+ "fy",
+ "ga",
+ "gaa",
+ "gaf",
+ "gag",
+ "gah",
+ "gaj",
+ "gam",
+ "gan",
+ "gaw",
+ "gay",
+ "gba",
+ "gbf",
+ "gbm",
+ "gby",
+ "gbz",
+ "gcr",
+ "gd",
+ "gde",
+ "gdn",
+ "gdr",
+ "geb",
+ "gej",
+ "gel",
+ "gez",
+ "gfk",
+ "ggn",
+ "ghs",
+ "gil",
+ "gim",
+ "gjk",
+ "gjn",
+ "gju",
+ "gkn",
+ "gkp",
+ "gl",
+ "glk",
+ "gmm",
+ "gmv",
+ "gn",
+ "gnd",
+ "gng",
+ "god",
+ "gof",
+ "goi",
+ "gom",
+ "gon",
+ "gor",
+ "gos",
+ "got",
+ "grb",
+ "grc",
+ "grc-Linb",
+ "grt",
+ "grw",
+ "gsw",
+ "gu",
+ "gub",
+ "guc",
+ "gud",
+ "gur",
+ "guw",
+ "gux",
+ "guz",
+ "gv",
+ "gvf",
+ "gvr",
+ "gvs",
+ "gwc",
+ "gwi",
+ "gwt",
+ "gyi",
+ "ha",
+ "ha-CM",
+ "ha-SD",
+ "hag",
+ "hak",
+ "ham",
+ "haw",
+ "haz",
+ "hbb",
+ "hdy",
+ "he",
+ "hhy",
+ "hi",
+ "hi-Latn",
+ "hia",
+ "hif",
+ "hig",
+ "hih",
+ "hil",
+ "hla",
+ "hlu",
+ "hmd",
+ "hmt",
+ "hnd",
+ "hne",
+ "hnj",
+ "hnn",
+ "hno",
+ "ho",
+ "hoc",
+ "hoj",
+ "hot",
+ "hr",
+ "hsb",
+ "hsn",
+ "ht",
+ "hu",
+ "hui",
+ "hur",
+ "hy",
+ "hz",
+ "ia",
+ "ian",
+ "iar",
+ "iba",
+ "ibb",
+ "iby",
+ "ica",
+ "ich",
+ "id",
+ "idd",
+ "idi",
+ "idu",
+ "ife",
+ "ig",
+ "igb",
+ "ige",
+ "ii",
+ "ijj",
+ "ik",
+ "ikk",
+ "ikw",
+ "ikx",
+ "ilo",
+ "imo",
+ "in",
+ "inh",
+ "io",
+ "iou",
+ "iri",
+ "is",
+ "it",
+ "iu",
+ "iw",
+ "iwm",
+ "iws",
+ "izh",
+ "izi",
+ "ja",
+ "jab",
+ "jam",
+ "jar",
+ "jbo",
+ "jbu",
+ "jen",
+ "jgk",
+ "jgo",
+ "ji",
+ "jib",
+ "jmc",
+ "jml",
+ "jra",
+ "jut",
+ "jv",
+ "jw",
+ "ka",
+ "kaa",
+ "kab",
+ "kac",
+ "kad",
+ "kai",
+ "kaj",
+ "kam",
+ "kao",
+ "kaw",
+ "kbd",
+ "kbm",
+ "kbp",
+ "kbq",
+ "kbx",
+ "kby",
+ "kcg",
+ "kck",
+ "kcl",
+ "kct",
+ "kde",
+ "kdh",
+ "kdl",
+ "kdt",
+ "kea",
+ "ken",
+ "kez",
+ "kfo",
+ "kfr",
+ "kfy",
+ "kg",
+ "kge",
+ "kgf",
+ "kgp",
+ "kha",
+ "khb",
+ "khn",
+ "khq",
+ "khs",
+ "kht",
+ "khw",
+ "khz",
+ "ki",
+ "kij",
+ "kiu",
+ "kiw",
+ "kj",
+ "kjd",
+ "kjg",
+ "kjs",
+ "kjy",
+ "kk",
+ "kk-AF",
+ "kk-Arab",
+ "kk-CN",
+ "kk-IR",
+ "kk-MN",
+ "kkc",
+ "kkj",
+ "kl",
+ "kln",
+ "klq",
+ "klt",
+ "klx",
+ "km",
+ "kmb",
+ "kmh",
+ "kmo",
+ "kms",
+ "kmu",
+ "kmw",
+ "kn",
+ "knf",
+ "knp",
+ "ko",
+ "koi",
+ "kok",
+ "kol",
+ "kos",
+ "koz",
+ "kpe",
+ "kpf",
+ "kpo",
+ "kpr",
+ "kpx",
+ "kqb",
+ "kqf",
+ "kqs",
+ "kqy",
+ "kr",
+ "krc",
+ "kri",
+ "krj",
+ "krl",
+ "krs",
+ "kru",
+ "ks",
+ "ksb",
+ "ksd",
+ "ksf",
+ "ksh",
+ "ksj",
+ "ksr",
+ "ktb",
+ "ktm",
+ "kto",
+ "ktr",
+ "ku",
+ "ku-Arab",
+ "ku-LB",
+ "ku-Yezi",
+ "kub",
+ "kud",
+ "kue",
+ "kuj",
+ "kum",
+ "kun",
+ "kup",
+ "kus",
+ "kv",
+ "kvg",
+ "kvr",
+ "kvx",
+ "kw",
+ "kwj",
+ "kwk",
+ "kwo",
+ "kwq",
+ "kxa",
+ "kxc",
+ "kxe",
+ "kxl",
+ "kxm",
+ "kxp",
+ "kxw",
+ "kxz",
+ "ky",
+ "ky-Arab",
+ "ky-CN",
+ "ky-Latn",
+ "ky-TR",
+ "kye",
+ "kyx",
+ "kzh",
+ "kzj",
+ "kzr",
+ "kzt",
+ "la",
+ "lab",
+ "lad",
+ "lag",
+ "lah",
+ "laj",
+ "las",
+ "lb",
+ "lbe",
+ "lbu",
+ "lbw",
+ "lcm",
+ "lcp",
+ "ldb",
+ "led",
+ "lee",
+ "lem",
+ "lep",
+ "leq",
+ "leu",
+ "lez",
+ "lg",
+ "lgg",
+ "li",
+ "lia",
+ "lid",
+ "lif",
+ "lif-Limb",
+ "lig",
+ "lih",
+ "lij",
+ "lil",
+ "lis",
+ "ljp",
+ "lki",
+ "lkt",
+ "lle",
+ "lln",
+ "lmn",
+ "lmo",
+ "lmp",
+ "ln",
+ "lns",
+ "lnu",
+ "lo",
+ "loj",
+ "lok",
+ "lol",
+ "lor",
+ "los",
+ "loz",
+ "lrc",
+ "lt",
+ "ltg",
+ "lu",
+ "lua",
+ "luo",
+ "luy",
+ "luz",
+ "lv",
+ "lwl",
+ "lzh",
+ "lzz",
+ "mad",
+ "maf",
+ "mag",
+ "mai",
+ "mak",
+ "man",
+ "man-GN",
+ "man-Nkoo",
+ "mas",
+ "maw",
+ "maz",
+ "mbh",
+ "mbo",
+ "mbq",
+ "mbu",
+ "mbw",
+ "mci",
+ "mcp",
+ "mcq",
+ "mcr",
+ "mcu",
+ "mda",
+ "mde",
+ "mdf",
+ "mdh",
+ "mdj",
+ "mdr",
+ "mdx",
+ "med",
+ "mee",
+ "mek",
+ "men",
+ "mer",
+ "met",
+ "meu",
+ "mfa",
+ "mfe",
+ "mfn",
+ "mfo",
+ "mfq",
+ "mg",
+ "mgh",
+ "mgl",
+ "mgo",
+ "mgp",
+ "mgy",
+ "mh",
+ "mhi",
+ "mhl",
+ "mi",
+ "mic",
+ "mif",
+ "min",
+ "miw",
+ "mk",
+ "mki",
+ "mkl",
+ "mkp",
+ "mkw",
+ "ml",
+ "mle",
+ "mlp",
+ "mls",
+ "mmo",
+ "mmu",
+ "mmx",
+ "mn",
+ "mn-CN",
+ "mn-Mong",
+ "mna",
+ "mnf",
+ "mni",
+ "mnw",
+ "mo",
+ "moa",
+ "moe",
+ "moh",
+ "mos",
+ "mox",
+ "mpp",
+ "mps",
+ "mpt",
+ "mpx",
+ "mql",
+ "mr",
+ "mrd",
+ "mrj",
+ "mro",
+ "ms",
+ "ms-CC",
+ "mt",
+ "mtc",
+ "mtf",
+ "mti",
+ "mtr",
+ "mua",
+ "mur",
+ "mus",
+ "mva",
+ "mvn",
+ "mvy",
+ "mwk",
+ "mwr",
+ "mwv",
+ "mww",
+ "mxc",
+ "mxm",
+ "my",
+ "myk",
+ "mym",
+ "myv",
+ "myw",
+ "myx",
+ "myz",
+ "mzk",
+ "mzm",
+ "mzn",
+ "mzp",
+ "mzw",
+ "mzz",
+ "na",
+ "nac",
+ "naf",
+ "nak",
+ "nan",
+ "nap",
+ "naq",
+ "nas",
+ "nb",
+ "nca",
+ "nce",
+ "ncf",
+ "nch",
+ "nco",
+ "ncu",
+ "nd",
+ "ndc",
+ "nds",
+ "ne",
+ "neb",
+ "new",
+ "nex",
+ "nfr",
+ "ng",
+ "nga",
+ "ngb",
+ "ngl",
+ "nhb",
+ "nhe",
+ "nhw",
+ "nif",
+ "nii",
+ "nij",
+ "nin",
+ "niu",
+ "niy",
+ "niz",
+ "njo",
+ "nkg",
+ "nko",
+ "nl",
+ "nmg",
+ "nmz",
+ "nn",
+ "nnf",
+ "nnh",
+ "nnk",
+ "nnm",
+ "nnp",
+ "no",
+ "nod",
+ "noe",
+ "non",
+ "nop",
+ "nou",
+ "nqo",
+ "nr",
+ "nrb",
+ "nsk",
+ "nsn",
+ "nso",
+ "nss",
+ "nst",
+ "ntm",
+ "ntr",
+ "nui",
+ "nup",
+ "nus",
+ "nuv",
+ "nux",
+ "nv",
+ "nwb",
+ "nxq",
+ "nxr",
+ "ny",
+ "nym",
+ "nyn",
+ "nzi",
+ "oc",
+ "ogc",
+ "oj",
+ "ojs",
+ "oka",
+ "okr",
+ "okv",
+ "om",
+ "ong",
+ "onn",
+ "ons",
+ "opm",
+ "or",
+ "oro",
+ "oru",
+ "os",
+ "osa",
+ "ota",
+ "otk",
+ "oui",
+ "ozm",
+ "pa",
+ "pa-Arab",
+ "pa-PK",
+ "pag",
+ "pal",
+ "pal-Phlp",
+ "pam",
+ "pap",
+ "pau",
+ "pbi",
+ "pcd",
+ "pcm",
+ "pdc",
+ "pdt",
+ "ped",
+ "peo",
+ "pex",
+ "pfl",
+ "phl",
+ "phn",
+ "pil",
+ "pip",
+ "pka",
+ "pko",
+ "pl",
+ "pla",
+ "pms",
+ "png",
+ "pnn",
+ "pnt",
+ "pon",
+ "ppa",
+ "ppo",
+ "pqm",
+ "pra",
+ "prd",
+ "prg",
+ "ps",
+ "pss",
+ "pt",
+ "ptp",
+ "puu",
+ "pwa",
+ "qu",
+ "quc",
+ "qug",
+ "rai",
+ "raj",
+ "rao",
+ "rcf",
+ "rej",
+ "rel",
+ "res",
+ "rgn",
+ "rhg",
+ "ria",
+ "rif",
+ "rif-NL",
+ "rjs",
+ "rkt",
+ "rm",
+ "rmf",
+ "rmo",
+ "rmt",
+ "rmu",
+ "rn",
+ "rna",
+ "rng",
+ "ro",
+ "rob",
+ "rof",
+ "roo",
+ "rro",
+ "rtm",
+ "ru",
+ "rue",
+ "rug",
+ "rw",
+ "rwk",
+ "rwo",
+ "ryu",
+ "sa",
+ "saf",
+ "sah",
+ "saq",
+ "sas",
+ "sat",
+ "sav",
+ "saz",
+ "sba",
+ "sbe",
+ "sbp",
+ "sc",
+ "sck",
+ "scl",
+ "scn",
+ "sco",
+ "sd",
+ "sd-Deva",
+ "sd-IN",
+ "sd-Khoj",
+ "sd-Sind",
+ "sdc",
+ "sdh",
+ "se",
+ "sef",
+ "seh",
+ "sei",
+ "ses",
+ "sg",
+ "sga",
+ "sgs",
+ "sgw",
+ "sgz",
+ "shi",
+ "shk",
+ "shn",
+ "shu",
+ "si",
+ "sid",
+ "sig",
+ "sil",
+ "sim",
+ "sjr",
+ "sk",
+ "skc",
+ "skr",
+ "sks",
+ "sl",
+ "sld",
+ "sli",
+ "sll",
+ "sly",
+ "sm",
+ "sma",
+ "smj",
+ "smn",
+ "smp",
+ "smq",
+ "sms",
+ "sn",
+ "snc",
+ "snk",
+ "snp",
+ "snx",
+ "sny",
+ "so",
+ "sog",
+ "sok",
+ "soq",
+ "sou",
+ "soy",
+ "spd",
+ "spl",
+ "sps",
+ "sq",
+ "sr",
+ "sr-ME",
+ "sr-RO",
+ "sr-RU",
+ "sr-TR",
+ "srb",
+ "srn",
+ "srr",
+ "srx",
+ "ss",
+ "ssd",
+ "ssg",
+ "ssy",
+ "st",
+ "stk",
+ "stq",
+ "su",
+ "sua",
+ "sue",
+ "suk",
+ "sur",
+ "sus",
+ "sv",
+ "sw",
+ "swb",
+ "swc",
+ "swg",
+ "swp",
+ "swv",
+ "sxn",
+ "sxw",
+ "syl",
+ "syr",
+ "szl",
+ "ta",
+ "taj",
+ "tal",
+ "tan",
+ "taq",
+ "tbc",
+ "tbd",
+ "tbf",
+ "tbg",
+ "tbo",
+ "tbw",
+ "tbz",
+ "tci",
+ "tcy",
+ "tdd",
+ "tdg",
+ "tdh",
+ "tdu",
+ "te",
+ "ted",
+ "tem",
+ "teo",
+ "tet",
+ "tfi",
+ "tg",
+ "tg-Arab",
+ "tg-PK",
+ "tgc",
+ "tgo",
+ "tgu",
+ "th",
+ "thl",
+ "thq",
+ "thr",
+ "ti",
+ "tif",
+ "tig",
+ "tik",
+ "tim",
+ "tio",
+ "tiv",
+ "tk",
+ "tkl",
+ "tkr",
+ "tkt",
+ "tl",
+ "tlf",
+ "tlx",
+ "tly",
+ "tmh",
+ "tmy",
+ "tn",
+ "tnh",
+ "to",
+ "tof",
+ "tog",
+ "toq",
+ "tpi",
+ "tpm",
+ "tpz",
+ "tqo",
+ "tr",
+ "tru",
+ "trv",
+ "trw",
+ "ts",
+ "tsd",
+ "tsf",
+ "tsg",
+ "tsj",
+ "tsw",
+ "tt",
+ "ttd",
+ "tte",
+ "ttj",
+ "ttr",
+ "tts",
+ "ttt",
+ "tuh",
+ "tul",
+ "tum",
+ "tuq",
+ "tvd",
+ "tvl",
+ "tvu",
+ "twh",
+ "twq",
+ "txg",
+ "txo",
+ "ty",
+ "tya",
+ "tyv",
+ "tzm",
+ "ubu",
+ "udi",
+ "udm",
+ "ug",
+ "ug-Cyrl",
+ "ug-KZ",
+ "ug-MN",
+ "uga",
+ "uk",
+ "uli",
+ "umb",
+ "und",
+ "und-002",
+ "und-003",
+ "und-005",
+ "und-009",
+ "und-011",
+ "und-013",
+ "und-014",
+ "und-015",
+ "und-017",
+ "und-018",
+ "und-019",
+ "und-021",
+ "und-029",
+ "und-030",
+ "und-034",
+ "und-035",
+ "und-039",
+ "und-053",
+ "und-054",
+ "und-057",
+ "und-061",
+ "und-142",
+ "und-143",
+ "und-145",
+ "und-150",
+ "und-151",
+ "und-154",
+ "und-155",
+ "und-202",
+ "und-419",
+ "und-AD",
+ "und-Adlm",
+ "und-AE",
+ "und-AF",
+ "und-Aghb",
+ "und-Ahom",
+ "und-AL",
+ "und-AM",
+ "und-AO",
+ "und-AQ",
+ "und-AR",
+ "und-Arab",
+ "und-Arab-CC",
+ "und-Arab-CN",
+ "und-Arab-GB",
+ "und-Arab-ID",
+ "und-Arab-IN",
+ "und-Arab-KH",
+ "und-Arab-MM",
+ "und-Arab-MN",
+ "und-Arab-MU",
+ "und-Arab-NG",
+ "und-Arab-PK",
+ "und-Arab-TG",
+ "und-Arab-TH",
+ "und-Arab-TJ",
+ "und-Arab-TR",
+ "und-Arab-YT",
+ "und-Armi",
+ "und-Armn",
+ "und-AS",
+ "und-AT",
+ "und-Avst",
+ "und-AW",
+ "und-AX",
+ "und-AZ",
+ "und-BA",
+ "und-Bali",
+ "und-Bamu",
+ "und-Bass",
+ "und-Batk",
+ "und-BD",
+ "und-BE",
+ "und-Beng",
+ "und-BF",
+ "und-BG",
+ "und-BH",
+ "und-Bhks",
+ "und-BI",
+ "und-BJ",
+ "und-BL",
+ "und-BN",
+ "und-BO",
+ "und-Bopo",
+ "und-BQ",
+ "und-BR",
+ "und-Brah",
+ "und-Brai",
+ "und-BT",
+ "und-Bugi",
+ "und-Buhd",
+ "und-BV",
+ "und-BY",
+ "und-Cakm",
+ "und-Cans",
+ "und-Cari",
+ "und-CD",
+ "und-CF",
+ "und-CG",
+ "und-CH",
+ "und-Cham",
+ "und-Cher",
+ "und-Chrs",
+ "und-CI",
+ "und-CL",
+ "und-CM",
+ "und-CN",
+ "und-CO",
+ "und-Copt",
+ "und-CP",
+ "und-Cpmn",
+ "und-Cpmn-CY",
+ "und-Cprt",
+ "und-CR",
+ "und-CU",
+ "und-CV",
+ "und-CW",
+ "und-CY",
+ "und-Cyrl",
+ "und-Cyrl-AL",
+ "und-Cyrl-BA",
+ "und-Cyrl-GE",
+ "und-Cyrl-GR",
+ "und-Cyrl-MD",
+ "und-Cyrl-RO",
+ "und-Cyrl-SK",
+ "und-Cyrl-TR",
+ "und-Cyrl-XK",
+ "und-CZ",
+ "und-DE",
+ "und-Deva",
+ "und-Deva-BT",
+ "und-Deva-FJ",
+ "und-Deva-MU",
+ "und-Deva-PK",
+ "und-Diak",
+ "und-DJ",
+ "und-DK",
+ "und-DO",
+ "und-Dogr",
+ "und-Dupl",
+ "und-DZ",
+ "und-EA",
+ "und-EC",
+ "und-EE",
+ "und-EG",
+ "und-Egyp",
+ "und-EH",
+ "und-Elba",
+ "und-Elym",
+ "und-ER",
+ "und-ES",
+ "und-ET",
+ "und-Ethi",
+ "und-EU",
+ "und-EZ",
+ "und-FI",
+ "und-FO",
+ "und-FR",
+ "und-GA",
+ "und-GE",
+ "und-Geor",
+ "und-GF",
+ "und-GH",
+ "und-GL",
+ "und-Glag",
+ "und-GN",
+ "und-Gong",
+ "und-Gonm",
+ "und-Goth",
+ "und-GP",
+ "und-GQ",
+ "und-GR",
+ "und-Gran",
+ "und-Grek",
+ "und-Grek-TR",
+ "und-GS",
+ "und-GT",
+ "und-Gujr",
+ "und-Guru",
+ "und-GW",
+ "und-Hanb",
+ "und-Hang",
+ "und-Hani",
+ "und-Hano",
+ "und-Hans",
+ "und-Hant",
+ "und-Hant-CA",
+ "und-Hebr",
+ "und-Hebr-SE",
+ "und-Hebr-UA",
+ "und-Hebr-US",
+ "und-Hira",
+ "und-HK",
+ "und-Hluw",
+ "und-HM",
+ "und-Hmng",
+ "und-Hmnp",
+ "und-HN",
+ "und-HR",
+ "und-HT",
+ "und-HU",
+ "und-Hung",
+ "und-IC",
+ "und-ID",
+ "und-IL",
+ "und-IN",
+ "und-IQ",
+ "und-IR",
+ "und-IS",
+ "und-IT",
+ "und-Ital",
+ "und-Jamo",
+ "und-Java",
+ "und-JO",
+ "und-JP",
+ "und-Jpan",
+ "und-Kali",
+ "und-Kana",
+ "und-Kawi",
+ "und-KE",
+ "und-KG",
+ "und-KH",
+ "und-Khar",
+ "und-Khmr",
+ "und-Khoj",
+ "und-Kits",
+ "und-KM",
+ "und-Knda",
+ "und-Kore",
+ "und-KP",
+ "und-KR",
+ "und-Kthi",
+ "und-KW",
+ "und-KZ",
+ "und-LA",
+ "und-Lana",
+ "und-Laoo",
+ "und-Latn-AF",
+ "und-Latn-AM",
+ "und-Latn-CN",
+ "und-Latn-CY",
+ "und-Latn-DZ",
+ "und-Latn-ET",
+ "und-Latn-GE",
+ "und-Latn-IR",
+ "und-Latn-KM",
+ "und-Latn-MA",
+ "und-Latn-MK",
+ "und-Latn-MM",
+ "und-Latn-MO",
+ "und-Latn-MR",
+ "und-Latn-RU",
+ "und-Latn-SY",
+ "und-Latn-TN",
+ "und-Latn-TW",
+ "und-Latn-UA",
+ "und-LB",
+ "und-Lepc",
+ "und-LI",
+ "und-Limb",
+ "und-Lina",
+ "und-Linb",
+ "und-Lisu",
+ "und-LK",
+ "und-LS",
+ "und-LT",
+ "und-LU",
+ "und-LV",
+ "und-LY",
+ "und-Lyci",
+ "und-Lydi",
+ "und-MA",
+ "und-Mahj",
+ "und-Maka",
+ "und-Mand",
+ "und-Mani",
+ "und-Marc",
+ "und-MC",
+ "und-MD",
+ "und-ME",
+ "und-Medf",
+ "und-Mend",
+ "und-Merc",
+ "und-Mero",
+ "und-MF",
+ "und-MG",
+ "und-MK",
+ "und-ML",
+ "und-Mlym",
+ "und-MM",
+ "und-MN",
+ "und-MO",
+ "und-Modi",
+ "und-Mong",
+ "und-MQ",
+ "und-MR",
+ "und-Mroo",
+ "und-MT",
+ "und-Mtei",
+ "und-MU",
+ "und-Mult",
+ "und-MV",
+ "und-MX",
+ "und-MY",
+ "und-Mymr",
+ "und-Mymr-IN",
+ "und-Mymr-TH",
+ "und-MZ",
+ "und-NA",
+ "und-Nagm",
+ "und-Nand",
+ "und-Narb",
+ "und-Nbat",
+ "und-NC",
+ "und-NE",
+ "und-Newa",
+ "und-NI",
+ "und-Nkoo",
+ "und-NL",
+ "und-NO",
+ "und-NP",
+ "und-Nshu",
+ "und-Ogam",
+ "und-Olck",
+ "und-OM",
+ "und-Orkh",
+ "und-Orya",
+ "und-Osge",
+ "und-Osma",
+ "und-Ougr",
+ "und-PA",
+ "und-Palm",
+ "und-Pauc",
+ "und-PE",
+ "und-Perm",
+ "und-PF",
+ "und-PG",
+ "und-PH",
+ "und-Phag",
+ "und-Phli",
+ "und-Phlp",
+ "und-Phnx",
+ "und-PK",
+ "und-PL",
+ "und-Plrd",
+ "und-PM",
+ "und-PR",
+ "und-Prti",
+ "und-PS",
+ "und-PT",
+ "und-PW",
+ "und-PY",
+ "und-QA",
+ "und-QO",
+ "und-RE",
+ "und-Rjng",
+ "und-RO",
+ "und-Rohg",
+ "und-RS",
+ "und-RU",
+ "und-Runr",
+ "und-RW",
+ "und-SA",
+ "und-Samr",
+ "und-Sarb",
+ "und-Saur",
+ "und-SC",
+ "und-SD",
+ "und-SE",
+ "und-Sgnw",
+ "und-Shaw",
+ "und-Shrd",
+ "und-SI",
+ "und-Sidd",
+ "und-Sind",
+ "und-Sinh",
+ "und-SJ",
+ "und-SK",
+ "und-SM",
+ "und-SN",
+ "und-SO",
+ "und-Sogd",
+ "und-Sogo",
+ "und-Sora",
+ "und-Soyo",
+ "und-SR",
+ "und-ST",
+ "und-Sund",
+ "und-SV",
+ "und-SY",
+ "und-Sylo",
+ "und-Syrc",
+ "und-Tagb",
+ "und-Takr",
+ "und-Tale",
+ "und-Talu",
+ "und-Taml",
+ "und-Tang",
+ "und-Tavt",
+ "und-TD",
+ "und-Telu",
+ "und-TF",
+ "und-Tfng",
+ "und-TG",
+ "und-Tglg",
+ "und-TH",
+ "und-Thaa",
+ "und-Thai",
+ "und-Thai-CN",
+ "und-Thai-KH",
+ "und-Thai-LA",
+ "und-Tibt",
+ "und-Tirh",
+ "und-TJ",
+ "und-TK",
+ "und-TL",
+ "und-TM",
+ "und-TN",
+ "und-Tnsa",
+ "und-TO",
+ "und-Toto",
+ "und-TR",
+ "und-TV",
+ "und-TW",
+ "und-TZ",
+ "und-UA",
+ "und-UG",
+ "und-Ugar",
+ "und-UY",
+ "und-UZ",
+ "und-VA",
+ "und-Vaii",
+ "und-VE",
+ "und-Vith",
+ "und-VN",
+ "und-VU",
+ "und-Wara",
+ "und-Wcho",
+ "und-WF",
+ "und-WS",
+ "und-XK",
+ "und-Xpeo",
+ "und-Xsux",
+ "und-YE",
+ "und-Yezi",
+ "und-Yiii",
+ "und-YT",
+ "und-Zanb",
+ "und-ZW",
+ "unr",
+ "unr-Deva",
+ "unr-NP",
+ "unx",
+ "uok",
+ "ur",
+ "uri",
+ "urt",
+ "urw",
+ "usa",
+ "uth",
+ "utr",
+ "uvh",
+ "uvl",
+ "uz",
+ "uz-AF",
+ "uz-Arab",
+ "uz-CN",
+ "vag",
+ "vai",
+ "van",
+ "ve",
+ "vec",
+ "vep",
+ "vi",
+ "vic",
+ "viv",
+ "vls",
+ "vmf",
+ "vmw",
+ "vo",
+ "vot",
+ "vro",
+ "vun",
+ "vut",
+ "wa",
+ "wae",
+ "waj",
+ "wal",
+ "wan",
+ "war",
+ "wbp",
+ "wbq",
+ "wbr",
+ "wci",
+ "wer",
+ "wgi",
+ "whg",
+ "wib",
+ "wiu",
+ "wiv",
+ "wja",
+ "wji",
+ "wls",
+ "wmo",
+ "wnc",
+ "wni",
+ "wnu",
+ "wo",
+ "wob",
+ "wos",
+ "wrs",
+ "wsg",
+ "wsk",
+ "wtm",
+ "wuu",
+ "wuv",
+ "wwa",
+ "xav",
+ "xbi",
+ "xco",
+ "xcr",
+ "xes",
+ "xh",
+ "xla",
+ "xlc",
+ "xld",
+ "xmf",
+ "xmn",
+ "xmr",
+ "xna",
+ "xnr",
+ "xog",
+ "xon",
+ "xpr",
+ "xrb",
+ "xsa",
+ "xsi",
+ "xsm",
+ "xsr",
+ "xwe",
+ "yam",
+ "yao",
+ "yap",
+ "yas",
+ "yat",
+ "yav",
+ "yay",
+ "yaz",
+ "yba",
+ "ybb",
+ "yby",
+ "yer",
+ "ygr",
+ "ygw",
+ "yi",
+ "yko",
+ "yle",
+ "ylg",
+ "yll",
+ "yml",
+ "yo",
+ "yon",
+ "yrb",
+ "yre",
+ "yrl",
+ "yss",
+ "yua",
+ "yue",
+ "yue-CN",
+ "yue-Hans",
+ "yuj",
+ "yut",
+ "yuw",
+ "za",
+ "zag",
+ "zdj",
+ "zea",
+ "zgh",
+ "zh",
+ "zh-AU",
+ "zh-BN",
+ "zh-Bopo",
+ "zh-GB",
+ "zh-GF",
+ "zh-Hanb",
+ "zh-Hant",
+ "zh-HK",
+ "zh-ID",
+ "zh-MO",
+ "zh-PA",
+ "zh-PF",
+ "zh-PH",
+ "zh-SR",
+ "zh-TH",
+ "zh-TW",
+ "zh-US",
+ "zh-VN",
+ "zhx",
+ "zia",
+ "zkt",
+ "zlm",
+ "zmi",
+ "zne",
+ "zu",
+ "zza",
+ ];
+}
+
+#[allow(dead_code)]
+pub mod short_subtags_10pct {
+ pub static STRINGS: &[&str] = &[
+ "aa",
+ "acd",
+ "aeb",
+ "ahl",
+ "amm",
+ "aom",
+ "arc-Nbat",
+ "asa",
+ "avl",
+ "az",
+ "bas",
+ "bcf",
+ "bef",
+ "bft",
+ "bho",
+ "bjh",
+ "bkq",
+ "bmu",
+ "bqc",
+ "bsj",
+ "bug",
+ "bye",
+ "bzw",
+ "cgg",
+ "cjv",
+ "cop",
+ "csw",
+ "dah",
+ "den",
+ "dnj",
+ "dtp",
+ "dyo",
+ "eky",
+ "es",
+ "ext",
+ "ffi",
+ "fod",
+ "fub",
+ "fy",
+ "gay",
+ "gdr",
+ "gjk",
+ "gnd",
+ "grb",
+ "gur",
+ "gwt",
+ "hbb",
+ "hil",
+ "ho",
+ "hur",
+ "ich",
+ "ijj",
+ "iou",
+ "ja",
+ "jib",
+ "kac",
+ "kbq",
+ "kdt",
+ "kgp",
+ "kij",
+ "kk-Arab",
+ "klx",
+ "knp",
+ "kpr",
+ "krl",
+ "ktb",
+ "kue",
+ "kw",
+ "kxp",
+ "kzh",
+ "las",
+ "lem",
+ "lif",
+ "lle",
+ "lok",
+ "luo",
+ "mai",
+ "mbq",
+ "mdf",
+ "met",
+ "mgo",
+ "miw",
+ "mmo",
+ "mo",
+ "mql",
+ "mti",
+ "mwv",
+ "myz",
+ "nak",
+ "nco",
+ "ng",
+ "nin",
+ "nn",
+ "nop",
+ "ntm",
+ "nxr",
+ "okr",
+ "os",
+ "pal",
+ "ped",
+ "pl",
+ "pra",
+ "quc",
+ "rhg",
+ "rmu",
+ "ru",
+ "saq",
+ "scl",
+ "se",
+ "shi",
+ "sk",
+ "sma",
+ "snx",
+ "sps",
+ "srx",
+ "sue",
+ "swv",
+ "taq",
+ "tdd",
+ "tg",
+ "ti",
+ "tkt",
+ "tof",
+ "trw",
+ "ttj",
+ "tvu",
+ "udi",
+ "und",
+ "und-018",
+ "und-057",
+ "und-419",
+ "und-AQ",
+ "und-Arab-MN",
+ "und-Armn",
+ "und-Bass",
+ "und-BJ",
+ "und-Bugi",
+ "und-CH",
+ "und-CP",
+ "und-Cyrl-AL",
+ "und-DE",
+ "und-Dogr",
+ "und-Elym",
+ "und-GA",
+ "und-Goth",
+ "und-Guru",
+ "und-Hebr-SE",
+ "und-HR",
+ "und-IS",
+ "und-Kawi",
+ "und-Kore",
+ "und-Latn-AM",
+ "und-Latn-MM",
+ "und-LI",
+ "und-LY",
+ "und-MD",
+ "und-Mlym",
+ "und-Mtei",
+ "und-NA",
+ "und-NL",
+ "und-Osma",
+ "und-Phag",
+ "und-PS",
+ "und-RS",
+ "und-SE",
+ "und-SM",
+ "und-SV",
+ "und-Tavt",
+ "und-Thai-CN",
+ "und-Tnsa",
+ "und-UY",
+ "und-WF",
+ "und-ZW",
+ "usa",
+ "vai",
+ "vmw",
+ "wan",
+ "wiu",
+ "wob",
+ "xbi",
+ "xmr",
+ "xsr",
+ "yba",
+ "yll",
+ "yue-CN",
+ "zh",
+ "zh-MO",
+ "zia",
+ ];
+}
+
+#[allow(dead_code)]
+pub mod locales_with_aux {
+ pub static NUM_UNIQUE_BLOBS: usize = 411;
+ pub static STRINGS: &[&str] = &["af-x-3", "af-x-3s", "af-x-4", "af-x-4s", "af-x-5", "af-x-5s", "am-x-3", "am-x-3s", "am-x-4", "am-x-4s", "am-x-5", "am-x-5s", "ar-DZ-x-3", "ar-DZ-x-3s", "ar-DZ-x-4", "ar-DZ-x-4s", "ar-DZ-x-5", "ar-DZ-x-5s", "ar-IQ-x-3", "ar-IQ-x-3s", "ar-IQ-x-4", "ar-IQ-x-4s", "ar-IQ-x-5", "ar-IQ-x-5s", "ar-JO-x-3", "ar-JO-x-3s", "ar-JO-x-4", "ar-JO-x-4s", "ar-JO-x-5", "ar-JO-x-5s", "ar-LB-x-3", "ar-LB-x-3s", "ar-LB-x-4", "ar-LB-x-4s", "ar-LB-x-5", "ar-LB-x-5s", "ar-MA-x-3", "ar-MA-x-3s", "ar-MA-x-4", "ar-MA-x-4s", "ar-MA-x-5", "ar-MA-x-5s", "ar-MR-x-3", "ar-MR-x-3s", "ar-MR-x-4", "ar-MR-x-4s", "ar-MR-x-5", "ar-MR-x-5s", "ar-PS-x-3", "ar-PS-x-3s", "ar-PS-x-4", "ar-PS-x-4s", "ar-PS-x-5", "ar-PS-x-5s", "ar-SY-x-3", "ar-SY-x-3s", "ar-SY-x-4", "ar-SY-x-4s", "ar-SY-x-5", "ar-SY-x-5s", "ar-TN-x-3", "ar-TN-x-3s", "ar-TN-x-4", "ar-TN-x-4s", "ar-TN-x-5", "ar-TN-x-5s", "ar-x-3", "ar-x-3s", "ar-x-4", "ar-x-4s", "ar-x-5", "ar-x-5s", "as-x-3", "as-x-3s", "as-x-4", "as-x-4s", "as-x-5", "as-x-5s", "ast-x-3", "ast-x-3s", "ast-x-4", "ast-x-4s", "ast-x-5", "ast-x-5s", "az-x-3", "az-x-3s", "az-x-5", "az-x-5s", "be-x-3", "be-x-3s", "be-x-4", "be-x-4s", "be-x-5", "be-x-5s", "bg-x-3", "bg-x-3s", "bg-x-4", "bg-x-4s", "bg-x-5", "bg-x-5s", "bgc-x-3", "bgc-x-3s", "bgc-x-5", "bgc-x-5s", "bho-x-3", "bho-x-3s", "bho-x-5", "bho-x-5s", "bn-IN-x-3", "bn-IN-x-3s", "bn-IN-x-4", "bn-IN-x-4s", "bn-x-3", "bn-x-3s", "bn-x-4", "bn-x-4s", "bn-x-5", "bn-x-5s", "br-x-3", "br-x-3s", "br-x-4", "br-x-4s", "br-x-5", "br-x-5s", "brx-x-3", "brx-x-3s", "brx-x-4", "brx-x-4s", "brx-x-5", "brx-x-5s", "bs-Cyrl-x-3", "bs-Cyrl-x-3s", "bs-Cyrl-x-4", "bs-Cyrl-x-4s", "bs-Cyrl-x-5", "bs-Cyrl-x-5s", "bs-x-3", "bs-x-3s", "bs-x-4", "bs-x-4s", "bs-x-5", "bs-x-5s", "ca-x-3", "ca-x-3s", "ca-x-4", "ca-x-4s", "ca-x-5", "ca-x-5s", "ceb-x-3", "ceb-x-3s", "ceb-x-4", "ceb-x-4s", "ceb-x-5", "ceb-x-5s", "chr-x-3", "chr-x-3s", "chr-x-4", "chr-x-4s", "chr-x-5", "chr-x-5s", "cs-x-3", "cs-x-3s", "cs-x-5", "cs-x-5s", 
"cv-x-3", "cv-x-3s", "cv-x-4", "cv-x-4s", "cv-x-5", "cv-x-5s", "cy-x-3", "cy-x-3s", "cy-x-4", "cy-x-4s", "cy-x-5", "cy-x-5s", "da-x-3", "da-x-3s", "da-x-4", "da-x-4s", "da-x-5", "da-x-5s", "de-AT-x-3", "de-AT-x-3s", "de-AT-x-5", "de-AT-x-5s", "de-IT-x-3", "de-IT-x-3s", "de-IT-x-5", "de-IT-x-5s", "de-x-3", "de-x-3s", "de-x-4", "de-x-4s", "de-x-5", "de-x-5s", "doi-x-3", "doi-x-3s", "doi-x-4", "doi-x-4s", "doi-x-5", "doi-x-5s", "dsb-x-3", "dsb-x-3s", "dsb-x-4", "dsb-x-4s", "dsb-x-5", "dsb-x-5s", "el-polyton-x-3", "el-polyton-x-5", "el-polyton-x-5s", "el-x-3", "el-x-3s", "el-x-4", "el-x-4s", "el-x-5", "el-x-5s", "en-001-x-3", "en-001-x-3s", "en-150-x-3", "en-150-x-3s", "en-AG-x-3", "en-AG-x-3s", "en-AI-x-3", "en-AI-x-3s", "en-AT-x-3", "en-AT-x-3s", "en-AU-x-3", "en-AU-x-3s", "en-BB-x-3", "en-BB-x-3s", "en-BE-x-3", "en-BE-x-3s", "en-BM-x-3", "en-BM-x-3s", "en-BS-x-3", "en-BS-x-3s", "en-BW-x-3", "en-BW-x-3s", "en-BZ-x-3", "en-BZ-x-3s", "en-CC-x-3", "en-CC-x-3s", "en-CH-x-3", "en-CH-x-3s", "en-CK-x-3", "en-CK-x-3s", "en-CM-x-3", "en-CM-x-3s", "en-CX-x-3", "en-CX-x-3s", "en-CY-x-3", "en-CY-x-3s", "en-DE-x-3", "en-DE-x-3s", "en-DG-x-3", "en-DG-x-3s", "en-DK-x-3", "en-DK-x-3s", "en-DM-x-3", "en-DM-x-3s", "en-ER-x-3", "en-ER-x-3s", "en-FI-x-3", "en-FI-x-3s", "en-FJ-x-3", "en-FJ-x-3s", "en-FK-x-3", "en-FK-x-3s", "en-FM-x-3", "en-FM-x-3s", "en-GB-x-3", "en-GB-x-3s", "en-GD-x-3", "en-GD-x-3s", "en-GG-x-3", "en-GG-x-3s", "en-GH-x-3", "en-GH-x-3s", "en-GI-x-3", "en-GI-x-3s", "en-GM-x-3", "en-GM-x-3s", "en-GY-x-3", "en-GY-x-3s", "en-HK-x-3", "en-HK-x-3s", "en-IE-x-3", "en-IE-x-3s", "en-IL-x-3", "en-IL-x-3s", "en-IM-x-3", "en-IM-x-3s", "en-IN-x-3", "en-IN-x-3s", "en-IO-x-3", "en-IO-x-3s", "en-JE-x-3", "en-JE-x-3s", "en-JM-x-3", "en-JM-x-3s", "en-KE-x-3", "en-KE-x-3s", "en-KI-x-3", "en-KI-x-3s", "en-KN-x-3", "en-KN-x-3s", "en-KY-x-3", "en-KY-x-3s", "en-LC-x-3", "en-LC-x-3s", "en-LR-x-3", "en-LR-x-3s", "en-LS-x-3", "en-LS-x-3s", "en-MG-x-3", "en-MG-x-3s", "en-MO-x-3", "en-MO-x-3s", 
"en-MS-x-3", "en-MS-x-3s", "en-MT-x-3", "en-MT-x-3s", "en-MU-x-3", "en-MU-x-3s", "en-MV-x-3", "en-MV-x-3s", "en-MW-x-3", "en-MW-x-3s", "en-MY-x-3", "en-MY-x-3s", "en-NA-x-3", "en-NA-x-3s", "en-NF-x-3", "en-NF-x-3s", "en-NG-x-3", "en-NG-x-3s", "en-NL-x-3", "en-NL-x-3s", "en-NR-x-3", "en-NR-x-3s", "en-NU-x-3", "en-NU-x-3s", "en-NZ-x-3", "en-NZ-x-3s", "en-PG-x-3", "en-PG-x-3s", "en-PK-x-3", "en-PK-x-3s", "en-PN-x-3", "en-PN-x-3s", "en-PW-x-3", "en-PW-x-3s", "en-RW-x-3", "en-RW-x-3s", "en-SB-x-3", "en-SB-x-3s", "en-SC-x-3", "en-SC-x-3s", "en-SD-x-3", "en-SD-x-3s", "en-SE-x-3", "en-SE-x-3s", "en-SG-x-3", "en-SG-x-3s", "en-SH-x-3", "en-SH-x-3s", "en-SI-x-3", "en-SI-x-3s", "en-SL-x-3", "en-SL-x-3s", "en-SS-x-3", "en-SS-x-3s", "en-SX-x-3", "en-SX-x-3s", "en-SZ-x-3", "en-SZ-x-3s", "en-TC-x-3", "en-TC-x-3s", "en-TK-x-3", "en-TK-x-3s", "en-TO-x-3", "en-TO-x-3s", "en-TT-x-3", "en-TT-x-3s", "en-TV-x-3", "en-TV-x-3s", "en-TZ-x-3", "en-TZ-x-3s", "en-UG-x-3", "en-UG-x-3s", "en-VC-x-3", "en-VC-x-3s", "en-VG-x-3", "en-VG-x-3s", "en-VU-x-3", "en-VU-x-3s", "en-WS-x-3", "en-WS-x-3s", "en-ZA-x-3", "en-ZA-x-3s", "en-ZM-x-3", "en-ZM-x-3s", "en-ZW-x-3", "en-ZW-x-3s", "en-x-3", "en-x-3s", "en-x-4", "en-x-4s", "en-x-5", "en-x-5s", "es-CL-x-3s", "es-CO-x-3s", "es-PE-x-3", "es-PE-x-3s", "es-PE-x-5", "es-PE-x-5s", "es-PY-x-3", "es-PY-x-3s", "es-UY-x-3", "es-UY-x-3s", "es-UY-x-5", "es-UY-x-5s", "es-VE-x-3", "es-VE-x-3s", "es-x-3", "es-x-3s", "es-x-4", "es-x-4s", "es-x-5", "es-x-5s", "et-x-3", "et-x-3s", "et-x-4", "et-x-4s", "et-x-5", "et-x-5s", "eu-x-3", "eu-x-3s", "eu-x-4", "eu-x-4s", "eu-x-5", "eu-x-5s", "fa-AF-x-3", "fa-AF-x-3s", "fa-AF-x-4", "fa-AF-x-4s", "fa-AF-x-5", "fa-AF-x-5s", "fa-x-3", "fa-x-3s", "fa-x-4", "fa-x-4s", "fa-x-5", "fa-x-5s", "ff-Adlm-x-3", "ff-Adlm-x-3s", "ff-Adlm-x-4", "ff-Adlm-x-4s", "ff-Adlm-x-5", "ff-Adlm-x-5s", "fi-x-3", "fi-x-3s", "fi-x-4", "fi-x-4s", "fi-x-5", "fi-x-5s", "fil-x-3", "fil-x-3s", "fil-x-4", "fil-x-4s", "fil-x-5", "fil-x-5s", "fo-x-3", "fo-x-3s", 
"fo-x-4", "fo-x-4s", "fo-x-5", "fo-x-5s", "fr-CA-x-3", "fr-CA-x-3s", "fr-MA-x-3", "fr-MA-x-3s", "fr-x-3", "fr-x-3s", "fr-x-4", "fr-x-4s", "fr-x-5", "fr-x-5s", "ga-x-3", "ga-x-3s", "ga-x-4", "ga-x-4s", "ga-x-5", "ga-x-5s", "gd-x-3", "gd-x-3s", "gd-x-4", "gd-x-4s", "gd-x-5", "gd-x-5s", "gl-x-3", "gl-x-3s", "gl-x-4", "gl-x-4s", "gl-x-5", "gl-x-5s", "gu-x-3", "gu-x-3s", "gu-x-4", "gu-x-4s", "gu-x-5", "gu-x-5s", "ha-x-3", "ha-x-3s", "ha-x-4", "ha-x-4s", "ha-x-5", "ha-x-5s", "he-x-3", "he-x-3s", "he-x-5", "he-x-5s", "hi-Latn-x-3", "hi-Latn-x-3s", "hi-Latn-x-4", "hi-Latn-x-4s", "hi-Latn-x-5", "hi-Latn-x-5s", "hi-x-3", "hi-x-3s", "hi-x-4", "hi-x-4s", "hi-x-5", "hi-x-5s", "hr-x-3", "hr-x-3s", "hr-x-4", "hr-x-4s", "hr-x-5", "hr-x-5s", "hsb-x-3", "hsb-x-3s", "hsb-x-4", "hsb-x-4s", "hsb-x-5", "hsb-x-5s", "hu-x-3", "hu-x-3s", "hu-x-4", "hu-x-4s", "hu-x-5", "hu-x-5s", "hy-x-3", "hy-x-3s", "hy-x-4", "hy-x-4s", "hy-x-5", "hy-x-5s", "ia-x-3", "ia-x-3s", "ia-x-4", "ia-x-5", "ia-x-5s", "id-x-3", "id-x-3s", "id-x-4", "id-x-4s", "id-x-5", "id-x-5s", "ig-x-3", "ig-x-3s", "ig-x-4", "ig-x-4s", "ig-x-5", "ig-x-5s", "is-x-3", "is-x-3s", "is-x-4", "is-x-4s", "is-x-5", "is-x-5s", "it-x-3", "it-x-3s", "it-x-4", "it-x-4s", "it-x-5", "it-x-5s", "ja-x-3", "ja-x-3s", "ja-x-5", "ja-x-5s", "jv-x-3", "jv-x-3s", "jv-x-4", "jv-x-4s", "jv-x-5", "jv-x-5s", "ka-x-3", "ka-x-3s", "ka-x-4", "ka-x-4s", "ka-x-5", "ka-x-5s", "kea-x-3", "kea-x-3s", "kea-x-4", "kea-x-4s", "kea-x-5", "kea-x-5s", "kgp-x-3", "kgp-x-3s", "kgp-x-4", "kgp-x-4s", "kgp-x-5", "kgp-x-5s", "kk-x-3", "kk-x-3s", "kk-x-4", "kk-x-4s", "kk-x-5", "kk-x-5s", "km-x-3", "km-x-3s", "km-x-4", "km-x-4s", "km-x-5", "km-x-5s", "kn-x-3", "kn-x-3s", "kn-x-4", "kn-x-4s", "kn-x-5", "kn-x-5s", "ko-x-3", "ko-x-3s", "ko-x-4", "ko-x-4s", "ko-x-5", "ko-x-5s", "kok-x-3", "kok-x-3s", "kok-x-5", "kok-x-5s", "ks-Deva-x-3", "ks-Deva-x-3s", "ks-Deva-x-4", "ks-Deva-x-4s", "ks-Deva-x-5", "ks-Deva-x-5s", "ks-x-3", "ks-x-3s", "ks-x-4", "ks-x-4s", "ks-x-5", "ks-x-5s", 
"ky-x-3", "ky-x-3s", "ky-x-4", "ky-x-4s", "ky-x-5", "ky-x-5s", "lo-x-3", "lo-x-3s", "lo-x-5", "lo-x-5s", "lt-x-3", "lt-x-3s", "lt-x-4", "lt-x-4s", "lt-x-5", "lt-x-5s", "lv-x-3", "lv-x-3s", "lv-x-4", "lv-x-4s", "lv-x-5", "lv-x-5s", "mai-x-3", "mai-x-3s", "mai-x-4", "mai-x-4s", "mai-x-5", "mai-x-5s", "mi-x-3", "mi-x-3s", "mi-x-4", "mi-x-4s", "mi-x-5", "mi-x-5s", "mk-x-3", "mk-x-3s", "mk-x-4", "mk-x-4s", "mk-x-5", "mk-x-5s", "ml-x-3", "ml-x-3s", "ml-x-4", "ml-x-4s", "ml-x-5", "ml-x-5s", "mn-x-3", "mn-x-3s", "mn-x-4", "mn-x-4s", "mn-x-5", "mn-x-5s", "mni-x-3", "mni-x-3s", "mni-x-4", "mni-x-4s", "mni-x-5", "mni-x-5s", "mr-x-3", "mr-x-3s", "mr-x-4", "mr-x-4s", "mr-x-5", "mr-x-5s", "ms-x-3", "ms-x-3s", "ms-x-4", "ms-x-4s", "ms-x-5", "ms-x-5s", "my-x-3", "my-x-3s", "my-x-4", "my-x-4s", "my-x-5", "my-x-5s", "nb-x-3", "nb-x-3s", "nb-x-4", "nb-x-4s", "nb-x-5", "nb-x-5s", "ne-x-3", "ne-x-3s", "ne-x-4", "ne-x-4s", "ne-x-5", "ne-x-5s", "nl-x-3", "nl-x-3s", "nl-x-4", "nl-x-4s", "nl-x-5", "nl-x-5s", "nn-x-3", "nn-x-3s", "nn-x-4", "nn-x-4s", "nn-x-5", "nn-x-5s", "no-x-3", "no-x-3s", "no-x-4", "no-x-4s", "no-x-5", "no-x-5s", "or-x-3", "or-x-3s", "or-x-4", "or-x-4s", "or-x-5", "or-x-5s", "pa-x-3", "pa-x-3s", "pa-x-4", "pa-x-4s", "pa-x-5", "pa-x-5s", "pcm-x-3", "pcm-x-3s", "pcm-x-4", "pcm-x-4s", "pcm-x-5", "pcm-x-5s", "pl-x-3", "pl-x-3s", "pl-x-4", "pl-x-4s", "pl-x-5", "pl-x-5s", "ps-x-3", "ps-x-3s", "ps-x-4", "ps-x-5", "ps-x-5s", "pt-x-3", "pt-x-3s", "pt-x-4", "pt-x-4s", "pt-x-5", "pt-x-5s", "qu-x-3", "qu-x-3s", "qu-x-5", "qu-x-5s", "raj-x-3", "raj-x-3s", "raj-x-5", "raj-x-5s", "rm-x-3", "rm-x-3s", "rm-x-4", "rm-x-4s", "rm-x-5", "rm-x-5s", "ro-x-3", "ro-x-3s", "ro-x-4", "ro-x-4s", "ro-x-5", "ro-x-5s", "ru-x-3", "ru-x-3s", "ru-x-4", "ru-x-4s", "ru-x-5", "ru-x-5s", "sa-x-3", "sa-x-3s", "sa-x-4", "sa-x-5", "sa-x-5s", "sat-x-3", "sat-x-3s", "sat-x-4", "sat-x-4s", "sat-x-5", "sat-x-5s", "sc-x-3", "sc-x-3s", "sc-x-4", "sc-x-4s", "sc-x-5", "sc-x-5s", "sd-Deva-x-3", "sd-Deva-x-3s", 
"sd-Deva-x-4", "sd-Deva-x-4s", "sd-Deva-x-5", "sd-Deva-x-5s", "sd-x-3", "sd-x-3s", "sd-x-4", "sd-x-4s", "sd-x-5", "sd-x-5s", "si-x-3", "si-x-3s", "si-x-4", "si-x-4s", "si-x-5", "si-x-5s", "sk-x-3", "sk-x-3s", "sk-x-4", "sk-x-4s", "sk-x-5", "sk-x-5s", "sl-x-3", "sl-x-3s", "sl-x-4", "sl-x-4s", "sl-x-5", "sl-x-5s", "so-x-3", "so-x-3s", "so-x-4", "so-x-4s", "so-x-5", "so-x-5s", "sq-x-3", "sq-x-3s", "sq-x-4", "sq-x-4s", "sq-x-5", "sq-x-5s", "sr-Latn-XK-x-3", "sr-Latn-XK-x-3s", "sr-Latn-x-3", "sr-Latn-x-3s", "sr-Latn-x-4", "sr-Latn-x-4s", "sr-Latn-x-5", "sr-Latn-x-5s", "sr-ME-x-3", "sr-ME-x-3s", "sr-XK-x-3", "sr-XK-x-3s", "sr-x-3", "sr-x-3s", "sr-x-4", "sr-x-4s", "sr-x-5", "sr-x-5s", "su-x-3", "su-x-3s", "su-x-4", "su-x-4s", "su-x-5", "su-x-5s", "sv-x-3", "sv-x-3s", "sv-x-4", "sv-x-4s", "sv-x-5", "sv-x-5s", "sw-x-3", "sw-x-3s", "sw-x-4", "sw-x-4s", "sw-x-5", "sw-x-5s", "ta-x-3", "ta-x-3s", "ta-x-4", "ta-x-4s", "ta-x-5", "ta-x-5s", "te-x-3", "te-x-3s", "te-x-4", "te-x-4s", "te-x-5", "te-x-5s", "tg-x-3", "tg-x-3s", "tg-x-4", "tg-x-4s", "tg-x-5", "tg-x-5s", "th-x-3", "th-x-3s", "th-x-4", "th-x-4s", "th-x-5", "th-x-5s", "ti-x-3", "ti-x-3s", "ti-x-4", "ti-x-4s", "ti-x-5", "ti-x-5s", "tk-x-3", "tk-x-3s", "tk-x-4", "tk-x-4s", "tk-x-5", "tk-x-5s", "to-x-3", "to-x-3s", "to-x-4", "to-x-4s", "to-x-5", "to-x-5s", "tr-x-3", "tr-x-3s", "tr-x-4", "tr-x-4s", "tr-x-5", "tr-x-5s", "tt-x-3", "tt-x-3s", "tt-x-5", "tt-x-5s", "uk-x-3", "uk-x-3s", "uk-x-4", "uk-x-4s", "uk-x-5", "uk-x-5s", "und-x-3", "und-x-3s", "und-x-4", "und-x-4s", "und-x-5", "und-x-5s", "ur-x-3", "ur-x-3s", "ur-x-4", "ur-x-4s", "ur-x-5", "ur-x-5s", "uz-Cyrl-x-3", "uz-Cyrl-x-3s", "uz-Cyrl-x-4", "uz-Cyrl-x-4s", "uz-Cyrl-x-5", "uz-Cyrl-x-5s", "uz-x-3", "uz-x-3s", "uz-x-4", "uz-x-4s", "uz-x-5", "uz-x-5s", "vi-x-3", "vi-x-3s", "vi-x-5", "vi-x-5s", "wo-x-3", "wo-x-3s", "wo-x-5", "wo-x-5s", "xh-x-3", "xh-x-3s", "xh-x-5", "xh-x-5s", "yo-BJ-x-3", "yo-BJ-x-3s", "yo-BJ-x-4", "yo-BJ-x-4s", "yo-BJ-x-5", "yo-BJ-x-5s", "yo-x-3", 
"yo-x-3s", "yo-x-4", "yo-x-4s", "yo-x-5", "yo-x-5s", "yrl-x-3", "yrl-x-3s", "yrl-x-4", "yrl-x-4s", "yrl-x-5", "yrl-x-5s", "yue-Hans-x-3", "yue-Hans-x-3s", "yue-Hans-x-5", "yue-Hans-x-5s", "yue-x-3", "yue-x-3s", "yue-x-5", "yue-x-5s", "zh-Hant-x-3", "zh-Hant-x-3s", "zh-Hant-x-5", "zh-Hant-x-5s", "zh-x-3", "zh-x-3s", "zh-x-5", "zh-x-5s", "zu-x-3", "zu-x-3s", "zu-x-4", "zu-x-4s", "zu-x-5", "zu-x-5s"];
+}
diff --git a/vendor/zerotrie/tests/derive_test.rs b/vendor/zerotrie/tests/derive_test.rs
new file mode 100644
index 00000000..26ee36ab
--- /dev/null
+++ b/vendor/zerotrie/tests/derive_test.rs
@@ -0,0 +1,138 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+#![allow(non_camel_case_types, non_snake_case)]
+
+use zerotrie::ZeroAsciiIgnoreCaseTrie;
+use zerotrie::ZeroTrie;
+use zerotrie::ZeroTrieExtendedCapacity;
+use zerotrie::ZeroTriePerfectHash;
+use zerotrie::ZeroTrieSimpleAscii;
+use zerovec::ZeroVec;
+
+#[cfg_attr(feature = "yoke", derive(yoke::Yokeable))]
+#[cfg_attr(feature = "zerofrom", derive(zerofrom::ZeroFrom))]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+#[cfg_attr(feature = "databake", derive(databake::Bake))]
+#[cfg_attr(feature = "databake", databake(path = crate))] // presumably makes baked output use `crate::` paths, as the bake test expects - confirm
+struct DeriveTest_ZeroTrie_ZeroVec<'data> { // Derive fixture: each derive above is applied only when its cargo feature is enabled.
+ #[cfg_attr(feature = "serde", serde(borrow))] // only emitted with the `serde` feature
+ _data: ZeroTrie<ZeroVec<'data, u8>>, // sole field: a ZeroTrie stored in a ZeroVec<u8> tied to 'data
+}
+
+#[test]
+#[cfg(all(feature = "databake", feature = "alloc"))] // compiled only when both `databake` and `alloc` are enabled
+fn bake_ZeroTrie_ZeroVec() { // Runs databake's test_bake! on the ZeroTrie fixture (presumably a bake round-trip check - confirm against databake docs).
+ use databake::*;
+ extern crate std; // NOTE(review): presumably needed by the test_bake! expansion - confirm
+ test_bake!(
+ DeriveTest_ZeroTrie_ZeroVec<'static>,
+ crate::DeriveTest_ZeroTrie_ZeroVec {
+ _data: zerotrie::ZeroTrieSimpleAscii {
+ store: zerovec::ZeroVec::new(),
+ }
+ .into_zerotrie()
+ },
+ );
+}
+
+#[cfg_attr(feature = "yoke", derive(yoke::Yokeable))]
+#[cfg_attr(feature = "zerofrom", derive(zerofrom::ZeroFrom))]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+#[cfg_attr(feature = "databake", derive(databake::Bake))]
+#[cfg_attr(feature = "databake", databake(path = crate))] // presumably makes baked output use `crate::` paths, as the bake test expects - confirm
+struct DeriveTest_ZeroTrieSimpleAscii_ZeroVec<'data> { // Derive fixture: each derive above is applied only when its cargo feature is enabled.
+ #[cfg_attr(feature = "serde", serde(borrow))] // only emitted with the `serde` feature
+ _data: ZeroTrieSimpleAscii<ZeroVec<'data, u8>>, // sole field: a ZeroTrieSimpleAscii stored in a ZeroVec<u8> tied to 'data
+}
+
+#[test]
+#[cfg(all(feature = "databake", feature = "alloc"))] // compiled only when both `databake` and `alloc` are enabled
+fn bake_ZeroTrieSimpleAscii_ZeroVec() { // Runs databake's test_bake! on the ZeroTrieSimpleAscii fixture built from an empty ZeroVec.
+ use databake::*;
+ extern crate std; // NOTE(review): presumably needed by the test_bake! expansion - confirm
+ test_bake!(
+ DeriveTest_ZeroTrieSimpleAscii_ZeroVec<'static>,
+ crate::DeriveTest_ZeroTrieSimpleAscii_ZeroVec {
+ _data: zerotrie::ZeroTrieSimpleAscii {
+ store: zerovec::ZeroVec::new(),
+ }
+ },
+ );
+}
+
+#[cfg_attr(feature = "yoke", derive(yoke::Yokeable))]
+#[cfg_attr(feature = "zerofrom", derive(zerofrom::ZeroFrom))]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+#[cfg_attr(feature = "databake", derive(databake::Bake))]
+#[cfg_attr(feature = "databake", databake(path = crate))] // presumably makes baked output use `crate::` paths, as the bake test expects - confirm
+struct DeriveTest_ZeroAsciiIgnoreCaseTrie_ZeroVec<'data> { // Derive fixture: each derive above is applied only when its cargo feature is enabled.
+ #[cfg_attr(feature = "serde", serde(borrow))] // only emitted with the `serde` feature
+ _data: ZeroAsciiIgnoreCaseTrie<ZeroVec<'data, u8>>, // sole field: a ZeroAsciiIgnoreCaseTrie stored in a ZeroVec<u8> tied to 'data
+}
+
+#[test]
+#[cfg(all(feature = "databake", feature = "alloc"))] // compiled only when both `databake` and `alloc` are enabled
+fn bake_ZeroAsciiIgnoreCaseTrie_ZeroVec() { // Runs databake's test_bake! on the ZeroAsciiIgnoreCaseTrie fixture built from an empty ZeroVec.
+ use databake::*;
+ extern crate std; // NOTE(review): presumably needed by the test_bake! expansion - confirm
+ test_bake!(
+ DeriveTest_ZeroAsciiIgnoreCaseTrie_ZeroVec<'static>,
+ crate::DeriveTest_ZeroAsciiIgnoreCaseTrie_ZeroVec {
+ _data: zerotrie::ZeroAsciiIgnoreCaseTrie {
+ store: zerovec::ZeroVec::new(),
+ }
+ },
+ );
+}
+
+#[cfg_attr(feature = "yoke", derive(yoke::Yokeable))]
+#[cfg_attr(feature = "zerofrom", derive(zerofrom::ZeroFrom))]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+#[cfg_attr(feature = "databake", derive(databake::Bake))]
+#[cfg_attr(feature = "databake", databake(path = crate))] // presumably makes baked output use `crate::` paths, as the bake test expects - confirm
+struct DeriveTest_ZeroTriePerfectHash_ZeroVec<'data> { // Derive fixture: each derive above is applied only when its cargo feature is enabled.
+ #[cfg_attr(feature = "serde", serde(borrow))] // only emitted with the `serde` feature
+ _data: ZeroTriePerfectHash<ZeroVec<'data, u8>>, // sole field: a ZeroTriePerfectHash stored in a ZeroVec<u8> tied to 'data
+}
+
+#[test]
+#[cfg(all(feature = "databake", feature = "alloc"))] // compiled only when both `databake` and `alloc` are enabled
+fn bake_ZeroTriePerfectHash_ZeroVec() { // Runs databake's test_bake! on the ZeroTriePerfectHash fixture built from an empty ZeroVec.
+ use databake::*;
+ extern crate std; // NOTE(review): presumably needed by the test_bake! expansion - confirm
+ test_bake!(
+ DeriveTest_ZeroTriePerfectHash_ZeroVec<'static>,
+ crate::DeriveTest_ZeroTriePerfectHash_ZeroVec {
+ _data: zerotrie::ZeroTriePerfectHash {
+ store: zerovec::ZeroVec::new(),
+ }
+ },
+ );
+}
+
+#[cfg_attr(feature = "yoke", derive(yoke::Yokeable))]
+#[cfg_attr(feature = "zerofrom", derive(zerofrom::ZeroFrom))]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+#[cfg_attr(feature = "databake", derive(databake::Bake))]
+#[cfg_attr(feature = "databake", databake(path = crate))] // presumably makes baked output use `crate::` paths, as the bake test expects - confirm
+struct DeriveTest_ZeroTrieExtendedCapacity_ZeroVec<'data> { // Derive fixture: each derive above is applied only when its cargo feature is enabled.
+ #[cfg_attr(feature = "serde", serde(borrow))] // only emitted with the `serde` feature
+ _data: ZeroTrieExtendedCapacity<ZeroVec<'data, u8>>, // sole field: a ZeroTrieExtendedCapacity stored in a ZeroVec<u8> tied to 'data
+}
+
+#[test]
+#[cfg(all(feature = "databake", feature = "alloc"))] // compiled only when both `databake` and `alloc` are enabled
+fn bake_ZeroTrieExtendedCapacity_ZeroVec() { // Runs databake's test_bake! on the ZeroTrieExtendedCapacity fixture built from an empty ZeroVec.
+ use databake::*;
+ extern crate std; // NOTE(review): presumably needed by the test_bake! expansion - confirm
+ test_bake!(
+ DeriveTest_ZeroTrieExtendedCapacity_ZeroVec<'static>,
+ crate::DeriveTest_ZeroTrieExtendedCapacity_ZeroVec {
+ _data: zerotrie::ZeroTrieExtendedCapacity {
+ store: zerovec::ZeroVec::new(),
+ }
+ },
+ );
+}
diff --git a/vendor/zerotrie/tests/ignorecase_test.rs b/vendor/zerotrie/tests/ignorecase_test.rs
new file mode 100644
index 00000000..fb73ef7c
--- /dev/null
+++ b/vendor/zerotrie/tests/ignorecase_test.rs
@@ -0,0 +1,46 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use zerotrie::ZeroAsciiIgnoreCaseTrie;
+
+mod testdata { // Wraps the shared fixture file in a module; this test imports `strings_to_litemap` from it.
+ include!("data/data.rs"); // textual include of the fixture source
+}
+
+use testdata::strings_to_litemap;
+
+#[test]
+fn test_ignore_case_coverage() { // Exercises ZeroAsciiIgnoreCaseTrie construction, case-insensitive lookup, and rejection of conflicting keys.
+ let litemap = strings_to_litemap(&["", "aBc", "aBcD", "aBce", "aBcF", "aBcghi"]);
+
+ // Test both construction paths: `try_from(&litemap)` and `collect()` from an iterator of (key, value) pairs
+ ZeroAsciiIgnoreCaseTrie::try_from(&litemap).unwrap();
+ let trie = litemap
+ .iter()
+ .map(|(k, v)| (*k, *v))
+ .collect::<ZeroAsciiIgnoreCaseTrie<Vec<u8>>>();
+
+ // Test lookup: each key must resolve to its value as written, fully upper-cased, and fully lower-cased
+ for (k, v) in litemap.iter() {
+ assert_eq!(trie.get(k), Some(*v), "normal: {k:?}");
+ let k_upper = k
+ .iter()
+ .map(|c| c.to_ascii_uppercase())
+ .collect::<Vec<u8>>();
+ assert_eq!(trie.get(k_upper), Some(*v), "upper: {k:?}");
+ let k_lower = k
+ .iter()
+ .map(|c| c.to_ascii_lowercase())
+ .collect::<Vec<u8>>();
+ assert_eq!(trie.get(k_lower), Some(*v), "lower: {k:?}");
+ }
+
+ // Test mixed-case strings: each differs from an existing key only by letter case at some shared position,
+ // so adding it to a copy of the map must make `try_from` return an error (presumably a case conflict - confirm)
+ let problematic_strs = &["A", "ab", "abc", "aBcd", "aBcgHi"];
+ for problematic_str in problematic_strs {
+ let mut litemap = litemap.clone(); // fresh copy so the offending keys do not accumulate across iterations
+ litemap.insert(problematic_str.as_bytes(), 100);
+ ZeroAsciiIgnoreCaseTrie::try_from(&litemap).expect_err(problematic_str);
+ }
+}
diff --git a/vendor/zerotrie/tests/locale_aux_test.rs b/vendor/zerotrie/tests/locale_aux_test.rs
new file mode 100644
index 00000000..10b61071
--- /dev/null
+++ b/vendor/zerotrie/tests/locale_aux_test.rs
@@ -0,0 +1,168 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use icu_locale_core::extensions::private::Private;
+use icu_locale_core::Locale;
+use litemap::LiteMap;
+use std::collections::BTreeSet;
+use writeable::Writeable;
+use zerotrie::ZeroTriePerfectHash;
+use zerotrie::ZeroTrieSimpleAscii;
+use zerovec::VarZeroVec;
+
+mod testdata { // Wraps the shared fixture file in a module; this test imports `locales_with_aux` items and `strings_to_litemap` from it.
+ include!("data/data.rs"); // textual include of the fixture source
+}
+
+use testdata::locales_with_aux::{NUM_UNIQUE_BLOBS, STRINGS};
+use testdata::strings_to_litemap;
+
+#[test]
+fn test_combined() { // Pins the byte sizes of several representations of STRINGS: VarZeroVec<str>, two trie types, and raw key bytes.
+ let litemap = strings_to_litemap(STRINGS);
+
+ let vzv: VarZeroVec<str> = STRINGS.into();
+
+ // VarZeroVec<str> lookup table size in bytes:
+ assert_eq!(vzv.as_bytes().len(), 10219);
+
+ // Size including pointer array (NOTE(review): this and the later totals appear to assume an 8-byte usize - confirm):
+ assert_eq!(
+ vzv.as_bytes().len() + STRINGS.len() * core::mem::size_of::<usize>(),
+ 18635
+ );
+
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();
+
+ // ZeroTrieSimpleAscii lookup table size in bytes:
+ assert_eq!(trie.byte_len(), 5104);
+
+ // Size including pointer array (one usize per NUM_UNIQUE_BLOBS entry):
+ assert_eq!(
+ trie.byte_len() + NUM_UNIQUE_BLOBS * core::mem::size_of::<usize>(),
+ 8392
+ );
+
+ let trie = ZeroTriePerfectHash::try_from(&litemap).unwrap(); // shadows the SimpleAscii trie above
+
+ // ZeroTriePerfectHash lookup table size in bytes:
+ assert_eq!(trie.byte_len(), 5157);
+
+ // Size including pointer array (one usize per NUM_UNIQUE_BLOBS entry):
+ assert_eq!(
+ trie.byte_len() + NUM_UNIQUE_BLOBS * core::mem::size_of::<usize>(),
+ 8445
+ );
+
+ let total_str_len = litemap.keys().map(|k| k.len()).sum::<usize>(); // sum of the key lengths in bytes
+ assert_eq!(total_str_len, 8115);
+
+ // Lookup table size: raw key bytes plus one usize per string
+ assert_eq!(
+ total_str_len + STRINGS.len() * core::mem::size_of::<usize>(),
+ 16531
+ );
+
+ // Size including pointer array: (2x for the lookup array and value array)
+ assert_eq!(
+ total_str_len + 2 * STRINGS.len() * core::mem::size_of::<usize>(),
+ 24947
+ );
+
+ // Size including u16 pointer array: key bytes + usize per string + u16 per string + usize per unique blob
+ assert_eq!(
+ total_str_len
+ + STRINGS.len() * core::mem::size_of::<usize>()
+ + STRINGS.len() * core::mem::size_of::<u16>()
+ + NUM_UNIQUE_BLOBS * core::mem::size_of::<usize>(),
+ 21923
+ );
+}
+
+#[test]
+fn test_aux_split() { // Groups the locales by private-use extension, builds per-group tries without that extension, and pins the summed sizes.
+ let locales: Vec<Locale> = STRINGS.iter().map(|s| s.parse().unwrap()).collect();
+
+ let aux_keys: BTreeSet<&Private> = locales.iter().map(|l| &l.extensions.private).collect(); // distinct private extensions
+ assert_eq!(aux_keys.len(), 6);
+
+ let mut cumulative_index = 0; // running value index shared across all groups
+ let mut total_simpleascii_len = 0;
+ let mut total_perfecthash_len = 0;
+ let mut total_vzv_len = 0;
+ let mut unique_locales = BTreeSet::new(); // stripped locale strings (as bytes), deduplicated across groups
+ for private in aux_keys.iter() {
+ let current_locales: Vec<Locale> = locales
+ .iter()
+ .filter(|l| l.extensions.private == **private)
+ .map(|l| {
+ let mut l = l.clone();
+ l.extensions.private = Private::default(); // strip the private extension from the cloned locale
+ l
+ })
+ .collect();
+ let litemap: LiteMap<Vec<u8>, usize> = current_locales
+ .iter()
+ .map(|l| {
+ (l.write_to_string().into_owned().into_bytes(), {
+ cumulative_index += 1; // post-increment: the block below yields the pre-increment value
+ cumulative_index - 1
+ })
+ })
+ .collect();
+
+ let trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();
+ total_simpleascii_len += trie.byte_len();
+
+ let trie = ZeroTriePerfectHash::try_from(&litemap).unwrap();
+ total_perfecthash_len += trie.byte_len();
+
+ for k in litemap.keys() {
+ unique_locales.insert(k.clone());
+ }
+
+ let strs: Vec<String> = current_locales
+ .iter()
+ .map(|l| l.write_to_string().into_owned())
+ .collect();
+ let vzv: VarZeroVec<str> = strs.as_slice().into();
+ total_vzv_len += vzv.as_bytes().len();
+ }
+ assert_eq!(cumulative_index, locales.len()); // every locale was assigned exactly one index
+
+ assert_eq!(total_simpleascii_len, 5098);
+ assert_eq!(total_perfecthash_len, 5302);
+ assert_eq!(total_vzv_len, 5486);
+
+ let total_unique_locale_str_len = unique_locales.iter().map(|v| v.len()).sum::<usize>();
+ assert_eq!(total_unique_locale_str_len, 945);
+
+ // Size including pointer array (NOTE(review): the totals below appear to assume an 8-byte usize - confirm):
+ assert_eq!(
+ total_simpleascii_len + NUM_UNIQUE_BLOBS * core::mem::size_of::<usize>(),
+ 8386
+ );
+ assert_eq!(
+ total_perfecthash_len + NUM_UNIQUE_BLOBS * core::mem::size_of::<usize>(),
+ 8590
+ );
+ assert_eq!(
+ total_vzv_len + STRINGS.len() * core::mem::size_of::<usize>(),
+ 13902
+ );
+ // 2x for the lookup arrays and value arrays
+ assert_eq!(
+ total_unique_locale_str_len + 2 * STRINGS.len() * core::mem::size_of::<usize>(),
+ 17777
+ );
+
+ // Size including u16 pointer array: unique key bytes + usize per string + u16 per string + usize per unique blob
+ assert_eq!(
+ total_unique_locale_str_len
+ + STRINGS.len() * core::mem::size_of::<usize>()
+ + STRINGS.len() * core::mem::size_of::<u16>()
+ + NUM_UNIQUE_BLOBS * core::mem::size_of::<usize>(),
+ 14753
+ );
+}