From 45df4d0d9b577fecee798d672695fe24ff57fb1b Mon Sep 17 00:00:00 2001 From: mo khan Date: Tue, 15 Jul 2025 16:37:08 -0600 Subject: feat: migrate from Cedar to SpiceDB authorization system This is a major architectural change that replaces the Cedar policy-based authorization system with SpiceDB's relation-based authorization. Key changes: - Migrate from Rust to Go implementation - Replace Cedar policies with SpiceDB schema and relationships - Switch from Envoy `ext_authz` with Cedar to SpiceDB permission checks - Update build system and dependencies for Go ecosystem - Maintain Envoy integration for external authorization This change enables more flexible permission modeling through SpiceDB's Google Zanzibar-inspired relation-based system, supporting complex hierarchical permissions that were difficult to express in Cedar. Breaking change: Existing Cedar policies and Rust-based configuration will no longer work and need to be migrated to SpiceDB schema. --- vendor/regex-syntax/.cargo-checksum.json | 1 - vendor/regex-syntax/Cargo.toml | 74 - vendor/regex-syntax/LICENSE-APACHE | 201 - vendor/regex-syntax/LICENSE-MIT | 25 - vendor/regex-syntax/README.md | 96 - vendor/regex-syntax/benches/bench.rs | 63 - vendor/regex-syntax/src/ast/mod.rs | 1809 --- vendor/regex-syntax/src/ast/parse.rs | 6377 ---------- vendor/regex-syntax/src/ast/print.rs | 577 - vendor/regex-syntax/src/ast/visitor.rs | 522 - vendor/regex-syntax/src/debug.rs | 107 - vendor/regex-syntax/src/either.rs | 8 - vendor/regex-syntax/src/error.rs | 311 - vendor/regex-syntax/src/hir/interval.rs | 564 - vendor/regex-syntax/src/hir/literal.rs | 3214 ----- vendor/regex-syntax/src/hir/mod.rs | 3873 ------ vendor/regex-syntax/src/hir/print.rs | 608 - vendor/regex-syntax/src/hir/translate.rs | 3744 ------ vendor/regex-syntax/src/hir/visitor.rs | 215 - vendor/regex-syntax/src/lib.rs | 431 - vendor/regex-syntax/src/parser.rs | 254 - vendor/regex-syntax/src/rank.rs | 258 - vendor/regex-syntax/src/unicode.rs | 1041 -- 
.../src/unicode_tables/LICENSE-UNICODE | 57 - vendor/regex-syntax/src/unicode_tables/age.rs | 1846 --- .../src/unicode_tables/case_folding_simple.rs | 2948 ----- .../src/unicode_tables/general_category.rs | 6717 ---------- .../src/unicode_tables/grapheme_cluster_break.rs | 1420 --- vendor/regex-syntax/src/unicode_tables/mod.rs | 57 - .../src/unicode_tables/perl_decimal.rs | 84 - .../regex-syntax/src/unicode_tables/perl_space.rs | 23 - .../regex-syntax/src/unicode_tables/perl_word.rs | 806 -- .../src/unicode_tables/property_bool.rs | 12095 ------------------- .../src/unicode_tables/property_names.rs | 281 - .../src/unicode_tables/property_values.rs | 956 -- vendor/regex-syntax/src/unicode_tables/script.rs | 1300 -- .../src/unicode_tables/script_extension.rs | 1718 --- .../src/unicode_tables/sentence_break.rs | 2530 ---- .../regex-syntax/src/unicode_tables/word_break.rs | 1152 -- vendor/regex-syntax/src/utf8.rs | 592 - vendor/regex-syntax/test | 30 - 41 files changed, 58985 deletions(-) delete mode 100644 vendor/regex-syntax/.cargo-checksum.json delete mode 100644 vendor/regex-syntax/Cargo.toml delete mode 100644 vendor/regex-syntax/LICENSE-APACHE delete mode 100644 vendor/regex-syntax/LICENSE-MIT delete mode 100644 vendor/regex-syntax/README.md delete mode 100644 vendor/regex-syntax/benches/bench.rs delete mode 100644 vendor/regex-syntax/src/ast/mod.rs delete mode 100644 vendor/regex-syntax/src/ast/parse.rs delete mode 100644 vendor/regex-syntax/src/ast/print.rs delete mode 100644 vendor/regex-syntax/src/ast/visitor.rs delete mode 100644 vendor/regex-syntax/src/debug.rs delete mode 100644 vendor/regex-syntax/src/either.rs delete mode 100644 vendor/regex-syntax/src/error.rs delete mode 100644 vendor/regex-syntax/src/hir/interval.rs delete mode 100644 vendor/regex-syntax/src/hir/literal.rs delete mode 100644 vendor/regex-syntax/src/hir/mod.rs delete mode 100644 vendor/regex-syntax/src/hir/print.rs delete mode 100644 vendor/regex-syntax/src/hir/translate.rs delete mode 
100644 vendor/regex-syntax/src/hir/visitor.rs delete mode 100644 vendor/regex-syntax/src/lib.rs delete mode 100644 vendor/regex-syntax/src/parser.rs delete mode 100644 vendor/regex-syntax/src/rank.rs delete mode 100644 vendor/regex-syntax/src/unicode.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/LICENSE-UNICODE delete mode 100644 vendor/regex-syntax/src/unicode_tables/age.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/case_folding_simple.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/general_category.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/mod.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/perl_decimal.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/perl_space.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/perl_word.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/property_bool.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/property_names.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/property_values.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/script.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/script_extension.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/sentence_break.rs delete mode 100644 vendor/regex-syntax/src/unicode_tables/word_break.rs delete mode 100644 vendor/regex-syntax/src/utf8.rs delete mode 100755 vendor/regex-syntax/test (limited to 'vendor/regex-syntax') diff --git a/vendor/regex-syntax/.cargo-checksum.json b/vendor/regex-syntax/.cargo-checksum.json deleted file mode 100644 index 3b7b712f..00000000 --- a/vendor/regex-syntax/.cargo-checksum.json +++ /dev/null @@ -1 +0,0 @@ 
-{"files":{"Cargo.toml":"362cfcf492ed93948ca374491d8dc3b688da0f90d00c2bb9a3892ec2ac27e276","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"b2484aa7e66fb92d1378e9a7ce7605af18f77cb12c179866eaf92ba28cfec1d9","benches/bench.rs":"d2b6ae5b939abd6093064f144b981b7739d7f474ec0698a1268052fc92406635","src/ast/mod.rs":"21cda9fe9e1810b285cb8f8a2aa5eeaff6c38e256ceed036b68c66fb6b0124d6","src/ast/parse.rs":"89a3701a9a95fea692be925e97b7dcfc5af1ac41f20e8f054eafaeb391e8dec2","src/ast/print.rs":"99cb69ece252ef31e0be177fb3364797eb30b785f936532b8dcd8106e7be0738","src/ast/visitor.rs":"f0fdf758801fe70e6b299b73ab63196e814af95ef6eccad7ef4f72075743fcf6","src/debug.rs":"7a16cca02be9715fdc8c26a32279465774623cd12fab1ec59ac25a6e3047817f","src/either.rs":"1758e3edd056884eccadd995708d1e374ba9aa65846bd0e13b1aae852607c560","src/error.rs":"01a67e3407b0d0d869119363e47a94d92158834bfe5936366c2e3f6f4ed13f36","src/hir/interval.rs":"74d75837d24ab9a3cff33b375b70694cdd3b9a4610c799137533f365755ba604","src/hir/literal.rs":"6a8108b8919fbfd9ab93072846124c51d2998489810fcd6e7a89fdccc45833e0","src/hir/mod.rs":"599ceb4921f2345a7d01d2390188fad13f236b98efe7a38c9beb9a0ce5c4ebad","src/hir/print.rs":"ad51c515c933bfd67d307ba3d7e6ac59c9c5903b4f393a9f9a4785c92b88348d","src/hir/translate.rs":"6129a12a686a6ec8965cdeb9f889640891da6aae75995606080fa88d4dd1a602","src/hir/visitor.rs":"71ca9c93aa48a5ed445399659fa6455093a1bbd9ef44b66bc7095c1b08b2ec1f","src/lib.rs":"5ae457d402e49443bdb23b71353693dd3b0d263b57a6eeb9eb5b5dae5c901bdd","src/parser.rs":"6b2f4f27e3331a01a25b87c89368dd2e54396bd425dac57941f9c1ebfd238ac8","src/rank.rs":"ff3d58b0cc5ffa69e2e8c56fc7d9ef41dd399d59a639a253a51551b858cb5bbd","src/unicode.rs":"b2084dcbd4331501b9a895fd7e7575d93ff96eb661c6e6adbc8c66bb72685cde","src/unicode_tables/LICENSE-UNICODE":"74db5baf44a41b1000312c673544b3374e4198af5605c7f9080a402cec42cfa3","src/unicode_tables/age.rs":"71
b7cf52acdb4aa98b44145303b8efbfa94913235493521941ef1e0092a0ffe2","src/unicode_tables/case_folding_simple.rs":"7622c7f7f03ac0dc2f2bcd51c81a217d64de0cc912f62f1add5f676603a02456","src/unicode_tables/general_category.rs":"9488e3721f7c2ae20e1b77fcff9a59b4ed8f22954b8645ea6d8592eac1856423","src/unicode_tables/grapheme_cluster_break.rs":"0dd9d66bad598f4ec3451b6699f05c17c52079e37d463baf6385bbe51aa218f1","src/unicode_tables/mod.rs":"26c837099cd934c8062e24bc9a0aaecf15fe1de03f9c6da3f3e1e5ac3ca24bee","src/unicode_tables/perl_decimal.rs":"6a59143db81a0bcaf0e8d0af265e711d1a6472e1f091ee9ee4377da5d5d0cd1f","src/unicode_tables/perl_space.rs":"ec9bb22ed7e99feef292249c7e6f4673ee0af9635d4d158f93923494c14cd5ed","src/unicode_tables/perl_word.rs":"30f073baae28ea34c373c7778c00f20c1621c3e644404eff031f7d1cc8e9c9e2","src/unicode_tables/property_bool.rs":"66cf5bd2a1438bf9694152f077a285cf014fbd50b9dd63a97233b2ea61d64962","src/unicode_tables/property_names.rs":"8c93985d1bcb01735667a3c4cb92f7e260d267326bde9d7f048bc77cd7e07855","src/unicode_tables/property_values.rs":"ef9131ce0a575c7327ec6d466aafd8b7c25600d80c232b5a4110bbf0a5a59136","src/unicode_tables/script.rs":"41bd424f1e3a03290cf4995ced678dcf24c94b38c905c62f6819bf67e098a2ec","src/unicode_tables/script_extension.rs":"a314099ddbf50a07fe350bb0835bf2fe494ed5ad278b30e171e21506eb557906","src/unicode_tables/sentence_break.rs":"be84fbe8c5c67e761b16fe6c27f16664dbb145357835cd6b92bc2a4a4c52ee79","src/unicode_tables/word_break.rs":"c551681ad49ec28c7ae32bab1371945821c736ca8f0de410cb89f28066ec2ecf","src/utf8.rs":"33657f668361b6648d74c92d3d59eab97e3747d785760f47e4d71c13af07bfba","test":"c7de5fbc0010d9b5b758cd49956375a64b88601c068167fd366808950257f108"},"package":"2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"} \ No newline at end of file diff --git a/vendor/regex-syntax/Cargo.toml b/vendor/regex-syntax/Cargo.toml deleted file mode 100644 index 6b93357c..00000000 --- a/vendor/regex-syntax/Cargo.toml +++ /dev/null @@ -1,74 +0,0 @@ -# THIS 
FILE IS AUTOMATICALLY GENERATED BY CARGO -# -# When uploading crates to the registry Cargo will automatically -# "normalize" Cargo.toml files for maximal compatibility -# with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies. -# -# If you are reading this file be aware that the original Cargo.toml -# will likely look very different (and much more reasonable). -# See Cargo.toml.orig for the original contents. - -[package] -edition = "2021" -rust-version = "1.65" -name = "regex-syntax" -version = "0.8.5" -authors = [ - "The Rust Project Developers", - "Andrew Gallant ", -] -build = false -autobins = false -autoexamples = false -autotests = false -autobenches = false -description = "A regular expression parser." -documentation = "https://docs.rs/regex-syntax" -readme = "README.md" -license = "MIT OR Apache-2.0" -repository = "https://github.com/rust-lang/regex/tree/master/regex-syntax" - -[package.metadata.docs.rs] -all-features = true -rustdoc-args = [ - "--cfg", - "docsrs", -] - -[lib] -name = "regex_syntax" -path = "src/lib.rs" - -[[bench]] -name = "bench" -path = "benches/bench.rs" - -[dependencies.arbitrary] -version = "1.3.0" -features = ["derive"] -optional = true - -[features] -arbitrary = ["dep:arbitrary"] -default = [ - "std", - "unicode", -] -std = [] -unicode = [ - "unicode-age", - "unicode-bool", - "unicode-case", - "unicode-gencat", - "unicode-perl", - "unicode-script", - "unicode-segment", -] -unicode-age = [] -unicode-bool = [] -unicode-case = [] -unicode-gencat = [] -unicode-perl = [] -unicode-script = [] -unicode-segment = [] diff --git a/vendor/regex-syntax/LICENSE-APACHE b/vendor/regex-syntax/LICENSE-APACHE deleted file mode 100644 index 16fe87b0..00000000 --- a/vendor/regex-syntax/LICENSE-APACHE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the 
following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/vendor/regex-syntax/LICENSE-MIT b/vendor/regex-syntax/LICENSE-MIT deleted file mode 100644 index 39d4bdb5..00000000 --- a/vendor/regex-syntax/LICENSE-MIT +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 2014 The Rust Project Developers - -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/vendor/regex-syntax/README.md b/vendor/regex-syntax/README.md deleted file mode 100644 index 529513b0..00000000 --- a/vendor/regex-syntax/README.md +++ /dev/null @@ -1,96 +0,0 @@ -regex-syntax -============ -This crate provides a robust regular expression parser. - -[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions) -[![Crates.io](https://img.shields.io/crates/v/regex-syntax.svg)](https://crates.io/crates/regex-syntax) - - -### Documentation - -https://docs.rs/regex-syntax - - -### Overview - -There are two primary types exported by this crate: `Ast` and `Hir`. The former -is a faithful abstract syntax of a regular expression, and can convert regular -expressions back to their concrete syntax while mostly preserving its original -form. The latter type is a high level intermediate representation of a regular -expression that is amenable to analysis and compilation into byte codes or -automata. An `Hir` achieves this by drastically simplifying the syntactic -structure of the regular expression. While an `Hir` can be converted back to -its equivalent concrete syntax, the result is unlikely to resemble the original -concrete syntax that produced the `Hir`. 
- - -### Example - -This example shows how to parse a pattern string into its HIR: - -```rust -use regex_syntax::{hir::Hir, parse}; - -let hir = parse("a|b").unwrap(); -assert_eq!(hir, Hir::alternation(vec![ - Hir::literal("a".as_bytes()), - Hir::literal("b".as_bytes()), -])); -``` - - -### Safety - -This crate has no `unsafe` code and sets `forbid(unsafe_code)`. While it's -possible this crate could use `unsafe` code in the future, the standard -for doing so is extremely high. In general, most code in this crate is not -performance critical, since it tends to be dwarfed by the time it takes to -compile a regular expression into an automaton. Therefore, there is little need -for extreme optimization, and therefore, use of `unsafe`. - -The standard for using `unsafe` in this crate is extremely high because this -crate is intended to be reasonably safe to use with user supplied regular -expressions. Therefore, while there may be bugs in the regex parser itself, -they should _never_ result in memory unsafety unless there is either a bug -in the compiler or the standard library. (Since `regex-syntax` has zero -dependencies.) - - -### Crate features - -By default, this crate bundles a fairly large amount of Unicode data tables -(a source size of ~750KB). Because of their large size, one can disable some -or all of these data tables. If a regular expression attempts to use Unicode -data that is not available, then an error will occur when translating the `Ast` -to the `Hir`. - -The full set of features one can disable are -[in the "Crate features" section of the documentation](https://docs.rs/regex-syntax/*/#crate-features). - - -### Testing - -Simply running `cargo test` will give you very good coverage. However, because -of the large number of features exposed by this crate, a `test` script is -included in this directory which will test several feature combinations. This -is the same script that is run in CI. 
- - -### Motivation - -The primary purpose of this crate is to provide the parser used by `regex`. -Specifically, this crate is treated as an implementation detail of the `regex`, -and is primarily developed for the needs of `regex`. - -Since this crate is an implementation detail of `regex`, it may experience -breaking change releases at a different cadence from `regex`. This is only -possible because this crate is _not_ a public dependency of `regex`. - -Another consequence of this de-coupling is that there is no direct way to -compile a `regex::Regex` from a `regex_syntax::hir::Hir`. Instead, one must -first convert the `Hir` to a string (via its `std::fmt::Display`) and then -compile that via `Regex::new`. While this does repeat some work, compilation -typically takes much longer than parsing. - -Stated differently, the coupling between `regex` and `regex-syntax` exists only -at the level of the concrete syntax. diff --git a/vendor/regex-syntax/benches/bench.rs b/vendor/regex-syntax/benches/bench.rs deleted file mode 100644 index d4703d4f..00000000 --- a/vendor/regex-syntax/benches/bench.rs +++ /dev/null @@ -1,63 +0,0 @@ -#![feature(test)] - -extern crate test; - -use regex_syntax::Parser; -use test::Bencher; - -#[bench] -fn parse_simple1(b: &mut Bencher) { - b.iter(|| { - let re = r"^bc(d|e)*$"; - Parser::new().parse(re).unwrap() - }); -} - -#[bench] -fn parse_simple2(b: &mut Bencher) { - b.iter(|| { - let re = r"'[a-zA-Z_][a-zA-Z0-9_]*(')\b"; - Parser::new().parse(re).unwrap() - }); -} - -#[bench] -fn parse_small1(b: &mut Bencher) { - b.iter(|| { - let re = r"\p{L}|\p{N}|\s|.|\d"; - Parser::new().parse(re).unwrap() - }); -} - -#[bench] -fn parse_medium1(b: &mut Bencher) { - b.iter(|| { - let re = r"\pL\p{Greek}\p{Hiragana}\p{Alphabetic}\p{Hebrew}\p{Arabic}"; - Parser::new().parse(re).unwrap() - }); -} - -#[bench] -fn parse_medium2(b: &mut Bencher) { - b.iter(|| { - let re = r"\s\S\w\W\d\D"; - Parser::new().parse(re).unwrap() - }); -} - -#[bench] -fn 
parse_medium3(b: &mut Bencher) { - b.iter(|| { - let re = - r"\p{age:3.2}\p{hira}\p{scx:hira}\p{alphabetic}\p{sc:Greek}\pL"; - Parser::new().parse(re).unwrap() - }); -} - -#[bench] -fn parse_huge(b: &mut Bencher) { - b.iter(|| { - let re = r"\p{L}{100}"; - Parser::new().parse(re).unwrap() - }); -} diff --git a/vendor/regex-syntax/src/ast/mod.rs b/vendor/regex-syntax/src/ast/mod.rs deleted file mode 100644 index ce79a89a..00000000 --- a/vendor/regex-syntax/src/ast/mod.rs +++ /dev/null @@ -1,1809 +0,0 @@ -/*! -Defines an abstract syntax for regular expressions. -*/ - -use core::cmp::Ordering; - -use alloc::{boxed::Box, string::String, vec, vec::Vec}; - -pub use crate::ast::visitor::{visit, Visitor}; - -pub mod parse; -pub mod print; -mod visitor; - -/// An error that occurred while parsing a regular expression into an abstract -/// syntax tree. -/// -/// Note that not all ASTs represents a valid regular expression. For example, -/// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a -/// valid Unicode property name. That particular error is reported when -/// translating an AST to the high-level intermediate representation (`HIR`). -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct Error { - /// The kind of error. - kind: ErrorKind, - /// The original pattern that the parser generated the error from. Every - /// span in an error is a valid range into this string. - pattern: String, - /// The span of this error. - span: Span, -} - -impl Error { - /// Return the type of this error. - pub fn kind(&self) -> &ErrorKind { - &self.kind - } - - /// The original pattern string in which this error occurred. - /// - /// Every span reported by this error is reported in terms of this string. - pub fn pattern(&self) -> &str { - &self.pattern - } - - /// Return the span at which this error occurred. - pub fn span(&self) -> &Span { - &self.span - } - - /// Return an auxiliary span. 
This span exists only for some errors that - /// benefit from being able to point to two locations in the original - /// regular expression. For example, "duplicate" errors will have the - /// main error position set to the duplicate occurrence while its - /// auxiliary span will be set to the initial occurrence. - pub fn auxiliary_span(&self) -> Option<&Span> { - use self::ErrorKind::*; - match self.kind { - FlagDuplicate { ref original } => Some(original), - FlagRepeatedNegation { ref original, .. } => Some(original), - GroupNameDuplicate { ref original, .. } => Some(original), - _ => None, - } - } -} - -/// The type of an error that occurred while building an AST. -/// -/// This error type is marked as `non_exhaustive`. This means that adding a -/// new variant is not considered a breaking change. -#[non_exhaustive] -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum ErrorKind { - /// The capturing group limit was exceeded. - /// - /// Note that this represents a limit on the total number of capturing - /// groups in a regex and not necessarily the number of nested capturing - /// groups. That is, the nest limit can be low and it is still possible for - /// this error to occur. - CaptureLimitExceeded, - /// An invalid escape sequence was found in a character class set. - ClassEscapeInvalid, - /// An invalid character class range was found. An invalid range is any - /// range where the start is greater than the end. - ClassRangeInvalid, - /// An invalid range boundary was found in a character class. Range - /// boundaries must be a single literal codepoint, but this error indicates - /// that something else was found, such as a nested class. - ClassRangeLiteral, - /// An opening `[` was found with no corresponding closing `]`. - ClassUnclosed, - /// Note that this error variant is no longer used. Namely, a decimal - /// number can only appear as a repetition quantifier. 
When the number - /// in a repetition quantifier is empty, then it gets its own specialized - /// error, `RepetitionCountDecimalEmpty`. - DecimalEmpty, - /// An invalid decimal number was given where one was expected. - DecimalInvalid, - /// A bracketed hex literal was empty. - EscapeHexEmpty, - /// A bracketed hex literal did not correspond to a Unicode scalar value. - EscapeHexInvalid, - /// An invalid hexadecimal digit was found. - EscapeHexInvalidDigit, - /// EOF was found before an escape sequence was completed. - EscapeUnexpectedEof, - /// An unrecognized escape sequence. - EscapeUnrecognized, - /// A dangling negation was used when setting flags, e.g., `i-`. - FlagDanglingNegation, - /// A flag was used twice, e.g., `i-i`. - FlagDuplicate { - /// The position of the original flag. The error position - /// points to the duplicate flag. - original: Span, - }, - /// The negation operator was used twice, e.g., `-i-s`. - FlagRepeatedNegation { - /// The position of the original negation operator. The error position - /// points to the duplicate negation operator. - original: Span, - }, - /// Expected a flag but got EOF, e.g., `(?`. - FlagUnexpectedEof, - /// Unrecognized flag, e.g., `a`. - FlagUnrecognized, - /// A duplicate capture name was found. - GroupNameDuplicate { - /// The position of the initial occurrence of the capture name. The - /// error position itself points to the duplicate occurrence. - original: Span, - }, - /// A capture group name is empty, e.g., `(?P<>abc)`. - GroupNameEmpty, - /// An invalid character was seen for a capture group name. This includes - /// errors where the first character is a digit (even though subsequent - /// characters are allowed to be digits). - GroupNameInvalid, - /// A closing `>` could not be found for a capture group name. - GroupNameUnexpectedEof, - /// An unclosed group, e.g., `(ab`. - /// - /// The span of this error corresponds to the unclosed parenthesis. - GroupUnclosed, - /// An unopened group, e.g., `ab)`. 
- GroupUnopened, - /// The nest limit was exceeded. The limit stored here is the limit - /// configured in the parser. - NestLimitExceeded(u32), - /// The range provided in a counted repetition operator is invalid. The - /// range is invalid if the start is greater than the end. - RepetitionCountInvalid, - /// An opening `{` was not followed by a valid decimal value. - /// For example, `x{}` or `x{]}` would fail. - RepetitionCountDecimalEmpty, - /// An opening `{` was found with no corresponding closing `}`. - RepetitionCountUnclosed, - /// A repetition operator was applied to a missing sub-expression. This - /// occurs, for example, in the regex consisting of just a `*` or even - /// `(?i)*`. It is, however, possible to create a repetition operating on - /// an empty sub-expression. For example, `()*` is still considered valid. - RepetitionMissing, - /// The special word boundary syntax, `\b{something}`, was used, but - /// either EOF without `}` was seen, or an invalid character in the - /// braces was seen. - SpecialWordBoundaryUnclosed, - /// The special word boundary syntax, `\b{something}`, was used, but - /// `something` was not recognized as a valid word boundary kind. - SpecialWordBoundaryUnrecognized, - /// The syntax `\b{` was observed, but afterwards the end of the pattern - /// was observed without being able to tell whether it was meant to be a - /// bounded repetition on the `\b` or the beginning of a special word - /// boundary assertion. - SpecialWordOrRepetitionUnexpectedEof, - /// The Unicode class is not valid. This typically occurs when a `\p` is - /// followed by something other than a `{`. - UnicodeClassInvalid, - /// When octal support is disabled, this error is produced when an octal - /// escape is used. The octal escape is assumed to be an invocation of - /// a backreference, which is the common case. - UnsupportedBackreference, - /// When syntax similar to PCRE's look-around is used, this error is - /// returned. 
Some example syntaxes that are rejected include, but are - /// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and - /// `(?) -> core::fmt::Result { - crate::error::Formatter::from(self).fmt(f) - } -} - -impl core::fmt::Display for ErrorKind { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - use self::ErrorKind::*; - match *self { - CaptureLimitExceeded => write!( - f, - "exceeded the maximum number of \ - capturing groups ({})", - u32::MAX - ), - ClassEscapeInvalid => { - write!(f, "invalid escape sequence found in character class") - } - ClassRangeInvalid => write!( - f, - "invalid character class range, \ - the start must be <= the end" - ), - ClassRangeLiteral => { - write!(f, "invalid range boundary, must be a literal") - } - ClassUnclosed => write!(f, "unclosed character class"), - DecimalEmpty => write!(f, "decimal literal empty"), - DecimalInvalid => write!(f, "decimal literal invalid"), - EscapeHexEmpty => write!(f, "hexadecimal literal empty"), - EscapeHexInvalid => { - write!(f, "hexadecimal literal is not a Unicode scalar value") - } - EscapeHexInvalidDigit => write!(f, "invalid hexadecimal digit"), - EscapeUnexpectedEof => write!( - f, - "incomplete escape sequence, \ - reached end of pattern prematurely" - ), - EscapeUnrecognized => write!(f, "unrecognized escape sequence"), - FlagDanglingNegation => { - write!(f, "dangling flag negation operator") - } - FlagDuplicate { .. } => write!(f, "duplicate flag"), - FlagRepeatedNegation { .. } => { - write!(f, "flag negation operator repeated") - } - FlagUnexpectedEof => { - write!(f, "expected flag but got end of regex") - } - FlagUnrecognized => write!(f, "unrecognized flag"), - GroupNameDuplicate { .. 
} => { - write!(f, "duplicate capture group name") - } - GroupNameEmpty => write!(f, "empty capture group name"), - GroupNameInvalid => write!(f, "invalid capture group character"), - GroupNameUnexpectedEof => write!(f, "unclosed capture group name"), - GroupUnclosed => write!(f, "unclosed group"), - GroupUnopened => write!(f, "unopened group"), - NestLimitExceeded(limit) => write!( - f, - "exceed the maximum number of \ - nested parentheses/brackets ({})", - limit - ), - RepetitionCountInvalid => write!( - f, - "invalid repetition count range, \ - the start must be <= the end" - ), - RepetitionCountDecimalEmpty => { - write!(f, "repetition quantifier expects a valid decimal") - } - RepetitionCountUnclosed => { - write!(f, "unclosed counted repetition") - } - RepetitionMissing => { - write!(f, "repetition operator missing expression") - } - SpecialWordBoundaryUnclosed => { - write!( - f, - "special word boundary assertion is either \ - unclosed or contains an invalid character", - ) - } - SpecialWordBoundaryUnrecognized => { - write!( - f, - "unrecognized special word boundary assertion, \ - valid choices are: start, end, start-half \ - or end-half", - ) - } - SpecialWordOrRepetitionUnexpectedEof => { - write!( - f, - "found either the beginning of a special word \ - boundary or a bounded repetition on a \\b with \ - an opening brace, but no closing brace", - ) - } - UnicodeClassInvalid => { - write!(f, "invalid Unicode character class") - } - UnsupportedBackreference => { - write!(f, "backreferences are not supported") - } - UnsupportedLookAround => write!( - f, - "look-around, including look-ahead and look-behind, \ - is not supported" - ), - } - } -} - -/// Span represents the position information of a single AST item. -/// -/// All span positions are absolute byte offsets that can be used on the -/// original regular expression that was parsed. 
-#[derive(Clone, Copy, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct Span { - /// The start byte offset. - pub start: Position, - /// The end byte offset. - pub end: Position, -} - -impl core::fmt::Debug for Span { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "Span({:?}, {:?})", self.start, self.end) - } -} - -impl Ord for Span { - fn cmp(&self, other: &Span) -> Ordering { - (&self.start, &self.end).cmp(&(&other.start, &other.end)) - } -} - -impl PartialOrd for Span { - fn partial_cmp(&self, other: &Span) -> Option { - Some(self.cmp(other)) - } -} - -/// A single position in a regular expression. -/// -/// A position encodes one half of a span, and include the byte offset, line -/// number and column number. -#[derive(Clone, Copy, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct Position { - /// The absolute offset of this position, starting at `0` from the - /// beginning of the regular expression pattern string. - pub offset: usize, - /// The line number, starting at `1`. - pub line: usize, - /// The approximate column number, starting at `1`. - pub column: usize, -} - -impl core::fmt::Debug for Position { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!( - f, - "Position(o: {:?}, l: {:?}, c: {:?})", - self.offset, self.line, self.column - ) - } -} - -impl Ord for Position { - fn cmp(&self, other: &Position) -> Ordering { - self.offset.cmp(&other.offset) - } -} - -impl PartialOrd for Position { - fn partial_cmp(&self, other: &Position) -> Option { - Some(self.cmp(other)) - } -} - -impl Span { - /// Create a new span with the given positions. - pub fn new(start: Position, end: Position) -> Span { - Span { start, end } - } - - /// Create a new span using the given position as the start and end. 
- pub fn splat(pos: Position) -> Span { - Span::new(pos, pos) - } - - /// Create a new span by replacing the starting the position with the one - /// given. - pub fn with_start(self, pos: Position) -> Span { - Span { start: pos, ..self } - } - - /// Create a new span by replacing the ending the position with the one - /// given. - pub fn with_end(self, pos: Position) -> Span { - Span { end: pos, ..self } - } - - /// Returns true if and only if this span occurs on a single line. - pub fn is_one_line(&self) -> bool { - self.start.line == self.end.line - } - - /// Returns true if and only if this span is empty. That is, it points to - /// a single position in the concrete syntax of a regular expression. - pub fn is_empty(&self) -> bool { - self.start.offset == self.end.offset - } -} - -impl Position { - /// Create a new position with the given information. - /// - /// `offset` is the absolute offset of the position, starting at `0` from - /// the beginning of the regular expression pattern string. - /// - /// `line` is the line number, starting at `1`. - /// - /// `column` is the approximate column number, starting at `1`. - pub fn new(offset: usize, line: usize, column: usize) -> Position { - Position { offset, line, column } - } -} - -/// An abstract syntax tree for a singular expression along with comments -/// found. -/// -/// Comments are not stored in the tree itself to avoid complexity. Each -/// comment contains a span of precisely where it occurred in the original -/// regular expression. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct WithComments { - /// The actual ast. - pub ast: Ast, - /// All comments found in the original regular expression. - pub comments: Vec, -} - -/// A comment from a regular expression with an associated span. -/// -/// A regular expression can only contain comments when the `x` flag is -/// enabled. 
-#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct Comment { - /// The span of this comment, including the beginning `#` and ending `\n`. - pub span: Span, - /// The comment text, starting with the first character following the `#` - /// and ending with the last character preceding the `\n`. - pub comment: String, -} - -/// An abstract syntax tree for a single regular expression. -/// -/// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap -/// space proportional to the size of the `Ast`. -/// -/// This type defines its own destructor that uses constant stack space and -/// heap space proportional to the size of the `Ast`. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum Ast { - /// An empty regex that matches everything. - Empty(Box), - /// A set of flags, e.g., `(?is)`. - Flags(Box), - /// A single character literal, which includes escape sequences. - Literal(Box), - /// The "any character" class. - Dot(Box), - /// A single zero-width assertion. - Assertion(Box), - /// A single Unicode character class, e.g., `\pL` or `\p{Greek}`. - ClassUnicode(Box), - /// A single perl character class, e.g., `\d` or `\W`. - ClassPerl(Box), - /// A single bracketed character class set, which may contain zero or more - /// character ranges and/or zero or more nested classes. e.g., - /// `[a-zA-Z\pL]`. - ClassBracketed(Box), - /// A repetition operator applied to an arbitrary regular expression. - Repetition(Box), - /// A grouped regular expression. - Group(Box), - /// An alternation of regular expressions. - Alternation(Box), - /// A concatenation of regular expressions. - Concat(Box), -} - -impl Ast { - /// Create an "empty" AST item. - pub fn empty(span: Span) -> Ast { - Ast::Empty(Box::new(span)) - } - - /// Create a "flags" AST item. 
- pub fn flags(e: SetFlags) -> Ast { - Ast::Flags(Box::new(e)) - } - - /// Create a "literal" AST item. - pub fn literal(e: Literal) -> Ast { - Ast::Literal(Box::new(e)) - } - - /// Create a "dot" AST item. - pub fn dot(span: Span) -> Ast { - Ast::Dot(Box::new(span)) - } - - /// Create a "assertion" AST item. - pub fn assertion(e: Assertion) -> Ast { - Ast::Assertion(Box::new(e)) - } - - /// Create a "Unicode class" AST item. - pub fn class_unicode(e: ClassUnicode) -> Ast { - Ast::ClassUnicode(Box::new(e)) - } - - /// Create a "Perl class" AST item. - pub fn class_perl(e: ClassPerl) -> Ast { - Ast::ClassPerl(Box::new(e)) - } - - /// Create a "bracketed class" AST item. - pub fn class_bracketed(e: ClassBracketed) -> Ast { - Ast::ClassBracketed(Box::new(e)) - } - - /// Create a "repetition" AST item. - pub fn repetition(e: Repetition) -> Ast { - Ast::Repetition(Box::new(e)) - } - - /// Create a "group" AST item. - pub fn group(e: Group) -> Ast { - Ast::Group(Box::new(e)) - } - - /// Create a "alternation" AST item. - pub fn alternation(e: Alternation) -> Ast { - Ast::Alternation(Box::new(e)) - } - - /// Create a "concat" AST item. - pub fn concat(e: Concat) -> Ast { - Ast::Concat(Box::new(e)) - } - - /// Return the span of this abstract syntax tree. - pub fn span(&self) -> &Span { - match *self { - Ast::Empty(ref span) => span, - Ast::Flags(ref x) => &x.span, - Ast::Literal(ref x) => &x.span, - Ast::Dot(ref span) => span, - Ast::Assertion(ref x) => &x.span, - Ast::ClassUnicode(ref x) => &x.span, - Ast::ClassPerl(ref x) => &x.span, - Ast::ClassBracketed(ref x) => &x.span, - Ast::Repetition(ref x) => &x.span, - Ast::Group(ref x) => &x.span, - Ast::Alternation(ref x) => &x.span, - Ast::Concat(ref x) => &x.span, - } - } - - /// Return true if and only if this Ast is empty. - pub fn is_empty(&self) -> bool { - match *self { - Ast::Empty(_) => true, - _ => false, - } - } - - /// Returns true if and only if this AST has any (including possibly empty) - /// subexpressions. 
- fn has_subexprs(&self) -> bool { - match *self { - Ast::Empty(_) - | Ast::Flags(_) - | Ast::Literal(_) - | Ast::Dot(_) - | Ast::Assertion(_) - | Ast::ClassUnicode(_) - | Ast::ClassPerl(_) => false, - Ast::ClassBracketed(_) - | Ast::Repetition(_) - | Ast::Group(_) - | Ast::Alternation(_) - | Ast::Concat(_) => true, - } - } -} - -/// Print a display representation of this Ast. -/// -/// This does not preserve any of the original whitespace formatting that may -/// have originally been present in the concrete syntax from which this Ast -/// was generated. -/// -/// This implementation uses constant stack space and heap space proportional -/// to the size of the `Ast`. -impl core::fmt::Display for Ast { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - use crate::ast::print::Printer; - Printer::new().print(self, f) - } -} - -/// An alternation of regular expressions. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct Alternation { - /// The span of this alternation. - pub span: Span, - /// The alternate regular expressions. - pub asts: Vec, -} - -impl Alternation { - /// Return this alternation as an AST. - /// - /// If this alternation contains zero ASTs, then `Ast::empty` is returned. - /// If this alternation contains exactly 1 AST, then the corresponding AST - /// is returned. Otherwise, `Ast::alternation` is returned. - pub fn into_ast(mut self) -> Ast { - match self.asts.len() { - 0 => Ast::empty(self.span), - 1 => self.asts.pop().unwrap(), - _ => Ast::alternation(self), - } - } -} - -/// A concatenation of regular expressions. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct Concat { - /// The span of this concatenation. - pub span: Span, - /// The concatenation regular expressions. - pub asts: Vec, -} - -impl Concat { - /// Return this concatenation as an AST. 
- /// - /// If this alternation contains zero ASTs, then `Ast::empty` is returned. - /// If this alternation contains exactly 1 AST, then the corresponding AST - /// is returned. Otherwise, `Ast::concat` is returned. - pub fn into_ast(mut self) -> Ast { - match self.asts.len() { - 0 => Ast::empty(self.span), - 1 => self.asts.pop().unwrap(), - _ => Ast::concat(self), - } - } -} - -/// A single literal expression. -/// -/// A literal corresponds to a single Unicode scalar value. Literals may be -/// represented in their literal form, e.g., `a` or in their escaped form, -/// e.g., `\x61`. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct Literal { - /// The span of this literal. - pub span: Span, - /// The kind of this literal. - pub kind: LiteralKind, - /// The Unicode scalar value corresponding to this literal. - pub c: char, -} - -impl Literal { - /// If this literal was written as a `\x` hex escape, then this returns - /// the corresponding byte value. Otherwise, this returns `None`. - pub fn byte(&self) -> Option { - match self.kind { - LiteralKind::HexFixed(HexLiteralKind::X) => { - u8::try_from(self.c).ok() - } - _ => None, - } - } -} - -/// The kind of a single literal expression. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum LiteralKind { - /// The literal is written verbatim, e.g., `a` or `☃`. - Verbatim, - /// The literal is written as an escape because it is otherwise a special - /// regex meta character, e.g., `\*` or `\[`. - Meta, - /// The literal is written as an escape despite the fact that the escape is - /// unnecessary, e.g., `\%` or `\/`. - Superfluous, - /// The literal is written as an octal escape, e.g., `\141`. - Octal, - /// The literal is written as a hex code with a fixed number of digits - /// depending on the type of the escape, e.g., `\x61` or `\u0061` or - /// `\U00000061`. 
- HexFixed(HexLiteralKind), - /// The literal is written as a hex code with a bracketed number of - /// digits. The only restriction is that the bracketed hex code must refer - /// to a valid Unicode scalar value. - HexBrace(HexLiteralKind), - /// The literal is written as a specially recognized escape, e.g., `\f` - /// or `\n`. - Special(SpecialLiteralKind), -} - -/// The type of a special literal. -/// -/// A special literal is a special escape sequence recognized by the regex -/// parser, e.g., `\f` or `\n`. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum SpecialLiteralKind { - /// Bell, spelled `\a` (`\x07`). - Bell, - /// Form feed, spelled `\f` (`\x0C`). - FormFeed, - /// Tab, spelled `\t` (`\x09`). - Tab, - /// Line feed, spelled `\n` (`\x0A`). - LineFeed, - /// Carriage return, spelled `\r` (`\x0D`). - CarriageReturn, - /// Vertical tab, spelled `\v` (`\x0B`). - VerticalTab, - /// Space, spelled `\ ` (`\x20`). Note that this can only appear when - /// parsing in verbose mode. - Space, -} - -/// The type of a Unicode hex literal. -/// -/// Note that all variants behave the same when used with brackets. They only -/// differ when used without brackets in the number of hex digits that must -/// follow. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum HexLiteralKind { - /// A `\x` prefix. When used without brackets, this form is limited to - /// two digits. - X, - /// A `\u` prefix. When used without brackets, this form is limited to - /// four digits. - UnicodeShort, - /// A `\U` prefix. When used without brackets, this form is limited to - /// eight digits. - UnicodeLong, -} - -impl HexLiteralKind { - /// The number of digits that must be used with this literal form when - /// used without brackets. When used with brackets, there is no - /// restriction on the number of digits. 
- pub fn digits(&self) -> u32 { - match *self { - HexLiteralKind::X => 2, - HexLiteralKind::UnicodeShort => 4, - HexLiteralKind::UnicodeLong => 8, - } - } -} - -/// A Perl character class. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct ClassPerl { - /// The span of this class. - pub span: Span, - /// The kind of Perl class. - pub kind: ClassPerlKind, - /// Whether the class is negated or not. e.g., `\d` is not negated but - /// `\D` is. - pub negated: bool, -} - -/// The available Perl character classes. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum ClassPerlKind { - /// Decimal numbers. - Digit, - /// Whitespace. - Space, - /// Word characters. - Word, -} - -/// An ASCII character class. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct ClassAscii { - /// The span of this class. - pub span: Span, - /// The kind of ASCII class. - pub kind: ClassAsciiKind, - /// Whether the class is negated or not. e.g., `[[:alpha:]]` is not negated - /// but `[[:^alpha:]]` is. - pub negated: bool, -} - -/// The available ASCII character classes. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum ClassAsciiKind { - /// `[0-9A-Za-z]` - Alnum, - /// `[A-Za-z]` - Alpha, - /// `[\x00-\x7F]` - Ascii, - /// `[ \t]` - Blank, - /// `[\x00-\x1F\x7F]` - Cntrl, - /// `[0-9]` - Digit, - /// `[!-~]` - Graph, - /// `[a-z]` - Lower, - /// `[ -~]` - Print, - /// `[!-/:-@\[-`{-~]` - Punct, - /// `[\t\n\v\f\r ]` - Space, - /// `[A-Z]` - Upper, - /// `[0-9A-Za-z_]` - Word, - /// `[0-9A-Fa-f]` - Xdigit, -} - -impl ClassAsciiKind { - /// Return the corresponding ClassAsciiKind variant for the given name. - /// - /// The name given should correspond to the lowercase version of the - /// variant name. 
e.g., `cntrl` is the name for `ClassAsciiKind::Cntrl`. - /// - /// If no variant with the corresponding name exists, then `None` is - /// returned. - pub fn from_name(name: &str) -> Option { - use self::ClassAsciiKind::*; - match name { - "alnum" => Some(Alnum), - "alpha" => Some(Alpha), - "ascii" => Some(Ascii), - "blank" => Some(Blank), - "cntrl" => Some(Cntrl), - "digit" => Some(Digit), - "graph" => Some(Graph), - "lower" => Some(Lower), - "print" => Some(Print), - "punct" => Some(Punct), - "space" => Some(Space), - "upper" => Some(Upper), - "word" => Some(Word), - "xdigit" => Some(Xdigit), - _ => None, - } - } -} - -/// A Unicode character class. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct ClassUnicode { - /// The span of this class. - pub span: Span, - /// Whether this class is negated or not. - /// - /// Note: be careful when using this attribute. This specifically refers - /// to whether the class is written as `\p` or `\P`, where the latter - /// is `negated = true`. However, it also possible to write something like - /// `\P{scx!=Katakana}` which is actually equivalent to - /// `\p{scx=Katakana}` and is therefore not actually negated even though - /// `negated = true` here. To test whether this class is truly negated - /// or not, use the `is_negated` method. - pub negated: bool, - /// The kind of Unicode class. - pub kind: ClassUnicodeKind, -} - -impl ClassUnicode { - /// Returns true if this class has been negated. - /// - /// Note that this takes the Unicode op into account, if it's present. - /// e.g., `is_negated` for `\P{scx!=Katakana}` will return `false`. - pub fn is_negated(&self) -> bool { - match self.kind { - ClassUnicodeKind::NamedValue { - op: ClassUnicodeOpKind::NotEqual, - .. - } => !self.negated, - _ => self.negated, - } - } -} - -/// The available forms of Unicode character classes. 
-#[derive(Clone, Debug, Eq, PartialEq)] -pub enum ClassUnicodeKind { - /// A one letter abbreviated class, e.g., `\pN`. - OneLetter(char), - /// A binary property, general category or script. The string may be - /// empty. - Named(String), - /// A property name and an associated value. - NamedValue { - /// The type of Unicode op used to associate `name` with `value`. - op: ClassUnicodeOpKind, - /// The property name (which may be empty). - name: String, - /// The property value (which may be empty). - value: String, - }, -} - -#[cfg(feature = "arbitrary")] -impl arbitrary::Arbitrary<'_> for ClassUnicodeKind { - fn arbitrary( - u: &mut arbitrary::Unstructured, - ) -> arbitrary::Result { - #[cfg(any( - feature = "unicode-age", - feature = "unicode-bool", - feature = "unicode-gencat", - feature = "unicode-perl", - feature = "unicode-script", - feature = "unicode-segment", - ))] - { - use alloc::string::ToString; - - use super::unicode_tables::{ - property_names::PROPERTY_NAMES, - property_values::PROPERTY_VALUES, - }; - - match u.choose_index(3)? 
{ - 0 => { - let all = PROPERTY_VALUES - .iter() - .flat_map(|e| e.1.iter()) - .filter(|(name, _)| name.len() == 1) - .count(); - let idx = u.choose_index(all)?; - let value = PROPERTY_VALUES - .iter() - .flat_map(|e| e.1.iter()) - .take(idx + 1) - .last() - .unwrap() - .0 - .chars() - .next() - .unwrap(); - Ok(ClassUnicodeKind::OneLetter(value)) - } - 1 => { - let all = PROPERTY_VALUES - .iter() - .map(|e| e.1.len()) - .sum::() - + PROPERTY_NAMES.len(); - let idx = u.choose_index(all)?; - let name = PROPERTY_VALUES - .iter() - .flat_map(|e| e.1.iter()) - .chain(PROPERTY_NAMES) - .map(|(_, e)| e) - .take(idx + 1) - .last() - .unwrap(); - Ok(ClassUnicodeKind::Named(name.to_string())) - } - 2 => { - let all = PROPERTY_VALUES - .iter() - .map(|e| e.1.len()) - .sum::(); - let idx = u.choose_index(all)?; - let (prop, value) = PROPERTY_VALUES - .iter() - .flat_map(|e| { - e.1.iter().map(|(_, value)| (e.0, value)) - }) - .take(idx + 1) - .last() - .unwrap(); - Ok(ClassUnicodeKind::NamedValue { - op: u.arbitrary()?, - name: prop.to_string(), - value: value.to_string(), - }) - } - _ => unreachable!("index chosen is impossible"), - } - } - #[cfg(not(any( - feature = "unicode-age", - feature = "unicode-bool", - feature = "unicode-gencat", - feature = "unicode-perl", - feature = "unicode-script", - feature = "unicode-segment", - )))] - { - match u.choose_index(3)? 
{ - 0 => Ok(ClassUnicodeKind::OneLetter(u.arbitrary()?)), - 1 => Ok(ClassUnicodeKind::Named(u.arbitrary()?)), - 2 => Ok(ClassUnicodeKind::NamedValue { - op: u.arbitrary()?, - name: u.arbitrary()?, - value: u.arbitrary()?, - }), - _ => unreachable!("index chosen is impossible"), - } - } - } - - fn size_hint(depth: usize) -> (usize, Option) { - #[cfg(any( - feature = "unicode-age", - feature = "unicode-bool", - feature = "unicode-gencat", - feature = "unicode-perl", - feature = "unicode-script", - feature = "unicode-segment", - ))] - { - arbitrary::size_hint::and_all(&[ - usize::size_hint(depth), - usize::size_hint(depth), - arbitrary::size_hint::or( - (0, Some(0)), - ClassUnicodeOpKind::size_hint(depth), - ), - ]) - } - #[cfg(not(any( - feature = "unicode-age", - feature = "unicode-bool", - feature = "unicode-gencat", - feature = "unicode-perl", - feature = "unicode-script", - feature = "unicode-segment", - )))] - { - arbitrary::size_hint::and( - usize::size_hint(depth), - arbitrary::size_hint::or_all(&[ - char::size_hint(depth), - String::size_hint(depth), - arbitrary::size_hint::and_all(&[ - String::size_hint(depth), - String::size_hint(depth), - ClassUnicodeOpKind::size_hint(depth), - ]), - ]), - ) - } - } -} - -/// The type of op used in a Unicode character class. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum ClassUnicodeOpKind { - /// A property set to a specific value, e.g., `\p{scx=Katakana}`. - Equal, - /// A property set to a specific value using a colon, e.g., - /// `\p{scx:Katakana}`. - Colon, - /// A property that isn't a particular value, e.g., `\p{scx!=Katakana}`. - NotEqual, -} - -impl ClassUnicodeOpKind { - /// Whether the op is an equality op or not. - pub fn is_equal(&self) -> bool { - match *self { - ClassUnicodeOpKind::Equal | ClassUnicodeOpKind::Colon => true, - _ => false, - } - } -} - -/// A bracketed character class, e.g., `[a-z0-9]`. 
-#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct ClassBracketed { - /// The span of this class. - pub span: Span, - /// Whether this class is negated or not. e.g., `[a]` is not negated but - /// `[^a]` is. - pub negated: bool, - /// The type of this set. A set is either a normal union of things, e.g., - /// `[abc]` or a result of applying set operations, e.g., `[\pL--c]`. - pub kind: ClassSet, -} - -/// A character class set. -/// -/// This type corresponds to the internal structure of a bracketed character -/// class. That is, every bracketed character is one of two types: a union of -/// items (literals, ranges, other bracketed classes) or a tree of binary set -/// operations. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum ClassSet { - /// An item, which can be a single literal, range, nested character class - /// or a union of items. - Item(ClassSetItem), - /// A single binary operation (i.e., &&, -- or ~~). - BinaryOp(ClassSetBinaryOp), -} - -impl ClassSet { - /// Build a set from a union. - pub fn union(ast: ClassSetUnion) -> ClassSet { - ClassSet::Item(ClassSetItem::Union(ast)) - } - - /// Return the span of this character class set. - pub fn span(&self) -> &Span { - match *self { - ClassSet::Item(ref x) => x.span(), - ClassSet::BinaryOp(ref x) => &x.span, - } - } - - /// Return true if and only if this class set is empty. - fn is_empty(&self) -> bool { - match *self { - ClassSet::Item(ClassSetItem::Empty(_)) => true, - _ => false, - } - } -} - -/// A single component of a character class set. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum ClassSetItem { - /// An empty item. - /// - /// Note that a bracketed character class cannot contain a single empty - /// item. Empty items can appear when using one of the binary operators. 
- /// For example, `[&&]` is the intersection of two empty classes. - Empty(Span), - /// A single literal. - Literal(Literal), - /// A range between two literals. - Range(ClassSetRange), - /// An ASCII character class, e.g., `[:alnum:]` or `[:punct:]`. - Ascii(ClassAscii), - /// A Unicode character class, e.g., `\pL` or `\p{Greek}`. - Unicode(ClassUnicode), - /// A perl character class, e.g., `\d` or `\W`. - Perl(ClassPerl), - /// A bracketed character class set, which may contain zero or more - /// character ranges and/or zero or more nested classes. e.g., - /// `[a-zA-Z\pL]`. - Bracketed(Box), - /// A union of items. - Union(ClassSetUnion), -} - -impl ClassSetItem { - /// Return the span of this character class set item. - pub fn span(&self) -> &Span { - match *self { - ClassSetItem::Empty(ref span) => span, - ClassSetItem::Literal(ref x) => &x.span, - ClassSetItem::Range(ref x) => &x.span, - ClassSetItem::Ascii(ref x) => &x.span, - ClassSetItem::Perl(ref x) => &x.span, - ClassSetItem::Unicode(ref x) => &x.span, - ClassSetItem::Bracketed(ref x) => &x.span, - ClassSetItem::Union(ref x) => &x.span, - } - } -} - -/// A single character class range in a set. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct ClassSetRange { - /// The span of this range. - pub span: Span, - /// The start of this range. - pub start: Literal, - /// The end of this range. - pub end: Literal, -} - -impl ClassSetRange { - /// Returns true if and only if this character class range is valid. - /// - /// The only case where a range is invalid is if its start is greater than - /// its end. - pub fn is_valid(&self) -> bool { - self.start.c <= self.end.c - } -} - -/// A union of items inside a character class set. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct ClassSetUnion { - /// The span of the items in this operation. 
e.g., the `a-z0-9` in - /// `[^a-z0-9]` - pub span: Span, - /// The sequence of items that make up this union. - pub items: Vec, -} - -impl ClassSetUnion { - /// Push a new item in this union. - /// - /// The ending position of this union's span is updated to the ending - /// position of the span of the item given. If the union is empty, then - /// the starting position of this union is set to the starting position - /// of this item. - /// - /// In other words, if you only use this method to add items to a union - /// and you set the spans on each item correctly, then you should never - /// need to adjust the span of the union directly. - pub fn push(&mut self, item: ClassSetItem) { - if self.items.is_empty() { - self.span.start = item.span().start; - } - self.span.end = item.span().end; - self.items.push(item); - } - - /// Return this union as a character class set item. - /// - /// If this union contains zero items, then an empty union is - /// returned. If this concatenation contains exactly 1 item, then the - /// corresponding item is returned. Otherwise, ClassSetItem::Union is - /// returned. - pub fn into_item(mut self) -> ClassSetItem { - match self.items.len() { - 0 => ClassSetItem::Empty(self.span), - 1 => self.items.pop().unwrap(), - _ => ClassSetItem::Union(self), - } - } -} - -/// A Unicode character class set operation. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct ClassSetBinaryOp { - /// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--h-p]`. - pub span: Span, - /// The type of this set operation. - pub kind: ClassSetBinaryOpKind, - /// The left hand side of the operation. - pub lhs: Box, - /// The right hand side of the operation. - pub rhs: Box, -} - -/// The type of a Unicode character class set operation. -/// -/// Note that this doesn't explicitly represent union since there is no -/// explicit union operator. 
Concatenation inside a character class corresponds -/// to the union operation. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum ClassSetBinaryOpKind { - /// The intersection of two sets, e.g., `\pN&&[a-z]`. - Intersection, - /// The difference of two sets, e.g., `\pN--[0-9]`. - Difference, - /// The symmetric difference of two sets. The symmetric difference is the - /// set of elements belonging to one but not both sets. - /// e.g., `[\pL~~[:ascii:]]`. - SymmetricDifference, -} - -/// A single zero-width assertion. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct Assertion { - /// The span of this assertion. - pub span: Span, - /// The assertion kind, e.g., `\b` or `^`. - pub kind: AssertionKind, -} - -/// An assertion kind. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum AssertionKind { - /// `^` - StartLine, - /// `$` - EndLine, - /// `\A` - StartText, - /// `\z` - EndText, - /// `\b` - WordBoundary, - /// `\B` - NotWordBoundary, - /// `\b{start}` - WordBoundaryStart, - /// `\b{end}` - WordBoundaryEnd, - /// `\<` (alias for `\b{start}`) - WordBoundaryStartAngle, - /// `\>` (alias for `\b{end}`) - WordBoundaryEndAngle, - /// `\b{start-half}` - WordBoundaryStartHalf, - /// `\b{end-half}` - WordBoundaryEndHalf, -} - -/// A repetition operation applied to a regular expression. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct Repetition { - /// The span of this operation. - pub span: Span, - /// The actual operation. - pub op: RepetitionOp, - /// Whether this operation was applied greedily or not. - pub greedy: bool, - /// The regular expression under repetition. - pub ast: Box, -} - -/// The repetition operator itself. 
-#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct RepetitionOp { - /// The span of this operator. This includes things like `+`, `*?` and - /// `{m,n}`. - pub span: Span, - /// The type of operation. - pub kind: RepetitionKind, -} - -/// The kind of a repetition operator. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum RepetitionKind { - /// `?` - ZeroOrOne, - /// `*` - ZeroOrMore, - /// `+` - OneOrMore, - /// `{m,n}` - Range(RepetitionRange), -} - -/// A range repetition operator. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum RepetitionRange { - /// `{m}` - Exactly(u32), - /// `{m,}` - AtLeast(u32), - /// `{m,n}` - Bounded(u32, u32), -} - -impl RepetitionRange { - /// Returns true if and only if this repetition range is valid. - /// - /// The only case where a repetition range is invalid is if it is bounded - /// and its start is greater than its end. - pub fn is_valid(&self) -> bool { - match *self { - RepetitionRange::Bounded(s, e) if s > e => false, - _ => true, - } - } -} - -/// A grouped regular expression. -/// -/// This includes both capturing and non-capturing groups. This does **not** -/// include flag-only groups like `(?is)`, but does contain any group that -/// contains a sub-expression, e.g., `(a)`, `(?Pa)`, `(?:a)` and -/// `(?is:a)`. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct Group { - /// The span of this group. - pub span: Span, - /// The kind of this group. - pub kind: GroupKind, - /// The regular expression in this group. - pub ast: Box, -} - -impl Group { - /// If this group is non-capturing, then this returns the (possibly empty) - /// set of flags. Otherwise, `None` is returned. 
- pub fn flags(&self) -> Option<&Flags> { - match self.kind { - GroupKind::NonCapturing(ref flags) => Some(flags), - _ => None, - } - } - - /// Returns true if and only if this group is capturing. - pub fn is_capturing(&self) -> bool { - match self.kind { - GroupKind::CaptureIndex(_) | GroupKind::CaptureName { .. } => true, - GroupKind::NonCapturing(_) => false, - } - } - - /// Returns the capture index of this group, if this is a capturing group. - /// - /// This returns a capture index precisely when `is_capturing` is `true`. - pub fn capture_index(&self) -> Option { - match self.kind { - GroupKind::CaptureIndex(i) => Some(i), - GroupKind::CaptureName { ref name, .. } => Some(name.index), - GroupKind::NonCapturing(_) => None, - } - } -} - -/// The kind of a group. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum GroupKind { - /// `(a)` - CaptureIndex(u32), - /// `(?a)` or `(?Pa)` - CaptureName { - /// True if the `?P<` syntax is used and false if the `?<` syntax is used. - starts_with_p: bool, - /// The capture name. - name: CaptureName, - }, - /// `(?:a)` and `(?i:a)` - NonCapturing(Flags), -} - -/// A capture name. -/// -/// This corresponds to the name itself between the angle brackets in, e.g., -/// `(?Pexpr)`. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct CaptureName { - /// The span of this capture name. - pub span: Span, - /// The capture name. - pub name: String, - /// The capture index. 
- pub index: u32, -} - -#[cfg(feature = "arbitrary")] -impl arbitrary::Arbitrary<'_> for CaptureName { - fn arbitrary( - u: &mut arbitrary::Unstructured, - ) -> arbitrary::Result { - let len = u.arbitrary_len::()?; - if len == 0 { - return Err(arbitrary::Error::NotEnoughData); - } - let mut name: String = String::new(); - for _ in 0..len { - let ch: char = u.arbitrary()?; - let cp = u32::from(ch); - let ascii_letter_offset = u8::try_from(cp % 26).unwrap(); - let ascii_letter = b'a' + ascii_letter_offset; - name.push(char::from(ascii_letter)); - } - Ok(CaptureName { span: u.arbitrary()?, name, index: u.arbitrary()? }) - } - - fn size_hint(depth: usize) -> (usize, Option) { - arbitrary::size_hint::and_all(&[ - Span::size_hint(depth), - usize::size_hint(depth), - u32::size_hint(depth), - ]) - } -} - -/// A group of flags that is not applied to a particular regular expression. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct SetFlags { - /// The span of these flags, including the grouping parentheses. - pub span: Span, - /// The actual sequence of flags. - pub flags: Flags, -} - -/// A group of flags. -/// -/// This corresponds only to the sequence of flags themselves, e.g., `is-u`. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct Flags { - /// The span of this group of flags. - pub span: Span, - /// A sequence of flag items. Each item is either a flag or a negation - /// operator. - pub items: Vec, -} - -impl Flags { - /// Add the given item to this sequence of flags. - /// - /// If the item was added successfully, then `None` is returned. If the - /// given item is a duplicate, then `Some(i)` is returned, where - /// `items[i].kind == item.kind`. 
- pub fn add_item(&mut self, item: FlagsItem) -> Option { - for (i, x) in self.items.iter().enumerate() { - if x.kind == item.kind { - return Some(i); - } - } - self.items.push(item); - None - } - - /// Returns the state of the given flag in this set. - /// - /// If the given flag is in the set but is negated, then `Some(false)` is - /// returned. - /// - /// If the given flag is in the set and is not negated, then `Some(true)` - /// is returned. - /// - /// Otherwise, `None` is returned. - pub fn flag_state(&self, flag: Flag) -> Option { - let mut negated = false; - for x in &self.items { - match x.kind { - FlagsItemKind::Negation => { - negated = true; - } - FlagsItemKind::Flag(ref xflag) if xflag == &flag => { - return Some(!negated); - } - _ => {} - } - } - None - } -} - -/// A single item in a group of flags. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub struct FlagsItem { - /// The span of this item. - pub span: Span, - /// The kind of this item. - pub kind: FlagsItemKind, -} - -/// The kind of an item in a group of flags. -#[derive(Clone, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum FlagsItemKind { - /// A negation operator applied to all subsequent flags in the enclosing - /// group. - Negation, - /// A single flag in a group. - Flag(Flag), -} - -impl FlagsItemKind { - /// Returns true if and only if this item is a negation operator. - pub fn is_negation(&self) -> bool { - match *self { - FlagsItemKind::Negation => true, - _ => false, - } - } -} - -/// A single flag. 
-#[derive(Clone, Copy, Debug, Eq, PartialEq)] -#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] -pub enum Flag { - /// `i` - CaseInsensitive, - /// `m` - MultiLine, - /// `s` - DotMatchesNewLine, - /// `U` - SwapGreed, - /// `u` - Unicode, - /// `R` - CRLF, - /// `x` - IgnoreWhitespace, -} - -/// A custom `Drop` impl is used for `Ast` such that it uses constant stack -/// space but heap space proportional to the depth of the `Ast`. -impl Drop for Ast { - fn drop(&mut self) { - use core::mem; - - match *self { - Ast::Empty(_) - | Ast::Flags(_) - | Ast::Literal(_) - | Ast::Dot(_) - | Ast::Assertion(_) - | Ast::ClassUnicode(_) - | Ast::ClassPerl(_) - // Bracketed classes are recursive, they get their own Drop impl. - | Ast::ClassBracketed(_) => return, - Ast::Repetition(ref x) if !x.ast.has_subexprs() => return, - Ast::Group(ref x) if !x.ast.has_subexprs() => return, - Ast::Alternation(ref x) if x.asts.is_empty() => return, - Ast::Concat(ref x) if x.asts.is_empty() => return, - _ => {} - } - - let empty_span = || Span::splat(Position::new(0, 0, 0)); - let empty_ast = || Ast::empty(empty_span()); - let mut stack = vec![mem::replace(self, empty_ast())]; - while let Some(mut ast) = stack.pop() { - match ast { - Ast::Empty(_) - | Ast::Flags(_) - | Ast::Literal(_) - | Ast::Dot(_) - | Ast::Assertion(_) - | Ast::ClassUnicode(_) - | Ast::ClassPerl(_) - // Bracketed classes are recursive, so they get their own Drop - // impl. - | Ast::ClassBracketed(_) => {} - Ast::Repetition(ref mut x) => { - stack.push(mem::replace(&mut x.ast, empty_ast())); - } - Ast::Group(ref mut x) => { - stack.push(mem::replace(&mut x.ast, empty_ast())); - } - Ast::Alternation(ref mut x) => { - stack.extend(x.asts.drain(..)); - } - Ast::Concat(ref mut x) => { - stack.extend(x.asts.drain(..)); - } - } - } - } -} - -/// A custom `Drop` impl is used for `ClassSet` such that it uses constant -/// stack space but heap space proportional to the depth of the `ClassSet`. 
-impl Drop for ClassSet { - fn drop(&mut self) { - use core::mem; - - match *self { - ClassSet::Item(ref item) => match *item { - ClassSetItem::Empty(_) - | ClassSetItem::Literal(_) - | ClassSetItem::Range(_) - | ClassSetItem::Ascii(_) - | ClassSetItem::Unicode(_) - | ClassSetItem::Perl(_) => return, - ClassSetItem::Bracketed(ref x) => { - if x.kind.is_empty() { - return; - } - } - ClassSetItem::Union(ref x) => { - if x.items.is_empty() { - return; - } - } - }, - ClassSet::BinaryOp(ref op) => { - if op.lhs.is_empty() && op.rhs.is_empty() { - return; - } - } - } - - let empty_span = || Span::splat(Position::new(0, 0, 0)); - let empty_set = || ClassSet::Item(ClassSetItem::Empty(empty_span())); - let mut stack = vec![mem::replace(self, empty_set())]; - while let Some(mut set) = stack.pop() { - match set { - ClassSet::Item(ref mut item) => match *item { - ClassSetItem::Empty(_) - | ClassSetItem::Literal(_) - | ClassSetItem::Range(_) - | ClassSetItem::Ascii(_) - | ClassSetItem::Unicode(_) - | ClassSetItem::Perl(_) => {} - ClassSetItem::Bracketed(ref mut x) => { - stack.push(mem::replace(&mut x.kind, empty_set())); - } - ClassSetItem::Union(ref mut x) => { - stack.extend(x.items.drain(..).map(ClassSet::Item)); - } - }, - ClassSet::BinaryOp(ref mut op) => { - stack.push(mem::replace(&mut op.lhs, empty_set())); - stack.push(mem::replace(&mut op.rhs, empty_set())); - } - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - // We use a thread with an explicit stack size to test that our destructor - // for Ast can handle arbitrarily sized expressions in constant stack - // space. In case we run on a platform without threads (WASM?), we limit - // this test to Windows/Unix. 
- #[test] - #[cfg(any(unix, windows))] - fn no_stack_overflow_on_drop() { - use std::thread; - - let run = || { - let span = || Span::splat(Position::new(0, 0, 0)); - let mut ast = Ast::empty(span()); - for i in 0..200 { - ast = Ast::group(Group { - span: span(), - kind: GroupKind::CaptureIndex(i), - ast: Box::new(ast), - }); - } - assert!(!ast.is_empty()); - }; - - // We run our test on a thread with a small stack size so we can - // force the issue more easily. - // - // NOTE(2023-03-21): It turns out that some platforms (like FreeBSD) - // will just barf with very small stack sizes. So we bump this up a bit - // to give more room to breath. When I did this, I confirmed that if - // I remove the custom `Drop` impl for `Ast`, then this test does - // indeed still fail with a stack overflow. (At the time of writing, I - // had to bump it all the way up to 32K before the test would pass even - // without the custom `Drop` impl. So 16K seems like a safe number - // here.) - // - // See: https://github.com/rust-lang/regex/issues/967 - thread::Builder::new() - .stack_size(16 << 10) - .spawn(run) - .unwrap() - .join() - .unwrap(); - } - - // This tests that our `Ast` has a reasonable size. This isn't a hard rule - // and it can be increased if given a good enough reason. But this test - // exists because the size of `Ast` was at one point over 200 bytes on a - // 64-bit target. Wow. - #[test] - fn ast_size() { - let max = 2 * core::mem::size_of::(); - let size = core::mem::size_of::(); - assert!( - size <= max, - "Ast size of {} bytes is bigger than suggested max {}", - size, - max - ); - } -} diff --git a/vendor/regex-syntax/src/ast/parse.rs b/vendor/regex-syntax/src/ast/parse.rs deleted file mode 100644 index 0c2a3526..00000000 --- a/vendor/regex-syntax/src/ast/parse.rs +++ /dev/null @@ -1,6377 +0,0 @@ -/*! -This module provides a regular expression parser. 
-*/ - -use core::{ - borrow::Borrow, - cell::{Cell, RefCell}, - mem, -}; - -use alloc::{ - boxed::Box, - string::{String, ToString}, - vec, - vec::Vec, -}; - -use crate::{ - ast::{self, Ast, Position, Span}, - either::Either, - is_escapeable_character, is_meta_character, -}; - -type Result = core::result::Result; - -/// A primitive is an expression with no sub-expressions. This includes -/// literals, assertions and non-set character classes. This representation -/// is used as intermediate state in the parser. -/// -/// This does not include ASCII character classes, since they can only appear -/// within a set character class. -#[derive(Clone, Debug, Eq, PartialEq)] -enum Primitive { - Literal(ast::Literal), - Assertion(ast::Assertion), - Dot(Span), - Perl(ast::ClassPerl), - Unicode(ast::ClassUnicode), -} - -impl Primitive { - /// Return the span of this primitive. - fn span(&self) -> &Span { - match *self { - Primitive::Literal(ref x) => &x.span, - Primitive::Assertion(ref x) => &x.span, - Primitive::Dot(ref span) => span, - Primitive::Perl(ref x) => &x.span, - Primitive::Unicode(ref x) => &x.span, - } - } - - /// Convert this primitive into a proper AST. - fn into_ast(self) -> Ast { - match self { - Primitive::Literal(lit) => Ast::literal(lit), - Primitive::Assertion(assert) => Ast::assertion(assert), - Primitive::Dot(span) => Ast::dot(span), - Primitive::Perl(cls) => Ast::class_perl(cls), - Primitive::Unicode(cls) => Ast::class_unicode(cls), - } - } - - /// Convert this primitive into an item in a character class. - /// - /// If this primitive is not a legal item (i.e., an assertion or a dot), - /// then return an error. 
- fn into_class_set_item>( - self, - p: &ParserI<'_, P>, - ) -> Result { - use self::Primitive::*; - use crate::ast::ClassSetItem; - - match self { - Literal(lit) => Ok(ClassSetItem::Literal(lit)), - Perl(cls) => Ok(ClassSetItem::Perl(cls)), - Unicode(cls) => Ok(ClassSetItem::Unicode(cls)), - x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)), - } - } - - /// Convert this primitive into a literal in a character class. In - /// particular, literals are the only valid items that can appear in - /// ranges. - /// - /// If this primitive is not a legal item (i.e., a class, assertion or a - /// dot), then return an error. - fn into_class_literal>( - self, - p: &ParserI<'_, P>, - ) -> Result { - use self::Primitive::*; - - match self { - Literal(lit) => Ok(lit), - x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)), - } - } -} - -/// Returns true if the given character is a hexadecimal digit. -fn is_hex(c: char) -> bool { - ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') -} - -/// Returns true if the given character is a valid in a capture group name. -/// -/// If `first` is true, then `c` is treated as the first character in the -/// group name (which must be alphabetic or underscore). -fn is_capture_char(c: char, first: bool) -> bool { - if first { - c == '_' || c.is_alphabetic() - } else { - c == '_' || c == '.' || c == '[' || c == ']' || c.is_alphanumeric() - } -} - -/// A builder for a regular expression parser. -/// -/// This builder permits modifying configuration options for the parser. -#[derive(Clone, Debug)] -pub struct ParserBuilder { - ignore_whitespace: bool, - nest_limit: u32, - octal: bool, - empty_min_range: bool, -} - -impl Default for ParserBuilder { - fn default() -> ParserBuilder { - ParserBuilder::new() - } -} - -impl ParserBuilder { - /// Create a new parser builder with a default configuration. 
- pub fn new() -> ParserBuilder { - ParserBuilder { - ignore_whitespace: false, - nest_limit: 250, - octal: false, - empty_min_range: false, - } - } - - /// Build a parser from this configuration with the given pattern. - pub fn build(&self) -> Parser { - Parser { - pos: Cell::new(Position { offset: 0, line: 1, column: 1 }), - capture_index: Cell::new(0), - nest_limit: self.nest_limit, - octal: self.octal, - empty_min_range: self.empty_min_range, - initial_ignore_whitespace: self.ignore_whitespace, - ignore_whitespace: Cell::new(self.ignore_whitespace), - comments: RefCell::new(vec![]), - stack_group: RefCell::new(vec![]), - stack_class: RefCell::new(vec![]), - capture_names: RefCell::new(vec![]), - scratch: RefCell::new(String::new()), - } - } - - /// Set the nesting limit for this parser. - /// - /// The nesting limit controls how deep the abstract syntax tree is allowed - /// to be. If the AST exceeds the given limit (e.g., with too many nested - /// groups), then an error is returned by the parser. - /// - /// The purpose of this limit is to act as a heuristic to prevent stack - /// overflow for consumers that do structural induction on an `Ast` using - /// explicit recursion. While this crate never does this (instead using - /// constant stack space and moving the call stack to the heap), other - /// crates may. - /// - /// This limit is not checked until the entire AST is parsed. Therefore, - /// if callers want to put a limit on the amount of heap space used, then - /// they should impose a limit on the length, in bytes, of the concrete - /// pattern string. In particular, this is viable since this parser - /// implementation will limit itself to heap space proportional to the - /// length of the pattern string. - /// - /// Note that a nest limit of `0` will return a nest limit error for most - /// patterns but not all. 
For example, a nest limit of `0` permits `a` but - /// not `ab`, since `ab` requires a concatenation, which results in a nest - /// depth of `1`. In general, a nest limit is not something that manifests - /// in an obvious way in the concrete syntax, therefore, it should not be - /// used in a granular way. - pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder { - self.nest_limit = limit; - self - } - - /// Whether to support octal syntax or not. - /// - /// Octal syntax is a little-known way of uttering Unicode codepoints in - /// a regular expression. For example, `a`, `\x61`, `\u0061` and - /// `\141` are all equivalent regular expressions, where the last example - /// shows octal syntax. - /// - /// While supporting octal syntax isn't in and of itself a problem, it does - /// make good error messages harder. That is, in PCRE based regex engines, - /// syntax like `\0` invokes a backreference, which is explicitly - /// unsupported in Rust's regex engine. However, many users expect it to - /// be supported. Therefore, when octal support is disabled, the error - /// message will explicitly mention that backreferences aren't supported. - /// - /// Octal syntax is disabled by default. - pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder { - self.octal = yes; - self - } - - /// Enable verbose mode in the regular expression. - /// - /// When enabled, verbose mode permits insignificant whitespace in many - /// places in the regular expression, as well as comments. Comments are - /// started using `#` and continue until the end of the line. - /// - /// By default, this is disabled. It may be selectively enabled in the - /// regular expression by using the `x` flag regardless of this setting. - pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder { - self.ignore_whitespace = yes; - self - } - - /// Allow using `{,n}` as an equivalent to `{0,n}`. - /// - /// When enabled, the parser accepts `{,n}` as valid syntax for `{0,n}`. 
- /// Most regular expression engines don't support the `{,n}` syntax, but - /// some others do it, namely Python's `re` library. - /// - /// This is disabled by default. - pub fn empty_min_range(&mut self, yes: bool) -> &mut ParserBuilder { - self.empty_min_range = yes; - self - } -} - -/// A regular expression parser. -/// -/// This parses a string representation of a regular expression into an -/// abstract syntax tree. The size of the tree is proportional to the length -/// of the regular expression pattern. -/// -/// A `Parser` can be configured in more detail via a [`ParserBuilder`]. -#[derive(Clone, Debug)] -pub struct Parser { - /// The current position of the parser. - pos: Cell, - /// The current capture index. - capture_index: Cell, - /// The maximum number of open parens/brackets allowed. If the parser - /// exceeds this number, then an error is returned. - nest_limit: u32, - /// Whether to support octal syntax or not. When `false`, the parser will - /// return an error helpfully pointing out that backreferences are not - /// supported. - octal: bool, - /// The initial setting for `ignore_whitespace` as provided by - /// `ParserBuilder`. It is used when resetting the parser's state. - initial_ignore_whitespace: bool, - /// Whether the parser supports `{,n}` repetitions as an equivalent to - /// `{0,n}.` - empty_min_range: bool, - /// Whether whitespace should be ignored. When enabled, comments are - /// also permitted. - ignore_whitespace: Cell, - /// A list of comments, in order of appearance. - comments: RefCell>, - /// A stack of grouped sub-expressions, including alternations. - stack_group: RefCell>, - /// A stack of nested character classes. This is only non-empty when - /// parsing a class. - stack_class: RefCell>, - /// A sorted sequence of capture names. This is used to detect duplicate - /// capture names and report an error if one is detected. - capture_names: RefCell>, - /// A scratch buffer used in various places. 
Mostly this is used to - /// accumulate relevant characters from parts of a pattern. - scratch: RefCell, -} - -/// ParserI is the internal parser implementation. -/// -/// We use this separate type so that we can carry the provided pattern string -/// along with us. In particular, a `Parser` internal state is not tied to any -/// one pattern, but `ParserI` is. -/// -/// This type also lets us use `ParserI<&Parser>` in production code while -/// retaining the convenience of `ParserI` for tests, which sometimes -/// work against the internal interface of the parser. -#[derive(Clone, Debug)] -struct ParserI<'s, P> { - /// The parser state/configuration. - parser: P, - /// The full regular expression provided by the user. - pattern: &'s str, -} - -/// GroupState represents a single stack frame while parsing nested groups -/// and alternations. Each frame records the state up to an opening parenthesis -/// or a alternating bracket `|`. -#[derive(Clone, Debug)] -enum GroupState { - /// This state is pushed whenever an opening group is found. - Group { - /// The concatenation immediately preceding the opening group. - concat: ast::Concat, - /// The group that has been opened. Its sub-AST is always empty. - group: ast::Group, - /// Whether this group has the `x` flag enabled or not. - ignore_whitespace: bool, - }, - /// This state is pushed whenever a new alternation branch is found. If - /// an alternation branch is found and this state is at the top of the - /// stack, then this state should be modified to include the new - /// alternation. - Alternation(ast::Alternation), -} - -/// ClassState represents a single stack frame while parsing character classes. -/// Each frame records the state up to an intersection, difference, symmetric -/// difference or nested class. -/// -/// Note that a parser's character class stack is only non-empty when parsing -/// a character class. In all other cases, it is empty. 
-#[derive(Clone, Debug)] -enum ClassState { - /// This state is pushed whenever an opening bracket is found. - Open { - /// The union of class items immediately preceding this class. - union: ast::ClassSetUnion, - /// The class that has been opened. Typically this just corresponds - /// to the `[`, but it can also include `[^` since `^` indicates - /// negation of the class. - set: ast::ClassBracketed, - }, - /// This state is pushed when a operator is seen. When popped, the stored - /// set becomes the left hand side of the operator. - Op { - /// The type of the operation, i.e., &&, -- or ~~. - kind: ast::ClassSetBinaryOpKind, - /// The left-hand side of the operator. - lhs: ast::ClassSet, - }, -} - -impl Parser { - /// Create a new parser with a default configuration. - /// - /// The parser can be run with either the `parse` or `parse_with_comments` - /// methods. The parse methods return an abstract syntax tree. - /// - /// To set configuration options on the parser, use [`ParserBuilder`]. - pub fn new() -> Parser { - ParserBuilder::new().build() - } - - /// Parse the regular expression into an abstract syntax tree. - pub fn parse(&mut self, pattern: &str) -> Result { - ParserI::new(self, pattern).parse() - } - - /// Parse the regular expression and return an abstract syntax tree with - /// all of the comments found in the pattern. - pub fn parse_with_comments( - &mut self, - pattern: &str, - ) -> Result { - ParserI::new(self, pattern).parse_with_comments() - } - - /// Reset the internal state of a parser. - /// - /// This is called at the beginning of every parse. This prevents the - /// parser from running with inconsistent state (say, if a previous - /// invocation returned an error and the parser is reused). - fn reset(&self) { - // These settings should be in line with the construction - // in `ParserBuilder::build`. 
- self.pos.set(Position { offset: 0, line: 1, column: 1 }); - self.ignore_whitespace.set(self.initial_ignore_whitespace); - self.comments.borrow_mut().clear(); - self.stack_group.borrow_mut().clear(); - self.stack_class.borrow_mut().clear(); - } -} - -impl<'s, P: Borrow> ParserI<'s, P> { - /// Build an internal parser from a parser configuration and a pattern. - fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> { - ParserI { parser, pattern } - } - - /// Return a reference to the parser state. - fn parser(&self) -> &Parser { - self.parser.borrow() - } - - /// Return a reference to the pattern being parsed. - fn pattern(&self) -> &str { - self.pattern - } - - /// Create a new error with the given span and error type. - fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error { - ast::Error { kind, pattern: self.pattern().to_string(), span } - } - - /// Return the current offset of the parser. - /// - /// The offset starts at `0` from the beginning of the regular expression - /// pattern string. - fn offset(&self) -> usize { - self.parser().pos.get().offset - } - - /// Return the current line number of the parser. - /// - /// The line number starts at `1`. - fn line(&self) -> usize { - self.parser().pos.get().line - } - - /// Return the current column of the parser. - /// - /// The column number starts at `1` and is reset whenever a `\n` is seen. - fn column(&self) -> usize { - self.parser().pos.get().column - } - - /// Return the next capturing index. Each subsequent call increments the - /// internal index. - /// - /// The span given should correspond to the location of the opening - /// parenthesis. - /// - /// If the capture limit is exceeded, then an error is returned. 
- fn next_capture_index(&self, span: Span) -> Result { - let current = self.parser().capture_index.get(); - let i = current.checked_add(1).ok_or_else(|| { - self.error(span, ast::ErrorKind::CaptureLimitExceeded) - })?; - self.parser().capture_index.set(i); - Ok(i) - } - - /// Adds the given capture name to this parser. If this capture name has - /// already been used, then an error is returned. - fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> { - let mut names = self.parser().capture_names.borrow_mut(); - match names - .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str()) - { - Err(i) => { - names.insert(i, cap.clone()); - Ok(()) - } - Ok(i) => Err(self.error( - cap.span, - ast::ErrorKind::GroupNameDuplicate { original: names[i].span }, - )), - } - } - - /// Return whether the parser should ignore whitespace or not. - fn ignore_whitespace(&self) -> bool { - self.parser().ignore_whitespace.get() - } - - /// Return the character at the current position of the parser. - /// - /// This panics if the current position does not point to a valid char. - fn char(&self) -> char { - self.char_at(self.offset()) - } - - /// Return the character at the given position. - /// - /// This panics if the given position does not point to a valid char. - fn char_at(&self, i: usize) -> char { - self.pattern()[i..] - .chars() - .next() - .unwrap_or_else(|| panic!("expected char at offset {}", i)) - } - - /// Bump the parser to the next Unicode scalar value. - /// - /// If the end of the input has been reached, then `false` is returned. 
- fn bump(&self) -> bool { - if self.is_eof() { - return false; - } - let Position { mut offset, mut line, mut column } = self.pos(); - if self.char() == '\n' { - line = line.checked_add(1).unwrap(); - column = 1; - } else { - column = column.checked_add(1).unwrap(); - } - offset += self.char().len_utf8(); - self.parser().pos.set(Position { offset, line, column }); - self.pattern()[self.offset()..].chars().next().is_some() - } - - /// If the substring starting at the current position of the parser has - /// the given prefix, then bump the parser to the character immediately - /// following the prefix and return true. Otherwise, don't bump the parser - /// and return false. - fn bump_if(&self, prefix: &str) -> bool { - if self.pattern()[self.offset()..].starts_with(prefix) { - for _ in 0..prefix.chars().count() { - self.bump(); - } - true - } else { - false - } - } - - /// Returns true if and only if the parser is positioned at a look-around - /// prefix. The conditions under which this returns true must always - /// correspond to a regular expression that would otherwise be consider - /// invalid. - /// - /// This should only be called immediately after parsing the opening of - /// a group or a set of flags. - fn is_lookaround_prefix(&self) -> bool { - self.bump_if("?=") - || self.bump_if("?!") - || self.bump_if("?<=") - || self.bump_if("? bool { - if !self.bump() { - return false; - } - self.bump_space(); - !self.is_eof() - } - - /// If the `x` flag is enabled (i.e., whitespace insensitivity with - /// comments), then this will advance the parser through all whitespace - /// and comments to the next non-whitespace non-comment byte. - /// - /// If the `x` flag is disabled, then this is a no-op. - /// - /// This should be used selectively throughout the parser where - /// arbitrary whitespace is permitted when the `x` flag is enabled. For - /// example, `{ 5 , 6}` is equivalent to `{5,6}`. 
- fn bump_space(&self) { - if !self.ignore_whitespace() { - return; - } - while !self.is_eof() { - if self.char().is_whitespace() { - self.bump(); - } else if self.char() == '#' { - let start = self.pos(); - let mut comment_text = String::new(); - self.bump(); - while !self.is_eof() { - let c = self.char(); - self.bump(); - if c == '\n' { - break; - } - comment_text.push(c); - } - let comment = ast::Comment { - span: Span::new(start, self.pos()), - comment: comment_text, - }; - self.parser().comments.borrow_mut().push(comment); - } else { - break; - } - } - } - - /// Peek at the next character in the input without advancing the parser. - /// - /// If the input has been exhausted, then this returns `None`. - fn peek(&self) -> Option { - if self.is_eof() { - return None; - } - self.pattern()[self.offset() + self.char().len_utf8()..].chars().next() - } - - /// Like peek, but will ignore spaces when the parser is in whitespace - /// insensitive mode. - fn peek_space(&self) -> Option { - if !self.ignore_whitespace() { - return self.peek(); - } - if self.is_eof() { - return None; - } - let mut start = self.offset() + self.char().len_utf8(); - let mut in_comment = false; - for (i, c) in self.pattern()[start..].char_indices() { - if c.is_whitespace() { - continue; - } else if !in_comment && c == '#' { - in_comment = true; - } else if in_comment && c == '\n' { - in_comment = false; - } else { - start += i; - break; - } - } - self.pattern()[start..].chars().next() - } - - /// Returns true if the next call to `bump` would return false. - fn is_eof(&self) -> bool { - self.offset() == self.pattern().len() - } - - /// Return the current position of the parser, which includes the offset, - /// line and column. - fn pos(&self) -> Position { - self.parser().pos.get() - } - - /// Create a span at the current position of the parser. Both the start - /// and end of the span are set. 
- fn span(&self) -> Span { - Span::splat(self.pos()) - } - - /// Create a span that covers the current character. - fn span_char(&self) -> Span { - let mut next = Position { - offset: self.offset().checked_add(self.char().len_utf8()).unwrap(), - line: self.line(), - column: self.column().checked_add(1).unwrap(), - }; - if self.char() == '\n' { - next.line += 1; - next.column = 1; - } - Span::new(self.pos(), next) - } - - /// Parse and push a single alternation on to the parser's internal stack. - /// If the top of the stack already has an alternation, then add to that - /// instead of pushing a new one. - /// - /// The concatenation given corresponds to a single alternation branch. - /// The concatenation returned starts the next branch and is empty. - /// - /// This assumes the parser is currently positioned at `|` and will advance - /// the parser to the character following `|`. - #[inline(never)] - fn push_alternate(&self, mut concat: ast::Concat) -> Result { - assert_eq!(self.char(), '|'); - concat.span.end = self.pos(); - self.push_or_add_alternation(concat); - self.bump(); - Ok(ast::Concat { span: self.span(), asts: vec![] }) - } - - /// Pushes or adds the given branch of an alternation to the parser's - /// internal stack of state. - fn push_or_add_alternation(&self, concat: ast::Concat) { - use self::GroupState::*; - - let mut stack = self.parser().stack_group.borrow_mut(); - if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() { - alts.asts.push(concat.into_ast()); - return; - } - stack.push(Alternation(ast::Alternation { - span: Span::new(concat.span.start, self.pos()), - asts: vec![concat.into_ast()], - })); - } - - /// Parse and push a group AST (and its parent concatenation) on to the - /// parser's internal stack. Return a fresh concatenation corresponding - /// to the group's sub-AST. - /// - /// If a set of flags was found (with no group), then the concatenation - /// is returned with that set of flags added. 
- /// - /// This assumes that the parser is currently positioned on the opening - /// parenthesis. It advances the parser to the character at the start - /// of the sub-expression (or adjoining expression). - /// - /// If there was a problem parsing the start of the group, then an error - /// is returned. - #[inline(never)] - fn push_group(&self, mut concat: ast::Concat) -> Result { - assert_eq!(self.char(), '('); - match self.parse_group()? { - Either::Left(set) => { - let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace); - if let Some(v) = ignore { - self.parser().ignore_whitespace.set(v); - } - - concat.asts.push(Ast::flags(set)); - Ok(concat) - } - Either::Right(group) => { - let old_ignore_whitespace = self.ignore_whitespace(); - let new_ignore_whitespace = group - .flags() - .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace)) - .unwrap_or(old_ignore_whitespace); - self.parser().stack_group.borrow_mut().push( - GroupState::Group { - concat, - group, - ignore_whitespace: old_ignore_whitespace, - }, - ); - self.parser().ignore_whitespace.set(new_ignore_whitespace); - Ok(ast::Concat { span: self.span(), asts: vec![] }) - } - } - } - - /// Pop a group AST from the parser's internal stack and set the group's - /// AST to the given concatenation. Return the concatenation containing - /// the group. - /// - /// This assumes that the parser is currently positioned on the closing - /// parenthesis and advances the parser to the character following the `)`. - /// - /// If no such group could be popped, then an unopened group error is - /// returned. 
- #[inline(never)] - fn pop_group(&self, mut group_concat: ast::Concat) -> Result { - use self::GroupState::*; - - assert_eq!(self.char(), ')'); - let mut stack = self.parser().stack_group.borrow_mut(); - let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack - .pop() - { - Some(Group { concat, group, ignore_whitespace }) => { - (concat, group, ignore_whitespace, None) - } - Some(Alternation(alt)) => match stack.pop() { - Some(Group { concat, group, ignore_whitespace }) => { - (concat, group, ignore_whitespace, Some(alt)) - } - None | Some(Alternation(_)) => { - return Err(self.error( - self.span_char(), - ast::ErrorKind::GroupUnopened, - )); - } - }, - None => { - return Err(self - .error(self.span_char(), ast::ErrorKind::GroupUnopened)); - } - }; - self.parser().ignore_whitespace.set(ignore_whitespace); - group_concat.span.end = self.pos(); - self.bump(); - group.span.end = self.pos(); - match alt { - Some(mut alt) => { - alt.span.end = group_concat.span.end; - alt.asts.push(group_concat.into_ast()); - group.ast = Box::new(alt.into_ast()); - } - None => { - group.ast = Box::new(group_concat.into_ast()); - } - } - prior_concat.asts.push(Ast::group(group)); - Ok(prior_concat) - } - - /// Pop the last state from the parser's internal stack, if it exists, and - /// add the given concatenation to it. There either must be no state or a - /// single alternation item on the stack. Any other scenario produces an - /// error. - /// - /// This assumes that the parser has advanced to the end. - #[inline(never)] - fn pop_group_end(&self, mut concat: ast::Concat) -> Result { - concat.span.end = self.pos(); - let mut stack = self.parser().stack_group.borrow_mut(); - let ast = match stack.pop() { - None => Ok(concat.into_ast()), - Some(GroupState::Alternation(mut alt)) => { - alt.span.end = self.pos(); - alt.asts.push(concat.into_ast()); - Ok(Ast::alternation(alt)) - } - Some(GroupState::Group { group, .. 
}) => { - return Err( - self.error(group.span, ast::ErrorKind::GroupUnclosed) - ); - } - }; - // If we try to pop again, there should be nothing. - match stack.pop() { - None => ast, - Some(GroupState::Alternation(_)) => { - // This unreachable is unfortunate. This case can't happen - // because the only way we can be here is if there were two - // `GroupState::Alternation`s adjacent in the parser's stack, - // which we guarantee to never happen because we never push a - // `GroupState::Alternation` if one is already at the top of - // the stack. - unreachable!() - } - Some(GroupState::Group { group, .. }) => { - Err(self.error(group.span, ast::ErrorKind::GroupUnclosed)) - } - } - } - - /// Parse the opening of a character class and push the current class - /// parsing context onto the parser's stack. This assumes that the parser - /// is positioned at an opening `[`. The given union should correspond to - /// the union of set items built up before seeing the `[`. - /// - /// If there was a problem parsing the opening of the class, then an error - /// is returned. Otherwise, a new union of set items for the class is - /// returned (which may be populated with either a `]` or a `-`). - #[inline(never)] - fn push_class_open( - &self, - parent_union: ast::ClassSetUnion, - ) -> Result { - assert_eq!(self.char(), '['); - - let (nested_set, nested_union) = self.parse_set_class_open()?; - self.parser() - .stack_class - .borrow_mut() - .push(ClassState::Open { union: parent_union, set: nested_set }); - Ok(nested_union) - } - - /// Parse the end of a character class set and pop the character class - /// parser stack. The union given corresponds to the last union built - /// before seeing the closing `]`. The union returned corresponds to the - /// parent character class set with the nested class added to it. - /// - /// This assumes that the parser is positioned at a `]` and will advance - /// the parser to the byte immediately following the `]`. 
- /// - /// If the stack is empty after popping, then this returns the final - /// "top-level" character class AST (where a "top-level" character class - /// is one that is not nested inside any other character class). - /// - /// If there is no corresponding opening bracket on the parser's stack, - /// then an error is returned. - #[inline(never)] - fn pop_class( - &self, - nested_union: ast::ClassSetUnion, - ) -> Result> { - assert_eq!(self.char(), ']'); - - let item = ast::ClassSet::Item(nested_union.into_item()); - let prevset = self.pop_class_op(item); - let mut stack = self.parser().stack_class.borrow_mut(); - match stack.pop() { - None => { - // We can never observe an empty stack: - // - // 1) We are guaranteed to start with a non-empty stack since - // the character class parser is only initiated when it sees - // a `[`. - // 2) If we ever observe an empty stack while popping after - // seeing a `]`, then we signal the character class parser - // to terminate. - panic!("unexpected empty character class stack") - } - Some(ClassState::Op { .. }) => { - // This panic is unfortunate, but this case is impossible - // since we already popped the Op state if one exists above. - // Namely, every push to the class parser stack is guarded by - // whether an existing Op is already on the top of the stack. - // If it is, the existing Op is modified. That is, the stack - // can never have consecutive Op states. - panic!("unexpected ClassState::Op") - } - Some(ClassState::Open { mut union, mut set }) => { - self.bump(); - set.span.end = self.pos(); - set.kind = prevset; - if stack.is_empty() { - Ok(Either::Right(set)) - } else { - union.push(ast::ClassSetItem::Bracketed(Box::new(set))); - Ok(Either::Left(union)) - } - } - } - } - - /// Return an "unclosed class" error whose span points to the most - /// recently opened class. - /// - /// This should only be called while parsing a character class. 
- #[inline(never)] - fn unclosed_class_error(&self) -> ast::Error { - for state in self.parser().stack_class.borrow().iter().rev() { - if let ClassState::Open { ref set, .. } = *state { - return self.error(set.span, ast::ErrorKind::ClassUnclosed); - } - } - // We are guaranteed to have a non-empty stack with at least - // one open bracket, so we should never get here. - panic!("no open character class found") - } - - /// Push the current set of class items on to the class parser's stack as - /// the left hand side of the given operator. - /// - /// A fresh set union is returned, which should be used to build the right - /// hand side of this operator. - #[inline(never)] - fn push_class_op( - &self, - next_kind: ast::ClassSetBinaryOpKind, - next_union: ast::ClassSetUnion, - ) -> ast::ClassSetUnion { - let item = ast::ClassSet::Item(next_union.into_item()); - let new_lhs = self.pop_class_op(item); - self.parser() - .stack_class - .borrow_mut() - .push(ClassState::Op { kind: next_kind, lhs: new_lhs }); - ast::ClassSetUnion { span: self.span(), items: vec![] } - } - - /// Pop a character class set from the character class parser stack. If the - /// top of the stack is just an item (not an operation), then return the - /// given set unchanged. If the top of the stack is an operation, then the - /// given set will be used as the rhs of the operation on the top of the - /// stack. In that case, the binary operation is returned as a set. - #[inline(never)] - fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet { - let mut stack = self.parser().stack_class.borrow_mut(); - let (kind, lhs) = match stack.pop() { - Some(ClassState::Op { kind, lhs }) => (kind, lhs), - Some(state @ ClassState::Open { .. 
}) => { - stack.push(state); - return rhs; - } - None => unreachable!(), - }; - let span = Span::new(lhs.span().start, rhs.span().end); - ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { - span, - kind, - lhs: Box::new(lhs), - rhs: Box::new(rhs), - }) - } -} - -impl<'s, P: Borrow> ParserI<'s, P> { - /// Parse the regular expression into an abstract syntax tree. - fn parse(&self) -> Result { - self.parse_with_comments().map(|astc| astc.ast) - } - - /// Parse the regular expression and return an abstract syntax tree with - /// all of the comments found in the pattern. - fn parse_with_comments(&self) -> Result { - assert_eq!(self.offset(), 0, "parser can only be used once"); - self.parser().reset(); - let mut concat = ast::Concat { span: self.span(), asts: vec![] }; - loop { - self.bump_space(); - if self.is_eof() { - break; - } - match self.char() { - '(' => concat = self.push_group(concat)?, - ')' => concat = self.pop_group(concat)?, - '|' => concat = self.push_alternate(concat)?, - '[' => { - let class = self.parse_set_class()?; - concat.asts.push(Ast::class_bracketed(class)); - } - '?' => { - concat = self.parse_uncounted_repetition( - concat, - ast::RepetitionKind::ZeroOrOne, - )?; - } - '*' => { - concat = self.parse_uncounted_repetition( - concat, - ast::RepetitionKind::ZeroOrMore, - )?; - } - '+' => { - concat = self.parse_uncounted_repetition( - concat, - ast::RepetitionKind::OneOrMore, - )?; - } - '{' => { - concat = self.parse_counted_repetition(concat)?; - } - _ => concat.asts.push(self.parse_primitive()?.into_ast()), - } - } - let ast = self.pop_group_end(concat)?; - NestLimiter::new(self).check(&ast)?; - Ok(ast::WithComments { - ast, - comments: mem::replace( - &mut *self.parser().comments.borrow_mut(), - vec![], - ), - }) - } - - /// Parses an uncounted repetition operation. An uncounted repetition - /// operator includes ?, * and +, but does not include the {m,n} syntax. 
- /// The given `kind` should correspond to the operator observed by the - /// caller. - /// - /// This assumes that the parser is currently positioned at the repetition - /// operator and advances the parser to the first character after the - /// operator. (Note that the operator may include a single additional `?`, - /// which makes the operator ungreedy.) - /// - /// The caller should include the concatenation that is being built. The - /// concatenation returned includes the repetition operator applied to the - /// last expression in the given concatenation. - #[inline(never)] - fn parse_uncounted_repetition( - &self, - mut concat: ast::Concat, - kind: ast::RepetitionKind, - ) -> Result { - assert!( - self.char() == '?' || self.char() == '*' || self.char() == '+' - ); - let op_start = self.pos(); - let ast = match concat.asts.pop() { - Some(ast) => ast, - None => { - return Err( - self.error(self.span(), ast::ErrorKind::RepetitionMissing) - ) - } - }; - match ast { - Ast::Empty(_) | Ast::Flags(_) => { - return Err( - self.error(self.span(), ast::ErrorKind::RepetitionMissing) - ) - } - _ => {} - } - let mut greedy = true; - if self.bump() && self.char() == '?' { - greedy = false; - self.bump(); - } - concat.asts.push(Ast::repetition(ast::Repetition { - span: ast.span().with_end(self.pos()), - op: ast::RepetitionOp { - span: Span::new(op_start, self.pos()), - kind, - }, - greedy, - ast: Box::new(ast), - })); - Ok(concat) - } - - /// Parses a counted repetition operation. A counted repetition operator - /// corresponds to the {m,n} syntax, and does not include the ?, * or + - /// operators. - /// - /// This assumes that the parser is currently positioned at the opening `{` - /// and advances the parser to the first character after the operator. - /// (Note that the operator may include a single additional `?`, which - /// makes the operator ungreedy.) - /// - /// The caller should include the concatenation that is being built. 
The - /// concatenation returned includes the repetition operator applied to the - /// last expression in the given concatenation. - #[inline(never)] - fn parse_counted_repetition( - &self, - mut concat: ast::Concat, - ) -> Result { - assert!(self.char() == '{'); - let start = self.pos(); - let ast = match concat.asts.pop() { - Some(ast) => ast, - None => { - return Err( - self.error(self.span(), ast::ErrorKind::RepetitionMissing) - ) - } - }; - match ast { - Ast::Empty(_) | Ast::Flags(_) => { - return Err( - self.error(self.span(), ast::ErrorKind::RepetitionMissing) - ) - } - _ => {} - } - if !self.bump_and_bump_space() { - return Err(self.error( - Span::new(start, self.pos()), - ast::ErrorKind::RepetitionCountUnclosed, - )); - } - let count_start = specialize_err( - self.parse_decimal(), - ast::ErrorKind::DecimalEmpty, - ast::ErrorKind::RepetitionCountDecimalEmpty, - ); - if self.is_eof() { - return Err(self.error( - Span::new(start, self.pos()), - ast::ErrorKind::RepetitionCountUnclosed, - )); - } - let range = if self.char() == ',' { - if !self.bump_and_bump_space() { - return Err(self.error( - Span::new(start, self.pos()), - ast::ErrorKind::RepetitionCountUnclosed, - )); - } - if self.char() != '}' { - let count_start = match count_start { - Ok(c) => c, - Err(err) - if err.kind - == ast::ErrorKind::RepetitionCountDecimalEmpty => - { - if self.parser().empty_min_range { - 0 - } else { - return Err(err); - } - } - err => err?, - }; - let count_end = specialize_err( - self.parse_decimal(), - ast::ErrorKind::DecimalEmpty, - ast::ErrorKind::RepetitionCountDecimalEmpty, - )?; - ast::RepetitionRange::Bounded(count_start, count_end) - } else { - ast::RepetitionRange::AtLeast(count_start?) - } - } else { - ast::RepetitionRange::Exactly(count_start?) 
- }; - - if self.is_eof() || self.char() != '}' { - return Err(self.error( - Span::new(start, self.pos()), - ast::ErrorKind::RepetitionCountUnclosed, - )); - } - - let mut greedy = true; - if self.bump_and_bump_space() && self.char() == '?' { - greedy = false; - self.bump(); - } - - let op_span = Span::new(start, self.pos()); - if !range.is_valid() { - return Err( - self.error(op_span, ast::ErrorKind::RepetitionCountInvalid) - ); - } - concat.asts.push(Ast::repetition(ast::Repetition { - span: ast.span().with_end(self.pos()), - op: ast::RepetitionOp { - span: op_span, - kind: ast::RepetitionKind::Range(range), - }, - greedy, - ast: Box::new(ast), - })); - Ok(concat) - } - - /// Parse a group (which contains a sub-expression) or a set of flags. - /// - /// If a group was found, then it is returned with an empty AST. If a set - /// of flags is found, then that set is returned. - /// - /// The parser should be positioned at the opening parenthesis. - /// - /// This advances the parser to the character before the start of the - /// sub-expression (in the case of a group) or to the closing parenthesis - /// immediately following the set of flags. - /// - /// # Errors - /// - /// If flags are given and incorrectly specified, then a corresponding - /// error is returned. - /// - /// If a capture name is given and it is incorrectly specified, then a - /// corresponding error is returned. 
- #[inline(never)] - fn parse_group(&self) -> Result> { - assert_eq!(self.char(), '('); - let open_span = self.span_char(); - self.bump(); - self.bump_space(); - if self.is_lookaround_prefix() { - return Err(self.error( - Span::new(open_span.start, self.span().end), - ast::ErrorKind::UnsupportedLookAround, - )); - } - let inner_span = self.span(); - let mut starts_with_p = true; - if self.bump_if("?P<") || { - starts_with_p = false; - self.bump_if("?<") - } { - let capture_index = self.next_capture_index(open_span)?; - let name = self.parse_capture_name(capture_index)?; - Ok(Either::Right(ast::Group { - span: open_span, - kind: ast::GroupKind::CaptureName { starts_with_p, name }, - ast: Box::new(Ast::empty(self.span())), - })) - } else if self.bump_if("?") { - if self.is_eof() { - return Err( - self.error(open_span, ast::ErrorKind::GroupUnclosed) - ); - } - let flags = self.parse_flags()?; - let char_end = self.char(); - self.bump(); - if char_end == ')' { - // We don't allow empty flags, e.g., `(?)`. We instead - // interpret it as a repetition operator missing its argument. - if flags.items.is_empty() { - return Err(self.error( - inner_span, - ast::ErrorKind::RepetitionMissing, - )); - } - Ok(Either::Left(ast::SetFlags { - span: Span { end: self.pos(), ..open_span }, - flags, - })) - } else { - assert_eq!(char_end, ':'); - Ok(Either::Right(ast::Group { - span: open_span, - kind: ast::GroupKind::NonCapturing(flags), - ast: Box::new(Ast::empty(self.span())), - })) - } - } else { - let capture_index = self.next_capture_index(open_span)?; - Ok(Either::Right(ast::Group { - span: open_span, - kind: ast::GroupKind::CaptureIndex(capture_index), - ast: Box::new(Ast::empty(self.span())), - })) - } - } - - /// Parses a capture group name. Assumes that the parser is positioned at - /// the first character in the name following the opening `<` (and may - /// possibly be EOF). This advances the parser to the first character - /// following the closing `>`. 
- /// - /// The caller must provide the capture index of the group for this name. - #[inline(never)] - fn parse_capture_name( - &self, - capture_index: u32, - ) -> Result { - if self.is_eof() { - return Err(self - .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof)); - } - let start = self.pos(); - loop { - if self.char() == '>' { - break; - } - if !is_capture_char(self.char(), self.pos() == start) { - return Err(self.error( - self.span_char(), - ast::ErrorKind::GroupNameInvalid, - )); - } - if !self.bump() { - break; - } - } - let end = self.pos(); - if self.is_eof() { - return Err(self - .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof)); - } - assert_eq!(self.char(), '>'); - self.bump(); - let name = &self.pattern()[start.offset..end.offset]; - if name.is_empty() { - return Err(self.error( - Span::new(start, start), - ast::ErrorKind::GroupNameEmpty, - )); - } - let capname = ast::CaptureName { - span: Span::new(start, end), - name: name.to_string(), - index: capture_index, - }; - self.add_capture_name(&capname)?; - Ok(capname) - } - - /// Parse a sequence of flags starting at the current character. - /// - /// This advances the parser to the character immediately following the - /// flags, which is guaranteed to be either `:` or `)`. - /// - /// # Errors - /// - /// If any flags are duplicated, then an error is returned. - /// - /// If the negation operator is used more than once, then an error is - /// returned. - /// - /// If no flags could be found or if the negation operation is not followed - /// by any flags, then an error is returned. 
- #[inline(never)] - fn parse_flags(&self) -> Result { - let mut flags = ast::Flags { span: self.span(), items: vec![] }; - let mut last_was_negation = None; - while self.char() != ':' && self.char() != ')' { - if self.char() == '-' { - last_was_negation = Some(self.span_char()); - let item = ast::FlagsItem { - span: self.span_char(), - kind: ast::FlagsItemKind::Negation, - }; - if let Some(i) = flags.add_item(item) { - return Err(self.error( - self.span_char(), - ast::ErrorKind::FlagRepeatedNegation { - original: flags.items[i].span, - }, - )); - } - } else { - last_was_negation = None; - let item = ast::FlagsItem { - span: self.span_char(), - kind: ast::FlagsItemKind::Flag(self.parse_flag()?), - }; - if let Some(i) = flags.add_item(item) { - return Err(self.error( - self.span_char(), - ast::ErrorKind::FlagDuplicate { - original: flags.items[i].span, - }, - )); - } - } - if !self.bump() { - return Err( - self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof) - ); - } - } - if let Some(span) = last_was_negation { - return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation)); - } - flags.span.end = self.pos(); - Ok(flags) - } - - /// Parse the current character as a flag. Do not advance the parser. - /// - /// # Errors - /// - /// If the flag is not recognized, then an error is returned. - #[inline(never)] - fn parse_flag(&self) -> Result { - match self.char() { - 'i' => Ok(ast::Flag::CaseInsensitive), - 'm' => Ok(ast::Flag::MultiLine), - 's' => Ok(ast::Flag::DotMatchesNewLine), - 'U' => Ok(ast::Flag::SwapGreed), - 'u' => Ok(ast::Flag::Unicode), - 'R' => Ok(ast::Flag::CRLF), - 'x' => Ok(ast::Flag::IgnoreWhitespace), - _ => { - Err(self - .error(self.span_char(), ast::ErrorKind::FlagUnrecognized)) - } - } - } - - /// Parse a primitive AST. e.g., A literal, non-set character class or - /// assertion. - /// - /// This assumes that the parser expects a primitive at the current - /// location. i.e., All other non-primitive cases have been handled. 
- /// For example, if the parser's position is at `|`, then `|` will be - /// treated as a literal (e.g., inside a character class). - /// - /// This advances the parser to the first character immediately following - /// the primitive. - fn parse_primitive(&self) -> Result { - match self.char() { - '\\' => self.parse_escape(), - '.' => { - let ast = Primitive::Dot(self.span_char()); - self.bump(); - Ok(ast) - } - '^' => { - let ast = Primitive::Assertion(ast::Assertion { - span: self.span_char(), - kind: ast::AssertionKind::StartLine, - }); - self.bump(); - Ok(ast) - } - '$' => { - let ast = Primitive::Assertion(ast::Assertion { - span: self.span_char(), - kind: ast::AssertionKind::EndLine, - }); - self.bump(); - Ok(ast) - } - c => { - let ast = Primitive::Literal(ast::Literal { - span: self.span_char(), - kind: ast::LiteralKind::Verbatim, - c, - }); - self.bump(); - Ok(ast) - } - } - } - - /// Parse an escape sequence as a primitive AST. - /// - /// This assumes the parser is positioned at the start of the escape - /// sequence, i.e., `\`. It advances the parser to the first position - /// immediately following the escape sequence. - #[inline(never)] - fn parse_escape(&self) -> Result { - assert_eq!(self.char(), '\\'); - let start = self.pos(); - if !self.bump() { - return Err(self.error( - Span::new(start, self.pos()), - ast::ErrorKind::EscapeUnexpectedEof, - )); - } - let c = self.char(); - // Put some of the more complicated routines into helpers. 
- match c { - '0'..='7' => { - if !self.parser().octal { - return Err(self.error( - Span::new(start, self.span_char().end), - ast::ErrorKind::UnsupportedBackreference, - )); - } - let mut lit = self.parse_octal(); - lit.span.start = start; - return Ok(Primitive::Literal(lit)); - } - '8'..='9' if !self.parser().octal => { - return Err(self.error( - Span::new(start, self.span_char().end), - ast::ErrorKind::UnsupportedBackreference, - )); - } - 'x' | 'u' | 'U' => { - let mut lit = self.parse_hex()?; - lit.span.start = start; - return Ok(Primitive::Literal(lit)); - } - 'p' | 'P' => { - let mut cls = self.parse_unicode_class()?; - cls.span.start = start; - return Ok(Primitive::Unicode(cls)); - } - 'd' | 's' | 'w' | 'D' | 'S' | 'W' => { - let mut cls = self.parse_perl_class(); - cls.span.start = start; - return Ok(Primitive::Perl(cls)); - } - _ => {} - } - - // Handle all of the one letter sequences inline. - self.bump(); - let span = Span::new(start, self.pos()); - if is_meta_character(c) { - return Ok(Primitive::Literal(ast::Literal { - span, - kind: ast::LiteralKind::Meta, - c, - })); - } - if is_escapeable_character(c) { - return Ok(Primitive::Literal(ast::Literal { - span, - kind: ast::LiteralKind::Superfluous, - c, - })); - } - let special = |kind, c| { - Ok(Primitive::Literal(ast::Literal { - span, - kind: ast::LiteralKind::Special(kind), - c, - })) - }; - match c { - 'a' => special(ast::SpecialLiteralKind::Bell, '\x07'), - 'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'), - 't' => special(ast::SpecialLiteralKind::Tab, '\t'), - 'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'), - 'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'), - 'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'), - 'A' => Ok(Primitive::Assertion(ast::Assertion { - span, - kind: ast::AssertionKind::StartText, - })), - 'z' => Ok(Primitive::Assertion(ast::Assertion { - span, - kind: ast::AssertionKind::EndText, - })), - 'b' => { - let mut wb = ast::Assertion { 
- span, - kind: ast::AssertionKind::WordBoundary, - }; - // After a \b, we "try" to parse things like \b{start} for - // special word boundary assertions. - if !self.is_eof() && self.char() == '{' { - if let Some(kind) = - self.maybe_parse_special_word_boundary(start)? - { - wb.kind = kind; - wb.span.end = self.pos(); - } - } - Ok(Primitive::Assertion(wb)) - } - 'B' => Ok(Primitive::Assertion(ast::Assertion { - span, - kind: ast::AssertionKind::NotWordBoundary, - })), - '<' => Ok(Primitive::Assertion(ast::Assertion { - span, - kind: ast::AssertionKind::WordBoundaryStartAngle, - })), - '>' => Ok(Primitive::Assertion(ast::Assertion { - span, - kind: ast::AssertionKind::WordBoundaryEndAngle, - })), - _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)), - } - } - - /// Attempt to parse a specialty word boundary. That is, `\b{start}`, - /// `\b{end}`, `\b{start-half}` or `\b{end-half}`. - /// - /// This is similar to `maybe_parse_ascii_class` in that, in most cases, - /// if it fails it will just return `None` with no error. This is done - /// because `\b{5}` is a valid expression and we want to let that be parsed - /// by the existing counted repetition parsing code. (I thought about just - /// invoking the counted repetition code from here, but it seemed a little - /// ham-fisted.) - /// - /// Unlike `maybe_parse_ascii_class` though, this can return an error. - /// Namely, if we definitely know it isn't a counted repetition, then we - /// return an error specific to the specialty word boundaries. - /// - /// This assumes the parser is positioned at a `{` immediately following - /// a `\b`. When `None` is returned, the parser is returned to the position - /// at which it started: pointing at a `{`. - /// - /// The position given should correspond to the start of the `\b`. 
- fn maybe_parse_special_word_boundary( - &self, - wb_start: Position, - ) -> Result> { - assert_eq!(self.char(), '{'); - - let is_valid_char = |c| match c { - 'A'..='Z' | 'a'..='z' | '-' => true, - _ => false, - }; - let start = self.pos(); - if !self.bump_and_bump_space() { - return Err(self.error( - Span::new(wb_start, self.pos()), - ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof, - )); - } - let start_contents = self.pos(); - // This is one of the critical bits: if the first non-whitespace - // character isn't in [-A-Za-z] (i.e., this can't be a special word - // boundary), then we bail and let the counted repetition parser deal - // with this. - if !is_valid_char(self.char()) { - self.parser().pos.set(start); - return Ok(None); - } - - // Now collect up our chars until we see a '}'. - let mut scratch = self.parser().scratch.borrow_mut(); - scratch.clear(); - while !self.is_eof() && is_valid_char(self.char()) { - scratch.push(self.char()); - self.bump_and_bump_space(); - } - if self.is_eof() || self.char() != '}' { - return Err(self.error( - Span::new(start, self.pos()), - ast::ErrorKind::SpecialWordBoundaryUnclosed, - )); - } - let end = self.pos(); - self.bump(); - let kind = match scratch.as_str() { - "start" => ast::AssertionKind::WordBoundaryStart, - "end" => ast::AssertionKind::WordBoundaryEnd, - "start-half" => ast::AssertionKind::WordBoundaryStartHalf, - "end-half" => ast::AssertionKind::WordBoundaryEndHalf, - _ => { - return Err(self.error( - Span::new(start_contents, end), - ast::ErrorKind::SpecialWordBoundaryUnrecognized, - )) - } - }; - Ok(Some(kind)) - } - - /// Parse an octal representation of a Unicode codepoint up to 3 digits - /// long. This expects the parser to be positioned at the first octal - /// digit and advances the parser to the first character immediately - /// following the octal number. This also assumes that parsing octal - /// escapes is enabled. - /// - /// Assuming the preconditions are met, this routine can never fail. 
- #[inline(never)] - fn parse_octal(&self) -> ast::Literal { - assert!(self.parser().octal); - assert!('0' <= self.char() && self.char() <= '7'); - let start = self.pos(); - // Parse up to two more digits. - while self.bump() - && '0' <= self.char() - && self.char() <= '7' - && self.pos().offset - start.offset <= 2 - {} - let end = self.pos(); - let octal = &self.pattern()[start.offset..end.offset]; - // Parsing the octal should never fail since the above guarantees a - // valid number. - let codepoint = - u32::from_str_radix(octal, 8).expect("valid octal number"); - // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no - // invalid Unicode scalar values. - let c = char::from_u32(codepoint).expect("Unicode scalar value"); - ast::Literal { - span: Span::new(start, end), - kind: ast::LiteralKind::Octal, - c, - } - } - - /// Parse a hex representation of a Unicode codepoint. This handles both - /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to - /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to - /// the first character immediately following the hexadecimal literal. - #[inline(never)] - fn parse_hex(&self) -> Result { - assert!( - self.char() == 'x' || self.char() == 'u' || self.char() == 'U' - ); - - let hex_kind = match self.char() { - 'x' => ast::HexLiteralKind::X, - 'u' => ast::HexLiteralKind::UnicodeShort, - _ => ast::HexLiteralKind::UnicodeLong, - }; - if !self.bump_and_bump_space() { - return Err( - self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof) - ); - } - if self.char() == '{' { - self.parse_hex_brace(hex_kind) - } else { - self.parse_hex_digits(hex_kind) - } - } - - /// Parse an N-digit hex representation of a Unicode codepoint. This - /// expects the parser to be positioned at the first digit and will advance - /// the parser to the first character immediately following the escape - /// sequence. 
- /// - /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`) - /// or 8 (for `\UNNNNNNNN`). - #[inline(never)] - fn parse_hex_digits( - &self, - kind: ast::HexLiteralKind, - ) -> Result { - let mut scratch = self.parser().scratch.borrow_mut(); - scratch.clear(); - - let start = self.pos(); - for i in 0..kind.digits() { - if i > 0 && !self.bump_and_bump_space() { - return Err(self - .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)); - } - if !is_hex(self.char()) { - return Err(self.error( - self.span_char(), - ast::ErrorKind::EscapeHexInvalidDigit, - )); - } - scratch.push(self.char()); - } - // The final bump just moves the parser past the literal, which may - // be EOF. - self.bump_and_bump_space(); - let end = self.pos(); - let hex = scratch.as_str(); - match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) { - None => Err(self.error( - Span::new(start, end), - ast::ErrorKind::EscapeHexInvalid, - )), - Some(c) => Ok(ast::Literal { - span: Span::new(start, end), - kind: ast::LiteralKind::HexFixed(kind), - c, - }), - } - } - - /// Parse a hex representation of any Unicode scalar value. This expects - /// the parser to be positioned at the opening brace `{` and will advance - /// the parser to the first character following the closing brace `}`. 
- #[inline(never)] - fn parse_hex_brace( - &self, - kind: ast::HexLiteralKind, - ) -> Result { - let mut scratch = self.parser().scratch.borrow_mut(); - scratch.clear(); - - let brace_pos = self.pos(); - let start = self.span_char().end; - while self.bump_and_bump_space() && self.char() != '}' { - if !is_hex(self.char()) { - return Err(self.error( - self.span_char(), - ast::ErrorKind::EscapeHexInvalidDigit, - )); - } - scratch.push(self.char()); - } - if self.is_eof() { - return Err(self.error( - Span::new(brace_pos, self.pos()), - ast::ErrorKind::EscapeUnexpectedEof, - )); - } - let end = self.pos(); - let hex = scratch.as_str(); - assert_eq!(self.char(), '}'); - self.bump_and_bump_space(); - - if hex.is_empty() { - return Err(self.error( - Span::new(brace_pos, self.pos()), - ast::ErrorKind::EscapeHexEmpty, - )); - } - match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) { - None => Err(self.error( - Span::new(start, end), - ast::ErrorKind::EscapeHexInvalid, - )), - Some(c) => Ok(ast::Literal { - span: Span::new(start, self.pos()), - kind: ast::LiteralKind::HexBrace(kind), - c, - }), - } - } - - /// Parse a decimal number into a u32 while trimming leading and trailing - /// whitespace. - /// - /// This expects the parser to be positioned at the first position where - /// a decimal digit could occur. This will advance the parser to the byte - /// immediately following the last contiguous decimal digit. - /// - /// If no decimal digit could be found or if there was a problem parsing - /// the complete set of digits into a u32, then an error is returned. 
- fn parse_decimal(&self) -> Result { - let mut scratch = self.parser().scratch.borrow_mut(); - scratch.clear(); - - while !self.is_eof() && self.char().is_whitespace() { - self.bump(); - } - let start = self.pos(); - while !self.is_eof() && '0' <= self.char() && self.char() <= '9' { - scratch.push(self.char()); - self.bump_and_bump_space(); - } - let span = Span::new(start, self.pos()); - while !self.is_eof() && self.char().is_whitespace() { - self.bump_and_bump_space(); - } - let digits = scratch.as_str(); - if digits.is_empty() { - return Err(self.error(span, ast::ErrorKind::DecimalEmpty)); - } - match u32::from_str_radix(digits, 10).ok() { - Some(n) => Ok(n), - None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)), - } - } - - /// Parse a standard character class consisting primarily of characters or - /// character ranges, but can also contain nested character classes of - /// any type (sans `.`). - /// - /// This assumes the parser is positioned at the opening `[`. If parsing - /// is successful, then the parser is advanced to the position immediately - /// following the closing `]`. - #[inline(never)] - fn parse_set_class(&self) -> Result { - assert_eq!(self.char(), '['); - - let mut union = - ast::ClassSetUnion { span: self.span(), items: vec![] }; - loop { - self.bump_space(); - if self.is_eof() { - return Err(self.unclosed_class_error()); - } - match self.char() { - '[' => { - // If we've already parsed the opening bracket, then - // attempt to treat this as the beginning of an ASCII - // class. If ASCII class parsing fails, then the parser - // backs up to `[`. - if !self.parser().stack_class.borrow().is_empty() { - if let Some(cls) = self.maybe_parse_ascii_class() { - union.push(ast::ClassSetItem::Ascii(cls)); - continue; - } - } - union = self.push_class_open(union)?; - } - ']' => match self.pop_class(union)? 
{ - Either::Left(nested_union) => { - union = nested_union; - } - Either::Right(class) => return Ok(class), - }, - '&' if self.peek() == Some('&') => { - assert!(self.bump_if("&&")); - union = self.push_class_op( - ast::ClassSetBinaryOpKind::Intersection, - union, - ); - } - '-' if self.peek() == Some('-') => { - assert!(self.bump_if("--")); - union = self.push_class_op( - ast::ClassSetBinaryOpKind::Difference, - union, - ); - } - '~' if self.peek() == Some('~') => { - assert!(self.bump_if("~~")); - union = self.push_class_op( - ast::ClassSetBinaryOpKind::SymmetricDifference, - union, - ); - } - _ => { - union.push(self.parse_set_class_range()?); - } - } - } - } - - /// Parse a single primitive item in a character class set. The item to - /// be parsed can either be one of a simple literal character, a range - /// between two simple literal characters or a "primitive" character - /// class like \w or \p{Greek}. - /// - /// If an invalid escape is found, or if a character class is found where - /// a simple literal is expected (e.g., in a range), then an error is - /// returned. - #[inline(never)] - fn parse_set_class_range(&self) -> Result { - let prim1 = self.parse_set_class_item()?; - self.bump_space(); - if self.is_eof() { - return Err(self.unclosed_class_error()); - } - // If the next char isn't a `-`, then we don't have a range. - // There are two exceptions. If the char after a `-` is a `]`, then - // `-` is interpreted as a literal `-`. Alternatively, if the char - // after a `-` is a `-`, then `--` corresponds to a "difference" - // operation. - if self.char() != '-' - || self.peek_space() == Some(']') - || self.peek_space() == Some('-') - { - return prim1.into_class_set_item(self); - } - // OK, now we're parsing a range, so bump past the `-` and parse the - // second half of the range. 
- if !self.bump_and_bump_space() { - return Err(self.unclosed_class_error()); - } - let prim2 = self.parse_set_class_item()?; - let range = ast::ClassSetRange { - span: Span::new(prim1.span().start, prim2.span().end), - start: prim1.into_class_literal(self)?, - end: prim2.into_class_literal(self)?, - }; - if !range.is_valid() { - return Err( - self.error(range.span, ast::ErrorKind::ClassRangeInvalid) - ); - } - Ok(ast::ClassSetItem::Range(range)) - } - - /// Parse a single item in a character class as a primitive, where the - /// primitive either consists of a verbatim literal or a single escape - /// sequence. - /// - /// This assumes the parser is positioned at the beginning of a primitive, - /// and advances the parser to the first position after the primitive if - /// successful. - /// - /// Note that it is the caller's responsibility to report an error if an - /// illegal primitive was parsed. - #[inline(never)] - fn parse_set_class_item(&self) -> Result { - if self.char() == '\\' { - self.parse_escape() - } else { - let x = Primitive::Literal(ast::Literal { - span: self.span_char(), - kind: ast::LiteralKind::Verbatim, - c: self.char(), - }); - self.bump(); - Ok(x) - } - } - - /// Parses the opening of a character class set. This includes the opening - /// bracket along with `^` if present to indicate negation. This also - /// starts parsing the opening set of unioned items if applicable, since - /// there are special rules applied to certain characters in the opening - /// of a character class. For example, `[^]]` is the class of all - /// characters not equal to `]`. (`]` would need to be escaped in any other - /// position.) Similarly for `-`. - /// - /// In all cases, the op inside the returned `ast::ClassBracketed` is an - /// empty union. This empty union should be replaced with the actual item - /// when it is popped from the parser's stack. 
- /// - /// This assumes the parser is positioned at the opening `[` and advances - /// the parser to the first non-special byte of the character class. - /// - /// An error is returned if EOF is found. - #[inline(never)] - fn parse_set_class_open( - &self, - ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> { - assert_eq!(self.char(), '['); - let start = self.pos(); - if !self.bump_and_bump_space() { - return Err(self.error( - Span::new(start, self.pos()), - ast::ErrorKind::ClassUnclosed, - )); - } - - let negated = if self.char() != '^' { - false - } else { - if !self.bump_and_bump_space() { - return Err(self.error( - Span::new(start, self.pos()), - ast::ErrorKind::ClassUnclosed, - )); - } - true - }; - // Accept any number of `-` as literal `-`. - let mut union = - ast::ClassSetUnion { span: self.span(), items: vec![] }; - while self.char() == '-' { - union.push(ast::ClassSetItem::Literal(ast::Literal { - span: self.span_char(), - kind: ast::LiteralKind::Verbatim, - c: '-', - })); - if !self.bump_and_bump_space() { - return Err(self.error( - Span::new(start, start), - ast::ErrorKind::ClassUnclosed, - )); - } - } - // If `]` is the *first* char in a set, then interpret it as a literal - // `]`. That is, an empty class is impossible to write. - if union.items.is_empty() && self.char() == ']' { - union.push(ast::ClassSetItem::Literal(ast::Literal { - span: self.span_char(), - kind: ast::LiteralKind::Verbatim, - c: ']', - })); - if !self.bump_and_bump_space() { - return Err(self.error( - Span::new(start, self.pos()), - ast::ErrorKind::ClassUnclosed, - )); - } - } - let set = ast::ClassBracketed { - span: Span::new(start, self.pos()), - negated, - kind: ast::ClassSet::union(ast::ClassSetUnion { - span: Span::new(union.span.start, union.span.start), - items: vec![], - }), - }; - Ok((set, union)) - } - - /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`. - /// - /// This assumes the parser is positioned at the opening `[`. 
- /// - /// If no valid ASCII character class could be found, then this does not - /// advance the parser and `None` is returned. Otherwise, the parser is - /// advanced to the first byte following the closing `]` and the - /// corresponding ASCII class is returned. - #[inline(never)] - fn maybe_parse_ascii_class(&self) -> Option { - // ASCII character classes are interesting from a parsing perspective - // because parsing cannot fail with any interesting error. For example, - // in order to use an ASCII character class, it must be enclosed in - // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think - // of it as "ASCII character classes have the syntax `[:NAME:]` which - // can only appear within character brackets." This means that things - // like `[[:lower:]A]` are legal constructs. - // - // However, if one types an incorrect ASCII character class, e.g., - // `[[:loower:]]`, then we treat that as a normal nested character - // class containing the characters `:elorw`. One might argue that we - // should return an error instead since the repeated colons give away - // the intent to write an ASCII class. But what if the user typed - // `[[:lower]]` instead? How can we tell that was intended to be an - // ASCII class and not just a normal nested class? - // - // Reasonable people can probably disagree over this, but for better - // or worse, we implement semantics that never fails at the expense - // of better failure modes. - assert_eq!(self.char(), '['); - // If parsing fails, then we back up the parser to this starting point. 
- let start = self.pos(); - let mut negated = false; - if !self.bump() || self.char() != ':' { - self.parser().pos.set(start); - return None; - } - if !self.bump() { - self.parser().pos.set(start); - return None; - } - if self.char() == '^' { - negated = true; - if !self.bump() { - self.parser().pos.set(start); - return None; - } - } - let name_start = self.offset(); - while self.char() != ':' && self.bump() {} - if self.is_eof() { - self.parser().pos.set(start); - return None; - } - let name = &self.pattern()[name_start..self.offset()]; - if !self.bump_if(":]") { - self.parser().pos.set(start); - return None; - } - let kind = match ast::ClassAsciiKind::from_name(name) { - Some(kind) => kind, - None => { - self.parser().pos.set(start); - return None; - } - }; - Some(ast::ClassAscii { - span: Span::new(start, self.pos()), - kind, - negated, - }) - } - - /// Parse a Unicode class in either the single character notation, `\pN` - /// or the multi-character bracketed notation, `\p{Greek}`. This assumes - /// the parser is positioned at the `p` (or `P` for negation) and will - /// advance the parser to the character immediately following the class. - /// - /// Note that this does not check whether the class name is valid or not. 
- #[inline(never)] - fn parse_unicode_class(&self) -> Result { - assert!(self.char() == 'p' || self.char() == 'P'); - - let mut scratch = self.parser().scratch.borrow_mut(); - scratch.clear(); - - let negated = self.char() == 'P'; - if !self.bump_and_bump_space() { - return Err( - self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof) - ); - } - let (start, kind) = if self.char() == '{' { - let start = self.span_char().end; - while self.bump_and_bump_space() && self.char() != '}' { - scratch.push(self.char()); - } - if self.is_eof() { - return Err(self - .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)); - } - assert_eq!(self.char(), '}'); - self.bump(); - - let name = scratch.as_str(); - if let Some(i) = name.find("!=") { - ( - start, - ast::ClassUnicodeKind::NamedValue { - op: ast::ClassUnicodeOpKind::NotEqual, - name: name[..i].to_string(), - value: name[i + 2..].to_string(), - }, - ) - } else if let Some(i) = name.find(':') { - ( - start, - ast::ClassUnicodeKind::NamedValue { - op: ast::ClassUnicodeOpKind::Colon, - name: name[..i].to_string(), - value: name[i + 1..].to_string(), - }, - ) - } else if let Some(i) = name.find('=') { - ( - start, - ast::ClassUnicodeKind::NamedValue { - op: ast::ClassUnicodeOpKind::Equal, - name: name[..i].to_string(), - value: name[i + 1..].to_string(), - }, - ) - } else { - (start, ast::ClassUnicodeKind::Named(name.to_string())) - } - } else { - let start = self.pos(); - let c = self.char(); - if c == '\\' { - return Err(self.error( - self.span_char(), - ast::ErrorKind::UnicodeClassInvalid, - )); - } - self.bump_and_bump_space(); - let kind = ast::ClassUnicodeKind::OneLetter(c); - (start, kind) - }; - Ok(ast::ClassUnicode { - span: Span::new(start, self.pos()), - negated, - kind, - }) - } - - /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the - /// parser is currently at a valid character class name and will be - /// advanced to the character immediately following the class. 
- #[inline(never)] - fn parse_perl_class(&self) -> ast::ClassPerl { - let c = self.char(); - let span = self.span_char(); - self.bump(); - let (negated, kind) = match c { - 'd' => (false, ast::ClassPerlKind::Digit), - 'D' => (true, ast::ClassPerlKind::Digit), - 's' => (false, ast::ClassPerlKind::Space), - 'S' => (true, ast::ClassPerlKind::Space), - 'w' => (false, ast::ClassPerlKind::Word), - 'W' => (true, ast::ClassPerlKind::Word), - c => panic!("expected valid Perl class but got '{}'", c), - }; - ast::ClassPerl { span, kind, negated } - } -} - -/// A type that traverses a fully parsed Ast and checks whether its depth -/// exceeds the specified nesting limit. If it does, then an error is returned. -#[derive(Debug)] -struct NestLimiter<'p, 's, P> { - /// The parser that is checking the nest limit. - p: &'p ParserI<'s, P>, - /// The current depth while walking an Ast. - depth: u32, -} - -impl<'p, 's, P: Borrow> NestLimiter<'p, 's, P> { - fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> { - NestLimiter { p, depth: 0 } - } - - #[inline(never)] - fn check(self, ast: &Ast) -> Result<()> { - ast::visit(ast, self) - } - - fn increment_depth(&mut self, span: &Span) -> Result<()> { - let new = self.depth.checked_add(1).ok_or_else(|| { - self.p.error( - span.clone(), - ast::ErrorKind::NestLimitExceeded(u32::MAX), - ) - })?; - let limit = self.p.parser().nest_limit; - if new > limit { - return Err(self.p.error( - span.clone(), - ast::ErrorKind::NestLimitExceeded(limit), - )); - } - self.depth = new; - Ok(()) - } - - fn decrement_depth(&mut self) { - // Assuming the correctness of the visitor, this should never drop - // below 0. 
- self.depth = self.depth.checked_sub(1).unwrap(); - } -} - -impl<'p, 's, P: Borrow> ast::Visitor for NestLimiter<'p, 's, P> { - type Output = (); - type Err = ast::Error; - - fn finish(self) -> Result<()> { - Ok(()) - } - - fn visit_pre(&mut self, ast: &Ast) -> Result<()> { - let span = match *ast { - Ast::Empty(_) - | Ast::Flags(_) - | Ast::Literal(_) - | Ast::Dot(_) - | Ast::Assertion(_) - | Ast::ClassUnicode(_) - | Ast::ClassPerl(_) => { - // These are all base cases, so we don't increment depth. - return Ok(()); - } - Ast::ClassBracketed(ref x) => &x.span, - Ast::Repetition(ref x) => &x.span, - Ast::Group(ref x) => &x.span, - Ast::Alternation(ref x) => &x.span, - Ast::Concat(ref x) => &x.span, - }; - self.increment_depth(span) - } - - fn visit_post(&mut self, ast: &Ast) -> Result<()> { - match *ast { - Ast::Empty(_) - | Ast::Flags(_) - | Ast::Literal(_) - | Ast::Dot(_) - | Ast::Assertion(_) - | Ast::ClassUnicode(_) - | Ast::ClassPerl(_) => { - // These are all base cases, so we don't decrement depth. - Ok(()) - } - Ast::ClassBracketed(_) - | Ast::Repetition(_) - | Ast::Group(_) - | Ast::Alternation(_) - | Ast::Concat(_) => { - self.decrement_depth(); - Ok(()) - } - } - } - - fn visit_class_set_item_pre( - &mut self, - ast: &ast::ClassSetItem, - ) -> Result<()> { - let span = match *ast { - ast::ClassSetItem::Empty(_) - | ast::ClassSetItem::Literal(_) - | ast::ClassSetItem::Range(_) - | ast::ClassSetItem::Ascii(_) - | ast::ClassSetItem::Unicode(_) - | ast::ClassSetItem::Perl(_) => { - // These are all base cases, so we don't increment depth. 
- return Ok(()); - } - ast::ClassSetItem::Bracketed(ref x) => &x.span, - ast::ClassSetItem::Union(ref x) => &x.span, - }; - self.increment_depth(span) - } - - fn visit_class_set_item_post( - &mut self, - ast: &ast::ClassSetItem, - ) -> Result<()> { - match *ast { - ast::ClassSetItem::Empty(_) - | ast::ClassSetItem::Literal(_) - | ast::ClassSetItem::Range(_) - | ast::ClassSetItem::Ascii(_) - | ast::ClassSetItem::Unicode(_) - | ast::ClassSetItem::Perl(_) => { - // These are all base cases, so we don't decrement depth. - Ok(()) - } - ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => { - self.decrement_depth(); - Ok(()) - } - } - } - - fn visit_class_set_binary_op_pre( - &mut self, - ast: &ast::ClassSetBinaryOp, - ) -> Result<()> { - self.increment_depth(&ast.span) - } - - fn visit_class_set_binary_op_post( - &mut self, - _ast: &ast::ClassSetBinaryOp, - ) -> Result<()> { - self.decrement_depth(); - Ok(()) - } -} - -/// When the result is an error, transforms the ast::ErrorKind from the source -/// Result into another one. This function is used to return clearer error -/// messages when possible. -fn specialize_err( - result: Result, - from: ast::ErrorKind, - to: ast::ErrorKind, -) -> Result { - if let Err(e) = result { - if e.kind == from { - Err(ast::Error { kind: to, pattern: e.pattern, span: e.span }) - } else { - Err(e) - } - } else { - result - } -} - -#[cfg(test)] -mod tests { - use core::ops::Range; - - use alloc::format; - - use super::*; - - // Our own assert_eq, which has slightly better formatting (but honestly - // still kind of crappy). - macro_rules! assert_eq { - ($left:expr, $right:expr) => {{ - match (&$left, &$right) { - (left_val, right_val) => { - if !(*left_val == *right_val) { - panic!( - "assertion failed: `(left == right)`\n\n\ - left: `{:?}`\nright: `{:?}`\n\n", - left_val, right_val - ) - } - } - } - }}; - } - - // We create these errors to compare with real ast::Errors in the tests. 
- // We define equality between TestError and ast::Error to disregard the - // pattern string in ast::Error, which is annoying to provide in tests. - #[derive(Clone, Debug)] - struct TestError { - span: Span, - kind: ast::ErrorKind, - } - - impl PartialEq for TestError { - fn eq(&self, other: &ast::Error) -> bool { - self.span == other.span && self.kind == other.kind - } - } - - impl PartialEq for ast::Error { - fn eq(&self, other: &TestError) -> bool { - self.span == other.span && self.kind == other.kind - } - } - - fn s(str: &str) -> String { - str.to_string() - } - - fn parser(pattern: &str) -> ParserI<'_, Parser> { - ParserI::new(Parser::new(), pattern) - } - - fn parser_octal(pattern: &str) -> ParserI<'_, Parser> { - let parser = ParserBuilder::new().octal(true).build(); - ParserI::new(parser, pattern) - } - - fn parser_empty_min_range(pattern: &str) -> ParserI<'_, Parser> { - let parser = ParserBuilder::new().empty_min_range(true).build(); - ParserI::new(parser, pattern) - } - - fn parser_nest_limit( - pattern: &str, - nest_limit: u32, - ) -> ParserI<'_, Parser> { - let p = ParserBuilder::new().nest_limit(nest_limit).build(); - ParserI::new(p, pattern) - } - - fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> { - let p = ParserBuilder::new().ignore_whitespace(true).build(); - ParserI::new(p, pattern) - } - - /// Short alias for creating a new span. - fn nspan(start: Position, end: Position) -> Span { - Span::new(start, end) - } - - /// Short alias for creating a new position. - fn npos(offset: usize, line: usize, column: usize) -> Position { - Position::new(offset, line, column) - } - - /// Create a new span from the given offset range. This assumes a single - /// line and sets the columns based on the offsets. i.e., This only works - /// out of the box for ASCII, which is fine for most tests. 
- fn span(range: Range) -> Span { - let start = Position::new(range.start, 1, range.start + 1); - let end = Position::new(range.end, 1, range.end + 1); - Span::new(start, end) - } - - /// Create a new span for the corresponding byte range in the given string. - fn span_range(subject: &str, range: Range) -> Span { - let start = Position { - offset: range.start, - line: 1 + subject[..range.start].matches('\n').count(), - column: 1 + subject[..range.start] - .chars() - .rev() - .position(|c| c == '\n') - .unwrap_or(subject[..range.start].chars().count()), - }; - let end = Position { - offset: range.end, - line: 1 + subject[..range.end].matches('\n').count(), - column: 1 + subject[..range.end] - .chars() - .rev() - .position(|c| c == '\n') - .unwrap_or(subject[..range.end].chars().count()), - }; - Span::new(start, end) - } - - /// Create a verbatim literal starting at the given position. - fn lit(c: char, start: usize) -> Ast { - lit_with(c, span(start..start + c.len_utf8())) - } - - /// Create a meta literal starting at the given position. - fn meta_lit(c: char, span: Span) -> Ast { - Ast::literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c }) - } - - /// Create a verbatim literal with the given span. - fn lit_with(c: char, span: Span) -> Ast { - Ast::literal(ast::Literal { - span, - kind: ast::LiteralKind::Verbatim, - c, - }) - } - - /// Create a concatenation with the given range. - fn concat(range: Range, asts: Vec) -> Ast { - concat_with(span(range), asts) - } - - /// Create a concatenation with the given span. - fn concat_with(span: Span, asts: Vec) -> Ast { - Ast::concat(ast::Concat { span, asts }) - } - - /// Create an alternation with the given span. - fn alt(range: Range, asts: Vec) -> Ast { - Ast::alternation(ast::Alternation { span: span(range), asts }) - } - - /// Create a capturing group with the given span. 
- fn group(range: Range, index: u32, ast: Ast) -> Ast { - Ast::group(ast::Group { - span: span(range), - kind: ast::GroupKind::CaptureIndex(index), - ast: Box::new(ast), - }) - } - - /// Create an ast::SetFlags. - /// - /// The given pattern should be the full pattern string. The range given - /// should correspond to the byte offsets where the flag set occurs. - /// - /// If negated is true, then the set is interpreted as beginning with a - /// negation. - fn flag_set( - pat: &str, - range: Range, - flag: ast::Flag, - negated: bool, - ) -> Ast { - let mut items = vec![ast::FlagsItem { - span: span_range(pat, (range.end - 2)..(range.end - 1)), - kind: ast::FlagsItemKind::Flag(flag), - }]; - if negated { - items.insert( - 0, - ast::FlagsItem { - span: span_range(pat, (range.start + 2)..(range.end - 2)), - kind: ast::FlagsItemKind::Negation, - }, - ); - } - Ast::flags(ast::SetFlags { - span: span_range(pat, range.clone()), - flags: ast::Flags { - span: span_range(pat, (range.start + 2)..(range.end - 1)), - items, - }, - }) - } - - #[test] - fn parse_nest_limit() { - // A nest limit of 0 still allows some types of regexes. - assert_eq!( - parser_nest_limit("", 0).parse(), - Ok(Ast::empty(span(0..0))) - ); - assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0))); - - // Test repetition operations, which require one level of nesting. 
- assert_eq!( - parser_nest_limit("a+", 0).parse().unwrap_err(), - TestError { - span: span(0..2), - kind: ast::ErrorKind::NestLimitExceeded(0), - } - ); - assert_eq!( - parser_nest_limit("a+", 1).parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..2), - op: ast::RepetitionOp { - span: span(1..2), - kind: ast::RepetitionKind::OneOrMore, - }, - greedy: true, - ast: Box::new(lit('a', 0)), - })) - ); - assert_eq!( - parser_nest_limit("(a)+", 1).parse().unwrap_err(), - TestError { - span: span(0..3), - kind: ast::ErrorKind::NestLimitExceeded(1), - } - ); - assert_eq!( - parser_nest_limit("a+*", 1).parse().unwrap_err(), - TestError { - span: span(0..2), - kind: ast::ErrorKind::NestLimitExceeded(1), - } - ); - assert_eq!( - parser_nest_limit("a+*", 2).parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..3), - op: ast::RepetitionOp { - span: span(2..3), - kind: ast::RepetitionKind::ZeroOrMore, - }, - greedy: true, - ast: Box::new(Ast::repetition(ast::Repetition { - span: span(0..2), - op: ast::RepetitionOp { - span: span(1..2), - kind: ast::RepetitionKind::OneOrMore, - }, - greedy: true, - ast: Box::new(lit('a', 0)), - })), - })) - ); - - // Test concatenations. A concatenation requires one level of nesting. - assert_eq!( - parser_nest_limit("ab", 0).parse().unwrap_err(), - TestError { - span: span(0..2), - kind: ast::ErrorKind::NestLimitExceeded(0), - } - ); - assert_eq!( - parser_nest_limit("ab", 1).parse(), - Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)])) - ); - assert_eq!( - parser_nest_limit("abc", 1).parse(), - Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)])) - ); - - // Test alternations. An alternation requires one level of nesting. 
- assert_eq!( - parser_nest_limit("a|b", 0).parse().unwrap_err(), - TestError { - span: span(0..3), - kind: ast::ErrorKind::NestLimitExceeded(0), - } - ); - assert_eq!( - parser_nest_limit("a|b", 1).parse(), - Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)])) - ); - assert_eq!( - parser_nest_limit("a|b|c", 1).parse(), - Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)])) - ); - - // Test character classes. Classes form their own mini-recursive - // syntax! - assert_eq!( - parser_nest_limit("[a]", 0).parse().unwrap_err(), - TestError { - span: span(0..3), - kind: ast::ErrorKind::NestLimitExceeded(0), - } - ); - assert_eq!( - parser_nest_limit("[a]", 1).parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..3), - negated: false, - kind: ast::ClassSet::Item(ast::ClassSetItem::Literal( - ast::Literal { - span: span(1..2), - kind: ast::LiteralKind::Verbatim, - c: 'a', - } - )), - })) - ); - assert_eq!( - parser_nest_limit("[ab]", 1).parse().unwrap_err(), - TestError { - span: span(1..3), - kind: ast::ErrorKind::NestLimitExceeded(1), - } - ); - assert_eq!( - parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(), - TestError { - span: span(3..7), - kind: ast::ErrorKind::NestLimitExceeded(2), - } - ); - assert_eq!( - parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(), - TestError { - span: span(4..6), - kind: ast::ErrorKind::NestLimitExceeded(3), - } - ); - assert_eq!( - parser_nest_limit("[a--b]", 1).parse().unwrap_err(), - TestError { - span: span(1..5), - kind: ast::ErrorKind::NestLimitExceeded(1), - } - ); - assert_eq!( - parser_nest_limit("[a--bc]", 2).parse().unwrap_err(), - TestError { - span: span(4..6), - kind: ast::ErrorKind::NestLimitExceeded(2), - } - ); - } - - #[test] - fn parse_comments() { - let pat = "(?x) -# This is comment 1. -foo # This is comment 2. - # This is comment 3. 
-bar -# This is comment 4."; - let astc = parser(pat).parse_with_comments().unwrap(); - assert_eq!( - astc.ast, - concat_with( - span_range(pat, 0..pat.len()), - vec![ - flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - lit_with('f', span_range(pat, 26..27)), - lit_with('o', span_range(pat, 27..28)), - lit_with('o', span_range(pat, 28..29)), - lit_with('b', span_range(pat, 74..75)), - lit_with('a', span_range(pat, 75..76)), - lit_with('r', span_range(pat, 76..77)), - ] - ) - ); - assert_eq!( - astc.comments, - vec![ - ast::Comment { - span: span_range(pat, 5..26), - comment: s(" This is comment 1."), - }, - ast::Comment { - span: span_range(pat, 30..51), - comment: s(" This is comment 2."), - }, - ast::Comment { - span: span_range(pat, 53..74), - comment: s(" This is comment 3."), - }, - ast::Comment { - span: span_range(pat, 78..98), - comment: s(" This is comment 4."), - }, - ] - ); - } - - #[test] - fn parse_holistic() { - assert_eq!(parser("]").parse(), Ok(lit(']', 0))); - assert_eq!( - parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(), - Ok(concat( - 0..36, - vec![ - meta_lit('\\', span(0..2)), - meta_lit('.', span(2..4)), - meta_lit('+', span(4..6)), - meta_lit('*', span(6..8)), - meta_lit('?', span(8..10)), - meta_lit('(', span(10..12)), - meta_lit(')', span(12..14)), - meta_lit('|', span(14..16)), - meta_lit('[', span(16..18)), - meta_lit(']', span(18..20)), - meta_lit('{', span(20..22)), - meta_lit('}', span(22..24)), - meta_lit('^', span(24..26)), - meta_lit('$', span(26..28)), - meta_lit('#', span(28..30)), - meta_lit('&', span(30..32)), - meta_lit('-', span(32..34)), - meta_lit('~', span(34..36)), - ] - )) - ); - } - - #[test] - fn parse_ignore_whitespace() { - // Test that basic whitespace insensitivity works. 
- let pat = "(?x)a b"; - assert_eq!( - parser(pat).parse(), - Ok(concat_with( - nspan(npos(0, 1, 1), npos(7, 1, 8)), - vec![ - flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))), - lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))), - ] - )) - ); - - // Test that we can toggle whitespace insensitivity. - let pat = "(?x)a b(?-x)a b"; - assert_eq!( - parser(pat).parse(), - Ok(concat_with( - nspan(npos(0, 1, 1), npos(15, 1, 16)), - vec![ - flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))), - lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))), - flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true), - lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))), - lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))), - lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))), - ] - )) - ); - - // Test that nesting whitespace insensitive flags works. - let pat = "a (?x:a )a "; - assert_eq!( - parser(pat).parse(), - Ok(concat_with( - span_range(pat, 0..11), - vec![ - lit_with('a', span_range(pat, 0..1)), - lit_with(' ', span_range(pat, 1..2)), - Ast::group(ast::Group { - span: span_range(pat, 2..9), - kind: ast::GroupKind::NonCapturing(ast::Flags { - span: span_range(pat, 4..5), - items: vec![ast::FlagsItem { - span: span_range(pat, 4..5), - kind: ast::FlagsItemKind::Flag( - ast::Flag::IgnoreWhitespace - ), - },], - }), - ast: Box::new(lit_with('a', span_range(pat, 6..7))), - }), - lit_with('a', span_range(pat, 9..10)), - lit_with(' ', span_range(pat, 10..11)), - ] - )) - ); - - // Test that whitespace after an opening paren is insignificant. 
- let pat = "(?x)( ?P a )"; - assert_eq!( - parser(pat).parse(), - Ok(concat_with( - span_range(pat, 0..pat.len()), - vec![ - flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::group(ast::Group { - span: span_range(pat, 4..pat.len()), - kind: ast::GroupKind::CaptureName { - starts_with_p: true, - name: ast::CaptureName { - span: span_range(pat, 9..12), - name: s("foo"), - index: 1, - } - }, - ast: Box::new(lit_with('a', span_range(pat, 14..15))), - }), - ] - )) - ); - let pat = "(?x)( a )"; - assert_eq!( - parser(pat).parse(), - Ok(concat_with( - span_range(pat, 0..pat.len()), - vec![ - flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::group(ast::Group { - span: span_range(pat, 4..pat.len()), - kind: ast::GroupKind::CaptureIndex(1), - ast: Box::new(lit_with('a', span_range(pat, 7..8))), - }), - ] - )) - ); - let pat = "(?x)( ?: a )"; - assert_eq!( - parser(pat).parse(), - Ok(concat_with( - span_range(pat, 0..pat.len()), - vec![ - flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::group(ast::Group { - span: span_range(pat, 4..pat.len()), - kind: ast::GroupKind::NonCapturing(ast::Flags { - span: span_range(pat, 8..8), - items: vec![], - }), - ast: Box::new(lit_with('a', span_range(pat, 11..12))), - }), - ] - )) - ); - let pat = r"(?x)\x { 53 }"; - assert_eq!( - parser(pat).parse(), - Ok(concat_with( - span_range(pat, 0..pat.len()), - vec![ - flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::literal(ast::Literal { - span: span(4..13), - kind: ast::LiteralKind::HexBrace( - ast::HexLiteralKind::X - ), - c: 'S', - }), - ] - )) - ); - - // Test that whitespace after an escape is OK. 
- let pat = r"(?x)\ "; - assert_eq!( - parser(pat).parse(), - Ok(concat_with( - span_range(pat, 0..pat.len()), - vec![ - flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::literal(ast::Literal { - span: span_range(pat, 4..6), - kind: ast::LiteralKind::Superfluous, - c: ' ', - }), - ] - )) - ); - } - - #[test] - fn parse_newlines() { - let pat = ".\n."; - assert_eq!( - parser(pat).parse(), - Ok(concat_with( - span_range(pat, 0..3), - vec![ - Ast::dot(span_range(pat, 0..1)), - lit_with('\n', span_range(pat, 1..2)), - Ast::dot(span_range(pat, 2..3)), - ] - )) - ); - - let pat = "foobar\nbaz\nquux\n"; - assert_eq!( - parser(pat).parse(), - Ok(concat_with( - span_range(pat, 0..pat.len()), - vec![ - lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))), - lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))), - lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))), - lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))), - lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))), - lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))), - lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))), - lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))), - lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))), - lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))), - lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))), - lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))), - lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))), - lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))), - lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))), - lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))), - ] - )) - ); - } - - #[test] - fn parse_uncounted_repetition() { - assert_eq!( - parser(r"a*").parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..2), - op: ast::RepetitionOp { - span: span(1..2), - kind: ast::RepetitionKind::ZeroOrMore, - }, - greedy: true, - ast: Box::new(lit('a', 0)), - })) - ); - assert_eq!( - parser(r"a+").parse(), - Ok(Ast::repetition(ast::Repetition { - span: 
span(0..2), - op: ast::RepetitionOp { - span: span(1..2), - kind: ast::RepetitionKind::OneOrMore, - }, - greedy: true, - ast: Box::new(lit('a', 0)), - })) - ); - - assert_eq!( - parser(r"a?").parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..2), - op: ast::RepetitionOp { - span: span(1..2), - kind: ast::RepetitionKind::ZeroOrOne, - }, - greedy: true, - ast: Box::new(lit('a', 0)), - })) - ); - assert_eq!( - parser(r"a??").parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..3), - op: ast::RepetitionOp { - span: span(1..3), - kind: ast::RepetitionKind::ZeroOrOne, - }, - greedy: false, - ast: Box::new(lit('a', 0)), - })) - ); - assert_eq!( - parser(r"a?").parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..2), - op: ast::RepetitionOp { - span: span(1..2), - kind: ast::RepetitionKind::ZeroOrOne, - }, - greedy: true, - ast: Box::new(lit('a', 0)), - })) - ); - assert_eq!( - parser(r"a?b").parse(), - Ok(concat( - 0..3, - vec![ - Ast::repetition(ast::Repetition { - span: span(0..2), - op: ast::RepetitionOp { - span: span(1..2), - kind: ast::RepetitionKind::ZeroOrOne, - }, - greedy: true, - ast: Box::new(lit('a', 0)), - }), - lit('b', 2), - ] - )) - ); - assert_eq!( - parser(r"a??b").parse(), - Ok(concat( - 0..4, - vec![ - Ast::repetition(ast::Repetition { - span: span(0..3), - op: ast::RepetitionOp { - span: span(1..3), - kind: ast::RepetitionKind::ZeroOrOne, - }, - greedy: false, - ast: Box::new(lit('a', 0)), - }), - lit('b', 3), - ] - )) - ); - assert_eq!( - parser(r"ab?").parse(), - Ok(concat( - 0..3, - vec![ - lit('a', 0), - Ast::repetition(ast::Repetition { - span: span(1..3), - op: ast::RepetitionOp { - span: span(2..3), - kind: ast::RepetitionKind::ZeroOrOne, - }, - greedy: true, - ast: Box::new(lit('b', 1)), - }), - ] - )) - ); - assert_eq!( - parser(r"(ab)?").parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..5), - op: ast::RepetitionOp { - span: span(4..5), - kind: ast::RepetitionKind::ZeroOrOne, - }, - greedy: 
true, - ast: Box::new(group( - 0..4, - 1, - concat(1..3, vec![lit('a', 1), lit('b', 2),]) - )), - })) - ); - assert_eq!( - parser(r"|a?").parse(), - Ok(alt( - 0..3, - vec![ - Ast::empty(span(0..0)), - Ast::repetition(ast::Repetition { - span: span(1..3), - op: ast::RepetitionOp { - span: span(2..3), - kind: ast::RepetitionKind::ZeroOrOne, - }, - greedy: true, - ast: Box::new(lit('a', 1)), - }), - ] - )) - ); - - assert_eq!( - parser(r"*").parse().unwrap_err(), - TestError { - span: span(0..0), - kind: ast::ErrorKind::RepetitionMissing, - } - ); - assert_eq!( - parser(r"(?i)*").parse().unwrap_err(), - TestError { - span: span(4..4), - kind: ast::ErrorKind::RepetitionMissing, - } - ); - assert_eq!( - parser(r"(*)").parse().unwrap_err(), - TestError { - span: span(1..1), - kind: ast::ErrorKind::RepetitionMissing, - } - ); - assert_eq!( - parser(r"(?:?)").parse().unwrap_err(), - TestError { - span: span(3..3), - kind: ast::ErrorKind::RepetitionMissing, - } - ); - assert_eq!( - parser(r"+").parse().unwrap_err(), - TestError { - span: span(0..0), - kind: ast::ErrorKind::RepetitionMissing, - } - ); - assert_eq!( - parser(r"?").parse().unwrap_err(), - TestError { - span: span(0..0), - kind: ast::ErrorKind::RepetitionMissing, - } - ); - assert_eq!( - parser(r"(?)").parse().unwrap_err(), - TestError { - span: span(1..1), - kind: ast::ErrorKind::RepetitionMissing, - } - ); - assert_eq!( - parser(r"|*").parse().unwrap_err(), - TestError { - span: span(1..1), - kind: ast::ErrorKind::RepetitionMissing, - } - ); - assert_eq!( - parser(r"|+").parse().unwrap_err(), - TestError { - span: span(1..1), - kind: ast::ErrorKind::RepetitionMissing, - } - ); - assert_eq!( - parser(r"|?").parse().unwrap_err(), - TestError { - span: span(1..1), - kind: ast::ErrorKind::RepetitionMissing, - } - ); - } - - #[test] - fn parse_counted_repetition() { - assert_eq!( - parser(r"a{5}").parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..4), - op: ast::RepetitionOp { - span: span(1..4), - 
kind: ast::RepetitionKind::Range( - ast::RepetitionRange::Exactly(5) - ), - }, - greedy: true, - ast: Box::new(lit('a', 0)), - })) - ); - assert_eq!( - parser(r"a{5,}").parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..5), - op: ast::RepetitionOp { - span: span(1..5), - kind: ast::RepetitionKind::Range( - ast::RepetitionRange::AtLeast(5) - ), - }, - greedy: true, - ast: Box::new(lit('a', 0)), - })) - ); - assert_eq!( - parser(r"a{5,9}").parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..6), - op: ast::RepetitionOp { - span: span(1..6), - kind: ast::RepetitionKind::Range( - ast::RepetitionRange::Bounded(5, 9) - ), - }, - greedy: true, - ast: Box::new(lit('a', 0)), - })) - ); - assert_eq!( - parser(r"a{5}?").parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..5), - op: ast::RepetitionOp { - span: span(1..5), - kind: ast::RepetitionKind::Range( - ast::RepetitionRange::Exactly(5) - ), - }, - greedy: false, - ast: Box::new(lit('a', 0)), - })) - ); - assert_eq!( - parser(r"ab{5}").parse(), - Ok(concat( - 0..5, - vec![ - lit('a', 0), - Ast::repetition(ast::Repetition { - span: span(1..5), - op: ast::RepetitionOp { - span: span(2..5), - kind: ast::RepetitionKind::Range( - ast::RepetitionRange::Exactly(5) - ), - }, - greedy: true, - ast: Box::new(lit('b', 1)), - }), - ] - )) - ); - assert_eq!( - parser(r"ab{5}c").parse(), - Ok(concat( - 0..6, - vec![ - lit('a', 0), - Ast::repetition(ast::Repetition { - span: span(1..5), - op: ast::RepetitionOp { - span: span(2..5), - kind: ast::RepetitionKind::Range( - ast::RepetitionRange::Exactly(5) - ), - }, - greedy: true, - ast: Box::new(lit('b', 1)), - }), - lit('c', 5), - ] - )) - ); - - assert_eq!( - parser(r"a{ 5 }").parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..6), - op: ast::RepetitionOp { - span: span(1..6), - kind: ast::RepetitionKind::Range( - ast::RepetitionRange::Exactly(5) - ), - }, - greedy: true, - ast: Box::new(lit('a', 0)), - })) - ); - assert_eq!( - parser(r"a{ 5 
, 9 }").parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..10), - op: ast::RepetitionOp { - span: span(1..10), - kind: ast::RepetitionKind::Range( - ast::RepetitionRange::Bounded(5, 9) - ), - }, - greedy: true, - ast: Box::new(lit('a', 0)), - })) - ); - assert_eq!( - parser_empty_min_range(r"a{,9}").parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..5), - op: ast::RepetitionOp { - span: span(1..5), - kind: ast::RepetitionKind::Range( - ast::RepetitionRange::Bounded(0, 9) - ), - }, - greedy: true, - ast: Box::new(lit('a', 0)), - })) - ); - assert_eq!( - parser_ignore_whitespace(r"a{5,9} ?").parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..8), - op: ast::RepetitionOp { - span: span(1..8), - kind: ast::RepetitionKind::Range( - ast::RepetitionRange::Bounded(5, 9) - ), - }, - greedy: false, - ast: Box::new(lit('a', 0)), - })) - ); - assert_eq!( - parser(r"\b{5,9}").parse(), - Ok(Ast::repetition(ast::Repetition { - span: span(0..7), - op: ast::RepetitionOp { - span: span(2..7), - kind: ast::RepetitionKind::Range( - ast::RepetitionRange::Bounded(5, 9) - ), - }, - greedy: true, - ast: Box::new(Ast::assertion(ast::Assertion { - span: span(0..2), - kind: ast::AssertionKind::WordBoundary, - })), - })) - ); - - assert_eq!( - parser(r"(?i){0}").parse().unwrap_err(), - TestError { - span: span(4..4), - kind: ast::ErrorKind::RepetitionMissing, - } - ); - assert_eq!( - parser(r"(?m){1,1}").parse().unwrap_err(), - TestError { - span: span(4..4), - kind: ast::ErrorKind::RepetitionMissing, - } - ); - assert_eq!( - parser(r"a{]}").parse().unwrap_err(), - TestError { - span: span(2..2), - kind: ast::ErrorKind::RepetitionCountDecimalEmpty, - } - ); - assert_eq!( - parser(r"a{1,]}").parse().unwrap_err(), - TestError { - span: span(4..4), - kind: ast::ErrorKind::RepetitionCountDecimalEmpty, - } - ); - assert_eq!( - parser(r"a{").parse().unwrap_err(), - TestError { - span: span(1..2), - kind: ast::ErrorKind::RepetitionCountUnclosed, - } - ); - 
assert_eq!( - parser(r"a{}").parse().unwrap_err(), - TestError { - span: span(2..2), - kind: ast::ErrorKind::RepetitionCountDecimalEmpty, - } - ); - assert_eq!( - parser(r"a{a").parse().unwrap_err(), - TestError { - span: span(2..2), - kind: ast::ErrorKind::RepetitionCountDecimalEmpty, - } - ); - assert_eq!( - parser(r"a{9999999999}").parse().unwrap_err(), - TestError { - span: span(2..12), - kind: ast::ErrorKind::DecimalInvalid, - } - ); - assert_eq!( - parser(r"a{9").parse().unwrap_err(), - TestError { - span: span(1..3), - kind: ast::ErrorKind::RepetitionCountUnclosed, - } - ); - assert_eq!( - parser(r"a{9,a").parse().unwrap_err(), - TestError { - span: span(4..4), - kind: ast::ErrorKind::RepetitionCountDecimalEmpty, - } - ); - assert_eq!( - parser(r"a{9,9999999999}").parse().unwrap_err(), - TestError { - span: span(4..14), - kind: ast::ErrorKind::DecimalInvalid, - } - ); - assert_eq!( - parser(r"a{9,").parse().unwrap_err(), - TestError { - span: span(1..4), - kind: ast::ErrorKind::RepetitionCountUnclosed, - } - ); - assert_eq!( - parser(r"a{9,11").parse().unwrap_err(), - TestError { - span: span(1..6), - kind: ast::ErrorKind::RepetitionCountUnclosed, - } - ); - assert_eq!( - parser(r"a{2,1}").parse().unwrap_err(), - TestError { - span: span(1..6), - kind: ast::ErrorKind::RepetitionCountInvalid, - } - ); - assert_eq!( - parser(r"{5}").parse().unwrap_err(), - TestError { - span: span(0..0), - kind: ast::ErrorKind::RepetitionMissing, - } - ); - assert_eq!( - parser(r"|{5}").parse().unwrap_err(), - TestError { - span: span(1..1), - kind: ast::ErrorKind::RepetitionMissing, - } - ); - } - - #[test] - fn parse_alternate() { - assert_eq!( - parser(r"a|b").parse(), - Ok(Ast::alternation(ast::Alternation { - span: span(0..3), - asts: vec![lit('a', 0), lit('b', 2)], - })) - ); - assert_eq!( - parser(r"(a|b)").parse(), - Ok(group( - 0..5, - 1, - Ast::alternation(ast::Alternation { - span: span(1..4), - asts: vec![lit('a', 1), lit('b', 3)], - }) - )) - ); - - assert_eq!( - 
parser(r"a|b|c").parse(), - Ok(Ast::alternation(ast::Alternation { - span: span(0..5), - asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)], - })) - ); - assert_eq!( - parser(r"ax|by|cz").parse(), - Ok(Ast::alternation(ast::Alternation { - span: span(0..8), - asts: vec![ - concat(0..2, vec![lit('a', 0), lit('x', 1)]), - concat(3..5, vec![lit('b', 3), lit('y', 4)]), - concat(6..8, vec![lit('c', 6), lit('z', 7)]), - ], - })) - ); - assert_eq!( - parser(r"(ax|by|cz)").parse(), - Ok(group( - 0..10, - 1, - Ast::alternation(ast::Alternation { - span: span(1..9), - asts: vec![ - concat(1..3, vec![lit('a', 1), lit('x', 2)]), - concat(4..6, vec![lit('b', 4), lit('y', 5)]), - concat(7..9, vec![lit('c', 7), lit('z', 8)]), - ], - }) - )) - ); - assert_eq!( - parser(r"(ax|(by|(cz)))").parse(), - Ok(group( - 0..14, - 1, - alt( - 1..13, - vec![ - concat(1..3, vec![lit('a', 1), lit('x', 2)]), - group( - 4..13, - 2, - alt( - 5..12, - vec![ - concat( - 5..7, - vec![lit('b', 5), lit('y', 6)] - ), - group( - 8..12, - 3, - concat( - 9..11, - vec![lit('c', 9), lit('z', 10),] - ) - ), - ] - ) - ), - ] - ) - )) - ); - - assert_eq!( - parser(r"|").parse(), - Ok(alt( - 0..1, - vec![Ast::empty(span(0..0)), Ast::empty(span(1..1)),] - )) - ); - assert_eq!( - parser(r"||").parse(), - Ok(alt( - 0..2, - vec![ - Ast::empty(span(0..0)), - Ast::empty(span(1..1)), - Ast::empty(span(2..2)), - ] - )) - ); - assert_eq!( - parser(r"a|").parse(), - Ok(alt(0..2, vec![lit('a', 0), Ast::empty(span(2..2)),])) - ); - assert_eq!( - parser(r"|a").parse(), - Ok(alt(0..2, vec![Ast::empty(span(0..0)), lit('a', 1),])) - ); - - assert_eq!( - parser(r"(|)").parse(), - Ok(group( - 0..3, - 1, - alt( - 1..2, - vec![Ast::empty(span(1..1)), Ast::empty(span(2..2)),] - ) - )) - ); - assert_eq!( - parser(r"(a|)").parse(), - Ok(group( - 0..4, - 1, - alt(1..3, vec![lit('a', 1), Ast::empty(span(3..3)),]) - )) - ); - assert_eq!( - parser(r"(|a)").parse(), - Ok(group( - 0..4, - 1, - alt(1..3, vec![Ast::empty(span(1..1)), lit('a', 
2),]) - )) - ); - - assert_eq!( - parser(r"a|b)").parse().unwrap_err(), - TestError { - span: span(3..4), - kind: ast::ErrorKind::GroupUnopened, - } - ); - assert_eq!( - parser(r"(a|b").parse().unwrap_err(), - TestError { - span: span(0..1), - kind: ast::ErrorKind::GroupUnclosed, - } - ); - } - - #[test] - fn parse_unsupported_lookaround() { - assert_eq!( - parser(r"(?=a)").parse().unwrap_err(), - TestError { - span: span(0..3), - kind: ast::ErrorKind::UnsupportedLookAround, - } - ); - assert_eq!( - parser(r"(?!a)").parse().unwrap_err(), - TestError { - span: span(0..3), - kind: ast::ErrorKind::UnsupportedLookAround, - } - ); - assert_eq!( - parser(r"(?<=a)").parse().unwrap_err(), - TestError { - span: span(0..4), - kind: ast::ErrorKind::UnsupportedLookAround, - } - ); - assert_eq!( - parser(r"(?z)").parse(), - Ok(Ast::group(ast::Group { - span: span(0..7), - kind: ast::GroupKind::CaptureName { - starts_with_p: false, - name: ast::CaptureName { - span: span(3..4), - name: s("a"), - index: 1, - } - }, - ast: Box::new(lit('z', 5)), - })) - ); - assert_eq!( - parser("(?Pz)").parse(), - Ok(Ast::group(ast::Group { - span: span(0..8), - kind: ast::GroupKind::CaptureName { - starts_with_p: true, - name: ast::CaptureName { - span: span(4..5), - name: s("a"), - index: 1, - } - }, - ast: Box::new(lit('z', 6)), - })) - ); - assert_eq!( - parser("(?Pz)").parse(), - Ok(Ast::group(ast::Group { - span: span(0..10), - kind: ast::GroupKind::CaptureName { - starts_with_p: true, - name: ast::CaptureName { - span: span(4..7), - name: s("abc"), - index: 1, - } - }, - ast: Box::new(lit('z', 8)), - })) - ); - - assert_eq!( - parser("(?Pz)").parse(), - Ok(Ast::group(ast::Group { - span: span(0..10), - kind: ast::GroupKind::CaptureName { - starts_with_p: true, - name: ast::CaptureName { - span: span(4..7), - name: s("a_1"), - index: 1, - } - }, - ast: Box::new(lit('z', 8)), - })) - ); - - assert_eq!( - parser("(?Pz)").parse(), - Ok(Ast::group(ast::Group { - span: span(0..10), - kind: 
ast::GroupKind::CaptureName { - starts_with_p: true, - name: ast::CaptureName { - span: span(4..7), - name: s("a.1"), - index: 1, - } - }, - ast: Box::new(lit('z', 8)), - })) - ); - - assert_eq!( - parser("(?Pz)").parse(), - Ok(Ast::group(ast::Group { - span: span(0..11), - kind: ast::GroupKind::CaptureName { - starts_with_p: true, - name: ast::CaptureName { - span: span(4..8), - name: s("a[1]"), - index: 1, - } - }, - ast: Box::new(lit('z', 9)), - })) - ); - - assert_eq!( - parser("(?P)").parse(), - Ok(Ast::group(ast::Group { - span: Span::new( - Position::new(0, 1, 1), - Position::new(9, 1, 9), - ), - kind: ast::GroupKind::CaptureName { - starts_with_p: true, - name: ast::CaptureName { - span: Span::new( - Position::new(4, 1, 5), - Position::new(7, 1, 7), - ), - name: s("a¾"), - index: 1, - } - }, - ast: Box::new(Ast::empty(Span::new( - Position::new(8, 1, 8), - Position::new(8, 1, 8), - ))), - })) - ); - assert_eq!( - parser("(?P<名字>)").parse(), - Ok(Ast::group(ast::Group { - span: Span::new( - Position::new(0, 1, 1), - Position::new(12, 1, 9), - ), - kind: ast::GroupKind::CaptureName { - starts_with_p: true, - name: ast::CaptureName { - span: Span::new( - Position::new(4, 1, 5), - Position::new(10, 1, 7), - ), - name: s("名字"), - index: 1, - } - }, - ast: Box::new(Ast::empty(Span::new( - Position::new(11, 1, 8), - Position::new(11, 1, 8), - ))), - })) - ); - - assert_eq!( - parser("(?P<").parse().unwrap_err(), - TestError { - span: span(4..4), - kind: ast::ErrorKind::GroupNameUnexpectedEof, - } - ); - assert_eq!( - parser("(?P<>z)").parse().unwrap_err(), - TestError { - span: span(4..4), - kind: ast::ErrorKind::GroupNameEmpty, - } - ); - assert_eq!( - parser("(?Py)(?Pz)").parse().unwrap_err(), - TestError { - span: span(12..13), - kind: ast::ErrorKind::GroupNameDuplicate { - original: span(4..5), - }, - } - ); - assert_eq!( - parser("(?P<5>)").parse().unwrap_err(), - TestError { - span: span(4..5), - kind: ast::ErrorKind::GroupNameInvalid, - } - ); - assert_eq!( 
- parser("(?P<5a>)").parse().unwrap_err(), - TestError { - span: span(4..5), - kind: ast::ErrorKind::GroupNameInvalid, - } - ); - assert_eq!( - parser("(?P<¾>)").parse().unwrap_err(), - TestError { - span: Span::new( - Position::new(4, 1, 5), - Position::new(6, 1, 6), - ), - kind: ast::ErrorKind::GroupNameInvalid, - } - ); - assert_eq!( - parser("(?P<¾a>)").parse().unwrap_err(), - TestError { - span: Span::new( - Position::new(4, 1, 5), - Position::new(6, 1, 6), - ), - kind: ast::ErrorKind::GroupNameInvalid, - } - ); - assert_eq!( - parser("(?P<☃>)").parse().unwrap_err(), - TestError { - span: Span::new( - Position::new(4, 1, 5), - Position::new(7, 1, 6), - ), - kind: ast::ErrorKind::GroupNameInvalid, - } - ); - assert_eq!( - parser("(?P)").parse().unwrap_err(), - TestError { - span: Span::new( - Position::new(5, 1, 6), - Position::new(8, 1, 7), - ), - kind: ast::ErrorKind::GroupNameInvalid, - } - ); - } - - #[test] - fn parse_flags() { - assert_eq!( - parser("i:").parse_flags(), - Ok(ast::Flags { - span: span(0..1), - items: vec![ast::FlagsItem { - span: span(0..1), - kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive), - }], - }) - ); - assert_eq!( - parser("i)").parse_flags(), - Ok(ast::Flags { - span: span(0..1), - items: vec![ast::FlagsItem { - span: span(0..1), - kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive), - }], - }) - ); - - assert_eq!( - parser("isU:").parse_flags(), - Ok(ast::Flags { - span: span(0..3), - items: vec![ - ast::FlagsItem { - span: span(0..1), - kind: ast::FlagsItemKind::Flag( - ast::Flag::CaseInsensitive - ), - }, - ast::FlagsItem { - span: span(1..2), - kind: ast::FlagsItemKind::Flag( - ast::Flag::DotMatchesNewLine - ), - }, - ast::FlagsItem { - span: span(2..3), - kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed), - }, - ], - }) - ); - - assert_eq!( - parser("-isU:").parse_flags(), - Ok(ast::Flags { - span: span(0..4), - items: vec![ - ast::FlagsItem { - span: span(0..1), - kind: ast::FlagsItemKind::Negation, - }, 
- ast::FlagsItem { - span: span(1..2), - kind: ast::FlagsItemKind::Flag( - ast::Flag::CaseInsensitive - ), - }, - ast::FlagsItem { - span: span(2..3), - kind: ast::FlagsItemKind::Flag( - ast::Flag::DotMatchesNewLine - ), - }, - ast::FlagsItem { - span: span(3..4), - kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed), - }, - ], - }) - ); - assert_eq!( - parser("i-sU:").parse_flags(), - Ok(ast::Flags { - span: span(0..4), - items: vec![ - ast::FlagsItem { - span: span(0..1), - kind: ast::FlagsItemKind::Flag( - ast::Flag::CaseInsensitive - ), - }, - ast::FlagsItem { - span: span(1..2), - kind: ast::FlagsItemKind::Negation, - }, - ast::FlagsItem { - span: span(2..3), - kind: ast::FlagsItemKind::Flag( - ast::Flag::DotMatchesNewLine - ), - }, - ast::FlagsItem { - span: span(3..4), - kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed), - }, - ], - }) - ); - assert_eq!( - parser("i-sR:").parse_flags(), - Ok(ast::Flags { - span: span(0..4), - items: vec![ - ast::FlagsItem { - span: span(0..1), - kind: ast::FlagsItemKind::Flag( - ast::Flag::CaseInsensitive - ), - }, - ast::FlagsItem { - span: span(1..2), - kind: ast::FlagsItemKind::Negation, - }, - ast::FlagsItem { - span: span(2..3), - kind: ast::FlagsItemKind::Flag( - ast::Flag::DotMatchesNewLine - ), - }, - ast::FlagsItem { - span: span(3..4), - kind: ast::FlagsItemKind::Flag(ast::Flag::CRLF), - }, - ], - }) - ); - - assert_eq!( - parser("isU").parse_flags().unwrap_err(), - TestError { - span: span(3..3), - kind: ast::ErrorKind::FlagUnexpectedEof, - } - ); - assert_eq!( - parser("isUa:").parse_flags().unwrap_err(), - TestError { - span: span(3..4), - kind: ast::ErrorKind::FlagUnrecognized, - } - ); - assert_eq!( - parser("isUi:").parse_flags().unwrap_err(), - TestError { - span: span(3..4), - kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) }, - } - ); - assert_eq!( - parser("i-sU-i:").parse_flags().unwrap_err(), - TestError { - span: span(4..5), - kind: ast::ErrorKind::FlagRepeatedNegation { - original: 
span(1..2), - }, - } - ); - assert_eq!( - parser("-)").parse_flags().unwrap_err(), - TestError { - span: span(0..1), - kind: ast::ErrorKind::FlagDanglingNegation, - } - ); - assert_eq!( - parser("i-)").parse_flags().unwrap_err(), - TestError { - span: span(1..2), - kind: ast::ErrorKind::FlagDanglingNegation, - } - ); - assert_eq!( - parser("iU-)").parse_flags().unwrap_err(), - TestError { - span: span(2..3), - kind: ast::ErrorKind::FlagDanglingNegation, - } - ); - } - - #[test] - fn parse_flag() { - assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive)); - assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine)); - assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine)); - assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed)); - assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode)); - assert_eq!(parser("R").parse_flag(), Ok(ast::Flag::CRLF)); - assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace)); - - assert_eq!( - parser("a").parse_flag().unwrap_err(), - TestError { - span: span(0..1), - kind: ast::ErrorKind::FlagUnrecognized, - } - ); - assert_eq!( - parser("☃").parse_flag().unwrap_err(), - TestError { - span: span_range("☃", 0..3), - kind: ast::ErrorKind::FlagUnrecognized, - } - ); - } - - #[test] - fn parse_primitive_non_escape() { - assert_eq!( - parser(r".").parse_primitive(), - Ok(Primitive::Dot(span(0..1))) - ); - assert_eq!( - parser(r"^").parse_primitive(), - Ok(Primitive::Assertion(ast::Assertion { - span: span(0..1), - kind: ast::AssertionKind::StartLine, - })) - ); - assert_eq!( - parser(r"$").parse_primitive(), - Ok(Primitive::Assertion(ast::Assertion { - span: span(0..1), - kind: ast::AssertionKind::EndLine, - })) - ); - - assert_eq!( - parser(r"a").parse_primitive(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..1), - kind: ast::LiteralKind::Verbatim, - c: 'a', - })) - ); - assert_eq!( - parser(r"|").parse_primitive(), - Ok(Primitive::Literal(ast::Literal { - span: 
span(0..1), - kind: ast::LiteralKind::Verbatim, - c: '|', - })) - ); - assert_eq!( - parser(r"☃").parse_primitive(), - Ok(Primitive::Literal(ast::Literal { - span: span_range("☃", 0..3), - kind: ast::LiteralKind::Verbatim, - c: '☃', - })) - ); - } - - #[test] - fn parse_escape() { - assert_eq!( - parser(r"\|").parse_primitive(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..2), - kind: ast::LiteralKind::Meta, - c: '|', - })) - ); - let specials = &[ - (r"\a", '\x07', ast::SpecialLiteralKind::Bell), - (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed), - (r"\t", '\t', ast::SpecialLiteralKind::Tab), - (r"\n", '\n', ast::SpecialLiteralKind::LineFeed), - (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn), - (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab), - ]; - for &(pat, c, ref kind) in specials { - assert_eq!( - parser(pat).parse_primitive(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..2), - kind: ast::LiteralKind::Special(kind.clone()), - c, - })) - ); - } - assert_eq!( - parser(r"\A").parse_primitive(), - Ok(Primitive::Assertion(ast::Assertion { - span: span(0..2), - kind: ast::AssertionKind::StartText, - })) - ); - assert_eq!( - parser(r"\z").parse_primitive(), - Ok(Primitive::Assertion(ast::Assertion { - span: span(0..2), - kind: ast::AssertionKind::EndText, - })) - ); - assert_eq!( - parser(r"\b").parse_primitive(), - Ok(Primitive::Assertion(ast::Assertion { - span: span(0..2), - kind: ast::AssertionKind::WordBoundary, - })) - ); - assert_eq!( - parser(r"\b{start}").parse_primitive(), - Ok(Primitive::Assertion(ast::Assertion { - span: span(0..9), - kind: ast::AssertionKind::WordBoundaryStart, - })) - ); - assert_eq!( - parser(r"\b{end}").parse_primitive(), - Ok(Primitive::Assertion(ast::Assertion { - span: span(0..7), - kind: ast::AssertionKind::WordBoundaryEnd, - })) - ); - assert_eq!( - parser(r"\b{start-half}").parse_primitive(), - Ok(Primitive::Assertion(ast::Assertion { - span: span(0..14), - kind: 
ast::AssertionKind::WordBoundaryStartHalf, - })) - ); - assert_eq!( - parser(r"\b{end-half}").parse_primitive(), - Ok(Primitive::Assertion(ast::Assertion { - span: span(0..12), - kind: ast::AssertionKind::WordBoundaryEndHalf, - })) - ); - assert_eq!( - parser(r"\<").parse_primitive(), - Ok(Primitive::Assertion(ast::Assertion { - span: span(0..2), - kind: ast::AssertionKind::WordBoundaryStartAngle, - })) - ); - assert_eq!( - parser(r"\>").parse_primitive(), - Ok(Primitive::Assertion(ast::Assertion { - span: span(0..2), - kind: ast::AssertionKind::WordBoundaryEndAngle, - })) - ); - assert_eq!( - parser(r"\B").parse_primitive(), - Ok(Primitive::Assertion(ast::Assertion { - span: span(0..2), - kind: ast::AssertionKind::NotWordBoundary, - })) - ); - - // We also support superfluous escapes in most cases now too. - for c in ['!', '@', '%', '"', '\'', '/', ' '] { - let pat = format!(r"\{}", c); - assert_eq!( - parser(&pat).parse_primitive(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..2), - kind: ast::LiteralKind::Superfluous, - c, - })) - ); - } - - // Some superfluous escapes, namely [0-9A-Za-z], are still banned. This - // gives flexibility for future evolution. - assert_eq!( - parser(r"\e").parse_escape().unwrap_err(), - TestError { - span: span(0..2), - kind: ast::ErrorKind::EscapeUnrecognized, - } - ); - assert_eq!( - parser(r"\y").parse_escape().unwrap_err(), - TestError { - span: span(0..2), - kind: ast::ErrorKind::EscapeUnrecognized, - } - ); - - // Starting a special word boundary without any non-whitespace chars - // after the brace makes it ambiguous whether the user meant to write - // a counted repetition (probably not?) or an actual special word - // boundary assertion. 
- assert_eq!( - parser(r"\b{").parse_escape().unwrap_err(), - TestError { - span: span(0..3), - kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof, - } - ); - assert_eq!( - parser_ignore_whitespace(r"\b{ ").parse_escape().unwrap_err(), - TestError { - span: span(0..4), - kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof, - } - ); - // When 'x' is not enabled, the space is seen as a non-[-A-Za-z] char, - // and thus causes the parser to treat it as a counted repetition. - assert_eq!( - parser(r"\b{ ").parse().unwrap_err(), - TestError { - span: span(2..4), - kind: ast::ErrorKind::RepetitionCountUnclosed, - } - ); - // In this case, we got some valid chars that makes it look like the - // user is writing one of the special word boundary assertions, but - // we forget to close the brace. - assert_eq!( - parser(r"\b{foo").parse_escape().unwrap_err(), - TestError { - span: span(2..6), - kind: ast::ErrorKind::SpecialWordBoundaryUnclosed, - } - ); - // We get the same error as above, except it is provoked by seeing a - // char that we know is invalid before seeing a closing brace. - assert_eq!( - parser(r"\b{foo!}").parse_escape().unwrap_err(), - TestError { - span: span(2..6), - kind: ast::ErrorKind::SpecialWordBoundaryUnclosed, - } - ); - // And this one occurs when, syntactically, everything looks okay, but - // we don't use a valid spelling of a word boundary assertion. - assert_eq!( - parser(r"\b{foo}").parse_escape().unwrap_err(), - TestError { - span: span(3..6), - kind: ast::ErrorKind::SpecialWordBoundaryUnrecognized, - } - ); - - // An unfinished escape is illegal. 
- assert_eq!( - parser(r"\").parse_escape().unwrap_err(), - TestError { - span: span(0..1), - kind: ast::ErrorKind::EscapeUnexpectedEof, - } - ); - } - - #[test] - fn parse_unsupported_backreference() { - assert_eq!( - parser(r"\0").parse_escape().unwrap_err(), - TestError { - span: span(0..2), - kind: ast::ErrorKind::UnsupportedBackreference, - } - ); - assert_eq!( - parser(r"\9").parse_escape().unwrap_err(), - TestError { - span: span(0..2), - kind: ast::ErrorKind::UnsupportedBackreference, - } - ); - } - - #[test] - fn parse_octal() { - for i in 0..511 { - let pat = format!(r"\{:o}", i); - assert_eq!( - parser_octal(&pat).parse_escape(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..pat.len()), - kind: ast::LiteralKind::Octal, - c: char::from_u32(i).unwrap(), - })) - ); - } - assert_eq!( - parser_octal(r"\778").parse_escape(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..3), - kind: ast::LiteralKind::Octal, - c: '?', - })) - ); - assert_eq!( - parser_octal(r"\7777").parse_escape(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..4), - kind: ast::LiteralKind::Octal, - c: '\u{01FF}', - })) - ); - assert_eq!( - parser_octal(r"\778").parse(), - Ok(Ast::concat(ast::Concat { - span: span(0..4), - asts: vec![ - Ast::literal(ast::Literal { - span: span(0..3), - kind: ast::LiteralKind::Octal, - c: '?', - }), - Ast::literal(ast::Literal { - span: span(3..4), - kind: ast::LiteralKind::Verbatim, - c: '8', - }), - ], - })) - ); - assert_eq!( - parser_octal(r"\7777").parse(), - Ok(Ast::concat(ast::Concat { - span: span(0..5), - asts: vec![ - Ast::literal(ast::Literal { - span: span(0..4), - kind: ast::LiteralKind::Octal, - c: '\u{01FF}', - }), - Ast::literal(ast::Literal { - span: span(4..5), - kind: ast::LiteralKind::Verbatim, - c: '7', - }), - ], - })) - ); - - assert_eq!( - parser_octal(r"\8").parse_escape().unwrap_err(), - TestError { - span: span(0..2), - kind: ast::ErrorKind::EscapeUnrecognized, - } - ); - } - - #[test] - fn parse_hex_two() { - 
for i in 0..256 { - let pat = format!(r"\x{:02x}", i); - assert_eq!( - parser(&pat).parse_escape(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..pat.len()), - kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X), - c: char::from_u32(i).unwrap(), - })) - ); - } - - assert_eq!( - parser(r"\xF").parse_escape().unwrap_err(), - TestError { - span: span(3..3), - kind: ast::ErrorKind::EscapeUnexpectedEof, - } - ); - assert_eq!( - parser(r"\xG").parse_escape().unwrap_err(), - TestError { - span: span(2..3), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - assert_eq!( - parser(r"\xFG").parse_escape().unwrap_err(), - TestError { - span: span(3..4), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - } - - #[test] - fn parse_hex_four() { - for i in 0..65536 { - let c = match char::from_u32(i) { - None => continue, - Some(c) => c, - }; - let pat = format!(r"\u{:04x}", i); - assert_eq!( - parser(&pat).parse_escape(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..pat.len()), - kind: ast::LiteralKind::HexFixed( - ast::HexLiteralKind::UnicodeShort - ), - c, - })) - ); - } - - assert_eq!( - parser(r"\uF").parse_escape().unwrap_err(), - TestError { - span: span(3..3), - kind: ast::ErrorKind::EscapeUnexpectedEof, - } - ); - assert_eq!( - parser(r"\uG").parse_escape().unwrap_err(), - TestError { - span: span(2..3), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - assert_eq!( - parser(r"\uFG").parse_escape().unwrap_err(), - TestError { - span: span(3..4), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - assert_eq!( - parser(r"\uFFG").parse_escape().unwrap_err(), - TestError { - span: span(4..5), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - assert_eq!( - parser(r"\uFFFG").parse_escape().unwrap_err(), - TestError { - span: span(5..6), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - assert_eq!( - parser(r"\uD800").parse_escape().unwrap_err(), - TestError { - span: span(2..6), - kind: 
ast::ErrorKind::EscapeHexInvalid, - } - ); - } - - #[test] - fn parse_hex_eight() { - for i in 0..65536 { - let c = match char::from_u32(i) { - None => continue, - Some(c) => c, - }; - let pat = format!(r"\U{:08x}", i); - assert_eq!( - parser(&pat).parse_escape(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..pat.len()), - kind: ast::LiteralKind::HexFixed( - ast::HexLiteralKind::UnicodeLong - ), - c, - })) - ); - } - - assert_eq!( - parser(r"\UF").parse_escape().unwrap_err(), - TestError { - span: span(3..3), - kind: ast::ErrorKind::EscapeUnexpectedEof, - } - ); - assert_eq!( - parser(r"\UG").parse_escape().unwrap_err(), - TestError { - span: span(2..3), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - assert_eq!( - parser(r"\UFG").parse_escape().unwrap_err(), - TestError { - span: span(3..4), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - assert_eq!( - parser(r"\UFFG").parse_escape().unwrap_err(), - TestError { - span: span(4..5), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - assert_eq!( - parser(r"\UFFFG").parse_escape().unwrap_err(), - TestError { - span: span(5..6), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - assert_eq!( - parser(r"\UFFFFG").parse_escape().unwrap_err(), - TestError { - span: span(6..7), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - assert_eq!( - parser(r"\UFFFFFG").parse_escape().unwrap_err(), - TestError { - span: span(7..8), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - assert_eq!( - parser(r"\UFFFFFFG").parse_escape().unwrap_err(), - TestError { - span: span(8..9), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - assert_eq!( - parser(r"\UFFFFFFFG").parse_escape().unwrap_err(), - TestError { - span: span(9..10), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - } - - #[test] - fn parse_hex_brace() { - assert_eq!( - parser(r"\u{26c4}").parse_escape(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..8), - kind: ast::LiteralKind::HexBrace( - 
ast::HexLiteralKind::UnicodeShort - ), - c: '⛄', - })) - ); - assert_eq!( - parser(r"\U{26c4}").parse_escape(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..8), - kind: ast::LiteralKind::HexBrace( - ast::HexLiteralKind::UnicodeLong - ), - c: '⛄', - })) - ); - assert_eq!( - parser(r"\x{26c4}").parse_escape(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..8), - kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X), - c: '⛄', - })) - ); - assert_eq!( - parser(r"\x{26C4}").parse_escape(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..8), - kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X), - c: '⛄', - })) - ); - assert_eq!( - parser(r"\x{10fFfF}").parse_escape(), - Ok(Primitive::Literal(ast::Literal { - span: span(0..10), - kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X), - c: '\u{10FFFF}', - })) - ); - - assert_eq!( - parser(r"\x").parse_escape().unwrap_err(), - TestError { - span: span(2..2), - kind: ast::ErrorKind::EscapeUnexpectedEof, - } - ); - assert_eq!( - parser(r"\x{").parse_escape().unwrap_err(), - TestError { - span: span(2..3), - kind: ast::ErrorKind::EscapeUnexpectedEof, - } - ); - assert_eq!( - parser(r"\x{FF").parse_escape().unwrap_err(), - TestError { - span: span(2..5), - kind: ast::ErrorKind::EscapeUnexpectedEof, - } - ); - assert_eq!( - parser(r"\x{}").parse_escape().unwrap_err(), - TestError { - span: span(2..4), - kind: ast::ErrorKind::EscapeHexEmpty, - } - ); - assert_eq!( - parser(r"\x{FGF}").parse_escape().unwrap_err(), - TestError { - span: span(4..5), - kind: ast::ErrorKind::EscapeHexInvalidDigit, - } - ); - assert_eq!( - parser(r"\x{FFFFFF}").parse_escape().unwrap_err(), - TestError { - span: span(3..9), - kind: ast::ErrorKind::EscapeHexInvalid, - } - ); - assert_eq!( - parser(r"\x{D800}").parse_escape().unwrap_err(), - TestError { - span: span(3..7), - kind: ast::ErrorKind::EscapeHexInvalid, - } - ); - assert_eq!( - parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(), - TestError { - span: 
span(3..12), - kind: ast::ErrorKind::EscapeHexInvalid, - } - ); - } - - #[test] - fn parse_decimal() { - assert_eq!(parser("123").parse_decimal(), Ok(123)); - assert_eq!(parser("0").parse_decimal(), Ok(0)); - assert_eq!(parser("01").parse_decimal(), Ok(1)); - - assert_eq!( - parser("-1").parse_decimal().unwrap_err(), - TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty } - ); - assert_eq!( - parser("").parse_decimal().unwrap_err(), - TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty } - ); - assert_eq!( - parser("9999999999").parse_decimal().unwrap_err(), - TestError { - span: span(0..10), - kind: ast::ErrorKind::DecimalInvalid, - } - ); - } - - #[test] - fn parse_set_class() { - fn union(span: Span, items: Vec) -> ast::ClassSet { - ast::ClassSet::union(ast::ClassSetUnion { span, items }) - } - - fn intersection( - span: Span, - lhs: ast::ClassSet, - rhs: ast::ClassSet, - ) -> ast::ClassSet { - ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { - span, - kind: ast::ClassSetBinaryOpKind::Intersection, - lhs: Box::new(lhs), - rhs: Box::new(rhs), - }) - } - - fn difference( - span: Span, - lhs: ast::ClassSet, - rhs: ast::ClassSet, - ) -> ast::ClassSet { - ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { - span, - kind: ast::ClassSetBinaryOpKind::Difference, - lhs: Box::new(lhs), - rhs: Box::new(rhs), - }) - } - - fn symdifference( - span: Span, - lhs: ast::ClassSet, - rhs: ast::ClassSet, - ) -> ast::ClassSet { - ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp { - span, - kind: ast::ClassSetBinaryOpKind::SymmetricDifference, - lhs: Box::new(lhs), - rhs: Box::new(rhs), - }) - } - - fn itemset(item: ast::ClassSetItem) -> ast::ClassSet { - ast::ClassSet::Item(item) - } - - fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem { - ast::ClassSetItem::Ascii(cls) - } - - fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem { - ast::ClassSetItem::Unicode(cls) - } - - fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem { - 
ast::ClassSetItem::Perl(cls) - } - - fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem { - ast::ClassSetItem::Bracketed(Box::new(cls)) - } - - fn lit(span: Span, c: char) -> ast::ClassSetItem { - ast::ClassSetItem::Literal(ast::Literal { - span, - kind: ast::LiteralKind::Verbatim, - c, - }) - } - - fn empty(span: Span) -> ast::ClassSetItem { - ast::ClassSetItem::Empty(span) - } - - fn range(span: Span, start: char, end: char) -> ast::ClassSetItem { - let pos1 = Position { - offset: span.start.offset + start.len_utf8(), - column: span.start.column + 1, - ..span.start - }; - let pos2 = Position { - offset: span.end.offset - end.len_utf8(), - column: span.end.column - 1, - ..span.end - }; - ast::ClassSetItem::Range(ast::ClassSetRange { - span, - start: ast::Literal { - span: Span { end: pos1, ..span }, - kind: ast::LiteralKind::Verbatim, - c: start, - }, - end: ast::Literal { - span: Span { start: pos2, ..span }, - kind: ast::LiteralKind::Verbatim, - c: end, - }, - }) - } - - fn alnum(span: Span, negated: bool) -> ast::ClassAscii { - ast::ClassAscii { span, kind: ast::ClassAsciiKind::Alnum, negated } - } - - fn lower(span: Span, negated: bool) -> ast::ClassAscii { - ast::ClassAscii { span, kind: ast::ClassAsciiKind::Lower, negated } - } - - assert_eq!( - parser("[[:alnum:]]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..11), - negated: false, - kind: itemset(item_ascii(alnum(span(1..10), false))), - })) - ); - assert_eq!( - parser("[[[:alnum:]]]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..13), - negated: false, - kind: itemset(item_bracket(ast::ClassBracketed { - span: span(1..12), - negated: false, - kind: itemset(item_ascii(alnum(span(2..11), false))), - })), - })) - ); - assert_eq!( - parser("[[:alnum:]&&[:lower:]]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..22), - negated: false, - kind: intersection( - span(1..21), - itemset(item_ascii(alnum(span(1..10), 
false))), - itemset(item_ascii(lower(span(12..21), false))), - ), - })) - ); - assert_eq!( - parser("[[:alnum:]--[:lower:]]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..22), - negated: false, - kind: difference( - span(1..21), - itemset(item_ascii(alnum(span(1..10), false))), - itemset(item_ascii(lower(span(12..21), false))), - ), - })) - ); - assert_eq!( - parser("[[:alnum:]~~[:lower:]]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..22), - negated: false, - kind: symdifference( - span(1..21), - itemset(item_ascii(alnum(span(1..10), false))), - itemset(item_ascii(lower(span(12..21), false))), - ), - })) - ); - - assert_eq!( - parser("[a]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..3), - negated: false, - kind: itemset(lit(span(1..2), 'a')), - })) - ); - assert_eq!( - parser(r"[a\]]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..5), - negated: false, - kind: union( - span(1..4), - vec![ - lit(span(1..2), 'a'), - ast::ClassSetItem::Literal(ast::Literal { - span: span(2..4), - kind: ast::LiteralKind::Meta, - c: ']', - }), - ] - ), - })) - ); - assert_eq!( - parser(r"[a\-z]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..6), - negated: false, - kind: union( - span(1..5), - vec![ - lit(span(1..2), 'a'), - ast::ClassSetItem::Literal(ast::Literal { - span: span(2..4), - kind: ast::LiteralKind::Meta, - c: '-', - }), - lit(span(4..5), 'z'), - ] - ), - })) - ); - assert_eq!( - parser("[ab]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..4), - negated: false, - kind: union( - span(1..3), - vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),] - ), - })) - ); - assert_eq!( - parser("[a-]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..4), - negated: false, - kind: union( - span(1..3), - vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),] - ), - })) - ); - assert_eq!( - 
parser("[-a]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..4), - negated: false, - kind: union( - span(1..3), - vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),] - ), - })) - ); - assert_eq!( - parser(r"[\pL]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..5), - negated: false, - kind: itemset(item_unicode(ast::ClassUnicode { - span: span(1..4), - negated: false, - kind: ast::ClassUnicodeKind::OneLetter('L'), - })), - })) - ); - assert_eq!( - parser(r"[\w]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..4), - negated: false, - kind: itemset(item_perl(ast::ClassPerl { - span: span(1..3), - kind: ast::ClassPerlKind::Word, - negated: false, - })), - })) - ); - assert_eq!( - parser(r"[a\wz]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..6), - negated: false, - kind: union( - span(1..5), - vec![ - lit(span(1..2), 'a'), - item_perl(ast::ClassPerl { - span: span(2..4), - kind: ast::ClassPerlKind::Word, - negated: false, - }), - lit(span(4..5), 'z'), - ] - ), - })) - ); - - assert_eq!( - parser("[a-z]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..5), - negated: false, - kind: itemset(range(span(1..4), 'a', 'z')), - })) - ); - assert_eq!( - parser("[a-cx-z]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..8), - negated: false, - kind: union( - span(1..7), - vec![ - range(span(1..4), 'a', 'c'), - range(span(4..7), 'x', 'z'), - ] - ), - })) - ); - assert_eq!( - parser(r"[\w&&a-cx-z]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..12), - negated: false, - kind: intersection( - span(1..11), - itemset(item_perl(ast::ClassPerl { - span: span(1..3), - kind: ast::ClassPerlKind::Word, - negated: false, - })), - union( - span(5..11), - vec![ - range(span(5..8), 'a', 'c'), - range(span(8..11), 'x', 'z'), - ] - ), - ), - })) - ); - assert_eq!( - parser(r"[a-cx-z&&\w]").parse(), - 
Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..12), - negated: false, - kind: intersection( - span(1..11), - union( - span(1..7), - vec![ - range(span(1..4), 'a', 'c'), - range(span(4..7), 'x', 'z'), - ] - ), - itemset(item_perl(ast::ClassPerl { - span: span(9..11), - kind: ast::ClassPerlKind::Word, - negated: false, - })), - ), - })) - ); - assert_eq!( - parser(r"[a--b--c]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..9), - negated: false, - kind: difference( - span(1..8), - difference( - span(1..5), - itemset(lit(span(1..2), 'a')), - itemset(lit(span(4..5), 'b')), - ), - itemset(lit(span(7..8), 'c')), - ), - })) - ); - assert_eq!( - parser(r"[a~~b~~c]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..9), - negated: false, - kind: symdifference( - span(1..8), - symdifference( - span(1..5), - itemset(lit(span(1..2), 'a')), - itemset(lit(span(4..5), 'b')), - ), - itemset(lit(span(7..8), 'c')), - ), - })) - ); - assert_eq!( - parser(r"[\^&&^]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..7), - negated: false, - kind: intersection( - span(1..6), - itemset(ast::ClassSetItem::Literal(ast::Literal { - span: span(1..3), - kind: ast::LiteralKind::Meta, - c: '^', - })), - itemset(lit(span(5..6), '^')), - ), - })) - ); - assert_eq!( - parser(r"[\&&&&]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..7), - negated: false, - kind: intersection( - span(1..6), - itemset(ast::ClassSetItem::Literal(ast::Literal { - span: span(1..3), - kind: ast::LiteralKind::Meta, - c: '&', - })), - itemset(lit(span(5..6), '&')), - ), - })) - ); - assert_eq!( - parser(r"[&&&&]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..6), - negated: false, - kind: intersection( - span(1..5), - intersection( - span(1..3), - itemset(empty(span(1..1))), - itemset(empty(span(3..3))), - ), - itemset(empty(span(5..5))), - ), - })) - ); - - let pat = "[☃-⛄]"; - 
assert_eq!( - parser(pat).parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span_range(pat, 0..9), - negated: false, - kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange { - span: span_range(pat, 1..8), - start: ast::Literal { - span: span_range(pat, 1..4), - kind: ast::LiteralKind::Verbatim, - c: '☃', - }, - end: ast::Literal { - span: span_range(pat, 5..8), - kind: ast::LiteralKind::Verbatim, - c: '⛄', - }, - })), - })) - ); - - assert_eq!( - parser(r"[]]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..3), - negated: false, - kind: itemset(lit(span(1..2), ']')), - })) - ); - assert_eq!( - parser(r"[]\[]").parse(), - Ok(Ast::class_bracketed(ast::ClassBracketed { - span: span(0..5), - negated: false, - kind: union( - span(1..4), - vec![ - lit(span(1..2), ']'), - ast::ClassSetItem::Literal(ast::Literal { - span: span(2..4), - kind: ast::LiteralKind::Meta, - c: '[', - }), - ] - ), - })) - ); - assert_eq!( - parser(r"[\[]]").parse(), - Ok(concat( - 0..5, - vec![ - Ast::class_bracketed(ast::ClassBracketed { - span: span(0..4), - negated: false, - kind: itemset(ast::ClassSetItem::Literal( - ast::Literal { - span: span(1..3), - kind: ast::LiteralKind::Meta, - c: '[', - } - )), - }), - Ast::literal(ast::Literal { - span: span(4..5), - kind: ast::LiteralKind::Verbatim, - c: ']', - }), - ] - )) - ); - - assert_eq!( - parser("[").parse().unwrap_err(), - TestError { - span: span(0..1), - kind: ast::ErrorKind::ClassUnclosed, - } - ); - assert_eq!( - parser("[[").parse().unwrap_err(), - TestError { - span: span(1..2), - kind: ast::ErrorKind::ClassUnclosed, - } - ); - assert_eq!( - parser("[[-]").parse().unwrap_err(), - TestError { - span: span(0..1), - kind: ast::ErrorKind::ClassUnclosed, - } - ); - assert_eq!( - parser("[[[:alnum:]").parse().unwrap_err(), - TestError { - span: span(1..2), - kind: ast::ErrorKind::ClassUnclosed, - } - ); - assert_eq!( - parser(r"[\b]").parse().unwrap_err(), - TestError { - span: span(1..3), - 
kind: ast::ErrorKind::ClassEscapeInvalid, - } - ); - assert_eq!( - parser(r"[\w-a]").parse().unwrap_err(), - TestError { - span: span(1..3), - kind: ast::ErrorKind::ClassRangeLiteral, - } - ); - assert_eq!( - parser(r"[a-\w]").parse().unwrap_err(), - TestError { - span: span(3..5), - kind: ast::ErrorKind::ClassRangeLiteral, - } - ); - assert_eq!( - parser(r"[z-a]").parse().unwrap_err(), - TestError { - span: span(1..4), - kind: ast::ErrorKind::ClassRangeInvalid, - } - ); - - assert_eq!( - parser_ignore_whitespace("[a ").parse().unwrap_err(), - TestError { - span: span(0..1), - kind: ast::ErrorKind::ClassUnclosed, - } - ); - assert_eq!( - parser_ignore_whitespace("[a- ").parse().unwrap_err(), - TestError { - span: span(0..1), - kind: ast::ErrorKind::ClassUnclosed, - } - ); - } - - #[test] - fn parse_set_class_open() { - assert_eq!(parser("[a]").parse_set_class_open(), { - let set = ast::ClassBracketed { - span: span(0..1), - negated: false, - kind: ast::ClassSet::union(ast::ClassSetUnion { - span: span(1..1), - items: vec![], - }), - }; - let union = ast::ClassSetUnion { span: span(1..1), items: vec![] }; - Ok((set, union)) - }); - assert_eq!( - parser_ignore_whitespace("[ a]").parse_set_class_open(), - { - let set = ast::ClassBracketed { - span: span(0..4), - negated: false, - kind: ast::ClassSet::union(ast::ClassSetUnion { - span: span(4..4), - items: vec![], - }), - }; - let union = - ast::ClassSetUnion { span: span(4..4), items: vec![] }; - Ok((set, union)) - } - ); - assert_eq!(parser("[^a]").parse_set_class_open(), { - let set = ast::ClassBracketed { - span: span(0..2), - negated: true, - kind: ast::ClassSet::union(ast::ClassSetUnion { - span: span(2..2), - items: vec![], - }), - }; - let union = ast::ClassSetUnion { span: span(2..2), items: vec![] }; - Ok((set, union)) - }); - assert_eq!( - parser_ignore_whitespace("[ ^ a]").parse_set_class_open(), - { - let set = ast::ClassBracketed { - span: span(0..4), - negated: true, - kind: 
ast::ClassSet::union(ast::ClassSetUnion { - span: span(4..4), - items: vec![], - }), - }; - let union = - ast::ClassSetUnion { span: span(4..4), items: vec![] }; - Ok((set, union)) - } - ); - assert_eq!(parser("[-a]").parse_set_class_open(), { - let set = ast::ClassBracketed { - span: span(0..2), - negated: false, - kind: ast::ClassSet::union(ast::ClassSetUnion { - span: span(1..1), - items: vec![], - }), - }; - let union = ast::ClassSetUnion { - span: span(1..2), - items: vec![ast::ClassSetItem::Literal(ast::Literal { - span: span(1..2), - kind: ast::LiteralKind::Verbatim, - c: '-', - })], - }; - Ok((set, union)) - }); - assert_eq!( - parser_ignore_whitespace("[ - a]").parse_set_class_open(), - { - let set = ast::ClassBracketed { - span: span(0..4), - negated: false, - kind: ast::ClassSet::union(ast::ClassSetUnion { - span: span(2..2), - items: vec![], - }), - }; - let union = ast::ClassSetUnion { - span: span(2..3), - items: vec![ast::ClassSetItem::Literal(ast::Literal { - span: span(2..3), - kind: ast::LiteralKind::Verbatim, - c: '-', - })], - }; - Ok((set, union)) - } - ); - assert_eq!(parser("[^-a]").parse_set_class_open(), { - let set = ast::ClassBracketed { - span: span(0..3), - negated: true, - kind: ast::ClassSet::union(ast::ClassSetUnion { - span: span(2..2), - items: vec![], - }), - }; - let union = ast::ClassSetUnion { - span: span(2..3), - items: vec![ast::ClassSetItem::Literal(ast::Literal { - span: span(2..3), - kind: ast::LiteralKind::Verbatim, - c: '-', - })], - }; - Ok((set, union)) - }); - assert_eq!(parser("[--a]").parse_set_class_open(), { - let set = ast::ClassBracketed { - span: span(0..3), - negated: false, - kind: ast::ClassSet::union(ast::ClassSetUnion { - span: span(1..1), - items: vec![], - }), - }; - let union = ast::ClassSetUnion { - span: span(1..3), - items: vec![ - ast::ClassSetItem::Literal(ast::Literal { - span: span(1..2), - kind: ast::LiteralKind::Verbatim, - c: '-', - }), - ast::ClassSetItem::Literal(ast::Literal { - span: 
span(2..3), - kind: ast::LiteralKind::Verbatim, - c: '-', - }), - ], - }; - Ok((set, union)) - }); - assert_eq!(parser("[]a]").parse_set_class_open(), { - let set = ast::ClassBracketed { - span: span(0..2), - negated: false, - kind: ast::ClassSet::union(ast::ClassSetUnion { - span: span(1..1), - items: vec![], - }), - }; - let union = ast::ClassSetUnion { - span: span(1..2), - items: vec![ast::ClassSetItem::Literal(ast::Literal { - span: span(1..2), - kind: ast::LiteralKind::Verbatim, - c: ']', - })], - }; - Ok((set, union)) - }); - assert_eq!( - parser_ignore_whitespace("[ ] a]").parse_set_class_open(), - { - let set = ast::ClassBracketed { - span: span(0..4), - negated: false, - kind: ast::ClassSet::union(ast::ClassSetUnion { - span: span(2..2), - items: vec![], - }), - }; - let union = ast::ClassSetUnion { - span: span(2..3), - items: vec![ast::ClassSetItem::Literal(ast::Literal { - span: span(2..3), - kind: ast::LiteralKind::Verbatim, - c: ']', - })], - }; - Ok((set, union)) - } - ); - assert_eq!(parser("[^]a]").parse_set_class_open(), { - let set = ast::ClassBracketed { - span: span(0..3), - negated: true, - kind: ast::ClassSet::union(ast::ClassSetUnion { - span: span(2..2), - items: vec![], - }), - }; - let union = ast::ClassSetUnion { - span: span(2..3), - items: vec![ast::ClassSetItem::Literal(ast::Literal { - span: span(2..3), - kind: ast::LiteralKind::Verbatim, - c: ']', - })], - }; - Ok((set, union)) - }); - assert_eq!(parser("[-]a]").parse_set_class_open(), { - let set = ast::ClassBracketed { - span: span(0..2), - negated: false, - kind: ast::ClassSet::union(ast::ClassSetUnion { - span: span(1..1), - items: vec![], - }), - }; - let union = ast::ClassSetUnion { - span: span(1..2), - items: vec![ast::ClassSetItem::Literal(ast::Literal { - span: span(1..2), - kind: ast::LiteralKind::Verbatim, - c: '-', - })], - }; - Ok((set, union)) - }); - - assert_eq!( - parser("[").parse_set_class_open().unwrap_err(), - TestError { - span: span(0..1), - kind: 
ast::ErrorKind::ClassUnclosed, - } - ); - assert_eq!( - parser_ignore_whitespace("[ ") - .parse_set_class_open() - .unwrap_err(), - TestError { - span: span(0..5), - kind: ast::ErrorKind::ClassUnclosed, - } - ); - assert_eq!( - parser("[^").parse_set_class_open().unwrap_err(), - TestError { - span: span(0..2), - kind: ast::ErrorKind::ClassUnclosed, - } - ); - assert_eq!( - parser("[]").parse_set_class_open().unwrap_err(), - TestError { - span: span(0..2), - kind: ast::ErrorKind::ClassUnclosed, - } - ); - assert_eq!( - parser("[-").parse_set_class_open().unwrap_err(), - TestError { - span: span(0..0), - kind: ast::ErrorKind::ClassUnclosed, - } - ); - assert_eq!( - parser("[--").parse_set_class_open().unwrap_err(), - TestError { - span: span(0..0), - kind: ast::ErrorKind::ClassUnclosed, - } - ); - - // See: https://github.com/rust-lang/regex/issues/792 - assert_eq!( - parser("(?x)[-#]").parse_with_comments().unwrap_err(), - TestError { - span: span(4..4), - kind: ast::ErrorKind::ClassUnclosed, - } - ); - } - - #[test] - fn maybe_parse_ascii_class() { - assert_eq!( - parser(r"[:alnum:]").maybe_parse_ascii_class(), - Some(ast::ClassAscii { - span: span(0..9), - kind: ast::ClassAsciiKind::Alnum, - negated: false, - }) - ); - assert_eq!( - parser(r"[:alnum:]A").maybe_parse_ascii_class(), - Some(ast::ClassAscii { - span: span(0..9), - kind: ast::ClassAsciiKind::Alnum, - negated: false, - }) - ); - assert_eq!( - parser(r"[:^alnum:]").maybe_parse_ascii_class(), - Some(ast::ClassAscii { - span: span(0..10), - kind: ast::ClassAsciiKind::Alnum, - negated: true, - }) - ); - - let p = parser(r"[:"); - assert_eq!(p.maybe_parse_ascii_class(), None); - assert_eq!(p.offset(), 0); - - let p = parser(r"[:^"); - assert_eq!(p.maybe_parse_ascii_class(), None); - assert_eq!(p.offset(), 0); - - let p = parser(r"[^:alnum:]"); - assert_eq!(p.maybe_parse_ascii_class(), None); - assert_eq!(p.offset(), 0); - - let p = parser(r"[:alnnum:]"); - assert_eq!(p.maybe_parse_ascii_class(), None); - 
assert_eq!(p.offset(), 0); - - let p = parser(r"[:alnum]"); - assert_eq!(p.maybe_parse_ascii_class(), None); - assert_eq!(p.offset(), 0); - - let p = parser(r"[:alnum:"); - assert_eq!(p.maybe_parse_ascii_class(), None); - assert_eq!(p.offset(), 0); - } - - #[test] - fn parse_unicode_class() { - assert_eq!( - parser(r"\pN").parse_escape(), - Ok(Primitive::Unicode(ast::ClassUnicode { - span: span(0..3), - negated: false, - kind: ast::ClassUnicodeKind::OneLetter('N'), - })) - ); - assert_eq!( - parser(r"\PN").parse_escape(), - Ok(Primitive::Unicode(ast::ClassUnicode { - span: span(0..3), - negated: true, - kind: ast::ClassUnicodeKind::OneLetter('N'), - })) - ); - assert_eq!( - parser(r"\p{N}").parse_escape(), - Ok(Primitive::Unicode(ast::ClassUnicode { - span: span(0..5), - negated: false, - kind: ast::ClassUnicodeKind::Named(s("N")), - })) - ); - assert_eq!( - parser(r"\P{N}").parse_escape(), - Ok(Primitive::Unicode(ast::ClassUnicode { - span: span(0..5), - negated: true, - kind: ast::ClassUnicodeKind::Named(s("N")), - })) - ); - assert_eq!( - parser(r"\p{Greek}").parse_escape(), - Ok(Primitive::Unicode(ast::ClassUnicode { - span: span(0..9), - negated: false, - kind: ast::ClassUnicodeKind::Named(s("Greek")), - })) - ); - - assert_eq!( - parser(r"\p{scx:Katakana}").parse_escape(), - Ok(Primitive::Unicode(ast::ClassUnicode { - span: span(0..16), - negated: false, - kind: ast::ClassUnicodeKind::NamedValue { - op: ast::ClassUnicodeOpKind::Colon, - name: s("scx"), - value: s("Katakana"), - }, - })) - ); - assert_eq!( - parser(r"\p{scx=Katakana}").parse_escape(), - Ok(Primitive::Unicode(ast::ClassUnicode { - span: span(0..16), - negated: false, - kind: ast::ClassUnicodeKind::NamedValue { - op: ast::ClassUnicodeOpKind::Equal, - name: s("scx"), - value: s("Katakana"), - }, - })) - ); - assert_eq!( - parser(r"\p{scx!=Katakana}").parse_escape(), - Ok(Primitive::Unicode(ast::ClassUnicode { - span: span(0..17), - negated: false, - kind: ast::ClassUnicodeKind::NamedValue { - op: 
ast::ClassUnicodeOpKind::NotEqual, - name: s("scx"), - value: s("Katakana"), - }, - })) - ); - - assert_eq!( - parser(r"\p{:}").parse_escape(), - Ok(Primitive::Unicode(ast::ClassUnicode { - span: span(0..5), - negated: false, - kind: ast::ClassUnicodeKind::NamedValue { - op: ast::ClassUnicodeOpKind::Colon, - name: s(""), - value: s(""), - }, - })) - ); - assert_eq!( - parser(r"\p{=}").parse_escape(), - Ok(Primitive::Unicode(ast::ClassUnicode { - span: span(0..5), - negated: false, - kind: ast::ClassUnicodeKind::NamedValue { - op: ast::ClassUnicodeOpKind::Equal, - name: s(""), - value: s(""), - }, - })) - ); - assert_eq!( - parser(r"\p{!=}").parse_escape(), - Ok(Primitive::Unicode(ast::ClassUnicode { - span: span(0..6), - negated: false, - kind: ast::ClassUnicodeKind::NamedValue { - op: ast::ClassUnicodeOpKind::NotEqual, - name: s(""), - value: s(""), - }, - })) - ); - - assert_eq!( - parser(r"\p").parse_escape().unwrap_err(), - TestError { - span: span(2..2), - kind: ast::ErrorKind::EscapeUnexpectedEof, - } - ); - assert_eq!( - parser(r"\p{").parse_escape().unwrap_err(), - TestError { - span: span(3..3), - kind: ast::ErrorKind::EscapeUnexpectedEof, - } - ); - assert_eq!( - parser(r"\p{N").parse_escape().unwrap_err(), - TestError { - span: span(4..4), - kind: ast::ErrorKind::EscapeUnexpectedEof, - } - ); - assert_eq!( - parser(r"\p{Greek").parse_escape().unwrap_err(), - TestError { - span: span(8..8), - kind: ast::ErrorKind::EscapeUnexpectedEof, - } - ); - - assert_eq!( - parser(r"\pNz").parse(), - Ok(Ast::concat(ast::Concat { - span: span(0..4), - asts: vec![ - Ast::class_unicode(ast::ClassUnicode { - span: span(0..3), - negated: false, - kind: ast::ClassUnicodeKind::OneLetter('N'), - }), - Ast::literal(ast::Literal { - span: span(3..4), - kind: ast::LiteralKind::Verbatim, - c: 'z', - }), - ], - })) - ); - assert_eq!( - parser(r"\p{Greek}z").parse(), - Ok(Ast::concat(ast::Concat { - span: span(0..10), - asts: vec![ - Ast::class_unicode(ast::ClassUnicode { - span: 
span(0..9), - negated: false, - kind: ast::ClassUnicodeKind::Named(s("Greek")), - }), - Ast::literal(ast::Literal { - span: span(9..10), - kind: ast::LiteralKind::Verbatim, - c: 'z', - }), - ], - })) - ); - assert_eq!( - parser(r"\p\{").parse().unwrap_err(), - TestError { - span: span(2..3), - kind: ast::ErrorKind::UnicodeClassInvalid, - } - ); - assert_eq!( - parser(r"\P\{").parse().unwrap_err(), - TestError { - span: span(2..3), - kind: ast::ErrorKind::UnicodeClassInvalid, - } - ); - } - - #[test] - fn parse_perl_class() { - assert_eq!( - parser(r"\d").parse_escape(), - Ok(Primitive::Perl(ast::ClassPerl { - span: span(0..2), - kind: ast::ClassPerlKind::Digit, - negated: false, - })) - ); - assert_eq!( - parser(r"\D").parse_escape(), - Ok(Primitive::Perl(ast::ClassPerl { - span: span(0..2), - kind: ast::ClassPerlKind::Digit, - negated: true, - })) - ); - assert_eq!( - parser(r"\s").parse_escape(), - Ok(Primitive::Perl(ast::ClassPerl { - span: span(0..2), - kind: ast::ClassPerlKind::Space, - negated: false, - })) - ); - assert_eq!( - parser(r"\S").parse_escape(), - Ok(Primitive::Perl(ast::ClassPerl { - span: span(0..2), - kind: ast::ClassPerlKind::Space, - negated: true, - })) - ); - assert_eq!( - parser(r"\w").parse_escape(), - Ok(Primitive::Perl(ast::ClassPerl { - span: span(0..2), - kind: ast::ClassPerlKind::Word, - negated: false, - })) - ); - assert_eq!( - parser(r"\W").parse_escape(), - Ok(Primitive::Perl(ast::ClassPerl { - span: span(0..2), - kind: ast::ClassPerlKind::Word, - negated: true, - })) - ); - - assert_eq!( - parser(r"\d").parse(), - Ok(Ast::class_perl(ast::ClassPerl { - span: span(0..2), - kind: ast::ClassPerlKind::Digit, - negated: false, - })) - ); - assert_eq!( - parser(r"\dz").parse(), - Ok(Ast::concat(ast::Concat { - span: span(0..3), - asts: vec![ - Ast::class_perl(ast::ClassPerl { - span: span(0..2), - kind: ast::ClassPerlKind::Digit, - negated: false, - }), - Ast::literal(ast::Literal { - span: span(2..3), - kind: 
ast::LiteralKind::Verbatim, - c: 'z', - }), - ], - })) - ); - } - - // This tests a bug fix where the nest limit checker wasn't decrementing - // its depth during post-traversal, which causes long regexes to trip - // the default limit too aggressively. - #[test] - fn regression_454_nest_too_big() { - let pattern = r#" - 2(?: - [45]\d{3}| - 7(?: - 1[0-267]| - 2[0-289]| - 3[0-29]| - 4[01]| - 5[1-3]| - 6[013]| - 7[0178]| - 91 - )| - 8(?: - 0[125]| - [139][1-6]| - 2[0157-9]| - 41| - 6[1-35]| - 7[1-5]| - 8[1-8]| - 90 - )| - 9(?: - 0[0-2]| - 1[0-4]| - 2[568]| - 3[3-6]| - 5[5-7]| - 6[0167]| - 7[15]| - 8[0146-9] - ) - )\d{4} - "#; - assert!(parser_nest_limit(pattern, 50).parse().is_ok()); - } - - // This tests that we treat a trailing `-` in a character class as a - // literal `-` even when whitespace mode is enabled and there is whitespace - // after the trailing `-`. - #[test] - fn regression_455_trailing_dash_ignore_whitespace() { - assert!(parser("(?x)[ / - ]").parse().is_ok()); - assert!(parser("(?x)[ a - ]").parse().is_ok()); - assert!(parser( - "(?x)[ - a - - ] - " - ) - .parse() - .is_ok()); - assert!(parser( - "(?x)[ - a # wat - - ] - " - ) - .parse() - .is_ok()); - - assert!(parser("(?x)[ / -").parse().is_err()); - assert!(parser("(?x)[ / - ").parse().is_err()); - assert!(parser( - "(?x)[ - / - - " - ) - .parse() - .is_err()); - assert!(parser( - "(?x)[ - / - # wat - " - ) - .parse() - .is_err()); - } -} diff --git a/vendor/regex-syntax/src/ast/print.rs b/vendor/regex-syntax/src/ast/print.rs deleted file mode 100644 index 1ceb3c7f..00000000 --- a/vendor/regex-syntax/src/ast/print.rs +++ /dev/null @@ -1,577 +0,0 @@ -/*! -This module provides a regular expression printer for `Ast`. -*/ - -use core::fmt; - -use crate::ast::{ - self, - visitor::{self, Visitor}, - Ast, -}; - -/// A builder for constructing a printer. -/// -/// Note that since a printer doesn't have any configuration knobs, this type -/// remains unexported. 
-#[derive(Clone, Debug)] -struct PrinterBuilder { - _priv: (), -} - -impl Default for PrinterBuilder { - fn default() -> PrinterBuilder { - PrinterBuilder::new() - } -} - -impl PrinterBuilder { - fn new() -> PrinterBuilder { - PrinterBuilder { _priv: () } - } - - fn build(&self) -> Printer { - Printer { _priv: () } - } -} - -/// A printer for a regular expression abstract syntax tree. -/// -/// A printer converts an abstract syntax tree (AST) to a regular expression -/// pattern string. This particular printer uses constant stack space and heap -/// space proportional to the size of the AST. -/// -/// This printer will not necessarily preserve the original formatting of the -/// regular expression pattern string. For example, all whitespace and comments -/// are ignored. -#[derive(Debug)] -pub struct Printer { - _priv: (), -} - -impl Printer { - /// Create a new printer. - pub fn new() -> Printer { - PrinterBuilder::new().build() - } - - /// Print the given `Ast` to the given writer. The writer must implement - /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used - /// here are a `fmt::Formatter` (which is available in `fmt::Display` - /// implementations) or a `&mut String`. 
- pub fn print(&mut self, ast: &Ast, wtr: W) -> fmt::Result { - visitor::visit(ast, Writer { wtr }) - } -} - -#[derive(Debug)] -struct Writer { - wtr: W, -} - -impl Visitor for Writer { - type Output = (); - type Err = fmt::Error; - - fn finish(self) -> fmt::Result { - Ok(()) - } - - fn visit_pre(&mut self, ast: &Ast) -> fmt::Result { - match *ast { - Ast::Group(ref x) => self.fmt_group_pre(x), - Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_pre(x), - _ => Ok(()), - } - } - - fn visit_post(&mut self, ast: &Ast) -> fmt::Result { - match *ast { - Ast::Empty(_) => Ok(()), - Ast::Flags(ref x) => self.fmt_set_flags(x), - Ast::Literal(ref x) => self.fmt_literal(x), - Ast::Dot(_) => self.wtr.write_str("."), - Ast::Assertion(ref x) => self.fmt_assertion(x), - Ast::ClassPerl(ref x) => self.fmt_class_perl(x), - Ast::ClassUnicode(ref x) => self.fmt_class_unicode(x), - Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_post(x), - Ast::Repetition(ref x) => self.fmt_repetition(x), - Ast::Group(ref x) => self.fmt_group_post(x), - Ast::Alternation(_) => Ok(()), - Ast::Concat(_) => Ok(()), - } - } - - fn visit_alternation_in(&mut self) -> fmt::Result { - self.wtr.write_str("|") - } - - fn visit_class_set_item_pre( - &mut self, - ast: &ast::ClassSetItem, - ) -> Result<(), Self::Err> { - match *ast { - ast::ClassSetItem::Bracketed(ref x) => { - self.fmt_class_bracketed_pre(x) - } - _ => Ok(()), - } - } - - fn visit_class_set_item_post( - &mut self, - ast: &ast::ClassSetItem, - ) -> Result<(), Self::Err> { - use crate::ast::ClassSetItem::*; - - match *ast { - Empty(_) => Ok(()), - Literal(ref x) => self.fmt_literal(x), - Range(ref x) => { - self.fmt_literal(&x.start)?; - self.wtr.write_str("-")?; - self.fmt_literal(&x.end)?; - Ok(()) - } - Ascii(ref x) => self.fmt_class_ascii(x), - Unicode(ref x) => self.fmt_class_unicode(x), - Perl(ref x) => self.fmt_class_perl(x), - Bracketed(ref x) => self.fmt_class_bracketed_post(x), - Union(_) => Ok(()), - } - } - - fn 
visit_class_set_binary_op_in( - &mut self, - ast: &ast::ClassSetBinaryOp, - ) -> Result<(), Self::Err> { - self.fmt_class_set_binary_op_kind(&ast.kind) - } -} - -impl Writer { - fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result { - use crate::ast::GroupKind::*; - match ast.kind { - CaptureIndex(_) => self.wtr.write_str("("), - CaptureName { ref name, starts_with_p } => { - let start = if starts_with_p { "(?P<" } else { "(?<" }; - self.wtr.write_str(start)?; - self.wtr.write_str(&name.name)?; - self.wtr.write_str(">")?; - Ok(()) - } - NonCapturing(ref flags) => { - self.wtr.write_str("(?")?; - self.fmt_flags(flags)?; - self.wtr.write_str(":")?; - Ok(()) - } - } - } - - fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result { - self.wtr.write_str(")") - } - - fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result { - use crate::ast::RepetitionKind::*; - match ast.op.kind { - ZeroOrOne if ast.greedy => self.wtr.write_str("?"), - ZeroOrOne => self.wtr.write_str("??"), - ZeroOrMore if ast.greedy => self.wtr.write_str("*"), - ZeroOrMore => self.wtr.write_str("*?"), - OneOrMore if ast.greedy => self.wtr.write_str("+"), - OneOrMore => self.wtr.write_str("+?"), - Range(ref x) => { - self.fmt_repetition_range(x)?; - if !ast.greedy { - self.wtr.write_str("?")?; - } - Ok(()) - } - } - } - - fn fmt_repetition_range( - &mut self, - ast: &ast::RepetitionRange, - ) -> fmt::Result { - use crate::ast::RepetitionRange::*; - match *ast { - Exactly(x) => write!(self.wtr, "{{{}}}", x), - AtLeast(x) => write!(self.wtr, "{{{},}}", x), - Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y), - } - } - - fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result { - use crate::ast::LiteralKind::*; - - match ast.kind { - Verbatim => self.wtr.write_char(ast.c), - Meta | Superfluous => write!(self.wtr, r"\{}", ast.c), - Octal => write!(self.wtr, r"\{:o}", u32::from(ast.c)), - HexFixed(ast::HexLiteralKind::X) => { - write!(self.wtr, r"\x{:02X}", u32::from(ast.c)) 
- } - HexFixed(ast::HexLiteralKind::UnicodeShort) => { - write!(self.wtr, r"\u{:04X}", u32::from(ast.c)) - } - HexFixed(ast::HexLiteralKind::UnicodeLong) => { - write!(self.wtr, r"\U{:08X}", u32::from(ast.c)) - } - HexBrace(ast::HexLiteralKind::X) => { - write!(self.wtr, r"\x{{{:X}}}", u32::from(ast.c)) - } - HexBrace(ast::HexLiteralKind::UnicodeShort) => { - write!(self.wtr, r"\u{{{:X}}}", u32::from(ast.c)) - } - HexBrace(ast::HexLiteralKind::UnicodeLong) => { - write!(self.wtr, r"\U{{{:X}}}", u32::from(ast.c)) - } - Special(ast::SpecialLiteralKind::Bell) => { - self.wtr.write_str(r"\a") - } - Special(ast::SpecialLiteralKind::FormFeed) => { - self.wtr.write_str(r"\f") - } - Special(ast::SpecialLiteralKind::Tab) => self.wtr.write_str(r"\t"), - Special(ast::SpecialLiteralKind::LineFeed) => { - self.wtr.write_str(r"\n") - } - Special(ast::SpecialLiteralKind::CarriageReturn) => { - self.wtr.write_str(r"\r") - } - Special(ast::SpecialLiteralKind::VerticalTab) => { - self.wtr.write_str(r"\v") - } - Special(ast::SpecialLiteralKind::Space) => { - self.wtr.write_str(r"\ ") - } - } - } - - fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result { - use crate::ast::AssertionKind::*; - match ast.kind { - StartLine => self.wtr.write_str("^"), - EndLine => self.wtr.write_str("$"), - StartText => self.wtr.write_str(r"\A"), - EndText => self.wtr.write_str(r"\z"), - WordBoundary => self.wtr.write_str(r"\b"), - NotWordBoundary => self.wtr.write_str(r"\B"), - WordBoundaryStart => self.wtr.write_str(r"\b{start}"), - WordBoundaryEnd => self.wtr.write_str(r"\b{end}"), - WordBoundaryStartAngle => self.wtr.write_str(r"\<"), - WordBoundaryEndAngle => self.wtr.write_str(r"\>"), - WordBoundaryStartHalf => self.wtr.write_str(r"\b{start-half}"), - WordBoundaryEndHalf => self.wtr.write_str(r"\b{end-half}"), - } - } - - fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result { - self.wtr.write_str("(?")?; - self.fmt_flags(&ast.flags)?; - self.wtr.write_str(")")?; - Ok(()) - } - 
- fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result { - use crate::ast::{Flag, FlagsItemKind}; - - for item in &ast.items { - match item.kind { - FlagsItemKind::Negation => self.wtr.write_str("-"), - FlagsItemKind::Flag(ref flag) => match *flag { - Flag::CaseInsensitive => self.wtr.write_str("i"), - Flag::MultiLine => self.wtr.write_str("m"), - Flag::DotMatchesNewLine => self.wtr.write_str("s"), - Flag::SwapGreed => self.wtr.write_str("U"), - Flag::Unicode => self.wtr.write_str("u"), - Flag::CRLF => self.wtr.write_str("R"), - Flag::IgnoreWhitespace => self.wtr.write_str("x"), - }, - }?; - } - Ok(()) - } - - fn fmt_class_bracketed_pre( - &mut self, - ast: &ast::ClassBracketed, - ) -> fmt::Result { - if ast.negated { - self.wtr.write_str("[^") - } else { - self.wtr.write_str("[") - } - } - - fn fmt_class_bracketed_post( - &mut self, - _ast: &ast::ClassBracketed, - ) -> fmt::Result { - self.wtr.write_str("]") - } - - fn fmt_class_set_binary_op_kind( - &mut self, - ast: &ast::ClassSetBinaryOpKind, - ) -> fmt::Result { - use crate::ast::ClassSetBinaryOpKind::*; - match *ast { - Intersection => self.wtr.write_str("&&"), - Difference => self.wtr.write_str("--"), - SymmetricDifference => self.wtr.write_str("~~"), - } - } - - fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result { - use crate::ast::ClassPerlKind::*; - match ast.kind { - Digit if ast.negated => self.wtr.write_str(r"\D"), - Digit => self.wtr.write_str(r"\d"), - Space if ast.negated => self.wtr.write_str(r"\S"), - Space => self.wtr.write_str(r"\s"), - Word if ast.negated => self.wtr.write_str(r"\W"), - Word => self.wtr.write_str(r"\w"), - } - } - - fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result { - use crate::ast::ClassAsciiKind::*; - match ast.kind { - Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"), - Alnum => self.wtr.write_str("[:alnum:]"), - Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"), - Alpha => self.wtr.write_str("[:alpha:]"), - Ascii if 
ast.negated => self.wtr.write_str("[:^ascii:]"), - Ascii => self.wtr.write_str("[:ascii:]"), - Blank if ast.negated => self.wtr.write_str("[:^blank:]"), - Blank => self.wtr.write_str("[:blank:]"), - Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"), - Cntrl => self.wtr.write_str("[:cntrl:]"), - Digit if ast.negated => self.wtr.write_str("[:^digit:]"), - Digit => self.wtr.write_str("[:digit:]"), - Graph if ast.negated => self.wtr.write_str("[:^graph:]"), - Graph => self.wtr.write_str("[:graph:]"), - Lower if ast.negated => self.wtr.write_str("[:^lower:]"), - Lower => self.wtr.write_str("[:lower:]"), - Print if ast.negated => self.wtr.write_str("[:^print:]"), - Print => self.wtr.write_str("[:print:]"), - Punct if ast.negated => self.wtr.write_str("[:^punct:]"), - Punct => self.wtr.write_str("[:punct:]"), - Space if ast.negated => self.wtr.write_str("[:^space:]"), - Space => self.wtr.write_str("[:space:]"), - Upper if ast.negated => self.wtr.write_str("[:^upper:]"), - Upper => self.wtr.write_str("[:upper:]"), - Word if ast.negated => self.wtr.write_str("[:^word:]"), - Word => self.wtr.write_str("[:word:]"), - Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"), - Xdigit => self.wtr.write_str("[:xdigit:]"), - } - } - - fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result { - use crate::ast::ClassUnicodeKind::*; - use crate::ast::ClassUnicodeOpKind::*; - - if ast.negated { - self.wtr.write_str(r"\P")?; - } else { - self.wtr.write_str(r"\p")?; - } - match ast.kind { - OneLetter(c) => self.wtr.write_char(c), - Named(ref x) => write!(self.wtr, "{{{}}}", x), - NamedValue { op: Equal, ref name, ref value } => { - write!(self.wtr, "{{{}={}}}", name, value) - } - NamedValue { op: Colon, ref name, ref value } => { - write!(self.wtr, "{{{}:{}}}", name, value) - } - NamedValue { op: NotEqual, ref name, ref value } => { - write!(self.wtr, "{{{}!={}}}", name, value) - } - } - } -} - -#[cfg(test)] -mod tests { - use alloc::string::String; - - use 
crate::ast::parse::ParserBuilder; - - use super::*; - - fn roundtrip(given: &str) { - roundtrip_with(|b| b, given); - } - - fn roundtrip_with(mut f: F, given: &str) - where - F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder, - { - let mut builder = ParserBuilder::new(); - f(&mut builder); - let ast = builder.build().parse(given).unwrap(); - - let mut printer = Printer::new(); - let mut dst = String::new(); - printer.print(&ast, &mut dst).unwrap(); - assert_eq!(given, dst); - } - - #[test] - fn print_literal() { - roundtrip("a"); - roundtrip(r"\["); - roundtrip_with(|b| b.octal(true), r"\141"); - roundtrip(r"\x61"); - roundtrip(r"\x7F"); - roundtrip(r"\u0061"); - roundtrip(r"\U00000061"); - roundtrip(r"\x{61}"); - roundtrip(r"\x{7F}"); - roundtrip(r"\u{61}"); - roundtrip(r"\U{61}"); - - roundtrip(r"\a"); - roundtrip(r"\f"); - roundtrip(r"\t"); - roundtrip(r"\n"); - roundtrip(r"\r"); - roundtrip(r"\v"); - roundtrip(r"(?x)\ "); - } - - #[test] - fn print_dot() { - roundtrip("."); - } - - #[test] - fn print_concat() { - roundtrip("ab"); - roundtrip("abcde"); - roundtrip("a(bcd)ef"); - } - - #[test] - fn print_alternation() { - roundtrip("a|b"); - roundtrip("a|b|c|d|e"); - roundtrip("|a|b|c|d|e"); - roundtrip("|a|b|c|d|e|"); - roundtrip("a(b|c|d)|e|f"); - } - - #[test] - fn print_assertion() { - roundtrip(r"^"); - roundtrip(r"$"); - roundtrip(r"\A"); - roundtrip(r"\z"); - roundtrip(r"\b"); - roundtrip(r"\B"); - } - - #[test] - fn print_repetition() { - roundtrip("a?"); - roundtrip("a??"); - roundtrip("a*"); - roundtrip("a*?"); - roundtrip("a+"); - roundtrip("a+?"); - roundtrip("a{5}"); - roundtrip("a{5}?"); - roundtrip("a{5,}"); - roundtrip("a{5,}?"); - roundtrip("a{5,10}"); - roundtrip("a{5,10}?"); - } - - #[test] - fn print_flags() { - roundtrip("(?i)"); - roundtrip("(?-i)"); - roundtrip("(?s-i)"); - roundtrip("(?-si)"); - roundtrip("(?siUmux)"); - } - - #[test] - fn print_group() { - roundtrip("(?i:a)"); - roundtrip("(?Pa)"); - roundtrip("(?a)"); - 
roundtrip("(a)"); - } - - #[test] - fn print_class() { - roundtrip(r"[abc]"); - roundtrip(r"[a-z]"); - roundtrip(r"[^a-z]"); - roundtrip(r"[a-z0-9]"); - roundtrip(r"[-a-z0-9]"); - roundtrip(r"[-a-z0-9]"); - roundtrip(r"[a-z0-9---]"); - roundtrip(r"[a-z&&m-n]"); - roundtrip(r"[[a-z&&m-n]]"); - roundtrip(r"[a-z--m-n]"); - roundtrip(r"[a-z~~m-n]"); - roundtrip(r"[a-z[0-9]]"); - roundtrip(r"[a-z[^0-9]]"); - - roundtrip(r"\d"); - roundtrip(r"\D"); - roundtrip(r"\s"); - roundtrip(r"\S"); - roundtrip(r"\w"); - roundtrip(r"\W"); - - roundtrip(r"[[:alnum:]]"); - roundtrip(r"[[:^alnum:]]"); - roundtrip(r"[[:alpha:]]"); - roundtrip(r"[[:^alpha:]]"); - roundtrip(r"[[:ascii:]]"); - roundtrip(r"[[:^ascii:]]"); - roundtrip(r"[[:blank:]]"); - roundtrip(r"[[:^blank:]]"); - roundtrip(r"[[:cntrl:]]"); - roundtrip(r"[[:^cntrl:]]"); - roundtrip(r"[[:digit:]]"); - roundtrip(r"[[:^digit:]]"); - roundtrip(r"[[:graph:]]"); - roundtrip(r"[[:^graph:]]"); - roundtrip(r"[[:lower:]]"); - roundtrip(r"[[:^lower:]]"); - roundtrip(r"[[:print:]]"); - roundtrip(r"[[:^print:]]"); - roundtrip(r"[[:punct:]]"); - roundtrip(r"[[:^punct:]]"); - roundtrip(r"[[:space:]]"); - roundtrip(r"[[:^space:]]"); - roundtrip(r"[[:upper:]]"); - roundtrip(r"[[:^upper:]]"); - roundtrip(r"[[:word:]]"); - roundtrip(r"[[:^word:]]"); - roundtrip(r"[[:xdigit:]]"); - roundtrip(r"[[:^xdigit:]]"); - - roundtrip(r"\pL"); - roundtrip(r"\PL"); - roundtrip(r"\p{L}"); - roundtrip(r"\P{L}"); - roundtrip(r"\p{X=Y}"); - roundtrip(r"\P{X=Y}"); - roundtrip(r"\p{X:Y}"); - roundtrip(r"\P{X:Y}"); - roundtrip(r"\p{X!=Y}"); - roundtrip(r"\P{X!=Y}"); - } -} diff --git a/vendor/regex-syntax/src/ast/visitor.rs b/vendor/regex-syntax/src/ast/visitor.rs deleted file mode 100644 index c1bb24d9..00000000 --- a/vendor/regex-syntax/src/ast/visitor.rs +++ /dev/null @@ -1,522 +0,0 @@ -use alloc::{vec, vec::Vec}; - -use crate::ast::{self, Ast}; - -/// A trait for visiting an abstract syntax tree (AST) in depth first order. 
-/// -/// The principle aim of this trait is to enable callers to perform case -/// analysis on an abstract syntax tree without necessarily using recursion. -/// In particular, this permits callers to do case analysis with constant stack -/// usage, which can be important since the size of an abstract syntax tree -/// may be proportional to end user input. -/// -/// Typical usage of this trait involves providing an implementation and then -/// running it using the [`visit`] function. -/// -/// Note that the abstract syntax tree for a regular expression is quite -/// complex. Unless you specifically need it, you might be able to use the much -/// simpler [high-level intermediate representation](crate::hir::Hir) and its -/// [corresponding `Visitor` trait](crate::hir::Visitor) instead. -pub trait Visitor { - /// The result of visiting an AST. - type Output; - /// An error that visiting an AST might return. - type Err; - - /// All implementors of `Visitor` must provide a `finish` method, which - /// yields the result of visiting the AST or an error. - fn finish(self) -> Result; - - /// This method is called before beginning traversal of the AST. - fn start(&mut self) {} - - /// This method is called on an `Ast` before descending into child `Ast` - /// nodes. - fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> { - Ok(()) - } - - /// This method is called on an `Ast` after descending all of its child - /// `Ast` nodes. - fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> { - Ok(()) - } - - /// This method is called between child nodes of an - /// [`Alternation`](ast::Alternation). - fn visit_alternation_in(&mut self) -> Result<(), Self::Err> { - Ok(()) - } - - /// This method is called between child nodes of a concatenation. - fn visit_concat_in(&mut self) -> Result<(), Self::Err> { - Ok(()) - } - - /// This method is called on every [`ClassSetItem`](ast::ClassSetItem) - /// before descending into child nodes. 
- fn visit_class_set_item_pre( - &mut self, - _ast: &ast::ClassSetItem, - ) -> Result<(), Self::Err> { - Ok(()) - } - - /// This method is called on every [`ClassSetItem`](ast::ClassSetItem) - /// after descending into child nodes. - fn visit_class_set_item_post( - &mut self, - _ast: &ast::ClassSetItem, - ) -> Result<(), Self::Err> { - Ok(()) - } - - /// This method is called on every - /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) before descending into - /// child nodes. - fn visit_class_set_binary_op_pre( - &mut self, - _ast: &ast::ClassSetBinaryOp, - ) -> Result<(), Self::Err> { - Ok(()) - } - - /// This method is called on every - /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) after descending into child - /// nodes. - fn visit_class_set_binary_op_post( - &mut self, - _ast: &ast::ClassSetBinaryOp, - ) -> Result<(), Self::Err> { - Ok(()) - } - - /// This method is called between the left hand and right hand child nodes - /// of a [`ClassSetBinaryOp`](ast::ClassSetBinaryOp). - fn visit_class_set_binary_op_in( - &mut self, - _ast: &ast::ClassSetBinaryOp, - ) -> Result<(), Self::Err> { - Ok(()) - } -} - -/// Executes an implementation of `Visitor` in constant stack space. -/// -/// This function will visit every node in the given `Ast` while calling the -/// appropriate methods provided by the [`Visitor`] trait. -/// -/// The primary use case for this method is when one wants to perform case -/// analysis over an `Ast` without using a stack size proportional to the depth -/// of the `Ast`. Namely, this method will instead use constant stack size, but -/// will use heap space proportional to the size of the `Ast`. This may be -/// desirable in cases where the size of `Ast` is proportional to end user -/// input. -/// -/// If the visitor returns an error at any point, then visiting is stopped and -/// the error is returned. 
-pub fn visit(ast: &Ast, visitor: V) -> Result { - HeapVisitor::new().visit(ast, visitor) -} - -/// HeapVisitor visits every item in an `Ast` recursively using constant stack -/// size and a heap size proportional to the size of the `Ast`. -struct HeapVisitor<'a> { - /// A stack of `Ast` nodes. This is roughly analogous to the call stack - /// used in a typical recursive visitor. - stack: Vec<(&'a Ast, Frame<'a>)>, - /// Similar to the `Ast` stack above, but is used only for character - /// classes. In particular, character classes embed their own mini - /// recursive syntax. - stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>, -} - -/// Represents a single stack frame while performing structural induction over -/// an `Ast`. -enum Frame<'a> { - /// A stack frame allocated just before descending into a repetition - /// operator's child node. - Repetition(&'a ast::Repetition), - /// A stack frame allocated just before descending into a group's child - /// node. - Group(&'a ast::Group), - /// The stack frame used while visiting every child node of a concatenation - /// of expressions. - Concat { - /// The child node we are currently visiting. - head: &'a Ast, - /// The remaining child nodes to visit (which may be empty). - tail: &'a [Ast], - }, - /// The stack frame used while visiting every child node of an alternation - /// of expressions. - Alternation { - /// The child node we are currently visiting. - head: &'a Ast, - /// The remaining child nodes to visit (which may be empty). - tail: &'a [Ast], - }, -} - -/// Represents a single stack frame while performing structural induction over -/// a character class. -enum ClassFrame<'a> { - /// The stack frame used while visiting every child node of a union of - /// character class items. - Union { - /// The child node we are currently visiting. - head: &'a ast::ClassSetItem, - /// The remaining child nodes to visit (which may be empty). 
- tail: &'a [ast::ClassSetItem], - }, - /// The stack frame used while a binary class operation. - Binary { op: &'a ast::ClassSetBinaryOp }, - /// A stack frame allocated just before descending into a binary operator's - /// left hand child node. - BinaryLHS { - op: &'a ast::ClassSetBinaryOp, - lhs: &'a ast::ClassSet, - rhs: &'a ast::ClassSet, - }, - /// A stack frame allocated just before descending into a binary operator's - /// right hand child node. - BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet }, -} - -/// A representation of the inductive step when performing structural induction -/// over a character class. -/// -/// Note that there is no analogous explicit type for the inductive step for -/// `Ast` nodes because the inductive step is just an `Ast`. For character -/// classes, the inductive step can produce one of two possible child nodes: -/// an item or a binary operation. (An item cannot be a binary operation -/// because that would imply binary operations can be unioned in the concrete -/// syntax, which is not possible.) -enum ClassInduct<'a> { - Item(&'a ast::ClassSetItem), - BinaryOp(&'a ast::ClassSetBinaryOp), -} - -impl<'a> HeapVisitor<'a> { - fn new() -> HeapVisitor<'a> { - HeapVisitor { stack: vec![], stack_class: vec![] } - } - - fn visit( - &mut self, - mut ast: &'a Ast, - mut visitor: V, - ) -> Result { - self.stack.clear(); - self.stack_class.clear(); - - visitor.start(); - loop { - visitor.visit_pre(ast)?; - if let Some(x) = self.induct(ast, &mut visitor)? { - let child = x.child(); - self.stack.push((ast, x)); - ast = child; - continue; - } - // No induction means we have a base case, so we can post visit - // it now. - visitor.visit_post(ast)?; - - // At this point, we now try to pop our call stack until it is - // either empty or we hit another inductive case. 
- loop { - let (post_ast, frame) = match self.stack.pop() { - None => return visitor.finish(), - Some((post_ast, frame)) => (post_ast, frame), - }; - // If this is a concat/alternate, then we might have additional - // inductive steps to process. - if let Some(x) = self.pop(frame) { - match x { - Frame::Alternation { .. } => { - visitor.visit_alternation_in()?; - } - Frame::Concat { .. } => { - visitor.visit_concat_in()?; - } - _ => {} - } - ast = x.child(); - self.stack.push((post_ast, x)); - break; - } - // Otherwise, we've finished visiting all the child nodes for - // this AST, so we can post visit it now. - visitor.visit_post(post_ast)?; - } - } - } - - /// Build a stack frame for the given AST if one is needed (which occurs if - /// and only if there are child nodes in the AST). Otherwise, return None. - /// - /// If this visits a class, then the underlying visitor implementation may - /// return an error which will be passed on here. - fn induct( - &mut self, - ast: &'a Ast, - visitor: &mut V, - ) -> Result>, V::Err> { - Ok(match *ast { - Ast::ClassBracketed(ref x) => { - self.visit_class(x, visitor)?; - None - } - Ast::Repetition(ref x) => Some(Frame::Repetition(x)), - Ast::Group(ref x) => Some(Frame::Group(x)), - Ast::Concat(ref x) if x.asts.is_empty() => None, - Ast::Concat(ref x) => { - Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] }) - } - Ast::Alternation(ref x) if x.asts.is_empty() => None, - Ast::Alternation(ref x) => Some(Frame::Alternation { - head: &x.asts[0], - tail: &x.asts[1..], - }), - _ => None, - }) - } - - /// Pops the given frame. If the frame has an additional inductive step, - /// then return it, otherwise return `None`. - fn pop(&self, induct: Frame<'a>) -> Option> { - match induct { - Frame::Repetition(_) => None, - Frame::Group(_) => None, - Frame::Concat { tail, .. } => { - if tail.is_empty() { - None - } else { - Some(Frame::Concat { head: &tail[0], tail: &tail[1..] }) - } - } - Frame::Alternation { tail, .. 
} => { - if tail.is_empty() { - None - } else { - Some(Frame::Alternation { - head: &tail[0], - tail: &tail[1..], - }) - } - } - } - } - - fn visit_class( - &mut self, - ast: &'a ast::ClassBracketed, - visitor: &mut V, - ) -> Result<(), V::Err> { - let mut ast = ClassInduct::from_bracketed(ast); - loop { - self.visit_class_pre(&ast, visitor)?; - if let Some(x) = self.induct_class(&ast) { - let child = x.child(); - self.stack_class.push((ast, x)); - ast = child; - continue; - } - self.visit_class_post(&ast, visitor)?; - - // At this point, we now try to pop our call stack until it is - // either empty or we hit another inductive case. - loop { - let (post_ast, frame) = match self.stack_class.pop() { - None => return Ok(()), - Some((post_ast, frame)) => (post_ast, frame), - }; - // If this is a union or a binary op, then we might have - // additional inductive steps to process. - if let Some(x) = self.pop_class(frame) { - if let ClassFrame::BinaryRHS { ref op, .. } = x { - visitor.visit_class_set_binary_op_in(op)?; - } - ast = x.child(); - self.stack_class.push((post_ast, x)); - break; - } - // Otherwise, we've finished visiting all the child nodes for - // this class node, so we can post visit it now. - self.visit_class_post(&post_ast, visitor)?; - } - } - } - - /// Call the appropriate `Visitor` methods given an inductive step. - fn visit_class_pre( - &self, - ast: &ClassInduct<'a>, - visitor: &mut V, - ) -> Result<(), V::Err> { - match *ast { - ClassInduct::Item(item) => { - visitor.visit_class_set_item_pre(item)?; - } - ClassInduct::BinaryOp(op) => { - visitor.visit_class_set_binary_op_pre(op)?; - } - } - Ok(()) - } - - /// Call the appropriate `Visitor` methods given an inductive step. 
- fn visit_class_post( - &self, - ast: &ClassInduct<'a>, - visitor: &mut V, - ) -> Result<(), V::Err> { - match *ast { - ClassInduct::Item(item) => { - visitor.visit_class_set_item_post(item)?; - } - ClassInduct::BinaryOp(op) => { - visitor.visit_class_set_binary_op_post(op)?; - } - } - Ok(()) - } - - /// Build a stack frame for the given class node if one is needed (which - /// occurs if and only if there are child nodes). Otherwise, return None. - fn induct_class(&self, ast: &ClassInduct<'a>) -> Option> { - match *ast { - ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => { - match x.kind { - ast::ClassSet::Item(ref item) => { - Some(ClassFrame::Union { head: item, tail: &[] }) - } - ast::ClassSet::BinaryOp(ref op) => { - Some(ClassFrame::Binary { op }) - } - } - } - ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => { - if x.items.is_empty() { - None - } else { - Some(ClassFrame::Union { - head: &x.items[0], - tail: &x.items[1..], - }) - } - } - ClassInduct::BinaryOp(op) => { - Some(ClassFrame::BinaryLHS { op, lhs: &op.lhs, rhs: &op.rhs }) - } - _ => None, - } - } - - /// Pops the given frame. If the frame has an additional inductive step, - /// then return it, otherwise return `None`. - fn pop_class(&self, induct: ClassFrame<'a>) -> Option> { - match induct { - ClassFrame::Union { tail, .. } => { - if tail.is_empty() { - None - } else { - Some(ClassFrame::Union { - head: &tail[0], - tail: &tail[1..], - }) - } - } - ClassFrame::Binary { .. } => None, - ClassFrame::BinaryLHS { op, rhs, .. } => { - Some(ClassFrame::BinaryRHS { op, rhs }) - } - ClassFrame::BinaryRHS { .. } => None, - } - } -} - -impl<'a> Frame<'a> { - /// Perform the next inductive step on this frame and return the next - /// child AST node to visit. - fn child(&self) -> &'a Ast { - match *self { - Frame::Repetition(rep) => &rep.ast, - Frame::Group(group) => &group.ast, - Frame::Concat { head, .. } => head, - Frame::Alternation { head, .. 
} => head, - } - } -} - -impl<'a> ClassFrame<'a> { - /// Perform the next inductive step on this frame and return the next - /// child class node to visit. - fn child(&self) -> ClassInduct<'a> { - match *self { - ClassFrame::Union { head, .. } => ClassInduct::Item(head), - ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op), - ClassFrame::BinaryLHS { ref lhs, .. } => { - ClassInduct::from_set(lhs) - } - ClassFrame::BinaryRHS { ref rhs, .. } => { - ClassInduct::from_set(rhs) - } - } - } -} - -impl<'a> ClassInduct<'a> { - fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> { - ClassInduct::from_set(&ast.kind) - } - - fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> { - match *ast { - ast::ClassSet::Item(ref item) => ClassInduct::Item(item), - ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op), - } - } -} - -impl<'a> core::fmt::Debug for ClassFrame<'a> { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - let x = match *self { - ClassFrame::Union { .. } => "Union", - ClassFrame::Binary { .. } => "Binary", - ClassFrame::BinaryLHS { .. } => "BinaryLHS", - ClassFrame::BinaryRHS { .. 
} => "BinaryRHS", - }; - write!(f, "{}", x) - } -} - -impl<'a> core::fmt::Debug for ClassInduct<'a> { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - let x = match *self { - ClassInduct::Item(it) => match *it { - ast::ClassSetItem::Empty(_) => "Item(Empty)", - ast::ClassSetItem::Literal(_) => "Item(Literal)", - ast::ClassSetItem::Range(_) => "Item(Range)", - ast::ClassSetItem::Ascii(_) => "Item(Ascii)", - ast::ClassSetItem::Perl(_) => "Item(Perl)", - ast::ClassSetItem::Unicode(_) => "Item(Unicode)", - ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)", - ast::ClassSetItem::Union(_) => "Item(Union)", - }, - ClassInduct::BinaryOp(it) => match it.kind { - ast::ClassSetBinaryOpKind::Intersection => { - "BinaryOp(Intersection)" - } - ast::ClassSetBinaryOpKind::Difference => { - "BinaryOp(Difference)" - } - ast::ClassSetBinaryOpKind::SymmetricDifference => { - "BinaryOp(SymmetricDifference)" - } - }, - }; - write!(f, "{}", x) - } -} diff --git a/vendor/regex-syntax/src/debug.rs b/vendor/regex-syntax/src/debug.rs deleted file mode 100644 index a0b051b4..00000000 --- a/vendor/regex-syntax/src/debug.rs +++ /dev/null @@ -1,107 +0,0 @@ -/// A type that wraps a single byte with a convenient fmt::Debug impl that -/// escapes the byte. -pub(crate) struct Byte(pub(crate) u8); - -impl core::fmt::Debug for Byte { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - // Special case ASCII space. It's too hard to read otherwise, so - // put quotes around it. I sometimes wonder whether just '\x20' would - // be better... - if self.0 == b' ' { - return write!(f, "' '"); - } - // 10 bytes is enough to cover any output from ascii::escape_default. 
- let mut bytes = [0u8; 10]; - let mut len = 0; - for (i, mut b) in core::ascii::escape_default(self.0).enumerate() { - // capitalize \xab to \xAB - if i >= 2 && b'a' <= b && b <= b'f' { - b -= 32; - } - bytes[len] = b; - len += 1; - } - write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap()) - } -} - -/// A type that provides a human readable debug impl for arbitrary bytes. -/// -/// This generally works best when the bytes are presumed to be mostly UTF-8, -/// but will work for anything. -/// -/// N.B. This is copied nearly verbatim from regex-automata. Sigh. -pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]); - -impl<'a> core::fmt::Debug for Bytes<'a> { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - write!(f, "\"")?; - // This is a sad re-implementation of a similar impl found in bstr. - let mut bytes = self.0; - while let Some(result) = utf8_decode(bytes) { - let ch = match result { - Ok(ch) => ch, - Err(byte) => { - write!(f, r"\x{:02x}", byte)?; - bytes = &bytes[1..]; - continue; - } - }; - bytes = &bytes[ch.len_utf8()..]; - match ch { - '\0' => write!(f, "\\0")?, - // ASCII control characters except \0, \n, \r, \t - '\x01'..='\x08' - | '\x0b' - | '\x0c' - | '\x0e'..='\x19' - | '\x7f' => { - write!(f, "\\x{:02x}", u32::from(ch))?; - } - '\n' | '\r' | '\t' | _ => { - write!(f, "{}", ch.escape_debug())?; - } - } - } - write!(f, "\"")?; - Ok(()) - } -} - -/// Decodes the next UTF-8 encoded codepoint from the given byte slice. -/// -/// If no valid encoding of a codepoint exists at the beginning of the given -/// byte slice, then the first byte is returned instead. -/// -/// This returns `None` if and only if `bytes` is empty. 
-pub(crate) fn utf8_decode(bytes: &[u8]) -> Option> { - fn len(byte: u8) -> Option { - if byte <= 0x7F { - return Some(1); - } else if byte & 0b1100_0000 == 0b1000_0000 { - return None; - } else if byte <= 0b1101_1111 { - Some(2) - } else if byte <= 0b1110_1111 { - Some(3) - } else if byte <= 0b1111_0111 { - Some(4) - } else { - None - } - } - - if bytes.is_empty() { - return None; - } - let len = match len(bytes[0]) { - None => return Some(Err(bytes[0])), - Some(len) if len > bytes.len() => return Some(Err(bytes[0])), - Some(1) => return Some(Ok(char::from(bytes[0]))), - Some(len) => len, - }; - match core::str::from_utf8(&bytes[..len]) { - Ok(s) => Some(Ok(s.chars().next().unwrap())), - Err(_) => Some(Err(bytes[0])), - } -} diff --git a/vendor/regex-syntax/src/either.rs b/vendor/regex-syntax/src/either.rs deleted file mode 100644 index 7ae41e4c..00000000 --- a/vendor/regex-syntax/src/either.rs +++ /dev/null @@ -1,8 +0,0 @@ -/// A simple binary sum type. -/// -/// This is occasionally useful in an ad hoc fashion. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Either { - Left(Left), - Right(Right), -} diff --git a/vendor/regex-syntax/src/error.rs b/vendor/regex-syntax/src/error.rs deleted file mode 100644 index 98869c4f..00000000 --- a/vendor/regex-syntax/src/error.rs +++ /dev/null @@ -1,311 +0,0 @@ -use alloc::{ - format, - string::{String, ToString}, - vec, - vec::Vec, -}; - -use crate::{ast, hir}; - -/// This error type encompasses any error that can be returned by this crate. -/// -/// This error type is marked as `non_exhaustive`. This means that adding a -/// new variant is not considered a breaking change. -#[non_exhaustive] -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Error { - /// An error that occurred while translating concrete syntax into abstract - /// syntax (AST). - Parse(ast::Error), - /// An error that occurred while translating abstract syntax into a high - /// level intermediate representation (HIR). 
- Translate(hir::Error), -} - -impl From for Error { - fn from(err: ast::Error) -> Error { - Error::Parse(err) - } -} - -impl From for Error { - fn from(err: hir::Error) -> Error { - Error::Translate(err) - } -} - -#[cfg(feature = "std")] -impl std::error::Error for Error {} - -impl core::fmt::Display for Error { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - match *self { - Error::Parse(ref x) => x.fmt(f), - Error::Translate(ref x) => x.fmt(f), - } - } -} - -/// A helper type for formatting nice error messages. -/// -/// This type is responsible for reporting regex parse errors in a nice human -/// readable format. Most of its complexity is from interspersing notational -/// markers pointing out the position where an error occurred. -#[derive(Debug)] -pub struct Formatter<'e, E> { - /// The original regex pattern in which the error occurred. - pattern: &'e str, - /// The error kind. It must impl fmt::Display. - err: &'e E, - /// The primary span of the error. - span: &'e ast::Span, - /// An auxiliary and optional span, in case the error needs to point to - /// two locations (e.g., when reporting a duplicate capture group name). 
- aux_span: Option<&'e ast::Span>, -} - -impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> { - fn from(err: &'e ast::Error) -> Self { - Formatter { - pattern: err.pattern(), - err: err.kind(), - span: err.span(), - aux_span: err.auxiliary_span(), - } - } -} - -impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> { - fn from(err: &'e hir::Error) -> Self { - Formatter { - pattern: err.pattern(), - err: err.kind(), - span: err.span(), - aux_span: None, - } - } -} - -impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - let spans = Spans::from_formatter(self); - if self.pattern.contains('\n') { - let divider = repeat_char('~', 79); - - writeln!(f, "regex parse error:")?; - writeln!(f, "{}", divider)?; - let notated = spans.notate(); - write!(f, "{}", notated)?; - writeln!(f, "{}", divider)?; - // If we have error spans that cover multiple lines, then we just - // note the line numbers. - if !spans.multi_line.is_empty() { - let mut notes = vec![]; - for span in &spans.multi_line { - notes.push(format!( - "on line {} (column {}) through line {} (column {})", - span.start.line, - span.start.column, - span.end.line, - span.end.column - 1 - )); - } - writeln!(f, "{}", notes.join("\n"))?; - } - write!(f, "error: {}", self.err)?; - } else { - writeln!(f, "regex parse error:")?; - let notated = Spans::from_formatter(self).notate(); - write!(f, "{}", notated)?; - write!(f, "error: {}", self.err)?; - } - Ok(()) - } -} - -/// This type represents an arbitrary number of error spans in a way that makes -/// it convenient to notate the regex pattern. ("Notate" means "point out -/// exactly where the error occurred in the regex pattern.") -/// -/// Technically, we can only ever have two spans given our current error -/// structure. 
However, after toiling with a specific algorithm for handling -/// two spans, it became obvious that an algorithm to handle an arbitrary -/// number of spans was actually much simpler. -struct Spans<'p> { - /// The original regex pattern string. - pattern: &'p str, - /// The total width that should be used for line numbers. The width is - /// used for left padding the line numbers for alignment. - /// - /// A value of `0` means line numbers should not be displayed. That is, - /// the pattern is itself only one line. - line_number_width: usize, - /// All error spans that occur on a single line. This sequence always has - /// length equivalent to the number of lines in `pattern`, where the index - /// of the sequence represents a line number, starting at `0`. The spans - /// in each line are sorted in ascending order. - by_line: Vec>, - /// All error spans that occur over one or more lines. That is, the start - /// and end position of the span have different line numbers. The spans are - /// sorted in ascending order. - multi_line: Vec, -} - -impl<'p> Spans<'p> { - /// Build a sequence of spans from a formatter. - fn from_formatter<'e, E: core::fmt::Display>( - fmter: &'p Formatter<'e, E>, - ) -> Spans<'p> { - let mut line_count = fmter.pattern.lines().count(); - // If the pattern ends with a `\n` literal, then our line count is - // off by one, since a span can occur immediately after the last `\n`, - // which is consider to be an additional line. - if fmter.pattern.ends_with('\n') { - line_count += 1; - } - let line_number_width = - if line_count <= 1 { 0 } else { line_count.to_string().len() }; - let mut spans = Spans { - pattern: &fmter.pattern, - line_number_width, - by_line: vec![vec![]; line_count], - multi_line: vec![], - }; - spans.add(fmter.span.clone()); - if let Some(span) = fmter.aux_span { - spans.add(span.clone()); - } - spans - } - - /// Add the given span to this sequence, putting it in the right place. 
- fn add(&mut self, span: ast::Span) { - // This is grossly inefficient since we sort after each add, but right - // now, we only ever add two spans at most. - if span.is_one_line() { - let i = span.start.line - 1; // because lines are 1-indexed - self.by_line[i].push(span); - self.by_line[i].sort(); - } else { - self.multi_line.push(span); - self.multi_line.sort(); - } - } - - /// Notate the pattern string with carents (`^`) pointing at each span - /// location. This only applies to spans that occur within a single line. - fn notate(&self) -> String { - let mut notated = String::new(); - for (i, line) in self.pattern.lines().enumerate() { - if self.line_number_width > 0 { - notated.push_str(&self.left_pad_line_number(i + 1)); - notated.push_str(": "); - } else { - notated.push_str(" "); - } - notated.push_str(line); - notated.push('\n'); - if let Some(notes) = self.notate_line(i) { - notated.push_str(¬es); - notated.push('\n'); - } - } - notated - } - - /// Return notes for the line indexed at `i` (zero-based). If there are no - /// spans for the given line, then `None` is returned. Otherwise, an - /// appropriately space padded string with correctly positioned `^` is - /// returned, accounting for line numbers. - fn notate_line(&self, i: usize) -> Option { - let spans = &self.by_line[i]; - if spans.is_empty() { - return None; - } - let mut notes = String::new(); - for _ in 0..self.line_number_padding() { - notes.push(' '); - } - let mut pos = 0; - for span in spans { - for _ in pos..(span.start.column - 1) { - notes.push(' '); - pos += 1; - } - let note_len = span.end.column.saturating_sub(span.start.column); - for _ in 0..core::cmp::max(1, note_len) { - notes.push('^'); - pos += 1; - } - } - Some(notes) - } - - /// Left pad the given line number with spaces such that it is aligned with - /// other line numbers. 
- fn left_pad_line_number(&self, n: usize) -> String { - let n = n.to_string(); - let pad = self.line_number_width.checked_sub(n.len()).unwrap(); - let mut result = repeat_char(' ', pad); - result.push_str(&n); - result - } - - /// Return the line number padding beginning at the start of each line of - /// the pattern. - /// - /// If the pattern is only one line, then this returns a fixed padding - /// for visual indentation. - fn line_number_padding(&self) -> usize { - if self.line_number_width == 0 { - 4 - } else { - 2 + self.line_number_width - } - } -} - -fn repeat_char(c: char, count: usize) -> String { - core::iter::repeat(c).take(count).collect() -} - -#[cfg(test)] -mod tests { - use alloc::string::ToString; - - use crate::ast::parse::Parser; - - fn assert_panic_message(pattern: &str, expected_msg: &str) { - let result = Parser::new().parse(pattern); - match result { - Ok(_) => { - panic!("regex should not have parsed"); - } - Err(err) => { - assert_eq!(err.to_string(), expected_msg.trim()); - } - } - } - - // See: https://github.com/rust-lang/regex/issues/464 - #[test] - fn regression_464() { - let err = Parser::new().parse("a{\n").unwrap_err(); - // This test checks that the error formatter doesn't panic. - assert!(!err.to_string().is_empty()); - } - - // See: https://github.com/rust-lang/regex/issues/545 - #[test] - fn repetition_quantifier_expects_a_valid_decimal() { - assert_panic_message( - r"\\u{[^}]*}", - r#" -regex parse error: - \\u{[^}]*} - ^ -error: repetition quantifier expects a valid decimal -"#, - ); - } -} diff --git a/vendor/regex-syntax/src/hir/interval.rs b/vendor/regex-syntax/src/hir/interval.rs deleted file mode 100644 index d507ee72..00000000 --- a/vendor/regex-syntax/src/hir/interval.rs +++ /dev/null @@ -1,564 +0,0 @@ -use core::{char, cmp, fmt::Debug, slice}; - -use alloc::vec::Vec; - -use crate::unicode; - -// This module contains an *internal* implementation of interval sets. 
-// -// The primary invariant that interval sets guards is canonical ordering. That -// is, every interval set contains an ordered sequence of intervals where -// no two intervals are overlapping or adjacent. While this invariant is -// occasionally broken within the implementation, it should be impossible for -// callers to observe it. -// -// Since case folding (as implemented below) breaks that invariant, we roll -// that into this API even though it is a little out of place in an otherwise -// generic interval set. (Hence the reason why the `unicode` module is imported -// here.) -// -// Some of the implementation complexity here is a result of me wanting to -// preserve the sequential representation without using additional memory. -// In many cases, we do use linear extra memory, but it is at most 2x and it -// is amortized. If we relaxed the memory requirements, this implementation -// could become much simpler. The extra memory is honestly probably OK, but -// character classes (especially of the Unicode variety) can become quite -// large, and it would be nice to keep regex compilation snappy even in debug -// builds. (In the past, I have been careless with this area of code and it has -// caused slow regex compilations in debug mode, so this isn't entirely -// unwarranted.) -// -// Tests on this are relegated to the public API of HIR in src/hir.rs. - -#[derive(Clone, Debug)] -pub struct IntervalSet { - /// A sorted set of non-overlapping ranges. - ranges: Vec, - /// While not required at all for correctness, we keep track of whether an - /// interval set has been case folded or not. This helps us avoid doing - /// redundant work if, for example, a set has already been cased folded. - /// And note that whether a set is folded or not is preserved through - /// all of the pairwise set operations. That is, if both interval sets - /// have been case folded, then any of difference, union, intersection or - /// symmetric difference all produce a case folded set. 
- /// - /// Note that when this is true, it *must* be the case that the set is case - /// folded. But when it's false, the set *may* be case folded. In other - /// words, we only set this to true when we know it to be case, but we're - /// okay with it being false if it would otherwise be costly to determine - /// whether it should be true. This means code cannot assume that a false - /// value necessarily indicates that the set is not case folded. - /// - /// Bottom line: this is a performance optimization. - folded: bool, -} - -impl Eq for IntervalSet {} - -// We implement PartialEq manually so that we don't consider the set's internal -// 'folded' property to be part of its identity. The 'folded' property is -// strictly an optimization. -impl PartialEq for IntervalSet { - fn eq(&self, other: &IntervalSet) -> bool { - self.ranges.eq(&other.ranges) - } -} - -impl IntervalSet { - /// Create a new set from a sequence of intervals. Each interval is - /// specified as a pair of bounds, where both bounds are inclusive. - /// - /// The given ranges do not need to be in any specific order, and ranges - /// may overlap. - pub fn new>(intervals: T) -> IntervalSet { - let ranges: Vec = intervals.into_iter().collect(); - // An empty set is case folded. - let folded = ranges.is_empty(); - let mut set = IntervalSet { ranges, folded }; - set.canonicalize(); - set - } - - /// Add a new interval to this set. - pub fn push(&mut self, interval: I) { - // TODO: This could be faster. e.g., Push the interval such that - // it preserves canonicalization. - self.ranges.push(interval); - self.canonicalize(); - // We don't know whether the new interval added here is considered - // case folded, so we conservatively assume that the entire set is - // no longer case folded if it was previously. - self.folded = false; - } - - /// Return an iterator over all intervals in this set. - /// - /// The iterator yields intervals in ascending order. 
- pub fn iter(&self) -> IntervalSetIter<'_, I> { - IntervalSetIter(self.ranges.iter()) - } - - /// Return an immutable slice of intervals in this set. - /// - /// The sequence returned is in canonical ordering. - pub fn intervals(&self) -> &[I] { - &self.ranges - } - - /// Expand this interval set such that it contains all case folded - /// characters. For example, if this class consists of the range `a-z`, - /// then applying case folding will result in the class containing both the - /// ranges `a-z` and `A-Z`. - /// - /// This returns an error if the necessary case mapping data is not - /// available. - pub fn case_fold_simple(&mut self) -> Result<(), unicode::CaseFoldError> { - if self.folded { - return Ok(()); - } - let len = self.ranges.len(); - for i in 0..len { - let range = self.ranges[i]; - if let Err(err) = range.case_fold_simple(&mut self.ranges) { - self.canonicalize(); - return Err(err); - } - } - self.canonicalize(); - self.folded = true; - Ok(()) - } - - /// Union this set with the given set, in place. - pub fn union(&mut self, other: &IntervalSet) { - if other.ranges.is_empty() || self.ranges == other.ranges { - return; - } - // This could almost certainly be done more efficiently. - self.ranges.extend(&other.ranges); - self.canonicalize(); - self.folded = self.folded && other.folded; - } - - /// Intersect this set with the given set, in place. - pub fn intersect(&mut self, other: &IntervalSet) { - if self.ranges.is_empty() { - return; - } - if other.ranges.is_empty() { - self.ranges.clear(); - // An empty set is case folded. - self.folded = true; - return; - } - - // There should be a way to do this in-place with constant memory, - // but I couldn't figure out a simple way to do it. So just append - // the intersection to the end of this range, and then drain it before - // we're done. 
- let drain_end = self.ranges.len(); - - let mut ita = 0..drain_end; - let mut itb = 0..other.ranges.len(); - let mut a = ita.next().unwrap(); - let mut b = itb.next().unwrap(); - loop { - if let Some(ab) = self.ranges[a].intersect(&other.ranges[b]) { - self.ranges.push(ab); - } - let (it, aorb) = - if self.ranges[a].upper() < other.ranges[b].upper() { - (&mut ita, &mut a) - } else { - (&mut itb, &mut b) - }; - match it.next() { - Some(v) => *aorb = v, - None => break, - } - } - self.ranges.drain(..drain_end); - self.folded = self.folded && other.folded; - } - - /// Subtract the given set from this set, in place. - pub fn difference(&mut self, other: &IntervalSet) { - if self.ranges.is_empty() || other.ranges.is_empty() { - return; - } - - // This algorithm is (to me) surprisingly complex. A search of the - // interwebs indicate that this is a potentially interesting problem. - // Folks seem to suggest interval or segment trees, but I'd like to - // avoid the overhead (both runtime and conceptual) of that. - // - // The following is basically my Shitty First Draft. Therefore, in - // order to grok it, you probably need to read each line carefully. - // Simplifications are most welcome! - // - // Remember, we can assume the canonical format invariant here, which - // says that all ranges are sorted, not overlapping and not adjacent in - // each class. - let drain_end = self.ranges.len(); - let (mut a, mut b) = (0, 0); - 'LOOP: while a < drain_end && b < other.ranges.len() { - // Basically, the easy cases are when neither range overlaps with - // each other. If the `b` range is less than our current `a` - // range, then we can skip it and move on. - if other.ranges[b].upper() < self.ranges[a].lower() { - b += 1; - continue; - } - // ... similarly for the `a` range. If it's less than the smallest - // `b` range, then we can add it as-is. 
- if self.ranges[a].upper() < other.ranges[b].lower() { - let range = self.ranges[a]; - self.ranges.push(range); - a += 1; - continue; - } - // Otherwise, we have overlapping ranges. - assert!(!self.ranges[a].is_intersection_empty(&other.ranges[b])); - - // This part is tricky and was non-obvious to me without looking - // at explicit examples (see the tests). The trickiness stems from - // two things: 1) subtracting a range from another range could - // yield two ranges and 2) after subtracting a range, it's possible - // that future ranges can have an impact. The loop below advances - // the `b` ranges until they can't possible impact the current - // range. - // - // For example, if our `a` range is `a-t` and our next three `b` - // ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply - // subtraction three times before moving on to the next `a` range. - let mut range = self.ranges[a]; - while b < other.ranges.len() - && !range.is_intersection_empty(&other.ranges[b]) - { - let old_range = range; - range = match range.difference(&other.ranges[b]) { - (None, None) => { - // We lost the entire range, so move on to the next - // without adding this one. - a += 1; - continue 'LOOP; - } - (Some(range1), None) | (None, Some(range1)) => range1, - (Some(range1), Some(range2)) => { - self.ranges.push(range1); - range2 - } - }; - // It's possible that the `b` range has more to contribute - // here. In particular, if it is greater than the original - // range, then it might impact the next `a` range *and* it - // has impacted the current `a` range as much as possible, - // so we can quit. We don't bump `b` so that the next `a` - // range can apply it. - if other.ranges[b].upper() > old_range.upper() { - break; - } - // Otherwise, the next `b` range might apply to the current - // `a` range. 
- b += 1; - } - self.ranges.push(range); - a += 1; - } - while a < drain_end { - let range = self.ranges[a]; - self.ranges.push(range); - a += 1; - } - self.ranges.drain(..drain_end); - self.folded = self.folded && other.folded; - } - - /// Compute the symmetric difference of the two sets, in place. - /// - /// This computes the symmetric difference of two interval sets. This - /// removes all elements in this set that are also in the given set, - /// but also adds all elements from the given set that aren't in this - /// set. That is, the set will contain all elements in either set, - /// but will not contain any elements that are in both sets. - pub fn symmetric_difference(&mut self, other: &IntervalSet) { - // TODO(burntsushi): Fix this so that it amortizes allocation. - let mut intersection = self.clone(); - intersection.intersect(other); - self.union(other); - self.difference(&intersection); - } - - /// Negate this interval set. - /// - /// For all `x` where `x` is any element, if `x` was in this set, then it - /// will not be in this set after negation. - pub fn negate(&mut self) { - if self.ranges.is_empty() { - let (min, max) = (I::Bound::min_value(), I::Bound::max_value()); - self.ranges.push(I::create(min, max)); - // The set containing everything must case folded. - self.folded = true; - return; - } - - // There should be a way to do this in-place with constant memory, - // but I couldn't figure out a simple way to do it. So just append - // the negation to the end of this range, and then drain it before - // we're done. - let drain_end = self.ranges.len(); - - // We do checked arithmetic below because of the canonical ordering - // invariant. 
- if self.ranges[0].lower() > I::Bound::min_value() { - let upper = self.ranges[0].lower().decrement(); - self.ranges.push(I::create(I::Bound::min_value(), upper)); - } - for i in 1..drain_end { - let lower = self.ranges[i - 1].upper().increment(); - let upper = self.ranges[i].lower().decrement(); - self.ranges.push(I::create(lower, upper)); - } - if self.ranges[drain_end - 1].upper() < I::Bound::max_value() { - let lower = self.ranges[drain_end - 1].upper().increment(); - self.ranges.push(I::create(lower, I::Bound::max_value())); - } - self.ranges.drain(..drain_end); - // We don't need to update whether this set is folded or not, because - // it is conservatively preserved through negation. Namely, if a set - // is not folded, then it is possible that its negation is folded, for - // example, [^☃]. But we're fine with assuming that the set is not - // folded in that case. (`folded` permits false negatives but not false - // positives.) - // - // But what about when a set is folded, is its negation also - // necessarily folded? Yes. Because if a set is folded, then for every - // character in the set, it necessarily included its equivalence class - // of case folded characters. Negating it in turn means that all - // equivalence classes in the set are negated, and any equivalence - // class that was previously not in the set is now entirely in the set. - } - - /// Converts this set into a canonical ordering. - fn canonicalize(&mut self) { - if self.is_canonical() { - return; - } - self.ranges.sort(); - assert!(!self.ranges.is_empty()); - - // Is there a way to do this in-place with constant memory? I couldn't - // figure out a way to do it. So just append the canonicalization to - // the end of this range, and then drain it before we're done. - let drain_end = self.ranges.len(); - for oldi in 0..drain_end { - // If we've added at least one new range, then check if we can - // merge this range in the previously added range. 
- if self.ranges.len() > drain_end { - let (last, rest) = self.ranges.split_last_mut().unwrap(); - if let Some(union) = last.union(&rest[oldi]) { - *last = union; - continue; - } - } - let range = self.ranges[oldi]; - self.ranges.push(range); - } - self.ranges.drain(..drain_end); - } - - /// Returns true if and only if this class is in a canonical ordering. - fn is_canonical(&self) -> bool { - for pair in self.ranges.windows(2) { - if pair[0] >= pair[1] { - return false; - } - if pair[0].is_contiguous(&pair[1]) { - return false; - } - } - true - } -} - -/// An iterator over intervals. -#[derive(Debug)] -pub struct IntervalSetIter<'a, I>(slice::Iter<'a, I>); - -impl<'a, I> Iterator for IntervalSetIter<'a, I> { - type Item = &'a I; - - fn next(&mut self) -> Option<&'a I> { - self.0.next() - } -} - -pub trait Interval: - Clone + Copy + Debug + Default + Eq + PartialEq + PartialOrd + Ord -{ - type Bound: Bound; - - fn lower(&self) -> Self::Bound; - fn upper(&self) -> Self::Bound; - fn set_lower(&mut self, bound: Self::Bound); - fn set_upper(&mut self, bound: Self::Bound); - fn case_fold_simple( - &self, - intervals: &mut Vec, - ) -> Result<(), unicode::CaseFoldError>; - - /// Create a new interval. - fn create(lower: Self::Bound, upper: Self::Bound) -> Self { - let mut int = Self::default(); - if lower <= upper { - int.set_lower(lower); - int.set_upper(upper); - } else { - int.set_lower(upper); - int.set_upper(lower); - } - int - } - - /// Union the given overlapping range into this range. - /// - /// If the two ranges aren't contiguous, then this returns `None`. - fn union(&self, other: &Self) -> Option { - if !self.is_contiguous(other) { - return None; - } - let lower = cmp::min(self.lower(), other.lower()); - let upper = cmp::max(self.upper(), other.upper()); - Some(Self::create(lower, upper)) - } - - /// Intersect this range with the given range and return the result. - /// - /// If the intersection is empty, then this returns `None`. 
- fn intersect(&self, other: &Self) -> Option { - let lower = cmp::max(self.lower(), other.lower()); - let upper = cmp::min(self.upper(), other.upper()); - if lower <= upper { - Some(Self::create(lower, upper)) - } else { - None - } - } - - /// Subtract the given range from this range and return the resulting - /// ranges. - /// - /// If subtraction would result in an empty range, then no ranges are - /// returned. - fn difference(&self, other: &Self) -> (Option, Option) { - if self.is_subset(other) { - return (None, None); - } - if self.is_intersection_empty(other) { - return (Some(self.clone()), None); - } - let add_lower = other.lower() > self.lower(); - let add_upper = other.upper() < self.upper(); - // We know this because !self.is_subset(other) and the ranges have - // a non-empty intersection. - assert!(add_lower || add_upper); - let mut ret = (None, None); - if add_lower { - let upper = other.lower().decrement(); - ret.0 = Some(Self::create(self.lower(), upper)); - } - if add_upper { - let lower = other.upper().increment(); - let range = Self::create(lower, self.upper()); - if ret.0.is_none() { - ret.0 = Some(range); - } else { - ret.1 = Some(range); - } - } - ret - } - - /// Returns true if and only if the two ranges are contiguous. Two ranges - /// are contiguous if and only if the ranges are either overlapping or - /// adjacent. - fn is_contiguous(&self, other: &Self) -> bool { - let lower1 = self.lower().as_u32(); - let upper1 = self.upper().as_u32(); - let lower2 = other.lower().as_u32(); - let upper2 = other.upper().as_u32(); - cmp::max(lower1, lower2) <= cmp::min(upper1, upper2).saturating_add(1) - } - - /// Returns true if and only if the intersection of this range and the - /// other range is empty. 
- fn is_intersection_empty(&self, other: &Self) -> bool { - let (lower1, upper1) = (self.lower(), self.upper()); - let (lower2, upper2) = (other.lower(), other.upper()); - cmp::max(lower1, lower2) > cmp::min(upper1, upper2) - } - - /// Returns true if and only if this range is a subset of the other range. - fn is_subset(&self, other: &Self) -> bool { - let (lower1, upper1) = (self.lower(), self.upper()); - let (lower2, upper2) = (other.lower(), other.upper()); - (lower2 <= lower1 && lower1 <= upper2) - && (lower2 <= upper1 && upper1 <= upper2) - } -} - -pub trait Bound: - Copy + Clone + Debug + Eq + PartialEq + PartialOrd + Ord -{ - fn min_value() -> Self; - fn max_value() -> Self; - fn as_u32(self) -> u32; - fn increment(self) -> Self; - fn decrement(self) -> Self; -} - -impl Bound for u8 { - fn min_value() -> Self { - u8::MIN - } - fn max_value() -> Self { - u8::MAX - } - fn as_u32(self) -> u32 { - u32::from(self) - } - fn increment(self) -> Self { - self.checked_add(1).unwrap() - } - fn decrement(self) -> Self { - self.checked_sub(1).unwrap() - } -} - -impl Bound for char { - fn min_value() -> Self { - '\x00' - } - fn max_value() -> Self { - '\u{10FFFF}' - } - fn as_u32(self) -> u32 { - u32::from(self) - } - - fn increment(self) -> Self { - match self { - '\u{D7FF}' => '\u{E000}', - c => char::from_u32(u32::from(c).checked_add(1).unwrap()).unwrap(), - } - } - - fn decrement(self) -> Self { - match self { - '\u{E000}' => '\u{D7FF}', - c => char::from_u32(u32::from(c).checked_sub(1).unwrap()).unwrap(), - } - } -} - -// Tests for interval sets are written in src/hir.rs against the public API. diff --git a/vendor/regex-syntax/src/hir/literal.rs b/vendor/regex-syntax/src/hir/literal.rs deleted file mode 100644 index a5a3737f..00000000 --- a/vendor/regex-syntax/src/hir/literal.rs +++ /dev/null @@ -1,3214 +0,0 @@ -/*! -Provides literal extraction from `Hir` expressions. 
- -An [`Extractor`] pulls literals out of [`Hir`] expressions and returns a -[`Seq`] of [`Literal`]s. - -The purpose of literal extraction is generally to provide avenues for -optimizing regex searches. The main idea is that substring searches can be an -order of magnitude faster than a regex search. Therefore, if one can execute -a substring search to find candidate match locations and only run the regex -search at those locations, then it is possible for huge improvements in -performance to be realized. - -With that said, literal optimizations are generally a black art because even -though substring search is generally faster, if the number of candidates -produced is high, then it can create a lot of overhead by ping-ponging between -the substring search and the regex search. - -Here are some heuristics that might be used to help increase the chances of -effective literal optimizations: - -* Stick to small [`Seq`]s. If you search for too many literals, it's likely -to lead to substring search that is only a little faster than a regex search, -and thus the overhead of using literal optimizations in the first place might -make things slower overall. -* The literals in your [`Seq`] shouldn't be too short. In general, longer is -better. A sequence corresponding to single bytes that occur frequently in the -haystack, for example, is probably a bad literal optimization because it's -likely to produce many false positive candidates. Longer literals are less -likely to match, and thus probably produce fewer false positives. -* If it's possible to estimate the approximate frequency of each byte according -to some pre-computed background distribution, it is possible to compute a score -of how "good" a `Seq` is. If a `Seq` isn't good enough, you might consider -skipping the literal optimization and just use the regex engine. - -(It should be noted that there are always pathological cases that can make -any kind of literal optimization be a net slower result. 
This is why it -might be a good idea to be conservative, or to even provide a means for -literal optimizations to be dynamically disabled if they are determined to be -ineffective according to some measure.) - -You're encouraged to explore the methods on [`Seq`], which permit shrinking -the size of sequences in a preference-order preserving fashion. - -Finally, note that it isn't strictly necessary to use an [`Extractor`]. Namely, -an `Extractor` only uses public APIs of the [`Seq`] and [`Literal`] types, -so it is possible to implement your own extractor. For example, for n-grams -or "inner" literals (i.e., not prefix or suffix literals). The `Extractor` -is mostly responsible for the case analysis over `Hir` expressions. Much of -the "trickier" parts are how to combine literal sequences, and that is all -implemented on [`Seq`]. -*/ - -use core::{cmp, mem, num::NonZeroUsize}; - -use alloc::{vec, vec::Vec}; - -use crate::hir::{self, Hir}; - -/// Extracts prefix or suffix literal sequences from [`Hir`] expressions. -/// -/// Literal extraction is based on the following observations: -/// -/// * Many regexes start with one or a small number of literals. -/// * Substring search for literals is often much faster (sometimes by an order -/// of magnitude) than a regex search. -/// -/// Thus, in many cases, one can search for literals to find candidate starting -/// locations of a match, and then only run the full regex engine at each such -/// location instead of over the full haystack. -/// -/// The main downside of literal extraction is that it can wind up causing a -/// search to be slower overall. For example, if there are many matches or if -/// there are many candidates that don't ultimately lead to a match, then a -/// lot of overhead will be spent in shuffing back-and-forth between substring -/// search and the regex engine. This is the fundamental reason why literal -/// optimizations for regex patterns is sometimes considered a "black art." 
-/// -/// # Look-around assertions -/// -/// Literal extraction treats all look-around assertions as-if they match every -/// empty string. So for example, the regex `\bquux\b` will yield a sequence -/// containing a single exact literal `quux`. However, not all occurrences -/// of `quux` correspond to a match a of the regex. For example, `\bquux\b` -/// does not match `ZquuxZ` anywhere because `quux` does not fall on a word -/// boundary. -/// -/// In effect, if your regex contains look-around assertions, then a match of -/// an exact literal does not necessarily mean the regex overall matches. So -/// you may still need to run the regex engine in such cases to confirm the -/// match. -/// -/// The precise guarantee you get from a literal sequence is: if every literal -/// in the sequence is exact and the original regex contains zero look-around -/// assertions, then a preference-order multi-substring search of those -/// literals will precisely match a preference-order search of the original -/// regex. -/// -/// # Example -/// -/// This shows how to extract prefixes: -/// -/// ``` -/// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse}; -/// -/// let hir = parse(r"(a|b|c)(x|y|z)[A-Z]+foo")?; -/// let got = Extractor::new().extract(&hir); -/// // All literals returned are "inexact" because none of them reach the -/// // match state. 
-/// let expected = Seq::from_iter([ -/// Literal::inexact("ax"), -/// Literal::inexact("ay"), -/// Literal::inexact("az"), -/// Literal::inexact("bx"), -/// Literal::inexact("by"), -/// Literal::inexact("bz"), -/// Literal::inexact("cx"), -/// Literal::inexact("cy"), -/// Literal::inexact("cz"), -/// ]); -/// assert_eq!(expected, got); -/// -/// # Ok::<(), Box>(()) -/// ``` -/// -/// This shows how to extract suffixes: -/// -/// ``` -/// use regex_syntax::{ -/// hir::literal::{Extractor, ExtractKind, Literal, Seq}, -/// parse, -/// }; -/// -/// let hir = parse(r"foo|[A-Z]+bar")?; -/// let got = Extractor::new().kind(ExtractKind::Suffix).extract(&hir); -/// // Since 'foo' gets to a match state, it is considered exact. But 'bar' -/// // does not because of the '[A-Z]+', and thus is marked inexact. -/// let expected = Seq::from_iter([ -/// Literal::exact("foo"), -/// Literal::inexact("bar"), -/// ]); -/// assert_eq!(expected, got); -/// -/// # Ok::<(), Box>(()) -/// ``` -#[derive(Clone, Debug)] -pub struct Extractor { - kind: ExtractKind, - limit_class: usize, - limit_repeat: usize, - limit_literal_len: usize, - limit_total: usize, -} - -impl Extractor { - /// Create a new extractor with a default configuration. - /// - /// The extractor can be optionally configured before calling - /// [`Extractor::extract`] to get a literal sequence. - pub fn new() -> Extractor { - Extractor { - kind: ExtractKind::Prefix, - limit_class: 10, - limit_repeat: 10, - limit_literal_len: 100, - limit_total: 250, - } - } - - /// Execute the extractor and return a sequence of literals. 
- pub fn extract(&self, hir: &Hir) -> Seq { - use crate::hir::HirKind::*; - - match *hir.kind() { - Empty | Look(_) => Seq::singleton(self::Literal::exact(vec![])), - Literal(hir::Literal(ref bytes)) => { - let mut seq = - Seq::singleton(self::Literal::exact(bytes.to_vec())); - self.enforce_literal_len(&mut seq); - seq - } - Class(hir::Class::Unicode(ref cls)) => { - self.extract_class_unicode(cls) - } - Class(hir::Class::Bytes(ref cls)) => self.extract_class_bytes(cls), - Repetition(ref rep) => self.extract_repetition(rep), - Capture(hir::Capture { ref sub, .. }) => self.extract(sub), - Concat(ref hirs) => match self.kind { - ExtractKind::Prefix => self.extract_concat(hirs.iter()), - ExtractKind::Suffix => self.extract_concat(hirs.iter().rev()), - }, - Alternation(ref hirs) => { - // Unlike concat, we always union starting from the beginning, - // since the beginning corresponds to the highest preference, - // which doesn't change based on forwards vs reverse. - self.extract_alternation(hirs.iter()) - } - } - } - - /// Set the kind of literal sequence to extract from an [`Hir`] expression. - /// - /// The default is to extract prefixes, but suffixes can be selected - /// instead. The contract for prefixes is that every match of the - /// corresponding `Hir` must start with one of the literals in the sequence - /// returned. Moreover, the _order_ of the sequence returned corresponds to - /// the preference order. - /// - /// Suffixes satisfy a similar contract in that every match of the - /// corresponding `Hir` must end with one of the literals in the sequence - /// returned. However, there is no guarantee that the literals are in - /// preference order. - /// - /// Remember that a sequence can be infinite. For example, unless the - /// limits are configured to be impractically large, attempting to extract - /// prefixes (or suffixes) for the pattern `[A-Z]` will return an infinite - /// sequence. 
Generally speaking, if the sequence returned is infinite, - /// then it is presumed to be unwise to do prefix (or suffix) optimizations - /// for the pattern. - pub fn kind(&mut self, kind: ExtractKind) -> &mut Extractor { - self.kind = kind; - self - } - - /// Configure a limit on the length of the sequence that is permitted for - /// a character class. If a character class exceeds this limit, then the - /// sequence returned for it is infinite. - /// - /// This prevents classes like `[A-Z]` or `\pL` from getting turned into - /// huge and likely unproductive sequences of literals. - /// - /// # Example - /// - /// This example shows how this limit can be lowered to decrease the tolerance - /// for character classes being turned into literal sequences. - /// - /// ``` - /// use regex_syntax::{hir::literal::{Extractor, Seq}, parse}; - /// - /// let hir = parse(r"[0-9]")?; - /// - /// let got = Extractor::new().extract(&hir); - /// let expected = Seq::new([ - /// "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", - /// ]); - /// assert_eq!(expected, got); - /// - /// // Now let's shrink the limit and see how that changes things. - /// let got = Extractor::new().limit_class(4).extract(&hir); - /// let expected = Seq::infinite(); - /// assert_eq!(expected, got); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn limit_class(&mut self, limit: usize) -> &mut Extractor { - self.limit_class = limit; - self - } - - /// Configure a limit on the total number of repetitions that is permitted - /// before literal extraction is stopped. - /// - /// This is useful for limiting things like `(abcde){50}`, or more - /// insidiously, `(?:){1000000000}`. This limit prevents any one single - /// repetition from adding too much to a literal sequence. - /// - /// With this limit set, repetitions that exceed it will be stopped and any - /// literals extracted up to that point will be made inexact. 
- /// - /// # Example - /// - /// This shows how to decrease the limit and compares it with the default. - /// - /// ``` - /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse}; - /// - /// let hir = parse(r"(abc){8}")?; - /// - /// let got = Extractor::new().extract(&hir); - /// let expected = Seq::new(["abcabcabcabcabcabcabcabc"]); - /// assert_eq!(expected, got); - /// - /// // Now let's shrink the limit and see how that changes things. - /// let got = Extractor::new().limit_repeat(4).extract(&hir); - /// let expected = Seq::from_iter([ - /// Literal::inexact("abcabcabcabc"), - /// ]); - /// assert_eq!(expected, got); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn limit_repeat(&mut self, limit: usize) -> &mut Extractor { - self.limit_repeat = limit; - self - } - - /// Configure a limit on the maximum length of any literal in a sequence. - /// - /// This is useful for limiting things like `(abcde){5}{5}{5}{5}`. While - /// each repetition or literal in that regex is small, when all the - /// repetitions are applied, one ends up with a literal of length `5^4 = - /// 625`. - /// - /// With this limit set, literals that exceed it will be made inexact and - /// thus prevented from growing. - /// - /// # Example - /// - /// This shows how to decrease the limit and compares it with the default. - /// - /// ``` - /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse}; - /// - /// let hir = parse(r"(abc){2}{2}{2}")?; - /// - /// let got = Extractor::new().extract(&hir); - /// let expected = Seq::new(["abcabcabcabcabcabcabcabc"]); - /// assert_eq!(expected, got); - /// - /// // Now let's shrink the limit and see how that changes things. 
- /// let got = Extractor::new().limit_literal_len(14).extract(&hir); - /// let expected = Seq::from_iter([ - /// Literal::inexact("abcabcabcabcab"), - /// ]); - /// assert_eq!(expected, got); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn limit_literal_len(&mut self, limit: usize) -> &mut Extractor { - self.limit_literal_len = limit; - self - } - - /// Configure a limit on the total number of literals that will be - /// returned. - /// - /// This is useful as a practical measure for avoiding the creation of - /// large sequences of literals. While the extractor will automatically - /// handle local creations of large sequences (for example, `[A-Z]` yields - /// an infinite sequence by default), large sequences can be created - /// through non-local means as well. - /// - /// For example, `[ab]{3}{3}` would yield a sequence of length `512 = 2^9` - /// despite each of the repetitions being small on their own. This limit - /// thus represents a "catch all" for avoiding locally small sequences from - /// combining into large sequences. - /// - /// # Example - /// - /// This example shows how reducing the limit will change the literal - /// sequence returned. - /// - /// ``` - /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse}; - /// - /// let hir = parse(r"[ab]{2}{2}")?; - /// - /// let got = Extractor::new().extract(&hir); - /// let expected = Seq::new([ - /// "aaaa", "aaab", "aaba", "aabb", - /// "abaa", "abab", "abba", "abbb", - /// "baaa", "baab", "baba", "babb", - /// "bbaa", "bbab", "bbba", "bbbb", - /// ]); - /// assert_eq!(expected, got); - /// - /// // The default limit is not too big, but big enough to extract all - /// // literals from '[ab]{2}{2}'. If we shrink the limit to less than 16, - /// // then we'll get a truncated set. Notice that it returns a sequence of - /// // length 4 even though our limit was 10. This is because the sequence - /// // is difficult to increase without blowing the limit. 
Notice also - /// // that every literal in the sequence is now inexact because they were - /// // stripped of some suffix. - /// let got = Extractor::new().limit_total(10).extract(&hir); - /// let expected = Seq::from_iter([ - /// Literal::inexact("aa"), - /// Literal::inexact("ab"), - /// Literal::inexact("ba"), - /// Literal::inexact("bb"), - /// ]); - /// assert_eq!(expected, got); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn limit_total(&mut self, limit: usize) -> &mut Extractor { - self.limit_total = limit; - self - } - - /// Extract a sequence from the given concatenation. Sequences from each of - /// the child HIR expressions are combined via cross product. - /// - /// This short circuits once the cross product turns into a sequence - /// containing only inexact literals. - fn extract_concat<'a, I: Iterator>(&self, it: I) -> Seq { - let mut seq = Seq::singleton(self::Literal::exact(vec![])); - for hir in it { - // If every element in the sequence is inexact, then a cross - // product will always be a no-op. Thus, there is nothing else we - // can add to it and can quit early. Note that this also includes - // infinite sequences. - if seq.is_inexact() { - break; - } - // Note that 'cross' also dispatches based on whether we're - // extracting prefixes or suffixes. - seq = self.cross(seq, &mut self.extract(hir)); - } - seq - } - - /// Extract a sequence from the given alternation. - /// - /// This short circuits once the union turns into an infinite sequence. - fn extract_alternation<'a, I: Iterator>( - &self, - it: I, - ) -> Seq { - let mut seq = Seq::empty(); - for hir in it { - // Once our 'seq' is infinite, every subsequent union - // operation on it will itself always result in an - // infinite sequence. Thus, it can never change and we can - // short-circuit. - if !seq.is_finite() { - break; - } - seq = self.union(seq, &mut self.extract(hir)); - } - seq - } - - /// Extract a sequence of literals from the given repetition. 
We do our - /// best, Some examples: - /// - /// 'a*' => [inexact(a), exact("")] - /// 'a*?' => [exact(""), inexact(a)] - /// 'a+' => [inexact(a)] - /// 'a{3}' => [exact(aaa)] - /// 'a{3,5} => [inexact(aaa)] - /// - /// The key here really is making sure we get the 'inexact' vs 'exact' - /// attributes correct on each of the literals we add. For example, the - /// fact that 'a*' gives us an inexact 'a' and an exact empty string means - /// that a regex like 'ab*c' will result in [inexact(ab), exact(ac)] - /// literals being extracted, which might actually be a better prefilter - /// than just 'a'. - fn extract_repetition(&self, rep: &hir::Repetition) -> Seq { - let mut subseq = self.extract(&rep.sub); - match *rep { - hir::Repetition { min: 0, max, greedy, .. } => { - // When 'max=1', we can retain exactness, since 'a?' is - // equivalent to 'a|'. Similarly below, 'a??' is equivalent to - // '|a'. - if max != Some(1) { - subseq.make_inexact(); - } - let mut empty = Seq::singleton(Literal::exact(vec![])); - if !greedy { - mem::swap(&mut subseq, &mut empty); - } - self.union(subseq, &mut empty) - } - hir::Repetition { min, max: Some(max), .. } if min == max => { - assert!(min > 0); // handled above - let limit = - u32::try_from(self.limit_repeat).unwrap_or(u32::MAX); - let mut seq = Seq::singleton(Literal::exact(vec![])); - for _ in 0..cmp::min(min, limit) { - if seq.is_inexact() { - break; - } - seq = self.cross(seq, &mut subseq.clone()); - } - if usize::try_from(min).is_err() || min > limit { - seq.make_inexact(); - } - seq - } - hir::Repetition { min, .. 
} => { - assert!(min > 0); // handled above - let limit = - u32::try_from(self.limit_repeat).unwrap_or(u32::MAX); - let mut seq = Seq::singleton(Literal::exact(vec![])); - for _ in 0..cmp::min(min, limit) { - if seq.is_inexact() { - break; - } - seq = self.cross(seq, &mut subseq.clone()); - } - seq.make_inexact(); - seq - } - } - } - - /// Convert the given Unicode class into a sequence of literals if the - /// class is small enough. If the class is too big, return an infinite - /// sequence. - fn extract_class_unicode(&self, cls: &hir::ClassUnicode) -> Seq { - if self.class_over_limit_unicode(cls) { - return Seq::infinite(); - } - let mut seq = Seq::empty(); - for r in cls.iter() { - for ch in r.start()..=r.end() { - seq.push(Literal::from(ch)); - } - } - self.enforce_literal_len(&mut seq); - seq - } - - /// Convert the given byte class into a sequence of literals if the class - /// is small enough. If the class is too big, return an infinite sequence. - fn extract_class_bytes(&self, cls: &hir::ClassBytes) -> Seq { - if self.class_over_limit_bytes(cls) { - return Seq::infinite(); - } - let mut seq = Seq::empty(); - for r in cls.iter() { - for b in r.start()..=r.end() { - seq.push(Literal::from(b)); - } - } - self.enforce_literal_len(&mut seq); - seq - } - - /// Returns true if the given Unicode class exceeds the configured limits - /// on this extractor. - fn class_over_limit_unicode(&self, cls: &hir::ClassUnicode) -> bool { - let mut count = 0; - for r in cls.iter() { - if count > self.limit_class { - return true; - } - count += r.len(); - } - count > self.limit_class - } - - /// Returns true if the given byte class exceeds the configured limits on - /// this extractor. 
- fn class_over_limit_bytes(&self, cls: &hir::ClassBytes) -> bool { - let mut count = 0; - for r in cls.iter() { - if count > self.limit_class { - return true; - } - count += r.len(); - } - count > self.limit_class - } - - /// Compute the cross product of the two sequences if the result would be - /// within configured limits. Otherwise, make `seq2` infinite and cross the - /// infinite sequence with `seq1`. - fn cross(&self, mut seq1: Seq, seq2: &mut Seq) -> Seq { - if seq1.max_cross_len(seq2).map_or(false, |len| len > self.limit_total) - { - seq2.make_infinite(); - } - if let ExtractKind::Suffix = self.kind { - seq1.cross_reverse(seq2); - } else { - seq1.cross_forward(seq2); - } - assert!(seq1.len().map_or(true, |x| x <= self.limit_total)); - self.enforce_literal_len(&mut seq1); - seq1 - } - - /// Union the two sequences if the result would be within configured - /// limits. Otherwise, make `seq2` infinite and union the infinite sequence - /// with `seq1`. - fn union(&self, mut seq1: Seq, seq2: &mut Seq) -> Seq { - if seq1.max_union_len(seq2).map_or(false, |len| len > self.limit_total) - { - // We try to trim our literal sequences to see if we can make - // room for more literals. The idea is that we'd rather trim down - // literals already in our sequence if it means we can add a few - // more and retain a finite sequence. Otherwise, we'll union with - // an infinite sequence and that infects everything and effectively - // stops literal extraction in its tracks. - // - // We do we keep 4 bytes here? Well, it's a bit of an abstraction - // leakage. Downstream, the literals may wind up getting fed to - // the Teddy algorithm, which supports searching literals up to - // length 4. So that's why we pick that number here. Arguably this - // should be a tuneable parameter, but it seems a little tricky to - // describe. And I'm still unsure if this is the right way to go - // about culling literal sequences. 
- match self.kind { - ExtractKind::Prefix => { - seq1.keep_first_bytes(4); - seq2.keep_first_bytes(4); - } - ExtractKind::Suffix => { - seq1.keep_last_bytes(4); - seq2.keep_last_bytes(4); - } - } - seq1.dedup(); - seq2.dedup(); - if seq1 - .max_union_len(seq2) - .map_or(false, |len| len > self.limit_total) - { - seq2.make_infinite(); - } - } - seq1.union(seq2); - assert!(seq1.len().map_or(true, |x| x <= self.limit_total)); - seq1 - } - - /// Applies the literal length limit to the given sequence. If none of the - /// literals in the sequence exceed the limit, then this is a no-op. - fn enforce_literal_len(&self, seq: &mut Seq) { - let len = self.limit_literal_len; - match self.kind { - ExtractKind::Prefix => seq.keep_first_bytes(len), - ExtractKind::Suffix => seq.keep_last_bytes(len), - } - } -} - -impl Default for Extractor { - fn default() -> Extractor { - Extractor::new() - } -} - -/// The kind of literals to extract from an [`Hir`] expression. -/// -/// The default extraction kind is `Prefix`. -#[non_exhaustive] -#[derive(Clone, Debug)] -pub enum ExtractKind { - /// Extracts only prefix literals from a regex. - Prefix, - /// Extracts only suffix literals from a regex. - /// - /// Note that the sequence returned by suffix literals currently may - /// not correctly represent leftmost-first or "preference" order match - /// semantics. - Suffix, -} - -impl ExtractKind { - /// Returns true if this kind is the `Prefix` variant. - pub fn is_prefix(&self) -> bool { - matches!(*self, ExtractKind::Prefix) - } - - /// Returns true if this kind is the `Suffix` variant. - pub fn is_suffix(&self) -> bool { - matches!(*self, ExtractKind::Suffix) - } -} - -impl Default for ExtractKind { - fn default() -> ExtractKind { - ExtractKind::Prefix - } -} - -/// A sequence of literals. -/// -/// A `Seq` is very much like a set in that it represents a union of its -/// members. 
That is, it corresponds to a set of literals where at least one -/// must match in order for a particular [`Hir`] expression to match. (Whether -/// this corresponds to the entire `Hir` expression, a prefix of it or a suffix -/// of it depends on how the `Seq` was extracted from the `Hir`.) -/// -/// It is also unlike a set in that multiple identical literals may appear, -/// and that the order of the literals in the `Seq` matters. For example, if -/// the sequence is `[sam, samwise]` and leftmost-first matching is used, then -/// `samwise` can never match and the sequence is equivalent to `[sam]`. -/// -/// # States of a sequence -/// -/// A `Seq` has a few different logical states to consider: -/// -/// * The sequence can represent "any" literal. When this happens, the set does -/// not have a finite size. The purpose of this state is to inhibit callers -/// from making assumptions about what literals are required in order to match -/// a particular [`Hir`] expression. Generally speaking, when a set is in this -/// state, literal optimizations are inhibited. A good example of a regex that -/// will cause this sort of set to appear is `[A-Za-z]`. The character class -/// is just too big (and also too narrow) to be usefully expanded into 52 -/// different literals. (Note that the decision for when a seq should become -/// infinite is determined by the caller. A seq itself has no hard-coded -/// limits.) -/// * The sequence can be empty, in which case, it is an affirmative statement -/// that there are no literals that can match the corresponding `Hir`. -/// Consequently, the `Hir` never matches any input. For example, `[a&&b]`. -/// * The sequence can be non-empty, in which case, at least one of the -/// literals must match in order for the corresponding `Hir` to match. -/// -/// # Example -/// -/// This example shows how literal sequences can be simplified by stripping -/// suffixes and minimizing while maintaining preference order. 
-/// -/// ``` -/// use regex_syntax::hir::literal::{Literal, Seq}; -/// -/// let mut seq = Seq::new(&[ -/// "farm", -/// "appliance", -/// "faraway", -/// "apple", -/// "fare", -/// "gap", -/// "applicant", -/// "applaud", -/// ]); -/// seq.keep_first_bytes(3); -/// seq.minimize_by_preference(); -/// // Notice that 'far' comes before 'app', which matches the order in the -/// // original sequence. This guarantees that leftmost-first semantics are -/// // not altered by simplifying the set. -/// let expected = Seq::from_iter([ -/// Literal::inexact("far"), -/// Literal::inexact("app"), -/// Literal::exact("gap"), -/// ]); -/// assert_eq!(expected, seq); -/// ``` -#[derive(Clone, Eq, PartialEq)] -pub struct Seq { - /// The members of this seq. - /// - /// When `None`, the seq represents all possible literals. That is, it - /// prevents one from making assumptions about specific literals in the - /// seq, and forces one to treat it as if any literal might be in the seq. - /// - /// Note that `Some(vec![])` is valid and corresponds to the empty seq of - /// literals, i.e., a regex that can never match. For example, `[a&&b]`. - /// It is distinct from `Some(vec![""])`, which corresponds to the seq - /// containing an empty string, which matches at every position. - literals: Option>, -} - -impl Seq { - /// Returns an empty sequence. - /// - /// An empty sequence matches zero literals, and thus corresponds to a - /// regex that itself can never match. - #[inline] - pub fn empty() -> Seq { - Seq { literals: Some(vec![]) } - } - - /// Returns a sequence of literals without a finite size and may contain - /// any literal. - /// - /// A sequence without finite size does not reveal anything about the - /// characteristics of the literals in its set. There are no fixed prefixes - /// or suffixes, nor are lower or upper bounds on the length of the literals - /// in the set known. 
- /// - /// This is useful to represent constructs in a regex that are "too big" - /// to useful represent as a sequence of literals. For example, `[A-Za-z]`. - /// When sequences get too big, they lose their discriminating nature and - /// are more likely to produce false positives, which in turn makes them - /// less likely to speed up searches. - /// - /// More pragmatically, for many regexes, enumerating all possible literals - /// is itself not possible or might otherwise use too many resources. So - /// constraining the size of sets during extraction is a practical trade - /// off to make. - #[inline] - pub fn infinite() -> Seq { - Seq { literals: None } - } - - /// Returns a sequence containing a single literal. - #[inline] - pub fn singleton(lit: Literal) -> Seq { - Seq { literals: Some(vec![lit]) } - } - - /// Returns a sequence of exact literals from the given byte strings. - #[inline] - pub fn new(it: I) -> Seq - where - I: IntoIterator, - B: AsRef<[u8]>, - { - it.into_iter().map(|b| Literal::exact(b.as_ref())).collect() - } - - /// If this is a finite sequence, return its members as a slice of - /// literals. - /// - /// The slice returned may be empty, in which case, there are no literals - /// that can match this sequence. - #[inline] - pub fn literals(&self) -> Option<&[Literal]> { - self.literals.as_deref() - } - - /// Push a literal to the end of this sequence. - /// - /// If this sequence is not finite, then this is a no-op. - /// - /// Similarly, if the most recently added item of this sequence is - /// equivalent to the literal given, then it is not added. This reflects - /// a `Seq`'s "set like" behavior, and represents a practical trade off. - /// Namely, there is never any need to have two adjacent and equivalent - /// literals in the same sequence, _and_ it is easy to detect in some - /// cases. 
- #[inline] - pub fn push(&mut self, lit: Literal) { - let lits = match self.literals { - None => return, - Some(ref mut lits) => lits, - }; - if lits.last().map_or(false, |m| m == &lit) { - return; - } - lits.push(lit); - } - - /// Make all of the literals in this sequence inexact. - /// - /// This is a no-op if this sequence is not finite. - #[inline] - pub fn make_inexact(&mut self) { - let lits = match self.literals { - None => return, - Some(ref mut lits) => lits, - }; - for lit in lits.iter_mut() { - lit.make_inexact(); - } - } - - /// Converts this sequence to an infinite sequence. - /// - /// This is a no-op if the sequence is already infinite. - #[inline] - pub fn make_infinite(&mut self) { - self.literals = None; - } - - /// Modify this sequence to contain the cross product between it and the - /// sequence given. - /// - /// The cross product only considers literals in this sequence that are - /// exact. That is, inexact literals are not extended. - /// - /// The literals are always drained from `other`, even if none are used. - /// This permits callers to reuse the sequence allocation elsewhere. - /// - /// If this sequence is infinite, then this is a no-op, regardless of what - /// `other` contains (and in this case, the literals are still drained from - /// `other`). If `other` is infinite and this sequence is finite, then this - /// is a no-op, unless this sequence contains a zero-length literal. In - /// which case, the infiniteness of `other` infects this sequence, and this - /// sequence is itself made infinite. - /// - /// Like [`Seq::union`], this may attempt to deduplicate literals. See - /// [`Seq::dedup`] for how deduplication deals with exact and inexact - /// literals. - /// - /// # Example - /// - /// This example shows basic usage and how exact and inexact literals - /// interact. 
- /// - /// ``` - /// use regex_syntax::hir::literal::{Literal, Seq}; - /// - /// let mut seq1 = Seq::from_iter([ - /// Literal::exact("foo"), - /// Literal::inexact("bar"), - /// ]); - /// let mut seq2 = Seq::from_iter([ - /// Literal::inexact("quux"), - /// Literal::exact("baz"), - /// ]); - /// seq1.cross_forward(&mut seq2); - /// - /// // The literals are pulled out of seq2. - /// assert_eq!(Some(0), seq2.len()); - /// - /// let expected = Seq::from_iter([ - /// Literal::inexact("fooquux"), - /// Literal::exact("foobaz"), - /// Literal::inexact("bar"), - /// ]); - /// assert_eq!(expected, seq1); - /// ``` - /// - /// This example shows the behavior of when `other` is an infinite - /// sequence. - /// - /// ``` - /// use regex_syntax::hir::literal::{Literal, Seq}; - /// - /// let mut seq1 = Seq::from_iter([ - /// Literal::exact("foo"), - /// Literal::inexact("bar"), - /// ]); - /// let mut seq2 = Seq::infinite(); - /// seq1.cross_forward(&mut seq2); - /// - /// // When seq2 is infinite, cross product doesn't add anything, but - /// // ensures all members of seq1 are inexact. - /// let expected = Seq::from_iter([ - /// Literal::inexact("foo"), - /// Literal::inexact("bar"), - /// ]); - /// assert_eq!(expected, seq1); - /// ``` - /// - /// This example is like the one above, but shows what happens when this - /// sequence contains an empty string. In this case, an infinite `other` - /// sequence infects this sequence (because the empty string means that - /// there are no finite prefixes): - /// - /// ``` - /// use regex_syntax::hir::literal::{Literal, Seq}; - /// - /// let mut seq1 = Seq::from_iter([ - /// Literal::exact("foo"), - /// Literal::exact(""), // inexact provokes same behavior - /// Literal::inexact("bar"), - /// ]); - /// let mut seq2 = Seq::infinite(); - /// seq1.cross_forward(&mut seq2); - /// - /// // seq1 is now infinite! - /// assert!(!seq1.is_finite()); - /// ``` - /// - /// This example shows the behavior of this sequence is infinite. 
- /// - /// ``` - /// use regex_syntax::hir::literal::{Literal, Seq}; - /// - /// let mut seq1 = Seq::infinite(); - /// let mut seq2 = Seq::from_iter([ - /// Literal::exact("foo"), - /// Literal::inexact("bar"), - /// ]); - /// seq1.cross_forward(&mut seq2); - /// - /// // seq1 remains unchanged. - /// assert!(!seq1.is_finite()); - /// // Even though the literals in seq2 weren't used, it was still drained. - /// assert_eq!(Some(0), seq2.len()); - /// ``` - #[inline] - pub fn cross_forward(&mut self, other: &mut Seq) { - let (lits1, lits2) = match self.cross_preamble(other) { - None => return, - Some((lits1, lits2)) => (lits1, lits2), - }; - let newcap = lits1.len().saturating_mul(lits2.len()); - for selflit in mem::replace(lits1, Vec::with_capacity(newcap)) { - if !selflit.is_exact() { - lits1.push(selflit); - continue; - } - for otherlit in lits2.iter() { - let mut newlit = Literal::exact(Vec::with_capacity( - selflit.len() + otherlit.len(), - )); - newlit.extend(&selflit); - newlit.extend(&otherlit); - if !otherlit.is_exact() { - newlit.make_inexact(); - } - lits1.push(newlit); - } - } - lits2.drain(..); - self.dedup(); - } - - /// Modify this sequence to contain the cross product between it and - /// the sequence given, where the sequences are treated as suffixes - /// instead of prefixes. Namely, the sequence `other` is *prepended* - /// to `self` (as opposed to `other` being *appended* to `self` in - /// [`Seq::cross_forward`]). - /// - /// The cross product only considers literals in this sequence that are - /// exact. That is, inexact literals are not extended. - /// - /// The literals are always drained from `other`, even if none are used. - /// This permits callers to reuse the sequence allocation elsewhere. - /// - /// If this sequence is infinite, then this is a no-op, regardless of what - /// `other` contains (and in this case, the literals are still drained from - /// `other`). 
If `other` is infinite and this sequence is finite, then this - /// is a no-op, unless this sequence contains a zero-length literal. In - /// which case, the infiniteness of `other` infects this sequence, and this - /// sequence is itself made infinite. - /// - /// Like [`Seq::union`], this may attempt to deduplicate literals. See - /// [`Seq::dedup`] for how deduplication deals with exact and inexact - /// literals. - /// - /// # Example - /// - /// This example shows basic usage and how exact and inexact literals - /// interact. - /// - /// ``` - /// use regex_syntax::hir::literal::{Literal, Seq}; - /// - /// let mut seq1 = Seq::from_iter([ - /// Literal::exact("foo"), - /// Literal::inexact("bar"), - /// ]); - /// let mut seq2 = Seq::from_iter([ - /// Literal::inexact("quux"), - /// Literal::exact("baz"), - /// ]); - /// seq1.cross_reverse(&mut seq2); - /// - /// // The literals are pulled out of seq2. - /// assert_eq!(Some(0), seq2.len()); - /// - /// let expected = Seq::from_iter([ - /// Literal::inexact("quuxfoo"), - /// Literal::inexact("bar"), - /// Literal::exact("bazfoo"), - /// ]); - /// assert_eq!(expected, seq1); - /// ``` - /// - /// This example shows the behavior of when `other` is an infinite - /// sequence. - /// - /// ``` - /// use regex_syntax::hir::literal::{Literal, Seq}; - /// - /// let mut seq1 = Seq::from_iter([ - /// Literal::exact("foo"), - /// Literal::inexact("bar"), - /// ]); - /// let mut seq2 = Seq::infinite(); - /// seq1.cross_reverse(&mut seq2); - /// - /// // When seq2 is infinite, cross product doesn't add anything, but - /// // ensures all members of seq1 are inexact. - /// let expected = Seq::from_iter([ - /// Literal::inexact("foo"), - /// Literal::inexact("bar"), - /// ]); - /// assert_eq!(expected, seq1); - /// ``` - /// - /// This example is like the one above, but shows what happens when this - /// sequence contains an empty string. 
In this case, an infinite `other` - /// sequence infects this sequence (because the empty string means that - /// there are no finite suffixes): - /// - /// ``` - /// use regex_syntax::hir::literal::{Literal, Seq}; - /// - /// let mut seq1 = Seq::from_iter([ - /// Literal::exact("foo"), - /// Literal::exact(""), // inexact provokes same behavior - /// Literal::inexact("bar"), - /// ]); - /// let mut seq2 = Seq::infinite(); - /// seq1.cross_reverse(&mut seq2); - /// - /// // seq1 is now infinite! - /// assert!(!seq1.is_finite()); - /// ``` - /// - /// This example shows the behavior when this sequence is infinite. - /// - /// ``` - /// use regex_syntax::hir::literal::{Literal, Seq}; - /// - /// let mut seq1 = Seq::infinite(); - /// let mut seq2 = Seq::from_iter([ - /// Literal::exact("foo"), - /// Literal::inexact("bar"), - /// ]); - /// seq1.cross_reverse(&mut seq2); - /// - /// // seq1 remains unchanged. - /// assert!(!seq1.is_finite()); - /// // Even though the literals in seq2 weren't used, it was still drained. - /// assert_eq!(Some(0), seq2.len()); - /// ``` - #[inline] - pub fn cross_reverse(&mut self, other: &mut Seq) { - let (lits1, lits2) = match self.cross_preamble(other) { - None => return, - Some((lits1, lits2)) => (lits1, lits2), - }; - // We basically proceed as we do in 'cross_forward' at this point, - // except that the outer loop is now 'other' and the inner loop is now - // 'self'. That's because 'self' corresponds to suffixes and 'other' - // corresponds to the sequence we want to *prepend* to the suffixes. - let newcap = lits1.len().saturating_mul(lits2.len()); - let selflits = mem::replace(lits1, Vec::with_capacity(newcap)); - for (i, otherlit) in lits2.drain(..).enumerate() { - for selflit in selflits.iter() { - if !selflit.is_exact() { - // If the suffix isn't exact, then we can't prepend - // anything to it. However, we still want to keep it. But - // we only want to keep one of them, to avoid duplication. 
- // (The duplication is okay from a correctness perspective, - // but wasteful.) - if i == 0 { - lits1.push(selflit.clone()); - } - continue; - } - let mut newlit = Literal::exact(Vec::with_capacity( - otherlit.len() + selflit.len(), - )); - newlit.extend(&otherlit); - newlit.extend(&selflit); - if !otherlit.is_exact() { - newlit.make_inexact(); - } - lits1.push(newlit); - } - } - self.dedup(); - } - - /// A helper function the corresponds to the subtle preamble for both - /// `cross_forward` and `cross_reverse`. In effect, it handles the cases - /// of infinite sequences for both `self` and `other`, as well as ensuring - /// that literals from `other` are drained even if they aren't used. - fn cross_preamble<'a>( - &'a mut self, - other: &'a mut Seq, - ) -> Option<(&'a mut Vec, &'a mut Vec)> { - let lits2 = match other.literals { - None => { - // If our current seq contains the empty string and the seq - // we're adding matches any literal, then it follows that the - // current seq must now also match any literal. - // - // Otherwise, we just have to make sure everything in this - // sequence is inexact. - if self.min_literal_len() == Some(0) { - *self = Seq::infinite(); - } else { - self.make_inexact(); - } - return None; - } - Some(ref mut lits) => lits, - }; - let lits1 = match self.literals { - None => { - // If we aren't going to make it to the end of this routine - // where lits2 is drained, then we need to do it now. - lits2.drain(..); - return None; - } - Some(ref mut lits) => lits, - }; - Some((lits1, lits2)) - } - - /// Unions the `other` sequence into this one. - /// - /// The literals are always drained out of the given `other` sequence, - /// even if they are being unioned into an infinite sequence. This permits - /// the caller to reuse the `other` sequence in another context. - /// - /// Some literal deduping may be performed. If any deduping happens, - /// any leftmost-first or "preference" order match semantics will be - /// preserved. 
- /// - /// # Example - /// - /// This example shows basic usage. - /// - /// ``` - /// use regex_syntax::hir::literal::Seq; - /// - /// let mut seq1 = Seq::new(&["foo", "bar"]); - /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]); - /// seq1.union(&mut seq2); - /// - /// // The literals are pulled out of seq2. - /// assert_eq!(Some(0), seq2.len()); - /// - /// // Adjacent literals are deduped, but non-adjacent literals may not be. - /// assert_eq!(Seq::new(&["foo", "bar", "quux", "foo"]), seq1); - /// ``` - /// - /// This example shows that literals are drained from `other` even when - /// they aren't necessarily used. - /// - /// ``` - /// use regex_syntax::hir::literal::Seq; - /// - /// let mut seq1 = Seq::infinite(); - /// // Infinite sequences have no finite length. - /// assert_eq!(None, seq1.len()); - /// - /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]); - /// seq1.union(&mut seq2); - /// - /// // seq1 is still infinite and seq2 has been drained. - /// assert_eq!(None, seq1.len()); - /// assert_eq!(Some(0), seq2.len()); - /// ``` - #[inline] - pub fn union(&mut self, other: &mut Seq) { - let lits2 = match other.literals { - None => { - // Unioning with an infinite sequence always results in an - // infinite sequence. - self.make_infinite(); - return; - } - Some(ref mut lits) => lits.drain(..), - }; - let lits1 = match self.literals { - None => return, - Some(ref mut lits) => lits, - }; - lits1.extend(lits2); - self.dedup(); - } - - /// Unions the `other` sequence into this one by splice the `other` - /// sequence at the position of the first zero-length literal. - /// - /// This is useful for preserving preference order semantics when combining - /// two literal sequences. For example, in the regex `(a||f)+foo`, the - /// correct preference order prefix sequence is `[a, foo, f]`. - /// - /// The literals are always drained out of the given `other` sequence, - /// even if they are being unioned into an infinite sequence. 
This permits - /// the caller to reuse the `other` sequence in another context. Note that - /// the literals are drained even if no union is performed as well, i.e., - /// when this sequence does not contain a zero-length literal. - /// - /// Some literal deduping may be performed. If any deduping happens, - /// any leftmost-first or "preference" order match semantics will be - /// preserved. - /// - /// # Example - /// - /// This example shows basic usage. - /// - /// ``` - /// use regex_syntax::hir::literal::Seq; - /// - /// let mut seq1 = Seq::new(&["a", "", "f", ""]); - /// let mut seq2 = Seq::new(&["foo"]); - /// seq1.union_into_empty(&mut seq2); - /// - /// // The literals are pulled out of seq2. - /// assert_eq!(Some(0), seq2.len()); - /// // 'foo' gets spliced into seq1 where the first empty string occurs. - /// assert_eq!(Seq::new(&["a", "foo", "f"]), seq1); - /// ``` - /// - /// This example shows that literals are drained from `other` even when - /// they aren't necessarily used. - /// - /// ``` - /// use regex_syntax::hir::literal::Seq; - /// - /// let mut seq1 = Seq::new(&["foo", "bar"]); - /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]); - /// seq1.union_into_empty(&mut seq2); - /// - /// // seq1 has no zero length literals, so no splicing happens. - /// assert_eq!(Seq::new(&["foo", "bar"]), seq1); - /// // Even though no splicing happens, seq2 is still drained. - /// assert_eq!(Some(0), seq2.len()); - /// ``` - #[inline] - pub fn union_into_empty(&mut self, other: &mut Seq) { - let lits2 = other.literals.as_mut().map(|lits| lits.drain(..)); - let lits1 = match self.literals { - None => return, - Some(ref mut lits) => lits, - }; - let first_empty = match lits1.iter().position(|m| m.is_empty()) { - None => return, - Some(i) => i, - }; - let lits2 = match lits2 { - None => { - // Note that we are only here if we've found an empty literal, - // which implies that an infinite sequence infects this seq and - // also turns it into an infinite sequence. 
- self.literals = None; - return; - } - Some(lits) => lits, - }; - // Clearing out the empties needs to come before the splice because - // the splice might add more empties that we don't want to get rid - // of. Since we're splicing into the position of the first empty, the - // 'first_empty' position computed above is still correct. - lits1.retain(|m| !m.is_empty()); - lits1.splice(first_empty..first_empty, lits2); - self.dedup(); - } - - /// Deduplicate adjacent equivalent literals in this sequence. - /// - /// If adjacent literals are equivalent strings but one is exact and the - /// other inexact, the inexact literal is kept and the exact one is - /// removed. - /// - /// Deduping an infinite sequence is a no-op. - /// - /// # Example - /// - /// This example shows how literals that are duplicate byte strings but - /// are not equivalent with respect to exactness are resolved. - /// - /// ``` - /// use regex_syntax::hir::literal::{Literal, Seq}; - /// - /// let mut seq = Seq::from_iter([ - /// Literal::exact("foo"), - /// Literal::inexact("foo"), - /// ]); - /// seq.dedup(); - /// - /// assert_eq!(Seq::from_iter([Literal::inexact("foo")]), seq); - /// ``` - #[inline] - pub fn dedup(&mut self) { - if let Some(ref mut lits) = self.literals { - lits.dedup_by(|lit1, lit2| { - if lit1.as_bytes() != lit2.as_bytes() { - return false; - } - if lit1.is_exact() != lit2.is_exact() { - lit1.make_inexact(); - lit2.make_inexact(); - } - true - }); - } - } - - /// Sorts this sequence of literals lexicographically. - /// - /// Note that if, before sorting, if a literal that is a prefix of another - /// literal appears after it, then after sorting, the sequence will not - /// represent the same preference order match semantics. For example, - /// sorting the sequence `[samwise, sam]` yields the sequence `[sam, - /// samwise]`. Under preference order semantics, the latter sequence will - /// never match `samwise` where as the first sequence can. 
- /// - /// # Example - /// - /// This example shows basic usage. - /// - /// ``` - /// use regex_syntax::hir::literal::Seq; - /// - /// let mut seq = Seq::new(&["foo", "quux", "bar"]); - /// seq.sort(); - /// - /// assert_eq!(Seq::new(&["bar", "foo", "quux"]), seq); - /// ``` - #[inline] - pub fn sort(&mut self) { - if let Some(ref mut lits) = self.literals { - lits.sort(); - } - } - - /// Reverses all of the literals in this sequence. - /// - /// The order of the sequence itself is preserved. - /// - /// # Example - /// - /// This example shows basic usage. - /// - /// ``` - /// use regex_syntax::hir::literal::Seq; - /// - /// let mut seq = Seq::new(&["oof", "rab"]); - /// seq.reverse_literals(); - /// assert_eq!(Seq::new(&["foo", "bar"]), seq); - /// ``` - #[inline] - pub fn reverse_literals(&mut self) { - if let Some(ref mut lits) = self.literals { - for lit in lits.iter_mut() { - lit.reverse(); - } - } - } - - /// Shrinks this seq to its minimal size while respecting the preference - /// order of its literals. - /// - /// While this routine will remove duplicate literals from this seq, it - /// will also remove literals that can never match in a leftmost-first or - /// "preference order" search. Similar to [`Seq::dedup`], if a literal is - /// deduped, then the one that remains is made inexact. - /// - /// This is a no-op on seqs that are empty or not finite. - /// - /// # Example - /// - /// This example shows the difference between `{sam, samwise}` and - /// `{samwise, sam}`. - /// - /// ``` - /// use regex_syntax::hir::literal::{Literal, Seq}; - /// - /// // If 'sam' comes before 'samwise' and a preference order search is - /// // executed, then 'samwise' can never match. - /// let mut seq = Seq::new(&["sam", "samwise"]); - /// seq.minimize_by_preference(); - /// assert_eq!(Seq::from_iter([Literal::inexact("sam")]), seq); - /// - /// // But if they are reversed, then it's possible for 'samwise' to match - /// // since it is given higher preference. 
- /// let mut seq = Seq::new(&["samwise", "sam"]); - /// seq.minimize_by_preference(); - /// assert_eq!(Seq::new(&["samwise", "sam"]), seq); - /// ``` - /// - /// This example shows that if an empty string is in this seq, then - /// anything that comes after it can never match. - /// - /// ``` - /// use regex_syntax::hir::literal::{Literal, Seq}; - /// - /// // An empty string is a prefix of all strings, so it automatically - /// // inhibits any subsequent strings from matching. - /// let mut seq = Seq::new(&["foo", "bar", "", "quux", "fox"]); - /// seq.minimize_by_preference(); - /// let expected = Seq::from_iter([ - /// Literal::exact("foo"), - /// Literal::exact("bar"), - /// Literal::inexact(""), - /// ]); - /// assert_eq!(expected, seq); - /// - /// // And of course, if it's at the beginning, then it makes it impossible - /// // for anything else to match. - /// let mut seq = Seq::new(&["", "foo", "quux", "fox"]); - /// seq.minimize_by_preference(); - /// assert_eq!(Seq::from_iter([Literal::inexact("")]), seq); - /// ``` - #[inline] - pub fn minimize_by_preference(&mut self) { - if let Some(ref mut lits) = self.literals { - PreferenceTrie::minimize(lits, false); - } - } - - /// Trims all literals in this seq such that only the first `len` bytes - /// remain. If a literal has less than or equal to `len` bytes, then it - /// remains unchanged. Otherwise, it is trimmed and made inexact. 
- /// - /// # Example - /// - /// ``` - /// use regex_syntax::hir::literal::{Literal, Seq}; - /// - /// let mut seq = Seq::new(&["a", "foo", "quux"]); - /// seq.keep_first_bytes(2); - /// - /// let expected = Seq::from_iter([ - /// Literal::exact("a"), - /// Literal::inexact("fo"), - /// Literal::inexact("qu"), - /// ]); - /// assert_eq!(expected, seq); - /// ``` - #[inline] - pub fn keep_first_bytes(&mut self, len: usize) { - if let Some(ref mut lits) = self.literals { - for m in lits.iter_mut() { - m.keep_first_bytes(len); - } - } - } - - /// Trims all literals in this seq such that only the last `len` bytes - /// remain. If a literal has less than or equal to `len` bytes, then it - /// remains unchanged. Otherwise, it is trimmed and made inexact. - /// - /// # Example - /// - /// ``` - /// use regex_syntax::hir::literal::{Literal, Seq}; - /// - /// let mut seq = Seq::new(&["a", "foo", "quux"]); - /// seq.keep_last_bytes(2); - /// - /// let expected = Seq::from_iter([ - /// Literal::exact("a"), - /// Literal::inexact("oo"), - /// Literal::inexact("ux"), - /// ]); - /// assert_eq!(expected, seq); - /// ``` - #[inline] - pub fn keep_last_bytes(&mut self, len: usize) { - if let Some(ref mut lits) = self.literals { - for m in lits.iter_mut() { - m.keep_last_bytes(len); - } - } - } - - /// Returns true if this sequence is finite. - /// - /// When false, this sequence is infinite and must be treated as if it - /// contains every possible literal. - #[inline] - pub fn is_finite(&self) -> bool { - self.literals.is_some() - } - - /// Returns true if and only if this sequence is finite and empty. - /// - /// An empty sequence never matches anything. It can only be produced by - /// literal extraction when the corresponding regex itself cannot match. - #[inline] - pub fn is_empty(&self) -> bool { - self.len() == Some(0) - } - - /// Returns the number of literals in this sequence if the sequence is - /// finite. If the sequence is infinite, then `None` is returned. 
- #[inline] - pub fn len(&self) -> Option { - self.literals.as_ref().map(|lits| lits.len()) - } - - /// Returns true if and only if all literals in this sequence are exact. - /// - /// This returns false if the sequence is infinite. - #[inline] - pub fn is_exact(&self) -> bool { - self.literals().map_or(false, |lits| lits.iter().all(|x| x.is_exact())) - } - - /// Returns true if and only if all literals in this sequence are inexact. - /// - /// This returns true if the sequence is infinite. - #[inline] - pub fn is_inexact(&self) -> bool { - self.literals().map_or(true, |lits| lits.iter().all(|x| !x.is_exact())) - } - - /// Return the maximum length of the sequence that would result from - /// unioning `self` with `other`. If either set is infinite, then this - /// returns `None`. - #[inline] - pub fn max_union_len(&self, other: &Seq) -> Option { - let len1 = self.len()?; - let len2 = other.len()?; - Some(len1.saturating_add(len2)) - } - - /// Return the maximum length of the sequence that would result from the - /// cross product of `self` with `other`. If either set is infinite, then - /// this returns `None`. - #[inline] - pub fn max_cross_len(&self, other: &Seq) -> Option { - let len1 = self.len()?; - let len2 = other.len()?; - Some(len1.saturating_mul(len2)) - } - - /// Returns the length of the shortest literal in this sequence. - /// - /// If the sequence is infinite or empty, then this returns `None`. - #[inline] - pub fn min_literal_len(&self) -> Option { - self.literals.as_ref()?.iter().map(|x| x.len()).min() - } - - /// Returns the length of the longest literal in this sequence. - /// - /// If the sequence is infinite or empty, then this returns `None`. - #[inline] - pub fn max_literal_len(&self) -> Option { - self.literals.as_ref()?.iter().map(|x| x.len()).max() - } - - /// Returns the longest common prefix from this seq. 
- /// - /// If the seq matches any literal or other contains no literals, then - /// there is no meaningful prefix and this returns `None`. - /// - /// # Example - /// - /// This shows some example seqs and their longest common prefix. - /// - /// ``` - /// use regex_syntax::hir::literal::Seq; - /// - /// let seq = Seq::new(&["foo", "foobar", "fo"]); - /// assert_eq!(Some(&b"fo"[..]), seq.longest_common_prefix()); - /// let seq = Seq::new(&["foo", "foo"]); - /// assert_eq!(Some(&b"foo"[..]), seq.longest_common_prefix()); - /// let seq = Seq::new(&["foo", "bar"]); - /// assert_eq!(Some(&b""[..]), seq.longest_common_prefix()); - /// let seq = Seq::new(&[""]); - /// assert_eq!(Some(&b""[..]), seq.longest_common_prefix()); - /// - /// let seq = Seq::infinite(); - /// assert_eq!(None, seq.longest_common_prefix()); - /// let seq = Seq::empty(); - /// assert_eq!(None, seq.longest_common_prefix()); - /// ``` - #[inline] - pub fn longest_common_prefix(&self) -> Option<&[u8]> { - // If we match everything or match nothing, then there's no meaningful - // longest common prefix. - let lits = match self.literals { - None => return None, - Some(ref lits) => lits, - }; - if lits.len() == 0 { - return None; - } - let base = lits[0].as_bytes(); - let mut len = base.len(); - for m in lits.iter().skip(1) { - len = m - .as_bytes() - .iter() - .zip(base[..len].iter()) - .take_while(|&(a, b)| a == b) - .count(); - if len == 0 { - return Some(&[]); - } - } - Some(&base[..len]) - } - - /// Returns the longest common suffix from this seq. - /// - /// If the seq matches any literal or other contains no literals, then - /// there is no meaningful suffix and this returns `None`. - /// - /// # Example - /// - /// This shows some example seqs and their longest common suffix. 
- /// - /// ``` - /// use regex_syntax::hir::literal::Seq; - /// - /// let seq = Seq::new(&["oof", "raboof", "of"]); - /// assert_eq!(Some(&b"of"[..]), seq.longest_common_suffix()); - /// let seq = Seq::new(&["foo", "foo"]); - /// assert_eq!(Some(&b"foo"[..]), seq.longest_common_suffix()); - /// let seq = Seq::new(&["foo", "bar"]); - /// assert_eq!(Some(&b""[..]), seq.longest_common_suffix()); - /// let seq = Seq::new(&[""]); - /// assert_eq!(Some(&b""[..]), seq.longest_common_suffix()); - /// - /// let seq = Seq::infinite(); - /// assert_eq!(None, seq.longest_common_suffix()); - /// let seq = Seq::empty(); - /// assert_eq!(None, seq.longest_common_suffix()); - /// ``` - #[inline] - pub fn longest_common_suffix(&self) -> Option<&[u8]> { - // If we match everything or match nothing, then there's no meaningful - // longest common suffix. - let lits = match self.literals { - None => return None, - Some(ref lits) => lits, - }; - if lits.len() == 0 { - return None; - } - let base = lits[0].as_bytes(); - let mut len = base.len(); - for m in lits.iter().skip(1) { - len = m - .as_bytes() - .iter() - .rev() - .zip(base[base.len() - len..].iter().rev()) - .take_while(|&(a, b)| a == b) - .count(); - if len == 0 { - return Some(&[]); - } - } - Some(&base[base.len() - len..]) - } - - /// Optimizes this seq while treating its literals as prefixes and - /// respecting the preference order of its literals. - /// - /// The specific way "optimization" works is meant to be an implementation - /// detail, as it essentially represents a set of heuristics. The goal - /// that optimization tries to accomplish is to make the literals in this - /// set reflect inputs that will result in a more effective prefilter. - /// Principally by reducing the false positive rate of candidates found by - /// the literals in this sequence. That is, when a match of a literal is - /// found, we would like it to be a strong predictor of the overall match - /// of the regex. 
If it isn't, then much time will be spent starting and - /// stopping the prefilter search and attempting to confirm the match only - /// to have it fail. - /// - /// Some of those heuristics might be: - /// - /// * Identifying a common prefix from a larger sequence of literals, and - /// shrinking the sequence down to that single common prefix. - /// * Rejecting the sequence entirely if it is believed to result in very - /// high false positive rate. When this happens, the sequence is made - /// infinite. - /// * Shrinking the sequence to a smaller number of literals representing - /// prefixes, but not shrinking it so much as to make literals too short. - /// (A sequence with very short literals, of 1 or 2 bytes, will typically - /// result in a higher false positive rate.) - /// - /// Optimization should only be run once extraction is complete. Namely, - /// optimization may make assumptions that do not compose with other - /// operations in the middle of extraction. For example, optimization will - /// reduce `[E(sam), E(samwise)]` to `[E(sam)]`, but such a transformation - /// is only valid if no other extraction will occur. If other extraction - /// may occur, then the correct transformation would be to `[I(sam)]`. - /// - /// The [`Seq::optimize_for_suffix_by_preference`] does the same thing, but - /// for suffixes. - /// - /// # Example - /// - /// This shows how optimization might transform a sequence. Note that - /// the specific behavior is not a documented guarantee. The heuristics - /// used are an implementation detail and may change over time in semver - /// compatible releases. 
- /// - /// ``` - /// use regex_syntax::hir::literal::{Seq, Literal}; - /// - /// let mut seq = Seq::new(&[ - /// "samantha", - /// "sam", - /// "samwise", - /// "frodo", - /// ]); - /// seq.optimize_for_prefix_by_preference(); - /// assert_eq!(Seq::from_iter([ - /// Literal::exact("samantha"), - /// // Kept exact even though 'samwise' got pruned - /// // because optimization assumes literal extraction - /// // has finished. - /// Literal::exact("sam"), - /// Literal::exact("frodo"), - /// ]), seq); - /// ``` - /// - /// # Example: optimization may make the sequence infinite - /// - /// If the heuristics deem that the sequence could cause a very high false - /// positive rate, then it may make the sequence infinite, effectively - /// disabling its use as a prefilter. - /// - /// ``` - /// use regex_syntax::hir::literal::{Seq, Literal}; - /// - /// let mut seq = Seq::new(&[ - /// "samantha", - /// // An empty string matches at every position, - /// // thus rendering the prefilter completely - /// // ineffective. - /// "", - /// "sam", - /// "samwise", - /// "frodo", - /// ]); - /// seq.optimize_for_prefix_by_preference(); - /// assert!(!seq.is_finite()); - /// ``` - /// - /// Do note that just because there is a `" "` in the sequence, that - /// doesn't mean the sequence will always be made infinite after it is - /// optimized. Namely, if the sequence is considered exact (any match - /// corresponds to an overall match of the original regex), then any match - /// is an overall match, and so the false positive rate is always `0`. - /// - /// To demonstrate this, we remove `samwise` from our sequence. This - /// results in no optimization happening and all literals remain exact. 
- /// Thus the entire sequence is exact, and it is kept as-is, even though - /// one is an ASCII space: - /// - /// ``` - /// use regex_syntax::hir::literal::{Seq, Literal}; - /// - /// let mut seq = Seq::new(&[ - /// "samantha", - /// " ", - /// "sam", - /// "frodo", - /// ]); - /// seq.optimize_for_prefix_by_preference(); - /// assert!(seq.is_finite()); - /// ``` - #[inline] - pub fn optimize_for_prefix_by_preference(&mut self) { - self.optimize_by_preference(true); - } - - /// Optimizes this seq while treating its literals as suffixes and - /// respecting the preference order of its literals. - /// - /// Optimization should only be run once extraction is complete. - /// - /// The [`Seq::optimize_for_prefix_by_preference`] does the same thing, but - /// for prefixes. See its documentation for more explanation. - #[inline] - pub fn optimize_for_suffix_by_preference(&mut self) { - self.optimize_by_preference(false); - } - - fn optimize_by_preference(&mut self, prefix: bool) { - let origlen = match self.len() { - None => return, - Some(len) => len, - }; - // Just give up now if our sequence contains an empty string. - if self.min_literal_len().map_or(false, |len| len == 0) { - // We squash the sequence so that nobody else gets any bright - // ideas to try and use it. An empty string implies a match at - // every position. A prefilter cannot help you here. - self.make_infinite(); - return; - } - // Make sure we start with the smallest sequence possible. We use a - // special version of preference minimization that retains exactness. - // This is legal because optimization is only expected to occur once - // extraction is complete. - if prefix { - if let Some(ref mut lits) = self.literals { - PreferenceTrie::minimize(lits, true); - } - } - - // Look for a common prefix (or suffix). If we found one of those and - // it's long enough, then it's a good bet that it will be our fastest - // possible prefilter since single-substring search is so fast. 
- let fix = if prefix { - self.longest_common_prefix() - } else { - self.longest_common_suffix() - }; - if let Some(fix) = fix { - // As a special case, if we have a common prefix and the leading - // byte of that prefix is one that we think probably occurs rarely, - // then strip everything down to just that single byte. This should - // promote the use of memchr. - // - // ... we only do this though if our sequence has more than one - // literal. Otherwise, we'd rather just stick with a single literal - // scan. That is, using memchr is probably better than looking - // for 2 or more literals, but probably not as good as a straight - // memmem search. - // - // ... and also only do this when the prefix is short and probably - // not too discriminatory anyway. If it's longer, then it's - // probably quite discriminatory and thus is likely to have a low - // false positive rate. - if prefix - && origlen > 1 - && fix.len() >= 1 - && fix.len() <= 3 - && rank(fix[0]) < 200 - { - self.keep_first_bytes(1); - self.dedup(); - return; - } - // We only strip down to the common prefix/suffix if we think - // the existing set of literals isn't great, or if the common - // prefix/suffix is expected to be particularly discriminatory. - let isfast = - self.is_exact() && self.len().map_or(false, |len| len <= 16); - let usefix = fix.len() > 4 || (fix.len() > 1 && !isfast); - if usefix { - // If we keep exactly the number of bytes equal to the length - // of the prefix (or suffix), then by the definition of a - // prefix, every literal in the sequence will be equivalent. - // Thus, 'dedup' will leave us with one literal. - // - // We do it this way to avoid an alloc, but also to make sure - // the exactness of literals is kept (or not). - if prefix { - self.keep_first_bytes(fix.len()); - } else { - self.keep_last_bytes(fix.len()); - } - self.dedup(); - assert_eq!(Some(1), self.len()); - // We still fall through here. 
In particular, we want our - // longest common prefix to be subject to the poison check. - } - } - // If we have an exact sequence, we *probably* just want to keep it - // as-is. But there are some cases where we don't. So we save a copy of - // the exact sequence now, and then try to do some more optimizations - // below. If those don't work out, we go back to this exact sequence. - // - // The specific motivation for this is that we sometimes wind up with - // an exact sequence with a hefty number of literals. Say, 100. If we - // stuck with that, it would be too big for Teddy and would result in - // using Aho-Corasick. Which is fine... but the lazy DFA is plenty - // suitable in such cases. The real issue is that we will wind up not - // using a fast prefilter at all. So in cases like this, even though - // we have an exact sequence, it would be better to try and shrink the - // sequence (which we do below) and use it as a prefilter that can - // produce false positive matches. - // - // But if the shrinking below results in a sequence that "sucks," then - // we don't want to use that because we already have an exact sequence - // in hand. - let exact: Option = - if self.is_exact() { Some(self.clone()) } else { None }; - // Now we attempt to shorten the sequence. The idea here is that we - // don't want to look for too many literals, but we want to shorten - // our sequence enough to improve our odds of using better algorithms - // downstream (such as Teddy). - // - // The pair of numbers in this list corresponds to the maximal prefix - // (in bytes) to keep for all literals and the length of the sequence - // at which to do it. - // - // So for example, the pair (3, 500) would mean, "if we have more than - // 500 literals in our sequence, then truncate all of our literals - // such that they are at most 3 bytes in length and the minimize the - // sequence." 
- const ATTEMPTS: [(usize, usize); 5] = - [(5, 10), (4, 10), (3, 64), (2, 64), (1, 10)]; - for (keep, limit) in ATTEMPTS { - let len = match self.len() { - None => break, - Some(len) => len, - }; - if len <= limit { - break; - } - if prefix { - self.keep_first_bytes(keep); - } else { - self.keep_last_bytes(keep); - } - if prefix { - if let Some(ref mut lits) = self.literals { - PreferenceTrie::minimize(lits, true); - } - } - } - // Check for a poison literal. A poison literal is one that is short - // and is believed to have a very high match count. These poisons - // generally lead to a prefilter with a very high false positive rate, - // and thus overall worse performance. - // - // We do this last because we could have gone from a non-poisonous - // sequence to a poisonous one. Perhaps we should add some code to - // prevent such transitions in the first place, but then again, we - // likely only made the transition in the first place if the sequence - // was itself huge. And huge sequences are themselves poisonous. So... - if let Some(lits) = self.literals() { - if lits.iter().any(|lit| lit.is_poisonous()) { - self.make_infinite(); - } - } - // OK, if we had an exact sequence before attempting more optimizations - // above and our post-optimized sequence sucks for some reason or - // another, then we go back to the exact sequence. - if let Some(exact) = exact { - // If optimizing resulted in dropping our literals, then certainly - // backup and use the exact sequence that we had. - if !self.is_finite() { - *self = exact; - return; - } - // If our optimized sequence contains a short literal, then it's - // *probably* not so great. So throw it away and revert to the - // exact sequence. - if self.min_literal_len().map_or(true, |len| len <= 2) { - *self = exact; - return; - } - // Finally, if our optimized sequence is "big" (i.e., can't use - // Teddy), then also don't use it and rely on the exact sequence. 
- if self.len().map_or(true, |len| len > 64) { - *self = exact; - return; - } - } - } -} - -impl core::fmt::Debug for Seq { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - write!(f, "Seq")?; - if let Some(lits) = self.literals() { - f.debug_list().entries(lits.iter()).finish() - } else { - write!(f, "[∞]") - } - } -} - -impl FromIterator for Seq { - fn from_iter>(it: T) -> Seq { - let mut seq = Seq::empty(); - for literal in it { - seq.push(literal); - } - seq - } -} - -/// A single literal extracted from an [`Hir`] expression. -/// -/// A literal is composed of two things: -/// -/// * A sequence of bytes. No guarantees with respect to UTF-8 are provided. -/// In particular, even if the regex a literal is extracted from is UTF-8, the -/// literal extracted may not be valid UTF-8. (For example, if an [`Extractor`] -/// limit resulted in trimming a literal in a way that splits a codepoint.) -/// * Whether the literal is "exact" or not. An "exact" literal means that it -/// has not been trimmed, and may continue to be extended. If a literal is -/// "exact" after visiting the entire `Hir` expression, then this implies that -/// the literal leads to a match state. (Although it doesn't necessarily imply -/// all occurrences of the literal correspond to a match of the regex, since -/// literal extraction ignores look-around assertions.) -#[derive(Clone, Eq, PartialEq, PartialOrd, Ord)] -pub struct Literal { - bytes: Vec, - exact: bool, -} - -impl Literal { - /// Returns a new exact literal containing the bytes given. - #[inline] - pub fn exact>>(bytes: B) -> Literal { - Literal { bytes: bytes.into(), exact: true } - } - - /// Returns a new inexact literal containing the bytes given. - #[inline] - pub fn inexact>>(bytes: B) -> Literal { - Literal { bytes: bytes.into(), exact: false } - } - - /// Returns the bytes in this literal. 
- #[inline] - pub fn as_bytes(&self) -> &[u8] { - &self.bytes - } - - /// Yields ownership of the bytes inside this literal. - /// - /// Note that this throws away whether the literal is "exact" or not. - #[inline] - pub fn into_bytes(self) -> Vec { - self.bytes - } - - /// Returns the length of this literal in bytes. - #[inline] - pub fn len(&self) -> usize { - self.as_bytes().len() - } - - /// Returns true if and only if this literal has zero bytes. - #[inline] - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns true if and only if this literal is exact. - #[inline] - pub fn is_exact(&self) -> bool { - self.exact - } - - /// Marks this literal as inexact. - /// - /// Inexact literals can never be extended. For example, - /// [`Seq::cross_forward`] will not extend inexact literals. - #[inline] - pub fn make_inexact(&mut self) { - self.exact = false; - } - - /// Reverse the bytes in this literal. - #[inline] - pub fn reverse(&mut self) { - self.bytes.reverse(); - } - - /// Extend this literal with the literal given. - /// - /// If this literal is inexact, then this is a no-op. - #[inline] - pub fn extend(&mut self, lit: &Literal) { - if !self.is_exact() { - return; - } - self.bytes.extend_from_slice(&lit.bytes); - } - - /// Trims this literal such that only the first `len` bytes remain. If - /// this literal has fewer than `len` bytes, then it remains unchanged. - /// Otherwise, the literal is marked as inexact. - #[inline] - pub fn keep_first_bytes(&mut self, len: usize) { - if len >= self.len() { - return; - } - self.make_inexact(); - self.bytes.truncate(len); - } - - /// Trims this literal such that only the last `len` bytes remain. If this - /// literal has fewer than `len` bytes, then it remains unchanged. - /// Otherwise, the literal is marked as inexact. 
- #[inline] - pub fn keep_last_bytes(&mut self, len: usize) { - if len >= self.len() { - return; - } - self.make_inexact(); - self.bytes.drain(..self.len() - len); - } - - /// Returns true if it is believe that this literal is likely to match very - /// frequently, and is thus not a good candidate for a prefilter. - fn is_poisonous(&self) -> bool { - self.is_empty() || (self.len() == 1 && rank(self.as_bytes()[0]) >= 250) - } -} - -impl From for Literal { - fn from(byte: u8) -> Literal { - Literal::exact(vec![byte]) - } -} - -impl From for Literal { - fn from(ch: char) -> Literal { - use alloc::string::ToString; - Literal::exact(ch.encode_utf8(&mut [0; 4]).to_string()) - } -} - -impl AsRef<[u8]> for Literal { - fn as_ref(&self) -> &[u8] { - self.as_bytes() - } -} - -impl core::fmt::Debug for Literal { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - let tag = if self.exact { "E" } else { "I" }; - f.debug_tuple(tag) - .field(&crate::debug::Bytes(self.as_bytes())) - .finish() - } -} - -/// A "preference" trie that rejects literals that will never match when -/// executing a leftmost first or "preference" search. -/// -/// For example, if 'sam' is inserted, then trying to insert 'samwise' will be -/// rejected because 'samwise' can never match since 'sam' will always take -/// priority. However, if 'samwise' is inserted first, then inserting 'sam' -/// after it is accepted. In this case, either 'samwise' or 'sam' can match in -/// a "preference" search. -/// -/// Note that we only use this trie as a "set." That is, given a sequence of -/// literals, we insert each one in order. An `insert` will reject a literal -/// if a prefix of that literal already exists in the trie. Thus, to rebuild -/// the "minimal" sequence, we simply only keep literals that were successfully -/// inserted. 
(Since we don't need traversal, one wonders whether we can make -/// some simplifications here, but I haven't given it a ton of thought and I've -/// never seen this show up on a profile. Because of the heuristic limits -/// imposed on literal extractions, the size of the inputs here is usually -/// very small.) -#[derive(Debug)] -struct PreferenceTrie { - /// The states in this trie. The index of a state in this vector is its ID. - states: Vec, - /// This vec indicates which states are match states. It always has - /// the same length as `states` and is indexed by the same state ID. - /// A state with identifier `sid` is a match state if and only if - /// `matches[sid].is_some()`. The option contains the index of the literal - /// corresponding to the match. The index is offset by 1 so that it fits in - /// a NonZeroUsize. - matches: Vec>, - /// The index to allocate to the next literal added to this trie. Starts at - /// 1 and increments by 1 for every literal successfully added to the trie. - next_literal_index: usize, -} - -/// A single state in a trie. Uses a sparse representation for its transitions. -#[derive(Debug, Default)] -struct State { - /// Sparse representation of the transitions out of this state. Transitions - /// are sorted by byte. There is at most one such transition for any - /// particular byte. - trans: Vec<(u8, usize)>, -} - -impl PreferenceTrie { - /// Minimizes the given sequence of literals while preserving preference - /// order semantics. - /// - /// When `keep_exact` is true, the exactness of every literal retained is - /// kept. This is useful when dealing with a fully extracted `Seq` that - /// only contains exact literals. In that case, we can keep all retained - /// literals as exact because we know we'll never need to match anything - /// after them and because any removed literals are guaranteed to never - /// match. 
- fn minimize(literals: &mut Vec, keep_exact: bool) { - let mut trie = PreferenceTrie { - states: vec![], - matches: vec![], - next_literal_index: 1, - }; - let mut make_inexact = vec![]; - literals.retain_mut(|lit| match trie.insert(lit.as_bytes()) { - Ok(_) => true, - Err(i) => { - if !keep_exact { - make_inexact.push(i.checked_sub(1).unwrap()); - } - false - } - }); - for i in make_inexact { - literals[i].make_inexact(); - } - } - - /// Returns `Ok` if the given byte string is accepted into this trie and - /// `Err` otherwise. The index for the success case corresponds to the - /// index of the literal added. The index for the error case corresponds to - /// the index of the literal already in the trie that prevented the given - /// byte string from being added. (Which implies it is a prefix of the one - /// given.) - /// - /// In short, the byte string given is accepted into the trie if and only - /// if it is possible for it to match when executing a preference order - /// search. - fn insert(&mut self, bytes: &[u8]) -> Result { - let mut prev = self.root(); - if let Some(idx) = self.matches[prev] { - return Err(idx.get()); - } - for &b in bytes.iter() { - match self.states[prev].trans.binary_search_by_key(&b, |t| t.0) { - Ok(i) => { - prev = self.states[prev].trans[i].1; - if let Some(idx) = self.matches[prev] { - return Err(idx.get()); - } - } - Err(i) => { - let next = self.create_state(); - self.states[prev].trans.insert(i, (b, next)); - prev = next; - } - } - } - let idx = self.next_literal_index; - self.next_literal_index += 1; - self.matches[prev] = NonZeroUsize::new(idx); - Ok(idx) - } - - /// Returns the root state ID, and if it doesn't exist, creates it. - fn root(&mut self) -> usize { - if !self.states.is_empty() { - 0 - } else { - self.create_state() - } - } - - /// Creates a new empty state and returns its ID. 
- fn create_state(&mut self) -> usize { - let id = self.states.len(); - self.states.push(State::default()); - self.matches.push(None); - id - } -} - -/// Returns the "rank" of the given byte. -/// -/// The minimum rank value is `0` and the maximum rank value is `255`. -/// -/// The rank of a byte is derived from a heuristic background distribution of -/// relative frequencies of bytes. The heuristic says that lower the rank of a -/// byte, the less likely that byte is to appear in any arbitrary haystack. -pub fn rank(byte: u8) -> u8 { - crate::rank::BYTE_FREQUENCIES[usize::from(byte)] -} - -#[cfg(test)] -mod tests { - use super::*; - - fn parse(pattern: &str) -> Hir { - crate::ParserBuilder::new().utf8(false).build().parse(pattern).unwrap() - } - - fn prefixes(pattern: &str) -> Seq { - Extractor::new().kind(ExtractKind::Prefix).extract(&parse(pattern)) - } - - fn suffixes(pattern: &str) -> Seq { - Extractor::new().kind(ExtractKind::Suffix).extract(&parse(pattern)) - } - - fn e(pattern: &str) -> (Seq, Seq) { - (prefixes(pattern), suffixes(pattern)) - } - - #[allow(non_snake_case)] - fn E(x: &str) -> Literal { - Literal::exact(x.as_bytes()) - } - - #[allow(non_snake_case)] - fn I(x: &str) -> Literal { - Literal::inexact(x.as_bytes()) - } - - fn seq>(it: I) -> Seq { - Seq::from_iter(it) - } - - fn infinite() -> (Seq, Seq) { - (Seq::infinite(), Seq::infinite()) - } - - fn inexact(it1: I1, it2: I2) -> (Seq, Seq) - where - I1: IntoIterator, - I2: IntoIterator, - { - (Seq::from_iter(it1), Seq::from_iter(it2)) - } - - fn exact, I: IntoIterator>(it: I) -> (Seq, Seq) { - let s1 = Seq::new(it); - let s2 = s1.clone(); - (s1, s2) - } - - fn opt, I: IntoIterator>(it: I) -> (Seq, Seq) { - let (mut p, mut s) = exact(it); - p.optimize_for_prefix_by_preference(); - s.optimize_for_suffix_by_preference(); - (p, s) - } - - #[test] - fn literal() { - assert_eq!(exact(["a"]), e("a")); - assert_eq!(exact(["aaaaa"]), e("aaaaa")); - assert_eq!(exact(["A", "a"]), e("(?i-u)a")); - 
assert_eq!(exact(["AB", "Ab", "aB", "ab"]), e("(?i-u)ab")); - assert_eq!(exact(["abC", "abc"]), e("ab(?i-u)c")); - - assert_eq!(exact([b"\xFF"]), e(r"(?-u:\xFF)")); - - #[cfg(feature = "unicode-case")] - { - assert_eq!(exact(["☃"]), e("☃")); - assert_eq!(exact(["☃"]), e("(?i)☃")); - assert_eq!(exact(["☃☃☃☃☃"]), e("☃☃☃☃☃")); - - assert_eq!(exact(["Δ"]), e("Δ")); - assert_eq!(exact(["δ"]), e("δ")); - assert_eq!(exact(["Δ", "δ"]), e("(?i)Δ")); - assert_eq!(exact(["Δ", "δ"]), e("(?i)δ")); - - assert_eq!(exact(["S", "s", "ſ"]), e("(?i)S")); - assert_eq!(exact(["S", "s", "ſ"]), e("(?i)s")); - assert_eq!(exact(["S", "s", "ſ"]), e("(?i)ſ")); - } - - let letters = "ͱͳͷΐάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋ"; - assert_eq!(exact([letters]), e(letters)); - } - - #[test] - fn class() { - assert_eq!(exact(["a", "b", "c"]), e("[abc]")); - assert_eq!(exact(["a1b", "a2b", "a3b"]), e("a[123]b")); - assert_eq!(exact(["δ", "ε"]), e("[εδ]")); - #[cfg(feature = "unicode-case")] - { - assert_eq!(exact(["Δ", "Ε", "δ", "ε", "ϵ"]), e(r"(?i)[εδ]")); - } - } - - #[test] - fn look() { - assert_eq!(exact(["ab"]), e(r"a\Ab")); - assert_eq!(exact(["ab"]), e(r"a\zb")); - assert_eq!(exact(["ab"]), e(r"a(?m:^)b")); - assert_eq!(exact(["ab"]), e(r"a(?m:$)b")); - assert_eq!(exact(["ab"]), e(r"a\bb")); - assert_eq!(exact(["ab"]), e(r"a\Bb")); - assert_eq!(exact(["ab"]), e(r"a(?-u:\b)b")); - assert_eq!(exact(["ab"]), e(r"a(?-u:\B)b")); - - assert_eq!(exact(["ab"]), e(r"^ab")); - assert_eq!(exact(["ab"]), e(r"$ab")); - assert_eq!(exact(["ab"]), e(r"(?m:^)ab")); - assert_eq!(exact(["ab"]), e(r"(?m:$)ab")); - assert_eq!(exact(["ab"]), e(r"\bab")); - assert_eq!(exact(["ab"]), e(r"\Bab")); - assert_eq!(exact(["ab"]), e(r"(?-u:\b)ab")); - assert_eq!(exact(["ab"]), e(r"(?-u:\B)ab")); - - assert_eq!(exact(["ab"]), e(r"ab^")); - assert_eq!(exact(["ab"]), e(r"ab$")); - assert_eq!(exact(["ab"]), e(r"ab(?m:^)")); - assert_eq!(exact(["ab"]), e(r"ab(?m:$)")); - assert_eq!(exact(["ab"]), e(r"ab\b")); - 
assert_eq!(exact(["ab"]), e(r"ab\B")); - assert_eq!(exact(["ab"]), e(r"ab(?-u:\b)")); - assert_eq!(exact(["ab"]), e(r"ab(?-u:\B)")); - - let expected = (seq([I("aZ"), E("ab")]), seq([I("Zb"), E("ab")])); - assert_eq!(expected, e(r"^aZ*b")); - } - - #[test] - fn repetition() { - assert_eq!(exact(["a", ""]), e(r"a?")); - assert_eq!(exact(["", "a"]), e(r"a??")); - assert_eq!(inexact([I("a"), E("")], [I("a"), E("")]), e(r"a*")); - assert_eq!(inexact([E(""), I("a")], [E(""), I("a")]), e(r"a*?")); - assert_eq!(inexact([I("a")], [I("a")]), e(r"a+")); - assert_eq!(inexact([I("a")], [I("a")]), e(r"(a+)+")); - - assert_eq!(exact(["ab"]), e(r"aZ{0}b")); - assert_eq!(exact(["aZb", "ab"]), e(r"aZ?b")); - assert_eq!(exact(["ab", "aZb"]), e(r"aZ??b")); - assert_eq!( - inexact([I("aZ"), E("ab")], [I("Zb"), E("ab")]), - e(r"aZ*b") - ); - assert_eq!( - inexact([E("ab"), I("aZ")], [E("ab"), I("Zb")]), - e(r"aZ*?b") - ); - assert_eq!(inexact([I("aZ")], [I("Zb")]), e(r"aZ+b")); - assert_eq!(inexact([I("aZ")], [I("Zb")]), e(r"aZ+?b")); - - assert_eq!(exact(["aZZb"]), e(r"aZ{2}b")); - assert_eq!(inexact([I("aZZ")], [I("ZZb")]), e(r"aZ{2,3}b")); - - assert_eq!(exact(["abc", ""]), e(r"(abc)?")); - assert_eq!(exact(["", "abc"]), e(r"(abc)??")); - - assert_eq!(inexact([I("a"), E("b")], [I("ab"), E("b")]), e(r"a*b")); - assert_eq!(inexact([E("b"), I("a")], [E("b"), I("ab")]), e(r"a*?b")); - assert_eq!(inexact([I("ab")], [I("b")]), e(r"ab+")); - assert_eq!(inexact([I("a"), I("b")], [I("b")]), e(r"a*b+")); - - // FIXME: The suffixes for this don't look quite right to me. I think - // the right suffixes would be: [I(ac), I(bc), E(c)]. The main issue I - // think is that suffixes are computed by iterating over concatenations - // in reverse, and then [bc, ac, c] ordering is indeed correct from - // that perspective. We also test a few more equivalent regexes, and - // we get the same result, so it is consistent at least I suppose. 
- // - // The reason why this isn't an issue is that it only messes up - // preference order, and currently, suffixes are never used in a - // context where preference order matters. For prefixes it matters - // because we sometimes want to use prefilters without confirmation - // when all of the literals are exact (and there's no look-around). But - // we never do that for suffixes. Any time we use suffixes, we always - // include a confirmation step. If that ever changes, then it's likely - // this bug will need to be fixed, but last time I looked, it appears - // hard to do so. - assert_eq!( - inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]), - e(r"a*b*c") - ); - assert_eq!( - inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]), - e(r"(a+)?(b+)?c") - ); - assert_eq!( - inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]), - e(r"(a+|)(b+|)c") - ); - // A few more similarish but not identical regexes. These may have a - // similar problem as above. - assert_eq!( - inexact( - [I("a"), I("b"), I("c"), E("")], - [I("c"), I("b"), I("a"), E("")] - ), - e(r"a*b*c*") - ); - assert_eq!(inexact([I("a"), I("b"), I("c")], [I("c")]), e(r"a*b*c+")); - assert_eq!(inexact([I("a"), I("b")], [I("bc")]), e(r"a*b+c")); - assert_eq!(inexact([I("a"), I("b")], [I("c"), I("b")]), e(r"a*b+c*")); - assert_eq!(inexact([I("ab"), E("a")], [I("b"), E("a")]), e(r"ab*")); - assert_eq!( - inexact([I("ab"), E("ac")], [I("bc"), E("ac")]), - e(r"ab*c") - ); - assert_eq!(inexact([I("ab")], [I("b")]), e(r"ab+")); - assert_eq!(inexact([I("ab")], [I("bc")]), e(r"ab+c")); - - assert_eq!( - inexact([I("z"), E("azb")], [I("zazb"), E("azb")]), - e(r"z*azb") - ); - - let expected = - exact(["aaa", "aab", "aba", "abb", "baa", "bab", "bba", "bbb"]); - assert_eq!(expected, e(r"[ab]{3}")); - let expected = inexact( - [ - I("aaa"), - I("aab"), - I("aba"), - I("abb"), - I("baa"), - I("bab"), - I("bba"), - I("bbb"), - ], - [ - I("aaa"), - I("aab"), - I("aba"), - I("abb"), - 
I("baa"), - I("bab"), - I("bba"), - I("bbb"), - ], - ); - assert_eq!(expected, e(r"[ab]{3,4}")); - } - - #[test] - fn concat() { - let empty: [&str; 0] = []; - - assert_eq!(exact(["abcxyz"]), e(r"abc()xyz")); - assert_eq!(exact(["abcxyz"]), e(r"(abc)(xyz)")); - assert_eq!(exact(["abcmnoxyz"]), e(r"abc()mno()xyz")); - assert_eq!(exact(empty), e(r"abc[a&&b]xyz")); - assert_eq!(exact(["abcxyz"]), e(r"abc[a&&b]*xyz")); - } - - #[test] - fn alternation() { - assert_eq!(exact(["abc", "mno", "xyz"]), e(r"abc|mno|xyz")); - assert_eq!( - inexact( - [E("abc"), I("mZ"), E("mo"), E("xyz")], - [E("abc"), I("Zo"), E("mo"), E("xyz")] - ), - e(r"abc|mZ*o|xyz") - ); - assert_eq!(exact(["abc", "xyz"]), e(r"abc|M[a&&b]N|xyz")); - assert_eq!(exact(["abc", "MN", "xyz"]), e(r"abc|M[a&&b]*N|xyz")); - - assert_eq!(exact(["aaa", "aaaaa"]), e(r"(?:|aa)aaa")); - assert_eq!( - inexact( - [I("aaa"), E(""), I("aaaaa"), E("aa")], - [I("aaa"), E(""), E("aa")] - ), - e(r"(?:|aa)(?:aaa)*") - ); - assert_eq!( - inexact( - [E(""), I("aaa"), E("aa"), I("aaaaa")], - [E(""), I("aaa"), E("aa")] - ), - e(r"(?:|aa)(?:aaa)*?") - ); - - assert_eq!( - inexact([E("a"), I("b"), E("")], [E("a"), I("b"), E("")]), - e(r"a|b*") - ); - assert_eq!(inexact([E("a"), I("b")], [E("a"), I("b")]), e(r"a|b+")); - - assert_eq!( - inexact([I("a"), E("b"), E("c")], [I("ab"), E("b"), E("c")]), - e(r"a*b|c") - ); - - assert_eq!( - inexact( - [E("a"), E("b"), I("c"), E("")], - [E("a"), E("b"), I("c"), E("")] - ), - e(r"a|(?:b|c*)") - ); - - assert_eq!( - inexact( - [I("a"), I("b"), E("c"), I("a"), I("ab"), E("c")], - [I("ac"), I("bc"), E("c"), I("ac"), I("abc"), E("c")], - ), - e(r"(a|b)*c|(a|ab)*c") - ); - - assert_eq!( - exact(["abef", "abgh", "cdef", "cdgh"]), - e(r"(ab|cd)(ef|gh)") - ); - assert_eq!( - exact([ - "abefij", "abefkl", "abghij", "abghkl", "cdefij", "cdefkl", - "cdghij", "cdghkl", - ]), - e(r"(ab|cd)(ef|gh)(ij|kl)") - ); - - assert_eq!(inexact([E("abab")], [E("abab")]), e(r"(ab){2}")); - - 
assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,3}")); - - assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,}")); - } - - #[test] - fn impossible() { - let empty: [&str; 0] = []; - - assert_eq!(exact(empty), e(r"[a&&b]")); - assert_eq!(exact(empty), e(r"a[a&&b]")); - assert_eq!(exact(empty), e(r"[a&&b]b")); - assert_eq!(exact(empty), e(r"a[a&&b]b")); - assert_eq!(exact(["a", "b"]), e(r"a|[a&&b]|b")); - assert_eq!(exact(["a", "b"]), e(r"a|c[a&&b]|b")); - assert_eq!(exact(["a", "b"]), e(r"a|[a&&b]d|b")); - assert_eq!(exact(["a", "b"]), e(r"a|c[a&&b]d|b")); - assert_eq!(exact([""]), e(r"[a&&b]*")); - assert_eq!(exact(["MN"]), e(r"M[a&&b]*N")); - } - - // This tests patterns that contain something that defeats literal - // detection, usually because it would blow some limit on the total number - // of literals that can be returned. - // - // The main idea is that when literal extraction sees something that - // it knows will blow a limit, it replaces it with a marker that says - // "any literal will match here." While not necessarily true, the - // over-estimation is just fine for the purposes of literal extraction, - // because the imprecision doesn't matter: too big is too big. - // - // This is one of the trickier parts of literal extraction, since we need - // to make sure all of our literal extraction operations correctly compose - // with the markers. 
- #[test] - fn anything() { - assert_eq!(infinite(), e(r".")); - assert_eq!(infinite(), e(r"(?s).")); - assert_eq!(infinite(), e(r"[A-Za-z]")); - assert_eq!(infinite(), e(r"[A-Z]")); - assert_eq!(exact([""]), e(r"[A-Z]{0}")); - assert_eq!(infinite(), e(r"[A-Z]?")); - assert_eq!(infinite(), e(r"[A-Z]*")); - assert_eq!(infinite(), e(r"[A-Z]+")); - assert_eq!((seq([I("1")]), Seq::infinite()), e(r"1[A-Z]")); - assert_eq!((seq([I("1")]), seq([I("2")])), e(r"1[A-Z]2")); - assert_eq!((Seq::infinite(), seq([I("123")])), e(r"[A-Z]+123")); - assert_eq!(infinite(), e(r"[A-Z]+123[A-Z]+")); - assert_eq!(infinite(), e(r"1|[A-Z]|3")); - assert_eq!( - (seq([E("1"), I("2"), E("3")]), Seq::infinite()), - e(r"1|2[A-Z]|3"), - ); - assert_eq!( - (Seq::infinite(), seq([E("1"), I("2"), E("3")])), - e(r"1|[A-Z]2|3"), - ); - assert_eq!( - (seq([E("1"), I("2"), E("4")]), seq([E("1"), I("3"), E("4")])), - e(r"1|2[A-Z]3|4"), - ); - assert_eq!((Seq::infinite(), seq([I("2")])), e(r"(?:|1)[A-Z]2")); - assert_eq!(inexact([I("a")], [I("z")]), e(r"a.z")); - } - - // Like the 'anything' test, but it uses smaller limits in order to test - // the logic for effectively aborting literal extraction when the seqs get - // too big. 
- #[test] - fn anything_small_limits() { - fn prefixes(pattern: &str) -> Seq { - Extractor::new() - .kind(ExtractKind::Prefix) - .limit_total(10) - .extract(&parse(pattern)) - } - - fn suffixes(pattern: &str) -> Seq { - Extractor::new() - .kind(ExtractKind::Suffix) - .limit_total(10) - .extract(&parse(pattern)) - } - - fn e(pattern: &str) -> (Seq, Seq) { - (prefixes(pattern), suffixes(pattern)) - } - - assert_eq!( - ( - seq([ - I("aaa"), - I("aab"), - I("aba"), - I("abb"), - I("baa"), - I("bab"), - I("bba"), - I("bbb") - ]), - seq([ - I("aaa"), - I("aab"), - I("aba"), - I("abb"), - I("baa"), - I("bab"), - I("bba"), - I("bbb") - ]) - ), - e(r"[ab]{3}{3}") - ); - - assert_eq!(infinite(), e(r"ab|cd|ef|gh|ij|kl|mn|op|qr|st|uv|wx|yz")); - } - - #[test] - fn empty() { - assert_eq!(exact([""]), e(r"")); - assert_eq!(exact([""]), e(r"^")); - assert_eq!(exact([""]), e(r"$")); - assert_eq!(exact([""]), e(r"(?m:^)")); - assert_eq!(exact([""]), e(r"(?m:$)")); - assert_eq!(exact([""]), e(r"\b")); - assert_eq!(exact([""]), e(r"\B")); - assert_eq!(exact([""]), e(r"(?-u:\b)")); - assert_eq!(exact([""]), e(r"(?-u:\B)")); - } - - #[test] - fn odds_and_ends() { - assert_eq!((Seq::infinite(), seq([I("a")])), e(r".a")); - assert_eq!((seq([I("a")]), Seq::infinite()), e(r"a.")); - assert_eq!(infinite(), e(r"a|.")); - assert_eq!(infinite(), e(r".|a")); - - let pat = r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]"; - let expected = inexact( - ["Mo'am", "Moam", "Mu'am", "Muam"].map(I), - [ - "ddafi", "ddafy", "dhafi", "dhafy", "dzafi", "dzafy", "dafi", - "dafy", "tdafi", "tdafy", "thafi", "thafy", "tzafi", "tzafy", - "tafi", "tafy", "zdafi", "zdafy", "zhafi", "zhafy", "zzafi", - "zzafy", "zafi", "zafy", - ] - .map(I), - ); - assert_eq!(expected, e(pat)); - - assert_eq!( - (seq(["fn is_", "fn as_"].map(I)), Seq::infinite()), - e(r"fn is_([A-Z]+)|fn as_([A-Z]+)"), - ); - assert_eq!( - inexact([I("foo")], [I("quux")]), - e(r"foo[A-Z]+bar[A-Z]+quux") - ); - 
assert_eq!(infinite(), e(r"[A-Z]+bar[A-Z]+")); - assert_eq!( - exact(["Sherlock Holmes"]), - e(r"(?m)^Sherlock Holmes|Sherlock Holmes$") - ); - - assert_eq!(exact(["sa", "sb"]), e(r"\bs(?:[ab])")); - } - - // This tests a specific regex along with some heuristic steps to reduce - // the sequences extracted. This is meant to roughly correspond to the - // types of heuristics used to shrink literal sets in practice. (Shrinking - // is done because you want to balance "spend too much work looking for - // too many literals" and "spend too much work processing false positive - // matches from short literals.") - #[test] - #[cfg(feature = "unicode-case")] - fn holmes() { - let expected = inexact( - ["HOL", "HOl", "HoL", "Hol", "hOL", "hOl", "hoL", "hol"].map(I), - [ - "MES", "MEs", "Eſ", "MeS", "Mes", "eſ", "mES", "mEs", "meS", - "mes", - ] - .map(I), - ); - let (mut prefixes, mut suffixes) = e(r"(?i)Holmes"); - prefixes.keep_first_bytes(3); - suffixes.keep_last_bytes(3); - prefixes.minimize_by_preference(); - suffixes.minimize_by_preference(); - assert_eq!(expected, (prefixes, suffixes)); - } - - // This tests that we get some kind of literals extracted for a beefier - // alternation with case insensitive mode enabled. At one point during - // development, this returned nothing, and motivated some special case - // code in Extractor::union to try and trim down the literal sequences - // if the union would blow the limits set. - #[test] - #[cfg(feature = "unicode-case")] - fn holmes_alt() { - let mut pre = - prefixes(r"(?i)Sherlock|Holmes|Watson|Irene|Adler|John|Baker"); - assert!(pre.len().unwrap() > 0); - pre.optimize_for_prefix_by_preference(); - assert!(pre.len().unwrap() > 0); - } - - // See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8 - // See: CVE-2022-24713 - // - // We test this here to ensure literal extraction completes in reasonable - // time and isn't materially impacted by these sorts of pathological - // repeats. 
- #[test] - fn crazy_repeats() { - assert_eq!(inexact([E("")], [E("")]), e(r"(?:){4294967295}")); - assert_eq!( - inexact([E("")], [E("")]), - e(r"(?:){64}{64}{64}{64}{64}{64}") - ); - assert_eq!(inexact([E("")], [E("")]), e(r"x{0}{4294967295}")); - assert_eq!(inexact([E("")], [E("")]), e(r"(?:|){4294967295}")); - - assert_eq!( - inexact([E("")], [E("")]), - e(r"(?:){8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}") - ); - let repa = "a".repeat(100); - assert_eq!( - inexact([I(&repa)], [I(&repa)]), - e(r"a{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}") - ); - } - - #[test] - fn huge() { - let pat = r#"(?-u) - 2(?: - [45]\d{3}| - 7(?: - 1[0-267]| - 2[0-289]| - 3[0-29]| - 4[01]| - 5[1-3]| - 6[013]| - 7[0178]| - 91 - )| - 8(?: - 0[125]| - [139][1-6]| - 2[0157-9]| - 41| - 6[1-35]| - 7[1-5]| - 8[1-8]| - 90 - )| - 9(?: - 0[0-2]| - 1[0-4]| - 2[568]| - 3[3-6]| - 5[5-7]| - 6[0167]| - 7[15]| - 8[0146-9] - ) - )\d{4}| - 3(?: - 12?[5-7]\d{2}| - 0(?: - 2(?: - [025-79]\d| - [348]\d{1,2} - )| - 3(?: - [2-4]\d| - [56]\d? - ) - )| - 2(?: - 1\d{2}| - 2(?: - [12]\d| - [35]\d{1,2}| - 4\d? 
- ) - )| - 3(?: - 1\d{2}| - 2(?: - [2356]\d| - 4\d{1,2} - ) - )| - 4(?: - 1\d{2}| - 2(?: - 2\d{1,2}| - [47]| - 5\d{2} - ) - )| - 5(?: - 1\d{2}| - 29 - )| - [67]1\d{2}| - 8(?: - 1\d{2}| - 2(?: - 2\d{2}| - 3| - 4\d - ) - ) - )\d{3}| - 4(?: - 0(?: - 2(?: - [09]\d| - 7 - )| - 33\d{2} - )| - 1\d{3}| - 2(?: - 1\d{2}| - 2(?: - [25]\d?| - [348]\d| - [67]\d{1,2} - ) - )| - 3(?: - 1\d{2}(?: - \d{2} - )?| - 2(?: - [045]\d| - [236-9]\d{1,2} - )| - 32\d{2} - )| - 4(?: - [18]\d{2}| - 2(?: - [2-46]\d{2}| - 3 - )| - 5[25]\d{2} - )| - 5(?: - 1\d{2}| - 2(?: - 3\d| - 5 - ) - )| - 6(?: - [18]\d{2}| - 2(?: - 3(?: - \d{2} - )?| - [46]\d{1,2}| - 5\d{2}| - 7\d - )| - 5(?: - 3\d?| - 4\d| - [57]\d{1,2}| - 6\d{2}| - 8 - ) - )| - 71\d{2}| - 8(?: - [18]\d{2}| - 23\d{2}| - 54\d{2} - )| - 9(?: - [18]\d{2}| - 2[2-5]\d{2}| - 53\d{1,2} - ) - )\d{3}| - 5(?: - 02[03489]\d{2}| - 1\d{2}| - 2(?: - 1\d{2}| - 2(?: - 2(?: - \d{2} - )?| - [457]\d{2} - ) - )| - 3(?: - 1\d{2}| - 2(?: - [37](?: - \d{2} - )?| - [569]\d{2} - ) - )| - 4(?: - 1\d{2}| - 2[46]\d{2} - )| - 5(?: - 1\d{2}| - 26\d{1,2} - )| - 6(?: - [18]\d{2}| - 2| - 53\d{2} - )| - 7(?: - 1| - 24 - )\d{2}| - 8(?: - 1| - 26 - )\d{2}| - 91\d{2} - )\d{3}| - 6(?: - 0(?: - 1\d{2}| - 2(?: - 3\d{2}| - 4\d{1,2} - ) - )| - 2(?: - 2[2-5]\d{2}| - 5(?: - [3-5]\d{2}| - 7 - )| - 8\d{2} - )| - 3(?: - 1| - 2[3478] - )\d{2}| - 4(?: - 1| - 2[34] - )\d{2}| - 5(?: - 1| - 2[47] - )\d{2}| - 6(?: - [18]\d{2}| - 6(?: - 2(?: - 2\d| - [34]\d{2} - )| - 5(?: - [24]\d{2}| - 3\d| - 5\d{1,2} - ) - ) - )| - 72[2-5]\d{2}| - 8(?: - 1\d{2}| - 2[2-5]\d{2} - )| - 9(?: - 1\d{2}| - 2[2-6]\d{2} - ) - )\d{3}| - 7(?: - (?: - 02| - [3-589]1| - 6[12]| - 72[24] - )\d{2}| - 21\d{3}| - 32 - )\d{3}| - 8(?: - (?: - 4[12]| - [5-7]2| - 1\d? 
- )| - (?: - 0| - 3[12]| - [5-7]1| - 217 - )\d - )\d{4}| - 9(?: - [35]1| - (?: - [024]2| - 81 - )\d| - (?: - 1| - [24]1 - )\d{2} - )\d{3} - "#; - // TODO: This is a good candidate of a seq of literals that could be - // shrunk quite a bit and still be very productive with respect to - // literal optimizations. - let (prefixes, suffixes) = e(pat); - assert!(!suffixes.is_finite()); - assert_eq!(Some(243), prefixes.len()); - } - - #[test] - fn optimize() { - // This gets a common prefix that isn't too short. - let (p, s) = - opt(["foobarfoobar", "foobar", "foobarzfoobar", "foobarfoobar"]); - assert_eq!(seq([I("foobar")]), p); - assert_eq!(seq([I("foobar")]), s); - - // This also finds a common prefix, but since it's only one byte, it - // prefers the multiple literals. - let (p, s) = opt(["abba", "akka", "abccba"]); - assert_eq!(exact(["abba", "akka", "abccba"]), (p, s)); - - let (p, s) = opt(["sam", "samwise"]); - assert_eq!((seq([E("sam")]), seq([E("sam"), E("samwise")])), (p, s)); - - // The empty string is poisonous, so our seq becomes infinite, even - // though all literals are exact. - let (p, s) = opt(["foobarfoo", "foo", "", "foozfoo", "foofoo"]); - assert!(!p.is_finite()); - assert!(!s.is_finite()); - - // A space is also poisonous, so our seq becomes infinite. But this - // only gets triggered when we don't have a completely exact sequence. - // When the sequence is exact, spaces are okay, since we presume that - // any prefilter will match a space more quickly than the regex engine. - // (When the sequence is exact, there's a chance of the prefilter being - // used without needing the regex engine at all.) - let mut p = seq([E("foobarfoo"), I("foo"), E(" "), E("foofoo")]); - p.optimize_for_prefix_by_preference(); - assert!(!p.is_finite()); - } -} diff --git a/vendor/regex-syntax/src/hir/mod.rs b/vendor/regex-syntax/src/hir/mod.rs deleted file mode 100644 index 5db78438..00000000 --- a/vendor/regex-syntax/src/hir/mod.rs +++ /dev/null @@ -1,3873 +0,0 @@ -/*! 
-Defines a high-level intermediate (HIR) representation for regular expressions. - -The HIR is represented by the [`Hir`] type, and it principally constructed via -[translation](translate) from an [`Ast`](crate::ast::Ast). Alternatively, users -may use the smart constructors defined on `Hir` to build their own by hand. The -smart constructors simultaneously simplify and "optimize" the HIR, and are also -the same routines used by translation. - -Most regex engines only have an HIR like this, and usually construct it -directly from the concrete syntax. This crate however first parses the -concrete syntax into an `Ast`, and only then creates the HIR from the `Ast`, -as mentioned above. It's done this way to facilitate better error reporting, -and to have a structured representation of a regex that faithfully represents -its concrete syntax. Namely, while an `Hir` value can be converted back to an -equivalent regex pattern string, it is unlikely to look like the original due -to its simplified structure. -*/ - -use core::{char, cmp}; - -use alloc::{ - boxed::Box, - format, - string::{String, ToString}, - vec, - vec::Vec, -}; - -use crate::{ - ast::Span, - hir::interval::{Interval, IntervalSet, IntervalSetIter}, - unicode, -}; - -pub use crate::{ - hir::visitor::{visit, Visitor}, - unicode::CaseFoldError, -}; - -mod interval; -pub mod literal; -pub mod print; -pub mod translate; -mod visitor; - -/// An error that can occur while translating an `Ast` to a `Hir`. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Error { - /// The kind of error. - kind: ErrorKind, - /// The original pattern that the translator's Ast was parsed from. Every - /// span in an error is a valid range into this string. - pattern: String, - /// The span of this error, derived from the Ast given to the translator. - span: Span, -} - -impl Error { - /// Return the type of this error. 
- pub fn kind(&self) -> &ErrorKind { - &self.kind - } - - /// The original pattern string in which this error occurred. - /// - /// Every span reported by this error is reported in terms of this string. - pub fn pattern(&self) -> &str { - &self.pattern - } - - /// Return the span at which this error occurred. - pub fn span(&self) -> &Span { - &self.span - } -} - -/// The type of an error that occurred while building an `Hir`. -/// -/// This error type is marked as `non_exhaustive`. This means that adding a -/// new variant is not considered a breaking change. -#[non_exhaustive] -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum ErrorKind { - /// This error occurs when a Unicode feature is used when Unicode - /// support is disabled. For example `(?-u:\pL)` would trigger this error. - UnicodeNotAllowed, - /// This error occurs when translating a pattern that could match a byte - /// sequence that isn't UTF-8 and `utf8` was enabled. - InvalidUtf8, - /// This error occurs when one uses a non-ASCII byte for a line terminator, - /// but where Unicode mode is enabled and UTF-8 mode is disabled. - InvalidLineTerminator, - /// This occurs when an unrecognized Unicode property name could not - /// be found. - UnicodePropertyNotFound, - /// This occurs when an unrecognized Unicode property value could not - /// be found. - UnicodePropertyValueNotFound, - /// This occurs when a Unicode-aware Perl character class (`\w`, `\s` or - /// `\d`) could not be found. This can occur when the `unicode-perl` - /// crate feature is not enabled. - UnicodePerlClassNotFound, - /// This occurs when the Unicode simple case mapping tables are not - /// available, and the regular expression required Unicode aware case - /// insensitivity. 
- UnicodeCaseUnavailable, -} - -#[cfg(feature = "std")] -impl std::error::Error for Error {} - -impl core::fmt::Display for Error { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - crate::error::Formatter::from(self).fmt(f) - } -} - -impl core::fmt::Display for ErrorKind { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - use self::ErrorKind::*; - - let msg = match *self { - UnicodeNotAllowed => "Unicode not allowed here", - InvalidUtf8 => "pattern can match invalid UTF-8", - InvalidLineTerminator => "invalid line terminator, must be ASCII", - UnicodePropertyNotFound => "Unicode property not found", - UnicodePropertyValueNotFound => "Unicode property value not found", - UnicodePerlClassNotFound => { - "Unicode-aware Perl class not found \ - (make sure the unicode-perl feature is enabled)" - } - UnicodeCaseUnavailable => { - "Unicode-aware case insensitivity matching is not available \ - (make sure the unicode-case feature is enabled)" - } - }; - f.write_str(msg) - } -} - -/// A high-level intermediate representation (HIR) for a regular expression. -/// -/// An HIR value is a combination of a [`HirKind`] and a set of [`Properties`]. -/// An `HirKind` indicates what kind of regular expression it is (a literal, -/// a repetition, a look-around assertion, etc.), where as a `Properties` -/// describes various facts about the regular expression. For example, whether -/// it matches UTF-8 or if it matches the empty string. -/// -/// The HIR of a regular expression represents an intermediate step between -/// its abstract syntax (a structured description of the concrete syntax) and -/// an actual regex matcher. The purpose of HIR is to make regular expressions -/// easier to analyze. In particular, the AST is much more complex than the -/// HIR. For example, while an AST supports arbitrarily nested character -/// classes, the HIR will flatten all nested classes into a single set. 
The HIR -/// will also "compile away" every flag present in the concrete syntax. For -/// example, users of HIR expressions never need to worry about case folding; -/// it is handled automatically by the translator (e.g., by translating -/// `(?i:A)` to `[aA]`). -/// -/// The specific type of an HIR expression can be accessed via its `kind` -/// or `into_kind` methods. This extra level of indirection exists for two -/// reasons: -/// -/// 1. Construction of an HIR expression *must* use the constructor methods on -/// this `Hir` type instead of building the `HirKind` values directly. This -/// permits construction to enforce invariants like "concatenations always -/// consist of two or more sub-expressions." -/// 2. Every HIR expression contains attributes that are defined inductively, -/// and can be computed cheaply during the construction process. For example, -/// one such attribute is whether the expression must match at the beginning of -/// the haystack. -/// -/// In particular, if you have an `HirKind` value, then there is intentionally -/// no way to build an `Hir` value from it. You instead need to do case -/// analysis on the `HirKind` value and build the `Hir` value using its smart -/// constructors. -/// -/// # UTF-8 -/// -/// If the HIR was produced by a translator with -/// [`TranslatorBuilder::utf8`](translate::TranslatorBuilder::utf8) enabled, -/// then the HIR is guaranteed to match UTF-8 exclusively for all non-empty -/// matches. -/// -/// For empty matches, those can occur at any position. It is the -/// responsibility of the regex engine to determine whether empty matches are -/// permitted between the code units of a single codepoint. -/// -/// # Stack space -/// -/// This type defines its own destructor that uses constant stack space and -/// heap space proportional to the size of the HIR. 
-/// -/// Also, an `Hir`'s `fmt::Display` implementation prints an HIR as a regular -/// expression pattern string, and uses constant stack space and heap space -/// proportional to the size of the `Hir`. The regex it prints is guaranteed to -/// be _semantically_ equivalent to the original concrete syntax, but it may -/// look very different. (And potentially not practically readable by a human.) -/// -/// An `Hir`'s `fmt::Debug` implementation currently does not use constant -/// stack space. The implementation will also suppress some details (such as -/// the `Properties` inlined into every `Hir` value to make it less noisy). -#[derive(Clone, Eq, PartialEq)] -pub struct Hir { - /// The underlying HIR kind. - kind: HirKind, - /// Analysis info about this HIR, computed during construction. - props: Properties, -} - -/// Methods for accessing the underlying `HirKind` and `Properties`. -impl Hir { - /// Returns a reference to the underlying HIR kind. - pub fn kind(&self) -> &HirKind { - &self.kind - } - - /// Consumes ownership of this HIR expression and returns its underlying - /// `HirKind`. - pub fn into_kind(mut self) -> HirKind { - core::mem::replace(&mut self.kind, HirKind::Empty) - } - - /// Returns the properties computed for this `Hir`. - pub fn properties(&self) -> &Properties { - &self.props - } - - /// Splits this HIR into its constituent parts. - /// - /// This is useful because `let Hir { kind, props } = hir;` does not work - /// because of `Hir`'s custom `Drop` implementation. - fn into_parts(mut self) -> (HirKind, Properties) { - ( - core::mem::replace(&mut self.kind, HirKind::Empty), - core::mem::replace(&mut self.props, Properties::empty()), - ) - } -} - -/// Smart constructors for HIR values. -/// -/// These constructors are called "smart" because they do inductive work or -/// simplifications. 
For example, calling `Hir::repetition` with a repetition -/// like `a{0}` will actually return a `Hir` with a `HirKind::Empty` kind -/// since it is equivalent to an empty regex. Another example is calling -/// `Hir::concat(vec![expr])`. Instead of getting a `HirKind::Concat`, you'll -/// just get back the original `expr` since it's precisely equivalent. -/// -/// Smart constructors enable maintaining invariants about the HIR data type -/// while also simulanteously keeping the representation as simple as possible. -impl Hir { - /// Returns an empty HIR expression. - /// - /// An empty HIR expression always matches, including the empty string. - #[inline] - pub fn empty() -> Hir { - let props = Properties::empty(); - Hir { kind: HirKind::Empty, props } - } - - /// Returns an HIR expression that can never match anything. That is, - /// the size of the set of strings in the language described by the HIR - /// returned is `0`. - /// - /// This is distinct from [`Hir::empty`] in that the empty string matches - /// the HIR returned by `Hir::empty`. That is, the set of strings in the - /// language describe described by `Hir::empty` is non-empty. - /// - /// Note that currently, the HIR returned uses an empty character class to - /// indicate that nothing can match. An equivalent expression that cannot - /// match is an empty alternation, but all such "fail" expressions are - /// normalized (via smart constructors) to empty character classes. This is - /// because empty character classes can be spelled in the concrete syntax - /// of a regex (e.g., `\P{any}` or `(?-u:[^\x00-\xFF])` or `[a&&b]`), but - /// empty alternations cannot. - #[inline] - pub fn fail() -> Hir { - let class = Class::Bytes(ClassBytes::empty()); - let props = Properties::class(&class); - // We can't just call Hir::class here because it defers to Hir::fail - // in order to canonicalize the Hir value used to represent "cannot - // match." 
- Hir { kind: HirKind::Class(class), props } - } - - /// Creates a literal HIR expression. - /// - /// This accepts anything that can be converted into a `Box<[u8]>`. - /// - /// Note that there is no mechanism for storing a `char` or a `Box` - /// in an HIR. Everything is "just bytes." Whether a `Literal` (or - /// any HIR node) matches valid UTF-8 exclusively can be queried via - /// [`Properties::is_utf8`]. - /// - /// # Example - /// - /// This example shows that concatenations of `Literal` HIR values will - /// automatically get flattened and combined together. So for example, even - /// if you concat multiple `Literal` values that are themselves not valid - /// UTF-8, they might add up to valid UTF-8. This also demonstrates just - /// how "smart" Hir's smart constructors are. - /// - /// ``` - /// use regex_syntax::hir::{Hir, HirKind, Literal}; - /// - /// let literals = vec![ - /// Hir::literal([0xE2]), - /// Hir::literal([0x98]), - /// Hir::literal([0x83]), - /// ]; - /// // Each literal, on its own, is invalid UTF-8. - /// assert!(literals.iter().all(|hir| !hir.properties().is_utf8())); - /// - /// let concat = Hir::concat(literals); - /// // But the concatenation is valid UTF-8! - /// assert!(concat.properties().is_utf8()); - /// - /// // And also notice that the literals have been concatenated into a - /// // single `Literal`, to the point where there is no explicit `Concat`! - /// let expected = HirKind::Literal(Literal(Box::from("☃".as_bytes()))); - /// assert_eq!(&expected, concat.kind()); - /// ``` - /// - /// # Example: building a literal from a `char` - /// - /// This example shows how to build a single `Hir` literal from a `char` - /// value. 
Since a [`Literal`] is just bytes, we just need to UTF-8 - /// encode a `char` value: - /// - /// ``` - /// use regex_syntax::hir::{Hir, HirKind, Literal}; - /// - /// let ch = '☃'; - /// let got = Hir::literal(ch.encode_utf8(&mut [0; 4]).as_bytes()); - /// - /// let expected = HirKind::Literal(Literal(Box::from("☃".as_bytes()))); - /// assert_eq!(&expected, got.kind()); - /// ``` - #[inline] - pub fn literal>>(lit: B) -> Hir { - let bytes = lit.into(); - if bytes.is_empty() { - return Hir::empty(); - } - - let lit = Literal(bytes); - let props = Properties::literal(&lit); - Hir { kind: HirKind::Literal(lit), props } - } - - /// Creates a class HIR expression. The class may either be defined over - /// ranges of Unicode codepoints or ranges of raw byte values. - /// - /// Note that an empty class is permitted. An empty class is equivalent to - /// `Hir::fail()`. - #[inline] - pub fn class(class: Class) -> Hir { - if class.is_empty() { - return Hir::fail(); - } else if let Some(bytes) = class.literal() { - return Hir::literal(bytes); - } - let props = Properties::class(&class); - Hir { kind: HirKind::Class(class), props } - } - - /// Creates a look-around assertion HIR expression. - #[inline] - pub fn look(look: Look) -> Hir { - let props = Properties::look(look); - Hir { kind: HirKind::Look(look), props } - } - - /// Creates a repetition HIR expression. - #[inline] - pub fn repetition(mut rep: Repetition) -> Hir { - // If the sub-expression of a repetition can only match the empty - // string, then we force its maximum to be at most 1. - if rep.sub.properties().maximum_len() == Some(0) { - rep.min = cmp::min(rep.min, 1); - rep.max = rep.max.map(|n| cmp::min(n, 1)).or(Some(1)); - } - // The regex 'a{0}' is always equivalent to the empty regex. This is - // true even when 'a' is an expression that never matches anything - // (like '\P{any}'). - // - // Additionally, the regex 'a{1}' is always equivalent to 'a'. 
- if rep.min == 0 && rep.max == Some(0) { - return Hir::empty(); - } else if rep.min == 1 && rep.max == Some(1) { - return *rep.sub; - } - let props = Properties::repetition(&rep); - Hir { kind: HirKind::Repetition(rep), props } - } - - /// Creates a capture HIR expression. - /// - /// Note that there is no explicit HIR value for a non-capturing group. - /// Since a non-capturing group only exists to override precedence in the - /// concrete syntax and since an HIR already does its own grouping based on - /// what is parsed, there is no need to explicitly represent non-capturing - /// groups in the HIR. - #[inline] - pub fn capture(capture: Capture) -> Hir { - let props = Properties::capture(&capture); - Hir { kind: HirKind::Capture(capture), props } - } - - /// Returns the concatenation of the given expressions. - /// - /// This attempts to flatten and simplify the concatenation as appropriate. - /// - /// # Example - /// - /// This shows a simple example of basic flattening of both concatenations - /// and literals. - /// - /// ``` - /// use regex_syntax::hir::Hir; - /// - /// let hir = Hir::concat(vec![ - /// Hir::concat(vec![ - /// Hir::literal([b'a']), - /// Hir::literal([b'b']), - /// Hir::literal([b'c']), - /// ]), - /// Hir::concat(vec![ - /// Hir::literal([b'x']), - /// Hir::literal([b'y']), - /// Hir::literal([b'z']), - /// ]), - /// ]); - /// let expected = Hir::literal("abcxyz".as_bytes()); - /// assert_eq!(expected, hir); - /// ``` - pub fn concat(subs: Vec) -> Hir { - // We rebuild the concatenation by simplifying it. Would be nice to do - // it in place, but that seems a little tricky? - let mut new = vec![]; - // This gobbles up any adjacent literals in a concatenation and smushes - // them together. Basically, when we see a literal, we add its bytes - // to 'prior_lit', and whenever we see anything else, we first take - // any bytes in 'prior_lit' and add it to the 'new' concatenation. 
- let mut prior_lit: Option> = None; - for sub in subs { - let (kind, props) = sub.into_parts(); - match kind { - HirKind::Literal(Literal(bytes)) => { - if let Some(ref mut prior_bytes) = prior_lit { - prior_bytes.extend_from_slice(&bytes); - } else { - prior_lit = Some(bytes.to_vec()); - } - } - // We also flatten concats that are direct children of another - // concat. We only need to do this one level deep since - // Hir::concat is the only way to build concatenations, and so - // flattening happens inductively. - HirKind::Concat(subs2) => { - for sub2 in subs2 { - let (kind2, props2) = sub2.into_parts(); - match kind2 { - HirKind::Literal(Literal(bytes)) => { - if let Some(ref mut prior_bytes) = prior_lit { - prior_bytes.extend_from_slice(&bytes); - } else { - prior_lit = Some(bytes.to_vec()); - } - } - kind2 => { - if let Some(prior_bytes) = prior_lit.take() { - new.push(Hir::literal(prior_bytes)); - } - new.push(Hir { kind: kind2, props: props2 }); - } - } - } - } - // We can just skip empty HIRs. - HirKind::Empty => {} - kind => { - if let Some(prior_bytes) = prior_lit.take() { - new.push(Hir::literal(prior_bytes)); - } - new.push(Hir { kind, props }); - } - } - } - if let Some(prior_bytes) = prior_lit.take() { - new.push(Hir::literal(prior_bytes)); - } - if new.is_empty() { - return Hir::empty(); - } else if new.len() == 1 { - return new.pop().unwrap(); - } - let props = Properties::concat(&new); - Hir { kind: HirKind::Concat(new), props } - } - - /// Returns the alternation of the given expressions. - /// - /// This flattens and simplifies the alternation as appropriate. This may - /// include factoring out common prefixes or even rewriting the alternation - /// as a character class. - /// - /// Note that an empty alternation is equivalent to `Hir::fail()`. (It - /// is not possible for one to write an empty alternation, or even an - /// alternation with a single sub-expression, in the concrete syntax of a - /// regex.) 
- /// - /// # Example - /// - /// This is a simple example showing how an alternation might get - /// simplified. - /// - /// ``` - /// use regex_syntax::hir::{Hir, Class, ClassUnicode, ClassUnicodeRange}; - /// - /// let hir = Hir::alternation(vec![ - /// Hir::literal([b'a']), - /// Hir::literal([b'b']), - /// Hir::literal([b'c']), - /// Hir::literal([b'd']), - /// Hir::literal([b'e']), - /// Hir::literal([b'f']), - /// ]); - /// let expected = Hir::class(Class::Unicode(ClassUnicode::new([ - /// ClassUnicodeRange::new('a', 'f'), - /// ]))); - /// assert_eq!(expected, hir); - /// ``` - /// - /// And another example showing how common prefixes might get factored - /// out. - /// - /// ``` - /// use regex_syntax::hir::{Hir, Class, ClassUnicode, ClassUnicodeRange}; - /// - /// let hir = Hir::alternation(vec![ - /// Hir::concat(vec![ - /// Hir::literal("abc".as_bytes()), - /// Hir::class(Class::Unicode(ClassUnicode::new([ - /// ClassUnicodeRange::new('A', 'Z'), - /// ]))), - /// ]), - /// Hir::concat(vec![ - /// Hir::literal("abc".as_bytes()), - /// Hir::class(Class::Unicode(ClassUnicode::new([ - /// ClassUnicodeRange::new('a', 'z'), - /// ]))), - /// ]), - /// ]); - /// let expected = Hir::concat(vec![ - /// Hir::literal("abc".as_bytes()), - /// Hir::alternation(vec![ - /// Hir::class(Class::Unicode(ClassUnicode::new([ - /// ClassUnicodeRange::new('A', 'Z'), - /// ]))), - /// Hir::class(Class::Unicode(ClassUnicode::new([ - /// ClassUnicodeRange::new('a', 'z'), - /// ]))), - /// ]), - /// ]); - /// assert_eq!(expected, hir); - /// ``` - /// - /// Note that these sorts of simplifications are not guaranteed. - pub fn alternation(subs: Vec) -> Hir { - // We rebuild the alternation by simplifying it. We proceed similarly - // as the concatenation case. But in this case, there's no literal - // simplification happening. We're just flattening alternations. 
- let mut new = Vec::with_capacity(subs.len()); - for sub in subs { - let (kind, props) = sub.into_parts(); - match kind { - HirKind::Alternation(subs2) => { - new.extend(subs2); - } - kind => { - new.push(Hir { kind, props }); - } - } - } - if new.is_empty() { - return Hir::fail(); - } else if new.len() == 1 { - return new.pop().unwrap(); - } - // Now that it's completely flattened, look for the special case of - // 'char1|char2|...|charN' and collapse that into a class. Note that - // we look for 'char' first and then bytes. The issue here is that if - // we find both non-ASCII codepoints and non-ASCII singleton bytes, - // then it isn't actually possible to smush them into a single class. - // (Because classes are either "all codepoints" or "all bytes." You - // can have a class that both matches non-ASCII but valid UTF-8 and - // invalid UTF-8.) So we look for all chars and then all bytes, and - // don't handle anything else. - if let Some(singletons) = singleton_chars(&new) { - let it = singletons - .into_iter() - .map(|ch| ClassUnicodeRange { start: ch, end: ch }); - return Hir::class(Class::Unicode(ClassUnicode::new(it))); - } - if let Some(singletons) = singleton_bytes(&new) { - let it = singletons - .into_iter() - .map(|b| ClassBytesRange { start: b, end: b }); - return Hir::class(Class::Bytes(ClassBytes::new(it))); - } - // Similar to singleton chars, we can also look for alternations of - // classes. Those can be smushed into a single class. - if let Some(cls) = class_chars(&new) { - return Hir::class(cls); - } - if let Some(cls) = class_bytes(&new) { - return Hir::class(cls); - } - // Factor out a common prefix if we can, which might potentially - // simplify the expression and unlock other optimizations downstream. - // It also might generally make NFA matching and DFA construction - // faster by reducing the scope of branching in the regex. 
- new = match lift_common_prefix(new) { - Ok(hir) => return hir, - Err(unchanged) => unchanged, - }; - let props = Properties::alternation(&new); - Hir { kind: HirKind::Alternation(new), props } - } - - /// Returns an HIR expression for `.`. - /// - /// * [`Dot::AnyChar`] maps to `(?su-R:.)`. - /// * [`Dot::AnyByte`] maps to `(?s-Ru:.)`. - /// * [`Dot::AnyCharExceptLF`] maps to `(?u-Rs:.)`. - /// * [`Dot::AnyCharExceptCRLF`] maps to `(?Ru-s:.)`. - /// * [`Dot::AnyByteExceptLF`] maps to `(?-Rsu:.)`. - /// * [`Dot::AnyByteExceptCRLF`] maps to `(?R-su:.)`. - /// - /// # Example - /// - /// Note that this is a convenience routine for constructing the correct - /// character class based on the value of `Dot`. There is no explicit "dot" - /// HIR value. It is just an abbreviation for a common character class. - /// - /// ``` - /// use regex_syntax::hir::{Hir, Dot, Class, ClassBytes, ClassBytesRange}; - /// - /// let hir = Hir::dot(Dot::AnyByte); - /// let expected = Hir::class(Class::Bytes(ClassBytes::new([ - /// ClassBytesRange::new(0x00, 0xFF), - /// ]))); - /// assert_eq!(expected, hir); - /// ``` - #[inline] - pub fn dot(dot: Dot) -> Hir { - match dot { - Dot::AnyChar => Hir::class(Class::Unicode(ClassUnicode::new([ - ClassUnicodeRange::new('\0', '\u{10FFFF}'), - ]))), - Dot::AnyByte => Hir::class(Class::Bytes(ClassBytes::new([ - ClassBytesRange::new(b'\0', b'\xFF'), - ]))), - Dot::AnyCharExcept(ch) => { - let mut cls = - ClassUnicode::new([ClassUnicodeRange::new(ch, ch)]); - cls.negate(); - Hir::class(Class::Unicode(cls)) - } - Dot::AnyCharExceptLF => { - Hir::class(Class::Unicode(ClassUnicode::new([ - ClassUnicodeRange::new('\0', '\x09'), - ClassUnicodeRange::new('\x0B', '\u{10FFFF}'), - ]))) - } - Dot::AnyCharExceptCRLF => { - Hir::class(Class::Unicode(ClassUnicode::new([ - ClassUnicodeRange::new('\0', '\x09'), - ClassUnicodeRange::new('\x0B', '\x0C'), - ClassUnicodeRange::new('\x0E', '\u{10FFFF}'), - ]))) - } - Dot::AnyByteExcept(byte) => { - let mut cls = - 
ClassBytes::new([ClassBytesRange::new(byte, byte)]); - cls.negate(); - Hir::class(Class::Bytes(cls)) - } - Dot::AnyByteExceptLF => { - Hir::class(Class::Bytes(ClassBytes::new([ - ClassBytesRange::new(b'\0', b'\x09'), - ClassBytesRange::new(b'\x0B', b'\xFF'), - ]))) - } - Dot::AnyByteExceptCRLF => { - Hir::class(Class::Bytes(ClassBytes::new([ - ClassBytesRange::new(b'\0', b'\x09'), - ClassBytesRange::new(b'\x0B', b'\x0C'), - ClassBytesRange::new(b'\x0E', b'\xFF'), - ]))) - } - } - } -} - -/// The underlying kind of an arbitrary [`Hir`] expression. -/// -/// An `HirKind` is principally useful for doing case analysis on the type -/// of a regular expression. If you're looking to build new `Hir` values, -/// then you _must_ use the smart constructors defined on `Hir`, like -/// [`Hir::repetition`], to build new `Hir` values. The API intentionally does -/// not expose any way of building an `Hir` directly from an `HirKind`. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum HirKind { - /// The empty regular expression, which matches everything, including the - /// empty string. - Empty, - /// A literalstring that matches exactly these bytes. - Literal(Literal), - /// A single character class that matches any of the characters in the - /// class. A class can either consist of Unicode scalar values as - /// characters, or it can use bytes. - /// - /// A class may be empty. In which case, it matches nothing. - Class(Class), - /// A look-around assertion. A look-around match always has zero length. - Look(Look), - /// A repetition operation applied to a sub-expression. - Repetition(Repetition), - /// A capturing group, which contains a sub-expression. - Capture(Capture), - /// A concatenation of expressions. - /// - /// A concatenation matches only if each of its sub-expressions match one - /// after the other. - /// - /// Concatenations are guaranteed by `Hir`'s smart constructors to always - /// have at least two sub-expressions. 
- Concat(Vec), - /// An alternation of expressions. - /// - /// An alternation matches only if at least one of its sub-expressions - /// match. If multiple sub-expressions match, then the leftmost is - /// preferred. - /// - /// Alternations are guaranteed by `Hir`'s smart constructors to always - /// have at least two sub-expressions. - Alternation(Vec), -} - -impl HirKind { - /// Returns a slice of this kind's sub-expressions, if any. - pub fn subs(&self) -> &[Hir] { - use core::slice::from_ref; - - match *self { - HirKind::Empty - | HirKind::Literal(_) - | HirKind::Class(_) - | HirKind::Look(_) => &[], - HirKind::Repetition(Repetition { ref sub, .. }) => from_ref(sub), - HirKind::Capture(Capture { ref sub, .. }) => from_ref(sub), - HirKind::Concat(ref subs) => subs, - HirKind::Alternation(ref subs) => subs, - } - } -} - -impl core::fmt::Debug for Hir { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - self.kind.fmt(f) - } -} - -/// Print a display representation of this Hir. -/// -/// The result of this is a valid regular expression pattern string. -/// -/// This implementation uses constant stack space and heap space proportional -/// to the size of the `Hir`. -impl core::fmt::Display for Hir { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - crate::hir::print::Printer::new().print(self, f) - } -} - -/// The high-level intermediate representation of a literal. -/// -/// A literal corresponds to `0` or more bytes that should be matched -/// literally. The smart constructors defined on `Hir` will automatically -/// concatenate adjacent literals into one literal, and will even automatically -/// replace empty literals with `Hir::empty()`. -/// -/// Note that despite a literal being represented by a sequence of bytes, its -/// `Debug` implementation will attempt to print it as a normal string. (That -/// is, not a sequence of decimal numbers.) 
-#[derive(Clone, Eq, PartialEq)] -pub struct Literal(pub Box<[u8]>); - -impl core::fmt::Debug for Literal { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - crate::debug::Bytes(&self.0).fmt(f) - } -} - -/// The high-level intermediate representation of a character class. -/// -/// A character class corresponds to a set of characters. A character is either -/// defined by a Unicode scalar value or a byte. -/// -/// A character class, regardless of its character type, is represented by a -/// sequence of non-overlapping non-adjacent ranges of characters. -/// -/// There are no guarantees about which class variant is used. Generally -/// speaking, the Unicode variat is used whenever a class needs to contain -/// non-ASCII Unicode scalar values. But the Unicode variant can be used even -/// when Unicode mode is disabled. For example, at the time of writing, the -/// regex `(?-u:a|\xc2\xa0)` will compile down to HIR for the Unicode class -/// `[a\u00A0]` due to optimizations. -/// -/// Note that `Bytes` variant may be produced even when it exclusively matches -/// valid UTF-8. This is because a `Bytes` variant represents an intention by -/// the author of the regular expression to disable Unicode mode, which in turn -/// impacts the semantics of case insensitive matching. For example, `(?i)k` -/// and `(?i-u)k` will not match the same set of strings. -#[derive(Clone, Eq, PartialEq)] -pub enum Class { - /// A set of characters represented by Unicode scalar values. - Unicode(ClassUnicode), - /// A set of characters represented by arbitrary bytes (one byte per - /// character). - Bytes(ClassBytes), -} - -impl Class { - /// Apply Unicode simple case folding to this character class, in place. - /// The character class will be expanded to include all simple case folded - /// character variants. - /// - /// If this is a byte oriented character class, then this will be limited - /// to the ASCII ranges `A-Z` and `a-z`. 
- /// - /// # Panics - /// - /// This routine panics when the case mapping data necessary for this - /// routine to complete is unavailable. This occurs when the `unicode-case` - /// feature is not enabled and the underlying class is Unicode oriented. - /// - /// Callers should prefer using `try_case_fold_simple` instead, which will - /// return an error instead of panicking. - pub fn case_fold_simple(&mut self) { - match *self { - Class::Unicode(ref mut x) => x.case_fold_simple(), - Class::Bytes(ref mut x) => x.case_fold_simple(), - } - } - - /// Apply Unicode simple case folding to this character class, in place. - /// The character class will be expanded to include all simple case folded - /// character variants. - /// - /// If this is a byte oriented character class, then this will be limited - /// to the ASCII ranges `A-Z` and `a-z`. - /// - /// # Error - /// - /// This routine returns an error when the case mapping data necessary - /// for this routine to complete is unavailable. This occurs when the - /// `unicode-case` feature is not enabled and the underlying class is - /// Unicode oriented. - pub fn try_case_fold_simple( - &mut self, - ) -> core::result::Result<(), CaseFoldError> { - match *self { - Class::Unicode(ref mut x) => x.try_case_fold_simple()?, - Class::Bytes(ref mut x) => x.case_fold_simple(), - } - Ok(()) - } - - /// Negate this character class in place. - /// - /// After completion, this character class will contain precisely the - /// characters that weren't previously in the class. - pub fn negate(&mut self) { - match *self { - Class::Unicode(ref mut x) => x.negate(), - Class::Bytes(ref mut x) => x.negate(), - } - } - - /// Returns true if and only if this character class will only ever match - /// valid UTF-8. - /// - /// A character class can match invalid UTF-8 only when the following - /// conditions are met: - /// - /// 1. The translator was configured to permit generating an expression - /// that can match invalid UTF-8. 
(By default, this is disabled.) - /// 2. Unicode mode (via the `u` flag) was disabled either in the concrete - /// syntax or in the parser builder. By default, Unicode mode is - /// enabled. - pub fn is_utf8(&self) -> bool { - match *self { - Class::Unicode(_) => true, - Class::Bytes(ref x) => x.is_ascii(), - } - } - - /// Returns the length, in bytes, of the smallest string matched by this - /// character class. - /// - /// For non-empty byte oriented classes, this always returns `1`. For - /// non-empty Unicode oriented classes, this can return `1`, `2`, `3` or - /// `4`. For empty classes, `None` is returned. It is impossible for `0` to - /// be returned. - /// - /// # Example - /// - /// This example shows some examples of regexes and their corresponding - /// minimum length, if any. - /// - /// ``` - /// use regex_syntax::{hir::Properties, parse}; - /// - /// // The empty string has a min length of 0. - /// let hir = parse(r"")?; - /// assert_eq!(Some(0), hir.properties().minimum_len()); - /// // As do other types of regexes that only match the empty string. - /// let hir = parse(r"^$\b\B")?; - /// assert_eq!(Some(0), hir.properties().minimum_len()); - /// // A regex that can match the empty string but match more is still 0. - /// let hir = parse(r"a*")?; - /// assert_eq!(Some(0), hir.properties().minimum_len()); - /// // A regex that matches nothing has no minimum defined. - /// let hir = parse(r"[a&&b]")?; - /// assert_eq!(None, hir.properties().minimum_len()); - /// // Character classes usually have a minimum length of 1. - /// let hir = parse(r"\w")?; - /// assert_eq!(Some(1), hir.properties().minimum_len()); - /// // But sometimes Unicode classes might be bigger! 
- /// let hir = parse(r"\p{Cyrillic}")?; - /// assert_eq!(Some(2), hir.properties().minimum_len()); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn minimum_len(&self) -> Option { - match *self { - Class::Unicode(ref x) => x.minimum_len(), - Class::Bytes(ref x) => x.minimum_len(), - } - } - - /// Returns the length, in bytes, of the longest string matched by this - /// character class. - /// - /// For non-empty byte oriented classes, this always returns `1`. For - /// non-empty Unicode oriented classes, this can return `1`, `2`, `3` or - /// `4`. For empty classes, `None` is returned. It is impossible for `0` to - /// be returned. - /// - /// # Example - /// - /// This example shows some examples of regexes and their corresponding - /// maximum length, if any. - /// - /// ``` - /// use regex_syntax::{hir::Properties, parse}; - /// - /// // The empty string has a max length of 0. - /// let hir = parse(r"")?; - /// assert_eq!(Some(0), hir.properties().maximum_len()); - /// // As do other types of regexes that only match the empty string. - /// let hir = parse(r"^$\b\B")?; - /// assert_eq!(Some(0), hir.properties().maximum_len()); - /// // A regex that matches nothing has no maximum defined. - /// let hir = parse(r"[a&&b]")?; - /// assert_eq!(None, hir.properties().maximum_len()); - /// // Bounded repeats work as you expect. - /// let hir = parse(r"x{2,10}")?; - /// assert_eq!(Some(10), hir.properties().maximum_len()); - /// // An unbounded repeat means there is no maximum. - /// let hir = parse(r"x{2,}")?; - /// assert_eq!(None, hir.properties().maximum_len()); - /// // With Unicode enabled, \w can match up to 4 bytes! - /// let hir = parse(r"\w")?; - /// assert_eq!(Some(4), hir.properties().maximum_len()); - /// // Without Unicode enabled, \w matches at most 1 byte. 
- /// let hir = parse(r"(?-u)\w")?; - /// assert_eq!(Some(1), hir.properties().maximum_len()); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn maximum_len(&self) -> Option { - match *self { - Class::Unicode(ref x) => x.maximum_len(), - Class::Bytes(ref x) => x.maximum_len(), - } - } - - /// Returns true if and only if this character class is empty. That is, - /// it has no elements. - /// - /// An empty character can never match anything, including an empty string. - pub fn is_empty(&self) -> bool { - match *self { - Class::Unicode(ref x) => x.ranges().is_empty(), - Class::Bytes(ref x) => x.ranges().is_empty(), - } - } - - /// If this class consists of exactly one element (whether a codepoint or a - /// byte), then return it as a literal byte string. - /// - /// If this class is empty or contains more than one element, then `None` - /// is returned. - pub fn literal(&self) -> Option> { - match *self { - Class::Unicode(ref x) => x.literal(), - Class::Bytes(ref x) => x.literal(), - } - } -} - -impl core::fmt::Debug for Class { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - use crate::debug::Byte; - - let mut fmter = f.debug_set(); - match *self { - Class::Unicode(ref cls) => { - for r in cls.ranges().iter() { - fmter.entry(&(r.start..=r.end)); - } - } - Class::Bytes(ref cls) => { - for r in cls.ranges().iter() { - fmter.entry(&(Byte(r.start)..=Byte(r.end))); - } - } - } - fmter.finish() - } -} - -/// A set of characters represented by Unicode scalar values. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct ClassUnicode { - set: IntervalSet, -} - -impl ClassUnicode { - /// Create a new class from a sequence of ranges. - /// - /// The given ranges do not need to be in any specific order, and ranges - /// may overlap. Ranges will automatically be sorted into a canonical - /// non-overlapping order. 
- pub fn new(ranges: I) -> ClassUnicode - where - I: IntoIterator, - { - ClassUnicode { set: IntervalSet::new(ranges) } - } - - /// Create a new class with no ranges. - /// - /// An empty class matches nothing. That is, it is equivalent to - /// [`Hir::fail`]. - pub fn empty() -> ClassUnicode { - ClassUnicode::new(vec![]) - } - - /// Add a new range to this set. - pub fn push(&mut self, range: ClassUnicodeRange) { - self.set.push(range); - } - - /// Return an iterator over all ranges in this class. - /// - /// The iterator yields ranges in ascending order. - pub fn iter(&self) -> ClassUnicodeIter<'_> { - ClassUnicodeIter(self.set.iter()) - } - - /// Return the underlying ranges as a slice. - pub fn ranges(&self) -> &[ClassUnicodeRange] { - self.set.intervals() - } - - /// Expand this character class such that it contains all case folded - /// characters, according to Unicode's "simple" mapping. For example, if - /// this class consists of the range `a-z`, then applying case folding will - /// result in the class containing both the ranges `a-z` and `A-Z`. - /// - /// # Panics - /// - /// This routine panics when the case mapping data necessary for this - /// routine to complete is unavailable. This occurs when the `unicode-case` - /// feature is not enabled. - /// - /// Callers should prefer using `try_case_fold_simple` instead, which will - /// return an error instead of panicking. - pub fn case_fold_simple(&mut self) { - self.set - .case_fold_simple() - .expect("unicode-case feature must be enabled"); - } - - /// Expand this character class such that it contains all case folded - /// characters, according to Unicode's "simple" mapping. For example, if - /// this class consists of the range `a-z`, then applying case folding will - /// result in the class containing both the ranges `a-z` and `A-Z`. - /// - /// # Error - /// - /// This routine returns an error when the case mapping data necessary - /// for this routine to complete is unavailable. 
This occurs when the - /// `unicode-case` feature is not enabled. - pub fn try_case_fold_simple( - &mut self, - ) -> core::result::Result<(), CaseFoldError> { - self.set.case_fold_simple() - } - - /// Negate this character class. - /// - /// For all `c` where `c` is a Unicode scalar value, if `c` was in this - /// set, then it will not be in this set after negation. - pub fn negate(&mut self) { - self.set.negate(); - } - - /// Union this character class with the given character class, in place. - pub fn union(&mut self, other: &ClassUnicode) { - self.set.union(&other.set); - } - - /// Intersect this character class with the given character class, in - /// place. - pub fn intersect(&mut self, other: &ClassUnicode) { - self.set.intersect(&other.set); - } - - /// Subtract the given character class from this character class, in place. - pub fn difference(&mut self, other: &ClassUnicode) { - self.set.difference(&other.set); - } - - /// Compute the symmetric difference of the given character classes, in - /// place. - /// - /// This computes the symmetric difference of two character classes. This - /// removes all elements in this class that are also in the given class, - /// but all adds all elements from the given class that aren't in this - /// class. That is, the class will contain all elements in either class, - /// but will not contain any elements that are in both classes. - pub fn symmetric_difference(&mut self, other: &ClassUnicode) { - self.set.symmetric_difference(&other.set); - } - - /// Returns true if and only if this character class will either match - /// nothing or only ASCII bytes. Stated differently, this returns false - /// if and only if this class contains a non-ASCII codepoint. - pub fn is_ascii(&self) -> bool { - self.set.intervals().last().map_or(true, |r| r.end <= '\x7F') - } - - /// Returns the length, in bytes, of the smallest string matched by this - /// character class. - /// - /// Returns `None` when the class is empty. 
- pub fn minimum_len(&self) -> Option { - let first = self.ranges().get(0)?; - // Correct because c1 < c2 implies c1.len_utf8() < c2.len_utf8(). - Some(first.start.len_utf8()) - } - - /// Returns the length, in bytes, of the longest string matched by this - /// character class. - /// - /// Returns `None` when the class is empty. - pub fn maximum_len(&self) -> Option { - let last = self.ranges().last()?; - // Correct because c1 < c2 implies c1.len_utf8() < c2.len_utf8(). - Some(last.end.len_utf8()) - } - - /// If this class consists of exactly one codepoint, then return it as - /// a literal byte string. - /// - /// If this class is empty or contains more than one codepoint, then `None` - /// is returned. - pub fn literal(&self) -> Option> { - let rs = self.ranges(); - if rs.len() == 1 && rs[0].start == rs[0].end { - Some(rs[0].start.encode_utf8(&mut [0; 4]).to_string().into_bytes()) - } else { - None - } - } - - /// If this class consists of only ASCII ranges, then return its - /// corresponding and equivalent byte class. - pub fn to_byte_class(&self) -> Option { - if !self.is_ascii() { - return None; - } - Some(ClassBytes::new(self.ranges().iter().map(|r| { - // Since we are guaranteed that our codepoint range is ASCII, the - // 'u8::try_from' calls below are guaranteed to be correct. - ClassBytesRange { - start: u8::try_from(r.start).unwrap(), - end: u8::try_from(r.end).unwrap(), - } - }))) - } -} - -/// An iterator over all ranges in a Unicode character class. -/// -/// The lifetime `'a` refers to the lifetime of the underlying class. -#[derive(Debug)] -pub struct ClassUnicodeIter<'a>(IntervalSetIter<'a, ClassUnicodeRange>); - -impl<'a> Iterator for ClassUnicodeIter<'a> { - type Item = &'a ClassUnicodeRange; - - fn next(&mut self) -> Option<&'a ClassUnicodeRange> { - self.0.next() - } -} - -/// A single range of characters represented by Unicode scalar values. -/// -/// The range is closed. That is, the start and end of the range are included -/// in the range. 
-#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)] -pub struct ClassUnicodeRange { - start: char, - end: char, -} - -impl core::fmt::Debug for ClassUnicodeRange { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - let start = if !self.start.is_whitespace() && !self.start.is_control() - { - self.start.to_string() - } else { - format!("0x{:X}", u32::from(self.start)) - }; - let end = if !self.end.is_whitespace() && !self.end.is_control() { - self.end.to_string() - } else { - format!("0x{:X}", u32::from(self.end)) - }; - f.debug_struct("ClassUnicodeRange") - .field("start", &start) - .field("end", &end) - .finish() - } -} - -impl Interval for ClassUnicodeRange { - type Bound = char; - - #[inline] - fn lower(&self) -> char { - self.start - } - #[inline] - fn upper(&self) -> char { - self.end - } - #[inline] - fn set_lower(&mut self, bound: char) { - self.start = bound; - } - #[inline] - fn set_upper(&mut self, bound: char) { - self.end = bound; - } - - /// Apply simple case folding to this Unicode scalar value range. - /// - /// Additional ranges are appended to the given vector. Canonical ordering - /// is *not* maintained in the given vector. - fn case_fold_simple( - &self, - ranges: &mut Vec, - ) -> Result<(), unicode::CaseFoldError> { - let mut folder = unicode::SimpleCaseFolder::new()?; - if !folder.overlaps(self.start, self.end) { - return Ok(()); - } - let (start, end) = (u32::from(self.start), u32::from(self.end)); - for cp in (start..=end).filter_map(char::from_u32) { - for &cp_folded in folder.mapping(cp) { - ranges.push(ClassUnicodeRange::new(cp_folded, cp_folded)); - } - } - Ok(()) - } -} - -impl ClassUnicodeRange { - /// Create a new Unicode scalar value range for a character class. - /// - /// The returned range is always in a canonical form. That is, the range - /// returned always satisfies the invariant that `start <= end`. 
- pub fn new(start: char, end: char) -> ClassUnicodeRange { - ClassUnicodeRange::create(start, end) - } - - /// Return the start of this range. - /// - /// The start of a range is always less than or equal to the end of the - /// range. - pub fn start(&self) -> char { - self.start - } - - /// Return the end of this range. - /// - /// The end of a range is always greater than or equal to the start of the - /// range. - pub fn end(&self) -> char { - self.end - } - - /// Returns the number of codepoints in this range. - pub fn len(&self) -> usize { - let diff = 1 + u32::from(self.end) - u32::from(self.start); - // This is likely to panic in 16-bit targets since a usize can only fit - // 2^16. It's not clear what to do here, other than to return an error - // when building a Unicode class that contains a range whose length - // overflows usize. (Which, to be honest, is probably quite common on - // 16-bit targets. For example, this would imply that '.' and '\p{any}' - // would be impossible to build.) - usize::try_from(diff).expect("char class len fits in usize") - } -} - -/// A set of characters represented by arbitrary bytes. -/// -/// Each byte corresponds to one character. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct ClassBytes { - set: IntervalSet, -} - -impl ClassBytes { - /// Create a new class from a sequence of ranges. - /// - /// The given ranges do not need to be in any specific order, and ranges - /// may overlap. Ranges will automatically be sorted into a canonical - /// non-overlapping order. - pub fn new(ranges: I) -> ClassBytes - where - I: IntoIterator, - { - ClassBytes { set: IntervalSet::new(ranges) } - } - - /// Create a new class with no ranges. - /// - /// An empty class matches nothing. That is, it is equivalent to - /// [`Hir::fail`]. - pub fn empty() -> ClassBytes { - ClassBytes::new(vec![]) - } - - /// Add a new range to this set. 
- pub fn push(&mut self, range: ClassBytesRange) { - self.set.push(range); - } - - /// Return an iterator over all ranges in this class. - /// - /// The iterator yields ranges in ascending order. - pub fn iter(&self) -> ClassBytesIter<'_> { - ClassBytesIter(self.set.iter()) - } - - /// Return the underlying ranges as a slice. - pub fn ranges(&self) -> &[ClassBytesRange] { - self.set.intervals() - } - - /// Expand this character class such that it contains all case folded - /// characters. For example, if this class consists of the range `a-z`, - /// then applying case folding will result in the class containing both the - /// ranges `a-z` and `A-Z`. - /// - /// Note that this only applies ASCII case folding, which is limited to the - /// characters `a-z` and `A-Z`. - pub fn case_fold_simple(&mut self) { - self.set.case_fold_simple().expect("ASCII case folding never fails"); - } - - /// Negate this byte class. - /// - /// For all `b` where `b` is a any byte, if `b` was in this set, then it - /// will not be in this set after negation. - pub fn negate(&mut self) { - self.set.negate(); - } - - /// Union this byte class with the given byte class, in place. - pub fn union(&mut self, other: &ClassBytes) { - self.set.union(&other.set); - } - - /// Intersect this byte class with the given byte class, in place. - pub fn intersect(&mut self, other: &ClassBytes) { - self.set.intersect(&other.set); - } - - /// Subtract the given byte class from this byte class, in place. - pub fn difference(&mut self, other: &ClassBytes) { - self.set.difference(&other.set); - } - - /// Compute the symmetric difference of the given byte classes, in place. - /// - /// This computes the symmetric difference of two byte classes. This - /// removes all elements in this class that are also in the given class, - /// but all adds all elements from the given class that aren't in this - /// class. 
That is, the class will contain all elements in either class, - /// but will not contain any elements that are in both classes. - pub fn symmetric_difference(&mut self, other: &ClassBytes) { - self.set.symmetric_difference(&other.set); - } - - /// Returns true if and only if this character class will either match - /// nothing or only ASCII bytes. Stated differently, this returns false - /// if and only if this class contains a non-ASCII byte. - pub fn is_ascii(&self) -> bool { - self.set.intervals().last().map_or(true, |r| r.end <= 0x7F) - } - - /// Returns the length, in bytes, of the smallest string matched by this - /// character class. - /// - /// Returns `None` when the class is empty. - pub fn minimum_len(&self) -> Option { - if self.ranges().is_empty() { - None - } else { - Some(1) - } - } - - /// Returns the length, in bytes, of the longest string matched by this - /// character class. - /// - /// Returns `None` when the class is empty. - pub fn maximum_len(&self) -> Option { - if self.ranges().is_empty() { - None - } else { - Some(1) - } - } - - /// If this class consists of exactly one byte, then return it as - /// a literal byte string. - /// - /// If this class is empty or contains more than one byte, then `None` - /// is returned. - pub fn literal(&self) -> Option> { - let rs = self.ranges(); - if rs.len() == 1 && rs[0].start == rs[0].end { - Some(vec![rs[0].start]) - } else { - None - } - } - - /// If this class consists of only ASCII ranges, then return its - /// corresponding and equivalent Unicode class. - pub fn to_unicode_class(&self) -> Option { - if !self.is_ascii() { - return None; - } - Some(ClassUnicode::new(self.ranges().iter().map(|r| { - // Since we are guaranteed that our byte range is ASCII, the - // 'char::from' calls below are correct and will not erroneously - // convert a raw byte value into its corresponding codepoint. 
- ClassUnicodeRange { - start: char::from(r.start), - end: char::from(r.end), - } - }))) - } -} - -/// An iterator over all ranges in a byte character class. -/// -/// The lifetime `'a` refers to the lifetime of the underlying class. -#[derive(Debug)] -pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>); - -impl<'a> Iterator for ClassBytesIter<'a> { - type Item = &'a ClassBytesRange; - - fn next(&mut self) -> Option<&'a ClassBytesRange> { - self.0.next() - } -} - -/// A single range of characters represented by arbitrary bytes. -/// -/// The range is closed. That is, the start and end of the range are included -/// in the range. -#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)] -pub struct ClassBytesRange { - start: u8, - end: u8, -} - -impl Interval for ClassBytesRange { - type Bound = u8; - - #[inline] - fn lower(&self) -> u8 { - self.start - } - #[inline] - fn upper(&self) -> u8 { - self.end - } - #[inline] - fn set_lower(&mut self, bound: u8) { - self.start = bound; - } - #[inline] - fn set_upper(&mut self, bound: u8) { - self.end = bound; - } - - /// Apply simple case folding to this byte range. Only ASCII case mappings - /// (for a-z) are applied. - /// - /// Additional ranges are appended to the given vector. Canonical ordering - /// is *not* maintained in the given vector. - fn case_fold_simple( - &self, - ranges: &mut Vec, - ) -> Result<(), unicode::CaseFoldError> { - if !ClassBytesRange::new(b'a', b'z').is_intersection_empty(self) { - let lower = cmp::max(self.start, b'a'); - let upper = cmp::min(self.end, b'z'); - ranges.push(ClassBytesRange::new(lower - 32, upper - 32)); - } - if !ClassBytesRange::new(b'A', b'Z').is_intersection_empty(self) { - let lower = cmp::max(self.start, b'A'); - let upper = cmp::min(self.end, b'Z'); - ranges.push(ClassBytesRange::new(lower + 32, upper + 32)); - } - Ok(()) - } -} - -impl ClassBytesRange { - /// Create a new byte range for a character class. 
- /// - /// The returned range is always in a canonical form. That is, the range - /// returned always satisfies the invariant that `start <= end`. - pub fn new(start: u8, end: u8) -> ClassBytesRange { - ClassBytesRange::create(start, end) - } - - /// Return the start of this range. - /// - /// The start of a range is always less than or equal to the end of the - /// range. - pub fn start(&self) -> u8 { - self.start - } - - /// Return the end of this range. - /// - /// The end of a range is always greater than or equal to the start of the - /// range. - pub fn end(&self) -> u8 { - self.end - } - - /// Returns the number of bytes in this range. - pub fn len(&self) -> usize { - usize::from(self.end.checked_sub(self.start).unwrap()) - .checked_add(1) - .unwrap() - } -} - -impl core::fmt::Debug for ClassBytesRange { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - f.debug_struct("ClassBytesRange") - .field("start", &crate::debug::Byte(self.start)) - .field("end", &crate::debug::Byte(self.end)) - .finish() - } -} - -/// The high-level intermediate representation for a look-around assertion. -/// -/// An assertion match is always zero-length. Also called an "empty match." -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum Look { - /// Match the beginning of text. Specifically, this matches at the starting - /// position of the input. - Start = 1 << 0, - /// Match the end of text. Specifically, this matches at the ending - /// position of the input. - End = 1 << 1, - /// Match the beginning of a line or the beginning of text. Specifically, - /// this matches at the starting position of the input, or at the position - /// immediately following a `\n` character. - StartLF = 1 << 2, - /// Match the end of a line or the end of text. Specifically, this matches - /// at the end position of the input, or at the position immediately - /// preceding a `\n` character. - EndLF = 1 << 3, - /// Match the beginning of a line or the beginning of text. 
Specifically, - /// this matches at the starting position of the input, or at the position - /// immediately following either a `\r` or `\n` character, but never after - /// a `\r` when a `\n` follows. - StartCRLF = 1 << 4, - /// Match the end of a line or the end of text. Specifically, this matches - /// at the end position of the input, or at the position immediately - /// preceding a `\r` or `\n` character, but never before a `\n` when a `\r` - /// precedes it. - EndCRLF = 1 << 5, - /// Match an ASCII-only word boundary. That is, this matches a position - /// where the left adjacent character and right adjacent character - /// correspond to a word and non-word or a non-word and word character. - WordAscii = 1 << 6, - /// Match an ASCII-only negation of a word boundary. - WordAsciiNegate = 1 << 7, - /// Match a Unicode-aware word boundary. That is, this matches a position - /// where the left adjacent character and right adjacent character - /// correspond to a word and non-word or a non-word and word character. - WordUnicode = 1 << 8, - /// Match a Unicode-aware negation of a word boundary. - WordUnicodeNegate = 1 << 9, - /// Match the start of an ASCII-only word boundary. That is, this matches a - /// position at either the beginning of the haystack or where the previous - /// character is not a word character and the following character is a word - /// character. - WordStartAscii = 1 << 10, - /// Match the end of an ASCII-only word boundary. That is, this matches - /// a position at either the end of the haystack or where the previous - /// character is a word character and the following character is not a word - /// character. - WordEndAscii = 1 << 11, - /// Match the start of a Unicode word boundary. That is, this matches a - /// position at either the beginning of the haystack or where the previous - /// character is not a word character and the following character is a word - /// character. 
- WordStartUnicode = 1 << 12, - /// Match the end of a Unicode word boundary. That is, this matches a - /// position at either the end of the haystack or where the previous - /// character is a word character and the following character is not a word - /// character. - WordEndUnicode = 1 << 13, - /// Match the start half of an ASCII-only word boundary. That is, this - /// matches a position at either the beginning of the haystack or where the - /// previous character is not a word character. - WordStartHalfAscii = 1 << 14, - /// Match the end half of an ASCII-only word boundary. That is, this - /// matches a position at either the end of the haystack or where the - /// following character is not a word character. - WordEndHalfAscii = 1 << 15, - /// Match the start half of a Unicode word boundary. That is, this matches - /// a position at either the beginning of the haystack or where the - /// previous character is not a word character. - WordStartHalfUnicode = 1 << 16, - /// Match the end half of a Unicode word boundary. That is, this matches - /// a position at either the end of the haystack or where the following - /// character is not a word character. - WordEndHalfUnicode = 1 << 17, -} - -impl Look { - /// Flip the look-around assertion to its equivalent for reverse searches. - /// For example, `StartLF` gets translated to `EndLF`. - /// - /// Some assertions, such as `WordUnicode`, remain the same since they - /// match the same positions regardless of the direction of the search. 
- #[inline] - pub const fn reversed(self) -> Look { - match self { - Look::Start => Look::End, - Look::End => Look::Start, - Look::StartLF => Look::EndLF, - Look::EndLF => Look::StartLF, - Look::StartCRLF => Look::EndCRLF, - Look::EndCRLF => Look::StartCRLF, - Look::WordAscii => Look::WordAscii, - Look::WordAsciiNegate => Look::WordAsciiNegate, - Look::WordUnicode => Look::WordUnicode, - Look::WordUnicodeNegate => Look::WordUnicodeNegate, - Look::WordStartAscii => Look::WordEndAscii, - Look::WordEndAscii => Look::WordStartAscii, - Look::WordStartUnicode => Look::WordEndUnicode, - Look::WordEndUnicode => Look::WordStartUnicode, - Look::WordStartHalfAscii => Look::WordEndHalfAscii, - Look::WordEndHalfAscii => Look::WordStartHalfAscii, - Look::WordStartHalfUnicode => Look::WordEndHalfUnicode, - Look::WordEndHalfUnicode => Look::WordStartHalfUnicode, - } - } - - /// Return the underlying representation of this look-around enumeration - /// as an integer. Giving the return value to the [`Look::from_repr`] - /// constructor is guaranteed to return the same look-around variant that - /// one started with within a semver compatible release of this crate. - #[inline] - pub const fn as_repr(self) -> u32 { - // AFAIK, 'as' is the only way to zero-cost convert an int enum to an - // actual int. - self as u32 - } - - /// Given the underlying representation of a `Look` value, return the - /// corresponding `Look` value if the representation is valid. Otherwise - /// `None` is returned. 
- #[inline] - pub const fn from_repr(repr: u32) -> Option { - match repr { - 0b00_0000_0000_0000_0001 => Some(Look::Start), - 0b00_0000_0000_0000_0010 => Some(Look::End), - 0b00_0000_0000_0000_0100 => Some(Look::StartLF), - 0b00_0000_0000_0000_1000 => Some(Look::EndLF), - 0b00_0000_0000_0001_0000 => Some(Look::StartCRLF), - 0b00_0000_0000_0010_0000 => Some(Look::EndCRLF), - 0b00_0000_0000_0100_0000 => Some(Look::WordAscii), - 0b00_0000_0000_1000_0000 => Some(Look::WordAsciiNegate), - 0b00_0000_0001_0000_0000 => Some(Look::WordUnicode), - 0b00_0000_0010_0000_0000 => Some(Look::WordUnicodeNegate), - 0b00_0000_0100_0000_0000 => Some(Look::WordStartAscii), - 0b00_0000_1000_0000_0000 => Some(Look::WordEndAscii), - 0b00_0001_0000_0000_0000 => Some(Look::WordStartUnicode), - 0b00_0010_0000_0000_0000 => Some(Look::WordEndUnicode), - 0b00_0100_0000_0000_0000 => Some(Look::WordStartHalfAscii), - 0b00_1000_0000_0000_0000 => Some(Look::WordEndHalfAscii), - 0b01_0000_0000_0000_0000 => Some(Look::WordStartHalfUnicode), - 0b10_0000_0000_0000_0000 => Some(Look::WordEndHalfUnicode), - _ => None, - } - } - - /// Returns a convenient single codepoint representation of this - /// look-around assertion. Each assertion is guaranteed to be represented - /// by a distinct character. - /// - /// This is useful for succinctly representing a look-around assertion in - /// human friendly but succinct output intended for a programmer working on - /// regex internals. 
- #[inline] - pub const fn as_char(self) -> char { - match self { - Look::Start => 'A', - Look::End => 'z', - Look::StartLF => '^', - Look::EndLF => '$', - Look::StartCRLF => 'r', - Look::EndCRLF => 'R', - Look::WordAscii => 'b', - Look::WordAsciiNegate => 'B', - Look::WordUnicode => '𝛃', - Look::WordUnicodeNegate => '𝚩', - Look::WordStartAscii => '<', - Look::WordEndAscii => '>', - Look::WordStartUnicode => '〈', - Look::WordEndUnicode => '〉', - Look::WordStartHalfAscii => '◁', - Look::WordEndHalfAscii => '▷', - Look::WordStartHalfUnicode => '◀', - Look::WordEndHalfUnicode => '▶', - } - } -} - -/// The high-level intermediate representation for a capturing group. -/// -/// A capturing group always has an index and a child expression. It may -/// also have a name associated with it (e.g., `(?P\w)`), but it's not -/// necessary. -/// -/// Note that there is no explicit representation of a non-capturing group -/// in a `Hir`. Instead, non-capturing grouping is handled automatically by -/// the recursive structure of the `Hir` itself. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Capture { - /// The capture index of the capture. - pub index: u32, - /// The name of the capture, if it exists. - pub name: Option>, - /// The expression inside the capturing group, which may be empty. - pub sub: Box, -} - -/// The high-level intermediate representation of a repetition operator. -/// -/// A repetition operator permits the repetition of an arbitrary -/// sub-expression. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Repetition { - /// The minimum range of the repetition. - /// - /// Note that special cases like `?`, `+` and `*` all get translated into - /// the ranges `{0,1}`, `{1,}` and `{0,}`, respectively. - /// - /// When `min` is zero, this expression can match the empty string - /// regardless of what its sub-expression is. - pub min: u32, - /// The maximum range of the repetition. 
- /// - /// Note that when `max` is `None`, `min` acts as a lower bound but where - /// there is no upper bound. For something like `x{5}` where the min and - /// max are equivalent, `min` will be set to `5` and `max` will be set to - /// `Some(5)`. - pub max: Option, - /// Whether this repetition operator is greedy or not. A greedy operator - /// will match as much as it can. A non-greedy operator will match as - /// little as it can. - /// - /// Typically, operators are greedy by default and are only non-greedy when - /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is - /// not. However, this can be inverted via the `U` "ungreedy" flag. - pub greedy: bool, - /// The expression being repeated. - pub sub: Box, -} - -impl Repetition { - /// Returns a new repetition with the same `min`, `max` and `greedy` - /// values, but with its sub-expression replaced with the one given. - pub fn with(&self, sub: Hir) -> Repetition { - Repetition { - min: self.min, - max: self.max, - greedy: self.greedy, - sub: Box::new(sub), - } - } -} - -/// A type describing the different flavors of `.`. -/// -/// This type is meant to be used with [`Hir::dot`], which is a convenience -/// routine for building HIR values derived from the `.` regex. -#[non_exhaustive] -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum Dot { - /// Matches the UTF-8 encoding of any Unicode scalar value. - /// - /// This is equivalent to `(?su:.)` and also `\p{any}`. - AnyChar, - /// Matches any byte value. - /// - /// This is equivalent to `(?s-u:.)` and also `(?-u:[\x00-\xFF])`. - AnyByte, - /// Matches the UTF-8 encoding of any Unicode scalar value except for the - /// `char` given. - /// - /// This is equivalent to using `(?u-s:.)` with the line terminator set - /// to a particular ASCII byte. (Because of peculiarities in the regex - /// engines, a line terminator must be a single byte. 
It follows that when - /// UTF-8 mode is enabled, this single byte must also be a Unicode scalar - /// value. That is, ti must be ASCII.) - /// - /// (This and `AnyCharExceptLF` both exist because of legacy reasons. - /// `AnyCharExceptLF` will be dropped in the next breaking change release.) - AnyCharExcept(char), - /// Matches the UTF-8 encoding of any Unicode scalar value except for `\n`. - /// - /// This is equivalent to `(?u-s:.)` and also `[\p{any}--\n]`. - AnyCharExceptLF, - /// Matches the UTF-8 encoding of any Unicode scalar value except for `\r` - /// and `\n`. - /// - /// This is equivalent to `(?uR-s:.)` and also `[\p{any}--\r\n]`. - AnyCharExceptCRLF, - /// Matches any byte value except for the `u8` given. - /// - /// This is equivalent to using `(?-us:.)` with the line terminator set - /// to a particular ASCII byte. (Because of peculiarities in the regex - /// engines, a line terminator must be a single byte. It follows that when - /// UTF-8 mode is enabled, this single byte must also be a Unicode scalar - /// value. That is, ti must be ASCII.) - /// - /// (This and `AnyByteExceptLF` both exist because of legacy reasons. - /// `AnyByteExceptLF` will be dropped in the next breaking change release.) - AnyByteExcept(u8), - /// Matches any byte value except for `\n`. - /// - /// This is equivalent to `(?-su:.)` and also `(?-u:[[\x00-\xFF]--\n])`. - AnyByteExceptLF, - /// Matches any byte value except for `\r` and `\n`. - /// - /// This is equivalent to `(?R-su:.)` and also `(?-u:[[\x00-\xFF]--\r\n])`. - AnyByteExceptCRLF, -} - -/// A custom `Drop` impl is used for `HirKind` such that it uses constant stack -/// space but heap space proportional to the depth of the total `Hir`. 
-impl Drop for Hir { - fn drop(&mut self) { - use core::mem; - - match *self.kind() { - HirKind::Empty - | HirKind::Literal(_) - | HirKind::Class(_) - | HirKind::Look(_) => return, - HirKind::Capture(ref x) if x.sub.kind.subs().is_empty() => return, - HirKind::Repetition(ref x) if x.sub.kind.subs().is_empty() => { - return - } - HirKind::Concat(ref x) if x.is_empty() => return, - HirKind::Alternation(ref x) if x.is_empty() => return, - _ => {} - } - - let mut stack = vec![mem::replace(self, Hir::empty())]; - while let Some(mut expr) = stack.pop() { - match expr.kind { - HirKind::Empty - | HirKind::Literal(_) - | HirKind::Class(_) - | HirKind::Look(_) => {} - HirKind::Capture(ref mut x) => { - stack.push(mem::replace(&mut x.sub, Hir::empty())); - } - HirKind::Repetition(ref mut x) => { - stack.push(mem::replace(&mut x.sub, Hir::empty())); - } - HirKind::Concat(ref mut x) => { - stack.extend(x.drain(..)); - } - HirKind::Alternation(ref mut x) => { - stack.extend(x.drain(..)); - } - } - } - } -} - -/// A type that collects various properties of an HIR value. -/// -/// Properties are always scalar values and represent meta data that is -/// computed inductively on an HIR value. Properties are defined for all -/// HIR values. -/// -/// All methods on a `Properties` value take constant time and are meant to -/// be cheap to call. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Properties(Box); - -/// The property definition. It is split out so that we can box it, and -/// there by make `Properties` use less stack size. This is kind-of important -/// because every HIR value has a `Properties` attached to it. -/// -/// This does have the unfortunate consequence that creating any HIR value -/// always leads to at least one alloc for properties, but this is generally -/// true anyway (for pretty much all HirKinds except for look-arounds). 
-#[derive(Clone, Debug, Eq, PartialEq)] -struct PropertiesI { - minimum_len: Option, - maximum_len: Option, - look_set: LookSet, - look_set_prefix: LookSet, - look_set_suffix: LookSet, - look_set_prefix_any: LookSet, - look_set_suffix_any: LookSet, - utf8: bool, - explicit_captures_len: usize, - static_explicit_captures_len: Option, - literal: bool, - alternation_literal: bool, -} - -impl Properties { - /// Returns the length (in bytes) of the smallest string matched by this - /// HIR. - /// - /// A return value of `0` is possible and occurs when the HIR can match an - /// empty string. - /// - /// `None` is returned when there is no minimum length. This occurs in - /// precisely the cases where the HIR matches nothing. i.e., The language - /// the regex matches is empty. An example of such a regex is `\P{any}`. - #[inline] - pub fn minimum_len(&self) -> Option { - self.0.minimum_len - } - - /// Returns the length (in bytes) of the longest string matched by this - /// HIR. - /// - /// A return value of `0` is possible and occurs when nothing longer than - /// the empty string is in the language described by this HIR. - /// - /// `None` is returned when there is no longest matching string. This - /// occurs when the HIR matches nothing or when there is no upper bound on - /// the length of matching strings. Example of such regexes are `\P{any}` - /// (matches nothing) and `a+` (has no upper bound). - #[inline] - pub fn maximum_len(&self) -> Option { - self.0.maximum_len - } - - /// Returns a set of all look-around assertions that appear at least once - /// in this HIR value. - #[inline] - pub fn look_set(&self) -> LookSet { - self.0.look_set - } - - /// Returns a set of all look-around assertions that appear as a prefix for - /// this HIR value. That is, the set returned corresponds to the set of - /// assertions that must be passed before matching any bytes in a haystack. 
- /// - /// For example, `hir.look_set_prefix().contains(Look::Start)` returns true - /// if and only if the HIR is fully anchored at the start. - #[inline] - pub fn look_set_prefix(&self) -> LookSet { - self.0.look_set_prefix - } - - /// Returns a set of all look-around assertions that appear as a _possible_ - /// prefix for this HIR value. That is, the set returned corresponds to the - /// set of assertions that _may_ be passed before matching any bytes in a - /// haystack. - /// - /// For example, `hir.look_set_prefix_any().contains(Look::Start)` returns - /// true if and only if it's possible for the regex to match through a - /// anchored assertion before consuming any input. - #[inline] - pub fn look_set_prefix_any(&self) -> LookSet { - self.0.look_set_prefix_any - } - - /// Returns a set of all look-around assertions that appear as a suffix for - /// this HIR value. That is, the set returned corresponds to the set of - /// assertions that must be passed in order to be considered a match after - /// all other consuming HIR expressions. - /// - /// For example, `hir.look_set_suffix().contains(Look::End)` returns true - /// if and only if the HIR is fully anchored at the end. - #[inline] - pub fn look_set_suffix(&self) -> LookSet { - self.0.look_set_suffix - } - - /// Returns a set of all look-around assertions that appear as a _possible_ - /// suffix for this HIR value. That is, the set returned corresponds to the - /// set of assertions that _may_ be passed before matching any bytes in a - /// haystack. - /// - /// For example, `hir.look_set_suffix_any().contains(Look::End)` returns - /// true if and only if it's possible for the regex to match through a - /// anchored assertion at the end of a match without consuming any input. - #[inline] - pub fn look_set_suffix_any(&self) -> LookSet { - self.0.look_set_suffix_any - } - - /// Return true if and only if the corresponding HIR will always match - /// valid UTF-8. 
- /// - /// When this returns false, then it is possible for this HIR expression to - /// match invalid UTF-8, including by matching between the code units of - /// a single UTF-8 encoded codepoint. - /// - /// Note that this returns true even when the corresponding HIR can match - /// the empty string. Since an empty string can technically appear between - /// UTF-8 code units, it is possible for a match to be reported that splits - /// a codepoint which could in turn be considered matching invalid UTF-8. - /// However, it is generally assumed that such empty matches are handled - /// specially by the search routine if it is absolutely required that - /// matches not split a codepoint. - /// - /// # Example - /// - /// This code example shows the UTF-8 property of a variety of patterns. - /// - /// ``` - /// use regex_syntax::{ParserBuilder, parse}; - /// - /// // Examples of 'is_utf8() == true'. - /// assert!(parse(r"a")?.properties().is_utf8()); - /// assert!(parse(r"[^a]")?.properties().is_utf8()); - /// assert!(parse(r".")?.properties().is_utf8()); - /// assert!(parse(r"\W")?.properties().is_utf8()); - /// assert!(parse(r"\b")?.properties().is_utf8()); - /// assert!(parse(r"\B")?.properties().is_utf8()); - /// assert!(parse(r"(?-u)\b")?.properties().is_utf8()); - /// assert!(parse(r"(?-u)\B")?.properties().is_utf8()); - /// // Unicode mode is enabled by default, and in - /// // that mode, all \x hex escapes are treated as - /// // codepoints. So this actually matches the UTF-8 - /// // encoding of U+00FF. - /// assert!(parse(r"\xFF")?.properties().is_utf8()); - /// - /// // Now we show examples of 'is_utf8() == false'. - /// // The only way to do this is to force the parser - /// // to permit invalid UTF-8, otherwise all of these - /// // would fail to parse! 
- /// let parse = |pattern| { - /// ParserBuilder::new().utf8(false).build().parse(pattern) - /// }; - /// assert!(!parse(r"(?-u)[^a]")?.properties().is_utf8()); - /// assert!(!parse(r"(?-u).")?.properties().is_utf8()); - /// assert!(!parse(r"(?-u)\W")?.properties().is_utf8()); - /// // Conversely to the equivalent example above, - /// // when Unicode mode is disabled, \x hex escapes - /// // are treated as their raw byte values. - /// assert!(!parse(r"(?-u)\xFF")?.properties().is_utf8()); - /// // Note that just because we disabled UTF-8 in the - /// // parser doesn't mean we still can't use Unicode. - /// // It is enabled by default, so \xFF is still - /// // equivalent to matching the UTF-8 encoding of - /// // U+00FF by default. - /// assert!(parse(r"\xFF")?.properties().is_utf8()); - /// // Even though we use raw bytes that individually - /// // are not valid UTF-8, when combined together, the - /// // overall expression *does* match valid UTF-8! - /// assert!(parse(r"(?-u)\xE2\x98\x83")?.properties().is_utf8()); - /// - /// # Ok::<(), Box>(()) - /// ``` - #[inline] - pub fn is_utf8(&self) -> bool { - self.0.utf8 - } - - /// Returns the total number of explicit capturing groups in the - /// corresponding HIR. - /// - /// Note that this does not include the implicit capturing group - /// corresponding to the entire match that is typically included by regex - /// engines. - /// - /// # Example - /// - /// This method will return `0` for `a` and `1` for `(a)`: - /// - /// ``` - /// use regex_syntax::parse; - /// - /// assert_eq!(0, parse("a")?.properties().explicit_captures_len()); - /// assert_eq!(1, parse("(a)")?.properties().explicit_captures_len()); - /// - /// # Ok::<(), Box>(()) - /// ``` - #[inline] - pub fn explicit_captures_len(&self) -> usize { - self.0.explicit_captures_len - } - - /// Returns the total number of explicit capturing groups that appear in - /// every possible match. 
- /// - /// If the number of capture groups can vary depending on the match, then - /// this returns `None`. That is, a value is only returned when the number - /// of matching groups is invariant or "static." - /// - /// Note that this does not include the implicit capturing group - /// corresponding to the entire match. - /// - /// # Example - /// - /// This shows a few cases where a static number of capture groups is - /// available and a few cases where it is not. - /// - /// ``` - /// use regex_syntax::parse; - /// - /// let len = |pattern| { - /// parse(pattern).map(|h| { - /// h.properties().static_explicit_captures_len() - /// }) - /// }; - /// - /// assert_eq!(Some(0), len("a")?); - /// assert_eq!(Some(1), len("(a)")?); - /// assert_eq!(Some(1), len("(a)|(b)")?); - /// assert_eq!(Some(2), len("(a)(b)|(c)(d)")?); - /// assert_eq!(None, len("(a)|b")?); - /// assert_eq!(None, len("a|(b)")?); - /// assert_eq!(None, len("(b)*")?); - /// assert_eq!(Some(1), len("(b)+")?); - /// - /// # Ok::<(), Box>(()) - /// ``` - #[inline] - pub fn static_explicit_captures_len(&self) -> Option { - self.0.static_explicit_captures_len - } - - /// Return true if and only if this HIR is a simple literal. This is - /// only true when this HIR expression is either itself a `Literal` or a - /// concatenation of only `Literal`s. - /// - /// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()` and - /// the empty string are not (even though they contain sub-expressions that - /// are literals). - #[inline] - pub fn is_literal(&self) -> bool { - self.0.literal - } - - /// Return true if and only if this HIR is either a simple literal or an - /// alternation of simple literals. This is only - /// true when this HIR expression is either itself a `Literal` or a - /// concatenation of only `Literal`s or an alternation of only `Literal`s. 
- /// - /// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternation - /// literals, but `f+`, `(foo)`, `foo()`, and the empty pattern are not - /// (even though that contain sub-expressions that are literals). - #[inline] - pub fn is_alternation_literal(&self) -> bool { - self.0.alternation_literal - } - - /// Returns the total amount of heap memory usage, in bytes, used by this - /// `Properties` value. - #[inline] - pub fn memory_usage(&self) -> usize { - core::mem::size_of::() - } - - /// Returns a new set of properties that corresponds to the union of the - /// iterator of properties given. - /// - /// This is useful when one has multiple `Hir` expressions and wants - /// to combine them into a single alternation without constructing the - /// corresponding `Hir`. This routine provides a way of combining the - /// properties of each `Hir` expression into one set of properties - /// representing the union of those expressions. - /// - /// # Example: union with HIRs that never match - /// - /// This example shows that unioning properties together with one that - /// represents a regex that never matches will "poison" certain attributes, - /// like the minimum and maximum lengths. 
- /// - /// ``` - /// use regex_syntax::{hir::Properties, parse}; - /// - /// let hir1 = parse("ab?c?")?; - /// assert_eq!(Some(1), hir1.properties().minimum_len()); - /// assert_eq!(Some(3), hir1.properties().maximum_len()); - /// - /// let hir2 = parse(r"[a&&b]")?; - /// assert_eq!(None, hir2.properties().minimum_len()); - /// assert_eq!(None, hir2.properties().maximum_len()); - /// - /// let hir3 = parse(r"wxy?z?")?; - /// assert_eq!(Some(2), hir3.properties().minimum_len()); - /// assert_eq!(Some(4), hir3.properties().maximum_len()); - /// - /// let unioned = Properties::union([ - /// hir1.properties(), - /// hir2.properties(), - /// hir3.properties(), - /// ]); - /// assert_eq!(None, unioned.minimum_len()); - /// assert_eq!(None, unioned.maximum_len()); - /// - /// # Ok::<(), Box>(()) - /// ``` - /// - /// The maximum length can also be "poisoned" by a pattern that has no - /// upper bound on the length of a match. The minimum length remains - /// unaffected: - /// - /// ``` - /// use regex_syntax::{hir::Properties, parse}; - /// - /// let hir1 = parse("ab?c?")?; - /// assert_eq!(Some(1), hir1.properties().minimum_len()); - /// assert_eq!(Some(3), hir1.properties().maximum_len()); - /// - /// let hir2 = parse(r"a+")?; - /// assert_eq!(Some(1), hir2.properties().minimum_len()); - /// assert_eq!(None, hir2.properties().maximum_len()); - /// - /// let hir3 = parse(r"wxy?z?")?; - /// assert_eq!(Some(2), hir3.properties().minimum_len()); - /// assert_eq!(Some(4), hir3.properties().maximum_len()); - /// - /// let unioned = Properties::union([ - /// hir1.properties(), - /// hir2.properties(), - /// hir3.properties(), - /// ]); - /// assert_eq!(Some(1), unioned.minimum_len()); - /// assert_eq!(None, unioned.maximum_len()); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn union(props: I) -> Properties - where - I: IntoIterator, - P: core::borrow::Borrow, - { - let mut it = props.into_iter().peekable(); - // While empty alternations aren't possible, we still behave as 
if they - // are. When we have an empty alternate, then clearly the look-around - // prefix and suffix is empty. Otherwise, it is the intersection of all - // prefixes and suffixes (respectively) of the branches. - let fix = if it.peek().is_none() { - LookSet::empty() - } else { - LookSet::full() - }; - // And also, an empty alternate means we have 0 static capture groups, - // but we otherwise start with the number corresponding to the first - // alternate. If any subsequent alternate has a different number of - // static capture groups, then we overall have a variation and not a - // static number of groups. - let static_explicit_captures_len = - it.peek().and_then(|p| p.borrow().static_explicit_captures_len()); - // The base case is an empty alternation, which matches nothing. - // Note though that empty alternations aren't possible, because the - // Hir::alternation smart constructor rewrites those as empty character - // classes. - let mut props = PropertiesI { - minimum_len: None, - maximum_len: None, - look_set: LookSet::empty(), - look_set_prefix: fix, - look_set_suffix: fix, - look_set_prefix_any: LookSet::empty(), - look_set_suffix_any: LookSet::empty(), - utf8: true, - explicit_captures_len: 0, - static_explicit_captures_len, - literal: false, - alternation_literal: true, - }; - let (mut min_poisoned, mut max_poisoned) = (false, false); - // Handle properties that need to visit every child hir. 
- for prop in it { - let p = prop.borrow(); - props.look_set.set_union(p.look_set()); - props.look_set_prefix.set_intersect(p.look_set_prefix()); - props.look_set_suffix.set_intersect(p.look_set_suffix()); - props.look_set_prefix_any.set_union(p.look_set_prefix_any()); - props.look_set_suffix_any.set_union(p.look_set_suffix_any()); - props.utf8 = props.utf8 && p.is_utf8(); - props.explicit_captures_len = props - .explicit_captures_len - .saturating_add(p.explicit_captures_len()); - if props.static_explicit_captures_len - != p.static_explicit_captures_len() - { - props.static_explicit_captures_len = None; - } - props.alternation_literal = - props.alternation_literal && p.is_literal(); - if !min_poisoned { - if let Some(xmin) = p.minimum_len() { - if props.minimum_len.map_or(true, |pmin| xmin < pmin) { - props.minimum_len = Some(xmin); - } - } else { - props.minimum_len = None; - min_poisoned = true; - } - } - if !max_poisoned { - if let Some(xmax) = p.maximum_len() { - if props.maximum_len.map_or(true, |pmax| xmax > pmax) { - props.maximum_len = Some(xmax); - } - } else { - props.maximum_len = None; - max_poisoned = true; - } - } - } - Properties(Box::new(props)) - } -} - -impl Properties { - /// Create a new set of HIR properties for an empty regex. - fn empty() -> Properties { - let inner = PropertiesI { - minimum_len: Some(0), - maximum_len: Some(0), - look_set: LookSet::empty(), - look_set_prefix: LookSet::empty(), - look_set_suffix: LookSet::empty(), - look_set_prefix_any: LookSet::empty(), - look_set_suffix_any: LookSet::empty(), - // It is debatable whether an empty regex always matches at valid - // UTF-8 boundaries. Strictly speaking, at a byte oriented view, - // it is clearly false. There are, for example, many empty strings - // between the bytes encoding a '☃'. - // - // However, when Unicode mode is enabled, the fundamental atom - // of matching is really a codepoint. 
And in that scenario, an - // empty regex is defined to only match at valid UTF-8 boundaries - // and to never split a codepoint. It just so happens that this - // enforcement is somewhat tricky to do for regexes that match - // the empty string inside regex engines themselves. It usually - // requires some layer above the regex engine to filter out such - // matches. - // - // In any case, 'true' is really the only coherent option. If it - // were false, for example, then 'a*' would also need to be false - // since it too can match the empty string. - utf8: true, - explicit_captures_len: 0, - static_explicit_captures_len: Some(0), - literal: false, - alternation_literal: false, - }; - Properties(Box::new(inner)) - } - - /// Create a new set of HIR properties for a literal regex. - fn literal(lit: &Literal) -> Properties { - let inner = PropertiesI { - minimum_len: Some(lit.0.len()), - maximum_len: Some(lit.0.len()), - look_set: LookSet::empty(), - look_set_prefix: LookSet::empty(), - look_set_suffix: LookSet::empty(), - look_set_prefix_any: LookSet::empty(), - look_set_suffix_any: LookSet::empty(), - utf8: core::str::from_utf8(&lit.0).is_ok(), - explicit_captures_len: 0, - static_explicit_captures_len: Some(0), - literal: true, - alternation_literal: true, - }; - Properties(Box::new(inner)) - } - - /// Create a new set of HIR properties for a character class. - fn class(class: &Class) -> Properties { - let inner = PropertiesI { - minimum_len: class.minimum_len(), - maximum_len: class.maximum_len(), - look_set: LookSet::empty(), - look_set_prefix: LookSet::empty(), - look_set_suffix: LookSet::empty(), - look_set_prefix_any: LookSet::empty(), - look_set_suffix_any: LookSet::empty(), - utf8: class.is_utf8(), - explicit_captures_len: 0, - static_explicit_captures_len: Some(0), - literal: false, - alternation_literal: false, - }; - Properties(Box::new(inner)) - } - - /// Create a new set of HIR properties for a look-around assertion. 
- fn look(look: Look) -> Properties { - let inner = PropertiesI { - minimum_len: Some(0), - maximum_len: Some(0), - look_set: LookSet::singleton(look), - look_set_prefix: LookSet::singleton(look), - look_set_suffix: LookSet::singleton(look), - look_set_prefix_any: LookSet::singleton(look), - look_set_suffix_any: LookSet::singleton(look), - // This requires a little explanation. Basically, we don't consider - // matching an empty string to be equivalent to matching invalid - // UTF-8, even though technically matching every empty string will - // split the UTF-8 encoding of a single codepoint when treating a - // UTF-8 encoded string as a sequence of bytes. Our defense here is - // that in such a case, a codepoint should logically be treated as - // the fundamental atom for matching, and thus the only valid match - // points are between codepoints and not bytes. - // - // More practically, this is true here because it's also true - // for 'Hir::empty()', otherwise something like 'a*' would be - // considered to match invalid UTF-8. That in turn makes this - // property borderline useless. - utf8: true, - explicit_captures_len: 0, - static_explicit_captures_len: Some(0), - literal: false, - alternation_literal: false, - }; - Properties(Box::new(inner)) - } - - /// Create a new set of HIR properties for a repetition. 
- fn repetition(rep: &Repetition) -> Properties { - let p = rep.sub.properties(); - let minimum_len = p.minimum_len().map(|child_min| { - let rep_min = usize::try_from(rep.min).unwrap_or(usize::MAX); - child_min.saturating_mul(rep_min) - }); - let maximum_len = rep.max.and_then(|rep_max| { - let rep_max = usize::try_from(rep_max).ok()?; - let child_max = p.maximum_len()?; - child_max.checked_mul(rep_max) - }); - - let mut inner = PropertiesI { - minimum_len, - maximum_len, - look_set: p.look_set(), - look_set_prefix: LookSet::empty(), - look_set_suffix: LookSet::empty(), - look_set_prefix_any: p.look_set_prefix_any(), - look_set_suffix_any: p.look_set_suffix_any(), - utf8: p.is_utf8(), - explicit_captures_len: p.explicit_captures_len(), - static_explicit_captures_len: p.static_explicit_captures_len(), - literal: false, - alternation_literal: false, - }; - // If the repetition operator can match the empty string, then its - // lookset prefix and suffixes themselves remain empty since they are - // no longer required to match. - if rep.min > 0 { - inner.look_set_prefix = p.look_set_prefix(); - inner.look_set_suffix = p.look_set_suffix(); - } - // If the static captures len of the sub-expression is not known or - // is greater than zero, then it automatically propagates to the - // repetition, regardless of the repetition. Otherwise, it might - // change, but only when the repetition can match 0 times. - if rep.min == 0 - && inner.static_explicit_captures_len.map_or(false, |len| len > 0) - { - // If we require a match 0 times, then our captures len is - // guaranteed to be zero. Otherwise, if we *can* match the empty - // string, then it's impossible to know how many captures will be - // in the resulting match. - if rep.max == Some(0) { - inner.static_explicit_captures_len = Some(0); - } else { - inner.static_explicit_captures_len = None; - } - } - Properties(Box::new(inner)) - } - - /// Create a new set of HIR properties for a capture. 
- fn capture(capture: &Capture) -> Properties { - let p = capture.sub.properties(); - Properties(Box::new(PropertiesI { - explicit_captures_len: p.explicit_captures_len().saturating_add(1), - static_explicit_captures_len: p - .static_explicit_captures_len() - .map(|len| len.saturating_add(1)), - literal: false, - alternation_literal: false, - ..*p.0.clone() - })) - } - - /// Create a new set of HIR properties for a concatenation. - fn concat(concat: &[Hir]) -> Properties { - // The base case is an empty concatenation, which matches the empty - // string. Note though that empty concatenations aren't possible, - // because the Hir::concat smart constructor rewrites those as - // Hir::empty. - let mut props = PropertiesI { - minimum_len: Some(0), - maximum_len: Some(0), - look_set: LookSet::empty(), - look_set_prefix: LookSet::empty(), - look_set_suffix: LookSet::empty(), - look_set_prefix_any: LookSet::empty(), - look_set_suffix_any: LookSet::empty(), - utf8: true, - explicit_captures_len: 0, - static_explicit_captures_len: Some(0), - literal: true, - alternation_literal: true, - }; - // Handle properties that need to visit every child hir. - for x in concat.iter() { - let p = x.properties(); - props.look_set.set_union(p.look_set()); - props.utf8 = props.utf8 && p.is_utf8(); - props.explicit_captures_len = props - .explicit_captures_len - .saturating_add(p.explicit_captures_len()); - props.static_explicit_captures_len = p - .static_explicit_captures_len() - .and_then(|len1| { - Some((len1, props.static_explicit_captures_len?)) - }) - .and_then(|(len1, len2)| Some(len1.saturating_add(len2))); - props.literal = props.literal && p.is_literal(); - props.alternation_literal = - props.alternation_literal && p.is_alternation_literal(); - if let Some(minimum_len) = props.minimum_len { - match p.minimum_len() { - None => props.minimum_len = None, - Some(len) => { - // We use saturating arithmetic here because the - // minimum is just a lower bound. 
We can't go any - // higher than what our number types permit. - props.minimum_len = - Some(minimum_len.saturating_add(len)); - } - } - } - if let Some(maximum_len) = props.maximum_len { - match p.maximum_len() { - None => props.maximum_len = None, - Some(len) => { - props.maximum_len = maximum_len.checked_add(len) - } - } - } - } - // Handle the prefix properties, which only requires visiting - // child exprs until one matches more than the empty string. - let mut it = concat.iter(); - while let Some(x) = it.next() { - props.look_set_prefix.set_union(x.properties().look_set_prefix()); - props - .look_set_prefix_any - .set_union(x.properties().look_set_prefix_any()); - if x.properties().maximum_len().map_or(true, |x| x > 0) { - break; - } - } - // Same thing for the suffix properties, but in reverse. - let mut it = concat.iter().rev(); - while let Some(x) = it.next() { - props.look_set_suffix.set_union(x.properties().look_set_suffix()); - props - .look_set_suffix_any - .set_union(x.properties().look_set_suffix_any()); - if x.properties().maximum_len().map_or(true, |x| x > 0) { - break; - } - } - Properties(Box::new(props)) - } - - /// Create a new set of HIR properties for a concatenation. - fn alternation(alts: &[Hir]) -> Properties { - Properties::union(alts.iter().map(|hir| hir.properties())) - } -} - -/// A set of look-around assertions. -/// -/// This is useful for efficiently tracking look-around assertions. For -/// example, an [`Hir`] provides properties that return `LookSet`s. -#[derive(Clone, Copy, Default, Eq, PartialEq)] -pub struct LookSet { - /// The underlying representation this set is exposed to make it possible - /// to store it somewhere efficiently. The representation is that - /// of a bitset, where each assertion occupies bit `i` where `i = - /// Look::as_repr()`. - /// - /// Note that users of this internal representation must permit the full - /// range of `u16` values to be represented. 
For example, even if the - /// current implementation only makes use of the 10 least significant bits, - /// it may use more bits in a future semver compatible release. - pub bits: u32, -} - -impl LookSet { - /// Create an empty set of look-around assertions. - #[inline] - pub fn empty() -> LookSet { - LookSet { bits: 0 } - } - - /// Create a full set of look-around assertions. - /// - /// This set contains all possible look-around assertions. - #[inline] - pub fn full() -> LookSet { - LookSet { bits: !0 } - } - - /// Create a look-around set containing the look-around assertion given. - /// - /// This is a convenience routine for creating an empty set and inserting - /// one look-around assertions. - #[inline] - pub fn singleton(look: Look) -> LookSet { - LookSet::empty().insert(look) - } - - /// Returns the total number of look-around assertions in this set. - #[inline] - pub fn len(self) -> usize { - // OK because max value always fits in a u8, which in turn always - // fits in a usize, regardless of target. - usize::try_from(self.bits.count_ones()).unwrap() - } - - /// Returns true if and only if this set is empty. - #[inline] - pub fn is_empty(self) -> bool { - self.len() == 0 - } - - /// Returns true if and only if the given look-around assertion is in this - /// set. - #[inline] - pub fn contains(self, look: Look) -> bool { - self.bits & look.as_repr() != 0 - } - - /// Returns true if and only if this set contains any anchor assertions. - /// This includes both "start/end of haystack" and "start/end of line." - #[inline] - pub fn contains_anchor(&self) -> bool { - self.contains_anchor_haystack() || self.contains_anchor_line() - } - - /// Returns true if and only if this set contains any "start/end of - /// haystack" anchors. This doesn't include "start/end of line" anchors. 
- #[inline] - pub fn contains_anchor_haystack(&self) -> bool { - self.contains(Look::Start) || self.contains(Look::End) - } - - /// Returns true if and only if this set contains any "start/end of line" - /// anchors. This doesn't include "start/end of haystack" anchors. This - /// includes both `\n` line anchors and CRLF (`\r\n`) aware line anchors. - #[inline] - pub fn contains_anchor_line(&self) -> bool { - self.contains(Look::StartLF) - || self.contains(Look::EndLF) - || self.contains(Look::StartCRLF) - || self.contains(Look::EndCRLF) - } - - /// Returns true if and only if this set contains any "start/end of line" - /// anchors that only treat `\n` as line terminators. This does not include - /// haystack anchors or CRLF aware line anchors. - #[inline] - pub fn contains_anchor_lf(&self) -> bool { - self.contains(Look::StartLF) || self.contains(Look::EndLF) - } - - /// Returns true if and only if this set contains any "start/end of line" - /// anchors that are CRLF-aware. This doesn't include "start/end of - /// haystack" or "start/end of line-feed" anchors. - #[inline] - pub fn contains_anchor_crlf(&self) -> bool { - self.contains(Look::StartCRLF) || self.contains(Look::EndCRLF) - } - - /// Returns true if and only if this set contains any word boundary or - /// negated word boundary assertions. This include both Unicode and ASCII - /// word boundaries. - #[inline] - pub fn contains_word(self) -> bool { - self.contains_word_unicode() || self.contains_word_ascii() - } - - /// Returns true if and only if this set contains any Unicode word boundary - /// or negated Unicode word boundary assertions. 
- #[inline] - pub fn contains_word_unicode(self) -> bool { - self.contains(Look::WordUnicode) - || self.contains(Look::WordUnicodeNegate) - || self.contains(Look::WordStartUnicode) - || self.contains(Look::WordEndUnicode) - || self.contains(Look::WordStartHalfUnicode) - || self.contains(Look::WordEndHalfUnicode) - } - - /// Returns true if and only if this set contains any ASCII word boundary - /// or negated ASCII word boundary assertions. - #[inline] - pub fn contains_word_ascii(self) -> bool { - self.contains(Look::WordAscii) - || self.contains(Look::WordAsciiNegate) - || self.contains(Look::WordStartAscii) - || self.contains(Look::WordEndAscii) - || self.contains(Look::WordStartHalfAscii) - || self.contains(Look::WordEndHalfAscii) - } - - /// Returns an iterator over all of the look-around assertions in this set. - #[inline] - pub fn iter(self) -> LookSetIter { - LookSetIter { set: self } - } - - /// Return a new set that is equivalent to the original, but with the given - /// assertion added to it. If the assertion is already in the set, then the - /// returned set is equivalent to the original. - #[inline] - pub fn insert(self, look: Look) -> LookSet { - LookSet { bits: self.bits | look.as_repr() } - } - - /// Updates this set in place with the result of inserting the given - /// assertion into this set. - #[inline] - pub fn set_insert(&mut self, look: Look) { - *self = self.insert(look); - } - - /// Return a new set that is equivalent to the original, but with the given - /// assertion removed from it. If the assertion is not in the set, then the - /// returned set is equivalent to the original. - #[inline] - pub fn remove(self, look: Look) -> LookSet { - LookSet { bits: self.bits & !look.as_repr() } - } - - /// Updates this set in place with the result of removing the given - /// assertion from this set. 
- #[inline] - pub fn set_remove(&mut self, look: Look) { - *self = self.remove(look); - } - - /// Returns a new set that is the result of subtracting the given set from - /// this set. - #[inline] - pub fn subtract(self, other: LookSet) -> LookSet { - LookSet { bits: self.bits & !other.bits } - } - - /// Updates this set in place with the result of subtracting the given set - /// from this set. - #[inline] - pub fn set_subtract(&mut self, other: LookSet) { - *self = self.subtract(other); - } - - /// Returns a new set that is the union of this and the one given. - #[inline] - pub fn union(self, other: LookSet) -> LookSet { - LookSet { bits: self.bits | other.bits } - } - - /// Updates this set in place with the result of unioning it with the one - /// given. - #[inline] - pub fn set_union(&mut self, other: LookSet) { - *self = self.union(other); - } - - /// Returns a new set that is the intersection of this and the one given. - #[inline] - pub fn intersect(self, other: LookSet) -> LookSet { - LookSet { bits: self.bits & other.bits } - } - - /// Updates this set in place with the result of intersecting it with the - /// one given. - #[inline] - pub fn set_intersect(&mut self, other: LookSet) { - *self = self.intersect(other); - } - - /// Return a `LookSet` from the slice given as a native endian 32-bit - /// integer. - /// - /// # Panics - /// - /// This panics if `slice.len() < 4`. - #[inline] - pub fn read_repr(slice: &[u8]) -> LookSet { - let bits = u32::from_ne_bytes(slice[..4].try_into().unwrap()); - LookSet { bits } - } - - /// Write a `LookSet` as a native endian 32-bit integer to the beginning - /// of the slice given. - /// - /// # Panics - /// - /// This panics if `slice.len() < 4`. 
- #[inline] - pub fn write_repr(self, slice: &mut [u8]) { - let raw = self.bits.to_ne_bytes(); - slice[0] = raw[0]; - slice[1] = raw[1]; - slice[2] = raw[2]; - slice[3] = raw[3]; - } -} - -impl core::fmt::Debug for LookSet { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - if self.is_empty() { - return write!(f, "∅"); - } - for look in self.iter() { - write!(f, "{}", look.as_char())?; - } - Ok(()) - } -} - -/// An iterator over all look-around assertions in a [`LookSet`]. -/// -/// This iterator is created by [`LookSet::iter`]. -#[derive(Clone, Debug)] -pub struct LookSetIter { - set: LookSet, -} - -impl Iterator for LookSetIter { - type Item = Look; - - #[inline] - fn next(&mut self) -> Option { - if self.set.is_empty() { - return None; - } - // We'll never have more than u8::MAX distinct look-around assertions, - // so 'bit' will always fit into a u16. - let bit = u16::try_from(self.set.bits.trailing_zeros()).unwrap(); - let look = Look::from_repr(1 << bit)?; - self.set = self.set.remove(look); - Some(look) - } -} - -/// Given a sequence of HIR values where each value corresponds to a Unicode -/// class (or an all-ASCII byte class), return a single Unicode class -/// corresponding to the union of the classes found. -fn class_chars(hirs: &[Hir]) -> Option { - let mut cls = ClassUnicode::new(vec![]); - for hir in hirs.iter() { - match *hir.kind() { - HirKind::Class(Class::Unicode(ref cls2)) => { - cls.union(cls2); - } - HirKind::Class(Class::Bytes(ref cls2)) => { - cls.union(&cls2.to_unicode_class()?); - } - _ => return None, - }; - } - Some(Class::Unicode(cls)) -} - -/// Given a sequence of HIR values where each value corresponds to a byte class -/// (or an all-ASCII Unicode class), return a single byte class corresponding -/// to the union of the classes found. 
-fn class_bytes(hirs: &[Hir]) -> Option { - let mut cls = ClassBytes::new(vec![]); - for hir in hirs.iter() { - match *hir.kind() { - HirKind::Class(Class::Unicode(ref cls2)) => { - cls.union(&cls2.to_byte_class()?); - } - HirKind::Class(Class::Bytes(ref cls2)) => { - cls.union(cls2); - } - _ => return None, - }; - } - Some(Class::Bytes(cls)) -} - -/// Given a sequence of HIR values where each value corresponds to a literal -/// that is a single `char`, return that sequence of `char`s. Otherwise return -/// None. No deduplication is done. -fn singleton_chars(hirs: &[Hir]) -> Option> { - let mut singletons = vec![]; - for hir in hirs.iter() { - let literal = match *hir.kind() { - HirKind::Literal(Literal(ref bytes)) => bytes, - _ => return None, - }; - let ch = match crate::debug::utf8_decode(literal) { - None => return None, - Some(Err(_)) => return None, - Some(Ok(ch)) => ch, - }; - if literal.len() != ch.len_utf8() { - return None; - } - singletons.push(ch); - } - Some(singletons) -} - -/// Given a sequence of HIR values where each value corresponds to a literal -/// that is a single byte, return that sequence of bytes. Otherwise return -/// None. No deduplication is done. -fn singleton_bytes(hirs: &[Hir]) -> Option> { - let mut singletons = vec![]; - for hir in hirs.iter() { - let literal = match *hir.kind() { - HirKind::Literal(Literal(ref bytes)) => bytes, - _ => return None, - }; - if literal.len() != 1 { - return None; - } - singletons.push(literal[0]); - } - Some(singletons) -} - -/// Looks for a common prefix in the list of alternation branches given. If one -/// is found, then an equivalent but (hopefully) simplified Hir is returned. -/// Otherwise, the original given list of branches is returned unmodified. -/// -/// This is not quite as good as it could be. Right now, it requires that -/// all branches are 'Concat' expressions. It also doesn't do well with -/// literals. 
For example, given 'foofoo|foobar', it will not refactor it to -/// 'foo(?:foo|bar)' because literals are flattened into their own special -/// concatenation. (One wonders if perhaps 'Literal' should be a single atom -/// instead of a string of bytes because of this. Otherwise, handling the -/// current representation in this routine will be pretty gnarly. Sigh.) -fn lift_common_prefix(hirs: Vec) -> Result> { - if hirs.len() <= 1 { - return Err(hirs); - } - let mut prefix = match hirs[0].kind() { - HirKind::Concat(ref xs) => &**xs, - _ => return Err(hirs), - }; - if prefix.is_empty() { - return Err(hirs); - } - for h in hirs.iter().skip(1) { - let concat = match h.kind() { - HirKind::Concat(ref xs) => xs, - _ => return Err(hirs), - }; - let common_len = prefix - .iter() - .zip(concat.iter()) - .take_while(|(x, y)| x == y) - .count(); - prefix = &prefix[..common_len]; - if prefix.is_empty() { - return Err(hirs); - } - } - let len = prefix.len(); - assert_ne!(0, len); - let mut prefix_concat = vec![]; - let mut suffix_alts = vec![]; - for h in hirs { - let mut concat = match h.into_kind() { - HirKind::Concat(xs) => xs, - // We required all sub-expressions to be - // concats above, so we're only here if we - // have a concat. 
- _ => unreachable!(), - }; - suffix_alts.push(Hir::concat(concat.split_off(len))); - if prefix_concat.is_empty() { - prefix_concat = concat; - } - } - let mut concat = prefix_concat; - concat.push(Hir::alternation(suffix_alts)); - Ok(Hir::concat(concat)) -} - -#[cfg(test)] -mod tests { - use super::*; - - fn uclass(ranges: &[(char, char)]) -> ClassUnicode { - let ranges: Vec = ranges - .iter() - .map(|&(s, e)| ClassUnicodeRange::new(s, e)) - .collect(); - ClassUnicode::new(ranges) - } - - fn bclass(ranges: &[(u8, u8)]) -> ClassBytes { - let ranges: Vec = - ranges.iter().map(|&(s, e)| ClassBytesRange::new(s, e)).collect(); - ClassBytes::new(ranges) - } - - fn uranges(cls: &ClassUnicode) -> Vec<(char, char)> { - cls.iter().map(|x| (x.start(), x.end())).collect() - } - - #[cfg(feature = "unicode-case")] - fn ucasefold(cls: &ClassUnicode) -> ClassUnicode { - let mut cls_ = cls.clone(); - cls_.case_fold_simple(); - cls_ - } - - fn uunion(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode { - let mut cls_ = cls1.clone(); - cls_.union(cls2); - cls_ - } - - fn uintersect(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode { - let mut cls_ = cls1.clone(); - cls_.intersect(cls2); - cls_ - } - - fn udifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode { - let mut cls_ = cls1.clone(); - cls_.difference(cls2); - cls_ - } - - fn usymdifference( - cls1: &ClassUnicode, - cls2: &ClassUnicode, - ) -> ClassUnicode { - let mut cls_ = cls1.clone(); - cls_.symmetric_difference(cls2); - cls_ - } - - fn unegate(cls: &ClassUnicode) -> ClassUnicode { - let mut cls_ = cls.clone(); - cls_.negate(); - cls_ - } - - fn branges(cls: &ClassBytes) -> Vec<(u8, u8)> { - cls.iter().map(|x| (x.start(), x.end())).collect() - } - - fn bcasefold(cls: &ClassBytes) -> ClassBytes { - let mut cls_ = cls.clone(); - cls_.case_fold_simple(); - cls_ - } - - fn bunion(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes { - let mut cls_ = cls1.clone(); - cls_.union(cls2); - cls_ - } - 
- fn bintersect(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes { - let mut cls_ = cls1.clone(); - cls_.intersect(cls2); - cls_ - } - - fn bdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes { - let mut cls_ = cls1.clone(); - cls_.difference(cls2); - cls_ - } - - fn bsymdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes { - let mut cls_ = cls1.clone(); - cls_.symmetric_difference(cls2); - cls_ - } - - fn bnegate(cls: &ClassBytes) -> ClassBytes { - let mut cls_ = cls.clone(); - cls_.negate(); - cls_ - } - - #[test] - fn class_range_canonical_unicode() { - let range = ClassUnicodeRange::new('\u{00FF}', '\0'); - assert_eq!('\0', range.start()); - assert_eq!('\u{00FF}', range.end()); - } - - #[test] - fn class_range_canonical_bytes() { - let range = ClassBytesRange::new(b'\xFF', b'\0'); - assert_eq!(b'\0', range.start()); - assert_eq!(b'\xFF', range.end()); - } - - #[test] - fn class_canonicalize_unicode() { - let cls = uclass(&[('a', 'c'), ('x', 'z')]); - let expected = vec![('a', 'c'), ('x', 'z')]; - assert_eq!(expected, uranges(&cls)); - - let cls = uclass(&[('x', 'z'), ('a', 'c')]); - let expected = vec![('a', 'c'), ('x', 'z')]; - assert_eq!(expected, uranges(&cls)); - - let cls = uclass(&[('x', 'z'), ('w', 'y')]); - let expected = vec![('w', 'z')]; - assert_eq!(expected, uranges(&cls)); - - let cls = uclass(&[ - ('c', 'f'), - ('a', 'g'), - ('d', 'j'), - ('a', 'c'), - ('m', 'p'), - ('l', 's'), - ]); - let expected = vec![('a', 'j'), ('l', 's')]; - assert_eq!(expected, uranges(&cls)); - - let cls = uclass(&[('x', 'z'), ('u', 'w')]); - let expected = vec![('u', 'z')]; - assert_eq!(expected, uranges(&cls)); - - let cls = uclass(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]); - let expected = vec![('\x00', '\u{10FFFF}')]; - assert_eq!(expected, uranges(&cls)); - - let cls = uclass(&[('a', 'a'), ('b', 'b')]); - let expected = vec![('a', 'b')]; - assert_eq!(expected, uranges(&cls)); - } - - #[test] - fn class_canonicalize_bytes() { - let 
cls = bclass(&[(b'a', b'c'), (b'x', b'z')]); - let expected = vec![(b'a', b'c'), (b'x', b'z')]; - assert_eq!(expected, branges(&cls)); - - let cls = bclass(&[(b'x', b'z'), (b'a', b'c')]); - let expected = vec![(b'a', b'c'), (b'x', b'z')]; - assert_eq!(expected, branges(&cls)); - - let cls = bclass(&[(b'x', b'z'), (b'w', b'y')]); - let expected = vec![(b'w', b'z')]; - assert_eq!(expected, branges(&cls)); - - let cls = bclass(&[ - (b'c', b'f'), - (b'a', b'g'), - (b'd', b'j'), - (b'a', b'c'), - (b'm', b'p'), - (b'l', b's'), - ]); - let expected = vec![(b'a', b'j'), (b'l', b's')]; - assert_eq!(expected, branges(&cls)); - - let cls = bclass(&[(b'x', b'z'), (b'u', b'w')]); - let expected = vec![(b'u', b'z')]; - assert_eq!(expected, branges(&cls)); - - let cls = bclass(&[(b'\x00', b'\xFF'), (b'\x00', b'\xFF')]); - let expected = vec![(b'\x00', b'\xFF')]; - assert_eq!(expected, branges(&cls)); - - let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]); - let expected = vec![(b'a', b'b')]; - assert_eq!(expected, branges(&cls)); - } - - #[test] - #[cfg(feature = "unicode-case")] - fn class_case_fold_unicode() { - let cls = uclass(&[ - ('C', 'F'), - ('A', 'G'), - ('D', 'J'), - ('A', 'C'), - ('M', 'P'), - ('L', 'S'), - ('c', 'f'), - ]); - let expected = uclass(&[ - ('A', 'J'), - ('L', 'S'), - ('a', 'j'), - ('l', 's'), - ('\u{17F}', '\u{17F}'), - ]); - assert_eq!(expected, ucasefold(&cls)); - - let cls = uclass(&[('A', 'Z')]); - let expected = uclass(&[ - ('A', 'Z'), - ('a', 'z'), - ('\u{17F}', '\u{17F}'), - ('\u{212A}', '\u{212A}'), - ]); - assert_eq!(expected, ucasefold(&cls)); - - let cls = uclass(&[('a', 'z')]); - let expected = uclass(&[ - ('A', 'Z'), - ('a', 'z'), - ('\u{17F}', '\u{17F}'), - ('\u{212A}', '\u{212A}'), - ]); - assert_eq!(expected, ucasefold(&cls)); - - let cls = uclass(&[('A', 'A'), ('_', '_')]); - let expected = uclass(&[('A', 'A'), ('_', '_'), ('a', 'a')]); - assert_eq!(expected, ucasefold(&cls)); - - let cls = uclass(&[('A', 'A'), ('=', '=')]); - let expected = 
uclass(&[('=', '='), ('A', 'A'), ('a', 'a')]); - assert_eq!(expected, ucasefold(&cls)); - - let cls = uclass(&[('\x00', '\x10')]); - assert_eq!(cls, ucasefold(&cls)); - - let cls = uclass(&[('k', 'k')]); - let expected = - uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')]); - assert_eq!(expected, ucasefold(&cls)); - - let cls = uclass(&[('@', '@')]); - assert_eq!(cls, ucasefold(&cls)); - } - - #[test] - #[cfg(not(feature = "unicode-case"))] - fn class_case_fold_unicode_disabled() { - let mut cls = uclass(&[ - ('C', 'F'), - ('A', 'G'), - ('D', 'J'), - ('A', 'C'), - ('M', 'P'), - ('L', 'S'), - ('c', 'f'), - ]); - assert!(cls.try_case_fold_simple().is_err()); - } - - #[test] - #[should_panic] - #[cfg(not(feature = "unicode-case"))] - fn class_case_fold_unicode_disabled_panics() { - let mut cls = uclass(&[ - ('C', 'F'), - ('A', 'G'), - ('D', 'J'), - ('A', 'C'), - ('M', 'P'), - ('L', 'S'), - ('c', 'f'), - ]); - cls.case_fold_simple(); - } - - #[test] - fn class_case_fold_bytes() { - let cls = bclass(&[ - (b'C', b'F'), - (b'A', b'G'), - (b'D', b'J'), - (b'A', b'C'), - (b'M', b'P'), - (b'L', b'S'), - (b'c', b'f'), - ]); - let expected = - bclass(&[(b'A', b'J'), (b'L', b'S'), (b'a', b'j'), (b'l', b's')]); - assert_eq!(expected, bcasefold(&cls)); - - let cls = bclass(&[(b'A', b'Z')]); - let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]); - assert_eq!(expected, bcasefold(&cls)); - - let cls = bclass(&[(b'a', b'z')]); - let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]); - assert_eq!(expected, bcasefold(&cls)); - - let cls = bclass(&[(b'A', b'A'), (b'_', b'_')]); - let expected = bclass(&[(b'A', b'A'), (b'_', b'_'), (b'a', b'a')]); - assert_eq!(expected, bcasefold(&cls)); - - let cls = bclass(&[(b'A', b'A'), (b'=', b'=')]); - let expected = bclass(&[(b'=', b'='), (b'A', b'A'), (b'a', b'a')]); - assert_eq!(expected, bcasefold(&cls)); - - let cls = bclass(&[(b'\x00', b'\x10')]); - assert_eq!(cls, bcasefold(&cls)); - - let cls = bclass(&[(b'k', b'k')]); - let 
expected = bclass(&[(b'K', b'K'), (b'k', b'k')]); - assert_eq!(expected, bcasefold(&cls)); - - let cls = bclass(&[(b'@', b'@')]); - assert_eq!(cls, bcasefold(&cls)); - } - - #[test] - fn class_negate_unicode() { - let cls = uclass(&[('a', 'a')]); - let expected = uclass(&[('\x00', '\x60'), ('\x62', '\u{10FFFF}')]); - assert_eq!(expected, unegate(&cls)); - - let cls = uclass(&[('a', 'a'), ('b', 'b')]); - let expected = uclass(&[('\x00', '\x60'), ('\x63', '\u{10FFFF}')]); - assert_eq!(expected, unegate(&cls)); - - let cls = uclass(&[('a', 'c'), ('x', 'z')]); - let expected = uclass(&[ - ('\x00', '\x60'), - ('\x64', '\x77'), - ('\x7B', '\u{10FFFF}'), - ]); - assert_eq!(expected, unegate(&cls)); - - let cls = uclass(&[('\x00', 'a')]); - let expected = uclass(&[('\x62', '\u{10FFFF}')]); - assert_eq!(expected, unegate(&cls)); - - let cls = uclass(&[('a', '\u{10FFFF}')]); - let expected = uclass(&[('\x00', '\x60')]); - assert_eq!(expected, unegate(&cls)); - - let cls = uclass(&[('\x00', '\u{10FFFF}')]); - let expected = uclass(&[]); - assert_eq!(expected, unegate(&cls)); - - let cls = uclass(&[]); - let expected = uclass(&[('\x00', '\u{10FFFF}')]); - assert_eq!(expected, unegate(&cls)); - - let cls = - uclass(&[('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')]); - let expected = uclass(&[('\u{10FFFE}', '\u{10FFFE}')]); - assert_eq!(expected, unegate(&cls)); - - let cls = uclass(&[('\x00', '\u{D7FF}')]); - let expected = uclass(&[('\u{E000}', '\u{10FFFF}')]); - assert_eq!(expected, unegate(&cls)); - - let cls = uclass(&[('\x00', '\u{D7FE}')]); - let expected = uclass(&[('\u{D7FF}', '\u{10FFFF}')]); - assert_eq!(expected, unegate(&cls)); - - let cls = uclass(&[('\u{E000}', '\u{10FFFF}')]); - let expected = uclass(&[('\x00', '\u{D7FF}')]); - assert_eq!(expected, unegate(&cls)); - - let cls = uclass(&[('\u{E001}', '\u{10FFFF}')]); - let expected = uclass(&[('\x00', '\u{E000}')]); - assert_eq!(expected, unegate(&cls)); - } - - #[test] - fn class_negate_bytes() { - let cls 
= bclass(&[(b'a', b'a')]); - let expected = bclass(&[(b'\x00', b'\x60'), (b'\x62', b'\xFF')]); - assert_eq!(expected, bnegate(&cls)); - - let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]); - let expected = bclass(&[(b'\x00', b'\x60'), (b'\x63', b'\xFF')]); - assert_eq!(expected, bnegate(&cls)); - - let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]); - let expected = bclass(&[ - (b'\x00', b'\x60'), - (b'\x64', b'\x77'), - (b'\x7B', b'\xFF'), - ]); - assert_eq!(expected, bnegate(&cls)); - - let cls = bclass(&[(b'\x00', b'a')]); - let expected = bclass(&[(b'\x62', b'\xFF')]); - assert_eq!(expected, bnegate(&cls)); - - let cls = bclass(&[(b'a', b'\xFF')]); - let expected = bclass(&[(b'\x00', b'\x60')]); - assert_eq!(expected, bnegate(&cls)); - - let cls = bclass(&[(b'\x00', b'\xFF')]); - let expected = bclass(&[]); - assert_eq!(expected, bnegate(&cls)); - - let cls = bclass(&[]); - let expected = bclass(&[(b'\x00', b'\xFF')]); - assert_eq!(expected, bnegate(&cls)); - - let cls = bclass(&[(b'\x00', b'\xFD'), (b'\xFF', b'\xFF')]); - let expected = bclass(&[(b'\xFE', b'\xFE')]); - assert_eq!(expected, bnegate(&cls)); - } - - #[test] - fn class_union_unicode() { - let cls1 = uclass(&[('a', 'g'), ('m', 't'), ('A', 'C')]); - let cls2 = uclass(&[('a', 'z')]); - let expected = uclass(&[('a', 'z'), ('A', 'C')]); - assert_eq!(expected, uunion(&cls1, &cls2)); - } - - #[test] - fn class_union_bytes() { - let cls1 = bclass(&[(b'a', b'g'), (b'm', b't'), (b'A', b'C')]); - let cls2 = bclass(&[(b'a', b'z')]); - let expected = bclass(&[(b'a', b'z'), (b'A', b'C')]); - assert_eq!(expected, bunion(&cls1, &cls2)); - } - - #[test] - fn class_intersect_unicode() { - let cls1 = uclass(&[]); - let cls2 = uclass(&[('a', 'a')]); - let expected = uclass(&[]); - assert_eq!(expected, uintersect(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'a')]); - let cls2 = uclass(&[('a', 'a')]); - let expected = uclass(&[('a', 'a')]); - assert_eq!(expected, uintersect(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 
'a')]); - let cls2 = uclass(&[('b', 'b')]); - let expected = uclass(&[]); - assert_eq!(expected, uintersect(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'a')]); - let cls2 = uclass(&[('a', 'c')]); - let expected = uclass(&[('a', 'a')]); - assert_eq!(expected, uintersect(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'b')]); - let cls2 = uclass(&[('a', 'c')]); - let expected = uclass(&[('a', 'b')]); - assert_eq!(expected, uintersect(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'b')]); - let cls2 = uclass(&[('b', 'c')]); - let expected = uclass(&[('b', 'b')]); - assert_eq!(expected, uintersect(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'b')]); - let cls2 = uclass(&[('c', 'd')]); - let expected = uclass(&[]); - assert_eq!(expected, uintersect(&cls1, &cls2)); - - let cls1 = uclass(&[('b', 'c')]); - let cls2 = uclass(&[('a', 'd')]); - let expected = uclass(&[('b', 'c')]); - assert_eq!(expected, uintersect(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]); - let cls2 = uclass(&[('a', 'h')]); - let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]); - assert_eq!(expected, uintersect(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]); - let cls2 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]); - let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]); - assert_eq!(expected, uintersect(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'b'), ('g', 'h')]); - let cls2 = uclass(&[('d', 'e'), ('k', 'l')]); - let expected = uclass(&[]); - assert_eq!(expected, uintersect(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]); - let cls2 = uclass(&[('h', 'h')]); - let expected = uclass(&[('h', 'h')]); - assert_eq!(expected, uintersect(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'b'), ('e', 'f'), ('i', 'j')]); - let cls2 = uclass(&[('c', 'd'), ('g', 'h'), ('k', 'l')]); - let expected = uclass(&[]); - assert_eq!(expected, uintersect(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'b'), ('c', 'd'), 
('e', 'f')]); - let cls2 = uclass(&[('b', 'c'), ('d', 'e'), ('f', 'g')]); - let expected = uclass(&[('b', 'f')]); - assert_eq!(expected, uintersect(&cls1, &cls2)); - } - - #[test] - fn class_intersect_bytes() { - let cls1 = bclass(&[]); - let cls2 = bclass(&[(b'a', b'a')]); - let expected = bclass(&[]); - assert_eq!(expected, bintersect(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'a')]); - let cls2 = bclass(&[(b'a', b'a')]); - let expected = bclass(&[(b'a', b'a')]); - assert_eq!(expected, bintersect(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'a')]); - let cls2 = bclass(&[(b'b', b'b')]); - let expected = bclass(&[]); - assert_eq!(expected, bintersect(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'a')]); - let cls2 = bclass(&[(b'a', b'c')]); - let expected = bclass(&[(b'a', b'a')]); - assert_eq!(expected, bintersect(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'b')]); - let cls2 = bclass(&[(b'a', b'c')]); - let expected = bclass(&[(b'a', b'b')]); - assert_eq!(expected, bintersect(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'b')]); - let cls2 = bclass(&[(b'b', b'c')]); - let expected = bclass(&[(b'b', b'b')]); - assert_eq!(expected, bintersect(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'b')]); - let cls2 = bclass(&[(b'c', b'd')]); - let expected = bclass(&[]); - assert_eq!(expected, bintersect(&cls1, &cls2)); - - let cls1 = bclass(&[(b'b', b'c')]); - let cls2 = bclass(&[(b'a', b'd')]); - let expected = bclass(&[(b'b', b'c')]); - assert_eq!(expected, bintersect(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]); - let cls2 = bclass(&[(b'a', b'h')]); - let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]); - assert_eq!(expected, bintersect(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]); - let cls2 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]); - let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]); - assert_eq!(expected, bintersect(&cls1, 
&cls2)); - - let cls1 = bclass(&[(b'a', b'b'), (b'g', b'h')]); - let cls2 = bclass(&[(b'd', b'e'), (b'k', b'l')]); - let expected = bclass(&[]); - assert_eq!(expected, bintersect(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]); - let cls2 = bclass(&[(b'h', b'h')]); - let expected = bclass(&[(b'h', b'h')]); - assert_eq!(expected, bintersect(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'b'), (b'e', b'f'), (b'i', b'j')]); - let cls2 = bclass(&[(b'c', b'd'), (b'g', b'h'), (b'k', b'l')]); - let expected = bclass(&[]); - assert_eq!(expected, bintersect(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'b'), (b'c', b'd'), (b'e', b'f')]); - let cls2 = bclass(&[(b'b', b'c'), (b'd', b'e'), (b'f', b'g')]); - let expected = bclass(&[(b'b', b'f')]); - assert_eq!(expected, bintersect(&cls1, &cls2)); - } - - #[test] - fn class_difference_unicode() { - let cls1 = uclass(&[('a', 'a')]); - let cls2 = uclass(&[('a', 'a')]); - let expected = uclass(&[]); - assert_eq!(expected, udifference(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'a')]); - let cls2 = uclass(&[]); - let expected = uclass(&[('a', 'a')]); - assert_eq!(expected, udifference(&cls1, &cls2)); - - let cls1 = uclass(&[]); - let cls2 = uclass(&[('a', 'a')]); - let expected = uclass(&[]); - assert_eq!(expected, udifference(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'z')]); - let cls2 = uclass(&[('a', 'a')]); - let expected = uclass(&[('b', 'z')]); - assert_eq!(expected, udifference(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'z')]); - let cls2 = uclass(&[('z', 'z')]); - let expected = uclass(&[('a', 'y')]); - assert_eq!(expected, udifference(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'z')]); - let cls2 = uclass(&[('m', 'm')]); - let expected = uclass(&[('a', 'l'), ('n', 'z')]); - assert_eq!(expected, udifference(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]); - let cls2 = uclass(&[('a', 'z')]); - let expected = uclass(&[]); - assert_eq!(expected, 
udifference(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]); - let cls2 = uclass(&[('d', 'v')]); - let expected = uclass(&[('a', 'c')]); - assert_eq!(expected, udifference(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]); - let cls2 = uclass(&[('b', 'g'), ('s', 'u')]); - let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]); - assert_eq!(expected, udifference(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]); - let cls2 = uclass(&[('b', 'd'), ('e', 'g'), ('s', 'u')]); - let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]); - assert_eq!(expected, udifference(&cls1, &cls2)); - - let cls1 = uclass(&[('x', 'z')]); - let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]); - let expected = uclass(&[('x', 'z')]); - assert_eq!(expected, udifference(&cls1, &cls2)); - - let cls1 = uclass(&[('a', 'z')]); - let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]); - let expected = uclass(&[('d', 'd'), ('h', 'r'), ('v', 'z')]); - assert_eq!(expected, udifference(&cls1, &cls2)); - } - - #[test] - fn class_difference_bytes() { - let cls1 = bclass(&[(b'a', b'a')]); - let cls2 = bclass(&[(b'a', b'a')]); - let expected = bclass(&[]); - assert_eq!(expected, bdifference(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'a')]); - let cls2 = bclass(&[]); - let expected = bclass(&[(b'a', b'a')]); - assert_eq!(expected, bdifference(&cls1, &cls2)); - - let cls1 = bclass(&[]); - let cls2 = bclass(&[(b'a', b'a')]); - let expected = bclass(&[]); - assert_eq!(expected, bdifference(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'z')]); - let cls2 = bclass(&[(b'a', b'a')]); - let expected = bclass(&[(b'b', b'z')]); - assert_eq!(expected, bdifference(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'z')]); - let cls2 = bclass(&[(b'z', b'z')]); - let expected = bclass(&[(b'a', b'y')]); - assert_eq!(expected, bdifference(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'z')]); - let cls2 = 
bclass(&[(b'm', b'm')]); - let expected = bclass(&[(b'a', b'l'), (b'n', b'z')]); - assert_eq!(expected, bdifference(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]); - let cls2 = bclass(&[(b'a', b'z')]); - let expected = bclass(&[]); - assert_eq!(expected, bdifference(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]); - let cls2 = bclass(&[(b'd', b'v')]); - let expected = bclass(&[(b'a', b'c')]); - assert_eq!(expected, bdifference(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]); - let cls2 = bclass(&[(b'b', b'g'), (b's', b'u')]); - let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]); - assert_eq!(expected, bdifference(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]); - let cls2 = bclass(&[(b'b', b'd'), (b'e', b'g'), (b's', b'u')]); - let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]); - assert_eq!(expected, bdifference(&cls1, &cls2)); - - let cls1 = bclass(&[(b'x', b'z')]); - let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]); - let expected = bclass(&[(b'x', b'z')]); - assert_eq!(expected, bdifference(&cls1, &cls2)); - - let cls1 = bclass(&[(b'a', b'z')]); - let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]); - let expected = bclass(&[(b'd', b'd'), (b'h', b'r'), (b'v', b'z')]); - assert_eq!(expected, bdifference(&cls1, &cls2)); - } - - #[test] - fn class_symmetric_difference_unicode() { - let cls1 = uclass(&[('a', 'm')]); - let cls2 = uclass(&[('g', 't')]); - let expected = uclass(&[('a', 'f'), ('n', 't')]); - assert_eq!(expected, usymdifference(&cls1, &cls2)); - } - - #[test] - fn class_symmetric_difference_bytes() { - let cls1 = bclass(&[(b'a', b'm')]); - let cls2 = bclass(&[(b'g', b't')]); - let expected = bclass(&[(b'a', b'f'), (b'n', b't')]); - assert_eq!(expected, bsymdifference(&cls1, &cls2)); - } - - // We use a thread with an explicit stack size to test that our 
destructor - // for Hir can handle arbitrarily sized expressions in constant stack - // space. In case we run on a platform without threads (WASM?), we limit - // this test to Windows/Unix. - #[test] - #[cfg(any(unix, windows))] - fn no_stack_overflow_on_drop() { - use std::thread; - - let run = || { - let mut expr = Hir::empty(); - for _ in 0..100 { - expr = Hir::capture(Capture { - index: 1, - name: None, - sub: Box::new(expr), - }); - expr = Hir::repetition(Repetition { - min: 0, - max: Some(1), - greedy: true, - sub: Box::new(expr), - }); - - expr = Hir { - kind: HirKind::Concat(vec![expr]), - props: Properties::empty(), - }; - expr = Hir { - kind: HirKind::Alternation(vec![expr]), - props: Properties::empty(), - }; - } - assert!(!matches!(*expr.kind(), HirKind::Empty)); - }; - - // We run our test on a thread with a small stack size so we can - // force the issue more easily. - // - // NOTE(2023-03-21): See the corresponding test in 'crate::ast::tests' - // for context on the specific stack size chosen here. 
- thread::Builder::new() - .stack_size(16 << 10) - .spawn(run) - .unwrap() - .join() - .unwrap(); - } - - #[test] - fn look_set_iter() { - let set = LookSet::empty(); - assert_eq!(0, set.iter().count()); - - let set = LookSet::full(); - assert_eq!(18, set.iter().count()); - - let set = - LookSet::empty().insert(Look::StartLF).insert(Look::WordUnicode); - assert_eq!(2, set.iter().count()); - - let set = LookSet::empty().insert(Look::StartLF); - assert_eq!(1, set.iter().count()); - - let set = LookSet::empty().insert(Look::WordAsciiNegate); - assert_eq!(1, set.iter().count()); - } - - #[test] - fn look_set_debug() { - let res = format!("{:?}", LookSet::empty()); - assert_eq!("∅", res); - let res = format!("{:?}", LookSet::full()); - assert_eq!("Az^$rRbB𝛃𝚩<>〈〉◁▷◀▶", res); - } -} diff --git a/vendor/regex-syntax/src/hir/print.rs b/vendor/regex-syntax/src/hir/print.rs deleted file mode 100644 index dfa6d403..00000000 --- a/vendor/regex-syntax/src/hir/print.rs +++ /dev/null @@ -1,608 +0,0 @@ -/*! -This module provides a regular expression printer for `Hir`. -*/ - -use core::fmt; - -use crate::{ - hir::{ - self, - visitor::{self, Visitor}, - Hir, HirKind, - }, - is_meta_character, -}; - -/// A builder for constructing a printer. -/// -/// Note that since a printer doesn't have any configuration knobs, this type -/// remains unexported. -#[derive(Clone, Debug)] -struct PrinterBuilder { - _priv: (), -} - -impl Default for PrinterBuilder { - fn default() -> PrinterBuilder { - PrinterBuilder::new() - } -} - -impl PrinterBuilder { - fn new() -> PrinterBuilder { - PrinterBuilder { _priv: () } - } - - fn build(&self) -> Printer { - Printer { _priv: () } - } -} - -/// A printer for a regular expression's high-level intermediate -/// representation. -/// -/// A printer converts a high-level intermediate representation (HIR) to a -/// regular expression pattern string. This particular printer uses constant -/// stack space and heap space proportional to the size of the HIR. 
-/// -/// Since this printer is only using the HIR, the pattern it prints will likely -/// not resemble the original pattern at all. For example, a pattern like -/// `\pL` will have its entire class written out. -/// -/// The purpose of this printer is to provide a means to mutate an HIR and then -/// build a regular expression from the result of that mutation. (A regex -/// library could provide a constructor from this HIR explicitly, but that -/// creates an unnecessary public coupling between the regex library and this -/// specific HIR representation.) -#[derive(Debug)] -pub struct Printer { - _priv: (), -} - -impl Printer { - /// Create a new printer. - pub fn new() -> Printer { - PrinterBuilder::new().build() - } - - /// Print the given `Ast` to the given writer. The writer must implement - /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used - /// here are a `fmt::Formatter` (which is available in `fmt::Display` - /// implementations) or a `&mut String`. - pub fn print(&mut self, hir: &Hir, wtr: W) -> fmt::Result { - visitor::visit(hir, Writer { wtr }) - } -} - -#[derive(Debug)] -struct Writer { - wtr: W, -} - -impl Visitor for Writer { - type Output = (); - type Err = fmt::Error; - - fn finish(self) -> fmt::Result { - Ok(()) - } - - fn visit_pre(&mut self, hir: &Hir) -> fmt::Result { - match *hir.kind() { - HirKind::Empty => { - // Technically an empty sub-expression could be "printed" by - // just ignoring it, but in practice, you could have a - // repetition operator attached to an empty expression, and you - // really need something in the concrete syntax to make that - // work as you'd expect. - self.wtr.write_str(r"(?:)")?; - } - // Repetition operators are strictly suffix oriented. - HirKind::Repetition(_) => {} - HirKind::Literal(hir::Literal(ref bytes)) => { - // See the comment on the 'Concat' and 'Alternation' case below - // for why we put parens here. 
Literals are, conceptually, - // a special case of concatenation where each element is a - // character. The HIR flattens this into a Box<[u8]>, but we - // still need to treat it like a concatenation for correct - // printing. As a special case, we don't write parens if there - // is only one character. One character means there is no - // concat so we don't need parens. Adding parens would still be - // correct, but we drop them here because it tends to create - // rather noisy regexes even in simple cases. - let result = core::str::from_utf8(bytes); - let len = result.map_or(bytes.len(), |s| s.chars().count()); - if len > 1 { - self.wtr.write_str(r"(?:")?; - } - match result { - Ok(string) => { - for c in string.chars() { - self.write_literal_char(c)?; - } - } - Err(_) => { - for &b in bytes.iter() { - self.write_literal_byte(b)?; - } - } - } - if len > 1 { - self.wtr.write_str(r")")?; - } - } - HirKind::Class(hir::Class::Unicode(ref cls)) => { - if cls.ranges().is_empty() { - return self.wtr.write_str("[a&&b]"); - } - self.wtr.write_str("[")?; - for range in cls.iter() { - if range.start() == range.end() { - self.write_literal_char(range.start())?; - } else if u32::from(range.start()) + 1 - == u32::from(range.end()) - { - self.write_literal_char(range.start())?; - self.write_literal_char(range.end())?; - } else { - self.write_literal_char(range.start())?; - self.wtr.write_str("-")?; - self.write_literal_char(range.end())?; - } - } - self.wtr.write_str("]")?; - } - HirKind::Class(hir::Class::Bytes(ref cls)) => { - if cls.ranges().is_empty() { - return self.wtr.write_str("[a&&b]"); - } - self.wtr.write_str("(?-u:[")?; - for range in cls.iter() { - if range.start() == range.end() { - self.write_literal_class_byte(range.start())?; - } else if range.start() + 1 == range.end() { - self.write_literal_class_byte(range.start())?; - self.write_literal_class_byte(range.end())?; - } else { - self.write_literal_class_byte(range.start())?; - self.wtr.write_str("-")?; - 
self.write_literal_class_byte(range.end())?; - } - } - self.wtr.write_str("])")?; - } - HirKind::Look(ref look) => match *look { - hir::Look::Start => { - self.wtr.write_str(r"\A")?; - } - hir::Look::End => { - self.wtr.write_str(r"\z")?; - } - hir::Look::StartLF => { - self.wtr.write_str("(?m:^)")?; - } - hir::Look::EndLF => { - self.wtr.write_str("(?m:$)")?; - } - hir::Look::StartCRLF => { - self.wtr.write_str("(?mR:^)")?; - } - hir::Look::EndCRLF => { - self.wtr.write_str("(?mR:$)")?; - } - hir::Look::WordAscii => { - self.wtr.write_str(r"(?-u:\b)")?; - } - hir::Look::WordAsciiNegate => { - self.wtr.write_str(r"(?-u:\B)")?; - } - hir::Look::WordUnicode => { - self.wtr.write_str(r"\b")?; - } - hir::Look::WordUnicodeNegate => { - self.wtr.write_str(r"\B")?; - } - hir::Look::WordStartAscii => { - self.wtr.write_str(r"(?-u:\b{start})")?; - } - hir::Look::WordEndAscii => { - self.wtr.write_str(r"(?-u:\b{end})")?; - } - hir::Look::WordStartUnicode => { - self.wtr.write_str(r"\b{start}")?; - } - hir::Look::WordEndUnicode => { - self.wtr.write_str(r"\b{end}")?; - } - hir::Look::WordStartHalfAscii => { - self.wtr.write_str(r"(?-u:\b{start-half})")?; - } - hir::Look::WordEndHalfAscii => { - self.wtr.write_str(r"(?-u:\b{end-half})")?; - } - hir::Look::WordStartHalfUnicode => { - self.wtr.write_str(r"\b{start-half}")?; - } - hir::Look::WordEndHalfUnicode => { - self.wtr.write_str(r"\b{end-half}")?; - } - }, - HirKind::Capture(hir::Capture { ref name, .. }) => { - self.wtr.write_str("(")?; - if let Some(ref name) = *name { - write!(self.wtr, "?P<{}>", name)?; - } - } - // Why do this? Wrapping concats and alts in non-capturing groups - // is not *always* necessary, but is sometimes necessary. For - // example, 'concat(a, alt(b, c))' should be written as 'a(?:b|c)' - // and not 'ab|c'. The former is clearly the intended meaning, but - // the latter is actually 'alt(concat(a, b), c)'. 
- // - // It would be possible to only group these things in cases where - // it's strictly necessary, but it requires knowing the parent - // expression. And since this technique is simpler and always - // correct, we take this route. More to the point, it is a non-goal - // of an HIR printer to show a nice easy-to-read regex. Indeed, - // its construction forbids it from doing so. Therefore, inserting - // extra groups where they aren't necessary is perfectly okay. - HirKind::Concat(_) | HirKind::Alternation(_) => { - self.wtr.write_str(r"(?:")?; - } - } - Ok(()) - } - - fn visit_post(&mut self, hir: &Hir) -> fmt::Result { - match *hir.kind() { - // Handled during visit_pre - HirKind::Empty - | HirKind::Literal(_) - | HirKind::Class(_) - | HirKind::Look(_) => {} - HirKind::Repetition(ref x) => { - match (x.min, x.max) { - (0, Some(1)) => { - self.wtr.write_str("?")?; - } - (0, None) => { - self.wtr.write_str("*")?; - } - (1, None) => { - self.wtr.write_str("+")?; - } - (1, Some(1)) => { - // 'a{1}' and 'a{1}?' are exactly equivalent to 'a'. - return Ok(()); - } - (m, None) => { - write!(self.wtr, "{{{},}}", m)?; - } - (m, Some(n)) if m == n => { - write!(self.wtr, "{{{}}}", m)?; - // a{m} and a{m}? are always exactly equivalent. 
- return Ok(()); - } - (m, Some(n)) => { - write!(self.wtr, "{{{},{}}}", m, n)?; - } - } - if !x.greedy { - self.wtr.write_str("?")?; - } - } - HirKind::Capture(_) - | HirKind::Concat(_) - | HirKind::Alternation(_) => { - self.wtr.write_str(r")")?; - } - } - Ok(()) - } - - fn visit_alternation_in(&mut self) -> fmt::Result { - self.wtr.write_str("|") - } -} - -impl Writer { - fn write_literal_char(&mut self, c: char) -> fmt::Result { - if is_meta_character(c) { - self.wtr.write_str("\\")?; - } - self.wtr.write_char(c) - } - - fn write_literal_byte(&mut self, b: u8) -> fmt::Result { - if b <= 0x7F && !b.is_ascii_control() && !b.is_ascii_whitespace() { - self.write_literal_char(char::try_from(b).unwrap()) - } else { - write!(self.wtr, "(?-u:\\x{:02X})", b) - } - } - - fn write_literal_class_byte(&mut self, b: u8) -> fmt::Result { - if b <= 0x7F && !b.is_ascii_control() && !b.is_ascii_whitespace() { - self.write_literal_char(char::try_from(b).unwrap()) - } else { - write!(self.wtr, "\\x{:02X}", b) - } - } -} - -#[cfg(test)] -mod tests { - use alloc::{ - boxed::Box, - string::{String, ToString}, - }; - - use crate::ParserBuilder; - - use super::*; - - fn roundtrip(given: &str, expected: &str) { - roundtrip_with(|b| b, given, expected); - } - - fn roundtrip_bytes(given: &str, expected: &str) { - roundtrip_with(|b| b.utf8(false), given, expected); - } - - fn roundtrip_with(mut f: F, given: &str, expected: &str) - where - F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder, - { - let mut builder = ParserBuilder::new(); - f(&mut builder); - let hir = builder.build().parse(given).unwrap(); - - let mut printer = Printer::new(); - let mut dst = String::new(); - printer.print(&hir, &mut dst).unwrap(); - - // Check that the result is actually valid. 
- builder.build().parse(&dst).unwrap(); - - assert_eq!(expected, dst); - } - - #[test] - fn print_literal() { - roundtrip("a", "a"); - roundtrip(r"\xff", "\u{FF}"); - roundtrip_bytes(r"\xff", "\u{FF}"); - roundtrip_bytes(r"(?-u)\xff", r"(?-u:\xFF)"); - roundtrip("☃", "☃"); - } - - #[test] - fn print_class() { - roundtrip(r"[a]", r"a"); - roundtrip(r"[ab]", r"[ab]"); - roundtrip(r"[a-z]", r"[a-z]"); - roundtrip(r"[a-z--b-c--x-y]", r"[ad-wz]"); - roundtrip(r"[^\x01-\u{10FFFF}]", "\u{0}"); - roundtrip(r"[-]", r"\-"); - roundtrip(r"[☃-⛄]", r"[☃-⛄]"); - - roundtrip(r"(?-u)[a]", r"a"); - roundtrip(r"(?-u)[ab]", r"(?-u:[ab])"); - roundtrip(r"(?-u)[a-z]", r"(?-u:[a-z])"); - roundtrip_bytes(r"(?-u)[a-\xFF]", r"(?-u:[a-\xFF])"); - - // The following test that the printer escapes meta characters - // in character classes. - roundtrip(r"[\[]", r"\["); - roundtrip(r"[Z-_]", r"[Z-_]"); - roundtrip(r"[Z-_--Z]", r"[\[-_]"); - - // The following test that the printer escapes meta characters - // in byte oriented character classes. - roundtrip_bytes(r"(?-u)[\[]", r"\["); - roundtrip_bytes(r"(?-u)[Z-_]", r"(?-u:[Z-_])"); - roundtrip_bytes(r"(?-u)[Z-_--Z]", r"(?-u:[\[-_])"); - - // This tests that an empty character class is correctly roundtripped. 
- #[cfg(feature = "unicode-gencat")] - roundtrip(r"\P{any}", r"[a&&b]"); - roundtrip_bytes(r"(?-u)[^\x00-\xFF]", r"[a&&b]"); - } - - #[test] - fn print_anchor() { - roundtrip(r"^", r"\A"); - roundtrip(r"$", r"\z"); - roundtrip(r"(?m)^", r"(?m:^)"); - roundtrip(r"(?m)$", r"(?m:$)"); - } - - #[test] - fn print_word_boundary() { - roundtrip(r"\b", r"\b"); - roundtrip(r"\B", r"\B"); - roundtrip(r"(?-u)\b", r"(?-u:\b)"); - roundtrip_bytes(r"(?-u)\B", r"(?-u:\B)"); - } - - #[test] - fn print_repetition() { - roundtrip("a?", "a?"); - roundtrip("a??", "a??"); - roundtrip("(?U)a?", "a??"); - - roundtrip("a*", "a*"); - roundtrip("a*?", "a*?"); - roundtrip("(?U)a*", "a*?"); - - roundtrip("a+", "a+"); - roundtrip("a+?", "a+?"); - roundtrip("(?U)a+", "a+?"); - - roundtrip("a{1}", "a"); - roundtrip("a{2}", "a{2}"); - roundtrip("a{1,}", "a+"); - roundtrip("a{1,5}", "a{1,5}"); - roundtrip("a{1}?", "a"); - roundtrip("a{2}?", "a{2}"); - roundtrip("a{1,}?", "a+?"); - roundtrip("a{1,5}?", "a{1,5}?"); - roundtrip("(?U)a{1}", "a"); - roundtrip("(?U)a{2}", "a{2}"); - roundtrip("(?U)a{1,}", "a+?"); - roundtrip("(?U)a{1,5}", "a{1,5}?"); - - // Test that various zero-length repetitions always translate to an - // empty regex. This is more a property of HIR's smart constructors - // than the printer though. 
- roundtrip("a{0}", "(?:)"); - roundtrip("(?:ab){0}", "(?:)"); - #[cfg(feature = "unicode-gencat")] - { - roundtrip(r"\p{any}{0}", "(?:)"); - roundtrip(r"\P{any}{0}", "(?:)"); - } - } - - #[test] - fn print_group() { - roundtrip("()", "((?:))"); - roundtrip("(?P)", "(?P(?:))"); - roundtrip("(?:)", "(?:)"); - - roundtrip("(a)", "(a)"); - roundtrip("(?Pa)", "(?Pa)"); - roundtrip("(?:a)", "a"); - - roundtrip("((((a))))", "((((a))))"); - } - - #[test] - fn print_alternation() { - roundtrip("|", "(?:(?:)|(?:))"); - roundtrip("||", "(?:(?:)|(?:)|(?:))"); - - roundtrip("a|b", "[ab]"); - roundtrip("ab|cd", "(?:(?:ab)|(?:cd))"); - roundtrip("a|b|c", "[a-c]"); - roundtrip("ab|cd|ef", "(?:(?:ab)|(?:cd)|(?:ef))"); - roundtrip("foo|bar|quux", "(?:(?:foo)|(?:bar)|(?:quux))"); - } - - // This is a regression test that stresses a peculiarity of how the HIR - // is both constructed and printed. Namely, it is legal for a repetition - // to directly contain a concatenation. This particular construct isn't - // really possible to build from the concrete syntax directly, since you'd - // be forced to put the concatenation into (at least) a non-capturing - // group. Concurrently, the printer doesn't consider this case and just - // kind of naively prints the child expression and tacks on the repetition - // operator. - // - // As a result, if you attached '+' to a 'concat(a, b)', the printer gives - // you 'ab+', but clearly it really should be '(?:ab)+'. - // - // This bug isn't easy to surface because most ways of building an HIR - // come directly from the concrete syntax, and as mentioned above, it just - // isn't possible to build this kind of HIR from the concrete syntax. - // Nevertheless, this is definitely a bug. 
- // - // See: https://github.com/rust-lang/regex/issues/731 - #[test] - fn regression_repetition_concat() { - let expr = Hir::concat(alloc::vec![ - Hir::literal("x".as_bytes()), - Hir::repetition(hir::Repetition { - min: 1, - max: None, - greedy: true, - sub: Box::new(Hir::literal("ab".as_bytes())), - }), - Hir::literal("y".as_bytes()), - ]); - assert_eq!(r"(?:x(?:ab)+y)", expr.to_string()); - - let expr = Hir::concat(alloc::vec![ - Hir::look(hir::Look::Start), - Hir::repetition(hir::Repetition { - min: 1, - max: None, - greedy: true, - sub: Box::new(Hir::concat(alloc::vec![ - Hir::look(hir::Look::Start), - Hir::look(hir::Look::End), - ])), - }), - Hir::look(hir::Look::End), - ]); - assert_eq!(r"(?:\A\A\z\z)", expr.to_string()); - } - - // Just like regression_repetition_concat, but with the repetition using - // an alternation as a child expression instead. - // - // See: https://github.com/rust-lang/regex/issues/731 - #[test] - fn regression_repetition_alternation() { - let expr = Hir::concat(alloc::vec![ - Hir::literal("ab".as_bytes()), - Hir::repetition(hir::Repetition { - min: 1, - max: None, - greedy: true, - sub: Box::new(Hir::alternation(alloc::vec![ - Hir::literal("cd".as_bytes()), - Hir::literal("ef".as_bytes()), - ])), - }), - Hir::literal("gh".as_bytes()), - ]); - assert_eq!(r"(?:(?:ab)(?:(?:cd)|(?:ef))+(?:gh))", expr.to_string()); - - let expr = Hir::concat(alloc::vec![ - Hir::look(hir::Look::Start), - Hir::repetition(hir::Repetition { - min: 1, - max: None, - greedy: true, - sub: Box::new(Hir::alternation(alloc::vec![ - Hir::look(hir::Look::Start), - Hir::look(hir::Look::End), - ])), - }), - Hir::look(hir::Look::End), - ]); - assert_eq!(r"(?:\A(?:\A|\z)\z)", expr.to_string()); - } - - // This regression test is very similar in flavor to - // regression_repetition_concat in that the root of the issue lies in a - // peculiarity of how the HIR is represented and how the printer writes it - // out. 
Like the other regression, this one is also rooted in the fact that - // you can't produce the peculiar HIR from the concrete syntax. Namely, you - // just can't have a 'concat(a, alt(b, c))' because the 'alt' will normally - // be in (at least) a non-capturing group. Why? Because the '|' has very - // low precedence (lower that concatenation), and so something like 'ab|c' - // is actually 'alt(ab, c)'. - // - // See: https://github.com/rust-lang/regex/issues/516 - #[test] - fn regression_alternation_concat() { - let expr = Hir::concat(alloc::vec![ - Hir::literal("ab".as_bytes()), - Hir::alternation(alloc::vec![ - Hir::literal("mn".as_bytes()), - Hir::literal("xy".as_bytes()), - ]), - ]); - assert_eq!(r"(?:(?:ab)(?:(?:mn)|(?:xy)))", expr.to_string()); - - let expr = Hir::concat(alloc::vec![ - Hir::look(hir::Look::Start), - Hir::alternation(alloc::vec![ - Hir::look(hir::Look::Start), - Hir::look(hir::Look::End), - ]), - ]); - assert_eq!(r"(?:\A(?:\A|\z))", expr.to_string()); - } -} diff --git a/vendor/regex-syntax/src/hir/translate.rs b/vendor/regex-syntax/src/hir/translate.rs deleted file mode 100644 index e8e5a881..00000000 --- a/vendor/regex-syntax/src/hir/translate.rs +++ /dev/null @@ -1,3744 +0,0 @@ -/*! -Defines a translator that converts an `Ast` to an `Hir`. -*/ - -use core::cell::{Cell, RefCell}; - -use alloc::{boxed::Box, string::ToString, vec, vec::Vec}; - -use crate::{ - ast::{self, Ast, Span, Visitor}, - either::Either, - hir::{self, Error, ErrorKind, Hir, HirKind}, - unicode::{self, ClassQuery}, -}; - -type Result = core::result::Result; - -/// A builder for constructing an AST->HIR translator. -#[derive(Clone, Debug)] -pub struct TranslatorBuilder { - utf8: bool, - line_terminator: u8, - flags: Flags, -} - -impl Default for TranslatorBuilder { - fn default() -> TranslatorBuilder { - TranslatorBuilder::new() - } -} - -impl TranslatorBuilder { - /// Create a new translator builder with a default c onfiguration. 
- pub fn new() -> TranslatorBuilder { - TranslatorBuilder { - utf8: true, - line_terminator: b'\n', - flags: Flags::default(), - } - } - - /// Build a translator using the current configuration. - pub fn build(&self) -> Translator { - Translator { - stack: RefCell::new(vec![]), - flags: Cell::new(self.flags), - utf8: self.utf8, - line_terminator: self.line_terminator, - } - } - - /// When disabled, translation will permit the construction of a regular - /// expression that may match invalid UTF-8. - /// - /// When enabled (the default), the translator is guaranteed to produce an - /// expression that, for non-empty matches, will only ever produce spans - /// that are entirely valid UTF-8 (otherwise, the translator will return an - /// error). - /// - /// Perhaps surprisingly, when UTF-8 is enabled, an empty regex or even - /// a negated ASCII word boundary (uttered as `(?-u:\B)` in the concrete - /// syntax) will be allowed even though they can produce matches that split - /// a UTF-8 encoded codepoint. This only applies to zero-width or "empty" - /// matches, and it is expected that the regex engine itself must handle - /// these cases if necessary (perhaps by suppressing any zero-width matches - /// that split a codepoint). - pub fn utf8(&mut self, yes: bool) -> &mut TranslatorBuilder { - self.utf8 = yes; - self - } - - /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`. - /// - /// Namely, instead of `.` (by default) matching everything except for `\n`, - /// this will cause `.` to match everything except for the byte given. - /// - /// If `.` is used in a context where Unicode mode is enabled and this byte - /// isn't ASCII, then an error will be returned. When Unicode mode is - /// disabled, then any byte is permitted, but will return an error if UTF-8 - /// mode is enabled and it is a non-ASCII byte. - /// - /// In short, any ASCII value for a line terminator is always okay. 
But a - /// non-ASCII byte might result in an error depending on whether Unicode - /// mode or UTF-8 mode are enabled. - /// - /// Note that if `R` mode is enabled then it always takes precedence and - /// the line terminator will be treated as `\r` and `\n` simultaneously. - /// - /// Note also that this *doesn't* impact the look-around assertions - /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional - /// configuration in the regex engine itself. - pub fn line_terminator(&mut self, byte: u8) -> &mut TranslatorBuilder { - self.line_terminator = byte; - self - } - - /// Enable or disable the case insensitive flag (`i`) by default. - pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder { - self.flags.case_insensitive = if yes { Some(true) } else { None }; - self - } - - /// Enable or disable the multi-line matching flag (`m`) by default. - pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder { - self.flags.multi_line = if yes { Some(true) } else { None }; - self - } - - /// Enable or disable the "dot matches any character" flag (`s`) by - /// default. - pub fn dot_matches_new_line( - &mut self, - yes: bool, - ) -> &mut TranslatorBuilder { - self.flags.dot_matches_new_line = if yes { Some(true) } else { None }; - self - } - - /// Enable or disable the CRLF mode flag (`R`) by default. - pub fn crlf(&mut self, yes: bool) -> &mut TranslatorBuilder { - self.flags.crlf = if yes { Some(true) } else { None }; - self - } - - /// Enable or disable the "swap greed" flag (`U`) by default. - pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder { - self.flags.swap_greed = if yes { Some(true) } else { None }; - self - } - - /// Enable or disable the Unicode flag (`u`) by default. - pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder { - self.flags.unicode = if yes { None } else { Some(false) }; - self - } -} - -/// A translator maps abstract syntax to a high level intermediate -/// representation. 
-/// -/// A translator may be benefit from reuse. That is, a translator can translate -/// many abstract syntax trees. -/// -/// A `Translator` can be configured in more detail via a -/// [`TranslatorBuilder`]. -#[derive(Clone, Debug)] -pub struct Translator { - /// Our call stack, but on the heap. - stack: RefCell>, - /// The current flag settings. - flags: Cell, - /// Whether we're allowed to produce HIR that can match arbitrary bytes. - utf8: bool, - /// The line terminator to use for `.`. - line_terminator: u8, -} - -impl Translator { - /// Create a new translator using the default configuration. - pub fn new() -> Translator { - TranslatorBuilder::new().build() - } - - /// Translate the given abstract syntax tree (AST) into a high level - /// intermediate representation (HIR). - /// - /// If there was a problem doing the translation, then an HIR-specific - /// error is returned. - /// - /// The original pattern string used to produce the `Ast` *must* also be - /// provided. The translator does not use the pattern string during any - /// correct translation, but is used for error reporting. - pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result { - ast::visit(ast, TranslatorI::new(self, pattern)) - } -} - -/// An HirFrame is a single stack frame, represented explicitly, which is -/// created for each item in the Ast that we traverse. -/// -/// Note that technically, this type doesn't represent our entire stack -/// frame. In particular, the Ast visitor represents any state associated with -/// traversing the Ast itself. -#[derive(Clone, Debug)] -enum HirFrame { - /// An arbitrary HIR expression. These get pushed whenever we hit a base - /// case in the Ast. They get popped after an inductive (i.e., recursive) - /// step is complete. - Expr(Hir), - /// A literal that is being constructed, character by character, from the - /// AST. We need this because the AST gives each individual character its - /// own node. 
So as we see characters, we peek at the top-most HirFrame. - /// If it's a literal, then we add to it. Otherwise, we push a new literal. - /// When it comes time to pop it, we convert it to an Hir via Hir::literal. - Literal(Vec), - /// A Unicode character class. This frame is mutated as we descend into - /// the Ast of a character class (which is itself its own mini recursive - /// structure). - ClassUnicode(hir::ClassUnicode), - /// A byte-oriented character class. This frame is mutated as we descend - /// into the Ast of a character class (which is itself its own mini - /// recursive structure). - /// - /// Byte character classes are created when Unicode mode (`u`) is disabled. - /// If `utf8` is enabled (the default), then a byte character is only - /// permitted to match ASCII text. - ClassBytes(hir::ClassBytes), - /// This is pushed whenever a repetition is observed. After visiting every - /// sub-expression in the repetition, the translator's stack is expected to - /// have this sentinel at the top. - /// - /// This sentinel only exists to stop other things (like flattening - /// literals) from reaching across repetition operators. - Repetition, - /// This is pushed on to the stack upon first seeing any kind of capture, - /// indicated by parentheses (including non-capturing groups). It is popped - /// upon leaving a group. - Group { - /// The old active flags when this group was opened. - /// - /// If this group sets flags, then the new active flags are set to the - /// result of merging the old flags with the flags introduced by this - /// group. If the group doesn't set any flags, then this is simply - /// equivalent to whatever flags were set when the group was opened. - /// - /// When this group is popped, the active flags should be restored to - /// the flags set here. - /// - /// The "active" flags correspond to whatever flags are set in the - /// Translator. - old_flags: Flags, - }, - /// This is pushed whenever a concatenation is observed. 
After visiting - /// every sub-expression in the concatenation, the translator's stack is - /// popped until it sees a Concat frame. - Concat, - /// This is pushed whenever an alternation is observed. After visiting - /// every sub-expression in the alternation, the translator's stack is - /// popped until it sees an Alternation frame. - Alternation, - /// This is pushed immediately before each sub-expression in an - /// alternation. This separates the branches of an alternation on the - /// stack and prevents literal flattening from reaching across alternation - /// branches. - /// - /// It is popped after each expression in a branch until an 'Alternation' - /// frame is observed when doing a post visit on an alternation. - AlternationBranch, -} - -impl HirFrame { - /// Assert that the current stack frame is an Hir expression and return it. - fn unwrap_expr(self) -> Hir { - match self { - HirFrame::Expr(expr) => expr, - HirFrame::Literal(lit) => Hir::literal(lit), - _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self), - } - } - - /// Assert that the current stack frame is a Unicode class expression and - /// return it. - fn unwrap_class_unicode(self) -> hir::ClassUnicode { - match self { - HirFrame::ClassUnicode(cls) => cls, - _ => panic!( - "tried to unwrap Unicode class \ - from HirFrame, got: {:?}", - self - ), - } - } - - /// Assert that the current stack frame is a byte class expression and - /// return it. - fn unwrap_class_bytes(self) -> hir::ClassBytes { - match self { - HirFrame::ClassBytes(cls) => cls, - _ => panic!( - "tried to unwrap byte class \ - from HirFrame, got: {:?}", - self - ), - } - } - - /// Assert that the current stack frame is a repetition sentinel. If it - /// isn't, then panic. 
- fn unwrap_repetition(self) { - match self { - HirFrame::Repetition => {} - _ => { - panic!( - "tried to unwrap repetition from HirFrame, got: {:?}", - self - ) - } - } - } - - /// Assert that the current stack frame is a group indicator and return - /// its corresponding flags (the flags that were active at the time the - /// group was entered). - fn unwrap_group(self) -> Flags { - match self { - HirFrame::Group { old_flags } => old_flags, - _ => { - panic!("tried to unwrap group from HirFrame, got: {:?}", self) - } - } - } - - /// Assert that the current stack frame is an alternation pipe sentinel. If - /// it isn't, then panic. - fn unwrap_alternation_pipe(self) { - match self { - HirFrame::AlternationBranch => {} - _ => { - panic!( - "tried to unwrap alt pipe from HirFrame, got: {:?}", - self - ) - } - } - } -} - -impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { - type Output = Hir; - type Err = Error; - - fn finish(self) -> Result { - // ... otherwise, we should have exactly one HIR on the stack. 
- assert_eq!(self.trans().stack.borrow().len(), 1); - Ok(self.pop().unwrap().unwrap_expr()) - } - - fn visit_pre(&mut self, ast: &Ast) -> Result<()> { - match *ast { - Ast::ClassBracketed(_) => { - if self.flags().unicode() { - let cls = hir::ClassUnicode::empty(); - self.push(HirFrame::ClassUnicode(cls)); - } else { - let cls = hir::ClassBytes::empty(); - self.push(HirFrame::ClassBytes(cls)); - } - } - Ast::Repetition(_) => self.push(HirFrame::Repetition), - Ast::Group(ref x) => { - let old_flags = x - .flags() - .map(|ast| self.set_flags(ast)) - .unwrap_or_else(|| self.flags()); - self.push(HirFrame::Group { old_flags }); - } - Ast::Concat(_) => { - self.push(HirFrame::Concat); - } - Ast::Alternation(ref x) => { - self.push(HirFrame::Alternation); - if !x.asts.is_empty() { - self.push(HirFrame::AlternationBranch); - } - } - _ => {} - } - Ok(()) - } - - fn visit_post(&mut self, ast: &Ast) -> Result<()> { - match *ast { - Ast::Empty(_) => { - self.push(HirFrame::Expr(Hir::empty())); - } - Ast::Flags(ref x) => { - self.set_flags(&x.flags); - // Flags in the AST are generally considered directives and - // not actual sub-expressions. However, they can be used in - // the concrete syntax like `((?i))`, and we need some kind of - // indication of an expression there, and Empty is the correct - // choice. - // - // There can also be things like `(?i)+`, but we rule those out - // in the parser. In the future, we might allow them for - // consistency sake. - self.push(HirFrame::Expr(Hir::empty())); - } - Ast::Literal(ref x) => match self.ast_literal_to_scalar(x)? { - Either::Right(byte) => self.push_byte(byte), - Either::Left(ch) => match self.case_fold_char(x.span, ch)? 
{ - None => self.push_char(ch), - Some(expr) => self.push(HirFrame::Expr(expr)), - }, - }, - Ast::Dot(ref span) => { - self.push(HirFrame::Expr(self.hir_dot(**span)?)); - } - Ast::Assertion(ref x) => { - self.push(HirFrame::Expr(self.hir_assertion(x)?)); - } - Ast::ClassPerl(ref x) => { - if self.flags().unicode() { - let cls = self.hir_perl_unicode_class(x)?; - let hcls = hir::Class::Unicode(cls); - self.push(HirFrame::Expr(Hir::class(hcls))); - } else { - let cls = self.hir_perl_byte_class(x)?; - let hcls = hir::Class::Bytes(cls); - self.push(HirFrame::Expr(Hir::class(hcls))); - } - } - Ast::ClassUnicode(ref x) => { - let cls = hir::Class::Unicode(self.hir_unicode_class(x)?); - self.push(HirFrame::Expr(Hir::class(cls))); - } - Ast::ClassBracketed(ref ast) => { - if self.flags().unicode() { - let mut cls = self.pop().unwrap().unwrap_class_unicode(); - self.unicode_fold_and_negate( - &ast.span, - ast.negated, - &mut cls, - )?; - let expr = Hir::class(hir::Class::Unicode(cls)); - self.push(HirFrame::Expr(expr)); - } else { - let mut cls = self.pop().unwrap().unwrap_class_bytes(); - self.bytes_fold_and_negate( - &ast.span, - ast.negated, - &mut cls, - )?; - let expr = Hir::class(hir::Class::Bytes(cls)); - self.push(HirFrame::Expr(expr)); - } - } - Ast::Repetition(ref x) => { - let expr = self.pop().unwrap().unwrap_expr(); - self.pop().unwrap().unwrap_repetition(); - self.push(HirFrame::Expr(self.hir_repetition(x, expr))); - } - Ast::Group(ref x) => { - let expr = self.pop().unwrap().unwrap_expr(); - let old_flags = self.pop().unwrap().unwrap_group(); - self.trans().flags.set(old_flags); - self.push(HirFrame::Expr(self.hir_capture(x, expr))); - } - Ast::Concat(_) => { - let mut exprs = vec![]; - while let Some(expr) = self.pop_concat_expr() { - if !matches!(*expr.kind(), HirKind::Empty) { - exprs.push(expr); - } - } - exprs.reverse(); - self.push(HirFrame::Expr(Hir::concat(exprs))); - } - Ast::Alternation(_) => { - let mut exprs = vec![]; - while let Some(expr) = 
self.pop_alt_expr() { - self.pop().unwrap().unwrap_alternation_pipe(); - exprs.push(expr); - } - exprs.reverse(); - self.push(HirFrame::Expr(Hir::alternation(exprs))); - } - } - Ok(()) - } - - fn visit_alternation_in(&mut self) -> Result<()> { - self.push(HirFrame::AlternationBranch); - Ok(()) - } - - fn visit_class_set_item_pre( - &mut self, - ast: &ast::ClassSetItem, - ) -> Result<()> { - match *ast { - ast::ClassSetItem::Bracketed(_) => { - if self.flags().unicode() { - let cls = hir::ClassUnicode::empty(); - self.push(HirFrame::ClassUnicode(cls)); - } else { - let cls = hir::ClassBytes::empty(); - self.push(HirFrame::ClassBytes(cls)); - } - } - // We needn't handle the Union case here since the visitor will - // do it for us. - _ => {} - } - Ok(()) - } - - fn visit_class_set_item_post( - &mut self, - ast: &ast::ClassSetItem, - ) -> Result<()> { - match *ast { - ast::ClassSetItem::Empty(_) => {} - ast::ClassSetItem::Literal(ref x) => { - if self.flags().unicode() { - let mut cls = self.pop().unwrap().unwrap_class_unicode(); - cls.push(hir::ClassUnicodeRange::new(x.c, x.c)); - self.push(HirFrame::ClassUnicode(cls)); - } else { - let mut cls = self.pop().unwrap().unwrap_class_bytes(); - let byte = self.class_literal_byte(x)?; - cls.push(hir::ClassBytesRange::new(byte, byte)); - self.push(HirFrame::ClassBytes(cls)); - } - } - ast::ClassSetItem::Range(ref x) => { - if self.flags().unicode() { - let mut cls = self.pop().unwrap().unwrap_class_unicode(); - cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c)); - self.push(HirFrame::ClassUnicode(cls)); - } else { - let mut cls = self.pop().unwrap().unwrap_class_bytes(); - let start = self.class_literal_byte(&x.start)?; - let end = self.class_literal_byte(&x.end)?; - cls.push(hir::ClassBytesRange::new(start, end)); - self.push(HirFrame::ClassBytes(cls)); - } - } - ast::ClassSetItem::Ascii(ref x) => { - if self.flags().unicode() { - let xcls = self.hir_ascii_unicode_class(x)?; - let mut cls = 
self.pop().unwrap().unwrap_class_unicode(); - cls.union(&xcls); - self.push(HirFrame::ClassUnicode(cls)); - } else { - let xcls = self.hir_ascii_byte_class(x)?; - let mut cls = self.pop().unwrap().unwrap_class_bytes(); - cls.union(&xcls); - self.push(HirFrame::ClassBytes(cls)); - } - } - ast::ClassSetItem::Unicode(ref x) => { - let xcls = self.hir_unicode_class(x)?; - let mut cls = self.pop().unwrap().unwrap_class_unicode(); - cls.union(&xcls); - self.push(HirFrame::ClassUnicode(cls)); - } - ast::ClassSetItem::Perl(ref x) => { - if self.flags().unicode() { - let xcls = self.hir_perl_unicode_class(x)?; - let mut cls = self.pop().unwrap().unwrap_class_unicode(); - cls.union(&xcls); - self.push(HirFrame::ClassUnicode(cls)); - } else { - let xcls = self.hir_perl_byte_class(x)?; - let mut cls = self.pop().unwrap().unwrap_class_bytes(); - cls.union(&xcls); - self.push(HirFrame::ClassBytes(cls)); - } - } - ast::ClassSetItem::Bracketed(ref ast) => { - if self.flags().unicode() { - let mut cls1 = self.pop().unwrap().unwrap_class_unicode(); - self.unicode_fold_and_negate( - &ast.span, - ast.negated, - &mut cls1, - )?; - - let mut cls2 = self.pop().unwrap().unwrap_class_unicode(); - cls2.union(&cls1); - self.push(HirFrame::ClassUnicode(cls2)); - } else { - let mut cls1 = self.pop().unwrap().unwrap_class_bytes(); - self.bytes_fold_and_negate( - &ast.span, - ast.negated, - &mut cls1, - )?; - - let mut cls2 = self.pop().unwrap().unwrap_class_bytes(); - cls2.union(&cls1); - self.push(HirFrame::ClassBytes(cls2)); - } - } - // This is handled automatically by the visitor. 
- ast::ClassSetItem::Union(_) => {} - } - Ok(()) - } - - fn visit_class_set_binary_op_pre( - &mut self, - _op: &ast::ClassSetBinaryOp, - ) -> Result<()> { - if self.flags().unicode() { - let cls = hir::ClassUnicode::empty(); - self.push(HirFrame::ClassUnicode(cls)); - } else { - let cls = hir::ClassBytes::empty(); - self.push(HirFrame::ClassBytes(cls)); - } - Ok(()) - } - - fn visit_class_set_binary_op_in( - &mut self, - _op: &ast::ClassSetBinaryOp, - ) -> Result<()> { - if self.flags().unicode() { - let cls = hir::ClassUnicode::empty(); - self.push(HirFrame::ClassUnicode(cls)); - } else { - let cls = hir::ClassBytes::empty(); - self.push(HirFrame::ClassBytes(cls)); - } - Ok(()) - } - - fn visit_class_set_binary_op_post( - &mut self, - op: &ast::ClassSetBinaryOp, - ) -> Result<()> { - use crate::ast::ClassSetBinaryOpKind::*; - - if self.flags().unicode() { - let mut rhs = self.pop().unwrap().unwrap_class_unicode(); - let mut lhs = self.pop().unwrap().unwrap_class_unicode(); - let mut cls = self.pop().unwrap().unwrap_class_unicode(); - if self.flags().case_insensitive() { - rhs.try_case_fold_simple().map_err(|_| { - self.error( - op.rhs.span().clone(), - ErrorKind::UnicodeCaseUnavailable, - ) - })?; - lhs.try_case_fold_simple().map_err(|_| { - self.error( - op.lhs.span().clone(), - ErrorKind::UnicodeCaseUnavailable, - ) - })?; - } - match op.kind { - Intersection => lhs.intersect(&rhs), - Difference => lhs.difference(&rhs), - SymmetricDifference => lhs.symmetric_difference(&rhs), - } - cls.union(&lhs); - self.push(HirFrame::ClassUnicode(cls)); - } else { - let mut rhs = self.pop().unwrap().unwrap_class_bytes(); - let mut lhs = self.pop().unwrap().unwrap_class_bytes(); - let mut cls = self.pop().unwrap().unwrap_class_bytes(); - if self.flags().case_insensitive() { - rhs.case_fold_simple(); - lhs.case_fold_simple(); - } - match op.kind { - Intersection => lhs.intersect(&rhs), - Difference => lhs.difference(&rhs), - SymmetricDifference => 
lhs.symmetric_difference(&rhs), - } - cls.union(&lhs); - self.push(HirFrame::ClassBytes(cls)); - } - Ok(()) - } -} - -/// The internal implementation of a translator. -/// -/// This type is responsible for carrying around the original pattern string, -/// which is not tied to the internal state of a translator. -/// -/// A TranslatorI exists for the time it takes to translate a single Ast. -#[derive(Clone, Debug)] -struct TranslatorI<'t, 'p> { - trans: &'t Translator, - pattern: &'p str, -} - -impl<'t, 'p> TranslatorI<'t, 'p> { - /// Build a new internal translator. - fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> { - TranslatorI { trans, pattern } - } - - /// Return a reference to the underlying translator. - fn trans(&self) -> &Translator { - &self.trans - } - - /// Push the given frame on to the call stack. - fn push(&self, frame: HirFrame) { - self.trans().stack.borrow_mut().push(frame); - } - - /// Push the given literal char on to the call stack. - /// - /// If the top-most element of the stack is a literal, then the char - /// is appended to the end of that literal. Otherwise, a new literal - /// containing just the given char is pushed to the top of the stack. - fn push_char(&self, ch: char) { - let mut buf = [0; 4]; - let bytes = ch.encode_utf8(&mut buf).as_bytes(); - let mut stack = self.trans().stack.borrow_mut(); - if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() { - literal.extend_from_slice(bytes); - } else { - stack.push(HirFrame::Literal(bytes.to_vec())); - } - } - - /// Push the given literal byte on to the call stack. - /// - /// If the top-most element of the stack is a literal, then the byte - /// is appended to the end of that literal. Otherwise, a new literal - /// containing just the given byte is pushed to the top of the stack. 
- fn push_byte(&self, byte: u8) { - let mut stack = self.trans().stack.borrow_mut(); - if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() { - literal.push(byte); - } else { - stack.push(HirFrame::Literal(vec![byte])); - } - } - - /// Pop the top of the call stack. If the call stack is empty, return None. - fn pop(&self) -> Option { - self.trans().stack.borrow_mut().pop() - } - - /// Pop an HIR expression from the top of the stack for a concatenation. - /// - /// This returns None if the stack is empty or when a concat frame is seen. - /// Otherwise, it panics if it could not find an HIR expression. - fn pop_concat_expr(&self) -> Option { - let frame = self.pop()?; - match frame { - HirFrame::Concat => None, - HirFrame::Expr(expr) => Some(expr), - HirFrame::Literal(lit) => Some(Hir::literal(lit)), - HirFrame::ClassUnicode(_) => { - unreachable!("expected expr or concat, got Unicode class") - } - HirFrame::ClassBytes(_) => { - unreachable!("expected expr or concat, got byte class") - } - HirFrame::Repetition => { - unreachable!("expected expr or concat, got repetition") - } - HirFrame::Group { .. } => { - unreachable!("expected expr or concat, got group") - } - HirFrame::Alternation => { - unreachable!("expected expr or concat, got alt marker") - } - HirFrame::AlternationBranch => { - unreachable!("expected expr or concat, got alt branch marker") - } - } - } - - /// Pop an HIR expression from the top of the stack for an alternation. - /// - /// This returns None if the stack is empty or when an alternation frame is - /// seen. Otherwise, it panics if it could not find an HIR expression. 
- fn pop_alt_expr(&self) -> Option { - let frame = self.pop()?; - match frame { - HirFrame::Alternation => None, - HirFrame::Expr(expr) => Some(expr), - HirFrame::Literal(lit) => Some(Hir::literal(lit)), - HirFrame::ClassUnicode(_) => { - unreachable!("expected expr or alt, got Unicode class") - } - HirFrame::ClassBytes(_) => { - unreachable!("expected expr or alt, got byte class") - } - HirFrame::Repetition => { - unreachable!("expected expr or alt, got repetition") - } - HirFrame::Group { .. } => { - unreachable!("expected expr or alt, got group") - } - HirFrame::Concat => { - unreachable!("expected expr or alt, got concat marker") - } - HirFrame::AlternationBranch => { - unreachable!("expected expr or alt, got alt branch marker") - } - } - } - - /// Create a new error with the given span and error type. - fn error(&self, span: Span, kind: ErrorKind) -> Error { - Error { kind, pattern: self.pattern.to_string(), span } - } - - /// Return a copy of the active flags. - fn flags(&self) -> Flags { - self.trans().flags.get() - } - - /// Set the flags of this translator from the flags set in the given AST. - /// Then, return the old flags. - fn set_flags(&self, ast_flags: &ast::Flags) -> Flags { - let old_flags = self.flags(); - let mut new_flags = Flags::from_ast(ast_flags); - new_flags.merge(&old_flags); - self.trans().flags.set(new_flags); - old_flags - } - - /// Convert an Ast literal to its scalar representation. - /// - /// When Unicode mode is enabled, then this always succeeds and returns a - /// `char` (Unicode scalar value). - /// - /// When Unicode mode is disabled, then a `char` will still be returned - /// whenever possible. A byte is returned only when invalid UTF-8 is - /// allowed and when the byte is not ASCII. Otherwise, a non-ASCII byte - /// will result in an error when invalid UTF-8 is not allowed. 
- fn ast_literal_to_scalar( - &self, - lit: &ast::Literal, - ) -> Result> { - if self.flags().unicode() { - return Ok(Either::Left(lit.c)); - } - let byte = match lit.byte() { - None => return Ok(Either::Left(lit.c)), - Some(byte) => byte, - }; - if byte <= 0x7F { - return Ok(Either::Left(char::try_from(byte).unwrap())); - } - if self.trans().utf8 { - return Err(self.error(lit.span, ErrorKind::InvalidUtf8)); - } - Ok(Either::Right(byte)) - } - - fn case_fold_char(&self, span: Span, c: char) -> Result> { - if !self.flags().case_insensitive() { - return Ok(None); - } - if self.flags().unicode() { - // If case folding won't do anything, then don't bother trying. - let map = unicode::SimpleCaseFolder::new() - .map(|f| f.overlaps(c, c)) - .map_err(|_| { - self.error(span, ErrorKind::UnicodeCaseUnavailable) - })?; - if !map { - return Ok(None); - } - let mut cls = - hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new( - c, c, - )]); - cls.try_case_fold_simple().map_err(|_| { - self.error(span, ErrorKind::UnicodeCaseUnavailable) - })?; - Ok(Some(Hir::class(hir::Class::Unicode(cls)))) - } else { - if !c.is_ascii() { - return Ok(None); - } - // If case folding won't do anything, then don't bother trying. - match c { - 'A'..='Z' | 'a'..='z' => {} - _ => return Ok(None), - } - let mut cls = - hir::ClassBytes::new(vec![hir::ClassBytesRange::new( - // OK because 'c.len_utf8() == 1' which in turn implies - // that 'c' is ASCII. 
- u8::try_from(c).unwrap(), - u8::try_from(c).unwrap(), - )]); - cls.case_fold_simple(); - Ok(Some(Hir::class(hir::Class::Bytes(cls)))) - } - } - - fn hir_dot(&self, span: Span) -> Result { - let (utf8, lineterm, flags) = - (self.trans().utf8, self.trans().line_terminator, self.flags()); - if utf8 && (!flags.unicode() || !lineterm.is_ascii()) { - return Err(self.error(span, ErrorKind::InvalidUtf8)); - } - let dot = if flags.dot_matches_new_line() { - if flags.unicode() { - hir::Dot::AnyChar - } else { - hir::Dot::AnyByte - } - } else { - if flags.unicode() { - if flags.crlf() { - hir::Dot::AnyCharExceptCRLF - } else { - if !lineterm.is_ascii() { - return Err( - self.error(span, ErrorKind::InvalidLineTerminator) - ); - } - hir::Dot::AnyCharExcept(char::from(lineterm)) - } - } else { - if flags.crlf() { - hir::Dot::AnyByteExceptCRLF - } else { - hir::Dot::AnyByteExcept(lineterm) - } - } - }; - Ok(Hir::dot(dot)) - } - - fn hir_assertion(&self, asst: &ast::Assertion) -> Result { - let unicode = self.flags().unicode(); - let multi_line = self.flags().multi_line(); - let crlf = self.flags().crlf(); - Ok(match asst.kind { - ast::AssertionKind::StartLine => Hir::look(if multi_line { - if crlf { - hir::Look::StartCRLF - } else { - hir::Look::StartLF - } - } else { - hir::Look::Start - }), - ast::AssertionKind::EndLine => Hir::look(if multi_line { - if crlf { - hir::Look::EndCRLF - } else { - hir::Look::EndLF - } - } else { - hir::Look::End - }), - ast::AssertionKind::StartText => Hir::look(hir::Look::Start), - ast::AssertionKind::EndText => Hir::look(hir::Look::End), - ast::AssertionKind::WordBoundary => Hir::look(if unicode { - hir::Look::WordUnicode - } else { - hir::Look::WordAscii - }), - ast::AssertionKind::NotWordBoundary => Hir::look(if unicode { - hir::Look::WordUnicodeNegate - } else { - hir::Look::WordAsciiNegate - }), - ast::AssertionKind::WordBoundaryStart - | ast::AssertionKind::WordBoundaryStartAngle => { - Hir::look(if unicode { - hir::Look::WordStartUnicode 
- } else { - hir::Look::WordStartAscii - }) - } - ast::AssertionKind::WordBoundaryEnd - | ast::AssertionKind::WordBoundaryEndAngle => { - Hir::look(if unicode { - hir::Look::WordEndUnicode - } else { - hir::Look::WordEndAscii - }) - } - ast::AssertionKind::WordBoundaryStartHalf => { - Hir::look(if unicode { - hir::Look::WordStartHalfUnicode - } else { - hir::Look::WordStartHalfAscii - }) - } - ast::AssertionKind::WordBoundaryEndHalf => Hir::look(if unicode { - hir::Look::WordEndHalfUnicode - } else { - hir::Look::WordEndHalfAscii - }), - }) - } - - fn hir_capture(&self, group: &ast::Group, expr: Hir) -> Hir { - let (index, name) = match group.kind { - ast::GroupKind::CaptureIndex(index) => (index, None), - ast::GroupKind::CaptureName { ref name, .. } => { - (name.index, Some(name.name.clone().into_boxed_str())) - } - // The HIR doesn't need to use non-capturing groups, since the way - // in which the data type is defined handles this automatically. - ast::GroupKind::NonCapturing(_) => return expr, - }; - Hir::capture(hir::Capture { index, name, sub: Box::new(expr) }) - } - - fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir { - let (min, max) = match rep.op.kind { - ast::RepetitionKind::ZeroOrOne => (0, Some(1)), - ast::RepetitionKind::ZeroOrMore => (0, None), - ast::RepetitionKind::OneOrMore => (1, None), - ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => { - (m, Some(m)) - } - ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => { - (m, None) - } - ast::RepetitionKind::Range(ast::RepetitionRange::Bounded( - m, - n, - )) => (m, Some(n)), - }; - let greedy = - if self.flags().swap_greed() { !rep.greedy } else { rep.greedy }; - Hir::repetition(hir::Repetition { - min, - max, - greedy, - sub: Box::new(expr), - }) - } - - fn hir_unicode_class( - &self, - ast_class: &ast::ClassUnicode, - ) -> Result { - use crate::ast::ClassUnicodeKind::*; - - if !self.flags().unicode() { - return Err( - self.error(ast_class.span, 
ErrorKind::UnicodeNotAllowed) - ); - } - let query = match ast_class.kind { - OneLetter(name) => ClassQuery::OneLetter(name), - Named(ref name) => ClassQuery::Binary(name), - NamedValue { ref name, ref value, .. } => ClassQuery::ByValue { - property_name: name, - property_value: value, - }, - }; - let mut result = self.convert_unicode_class_error( - &ast_class.span, - unicode::class(query), - ); - if let Ok(ref mut class) = result { - self.unicode_fold_and_negate( - &ast_class.span, - ast_class.negated, - class, - )?; - } - result - } - - fn hir_ascii_unicode_class( - &self, - ast: &ast::ClassAscii, - ) -> Result { - let mut cls = hir::ClassUnicode::new( - ascii_class_as_chars(&ast.kind) - .map(|(s, e)| hir::ClassUnicodeRange::new(s, e)), - ); - self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?; - Ok(cls) - } - - fn hir_ascii_byte_class( - &self, - ast: &ast::ClassAscii, - ) -> Result { - let mut cls = hir::ClassBytes::new( - ascii_class(&ast.kind) - .map(|(s, e)| hir::ClassBytesRange::new(s, e)), - ); - self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?; - Ok(cls) - } - - fn hir_perl_unicode_class( - &self, - ast_class: &ast::ClassPerl, - ) -> Result { - use crate::ast::ClassPerlKind::*; - - assert!(self.flags().unicode()); - let result = match ast_class.kind { - Digit => unicode::perl_digit(), - Space => unicode::perl_space(), - Word => unicode::perl_word(), - }; - let mut class = - self.convert_unicode_class_error(&ast_class.span, result)?; - // We needn't apply case folding here because the Perl Unicode classes - // are already closed under Unicode simple case folding. 
- if ast_class.negated { - class.negate(); - } - Ok(class) - } - - fn hir_perl_byte_class( - &self, - ast_class: &ast::ClassPerl, - ) -> Result { - use crate::ast::ClassPerlKind::*; - - assert!(!self.flags().unicode()); - let mut class = match ast_class.kind { - Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit), - Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space), - Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word), - }; - // We needn't apply case folding here because the Perl ASCII classes - // are already closed (under ASCII case folding). - if ast_class.negated { - class.negate(); - } - // Negating a Perl byte class is likely to cause it to match invalid - // UTF-8. That's only OK if the translator is configured to allow such - // things. - if self.trans().utf8 && !class.is_ascii() { - return Err(self.error(ast_class.span, ErrorKind::InvalidUtf8)); - } - Ok(class) - } - - /// Converts the given Unicode specific error to an HIR translation error. - /// - /// The span given should approximate the position at which an error would - /// occur. - fn convert_unicode_class_error( - &self, - span: &Span, - result: core::result::Result, - ) -> Result { - result.map_err(|err| { - let sp = span.clone(); - match err { - unicode::Error::PropertyNotFound => { - self.error(sp, ErrorKind::UnicodePropertyNotFound) - } - unicode::Error::PropertyValueNotFound => { - self.error(sp, ErrorKind::UnicodePropertyValueNotFound) - } - unicode::Error::PerlClassNotFound => { - self.error(sp, ErrorKind::UnicodePerlClassNotFound) - } - } - }) - } - - fn unicode_fold_and_negate( - &self, - span: &Span, - negated: bool, - class: &mut hir::ClassUnicode, - ) -> Result<()> { - // Note that we must apply case folding before negation! - // Consider `(?i)[^x]`. If we applied negation first, then - // the result would be the character class that matched any - // Unicode scalar value. 
- if self.flags().case_insensitive() { - class.try_case_fold_simple().map_err(|_| { - self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable) - })?; - } - if negated { - class.negate(); - } - Ok(()) - } - - fn bytes_fold_and_negate( - &self, - span: &Span, - negated: bool, - class: &mut hir::ClassBytes, - ) -> Result<()> { - // Note that we must apply case folding before negation! - // Consider `(?i)[^x]`. If we applied negation first, then - // the result would be the character class that matched any - // Unicode scalar value. - if self.flags().case_insensitive() { - class.case_fold_simple(); - } - if negated { - class.negate(); - } - if self.trans().utf8 && !class.is_ascii() { - return Err(self.error(span.clone(), ErrorKind::InvalidUtf8)); - } - Ok(()) - } - - /// Return a scalar byte value suitable for use as a literal in a byte - /// character class. - fn class_literal_byte(&self, ast: &ast::Literal) -> Result { - match self.ast_literal_to_scalar(ast)? { - Either::Right(byte) => Ok(byte), - Either::Left(ch) => { - if ch.is_ascii() { - Ok(u8::try_from(ch).unwrap()) - } else { - // We can't feasibly support Unicode in - // byte oriented classes. Byte classes don't - // do Unicode case folding. - Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed)) - } - } - } - } -} - -/// A translator's representation of a regular expression's flags at any given -/// moment in time. -/// -/// Each flag can be in one of three states: absent, present but disabled or -/// present but enabled. -#[derive(Clone, Copy, Debug, Default)] -struct Flags { - case_insensitive: Option, - multi_line: Option, - dot_matches_new_line: Option, - swap_greed: Option, - unicode: Option, - crlf: Option, - // Note that `ignore_whitespace` is omitted here because it is handled - // entirely in the parser. 
-} - -impl Flags { - fn from_ast(ast: &ast::Flags) -> Flags { - let mut flags = Flags::default(); - let mut enable = true; - for item in &ast.items { - match item.kind { - ast::FlagsItemKind::Negation => { - enable = false; - } - ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => { - flags.case_insensitive = Some(enable); - } - ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => { - flags.multi_line = Some(enable); - } - ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => { - flags.dot_matches_new_line = Some(enable); - } - ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => { - flags.swap_greed = Some(enable); - } - ast::FlagsItemKind::Flag(ast::Flag::Unicode) => { - flags.unicode = Some(enable); - } - ast::FlagsItemKind::Flag(ast::Flag::CRLF) => { - flags.crlf = Some(enable); - } - ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {} - } - } - flags - } - - fn merge(&mut self, previous: &Flags) { - if self.case_insensitive.is_none() { - self.case_insensitive = previous.case_insensitive; - } - if self.multi_line.is_none() { - self.multi_line = previous.multi_line; - } - if self.dot_matches_new_line.is_none() { - self.dot_matches_new_line = previous.dot_matches_new_line; - } - if self.swap_greed.is_none() { - self.swap_greed = previous.swap_greed; - } - if self.unicode.is_none() { - self.unicode = previous.unicode; - } - if self.crlf.is_none() { - self.crlf = previous.crlf; - } - } - - fn case_insensitive(&self) -> bool { - self.case_insensitive.unwrap_or(false) - } - - fn multi_line(&self) -> bool { - self.multi_line.unwrap_or(false) - } - - fn dot_matches_new_line(&self) -> bool { - self.dot_matches_new_line.unwrap_or(false) - } - - fn swap_greed(&self) -> bool { - self.swap_greed.unwrap_or(false) - } - - fn unicode(&self) -> bool { - self.unicode.unwrap_or(true) - } - - fn crlf(&self) -> bool { - self.crlf.unwrap_or(false) - } -} - -fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes { - let ranges: Vec<_> = ascii_class(kind) - 
.map(|(s, e)| hir::ClassBytesRange::new(s, e)) - .collect(); - hir::ClassBytes::new(ranges) -} - -fn ascii_class(kind: &ast::ClassAsciiKind) -> impl Iterator { - use crate::ast::ClassAsciiKind::*; - - let slice: &'static [(u8, u8)] = match *kind { - Alnum => &[(b'0', b'9'), (b'A', b'Z'), (b'a', b'z')], - Alpha => &[(b'A', b'Z'), (b'a', b'z')], - Ascii => &[(b'\x00', b'\x7F')], - Blank => &[(b'\t', b'\t'), (b' ', b' ')], - Cntrl => &[(b'\x00', b'\x1F'), (b'\x7F', b'\x7F')], - Digit => &[(b'0', b'9')], - Graph => &[(b'!', b'~')], - Lower => &[(b'a', b'z')], - Print => &[(b' ', b'~')], - Punct => &[(b'!', b'/'), (b':', b'@'), (b'[', b'`'), (b'{', b'~')], - Space => &[ - (b'\t', b'\t'), - (b'\n', b'\n'), - (b'\x0B', b'\x0B'), - (b'\x0C', b'\x0C'), - (b'\r', b'\r'), - (b' ', b' '), - ], - Upper => &[(b'A', b'Z')], - Word => &[(b'0', b'9'), (b'A', b'Z'), (b'_', b'_'), (b'a', b'z')], - Xdigit => &[(b'0', b'9'), (b'A', b'F'), (b'a', b'f')], - }; - slice.iter().copied() -} - -fn ascii_class_as_chars( - kind: &ast::ClassAsciiKind, -) -> impl Iterator { - ascii_class(kind).map(|(s, e)| (char::from(s), char::from(e))) -} - -#[cfg(test)] -mod tests { - use crate::{ - ast::{parse::ParserBuilder, Position}, - hir::{Look, Properties}, - }; - - use super::*; - - // We create these errors to compare with real hir::Errors in the tests. - // We define equality between TestError and hir::Error to disregard the - // pattern string in hir::Error, which is annoying to provide in tests. 
- #[derive(Clone, Debug)] - struct TestError { - span: Span, - kind: hir::ErrorKind, - } - - impl PartialEq for TestError { - fn eq(&self, other: &hir::Error) -> bool { - self.span == other.span && self.kind == other.kind - } - } - - impl PartialEq for hir::Error { - fn eq(&self, other: &TestError) -> bool { - self.span == other.span && self.kind == other.kind - } - } - - fn parse(pattern: &str) -> Ast { - ParserBuilder::new().octal(true).build().parse(pattern).unwrap() - } - - fn t(pattern: &str) -> Hir { - TranslatorBuilder::new() - .utf8(true) - .build() - .translate(pattern, &parse(pattern)) - .unwrap() - } - - fn t_err(pattern: &str) -> hir::Error { - TranslatorBuilder::new() - .utf8(true) - .build() - .translate(pattern, &parse(pattern)) - .unwrap_err() - } - - fn t_bytes(pattern: &str) -> Hir { - TranslatorBuilder::new() - .utf8(false) - .build() - .translate(pattern, &parse(pattern)) - .unwrap() - } - - fn props(pattern: &str) -> Properties { - t(pattern).properties().clone() - } - - fn props_bytes(pattern: &str) -> Properties { - t_bytes(pattern).properties().clone() - } - - fn hir_lit(s: &str) -> Hir { - hir_blit(s.as_bytes()) - } - - fn hir_blit(s: &[u8]) -> Hir { - Hir::literal(s) - } - - fn hir_capture(index: u32, expr: Hir) -> Hir { - Hir::capture(hir::Capture { index, name: None, sub: Box::new(expr) }) - } - - fn hir_capture_name(index: u32, name: &str, expr: Hir) -> Hir { - Hir::capture(hir::Capture { - index, - name: Some(name.into()), - sub: Box::new(expr), - }) - } - - fn hir_quest(greedy: bool, expr: Hir) -> Hir { - Hir::repetition(hir::Repetition { - min: 0, - max: Some(1), - greedy, - sub: Box::new(expr), - }) - } - - fn hir_star(greedy: bool, expr: Hir) -> Hir { - Hir::repetition(hir::Repetition { - min: 0, - max: None, - greedy, - sub: Box::new(expr), - }) - } - - fn hir_plus(greedy: bool, expr: Hir) -> Hir { - Hir::repetition(hir::Repetition { - min: 1, - max: None, - greedy, - sub: Box::new(expr), - }) - } - - fn hir_range(greedy: bool, 
min: u32, max: Option, expr: Hir) -> Hir { - Hir::repetition(hir::Repetition { - min, - max, - greedy, - sub: Box::new(expr), - }) - } - - fn hir_alt(alts: Vec) -> Hir { - Hir::alternation(alts) - } - - fn hir_cat(exprs: Vec) -> Hir { - Hir::concat(exprs) - } - - #[allow(dead_code)] - fn hir_uclass_query(query: ClassQuery<'_>) -> Hir { - Hir::class(hir::Class::Unicode(unicode::class(query).unwrap())) - } - - #[allow(dead_code)] - fn hir_uclass_perl_word() -> Hir { - Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap())) - } - - fn hir_ascii_uclass(kind: &ast::ClassAsciiKind) -> Hir { - Hir::class(hir::Class::Unicode(hir::ClassUnicode::new( - ascii_class_as_chars(kind) - .map(|(s, e)| hir::ClassUnicodeRange::new(s, e)), - ))) - } - - fn hir_ascii_bclass(kind: &ast::ClassAsciiKind) -> Hir { - Hir::class(hir::Class::Bytes(hir::ClassBytes::new( - ascii_class(kind).map(|(s, e)| hir::ClassBytesRange::new(s, e)), - ))) - } - - fn hir_uclass(ranges: &[(char, char)]) -> Hir { - Hir::class(uclass(ranges)) - } - - fn hir_bclass(ranges: &[(u8, u8)]) -> Hir { - Hir::class(bclass(ranges)) - } - - fn hir_case_fold(expr: Hir) -> Hir { - match expr.into_kind() { - HirKind::Class(mut cls) => { - cls.case_fold_simple(); - Hir::class(cls) - } - _ => panic!("cannot case fold non-class Hir expr"), - } - } - - fn hir_negate(expr: Hir) -> Hir { - match expr.into_kind() { - HirKind::Class(mut cls) => { - cls.negate(); - Hir::class(cls) - } - _ => panic!("cannot negate non-class Hir expr"), - } - } - - fn uclass(ranges: &[(char, char)]) -> hir::Class { - let ranges: Vec = ranges - .iter() - .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)) - .collect(); - hir::Class::Unicode(hir::ClassUnicode::new(ranges)) - } - - fn bclass(ranges: &[(u8, u8)]) -> hir::Class { - let ranges: Vec = ranges - .iter() - .map(|&(s, e)| hir::ClassBytesRange::new(s, e)) - .collect(); - hir::Class::Bytes(hir::ClassBytes::new(ranges)) - } - - #[cfg(feature = "unicode-case")] - fn class_case_fold(mut cls: 
hir::Class) -> Hir { - cls.case_fold_simple(); - Hir::class(cls) - } - - fn class_negate(mut cls: hir::Class) -> Hir { - cls.negate(); - Hir::class(cls) - } - - #[allow(dead_code)] - fn hir_union(expr1: Hir, expr2: Hir) -> Hir { - use crate::hir::Class::{Bytes, Unicode}; - - match (expr1.into_kind(), expr2.into_kind()) { - (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => { - c1.union(&c2); - Hir::class(hir::Class::Unicode(c1)) - } - (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => { - c1.union(&c2); - Hir::class(hir::Class::Bytes(c1)) - } - _ => panic!("cannot union non-class Hir exprs"), - } - } - - #[allow(dead_code)] - fn hir_difference(expr1: Hir, expr2: Hir) -> Hir { - use crate::hir::Class::{Bytes, Unicode}; - - match (expr1.into_kind(), expr2.into_kind()) { - (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => { - c1.difference(&c2); - Hir::class(hir::Class::Unicode(c1)) - } - (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => { - c1.difference(&c2); - Hir::class(hir::Class::Bytes(c1)) - } - _ => panic!("cannot difference non-class Hir exprs"), - } - } - - fn hir_look(look: hir::Look) -> Hir { - Hir::look(look) - } - - #[test] - fn empty() { - assert_eq!(t(""), Hir::empty()); - assert_eq!(t("(?i)"), Hir::empty()); - assert_eq!(t("()"), hir_capture(1, Hir::empty())); - assert_eq!(t("(?:)"), Hir::empty()); - assert_eq!(t("(?P)"), hir_capture_name(1, "wat", Hir::empty())); - assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()])); - assert_eq!( - t("()|()"), - hir_alt(vec![ - hir_capture(1, Hir::empty()), - hir_capture(2, Hir::empty()), - ]) - ); - assert_eq!( - t("(|b)"), - hir_capture(1, hir_alt(vec![Hir::empty(), hir_lit("b"),])) - ); - assert_eq!( - t("(a|)"), - hir_capture(1, hir_alt(vec![hir_lit("a"), Hir::empty(),])) - ); - assert_eq!( - t("(a||c)"), - hir_capture( - 1, - hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),]) - ) - ); - assert_eq!( - t("(||)"), - hir_capture( - 1, - 
hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),]) - ) - ); - } - - #[test] - fn literal() { - assert_eq!(t("a"), hir_lit("a")); - assert_eq!(t("(?-u)a"), hir_lit("a")); - assert_eq!(t("☃"), hir_lit("☃")); - assert_eq!(t("abcd"), hir_lit("abcd")); - - assert_eq!(t_bytes("(?-u)a"), hir_lit("a")); - assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a")); - assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a")); - assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF")); - - assert_eq!(t("(?-u)☃"), hir_lit("☃")); - assert_eq!( - t_err(r"(?-u)\xFF"), - TestError { - kind: hir::ErrorKind::InvalidUtf8, - span: Span::new( - Position::new(5, 1, 6), - Position::new(9, 1, 10) - ), - } - ); - } - - #[test] - fn literal_case_insensitive() { - #[cfg(feature = "unicode-case")] - assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),])); - #[cfg(feature = "unicode-case")] - assert_eq!(t("(?i:a)"), hir_uclass(&[('A', 'A'), ('a', 'a')])); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("a(?i)a(?-i)a"), - hir_cat(vec![ - hir_lit("a"), - hir_uclass(&[('A', 'A'), ('a', 'a')]), - hir_lit("a"), - ]) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i)ab@c"), - hir_cat(vec![ - hir_uclass(&[('A', 'A'), ('a', 'a')]), - hir_uclass(&[('B', 'B'), ('b', 'b')]), - hir_lit("@"), - hir_uclass(&[('C', 'C'), ('c', 'c')]), - ]) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i)β"), - hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),]) - ); - - assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?-u)a(?i)a(?-i)a"), - hir_cat(vec![ - hir_lit("a"), - hir_bclass(&[(b'A', b'A'), (b'a', b'a')]), - hir_lit("a"), - ]) - ); - assert_eq!( - t("(?i-u)ab@c"), - hir_cat(vec![ - hir_bclass(&[(b'A', b'A'), (b'a', b'a')]), - hir_bclass(&[(b'B', b'B'), (b'b', b'b')]), - hir_lit("@"), - hir_bclass(&[(b'C', b'C'), (b'c', b'c')]), - ]) - ); - - assert_eq!( - t_bytes("(?i-u)a"), - hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]) - ); 
- assert_eq!( - t_bytes("(?i-u)\x61"), - hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]) - ); - assert_eq!( - t_bytes(r"(?i-u)\x61"), - hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]) - ); - assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF")); - - assert_eq!(t("(?i-u)β"), hir_lit("β"),); - } - - #[test] - fn dot() { - assert_eq!( - t("."), - hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')]) - ); - assert_eq!( - t("(?R)."), - hir_uclass(&[ - ('\0', '\t'), - ('\x0B', '\x0C'), - ('\x0E', '\u{10FFFF}'), - ]) - ); - assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}')])); - assert_eq!(t("(?Rs)."), hir_uclass(&[('\0', '\u{10FFFF}')])); - assert_eq!( - t_bytes("(?-u)."), - hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')]) - ); - assert_eq!( - t_bytes("(?R-u)."), - hir_bclass(&[ - (b'\0', b'\t'), - (b'\x0B', b'\x0C'), - (b'\x0E', b'\xFF'), - ]) - ); - assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),])); - assert_eq!(t_bytes("(?Rs-u)."), hir_bclass(&[(b'\0', b'\xFF'),])); - - // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed. 
- assert_eq!( - t_err("(?-u)."), - TestError { - kind: hir::ErrorKind::InvalidUtf8, - span: Span::new( - Position::new(5, 1, 6), - Position::new(6, 1, 7) - ), - } - ); - assert_eq!( - t_err("(?R-u)."), - TestError { - kind: hir::ErrorKind::InvalidUtf8, - span: Span::new( - Position::new(6, 1, 7), - Position::new(7, 1, 8) - ), - } - ); - assert_eq!( - t_err("(?s-u)."), - TestError { - kind: hir::ErrorKind::InvalidUtf8, - span: Span::new( - Position::new(6, 1, 7), - Position::new(7, 1, 8) - ), - } - ); - assert_eq!( - t_err("(?Rs-u)."), - TestError { - kind: hir::ErrorKind::InvalidUtf8, - span: Span::new( - Position::new(7, 1, 8), - Position::new(8, 1, 9) - ), - } - ); - } - - #[test] - fn assertions() { - assert_eq!(t("^"), hir_look(hir::Look::Start)); - assert_eq!(t("$"), hir_look(hir::Look::End)); - assert_eq!(t(r"\A"), hir_look(hir::Look::Start)); - assert_eq!(t(r"\z"), hir_look(hir::Look::End)); - assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF)); - assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF)); - assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start)); - assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End)); - - assert_eq!(t(r"\b"), hir_look(hir::Look::WordUnicode)); - assert_eq!(t(r"\B"), hir_look(hir::Look::WordUnicodeNegate)); - assert_eq!(t(r"(?-u)\b"), hir_look(hir::Look::WordAscii)); - assert_eq!(t(r"(?-u)\B"), hir_look(hir::Look::WordAsciiNegate)); - } - - #[test] - fn group() { - assert_eq!(t("(a)"), hir_capture(1, hir_lit("a"))); - assert_eq!( - t("(a)(b)"), - hir_cat(vec![ - hir_capture(1, hir_lit("a")), - hir_capture(2, hir_lit("b")), - ]) - ); - assert_eq!( - t("(a)|(b)"), - hir_alt(vec![ - hir_capture(1, hir_lit("a")), - hir_capture(2, hir_lit("b")), - ]) - ); - assert_eq!(t("(?P)"), hir_capture_name(1, "foo", Hir::empty())); - assert_eq!(t("(?Pa)"), hir_capture_name(1, "foo", hir_lit("a"))); - assert_eq!( - t("(?Pa)(?Pb)"), - hir_cat(vec![ - hir_capture_name(1, "foo", hir_lit("a")), - hir_capture_name(2, "bar", hir_lit("b")), - ]) - ); - 
assert_eq!(t("(?:)"), Hir::empty()); - assert_eq!(t("(?:a)"), hir_lit("a")); - assert_eq!( - t("(?:a)(b)"), - hir_cat(vec![hir_lit("a"), hir_capture(1, hir_lit("b")),]) - ); - assert_eq!( - t("(a)(?:b)(c)"), - hir_cat(vec![ - hir_capture(1, hir_lit("a")), - hir_lit("b"), - hir_capture(2, hir_lit("c")), - ]) - ); - assert_eq!( - t("(a)(?Pb)(c)"), - hir_cat(vec![ - hir_capture(1, hir_lit("a")), - hir_capture_name(2, "foo", hir_lit("b")), - hir_capture(3, hir_lit("c")), - ]) - ); - assert_eq!(t("()"), hir_capture(1, Hir::empty())); - assert_eq!(t("((?i))"), hir_capture(1, Hir::empty())); - assert_eq!(t("((?x))"), hir_capture(1, Hir::empty())); - assert_eq!( - t("(((?x)))"), - hir_capture(1, hir_capture(2, Hir::empty())) - ); - } - - #[test] - fn line_anchors() { - assert_eq!(t("^"), hir_look(hir::Look::Start)); - assert_eq!(t("$"), hir_look(hir::Look::End)); - assert_eq!(t(r"\A"), hir_look(hir::Look::Start)); - assert_eq!(t(r"\z"), hir_look(hir::Look::End)); - - assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start)); - assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End)); - assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF)); - assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF)); - - assert_eq!(t(r"(?R)\A"), hir_look(hir::Look::Start)); - assert_eq!(t(r"(?R)\z"), hir_look(hir::Look::End)); - assert_eq!(t("(?R)^"), hir_look(hir::Look::Start)); - assert_eq!(t("(?R)$"), hir_look(hir::Look::End)); - - assert_eq!(t(r"(?Rm)\A"), hir_look(hir::Look::Start)); - assert_eq!(t(r"(?Rm)\z"), hir_look(hir::Look::End)); - assert_eq!(t("(?Rm)^"), hir_look(hir::Look::StartCRLF)); - assert_eq!(t("(?Rm)$"), hir_look(hir::Look::EndCRLF)); - } - - #[test] - fn flags() { - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i:a)a"), - hir_cat( - vec![hir_uclass(&[('A', 'A'), ('a', 'a')]), hir_lit("a"),] - ) - ); - assert_eq!( - t("(?i-u:a)β"), - hir_cat(vec![ - hir_bclass(&[(b'A', b'A'), (b'a', b'a')]), - hir_lit("β"), - ]) - ); - assert_eq!( - t("(?:(?i-u)a)b"), - hir_cat(vec![ - 
hir_bclass(&[(b'A', b'A'), (b'a', b'a')]), - hir_lit("b"), - ]) - ); - assert_eq!( - t("((?i-u)a)b"), - hir_cat(vec![ - hir_capture(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])), - hir_lit("b"), - ]) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i)(?-i:a)a"), - hir_cat( - vec![hir_lit("a"), hir_uclass(&[('A', 'A'), ('a', 'a')]),] - ) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?im)a^"), - hir_cat(vec![ - hir_uclass(&[('A', 'A'), ('a', 'a')]), - hir_look(hir::Look::StartLF), - ]) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?im)a^(?i-m)a^"), - hir_cat(vec![ - hir_uclass(&[('A', 'A'), ('a', 'a')]), - hir_look(hir::Look::StartLF), - hir_uclass(&[('A', 'A'), ('a', 'a')]), - hir_look(hir::Look::Start), - ]) - ); - assert_eq!( - t("(?U)a*a*?(?-U)a*a*?"), - hir_cat(vec![ - hir_star(false, hir_lit("a")), - hir_star(true, hir_lit("a")), - hir_star(true, hir_lit("a")), - hir_star(false, hir_lit("a")), - ]) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?:a(?i)a)a"), - hir_cat(vec![ - hir_cat(vec![ - hir_lit("a"), - hir_uclass(&[('A', 'A'), ('a', 'a')]), - ]), - hir_lit("a"), - ]) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i)(?:a(?-i)a)a"), - hir_cat(vec![ - hir_cat(vec![ - hir_uclass(&[('A', 'A'), ('a', 'a')]), - hir_lit("a"), - ]), - hir_uclass(&[('A', 'A'), ('a', 'a')]), - ]) - ); - } - - #[test] - fn escape() { - assert_eq!( - t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"), - hir_lit(r"\.+*?()|[]{}^$#") - ); - } - - #[test] - fn repetition() { - assert_eq!(t("a?"), hir_quest(true, hir_lit("a"))); - assert_eq!(t("a*"), hir_star(true, hir_lit("a"))); - assert_eq!(t("a+"), hir_plus(true, hir_lit("a"))); - assert_eq!(t("a??"), hir_quest(false, hir_lit("a"))); - assert_eq!(t("a*?"), hir_star(false, hir_lit("a"))); - assert_eq!(t("a+?"), hir_plus(false, hir_lit("a"))); - - assert_eq!(t("a{1}"), hir_range(true, 1, Some(1), hir_lit("a"),)); - assert_eq!(t("a{1,}"), hir_range(true, 1, None, hir_lit("a"),)); - 
assert_eq!(t("a{1,2}"), hir_range(true, 1, Some(2), hir_lit("a"),)); - assert_eq!(t("a{1}?"), hir_range(false, 1, Some(1), hir_lit("a"),)); - assert_eq!(t("a{1,}?"), hir_range(false, 1, None, hir_lit("a"),)); - assert_eq!(t("a{1,2}?"), hir_range(false, 1, Some(2), hir_lit("a"),)); - - assert_eq!( - t("ab?"), - hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),]) - ); - assert_eq!(t("(ab)?"), hir_quest(true, hir_capture(1, hir_lit("ab")))); - assert_eq!( - t("a|b?"), - hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),]) - ); - } - - #[test] - fn cat_alt() { - let a = || hir_look(hir::Look::Start); - let b = || hir_look(hir::Look::End); - let c = || hir_look(hir::Look::WordUnicode); - let d = || hir_look(hir::Look::WordUnicodeNegate); - - assert_eq!(t("(^$)"), hir_capture(1, hir_cat(vec![a(), b()]))); - assert_eq!(t("^|$"), hir_alt(vec![a(), b()])); - assert_eq!(t(r"^|$|\b"), hir_alt(vec![a(), b(), c()])); - assert_eq!( - t(r"^$|$\b|\b\B"), - hir_alt(vec![ - hir_cat(vec![a(), b()]), - hir_cat(vec![b(), c()]), - hir_cat(vec![c(), d()]), - ]) - ); - assert_eq!(t("(^|$)"), hir_capture(1, hir_alt(vec![a(), b()]))); - assert_eq!( - t(r"(^|$|\b)"), - hir_capture(1, hir_alt(vec![a(), b(), c()])) - ); - assert_eq!( - t(r"(^$|$\b|\b\B)"), - hir_capture( - 1, - hir_alt(vec![ - hir_cat(vec![a(), b()]), - hir_cat(vec![b(), c()]), - hir_cat(vec![c(), d()]), - ]) - ) - ); - assert_eq!( - t(r"(^$|($\b|(\b\B)))"), - hir_capture( - 1, - hir_alt(vec![ - hir_cat(vec![a(), b()]), - hir_capture( - 2, - hir_alt(vec![ - hir_cat(vec![b(), c()]), - hir_capture(3, hir_cat(vec![c(), d()])), - ]) - ), - ]) - ) - ); - } - - // Tests the HIR transformation of things like '[a-z]|[A-Z]' into - // '[A-Za-z]'. In other words, an alternation of just classes is always - // equivalent to a single class corresponding to the union of the branches - // in that class. (Unless some branches match invalid UTF-8 and others - // match non-ASCII Unicode.) 
- #[test] - fn cat_class_flattened() { - assert_eq!(t(r"[a-z]|[A-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')])); - // Combining all of the letter properties should give us the one giant - // letter property. - #[cfg(feature = "unicode-gencat")] - assert_eq!( - t(r"(?x) - \p{Lowercase_Letter} - |\p{Uppercase_Letter} - |\p{Titlecase_Letter} - |\p{Modifier_Letter} - |\p{Other_Letter} - "), - hir_uclass_query(ClassQuery::Binary("letter")) - ); - // Byte classes that can truly match invalid UTF-8 cannot be combined - // with Unicode classes. - assert_eq!( - t_bytes(r"[Δδ]|(?-u:[\x90-\xFF])|[Λλ]"), - hir_alt(vec![ - hir_uclass(&[('Δ', 'Δ'), ('δ', 'δ')]), - hir_bclass(&[(b'\x90', b'\xFF')]), - hir_uclass(&[('Λ', 'Λ'), ('λ', 'λ')]), - ]) - ); - // Byte classes on their own can be combined, even if some are ASCII - // and others are invalid UTF-8. - assert_eq!( - t_bytes(r"[a-z]|(?-u:[\x90-\xFF])|[A-Z]"), - hir_bclass(&[(b'A', b'Z'), (b'a', b'z'), (b'\x90', b'\xFF')]), - ); - } - - #[test] - fn class_ascii() { - assert_eq!( - t("[[:alnum:]]"), - hir_ascii_uclass(&ast::ClassAsciiKind::Alnum) - ); - assert_eq!( - t("[[:alpha:]]"), - hir_ascii_uclass(&ast::ClassAsciiKind::Alpha) - ); - assert_eq!( - t("[[:ascii:]]"), - hir_ascii_uclass(&ast::ClassAsciiKind::Ascii) - ); - assert_eq!( - t("[[:blank:]]"), - hir_ascii_uclass(&ast::ClassAsciiKind::Blank) - ); - assert_eq!( - t("[[:cntrl:]]"), - hir_ascii_uclass(&ast::ClassAsciiKind::Cntrl) - ); - assert_eq!( - t("[[:digit:]]"), - hir_ascii_uclass(&ast::ClassAsciiKind::Digit) - ); - assert_eq!( - t("[[:graph:]]"), - hir_ascii_uclass(&ast::ClassAsciiKind::Graph) - ); - assert_eq!( - t("[[:lower:]]"), - hir_ascii_uclass(&ast::ClassAsciiKind::Lower) - ); - assert_eq!( - t("[[:print:]]"), - hir_ascii_uclass(&ast::ClassAsciiKind::Print) - ); - assert_eq!( - t("[[:punct:]]"), - hir_ascii_uclass(&ast::ClassAsciiKind::Punct) - ); - assert_eq!( - t("[[:space:]]"), - hir_ascii_uclass(&ast::ClassAsciiKind::Space) - ); - assert_eq!( - 
t("[[:upper:]]"), - hir_ascii_uclass(&ast::ClassAsciiKind::Upper) - ); - assert_eq!( - t("[[:word:]]"), - hir_ascii_uclass(&ast::ClassAsciiKind::Word) - ); - assert_eq!( - t("[[:xdigit:]]"), - hir_ascii_uclass(&ast::ClassAsciiKind::Xdigit) - ); - - assert_eq!( - t("[[:^lower:]]"), - hir_negate(hir_ascii_uclass(&ast::ClassAsciiKind::Lower)) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i)[[:lower:]]"), - hir_uclass(&[ - ('A', 'Z'), - ('a', 'z'), - ('\u{17F}', '\u{17F}'), - ('\u{212A}', '\u{212A}'), - ]) - ); - - assert_eq!( - t("(?-u)[[:lower:]]"), - hir_ascii_bclass(&ast::ClassAsciiKind::Lower) - ); - assert_eq!( - t("(?i-u)[[:lower:]]"), - hir_case_fold(hir_ascii_bclass(&ast::ClassAsciiKind::Lower)) - ); - - assert_eq!( - t_err("(?-u)[[:^lower:]]"), - TestError { - kind: hir::ErrorKind::InvalidUtf8, - span: Span::new( - Position::new(6, 1, 7), - Position::new(16, 1, 17) - ), - } - ); - assert_eq!( - t_err("(?i-u)[[:^lower:]]"), - TestError { - kind: hir::ErrorKind::InvalidUtf8, - span: Span::new( - Position::new(7, 1, 8), - Position::new(17, 1, 18) - ), - } - ); - } - - #[test] - fn class_ascii_multiple() { - // See: https://github.com/rust-lang/regex/issues/680 - assert_eq!( - t("[[:alnum:][:^ascii:]]"), - hir_union( - hir_ascii_uclass(&ast::ClassAsciiKind::Alnum), - hir_uclass(&[('\u{80}', '\u{10FFFF}')]), - ), - ); - assert_eq!( - t_bytes("(?-u)[[:alnum:][:^ascii:]]"), - hir_union( - hir_ascii_bclass(&ast::ClassAsciiKind::Alnum), - hir_bclass(&[(0x80, 0xFF)]), - ), - ); - } - - #[test] - #[cfg(feature = "unicode-perl")] - fn class_perl_unicode() { - // Unicode - assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit"))); - assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space"))); - assert_eq!(t(r"\w"), hir_uclass_perl_word()); - #[cfg(feature = "unicode-case")] - assert_eq!( - t(r"(?i)\d"), - hir_uclass_query(ClassQuery::Binary("digit")) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t(r"(?i)\s"), - 
hir_uclass_query(ClassQuery::Binary("space")) - ); - #[cfg(feature = "unicode-case")] - assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word()); - - // Unicode, negated - assert_eq!( - t(r"\D"), - hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))) - ); - assert_eq!( - t(r"\S"), - hir_negate(hir_uclass_query(ClassQuery::Binary("space"))) - ); - assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word())); - #[cfg(feature = "unicode-case")] - assert_eq!( - t(r"(?i)\D"), - hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t(r"(?i)\S"), - hir_negate(hir_uclass_query(ClassQuery::Binary("space"))) - ); - #[cfg(feature = "unicode-case")] - assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word())); - } - - #[test] - fn class_perl_ascii() { - // ASCII only - assert_eq!( - t(r"(?-u)\d"), - hir_ascii_bclass(&ast::ClassAsciiKind::Digit) - ); - assert_eq!( - t(r"(?-u)\s"), - hir_ascii_bclass(&ast::ClassAsciiKind::Space) - ); - assert_eq!( - t(r"(?-u)\w"), - hir_ascii_bclass(&ast::ClassAsciiKind::Word) - ); - assert_eq!( - t(r"(?i-u)\d"), - hir_ascii_bclass(&ast::ClassAsciiKind::Digit) - ); - assert_eq!( - t(r"(?i-u)\s"), - hir_ascii_bclass(&ast::ClassAsciiKind::Space) - ); - assert_eq!( - t(r"(?i-u)\w"), - hir_ascii_bclass(&ast::ClassAsciiKind::Word) - ); - - // ASCII only, negated - assert_eq!( - t_bytes(r"(?-u)\D"), - hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit)) - ); - assert_eq!( - t_bytes(r"(?-u)\S"), - hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space)) - ); - assert_eq!( - t_bytes(r"(?-u)\W"), - hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word)) - ); - assert_eq!( - t_bytes(r"(?i-u)\D"), - hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit)) - ); - assert_eq!( - t_bytes(r"(?i-u)\S"), - hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space)) - ); - assert_eq!( - t_bytes(r"(?i-u)\W"), - hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word)) - ); - - // ASCII only, negated, with 
UTF-8 mode enabled. - // In this case, negating any Perl class results in an error because - // all such classes can match invalid UTF-8. - assert_eq!( - t_err(r"(?-u)\D"), - TestError { - kind: hir::ErrorKind::InvalidUtf8, - span: Span::new( - Position::new(5, 1, 6), - Position::new(7, 1, 8), - ), - }, - ); - assert_eq!( - t_err(r"(?-u)\S"), - TestError { - kind: hir::ErrorKind::InvalidUtf8, - span: Span::new( - Position::new(5, 1, 6), - Position::new(7, 1, 8), - ), - }, - ); - assert_eq!( - t_err(r"(?-u)\W"), - TestError { - kind: hir::ErrorKind::InvalidUtf8, - span: Span::new( - Position::new(5, 1, 6), - Position::new(7, 1, 8), - ), - }, - ); - assert_eq!( - t_err(r"(?i-u)\D"), - TestError { - kind: hir::ErrorKind::InvalidUtf8, - span: Span::new( - Position::new(6, 1, 7), - Position::new(8, 1, 9), - ), - }, - ); - assert_eq!( - t_err(r"(?i-u)\S"), - TestError { - kind: hir::ErrorKind::InvalidUtf8, - span: Span::new( - Position::new(6, 1, 7), - Position::new(8, 1, 9), - ), - }, - ); - assert_eq!( - t_err(r"(?i-u)\W"), - TestError { - kind: hir::ErrorKind::InvalidUtf8, - span: Span::new( - Position::new(6, 1, 7), - Position::new(8, 1, 9), - ), - }, - ); - } - - #[test] - #[cfg(not(feature = "unicode-perl"))] - fn class_perl_word_disabled() { - assert_eq!( - t_err(r"\w"), - TestError { - kind: hir::ErrorKind::UnicodePerlClassNotFound, - span: Span::new( - Position::new(0, 1, 1), - Position::new(2, 1, 3) - ), - } - ); - } - - #[test] - #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))] - fn class_perl_space_disabled() { - assert_eq!( - t_err(r"\s"), - TestError { - kind: hir::ErrorKind::UnicodePerlClassNotFound, - span: Span::new( - Position::new(0, 1, 1), - Position::new(2, 1, 3) - ), - } - ); - } - - #[test] - #[cfg(all( - not(feature = "unicode-perl"), - not(feature = "unicode-gencat") - ))] - fn class_perl_digit_disabled() { - assert_eq!( - t_err(r"\d"), - TestError { - kind: hir::ErrorKind::UnicodePerlClassNotFound, - span: Span::new( - 
Position::new(0, 1, 1), - Position::new(2, 1, 3) - ), - } - ); - } - - #[test] - #[cfg(feature = "unicode-gencat")] - fn class_unicode_gencat() { - assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z"))); - assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z"))); - assert_eq!( - t(r"\p{Separator}"), - hir_uclass_query(ClassQuery::Binary("Z")) - ); - assert_eq!( - t(r"\p{se PaRa ToR}"), - hir_uclass_query(ClassQuery::Binary("Z")) - ); - assert_eq!( - t(r"\p{gc:Separator}"), - hir_uclass_query(ClassQuery::Binary("Z")) - ); - assert_eq!( - t(r"\p{gc=Separator}"), - hir_uclass_query(ClassQuery::Binary("Z")) - ); - assert_eq!( - t(r"\p{Other}"), - hir_uclass_query(ClassQuery::Binary("Other")) - ); - assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other"))); - - assert_eq!( - t(r"\PZ"), - hir_negate(hir_uclass_query(ClassQuery::Binary("Z"))) - ); - assert_eq!( - t(r"\P{separator}"), - hir_negate(hir_uclass_query(ClassQuery::Binary("Z"))) - ); - assert_eq!( - t(r"\P{gc!=separator}"), - hir_negate(hir_uclass_query(ClassQuery::Binary("Z"))) - ); - - assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any"))); - assert_eq!( - t(r"\p{assigned}"), - hir_uclass_query(ClassQuery::Binary("Assigned")) - ); - assert_eq!( - t(r"\p{ascii}"), - hir_uclass_query(ClassQuery::Binary("ASCII")) - ); - assert_eq!( - t(r"\p{gc:any}"), - hir_uclass_query(ClassQuery::Binary("Any")) - ); - assert_eq!( - t(r"\p{gc:assigned}"), - hir_uclass_query(ClassQuery::Binary("Assigned")) - ); - assert_eq!( - t(r"\p{gc:ascii}"), - hir_uclass_query(ClassQuery::Binary("ASCII")) - ); - - assert_eq!( - t_err(r"(?-u)\pZ"), - TestError { - kind: hir::ErrorKind::UnicodeNotAllowed, - span: Span::new( - Position::new(5, 1, 6), - Position::new(8, 1, 9) - ), - } - ); - assert_eq!( - t_err(r"(?-u)\p{Separator}"), - TestError { - kind: hir::ErrorKind::UnicodeNotAllowed, - span: Span::new( - Position::new(5, 1, 6), - Position::new(18, 1, 19) - ), - } - ); - assert_eq!( - 
t_err(r"\pE"), - TestError { - kind: hir::ErrorKind::UnicodePropertyNotFound, - span: Span::new( - Position::new(0, 1, 1), - Position::new(3, 1, 4) - ), - } - ); - assert_eq!( - t_err(r"\p{Foo}"), - TestError { - kind: hir::ErrorKind::UnicodePropertyNotFound, - span: Span::new( - Position::new(0, 1, 1), - Position::new(7, 1, 8) - ), - } - ); - assert_eq!( - t_err(r"\p{gc:Foo}"), - TestError { - kind: hir::ErrorKind::UnicodePropertyValueNotFound, - span: Span::new( - Position::new(0, 1, 1), - Position::new(10, 1, 11) - ), - } - ); - } - - #[test] - #[cfg(not(feature = "unicode-gencat"))] - fn class_unicode_gencat_disabled() { - assert_eq!( - t_err(r"\p{Separator}"), - TestError { - kind: hir::ErrorKind::UnicodePropertyNotFound, - span: Span::new( - Position::new(0, 1, 1), - Position::new(13, 1, 14) - ), - } - ); - - assert_eq!( - t_err(r"\p{Any}"), - TestError { - kind: hir::ErrorKind::UnicodePropertyNotFound, - span: Span::new( - Position::new(0, 1, 1), - Position::new(7, 1, 8) - ), - } - ); - } - - #[test] - #[cfg(feature = "unicode-script")] - fn class_unicode_script() { - assert_eq!( - t(r"\p{Greek}"), - hir_uclass_query(ClassQuery::Binary("Greek")) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t(r"(?i)\p{Greek}"), - hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek"))) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t(r"(?i)\P{Greek}"), - hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary( - "Greek" - )))) - ); - - assert_eq!( - t_err(r"\p{sc:Foo}"), - TestError { - kind: hir::ErrorKind::UnicodePropertyValueNotFound, - span: Span::new( - Position::new(0, 1, 1), - Position::new(10, 1, 11) - ), - } - ); - assert_eq!( - t_err(r"\p{scx:Foo}"), - TestError { - kind: hir::ErrorKind::UnicodePropertyValueNotFound, - span: Span::new( - Position::new(0, 1, 1), - Position::new(11, 1, 12) - ), - } - ); - } - - #[test] - #[cfg(not(feature = "unicode-script"))] - fn class_unicode_script_disabled() { - assert_eq!( - t_err(r"\p{Greek}"), - 
TestError { - kind: hir::ErrorKind::UnicodePropertyNotFound, - span: Span::new( - Position::new(0, 1, 1), - Position::new(9, 1, 10) - ), - } - ); - - assert_eq!( - t_err(r"\p{scx:Greek}"), - TestError { - kind: hir::ErrorKind::UnicodePropertyNotFound, - span: Span::new( - Position::new(0, 1, 1), - Position::new(13, 1, 14) - ), - } - ); - } - - #[test] - #[cfg(feature = "unicode-age")] - fn class_unicode_age() { - assert_eq!( - t_err(r"\p{age:Foo}"), - TestError { - kind: hir::ErrorKind::UnicodePropertyValueNotFound, - span: Span::new( - Position::new(0, 1, 1), - Position::new(11, 1, 12) - ), - } - ); - } - - #[test] - #[cfg(feature = "unicode-gencat")] - fn class_unicode_any_empty() { - assert_eq!(t(r"\P{any}"), hir_uclass(&[]),); - } - - #[test] - #[cfg(not(feature = "unicode-age"))] - fn class_unicode_age_disabled() { - assert_eq!( - t_err(r"\p{age:3.0}"), - TestError { - kind: hir::ErrorKind::UnicodePropertyNotFound, - span: Span::new( - Position::new(0, 1, 1), - Position::new(11, 1, 12) - ), - } - ); - } - - #[test] - fn class_bracketed() { - assert_eq!(t("[a]"), hir_lit("a")); - assert_eq!(t("[ab]"), hir_uclass(&[('a', 'b')])); - assert_eq!(t("[^[a]]"), class_negate(uclass(&[('a', 'a')]))); - assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')])); - assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')])); - assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')])); - assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')])); - assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')])); - assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')])); - #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))] - assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit"))); - #[cfg(feature = "unicode-gencat")] - assert_eq!( - t(r"[\pZ]"), - hir_uclass_query(ClassQuery::Binary("separator")) - ); - #[cfg(feature = "unicode-gencat")] - assert_eq!( - t(r"[\p{separator}]"), - hir_uclass_query(ClassQuery::Binary("separator")) - ); - #[cfg(any(feature = "unicode-perl", feature = 
"unicode-gencat"))] - assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit"))); - #[cfg(feature = "unicode-gencat")] - assert_eq!( - t(r"[^\PZ]"), - hir_uclass_query(ClassQuery::Binary("separator")) - ); - #[cfg(feature = "unicode-gencat")] - assert_eq!( - t(r"[^\P{separator}]"), - hir_uclass_query(ClassQuery::Binary("separator")) - ); - #[cfg(all( - feature = "unicode-case", - any(feature = "unicode-perl", feature = "unicode-gencat") - ))] - assert_eq!( - t(r"(?i)[^\D]"), - hir_uclass_query(ClassQuery::Binary("digit")) - ); - #[cfg(all(feature = "unicode-case", feature = "unicode-script"))] - assert_eq!( - t(r"(?i)[^\P{greek}]"), - hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek"))) - ); - - assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')])); - assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')])); - assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')])); - - #[cfg(feature = "unicode-case")] - assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')])); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i)[k]"), - hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),]) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i)[β]"), - hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),]) - ); - assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),])); - - assert_eq!(t("[^a]"), class_negate(uclass(&[('a', 'a')]))); - assert_eq!(t(r"[^\x00]"), class_negate(uclass(&[('\0', '\0')]))); - assert_eq!( - t_bytes("(?-u)[^a]"), - class_negate(bclass(&[(b'a', b'a')])) - ); - #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))] - assert_eq!( - t(r"[^\d]"), - hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))) - ); - #[cfg(feature = "unicode-gencat")] - assert_eq!( - t(r"[^\pZ]"), - hir_negate(hir_uclass_query(ClassQuery::Binary("separator"))) - ); - #[cfg(feature = "unicode-gencat")] - assert_eq!( - t(r"[^\p{separator}]"), - 
hir_negate(hir_uclass_query(ClassQuery::Binary("separator"))) - ); - #[cfg(all(feature = "unicode-case", feature = "unicode-script"))] - assert_eq!( - t(r"(?i)[^\p{greek}]"), - hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary( - "greek" - )))) - ); - #[cfg(all(feature = "unicode-case", feature = "unicode-script"))] - assert_eq!( - t(r"(?i)[\P{greek}]"), - hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary( - "greek" - )))) - ); - - // Test some weird cases. - assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')])); - - assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')])); - assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')])); - assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')])); - assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')])); - assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')])); - - assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')])); - assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')])); - assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')])); - assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')])); - assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')])); - - assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')])); - assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')])); - assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')])); - assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')])); - assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')])); - - assert_eq!( - t_err("(?-u)[^a]"), - TestError { - kind: hir::ErrorKind::InvalidUtf8, - span: Span::new( - Position::new(5, 1, 6), - Position::new(9, 1, 10) - ), - } - ); - #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))] - assert_eq!(t(r"[^\s\S]"), hir_uclass(&[]),); - #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))] - assert_eq!(t_bytes(r"(?-u)[^\s\S]"), hir_bclass(&[]),); - } - - #[test] - fn class_bracketed_union() { - assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')])); - #[cfg(feature = "unicode-gencat")] - assert_eq!( - t(r"[a\pZb]"), - hir_union( - 
hir_uclass(&[('a', 'b')]), - hir_uclass_query(ClassQuery::Binary("separator")) - ) - ); - #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))] - assert_eq!( - t(r"[\pZ\p{Greek}]"), - hir_union( - hir_uclass_query(ClassQuery::Binary("greek")), - hir_uclass_query(ClassQuery::Binary("separator")) - ) - ); - #[cfg(all( - feature = "unicode-age", - feature = "unicode-gencat", - feature = "unicode-script" - ))] - assert_eq!( - t(r"[\p{age:3.0}\pZ\p{Greek}]"), - hir_union( - hir_uclass_query(ClassQuery::ByValue { - property_name: "age", - property_value: "3.0", - }), - hir_union( - hir_uclass_query(ClassQuery::Binary("greek")), - hir_uclass_query(ClassQuery::Binary("separator")) - ) - ) - ); - #[cfg(all( - feature = "unicode-age", - feature = "unicode-gencat", - feature = "unicode-script" - ))] - assert_eq!( - t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"), - hir_union( - hir_uclass_query(ClassQuery::ByValue { - property_name: "age", - property_value: "3.0", - }), - hir_union( - hir_uclass_query(ClassQuery::Binary("cyrillic")), - hir_union( - hir_uclass_query(ClassQuery::Binary("greek")), - hir_uclass_query(ClassQuery::Binary("separator")) - ) - ) - ) - ); - - #[cfg(all( - feature = "unicode-age", - feature = "unicode-case", - feature = "unicode-gencat", - feature = "unicode-script" - ))] - assert_eq!( - t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"), - hir_case_fold(hir_union( - hir_uclass_query(ClassQuery::ByValue { - property_name: "age", - property_value: "3.0", - }), - hir_union( - hir_uclass_query(ClassQuery::Binary("greek")), - hir_uclass_query(ClassQuery::Binary("separator")) - ) - )) - ); - #[cfg(all( - feature = "unicode-age", - feature = "unicode-gencat", - feature = "unicode-script" - ))] - assert_eq!( - t(r"[^\p{age:3.0}\pZ\p{Greek}]"), - hir_negate(hir_union( - hir_uclass_query(ClassQuery::ByValue { - property_name: "age", - property_value: "3.0", - }), - hir_union( - hir_uclass_query(ClassQuery::Binary("greek")), - 
hir_uclass_query(ClassQuery::Binary("separator")) - ) - )) - ); - #[cfg(all( - feature = "unicode-age", - feature = "unicode-case", - feature = "unicode-gencat", - feature = "unicode-script" - ))] - assert_eq!( - t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"), - hir_negate(hir_case_fold(hir_union( - hir_uclass_query(ClassQuery::ByValue { - property_name: "age", - property_value: "3.0", - }), - hir_union( - hir_uclass_query(ClassQuery::Binary("greek")), - hir_uclass_query(ClassQuery::Binary("separator")) - ) - ))) - ); - } - - #[test] - fn class_bracketed_nested() { - assert_eq!(t(r"[a[^c]]"), class_negate(uclass(&[('c', 'c')]))); - assert_eq!(t(r"[a-b[^c]]"), class_negate(uclass(&[('c', 'c')]))); - assert_eq!(t(r"[a-c[^c]]"), class_negate(uclass(&[]))); - - assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')])); - assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')])); - - #[cfg(feature = "unicode-case")] - assert_eq!( - t(r"(?i)[a[^c]]"), - hir_negate(class_case_fold(uclass(&[('c', 'c')]))) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t(r"(?i)[a-b[^c]]"), - hir_negate(class_case_fold(uclass(&[('c', 'c')]))) - ); - - #[cfg(feature = "unicode-case")] - assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')])); - #[cfg(feature = "unicode-case")] - assert_eq!( - t(r"(?i)[^a-b[^c]]"), - hir_uclass(&[('C', 'C'), ('c', 'c')]) - ); - - assert_eq!(t(r"[^a-c[^c]]"), hir_uclass(&[]),); - #[cfg(feature = "unicode-case")] - assert_eq!(t(r"(?i)[^a-c[^c]]"), hir_uclass(&[]),); - } - - #[test] - fn class_bracketed_intersect() { - assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')])); - assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')])); - assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')])); - assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')])); - assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')])); - assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')])); - assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')])); - 
assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')])); - assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')]))); - - assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')])); - assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')])); - assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')])); - assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')])); - assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')])); - assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')])); - - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i)[abc&&b-c]"), - hir_case_fold(hir_uclass(&[('b', 'c')])) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i)[abc&&[b-c]]"), - hir_case_fold(hir_uclass(&[('b', 'c')])) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i)[[abc]&&[b-c]]"), - hir_case_fold(hir_uclass(&[('b', 'c')])) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i)[a-z&&b-y&&c-x]"), - hir_case_fold(hir_uclass(&[('c', 'x')])) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i)[c-da-b&&a-d]"), - hir_case_fold(hir_uclass(&[('a', 'd')])) - ); - #[cfg(feature = "unicode-case")] - assert_eq!( - t("(?i)[a-d&&c-da-b]"), - hir_case_fold(hir_uclass(&[('a', 'd')])) - ); - - assert_eq!( - t("(?i-u)[abc&&b-c]"), - hir_case_fold(hir_bclass(&[(b'b', b'c')])) - ); - assert_eq!( - t("(?i-u)[abc&&[b-c]]"), - hir_case_fold(hir_bclass(&[(b'b', b'c')])) - ); - assert_eq!( - t("(?i-u)[[abc]&&[b-c]]"), - hir_case_fold(hir_bclass(&[(b'b', b'c')])) - ); - assert_eq!( - t("(?i-u)[a-z&&b-y&&c-x]"), - hir_case_fold(hir_bclass(&[(b'c', b'x')])) - ); - assert_eq!( - t("(?i-u)[c-da-b&&a-d]"), - hir_case_fold(hir_bclass(&[(b'a', b'd')])) - ); - assert_eq!( - t("(?i-u)[a-d&&c-da-b]"), - hir_case_fold(hir_bclass(&[(b'a', b'd')])) - ); - - // In `[a^]`, `^` does not need to be escaped, so it makes sense that - // `^` is also allowed to be unescaped after `&&`. 
- assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')])); - // `]` needs to be escaped after `&&` since it's not at start of class. - assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')])); - assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')])); - assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')])); - assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')])); - // Test precedence. - assert_eq!( - t(r"[a-w&&[^c-g]z]"), - hir_uclass(&[('a', 'b'), ('h', 'w')]) - ); - } - - #[test] - fn class_bracketed_intersect_negate() { - #[cfg(feature = "unicode-perl")] - assert_eq!( - t(r"[^\w&&\d]"), - hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))) - ); - assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')]))); - #[cfg(feature = "unicode-perl")] - assert_eq!( - t(r"[^[\w&&\d]]"), - hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))) - ); - #[cfg(feature = "unicode-perl")] - assert_eq!( - t(r"[^[^\w&&\d]]"), - hir_uclass_query(ClassQuery::Binary("digit")) - ); - #[cfg(feature = "unicode-perl")] - assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word())); - - #[cfg(feature = "unicode-perl")] - assert_eq!( - t_bytes(r"(?-u)[^\w&&\d]"), - hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit)) - ); - assert_eq!( - t_bytes(r"(?-u)[^[a-z&&a-c]]"), - hir_negate(hir_bclass(&[(b'a', b'c')])) - ); - assert_eq!( - t_bytes(r"(?-u)[^[\w&&\d]]"), - hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit)) - ); - assert_eq!( - t_bytes(r"(?-u)[^[^\w&&\d]]"), - hir_ascii_bclass(&ast::ClassAsciiKind::Digit) - ); - assert_eq!( - t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"), - hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word)) - ); - } - - #[test] - fn class_bracketed_difference() { - #[cfg(feature = "unicode-gencat")] - assert_eq!( - t(r"[\pL--[:ascii:]]"), - hir_difference( - hir_uclass_query(ClassQuery::Binary("letter")), - hir_uclass(&[('\0', '\x7F')]) - ) - ); - - assert_eq!( - t(r"(?-u)[[:alpha:]--[:lower:]]"), - hir_bclass(&[(b'A', b'Z')]) - ); - } - - 
#[test] - fn class_bracketed_symmetric_difference() { - #[cfg(feature = "unicode-script")] - assert_eq!( - t(r"[\p{sc:Greek}~~\p{scx:Greek}]"), - // Class({ - // '·'..='·', - // '\u{300}'..='\u{301}', - // '\u{304}'..='\u{304}', - // '\u{306}'..='\u{306}', - // '\u{308}'..='\u{308}', - // '\u{313}'..='\u{313}', - // '\u{342}'..='\u{342}', - // '\u{345}'..='\u{345}', - // 'ʹ'..='ʹ', - // '\u{1dc0}'..='\u{1dc1}', - // '⁝'..='⁝', - // }) - hir_uclass(&[ - ('·', '·'), - ('\u{0300}', '\u{0301}'), - ('\u{0304}', '\u{0304}'), - ('\u{0306}', '\u{0306}'), - ('\u{0308}', '\u{0308}'), - ('\u{0313}', '\u{0313}'), - ('\u{0342}', '\u{0342}'), - ('\u{0345}', '\u{0345}'), - ('ʹ', 'ʹ'), - ('\u{1DC0}', '\u{1DC1}'), - ('⁝', '⁝'), - ]) - ); - assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')])); - - assert_eq!( - t(r"(?-u)[a-g~~c-j]"), - hir_bclass(&[(b'a', b'b'), (b'h', b'j')]) - ); - } - - #[test] - fn ignore_whitespace() { - assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3")); - assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S")); - assert_eq!( - t(r"(?x)\x # comment -{ # comment - 53 # comment -} #comment"), - hir_lit("S") - ); - - assert_eq!(t(r"(?x)\x 53"), hir_lit("S")); - assert_eq!( - t(r"(?x)\x # comment - 53 # comment"), - hir_lit("S") - ); - assert_eq!(t(r"(?x)\x5 3"), hir_lit("S")); - - #[cfg(feature = "unicode-gencat")] - assert_eq!( - t(r"(?x)\p # comment -{ # comment - Separator # comment -} # comment"), - hir_uclass_query(ClassQuery::Binary("separator")) - ); - - assert_eq!( - t(r"(?x)a # comment -{ # comment - 5 # comment - , # comment - 10 # comment -} # comment"), - hir_range(true, 5, Some(10), hir_lit("a")) - ); - - assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a ")); - } - - #[test] - fn analysis_is_utf8() { - // Positive examples. 
- assert!(props_bytes(r"a").is_utf8()); - assert!(props_bytes(r"ab").is_utf8()); - assert!(props_bytes(r"(?-u)a").is_utf8()); - assert!(props_bytes(r"(?-u)ab").is_utf8()); - assert!(props_bytes(r"\xFF").is_utf8()); - assert!(props_bytes(r"\xFF\xFF").is_utf8()); - assert!(props_bytes(r"[^a]").is_utf8()); - assert!(props_bytes(r"[^a][^a]").is_utf8()); - assert!(props_bytes(r"\b").is_utf8()); - assert!(props_bytes(r"\B").is_utf8()); - assert!(props_bytes(r"(?-u)\b").is_utf8()); - assert!(props_bytes(r"(?-u)\B").is_utf8()); - - // Negative examples. - assert!(!props_bytes(r"(?-u)\xFF").is_utf8()); - assert!(!props_bytes(r"(?-u)\xFF\xFF").is_utf8()); - assert!(!props_bytes(r"(?-u)[^a]").is_utf8()); - assert!(!props_bytes(r"(?-u)[^a][^a]").is_utf8()); - } - - #[test] - fn analysis_captures_len() { - assert_eq!(0, props(r"a").explicit_captures_len()); - assert_eq!(0, props(r"(?:a)").explicit_captures_len()); - assert_eq!(0, props(r"(?i-u:a)").explicit_captures_len()); - assert_eq!(0, props(r"(?i-u)a").explicit_captures_len()); - assert_eq!(1, props(r"(a)").explicit_captures_len()); - assert_eq!(1, props(r"(?Pa)").explicit_captures_len()); - assert_eq!(1, props(r"()").explicit_captures_len()); - assert_eq!(1, props(r"()a").explicit_captures_len()); - assert_eq!(1, props(r"(a)+").explicit_captures_len()); - assert_eq!(2, props(r"(a)(b)").explicit_captures_len()); - assert_eq!(2, props(r"(a)|(b)").explicit_captures_len()); - assert_eq!(2, props(r"((a))").explicit_captures_len()); - assert_eq!(1, props(r"([a&&b])").explicit_captures_len()); - } - - #[test] - fn analysis_static_captures_len() { - let len = |pattern| props(pattern).static_explicit_captures_len(); - assert_eq!(Some(0), len(r"")); - assert_eq!(Some(0), len(r"foo|bar")); - assert_eq!(None, len(r"(foo)|bar")); - assert_eq!(None, len(r"foo|(bar)")); - assert_eq!(Some(1), len(r"(foo|bar)")); - assert_eq!(Some(1), len(r"(a|b|c|d|e|f)")); - assert_eq!(Some(1), len(r"(a)|(b)|(c)|(d)|(e)|(f)")); - assert_eq!(Some(2), 
len(r"(a)(b)|(c)(d)|(e)(f)")); - assert_eq!(Some(6), len(r"(a)(b)(c)(d)(e)(f)")); - assert_eq!(Some(3), len(r"(a)(b)(extra)|(a)(b)()")); - assert_eq!(Some(3), len(r"(a)(b)((?:extra)?)")); - assert_eq!(None, len(r"(a)(b)(extra)?")); - assert_eq!(Some(1), len(r"(foo)|(bar)")); - assert_eq!(Some(2), len(r"(foo)(bar)")); - assert_eq!(Some(2), len(r"(foo)+(bar)")); - assert_eq!(None, len(r"(foo)*(bar)")); - assert_eq!(Some(0), len(r"(foo)?{0}")); - assert_eq!(None, len(r"(foo)?{1}")); - assert_eq!(Some(1), len(r"(foo){1}")); - assert_eq!(Some(1), len(r"(foo){1,}")); - assert_eq!(Some(1), len(r"(foo){1,}?")); - assert_eq!(None, len(r"(foo){1,}??")); - assert_eq!(None, len(r"(foo){0,}")); - assert_eq!(Some(1), len(r"(foo)(?:bar)")); - assert_eq!(Some(2), len(r"(foo(?:bar)+)(?:baz(boo))")); - assert_eq!(Some(2), len(r"(?Pfoo)(?:bar)(bal|loon)")); - assert_eq!( - Some(2), - len(r#"<(a)[^>]+href="([^"]+)"|<(img)[^>]+src="([^"]+)""#) - ); - } - - #[test] - fn analysis_is_all_assertions() { - // Positive examples. 
- let p = props(r"\b"); - assert!(!p.look_set().is_empty()); - assert_eq!(p.minimum_len(), Some(0)); - - let p = props(r"\B"); - assert!(!p.look_set().is_empty()); - assert_eq!(p.minimum_len(), Some(0)); - - let p = props(r"^"); - assert!(!p.look_set().is_empty()); - assert_eq!(p.minimum_len(), Some(0)); - - let p = props(r"$"); - assert!(!p.look_set().is_empty()); - assert_eq!(p.minimum_len(), Some(0)); - - let p = props(r"\A"); - assert!(!p.look_set().is_empty()); - assert_eq!(p.minimum_len(), Some(0)); - - let p = props(r"\z"); - assert!(!p.look_set().is_empty()); - assert_eq!(p.minimum_len(), Some(0)); - - let p = props(r"$^\z\A\b\B"); - assert!(!p.look_set().is_empty()); - assert_eq!(p.minimum_len(), Some(0)); - - let p = props(r"$|^|\z|\A|\b|\B"); - assert!(!p.look_set().is_empty()); - assert_eq!(p.minimum_len(), Some(0)); - - let p = props(r"^$|$^"); - assert!(!p.look_set().is_empty()); - assert_eq!(p.minimum_len(), Some(0)); - - let p = props(r"((\b)+())*^"); - assert!(!p.look_set().is_empty()); - assert_eq!(p.minimum_len(), Some(0)); - - // Negative examples. - let p = props(r"^a"); - assert!(!p.look_set().is_empty()); - assert_eq!(p.minimum_len(), Some(1)); - } - - #[test] - fn analysis_look_set_prefix_any() { - let p = props(r"(?-u)(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))"); - assert!(p.look_set_prefix_any().contains(Look::WordAscii)); - } - - #[test] - fn analysis_is_anchored() { - let is_start = |p| props(p).look_set_prefix().contains(Look::Start); - let is_end = |p| props(p).look_set_suffix().contains(Look::End); - - // Positive examples. 
- assert!(is_start(r"^")); - assert!(is_end(r"$")); - - assert!(is_start(r"^^")); - assert!(props(r"$$").look_set_suffix().contains(Look::End)); - - assert!(is_start(r"^$")); - assert!(is_end(r"^$")); - - assert!(is_start(r"^foo")); - assert!(is_end(r"foo$")); - - assert!(is_start(r"^foo|^bar")); - assert!(is_end(r"foo$|bar$")); - - assert!(is_start(r"^(foo|bar)")); - assert!(is_end(r"(foo|bar)$")); - - assert!(is_start(r"^+")); - assert!(is_end(r"$+")); - assert!(is_start(r"^++")); - assert!(is_end(r"$++")); - assert!(is_start(r"(^)+")); - assert!(is_end(r"($)+")); - - assert!(is_start(r"$^")); - assert!(is_start(r"$^")); - assert!(is_start(r"$^|^$")); - assert!(is_end(r"$^|^$")); - - assert!(is_start(r"\b^")); - assert!(is_end(r"$\b")); - assert!(is_start(r"^(?m:^)")); - assert!(is_end(r"(?m:$)$")); - assert!(is_start(r"(?m:^)^")); - assert!(is_end(r"$(?m:$)")); - - // Negative examples. - assert!(!is_start(r"(?m)^")); - assert!(!is_end(r"(?m)$")); - assert!(!is_start(r"(?m:^$)|$^")); - assert!(!is_end(r"(?m:^$)|$^")); - assert!(!is_start(r"$^|(?m:^$)")); - assert!(!is_end(r"$^|(?m:^$)")); - - assert!(!is_start(r"a^")); - assert!(!is_start(r"$a")); - - assert!(!is_end(r"a^")); - assert!(!is_end(r"$a")); - - assert!(!is_start(r"^foo|bar")); - assert!(!is_end(r"foo|bar$")); - - assert!(!is_start(r"^*")); - assert!(!is_end(r"$*")); - assert!(!is_start(r"^*+")); - assert!(!is_end(r"$*+")); - assert!(!is_start(r"^+*")); - assert!(!is_end(r"$+*")); - assert!(!is_start(r"(^)*")); - assert!(!is_end(r"($)*")); - } - - #[test] - fn analysis_is_any_anchored() { - let is_start = |p| props(p).look_set().contains(Look::Start); - let is_end = |p| props(p).look_set().contains(Look::End); - - // Positive examples. - assert!(is_start(r"^")); - assert!(is_end(r"$")); - assert!(is_start(r"\A")); - assert!(is_end(r"\z")); - - // Negative examples. 
- assert!(!is_start(r"(?m)^")); - assert!(!is_end(r"(?m)$")); - assert!(!is_start(r"$")); - assert!(!is_end(r"^")); - } - - #[test] - fn analysis_can_empty() { - // Positive examples. - let assert_empty = - |p| assert_eq!(Some(0), props_bytes(p).minimum_len()); - assert_empty(r""); - assert_empty(r"()"); - assert_empty(r"()*"); - assert_empty(r"()+"); - assert_empty(r"()?"); - assert_empty(r"a*"); - assert_empty(r"a?"); - assert_empty(r"a{0}"); - assert_empty(r"a{0,}"); - assert_empty(r"a{0,1}"); - assert_empty(r"a{0,10}"); - #[cfg(feature = "unicode-gencat")] - assert_empty(r"\pL*"); - assert_empty(r"a*|b"); - assert_empty(r"b|a*"); - assert_empty(r"a|"); - assert_empty(r"|a"); - assert_empty(r"a||b"); - assert_empty(r"a*a?(abcd)*"); - assert_empty(r"^"); - assert_empty(r"$"); - assert_empty(r"(?m)^"); - assert_empty(r"(?m)$"); - assert_empty(r"\A"); - assert_empty(r"\z"); - assert_empty(r"\B"); - assert_empty(r"(?-u)\B"); - assert_empty(r"\b"); - assert_empty(r"(?-u)\b"); - - // Negative examples. - let assert_non_empty = - |p| assert_ne!(Some(0), props_bytes(p).minimum_len()); - assert_non_empty(r"a+"); - assert_non_empty(r"a{1}"); - assert_non_empty(r"a{1,}"); - assert_non_empty(r"a{1,2}"); - assert_non_empty(r"a{1,10}"); - assert_non_empty(r"b|a"); - assert_non_empty(r"a*a+(abcd)*"); - #[cfg(feature = "unicode-gencat")] - assert_non_empty(r"\P{any}"); - assert_non_empty(r"[a--a]"); - assert_non_empty(r"[a&&b]"); - } - - #[test] - fn analysis_is_literal() { - // Positive examples. - assert!(props(r"a").is_literal()); - assert!(props(r"ab").is_literal()); - assert!(props(r"abc").is_literal()); - assert!(props(r"(?m)abc").is_literal()); - assert!(props(r"(?:a)").is_literal()); - assert!(props(r"foo(?:a)").is_literal()); - assert!(props(r"(?:a)foo").is_literal()); - assert!(props(r"[a]").is_literal()); - - // Negative examples. 
- assert!(!props(r"").is_literal()); - assert!(!props(r"^").is_literal()); - assert!(!props(r"a|b").is_literal()); - assert!(!props(r"(a)").is_literal()); - assert!(!props(r"a+").is_literal()); - assert!(!props(r"foo(a)").is_literal()); - assert!(!props(r"(a)foo").is_literal()); - assert!(!props(r"[ab]").is_literal()); - } - - #[test] - fn analysis_is_alternation_literal() { - // Positive examples. - assert!(props(r"a").is_alternation_literal()); - assert!(props(r"ab").is_alternation_literal()); - assert!(props(r"abc").is_alternation_literal()); - assert!(props(r"(?m)abc").is_alternation_literal()); - assert!(props(r"foo|bar").is_alternation_literal()); - assert!(props(r"foo|bar|baz").is_alternation_literal()); - assert!(props(r"[a]").is_alternation_literal()); - assert!(props(r"(?:ab)|cd").is_alternation_literal()); - assert!(props(r"ab|(?:cd)").is_alternation_literal()); - - // Negative examples. - assert!(!props(r"").is_alternation_literal()); - assert!(!props(r"^").is_alternation_literal()); - assert!(!props(r"(a)").is_alternation_literal()); - assert!(!props(r"a+").is_alternation_literal()); - assert!(!props(r"foo(a)").is_alternation_literal()); - assert!(!props(r"(a)foo").is_alternation_literal()); - assert!(!props(r"[ab]").is_alternation_literal()); - assert!(!props(r"[ab]|b").is_alternation_literal()); - assert!(!props(r"a|[ab]").is_alternation_literal()); - assert!(!props(r"(a)|b").is_alternation_literal()); - assert!(!props(r"a|(b)").is_alternation_literal()); - assert!(!props(r"a|b").is_alternation_literal()); - assert!(!props(r"a|b|c").is_alternation_literal()); - assert!(!props(r"[a]|b").is_alternation_literal()); - assert!(!props(r"a|[b]").is_alternation_literal()); - assert!(!props(r"(?:a)|b").is_alternation_literal()); - assert!(!props(r"a|(?:b)").is_alternation_literal()); - assert!(!props(r"(?:z|xx)@|xx").is_alternation_literal()); - } - - // This tests that the smart Hir::repetition constructors does some basic - // simplifications. 
- #[test] - fn smart_repetition() { - assert_eq!(t(r"a{0}"), Hir::empty()); - assert_eq!(t(r"a{1}"), hir_lit("a")); - assert_eq!(t(r"\B{32111}"), hir_look(hir::Look::WordUnicodeNegate)); - } - - // This tests that the smart Hir::concat constructor simplifies the given - // exprs in a way we expect. - #[test] - fn smart_concat() { - assert_eq!(t(""), Hir::empty()); - assert_eq!(t("(?:)"), Hir::empty()); - assert_eq!(t("abc"), hir_lit("abc")); - assert_eq!(t("(?:foo)(?:bar)"), hir_lit("foobar")); - assert_eq!(t("quux(?:foo)(?:bar)baz"), hir_lit("quuxfoobarbaz")); - assert_eq!( - t("foo(?:bar^baz)quux"), - hir_cat(vec![ - hir_lit("foobar"), - hir_look(hir::Look::Start), - hir_lit("bazquux"), - ]) - ); - assert_eq!( - t("foo(?:ba(?:r^b)az)quux"), - hir_cat(vec![ - hir_lit("foobar"), - hir_look(hir::Look::Start), - hir_lit("bazquux"), - ]) - ); - } - - // This tests that the smart Hir::alternation constructor simplifies the - // given exprs in a way we expect. - #[test] - fn smart_alternation() { - assert_eq!( - t("(?:foo)|(?:bar)"), - hir_alt(vec![hir_lit("foo"), hir_lit("bar")]) - ); - assert_eq!( - t("quux|(?:abc|def|xyz)|baz"), - hir_alt(vec![ - hir_lit("quux"), - hir_lit("abc"), - hir_lit("def"), - hir_lit("xyz"), - hir_lit("baz"), - ]) - ); - assert_eq!( - t("quux|(?:abc|(?:def|mno)|xyz)|baz"), - hir_alt(vec![ - hir_lit("quux"), - hir_lit("abc"), - hir_lit("def"), - hir_lit("mno"), - hir_lit("xyz"), - hir_lit("baz"), - ]) - ); - assert_eq!( - t("a|b|c|d|e|f|x|y|z"), - hir_uclass(&[('a', 'f'), ('x', 'z')]), - ); - // Tests that we lift common prefixes out of an alternation. 
- assert_eq!( - t("[A-Z]foo|[A-Z]quux"), - hir_cat(vec![ - hir_uclass(&[('A', 'Z')]), - hir_alt(vec![hir_lit("foo"), hir_lit("quux")]), - ]), - ); - assert_eq!( - t("[A-Z][A-Z]|[A-Z]quux"), - hir_cat(vec![ - hir_uclass(&[('A', 'Z')]), - hir_alt(vec![hir_uclass(&[('A', 'Z')]), hir_lit("quux")]), - ]), - ); - assert_eq!( - t("[A-Z][A-Z]|[A-Z][A-Z]quux"), - hir_cat(vec![ - hir_uclass(&[('A', 'Z')]), - hir_uclass(&[('A', 'Z')]), - hir_alt(vec![Hir::empty(), hir_lit("quux")]), - ]), - ); - assert_eq!( - t("[A-Z]foo|[A-Z]foobar"), - hir_cat(vec![ - hir_uclass(&[('A', 'Z')]), - hir_alt(vec![hir_lit("foo"), hir_lit("foobar")]), - ]), - ); - } - - #[test] - fn regression_alt_empty_concat() { - use crate::ast::{self, Ast}; - - let span = Span::splat(Position::new(0, 0, 0)); - let ast = Ast::alternation(ast::Alternation { - span, - asts: vec![Ast::concat(ast::Concat { span, asts: vec![] })], - }); - - let mut t = Translator::new(); - assert_eq!(Ok(Hir::empty()), t.translate("", &ast)); - } - - #[test] - fn regression_empty_alt() { - use crate::ast::{self, Ast}; - - let span = Span::splat(Position::new(0, 0, 0)); - let ast = Ast::concat(ast::Concat { - span, - asts: vec![Ast::alternation(ast::Alternation { - span, - asts: vec![], - })], - }); - - let mut t = Translator::new(); - assert_eq!(Ok(Hir::fail()), t.translate("", &ast)); - } - - #[test] - fn regression_singleton_alt() { - use crate::{ - ast::{self, Ast}, - hir::Dot, - }; - - let span = Span::splat(Position::new(0, 0, 0)); - let ast = Ast::concat(ast::Concat { - span, - asts: vec![Ast::alternation(ast::Alternation { - span, - asts: vec![Ast::dot(span)], - })], - }); - - let mut t = Translator::new(); - assert_eq!(Ok(Hir::dot(Dot::AnyCharExceptLF)), t.translate("", &ast)); - } - - // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63168 - #[test] - fn regression_fuzz_match() { - let pat = "[(\u{6} \0-\u{afdf5}] \0 "; - let ast = ParserBuilder::new() - .octal(false) - .ignore_whitespace(true) - .build() - 
.parse(pat) - .unwrap(); - let hir = TranslatorBuilder::new() - .utf8(true) - .case_insensitive(false) - .multi_line(false) - .dot_matches_new_line(false) - .swap_greed(true) - .unicode(true) - .build() - .translate(pat, &ast) - .unwrap(); - assert_eq!( - hir, - Hir::concat(vec![ - hir_uclass(&[('\0', '\u{afdf5}')]), - hir_lit("\0"), - ]) - ); - } - - // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63155 - #[cfg(feature = "unicode")] - #[test] - fn regression_fuzz_difference1() { - let pat = r"\W\W|\W[^\v--\W\W\P{Script_Extensions:Pau_Cin_Hau}\u10A1A1-\U{3E3E3}--~~~~--~~~~~~~~------~~~~~~--~~~~~~]*"; - let _ = t(pat); // shouldn't panic - } - - // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63153 - #[test] - fn regression_fuzz_char_decrement1() { - let pat = "w[w[^w?\rw\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\r\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0*\0\0\u{1}\0]\0\0-*\0][^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0x\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\0\0*??\0\u{7f}{2}\u{10}??\0\0\0\0\0\0\0\0\0\u{3}\0\0\0}\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\u{1}\0]\0\u{1}\u{1}H-i]-]\0\0\0\0\u{1}\0]\0\0\0\u{1}\0]\0\0-*\0\0\0\0\u{1}9-\u{7f}]\0'|-\u{7f}]\0'|(?i-ux)[-\u{7f}]\0'\u{3}\0\0\0}\0-*\0] Result; - - /// This method is called before beginning traversal of the HIR. - fn start(&mut self) {} - - /// This method is called on an `Hir` before descending into child `Hir` - /// nodes. - fn visit_pre(&mut self, _hir: &Hir) -> Result<(), Self::Err> { - Ok(()) - } - - /// This method is called on an `Hir` after descending all of its child - /// `Hir` nodes. 
- fn visit_post(&mut self, _hir: &Hir) -> Result<(), Self::Err> { - Ok(()) - } - - /// This method is called between child nodes of an alternation. - fn visit_alternation_in(&mut self) -> Result<(), Self::Err> { - Ok(()) - } - - /// This method is called between child nodes of a concatenation. - fn visit_concat_in(&mut self) -> Result<(), Self::Err> { - Ok(()) - } -} - -/// Executes an implementation of `Visitor` in constant stack space. -/// -/// This function will visit every node in the given `Hir` while calling -/// appropriate methods provided by the [`Visitor`] trait. -/// -/// The primary use case for this method is when one wants to perform case -/// analysis over an `Hir` without using a stack size proportional to the depth -/// of the `Hir`. Namely, this method will instead use constant stack space, -/// but will use heap space proportional to the size of the `Hir`. This may be -/// desirable in cases where the size of `Hir` is proportional to end user -/// input. -/// -/// If the visitor returns an error at any point, then visiting is stopped and -/// the error is returned. -pub fn visit(hir: &Hir, visitor: V) -> Result { - HeapVisitor::new().visit(hir, visitor) -} - -/// HeapVisitor visits every item in an `Hir` recursively using constant stack -/// size and a heap size proportional to the size of the `Hir`. -struct HeapVisitor<'a> { - /// A stack of `Hir` nodes. This is roughly analogous to the call stack - /// used in a typical recursive visitor. - stack: Vec<(&'a Hir, Frame<'a>)>, -} - -/// Represents a single stack frame while performing structural induction over -/// an `Hir`. -enum Frame<'a> { - /// A stack frame allocated just before descending into a repetition - /// operator's child node. - Repetition(&'a hir::Repetition), - /// A stack frame allocated just before descending into a capture's child - /// node. - Capture(&'a hir::Capture), - /// The stack frame used while visiting every child node of a concatenation - /// of expressions. 
- Concat { - /// The child node we are currently visiting. - head: &'a Hir, - /// The remaining child nodes to visit (which may be empty). - tail: &'a [Hir], - }, - /// The stack frame used while visiting every child node of an alternation - /// of expressions. - Alternation { - /// The child node we are currently visiting. - head: &'a Hir, - /// The remaining child nodes to visit (which may be empty). - tail: &'a [Hir], - }, -} - -impl<'a> HeapVisitor<'a> { - fn new() -> HeapVisitor<'a> { - HeapVisitor { stack: vec![] } - } - - fn visit( - &mut self, - mut hir: &'a Hir, - mut visitor: V, - ) -> Result { - self.stack.clear(); - - visitor.start(); - loop { - visitor.visit_pre(hir)?; - if let Some(x) = self.induct(hir) { - let child = x.child(); - self.stack.push((hir, x)); - hir = child; - continue; - } - // No induction means we have a base case, so we can post visit - // it now. - visitor.visit_post(hir)?; - - // At this point, we now try to pop our call stack until it is - // either empty or we hit another inductive case. - loop { - let (post_hir, frame) = match self.stack.pop() { - None => return visitor.finish(), - Some((post_hir, frame)) => (post_hir, frame), - }; - // If this is a concat/alternate, then we might have additional - // inductive steps to process. - if let Some(x) = self.pop(frame) { - match x { - Frame::Alternation { .. } => { - visitor.visit_alternation_in()?; - } - Frame::Concat { .. } => { - visitor.visit_concat_in()?; - } - _ => {} - } - hir = x.child(); - self.stack.push((post_hir, x)); - break; - } - // Otherwise, we've finished visiting all the child nodes for - // this HIR, so we can post visit it now. - visitor.visit_post(post_hir)?; - } - } - } - - /// Build a stack frame for the given HIR if one is needed (which occurs if - /// and only if there are child nodes in the HIR). Otherwise, return None. 
- fn induct(&mut self, hir: &'a Hir) -> Option> { - match *hir.kind() { - HirKind::Repetition(ref x) => Some(Frame::Repetition(x)), - HirKind::Capture(ref x) => Some(Frame::Capture(x)), - HirKind::Concat(ref x) if x.is_empty() => None, - HirKind::Concat(ref x) => { - Some(Frame::Concat { head: &x[0], tail: &x[1..] }) - } - HirKind::Alternation(ref x) if x.is_empty() => None, - HirKind::Alternation(ref x) => { - Some(Frame::Alternation { head: &x[0], tail: &x[1..] }) - } - _ => None, - } - } - - /// Pops the given frame. If the frame has an additional inductive step, - /// then return it, otherwise return `None`. - fn pop(&self, induct: Frame<'a>) -> Option> { - match induct { - Frame::Repetition(_) => None, - Frame::Capture(_) => None, - Frame::Concat { tail, .. } => { - if tail.is_empty() { - None - } else { - Some(Frame::Concat { head: &tail[0], tail: &tail[1..] }) - } - } - Frame::Alternation { tail, .. } => { - if tail.is_empty() { - None - } else { - Some(Frame::Alternation { - head: &tail[0], - tail: &tail[1..], - }) - } - } - } - } -} - -impl<'a> Frame<'a> { - /// Perform the next inductive step on this frame and return the next - /// child HIR node to visit. - fn child(&self) -> &'a Hir { - match *self { - Frame::Repetition(rep) => &rep.sub, - Frame::Capture(capture) => &capture.sub, - Frame::Concat { head, .. } => head, - Frame::Alternation { head, .. } => head, - } - } -} diff --git a/vendor/regex-syntax/src/lib.rs b/vendor/regex-syntax/src/lib.rs deleted file mode 100644 index 20f25db7..00000000 --- a/vendor/regex-syntax/src/lib.rs +++ /dev/null @@ -1,431 +0,0 @@ -/*! -This crate provides a robust regular expression parser. - -This crate defines two primary types: - -* [`Ast`](ast::Ast) is the abstract syntax of a regular expression. - An abstract syntax corresponds to a *structured representation* of the - concrete syntax of a regular expression, where the concrete syntax is the - pattern string itself (e.g., `foo(bar)+`). 
Given some abstract syntax, it - can be converted back to the original concrete syntax (modulo some details, - like whitespace). To a first approximation, the abstract syntax is complex - and difficult to analyze. -* [`Hir`](hir::Hir) is the high-level intermediate representation - ("HIR" or "high-level IR" for short) of regular expression. It corresponds to - an intermediate state of a regular expression that sits between the abstract - syntax and the low level compiled opcodes that are eventually responsible for - executing a regular expression search. Given some high-level IR, it is not - possible to produce the original concrete syntax (although it is possible to - produce an equivalent concrete syntax, but it will likely scarcely resemble - the original pattern). To a first approximation, the high-level IR is simple - and easy to analyze. - -These two types come with conversion routines: - -* An [`ast::parse::Parser`] converts concrete syntax (a `&str`) to an -[`Ast`](ast::Ast). -* A [`hir::translate::Translator`] converts an [`Ast`](ast::Ast) to a -[`Hir`](hir::Hir). - -As a convenience, the above two conversion routines are combined into one via -the top-level [`Parser`] type. This `Parser` will first convert your pattern to -an `Ast` and then convert the `Ast` to an `Hir`. It's also exposed as top-level -[`parse`] free function. - - -# Example - -This example shows how to parse a pattern string into its HIR: - -``` -use regex_syntax::{hir::Hir, parse}; - -let hir = parse("a|b")?; -assert_eq!(hir, Hir::alternation(vec![ - Hir::literal("a".as_bytes()), - Hir::literal("b".as_bytes()), -])); -# Ok::<(), Box>(()) -``` - - -# Concrete syntax supported - -The concrete syntax is documented as part of the public API of the -[`regex` crate](https://docs.rs/regex/%2A/regex/#syntax). - - -# Input safety - -A key feature of this library is that it is safe to use with end user facing -input. This plays a significant role in the internal implementation. 
In -particular: - -1. Parsers provide a `nest_limit` option that permits callers to control how - deeply nested a regular expression is allowed to be. This makes it possible - to do case analysis over an `Ast` or an `Hir` using recursion without - worrying about stack overflow. -2. Since relying on a particular stack size is brittle, this crate goes to - great lengths to ensure that all interactions with both the `Ast` and the - `Hir` do not use recursion. Namely, they use constant stack space and heap - space proportional to the size of the original pattern string (in bytes). - This includes the type's corresponding destructors. (One exception to this - is literal extraction, but this will eventually get fixed.) - - -# Error reporting - -The `Display` implementations on all `Error` types exposed in this library -provide nice human readable errors that are suitable for showing to end users -in a monospace font. - - -# Literal extraction - -This crate provides limited support for [literal extraction from `Hir` -values](hir::literal). Be warned that literal extraction uses recursion, and -therefore, stack size proportional to the size of the `Hir`. - -The purpose of literal extraction is to speed up searches. That is, if you -know a regular expression must match a prefix or suffix literal, then it is -often quicker to search for instances of that literal, and then confirm or deny -the match using the full regular expression engine. These optimizations are -done automatically in the `regex` crate. - - -# Crate features - -An important feature provided by this crate is its Unicode support. This -includes things like case folding, boolean properties, general categories, -scripts and Unicode-aware support for the Perl classes `\w`, `\s` and `\d`. -However, a downside of this support is that it requires bundling several -Unicode data tables that are substantial in size. - -A fair number of use cases do not require full Unicode support. 
For this -reason, this crate exposes a number of features to control which Unicode -data is available. - -If a regular expression attempts to use a Unicode feature that is not available -because the corresponding crate feature was disabled, then translating that -regular expression to an `Hir` will return an error. (It is still possible -construct an `Ast` for such a regular expression, since Unicode data is not -used until translation to an `Hir`.) Stated differently, enabling or disabling -any of the features below can only add or subtract from the total set of valid -regular expressions. Enabling or disabling a feature will never modify the -match semantics of a regular expression. - -The following features are available: - -* **std** - - Enables support for the standard library. This feature is enabled by default. - When disabled, only `core` and `alloc` are used. Otherwise, enabling `std` - generally just enables `std::error::Error` trait impls for the various error - types. -* **unicode** - - Enables all Unicode features. This feature is enabled by default, and will - always cover all Unicode features, even if more are added in the future. -* **unicode-age** - - Provide the data for the - [Unicode `Age` property](https://www.unicode.org/reports/tr44/tr44-24.html#Character_Age). - This makes it possible to use classes like `\p{Age:6.0}` to refer to all - codepoints first introduced in Unicode 6.0 -* **unicode-bool** - - Provide the data for numerous Unicode boolean properties. The full list - is not included here, but contains properties like `Alphabetic`, `Emoji`, - `Lowercase`, `Math`, `Uppercase` and `White_Space`. -* **unicode-case** - - Provide the data for case insensitive matching using - [Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches). -* **unicode-gencat** - - Provide the data for - [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values). 
- This includes, but is not limited to, `Decimal_Number`, `Letter`, - `Math_Symbol`, `Number` and `Punctuation`. -* **unicode-perl** - - Provide the data for supporting the Unicode-aware Perl character classes, - corresponding to `\w`, `\s` and `\d`. This is also necessary for using - Unicode-aware word boundary assertions. Note that if this feature is - disabled, the `\s` and `\d` character classes are still available if the - `unicode-bool` and `unicode-gencat` features are enabled, respectively. -* **unicode-script** - - Provide the data for - [Unicode scripts and script extensions](https://www.unicode.org/reports/tr24/). - This includes, but is not limited to, `Arabic`, `Cyrillic`, `Hebrew`, - `Latin` and `Thai`. -* **unicode-segment** - - Provide the data necessary to provide the properties used to implement the - [Unicode text segmentation algorithms](https://www.unicode.org/reports/tr29/). - This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and - `\p{sb=ATerm}`. -* **arbitrary** - - Enabling this feature introduces a public dependency on the - [`arbitrary`](https://crates.io/crates/arbitrary) - crate. Namely, it implements the `Arbitrary` trait from that crate for the - [`Ast`](crate::ast::Ast) type. This feature is disabled by default. -*/ - -#![no_std] -#![forbid(unsafe_code)] -#![deny(missing_docs, rustdoc::broken_intra_doc_links)] -#![warn(missing_debug_implementations)] -#![cfg_attr(docsrs, feature(doc_auto_cfg))] - -#[cfg(any(test, feature = "std"))] -extern crate std; - -extern crate alloc; - -pub use crate::{ - error::Error, - parser::{parse, Parser, ParserBuilder}, - unicode::UnicodeWordError, -}; - -use alloc::string::String; - -pub mod ast; -mod debug; -mod either; -mod error; -pub mod hir; -mod parser; -mod rank; -mod unicode; -mod unicode_tables; -pub mod utf8; - -/// Escapes all regular expression meta characters in `text`. -/// -/// The string returned may be safely used as a literal in a regular -/// expression. 
-pub fn escape(text: &str) -> String { - let mut quoted = String::new(); - escape_into(text, &mut quoted); - quoted -} - -/// Escapes all meta characters in `text` and writes the result into `buf`. -/// -/// This will append escape characters into the given buffer. The characters -/// that are appended are safe to use as a literal in a regular expression. -pub fn escape_into(text: &str, buf: &mut String) { - buf.reserve(text.len()); - for c in text.chars() { - if is_meta_character(c) { - buf.push('\\'); - } - buf.push(c); - } -} - -/// Returns true if the given character has significance in a regex. -/// -/// Generally speaking, these are the only characters which _must_ be escaped -/// in order to match their literal meaning. For example, to match a literal -/// `|`, one could write `\|`. Sometimes escaping isn't always necessary. For -/// example, `-` is treated as a meta character because of its significance -/// for writing ranges inside of character classes, but the regex `-` will -/// match a literal `-` because `-` has no special meaning outside of character -/// classes. -/// -/// In order to determine whether a character may be escaped at all, the -/// [`is_escapeable_character`] routine should be used. The difference between -/// `is_meta_character` and `is_escapeable_character` is that the latter will -/// return true for some characters that are _not_ meta characters. For -/// example, `%` and `\%` both match a literal `%` in all contexts. In other -/// words, `is_escapeable_character` includes "superfluous" escapes. -/// -/// Note that the set of characters for which this function returns `true` or -/// `false` is fixed and won't change in a semver compatible release. (In this -/// case, "semver compatible release" actually refers to the `regex` crate -/// itself, since reducing or expanding the set of meta characters would be a -/// breaking change for not just `regex-syntax` but also `regex` itself.) 
-/// -/// # Example -/// -/// ``` -/// use regex_syntax::is_meta_character; -/// -/// assert!(is_meta_character('?')); -/// assert!(is_meta_character('-')); -/// assert!(is_meta_character('&')); -/// assert!(is_meta_character('#')); -/// -/// assert!(!is_meta_character('%')); -/// assert!(!is_meta_character('/')); -/// assert!(!is_meta_character('!')); -/// assert!(!is_meta_character('"')); -/// assert!(!is_meta_character('e')); -/// ``` -pub fn is_meta_character(c: char) -> bool { - match c { - '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{' - | '}' | '^' | '$' | '#' | '&' | '-' | '~' => true, - _ => false, - } -} - -/// Returns true if the given character can be escaped in a regex. -/// -/// This returns true in all cases that `is_meta_character` returns true, but -/// also returns true in some cases where `is_meta_character` returns false. -/// For example, `%` is not a meta character, but it is escapeable. That is, -/// `%` and `\%` both match a literal `%` in all contexts. -/// -/// The purpose of this routine is to provide knowledge about what characters -/// may be escaped. Namely, most regex engines permit "superfluous" escapes -/// where characters without any special significance may be escaped even -/// though there is no actual _need_ to do so. -/// -/// This will return false for some characters. For example, `e` is not -/// escapeable. Therefore, `\e` will either result in a parse error (which is -/// true today), or it could backwards compatibly evolve into a new construct -/// with its own meaning. Indeed, that is the purpose of banning _some_ -/// superfluous escapes: it provides a way to evolve the syntax in a compatible -/// manner. 
-/// -/// # Example -/// -/// ``` -/// use regex_syntax::is_escapeable_character; -/// -/// assert!(is_escapeable_character('?')); -/// assert!(is_escapeable_character('-')); -/// assert!(is_escapeable_character('&')); -/// assert!(is_escapeable_character('#')); -/// assert!(is_escapeable_character('%')); -/// assert!(is_escapeable_character('/')); -/// assert!(is_escapeable_character('!')); -/// assert!(is_escapeable_character('"')); -/// -/// assert!(!is_escapeable_character('e')); -/// ``` -pub fn is_escapeable_character(c: char) -> bool { - // Certainly escapeable if it's a meta character. - if is_meta_character(c) { - return true; - } - // Any character that isn't ASCII is definitely not escapeable. There's - // no real need to allow things like \☃ right? - if !c.is_ascii() { - return false; - } - // Otherwise, we basically say that everything is escapeable unless it's a - // letter or digit. Things like \3 are either octal (when enabled) or an - // error, and we should keep it that way. Otherwise, letters are reserved - // for adding new syntax in a backwards compatible way. - match c { - '0'..='9' | 'A'..='Z' | 'a'..='z' => false, - // While not currently supported, we keep these as not escapeable to - // give us some flexibility with respect to supporting the \< and - // \> word boundary assertions in the future. By rejecting them as - // escapeable, \< and \> will result in a parse error. Thus, we can - // turn them into something else in the future without it being a - // backwards incompatible change. - // - // OK, now we support \< and \>, and we need to retain them as *not* - // escapeable here since the escape sequence is significant. - '<' | '>' => false, - _ => true, - } -} - -/// Returns true if and only if the given character is a Unicode word -/// character. -/// -/// A Unicode word character is defined by -/// [UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties). 
-/// In particular, a character -/// is considered a word character if it is in either of the `Alphabetic` or -/// `Join_Control` properties, or is in one of the `Decimal_Number`, `Mark` -/// or `Connector_Punctuation` general categories. -/// -/// # Panics -/// -/// If the `unicode-perl` feature is not enabled, then this function -/// panics. For this reason, it is recommended that callers use -/// [`try_is_word_character`] instead. -pub fn is_word_character(c: char) -> bool { - try_is_word_character(c).expect("unicode-perl feature must be enabled") -} - -/// Returns true if and only if the given character is a Unicode word -/// character. -/// -/// A Unicode word character is defined by -/// [UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties). -/// In particular, a character -/// is considered a word character if it is in either of the `Alphabetic` or -/// `Join_Control` properties, or is in one of the `Decimal_Number`, `Mark` -/// or `Connector_Punctuation` general categories. -/// -/// # Errors -/// -/// If the `unicode-perl` feature is not enabled, then this function always -/// returns an error. -pub fn try_is_word_character( - c: char, -) -> core::result::Result { - unicode::is_word_character(c) -} - -/// Returns true if and only if the given character is an ASCII word character. -/// -/// An ASCII word character is defined by the following character class: -/// `[_0-9a-zA-Z]`. 
-pub fn is_word_byte(c: u8) -> bool { - match c { - b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' => true, - _ => false, - } -} - -#[cfg(test)] -mod tests { - use alloc::string::ToString; - - use super::*; - - #[test] - fn escape_meta() { - assert_eq!( - escape(r"\.+*?()|[]{}^$#&-~"), - r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~".to_string() - ); - } - - #[test] - fn word_byte() { - assert!(is_word_byte(b'a')); - assert!(!is_word_byte(b'-')); - } - - #[test] - #[cfg(feature = "unicode-perl")] - fn word_char() { - assert!(is_word_character('a'), "ASCII"); - assert!(is_word_character('à'), "Latin-1"); - assert!(is_word_character('β'), "Greek"); - assert!(is_word_character('\u{11011}'), "Brahmi (Unicode 6.0)"); - assert!(is_word_character('\u{11611}'), "Modi (Unicode 7.0)"); - assert!(is_word_character('\u{11711}'), "Ahom (Unicode 8.0)"); - assert!(is_word_character('\u{17828}'), "Tangut (Unicode 9.0)"); - assert!(is_word_character('\u{1B1B1}'), "Nushu (Unicode 10.0)"); - assert!(is_word_character('\u{16E40}'), "Medefaidrin (Unicode 11.0)"); - assert!(!is_word_character('-')); - assert!(!is_word_character('☃')); - } - - #[test] - #[should_panic] - #[cfg(not(feature = "unicode-perl"))] - fn word_char_disabled_panic() { - assert!(is_word_character('a')); - } - - #[test] - #[cfg(not(feature = "unicode-perl"))] - fn word_char_disabled_error() { - assert!(try_is_word_character('a').is_err()); - } -} diff --git a/vendor/regex-syntax/src/parser.rs b/vendor/regex-syntax/src/parser.rs deleted file mode 100644 index f482b846..00000000 --- a/vendor/regex-syntax/src/parser.rs +++ /dev/null @@ -1,254 +0,0 @@ -use crate::{ast, hir, Error}; - -/// A convenience routine for parsing a regex using default options. -/// -/// This is equivalent to `Parser::new().parse(pattern)`. -/// -/// If you need to set non-default options, then use a [`ParserBuilder`]. -/// -/// This routine returns an [`Hir`](hir::Hir) value. 
Namely, it automatically -/// parses the pattern as an [`Ast`](ast::Ast) and then invokes the translator -/// to convert the `Ast` into an `Hir`. If you need access to the `Ast`, then -/// you should use a [`ast::parse::Parser`]. -pub fn parse(pattern: &str) -> Result { - Parser::new().parse(pattern) -} - -/// A builder for a regular expression parser. -/// -/// This builder permits modifying configuration options for the parser. -/// -/// This type combines the builder options for both the [AST -/// `ParserBuilder`](ast::parse::ParserBuilder) and the [HIR -/// `TranslatorBuilder`](hir::translate::TranslatorBuilder). -#[derive(Clone, Debug, Default)] -pub struct ParserBuilder { - ast: ast::parse::ParserBuilder, - hir: hir::translate::TranslatorBuilder, -} - -impl ParserBuilder { - /// Create a new parser builder with a default configuration. - pub fn new() -> ParserBuilder { - ParserBuilder::default() - } - - /// Build a parser from this configuration with the given pattern. - pub fn build(&self) -> Parser { - Parser { ast: self.ast.build(), hir: self.hir.build() } - } - - /// Set the nesting limit for this parser. - /// - /// The nesting limit controls how deep the abstract syntax tree is allowed - /// to be. If the AST exceeds the given limit (e.g., with too many nested - /// groups), then an error is returned by the parser. - /// - /// The purpose of this limit is to act as a heuristic to prevent stack - /// overflow for consumers that do structural induction on an `Ast` using - /// explicit recursion. While this crate never does this (instead using - /// constant stack space and moving the call stack to the heap), other - /// crates may. - /// - /// This limit is not checked until the entire Ast is parsed. Therefore, - /// if callers want to put a limit on the amount of heap space used, then - /// they should impose a limit on the length, in bytes, of the concrete - /// pattern string. 
In particular, this is viable since this parser - /// implementation will limit itself to heap space proportional to the - /// length of the pattern string. - /// - /// Note that a nest limit of `0` will return a nest limit error for most - /// patterns but not all. For example, a nest limit of `0` permits `a` but - /// not `ab`, since `ab` requires a concatenation, which results in a nest - /// depth of `1`. In general, a nest limit is not something that manifests - /// in an obvious way in the concrete syntax, therefore, it should not be - /// used in a granular way. - pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder { - self.ast.nest_limit(limit); - self - } - - /// Whether to support octal syntax or not. - /// - /// Octal syntax is a little-known way of uttering Unicode codepoints in - /// a regular expression. For example, `a`, `\x61`, `\u0061` and - /// `\141` are all equivalent regular expressions, where the last example - /// shows octal syntax. - /// - /// While supporting octal syntax isn't in and of itself a problem, it does - /// make good error messages harder. That is, in PCRE based regex engines, - /// syntax like `\0` invokes a backreference, which is explicitly - /// unsupported in Rust's regex engine. However, many users expect it to - /// be supported. Therefore, when octal support is disabled, the error - /// message will explicitly mention that backreferences aren't supported. - /// - /// Octal syntax is disabled by default. - pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder { - self.ast.octal(yes); - self - } - - /// When disabled, translation will permit the construction of a regular - /// expression that may match invalid UTF-8. - /// - /// When enabled (the default), the translator is guaranteed to produce an - /// expression that, for non-empty matches, will only ever produce spans - /// that are entirely valid UTF-8 (otherwise, the translator will return an - /// error). 
- /// - /// Perhaps surprisingly, when UTF-8 is enabled, an empty regex or even - /// a negated ASCII word boundary (uttered as `(?-u:\B)` in the concrete - /// syntax) will be allowed even though they can produce matches that split - /// a UTF-8 encoded codepoint. This only applies to zero-width or "empty" - /// matches, and it is expected that the regex engine itself must handle - /// these cases if necessary (perhaps by suppressing any zero-width matches - /// that split a codepoint). - pub fn utf8(&mut self, yes: bool) -> &mut ParserBuilder { - self.hir.utf8(yes); - self - } - - /// Enable verbose mode in the regular expression. - /// - /// When enabled, verbose mode permits insignificant whitespace in many - /// places in the regular expression, as well as comments. Comments are - /// started using `#` and continue until the end of the line. - /// - /// By default, this is disabled. It may be selectively enabled in the - /// regular expression by using the `x` flag regardless of this setting. - pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder { - self.ast.ignore_whitespace(yes); - self - } - - /// Enable or disable the case insensitive flag by default. - /// - /// By default this is disabled. It may alternatively be selectively - /// enabled in the regular expression itself via the `i` flag. - pub fn case_insensitive(&mut self, yes: bool) -> &mut ParserBuilder { - self.hir.case_insensitive(yes); - self - } - - /// Enable or disable the multi-line matching flag by default. - /// - /// By default this is disabled. It may alternatively be selectively - /// enabled in the regular expression itself via the `m` flag. - pub fn multi_line(&mut self, yes: bool) -> &mut ParserBuilder { - self.hir.multi_line(yes); - self - } - - /// Enable or disable the "dot matches any character" flag by default. - /// - /// By default this is disabled. It may alternatively be selectively - /// enabled in the regular expression itself via the `s` flag. 
- pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut ParserBuilder { - self.hir.dot_matches_new_line(yes); - self - } - - /// Enable or disable the CRLF mode flag by default. - /// - /// By default this is disabled. It may alternatively be selectively - /// enabled in the regular expression itself via the `R` flag. - /// - /// When CRLF mode is enabled, the following happens: - /// - /// * Unless `dot_matches_new_line` is enabled, `.` will match any character - /// except for `\r` and `\n`. - /// * When `multi_line` mode is enabled, `^` and `$` will treat `\r\n`, - /// `\r` and `\n` as line terminators. And in particular, neither will - /// match between a `\r` and a `\n`. - pub fn crlf(&mut self, yes: bool) -> &mut ParserBuilder { - self.hir.crlf(yes); - self - } - - /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`. - /// - /// Namely, instead of `.` (by default) matching everything except for `\n`, - /// this will cause `.` to match everything except for the byte given. - /// - /// If `.` is used in a context where Unicode mode is enabled and this byte - /// isn't ASCII, then an error will be returned. When Unicode mode is - /// disabled, then any byte is permitted, but will return an error if UTF-8 - /// mode is enabled and it is a non-ASCII byte. - /// - /// In short, any ASCII value for a line terminator is always okay. But a - /// non-ASCII byte might result in an error depending on whether Unicode - /// mode or UTF-8 mode are enabled. - /// - /// Note that if `R` mode is enabled then it always takes precedence and - /// the line terminator will be treated as `\r` and `\n` simultaneously. - /// - /// Note also that this *doesn't* impact the look-around assertions - /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional - /// configuration in the regex engine itself. 
- pub fn line_terminator(&mut self, byte: u8) -> &mut ParserBuilder { - self.hir.line_terminator(byte); - self - } - - /// Enable or disable the "swap greed" flag by default. - /// - /// By default this is disabled. It may alternatively be selectively - /// enabled in the regular expression itself via the `U` flag. - pub fn swap_greed(&mut self, yes: bool) -> &mut ParserBuilder { - self.hir.swap_greed(yes); - self - } - - /// Enable or disable the Unicode flag (`u`) by default. - /// - /// By default this is **enabled**. It may alternatively be selectively - /// disabled in the regular expression itself via the `u` flag. - /// - /// Note that unless `utf8` is disabled (it's enabled by default), a - /// regular expression will fail to parse if Unicode mode is disabled and a - /// sub-expression could possibly match invalid UTF-8. - pub fn unicode(&mut self, yes: bool) -> &mut ParserBuilder { - self.hir.unicode(yes); - self - } -} - -/// A convenience parser for regular expressions. -/// -/// This parser takes as input a regular expression pattern string (the -/// "concrete syntax") and returns a high-level intermediate representation -/// (the HIR) suitable for most types of analysis. In particular, this parser -/// hides the intermediate state of producing an AST (the "abstract syntax"). -/// The AST is itself far more complex than the HIR, so this parser serves as a -/// convenience for never having to deal with it at all. -/// -/// If callers have more fine grained use cases that need an AST, then please -/// see the [`ast::parse`] module. -/// -/// A `Parser` can be configured in more detail via a [`ParserBuilder`]. -#[derive(Clone, Debug)] -pub struct Parser { - ast: ast::parse::Parser, - hir: hir::translate::Translator, -} - -impl Parser { - /// Create a new parser with a default configuration. - /// - /// The parser can be run with `parse` method. The parse method returns - /// a high level intermediate representation of the given regular - /// expression. 
- /// - /// To set configuration options on the parser, use [`ParserBuilder`]. - pub fn new() -> Parser { - ParserBuilder::new().build() - } - - /// Parse the regular expression into a high level intermediate - /// representation. - pub fn parse(&mut self, pattern: &str) -> Result { - let ast = self.ast.parse(pattern)?; - let hir = self.hir.translate(pattern, &ast)?; - Ok(hir) - } -} diff --git a/vendor/regex-syntax/src/rank.rs b/vendor/regex-syntax/src/rank.rs deleted file mode 100644 index ccb25a20..00000000 --- a/vendor/regex-syntax/src/rank.rs +++ /dev/null @@ -1,258 +0,0 @@ -pub(crate) const BYTE_FREQUENCIES: [u8; 256] = [ - 55, // '\x00' - 52, // '\x01' - 51, // '\x02' - 50, // '\x03' - 49, // '\x04' - 48, // '\x05' - 47, // '\x06' - 46, // '\x07' - 45, // '\x08' - 103, // '\t' - 242, // '\n' - 66, // '\x0b' - 67, // '\x0c' - 229, // '\r' - 44, // '\x0e' - 43, // '\x0f' - 42, // '\x10' - 41, // '\x11' - 40, // '\x12' - 39, // '\x13' - 38, // '\x14' - 37, // '\x15' - 36, // '\x16' - 35, // '\x17' - 34, // '\x18' - 33, // '\x19' - 56, // '\x1a' - 32, // '\x1b' - 31, // '\x1c' - 30, // '\x1d' - 29, // '\x1e' - 28, // '\x1f' - 255, // ' ' - 148, // '!' - 164, // '"' - 149, // '#' - 136, // '$' - 160, // '%' - 155, // '&' - 173, // "'" - 221, // '(' - 222, // ')' - 134, // '*' - 122, // '+' - 232, // ',' - 202, // '-' - 215, // '.' - 224, // '/' - 208, // '0' - 220, // '1' - 204, // '2' - 187, // '3' - 183, // '4' - 179, // '5' - 177, // '6' - 168, // '7' - 178, // '8' - 200, // '9' - 226, // ':' - 195, // ';' - 154, // '<' - 184, // '=' - 174, // '>' - 126, // '?' 
- 120, // '@' - 191, // 'A' - 157, // 'B' - 194, // 'C' - 170, // 'D' - 189, // 'E' - 162, // 'F' - 161, // 'G' - 150, // 'H' - 193, // 'I' - 142, // 'J' - 137, // 'K' - 171, // 'L' - 176, // 'M' - 185, // 'N' - 167, // 'O' - 186, // 'P' - 112, // 'Q' - 175, // 'R' - 192, // 'S' - 188, // 'T' - 156, // 'U' - 140, // 'V' - 143, // 'W' - 123, // 'X' - 133, // 'Y' - 128, // 'Z' - 147, // '[' - 138, // '\\' - 146, // ']' - 114, // '^' - 223, // '_' - 151, // '`' - 249, // 'a' - 216, // 'b' - 238, // 'c' - 236, // 'd' - 253, // 'e' - 227, // 'f' - 218, // 'g' - 230, // 'h' - 247, // 'i' - 135, // 'j' - 180, // 'k' - 241, // 'l' - 233, // 'm' - 246, // 'n' - 244, // 'o' - 231, // 'p' - 139, // 'q' - 245, // 'r' - 243, // 's' - 251, // 't' - 235, // 'u' - 201, // 'v' - 196, // 'w' - 240, // 'x' - 214, // 'y' - 152, // 'z' - 182, // '{' - 205, // '|' - 181, // '}' - 127, // '~' - 27, // '\x7f' - 212, // '\x80' - 211, // '\x81' - 210, // '\x82' - 213, // '\x83' - 228, // '\x84' - 197, // '\x85' - 169, // '\x86' - 159, // '\x87' - 131, // '\x88' - 172, // '\x89' - 105, // '\x8a' - 80, // '\x8b' - 98, // '\x8c' - 96, // '\x8d' - 97, // '\x8e' - 81, // '\x8f' - 207, // '\x90' - 145, // '\x91' - 116, // '\x92' - 115, // '\x93' - 144, // '\x94' - 130, // '\x95' - 153, // '\x96' - 121, // '\x97' - 107, // '\x98' - 132, // '\x99' - 109, // '\x9a' - 110, // '\x9b' - 124, // '\x9c' - 111, // '\x9d' - 82, // '\x9e' - 108, // '\x9f' - 118, // '\xa0' - 141, // '¡' - 113, // '¢' - 129, // '£' - 119, // '¤' - 125, // '¥' - 165, // '¦' - 117, // '§' - 92, // '¨' - 106, // '©' - 83, // 'ª' - 72, // '«' - 99, // '¬' - 93, // '\xad' - 65, // '®' - 79, // '¯' - 166, // '°' - 237, // '±' - 163, // '²' - 199, // '³' - 190, // '´' - 225, // 'µ' - 209, // '¶' - 203, // '·' - 198, // '¸' - 217, // '¹' - 219, // 'º' - 206, // '»' - 234, // '¼' - 248, // '½' - 158, // '¾' - 239, // '¿' - 255, // 'À' - 255, // 'Á' - 255, // 'Â' - 255, // 'Ã' - 255, // 'Ä' - 255, // 'Å' - 255, // 'Æ' - 255, // 'Ç' - 
255, // 'È' - 255, // 'É' - 255, // 'Ê' - 255, // 'Ë' - 255, // 'Ì' - 255, // 'Í' - 255, // 'Î' - 255, // 'Ï' - 255, // 'Ð' - 255, // 'Ñ' - 255, // 'Ò' - 255, // 'Ó' - 255, // 'Ô' - 255, // 'Õ' - 255, // 'Ö' - 255, // '×' - 255, // 'Ø' - 255, // 'Ù' - 255, // 'Ú' - 255, // 'Û' - 255, // 'Ü' - 255, // 'Ý' - 255, // 'Þ' - 255, // 'ß' - 255, // 'à' - 255, // 'á' - 255, // 'â' - 255, // 'ã' - 255, // 'ä' - 255, // 'å' - 255, // 'æ' - 255, // 'ç' - 255, // 'è' - 255, // 'é' - 255, // 'ê' - 255, // 'ë' - 255, // 'ì' - 255, // 'í' - 255, // 'î' - 255, // 'ï' - 255, // 'ð' - 255, // 'ñ' - 255, // 'ò' - 255, // 'ó' - 255, // 'ô' - 255, // 'õ' - 255, // 'ö' - 255, // '÷' - 255, // 'ø' - 255, // 'ù' - 255, // 'ú' - 255, // 'û' - 255, // 'ü' - 255, // 'ý' - 255, // 'þ' - 255, // 'ÿ' -]; diff --git a/vendor/regex-syntax/src/unicode.rs b/vendor/regex-syntax/src/unicode.rs deleted file mode 100644 index 07f78194..00000000 --- a/vendor/regex-syntax/src/unicode.rs +++ /dev/null @@ -1,1041 +0,0 @@ -use alloc::{ - string::{String, ToString}, - vec::Vec, -}; - -use crate::hir; - -/// An inclusive range of codepoints from a generated file (hence the static -/// lifetime). -type Range = &'static [(char, char)]; - -/// An error that occurs when dealing with Unicode. -/// -/// We don't impl the Error trait here because these always get converted -/// into other public errors. (This error type isn't exported.) -#[derive(Debug)] -pub enum Error { - PropertyNotFound, - PropertyValueNotFound, - // Not used when unicode-perl is enabled. - #[allow(dead_code)] - PerlClassNotFound, -} - -/// An error that occurs when Unicode-aware simple case folding fails. -/// -/// This error can occur when the case mapping tables necessary for Unicode -/// aware case folding are unavailable. This only occurs when the -/// `unicode-case` feature is disabled. (The feature is enabled by default.) 
-#[derive(Debug)] -pub struct CaseFoldError(()); - -#[cfg(feature = "std")] -impl std::error::Error for CaseFoldError {} - -impl core::fmt::Display for CaseFoldError { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!( - f, - "Unicode-aware case folding is not available \ - (probably because the unicode-case feature is not enabled)" - ) - } -} - -/// An error that occurs when the Unicode-aware `\w` class is unavailable. -/// -/// This error can occur when the data tables necessary for the Unicode aware -/// Perl character class `\w` are unavailable. This only occurs when the -/// `unicode-perl` feature is disabled. (The feature is enabled by default.) -#[derive(Debug)] -pub struct UnicodeWordError(()); - -#[cfg(feature = "std")] -impl std::error::Error for UnicodeWordError {} - -impl core::fmt::Display for UnicodeWordError { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!( - f, - "Unicode-aware \\w class is not available \ - (probably because the unicode-perl feature is not enabled)" - ) - } -} - -/// A state oriented traverser of the simple case folding table. -/// -/// A case folder can be constructed via `SimpleCaseFolder::new()`, which will -/// return an error if the underlying case folding table is unavailable. -/// -/// After construction, it is expected that callers will use -/// `SimpleCaseFolder::mapping` by calling it with codepoints in strictly -/// increasing order. For example, calling it on `b` and then on `a` is illegal -/// and will result in a panic. -/// -/// The main idea of this type is that it tries hard to make mapping lookups -/// fast by exploiting the structure of the underlying table, and the ordering -/// assumption enables this. -#[derive(Debug)] -pub struct SimpleCaseFolder { - /// The simple case fold table. 
It's a sorted association list, where the - /// keys are Unicode scalar values and the values are the corresponding - /// equivalence class (not including the key) of the "simple" case folded - /// Unicode scalar values. - table: &'static [(char, &'static [char])], - /// The last codepoint that was used for a lookup. - last: Option, - /// The index to the entry in `table` corresponding to the smallest key `k` - /// such that `k > k0`, where `k0` is the most recent key lookup. Note that - /// in particular, `k0` may not be in the table! - next: usize, -} - -impl SimpleCaseFolder { - /// Create a new simple case folder, returning an error if the underlying - /// case folding table is unavailable. - pub fn new() -> Result { - #[cfg(not(feature = "unicode-case"))] - { - Err(CaseFoldError(())) - } - #[cfg(feature = "unicode-case")] - { - Ok(SimpleCaseFolder { - table: crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE, - last: None, - next: 0, - }) - } - } - - /// Return the equivalence class of case folded codepoints for the given - /// codepoint. The equivalence class returned never includes the codepoint - /// given. If the given codepoint has no case folded codepoints (i.e., - /// no entry in the underlying case folding table), then this returns an - /// empty slice. - /// - /// # Panics - /// - /// This panics when called with a `c` that is less than or equal to the - /// previous call. In other words, callers need to use this method with - /// strictly increasing values of `c`. 
- pub fn mapping(&mut self, c: char) -> &'static [char] { - if let Some(last) = self.last { - assert!( - last < c, - "got codepoint U+{:X} which occurs before \ - last codepoint U+{:X}", - u32::from(c), - u32::from(last), - ); - } - self.last = Some(c); - if self.next >= self.table.len() { - return &[]; - } - let (k, v) = self.table[self.next]; - if k == c { - self.next += 1; - return v; - } - match self.get(c) { - Err(i) => { - self.next = i; - &[] - } - Ok(i) => { - // Since we require lookups to proceed - // in order, anything we find should be - // after whatever we thought might be - // next. Otherwise, the caller is either - // going out of order or we would have - // found our next key at 'self.next'. - assert!(i > self.next); - self.next = i + 1; - self.table[i].1 - } - } - } - - /// Returns true if and only if the given range overlaps with any region - /// of the underlying case folding table. That is, when true, there exists - /// at least one codepoint in the inclusive range `[start, end]` that has - /// a non-trivial equivalence class of case folded codepoints. Conversely, - /// when this returns false, all codepoints in the range `[start, end]` - /// correspond to the trivial equivalence class of case folded codepoints, - /// i.e., itself. - /// - /// This is useful to call before iterating over the codepoints in the - /// range and looking up the mapping for each. If you know none of the - /// mappings will return anything, then you might be able to skip doing it - /// altogether. - /// - /// # Panics - /// - /// This panics when `end < start`. - pub fn overlaps(&self, start: char, end: char) -> bool { - use core::cmp::Ordering; - - assert!(start <= end); - self.table - .binary_search_by(|&(c, _)| { - if start <= c && c <= end { - Ordering::Equal - } else if c > end { - Ordering::Greater - } else { - Ordering::Less - } - }) - .is_ok() - } - - /// Returns the index at which `c` occurs in the simple case fold table. 
If - /// `c` does not occur, then this returns an `i` such that `table[i-1].0 < - /// c` and `table[i].0 > c`. - fn get(&self, c: char) -> Result { - self.table.binary_search_by_key(&c, |&(c1, _)| c1) - } -} - -/// A query for finding a character class defined by Unicode. This supports -/// either use of a property name directly, or lookup by property value. The -/// former generally refers to Binary properties (see UTS#44, Table 8), but -/// as a special exception (see UTS#18, Section 1.2) both general categories -/// (an enumeration) and scripts (a catalog) are supported as if each of their -/// possible values were a binary property. -/// -/// In all circumstances, property names and values are normalized and -/// canonicalized. That is, `GC == gc == GeneralCategory == general_category`. -/// -/// The lifetime `'a` refers to the shorter of the lifetimes of property name -/// and property value. -#[derive(Debug)] -pub enum ClassQuery<'a> { - /// Return a class corresponding to a Unicode binary property, named by - /// a single letter. - OneLetter(char), - /// Return a class corresponding to a Unicode binary property. - /// - /// Note that, by special exception (see UTS#18, Section 1.2), both - /// general category values and script values are permitted here as if - /// they were a binary property. - Binary(&'a str), - /// Return a class corresponding to all codepoints whose property - /// (identified by `property_name`) corresponds to the given value - /// (identified by `property_value`). - ByValue { - /// A property name. - property_name: &'a str, - /// A property value. 
- property_value: &'a str, - }, -} - -impl<'a> ClassQuery<'a> { - fn canonicalize(&self) -> Result { - match *self { - ClassQuery::OneLetter(c) => self.canonical_binary(&c.to_string()), - ClassQuery::Binary(name) => self.canonical_binary(name), - ClassQuery::ByValue { property_name, property_value } => { - let property_name = symbolic_name_normalize(property_name); - let property_value = symbolic_name_normalize(property_value); - - let canon_name = match canonical_prop(&property_name)? { - None => return Err(Error::PropertyNotFound), - Some(canon_name) => canon_name, - }; - Ok(match canon_name { - "General_Category" => { - let canon = match canonical_gencat(&property_value)? { - None => return Err(Error::PropertyValueNotFound), - Some(canon) => canon, - }; - CanonicalClassQuery::GeneralCategory(canon) - } - "Script" => { - let canon = match canonical_script(&property_value)? { - None => return Err(Error::PropertyValueNotFound), - Some(canon) => canon, - }; - CanonicalClassQuery::Script(canon) - } - _ => { - let vals = match property_values(canon_name)? { - None => return Err(Error::PropertyValueNotFound), - Some(vals) => vals, - }; - let canon_val = - match canonical_value(vals, &property_value) { - None => { - return Err(Error::PropertyValueNotFound) - } - Some(canon_val) => canon_val, - }; - CanonicalClassQuery::ByValue { - property_name: canon_name, - property_value: canon_val, - } - } - }) - } - } - } - - fn canonical_binary( - &self, - name: &str, - ) -> Result { - let norm = symbolic_name_normalize(name); - - // This is a special case where 'cf' refers to the 'Format' general - // category, but where the 'cf' abbreviation is also an abbreviation - // for the 'Case_Folding' property. But we want to treat it as - // a general category. (Currently, we don't even support the - // 'Case_Folding' property. But if we do in the future, users will be - // required to spell it out.) 
- // - // Also 'sc' refers to the 'Currency_Symbol' general category, but is - // also the abbreviation for the 'Script' property. So we avoid calling - // 'canonical_prop' for it too, which would erroneously normalize it - // to 'Script'. - // - // Another case: 'lc' is an abbreviation for the 'Cased_Letter' - // general category, but is also an abbreviation for the 'Lowercase_Mapping' - // property. We don't currently support the latter, so as with 'cf' - // above, we treat 'lc' as 'Cased_Letter'. - if norm != "cf" && norm != "sc" && norm != "lc" { - if let Some(canon) = canonical_prop(&norm)? { - return Ok(CanonicalClassQuery::Binary(canon)); - } - } - if let Some(canon) = canonical_gencat(&norm)? { - return Ok(CanonicalClassQuery::GeneralCategory(canon)); - } - if let Some(canon) = canonical_script(&norm)? { - return Ok(CanonicalClassQuery::Script(canon)); - } - Err(Error::PropertyNotFound) - } -} - -/// Like ClassQuery, but its parameters have been canonicalized. This also -/// differentiates binary properties from flattened general categories and -/// scripts. -#[derive(Debug, Eq, PartialEq)] -enum CanonicalClassQuery { - /// The canonical binary property name. - Binary(&'static str), - /// The canonical general category name. - GeneralCategory(&'static str), - /// The canonical script name. - Script(&'static str), - /// An arbitrary association between property and value, both of which - /// have been canonicalized. - /// - /// Note that by construction, the property name of ByValue will never - /// be General_Category or Script. Those two cases are subsumed by the - /// eponymous variants. - ByValue { - /// The canonical property name. - property_name: &'static str, - /// The canonical property value. - property_value: &'static str, - }, -} - -/// Looks up a Unicode class given a query. If one doesn't exist, then -/// `None` is returned. -pub fn class(query: ClassQuery<'_>) -> Result { - use self::CanonicalClassQuery::*; - - match query.canonicalize()? 
{ - Binary(name) => bool_property(name), - GeneralCategory(name) => gencat(name), - Script(name) => script(name), - ByValue { property_name: "Age", property_value } => { - let mut class = hir::ClassUnicode::empty(); - for set in ages(property_value)? { - class.union(&hir_class(set)); - } - Ok(class) - } - ByValue { property_name: "Script_Extensions", property_value } => { - script_extension(property_value) - } - ByValue { - property_name: "Grapheme_Cluster_Break", - property_value, - } => gcb(property_value), - ByValue { property_name: "Sentence_Break", property_value } => { - sb(property_value) - } - ByValue { property_name: "Word_Break", property_value } => { - wb(property_value) - } - _ => { - // What else should we support? - Err(Error::PropertyNotFound) - } - } -} - -/// Returns a Unicode aware class for \w. -/// -/// This returns an error if the data is not available for \w. -pub fn perl_word() -> Result { - #[cfg(not(feature = "unicode-perl"))] - fn imp() -> Result { - Err(Error::PerlClassNotFound) - } - - #[cfg(feature = "unicode-perl")] - fn imp() -> Result { - use crate::unicode_tables::perl_word::PERL_WORD; - Ok(hir_class(PERL_WORD)) - } - - imp() -} - -/// Returns a Unicode aware class for \s. -/// -/// This returns an error if the data is not available for \s. -pub fn perl_space() -> Result { - #[cfg(not(any(feature = "unicode-perl", feature = "unicode-bool")))] - fn imp() -> Result { - Err(Error::PerlClassNotFound) - } - - #[cfg(all(feature = "unicode-perl", not(feature = "unicode-bool")))] - fn imp() -> Result { - use crate::unicode_tables::perl_space::WHITE_SPACE; - Ok(hir_class(WHITE_SPACE)) - } - - #[cfg(feature = "unicode-bool")] - fn imp() -> Result { - use crate::unicode_tables::property_bool::WHITE_SPACE; - Ok(hir_class(WHITE_SPACE)) - } - - imp() -} - -/// Returns a Unicode aware class for \d. -/// -/// This returns an error if the data is not available for \d. 
-pub fn perl_digit() -> Result { - #[cfg(not(any(feature = "unicode-perl", feature = "unicode-gencat")))] - fn imp() -> Result { - Err(Error::PerlClassNotFound) - } - - #[cfg(all(feature = "unicode-perl", not(feature = "unicode-gencat")))] - fn imp() -> Result { - use crate::unicode_tables::perl_decimal::DECIMAL_NUMBER; - Ok(hir_class(DECIMAL_NUMBER)) - } - - #[cfg(feature = "unicode-gencat")] - fn imp() -> Result { - use crate::unicode_tables::general_category::DECIMAL_NUMBER; - Ok(hir_class(DECIMAL_NUMBER)) - } - - imp() -} - -/// Build a Unicode HIR class from a sequence of Unicode scalar value ranges. -pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode { - let hir_ranges: Vec = ranges - .iter() - .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)) - .collect(); - hir::ClassUnicode::new(hir_ranges) -} - -/// Returns true only if the given codepoint is in the `\w` character class. -/// -/// If the `unicode-perl` feature is not enabled, then this returns an error. -pub fn is_word_character(c: char) -> Result { - #[cfg(not(feature = "unicode-perl"))] - fn imp(_: char) -> Result { - Err(UnicodeWordError(())) - } - - #[cfg(feature = "unicode-perl")] - fn imp(c: char) -> Result { - use crate::{is_word_byte, unicode_tables::perl_word::PERL_WORD}; - - if u8::try_from(c).map_or(false, is_word_byte) { - return Ok(true); - } - Ok(PERL_WORD - .binary_search_by(|&(start, end)| { - use core::cmp::Ordering; - - if start <= c && c <= end { - Ordering::Equal - } else if start > c { - Ordering::Greater - } else { - Ordering::Less - } - }) - .is_ok()) - } - - imp(c) -} - -/// A mapping of property values for a specific property. -/// -/// The first element of each tuple is a normalized property value while the -/// second element of each tuple is the corresponding canonical property -/// value. 
-type PropertyValues = &'static [(&'static str, &'static str)]; - -fn canonical_gencat( - normalized_value: &str, -) -> Result, Error> { - Ok(match normalized_value { - "any" => Some("Any"), - "assigned" => Some("Assigned"), - "ascii" => Some("ASCII"), - _ => { - let gencats = property_values("General_Category")?.unwrap(); - canonical_value(gencats, normalized_value) - } - }) -} - -fn canonical_script( - normalized_value: &str, -) -> Result, Error> { - let scripts = property_values("Script")?.unwrap(); - Ok(canonical_value(scripts, normalized_value)) -} - -/// Find the canonical property name for the given normalized property name. -/// -/// If no such property exists, then `None` is returned. -/// -/// The normalized property name must have been normalized according to -/// UAX44 LM3, which can be done using `symbolic_name_normalize`. -/// -/// If the property names data is not available, then an error is returned. -fn canonical_prop( - normalized_name: &str, -) -> Result, Error> { - #[cfg(not(any( - feature = "unicode-age", - feature = "unicode-bool", - feature = "unicode-gencat", - feature = "unicode-perl", - feature = "unicode-script", - feature = "unicode-segment", - )))] - fn imp(_: &str) -> Result, Error> { - Err(Error::PropertyNotFound) - } - - #[cfg(any( - feature = "unicode-age", - feature = "unicode-bool", - feature = "unicode-gencat", - feature = "unicode-perl", - feature = "unicode-script", - feature = "unicode-segment", - ))] - fn imp(name: &str) -> Result, Error> { - use crate::unicode_tables::property_names::PROPERTY_NAMES; - - Ok(PROPERTY_NAMES - .binary_search_by_key(&name, |&(n, _)| n) - .ok() - .map(|i| PROPERTY_NAMES[i].1)) - } - - imp(normalized_name) -} - -/// Find the canonical property value for the given normalized property -/// value. -/// -/// The given property values should correspond to the values for the property -/// under question, which can be found using `property_values`. 
-/// -/// If no such property value exists, then `None` is returned. -/// -/// The normalized property value must have been normalized according to -/// UAX44 LM3, which can be done using `symbolic_name_normalize`. -fn canonical_value( - vals: PropertyValues, - normalized_value: &str, -) -> Option<&'static str> { - vals.binary_search_by_key(&normalized_value, |&(n, _)| n) - .ok() - .map(|i| vals[i].1) -} - -/// Return the table of property values for the given property name. -/// -/// If the property values data is not available, then an error is returned. -fn property_values( - canonical_property_name: &'static str, -) -> Result, Error> { - #[cfg(not(any( - feature = "unicode-age", - feature = "unicode-bool", - feature = "unicode-gencat", - feature = "unicode-perl", - feature = "unicode-script", - feature = "unicode-segment", - )))] - fn imp(_: &'static str) -> Result, Error> { - Err(Error::PropertyValueNotFound) - } - - #[cfg(any( - feature = "unicode-age", - feature = "unicode-bool", - feature = "unicode-gencat", - feature = "unicode-perl", - feature = "unicode-script", - feature = "unicode-segment", - ))] - fn imp(name: &'static str) -> Result, Error> { - use crate::unicode_tables::property_values::PROPERTY_VALUES; - - Ok(PROPERTY_VALUES - .binary_search_by_key(&name, |&(n, _)| n) - .ok() - .map(|i| PROPERTY_VALUES[i].1)) - } - - imp(canonical_property_name) -} - -// This is only used in some cases, but small enough to just let it be dead -// instead of figuring out (and maintaining) the right set of features. -#[allow(dead_code)] -fn property_set( - name_map: &'static [(&'static str, Range)], - canonical: &'static str, -) -> Option { - name_map - .binary_search_by_key(&canonical, |x| x.0) - .ok() - .map(|i| name_map[i].1) -} - -/// Returns an iterator over Unicode Age sets. Each item corresponds to a set -/// of codepoints that were added in a particular revision of Unicode. The -/// iterator yields items in chronological order. 
-/// -/// If the given age value isn't valid or if the data isn't available, then an -/// error is returned instead. -fn ages(canonical_age: &str) -> Result, Error> { - #[cfg(not(feature = "unicode-age"))] - fn imp(_: &str) -> Result, Error> { - use core::option::IntoIter; - Err::, _>(Error::PropertyNotFound) - } - - #[cfg(feature = "unicode-age")] - fn imp(canonical_age: &str) -> Result, Error> { - use crate::unicode_tables::age; - - const AGES: &[(&str, Range)] = &[ - ("V1_1", age::V1_1), - ("V2_0", age::V2_0), - ("V2_1", age::V2_1), - ("V3_0", age::V3_0), - ("V3_1", age::V3_1), - ("V3_2", age::V3_2), - ("V4_0", age::V4_0), - ("V4_1", age::V4_1), - ("V5_0", age::V5_0), - ("V5_1", age::V5_1), - ("V5_2", age::V5_2), - ("V6_0", age::V6_0), - ("V6_1", age::V6_1), - ("V6_2", age::V6_2), - ("V6_3", age::V6_3), - ("V7_0", age::V7_0), - ("V8_0", age::V8_0), - ("V9_0", age::V9_0), - ("V10_0", age::V10_0), - ("V11_0", age::V11_0), - ("V12_0", age::V12_0), - ("V12_1", age::V12_1), - ("V13_0", age::V13_0), - ("V14_0", age::V14_0), - ("V15_0", age::V15_0), - ("V15_1", age::V15_1), - ("V16_0", age::V16_0), - ]; - assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync"); - - let pos = AGES.iter().position(|&(age, _)| canonical_age == age); - match pos { - None => Err(Error::PropertyValueNotFound), - Some(i) => Ok(AGES[..=i].iter().map(|&(_, classes)| classes)), - } - } - - imp(canonical_age) -} - -/// Returns the Unicode HIR class corresponding to the given general category. -/// -/// Name canonicalization is assumed to be performed by the caller. -/// -/// If the given general category could not be found, or if the general -/// category data is not available, then an error is returned. 
-fn gencat(canonical_name: &'static str) -> Result { - #[cfg(not(feature = "unicode-gencat"))] - fn imp(_: &'static str) -> Result { - Err(Error::PropertyNotFound) - } - - #[cfg(feature = "unicode-gencat")] - fn imp(name: &'static str) -> Result { - use crate::unicode_tables::general_category::BY_NAME; - match name { - "ASCII" => Ok(hir_class(&[('\0', '\x7F')])), - "Any" => Ok(hir_class(&[('\0', '\u{10FFFF}')])), - "Assigned" => { - let mut cls = gencat("Unassigned")?; - cls.negate(); - Ok(cls) - } - name => property_set(BY_NAME, name) - .map(hir_class) - .ok_or(Error::PropertyValueNotFound), - } - } - - match canonical_name { - "Decimal_Number" => perl_digit(), - name => imp(name), - } -} - -/// Returns the Unicode HIR class corresponding to the given script. -/// -/// Name canonicalization is assumed to be performed by the caller. -/// -/// If the given script could not be found, or if the script data is not -/// available, then an error is returned. -fn script(canonical_name: &'static str) -> Result { - #[cfg(not(feature = "unicode-script"))] - fn imp(_: &'static str) -> Result { - Err(Error::PropertyNotFound) - } - - #[cfg(feature = "unicode-script")] - fn imp(name: &'static str) -> Result { - use crate::unicode_tables::script::BY_NAME; - property_set(BY_NAME, name) - .map(hir_class) - .ok_or(Error::PropertyValueNotFound) - } - - imp(canonical_name) -} - -/// Returns the Unicode HIR class corresponding to the given script extension. -/// -/// Name canonicalization is assumed to be performed by the caller. -/// -/// If the given script extension could not be found, or if the script data is -/// not available, then an error is returned. 
-fn script_extension( - canonical_name: &'static str, -) -> Result { - #[cfg(not(feature = "unicode-script"))] - fn imp(_: &'static str) -> Result { - Err(Error::PropertyNotFound) - } - - #[cfg(feature = "unicode-script")] - fn imp(name: &'static str) -> Result { - use crate::unicode_tables::script_extension::BY_NAME; - property_set(BY_NAME, name) - .map(hir_class) - .ok_or(Error::PropertyValueNotFound) - } - - imp(canonical_name) -} - -/// Returns the Unicode HIR class corresponding to the given Unicode boolean -/// property. -/// -/// Name canonicalization is assumed to be performed by the caller. -/// -/// If the given boolean property could not be found, or if the boolean -/// property data is not available, then an error is returned. -fn bool_property( - canonical_name: &'static str, -) -> Result { - #[cfg(not(feature = "unicode-bool"))] - fn imp(_: &'static str) -> Result { - Err(Error::PropertyNotFound) - } - - #[cfg(feature = "unicode-bool")] - fn imp(name: &'static str) -> Result { - use crate::unicode_tables::property_bool::BY_NAME; - property_set(BY_NAME, name) - .map(hir_class) - .ok_or(Error::PropertyNotFound) - } - - match canonical_name { - "Decimal_Number" => perl_digit(), - "White_Space" => perl_space(), - name => imp(name), - } -} - -/// Returns the Unicode HIR class corresponding to the given grapheme cluster -/// break property. -/// -/// Name canonicalization is assumed to be performed by the caller. -/// -/// If the given property could not be found, or if the corresponding data is -/// not available, then an error is returned. 
-fn gcb(canonical_name: &'static str) -> Result { - #[cfg(not(feature = "unicode-segment"))] - fn imp(_: &'static str) -> Result { - Err(Error::PropertyNotFound) - } - - #[cfg(feature = "unicode-segment")] - fn imp(name: &'static str) -> Result { - use crate::unicode_tables::grapheme_cluster_break::BY_NAME; - property_set(BY_NAME, name) - .map(hir_class) - .ok_or(Error::PropertyValueNotFound) - } - - imp(canonical_name) -} - -/// Returns the Unicode HIR class corresponding to the given word break -/// property. -/// -/// Name canonicalization is assumed to be performed by the caller. -/// -/// If the given property could not be found, or if the corresponding data is -/// not available, then an error is returned. -fn wb(canonical_name: &'static str) -> Result { - #[cfg(not(feature = "unicode-segment"))] - fn imp(_: &'static str) -> Result { - Err(Error::PropertyNotFound) - } - - #[cfg(feature = "unicode-segment")] - fn imp(name: &'static str) -> Result { - use crate::unicode_tables::word_break::BY_NAME; - property_set(BY_NAME, name) - .map(hir_class) - .ok_or(Error::PropertyValueNotFound) - } - - imp(canonical_name) -} - -/// Returns the Unicode HIR class corresponding to the given sentence -/// break property. -/// -/// Name canonicalization is assumed to be performed by the caller. -/// -/// If the given property could not be found, or if the corresponding data is -/// not available, then an error is returned. -fn sb(canonical_name: &'static str) -> Result { - #[cfg(not(feature = "unicode-segment"))] - fn imp(_: &'static str) -> Result { - Err(Error::PropertyNotFound) - } - - #[cfg(feature = "unicode-segment")] - fn imp(name: &'static str) -> Result { - use crate::unicode_tables::sentence_break::BY_NAME; - property_set(BY_NAME, name) - .map(hir_class) - .ok_or(Error::PropertyValueNotFound) - } - - imp(canonical_name) -} - -/// Like symbolic_name_normalize_bytes, but operates on a string. 
-fn symbolic_name_normalize(x: &str) -> String { - let mut tmp = x.as_bytes().to_vec(); - let len = symbolic_name_normalize_bytes(&mut tmp).len(); - tmp.truncate(len); - // This should always succeed because `symbolic_name_normalize_bytes` - // guarantees that `&tmp[..len]` is always valid UTF-8. - // - // N.B. We could avoid the additional UTF-8 check here, but it's unlikely - // to be worth skipping the additional safety check. A benchmark must - // justify it first. - String::from_utf8(tmp).unwrap() -} - -/// Normalize the given symbolic name in place according to UAX44-LM3. -/// -/// A "symbolic name" typically corresponds to property names and property -/// value aliases. Note, though, that it should not be applied to property -/// string values. -/// -/// The slice returned is guaranteed to be valid UTF-8 for all possible values -/// of `slice`. -/// -/// See: https://unicode.org/reports/tr44/#UAX44-LM3 -fn symbolic_name_normalize_bytes(slice: &mut [u8]) -> &mut [u8] { - // I couldn't find a place in the standard that specified that property - // names/aliases had a particular structure (unlike character names), but - // we assume that it's ASCII only and drop anything that isn't ASCII. - let mut start = 0; - let mut starts_with_is = false; - if slice.len() >= 2 { - // Ignore any "is" prefix. - starts_with_is = slice[0..2] == b"is"[..] - || slice[0..2] == b"IS"[..] - || slice[0..2] == b"iS"[..] - || slice[0..2] == b"Is"[..]; - if starts_with_is { - start = 2; - } - } - let mut next_write = 0; - for i in start..slice.len() { - // VALIDITY ARGUMENT: To guarantee that the resulting slice is valid - // UTF-8, we ensure that the slice contains only ASCII bytes. In - // particular, we drop every non-ASCII byte from the normalized string. 
- let b = slice[i]; - if b == b' ' || b == b'_' || b == b'-' { - continue; - } else if b'A' <= b && b <= b'Z' { - slice[next_write] = b + (b'a' - b'A'); - next_write += 1; - } else if b <= 0x7F { - slice[next_write] = b; - next_write += 1; - } - } - // Special case: ISO_Comment has a 'isc' abbreviation. Since we generally - // ignore 'is' prefixes, the 'isc' abbreviation gets caught in the cross - // fire and ends up creating an alias for 'c' to 'ISO_Comment', but it - // is actually an alias for the 'Other' general category. - if starts_with_is && next_write == 1 && slice[0] == b'c' { - slice[0] = b'i'; - slice[1] = b's'; - slice[2] = b'c'; - next_write = 3; - } - &mut slice[..next_write] -} - -#[cfg(test)] -mod tests { - use super::*; - - #[cfg(feature = "unicode-case")] - fn simple_fold_ok(c: char) -> impl Iterator { - SimpleCaseFolder::new().unwrap().mapping(c).iter().copied() - } - - #[cfg(feature = "unicode-case")] - fn contains_case_map(start: char, end: char) -> bool { - SimpleCaseFolder::new().unwrap().overlaps(start, end) - } - - #[test] - #[cfg(feature = "unicode-case")] - fn simple_fold_k() { - let xs: Vec = simple_fold_ok('k').collect(); - assert_eq!(xs, alloc::vec!['K', 'K']); - - let xs: Vec = simple_fold_ok('K').collect(); - assert_eq!(xs, alloc::vec!['k', 'K']); - - let xs: Vec = simple_fold_ok('K').collect(); - assert_eq!(xs, alloc::vec!['K', 'k']); - } - - #[test] - #[cfg(feature = "unicode-case")] - fn simple_fold_a() { - let xs: Vec = simple_fold_ok('a').collect(); - assert_eq!(xs, alloc::vec!['A']); - - let xs: Vec = simple_fold_ok('A').collect(); - assert_eq!(xs, alloc::vec!['a']); - } - - #[test] - #[cfg(not(feature = "unicode-case"))] - fn simple_fold_disabled() { - assert!(SimpleCaseFolder::new().is_err()); - } - - #[test] - #[cfg(feature = "unicode-case")] - fn range_contains() { - assert!(contains_case_map('A', 'A')); - assert!(contains_case_map('Z', 'Z')); - assert!(contains_case_map('A', 'Z')); - assert!(contains_case_map('@', 'A')); - 
assert!(contains_case_map('Z', '[')); - assert!(contains_case_map('☃', 'Ⰰ')); - - assert!(!contains_case_map('[', '[')); - assert!(!contains_case_map('[', '`')); - - assert!(!contains_case_map('☃', '☃')); - } - - #[test] - #[cfg(feature = "unicode-gencat")] - fn regression_466() { - use super::{CanonicalClassQuery, ClassQuery}; - - let q = ClassQuery::OneLetter('C'); - assert_eq!( - q.canonicalize().unwrap(), - CanonicalClassQuery::GeneralCategory("Other") - ); - } - - #[test] - fn sym_normalize() { - let sym_norm = symbolic_name_normalize; - - assert_eq!(sym_norm("Line_Break"), "linebreak"); - assert_eq!(sym_norm("Line-break"), "linebreak"); - assert_eq!(sym_norm("linebreak"), "linebreak"); - assert_eq!(sym_norm("BA"), "ba"); - assert_eq!(sym_norm("ba"), "ba"); - assert_eq!(sym_norm("Greek"), "greek"); - assert_eq!(sym_norm("isGreek"), "greek"); - assert_eq!(sym_norm("IS_Greek"), "greek"); - assert_eq!(sym_norm("isc"), "isc"); - assert_eq!(sym_norm("is c"), "isc"); - assert_eq!(sym_norm("is_c"), "isc"); - } - - #[test] - fn valid_utf8_symbolic() { - let mut x = b"abc\xFFxyz".to_vec(); - let y = symbolic_name_normalize_bytes(&mut x); - assert_eq!(y, b"abcxyz"); - } -} diff --git a/vendor/regex-syntax/src/unicode_tables/LICENSE-UNICODE b/vendor/regex-syntax/src/unicode_tables/LICENSE-UNICODE deleted file mode 100644 index b82826bd..00000000 --- a/vendor/regex-syntax/src/unicode_tables/LICENSE-UNICODE +++ /dev/null @@ -1,57 +0,0 @@ -UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE - -Unicode Data Files include all data files under the directories -http://www.unicode.org/Public/, http://www.unicode.org/reports/, -http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and -http://www.unicode.org/utility/trac/browser/. - -Unicode Data Files do not include PDF online code charts under the -directory http://www.unicode.org/Public/. 
- -Software includes any source code published in the Unicode Standard -or under the directories -http://www.unicode.org/Public/, http://www.unicode.org/reports/, -http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and -http://www.unicode.org/utility/trac/browser/. - -NOTICE TO USER: Carefully read the following legal agreement. -BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S -DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), -YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE -TERMS AND CONDITIONS OF THIS AGREEMENT. -IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE -THE DATA FILES OR SOFTWARE. - -COPYRIGHT AND PERMISSION NOTICE - -Copyright © 1991-2018 Unicode, Inc. All rights reserved. -Distributed under the Terms of Use in http://www.unicode.org/copyright.html. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of the Unicode data files and any associated documentation -(the "Data Files") or Unicode software and any associated documentation -(the "Software") to deal in the Data Files or Software -without restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, and/or sell copies of -the Data Files or Software, and to permit persons to whom the Data Files -or Software are furnished to do so, provided that either -(a) this copyright and permission notice appear with all copies -of the Data Files or Software, or -(b) this copyright and permission notice appear in associated -Documentation. - -THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
-IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS -NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL -DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, -DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THE DATA FILES OR SOFTWARE. - -Except as contained in this notice, the name of a copyright holder -shall not be used in advertising or otherwise to promote the sale, -use or other dealings in these Data Files or Software without prior -written authorization of the copyright holder. diff --git a/vendor/regex-syntax/src/unicode_tables/age.rs b/vendor/regex-syntax/src/unicode_tables/age.rs deleted file mode 100644 index 466510c9..00000000 --- a/vendor/regex-syntax/src/unicode_tables/age.rs +++ /dev/null @@ -1,1846 +0,0 @@ -// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: -// -// ucd-generate age ucd-16.0.0 --chars -// -// Unicode version: 16.0.0. -// -// ucd-generate 0.3.1 is available on crates.io. 
- -pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ - ("V10_0", V10_0), - ("V11_0", V11_0), - ("V12_0", V12_0), - ("V12_1", V12_1), - ("V13_0", V13_0), - ("V14_0", V14_0), - ("V15_0", V15_0), - ("V15_1", V15_1), - ("V16_0", V16_0), - ("V1_1", V1_1), - ("V2_0", V2_0), - ("V2_1", V2_1), - ("V3_0", V3_0), - ("V3_1", V3_1), - ("V3_2", V3_2), - ("V4_0", V4_0), - ("V4_1", V4_1), - ("V5_0", V5_0), - ("V5_1", V5_1), - ("V5_2", V5_2), - ("V6_0", V6_0), - ("V6_1", V6_1), - ("V6_2", V6_2), - ("V6_3", V6_3), - ("V7_0", V7_0), - ("V8_0", V8_0), - ("V9_0", V9_0), -]; - -pub const V10_0: &'static [(char, char)] = &[ - ('ࡠ', 'ࡪ'), - ('ৼ', '৽'), - ('\u{afa}', '\u{aff}'), - ('\u{d00}', '\u{d00}'), - ('\u{d3b}', '\u{d3c}'), - ('᳷', '᳷'), - ('\u{1df6}', '\u{1df9}'), - ('₿', '₿'), - ('⏿', '⏿'), - ('⯒', '⯒'), - ('⹅', '⹉'), - ('ㄮ', 'ㄮ'), - ('鿖', '鿪'), - ('𐌭', '𐌯'), - ('𑨀', '\u{11a47}'), - ('𑩐', '𑪃'), - ('𑪆', '𑪜'), - ('𑪞', '𑪢'), - ('𑴀', '𑴆'), - ('𑴈', '𑴉'), - ('𑴋', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d47}'), - ('𑵐', '𑵙'), - ('𖿡', '𖿡'), - ('𛀂', '𛄞'), - ('𛅰', '𛋻'), - ('🉠', '🉥'), - ('🛓', '🛔'), - ('🛷', '🛸'), - ('🤀', '🤋'), - ('🤟', '🤟'), - ('🤨', '🤯'), - ('🤱', '🤲'), - ('🥌', '🥌'), - ('🥟', '🥫'), - ('🦒', '🦗'), - ('🧐', '🧦'), - ('𬺰', '𮯠'), -]; - -pub const V11_0: &'static [(char, char)] = &[ - ('ՠ', 'ՠ'), - ('ֈ', 'ֈ'), - ('ׯ', 'ׯ'), - ('\u{7fd}', '߿'), - ('\u{8d3}', '\u{8d3}'), - ('\u{9fe}', '\u{9fe}'), - ('੶', '੶'), - ('\u{c04}', '\u{c04}'), - ('಄', '಄'), - ('ᡸ', 'ᡸ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('⮺', '⮼'), - ('⯓', '⯫'), - ('⯰', '⯾'), - ('⹊', '⹎'), - ('ㄯ', 'ㄯ'), - ('鿫', '鿯'), - ('ꞯ', 'ꞯ'), - ('Ꞹ', 'ꞹ'), - ('ꣾ', '\u{a8ff}'), - ('𐨴', '𐨵'), - ('𐩈', '𐩈'), - ('𐴀', '\u{10d27}'), - ('𐴰', '𐴹'), - ('𐼀', '𐼧'), - ('𐼰', '𐽙'), - ('\u{110cd}', '\u{110cd}'), - ('𑅄', '𑅆'), - ('\u{1133b}', '\u{1133b}'), - ('\u{1145e}', '\u{1145e}'), - ('𑜚', '𑜚'), - ('𑠀', '𑠻'), - ('𑪝', '𑪝'), - ('𑵠', '𑵥'), - ('𑵧', '𑵨'), - ('𑵪', '𑶎'), - 
('\u{11d90}', '\u{11d91}'), - ('𑶓', '𑶘'), - ('𑶠', '𑶩'), - ('𑻠', '𑻸'), - ('𖹀', '𖺚'), - ('𘟭', '𘟱'), - ('𝋠', '𝋳'), - ('𝍲', '𝍸'), - ('𞱱', '𞲴'), - ('🄯', '🄯'), - ('🛹', '🛹'), - ('🟕', '🟘'), - ('🥍', '🥏'), - ('🥬', '🥰'), - ('🥳', '🥶'), - ('🥺', '🥺'), - ('🥼', '🥿'), - ('🦘', '🦢'), - ('🦰', '🦹'), - ('🧁', '🧂'), - ('🧧', '🧿'), - ('🩠', '🩭'), -]; - -pub const V12_0: &'static [(char, char)] = &[ - ('౷', '౷'), - ('ຆ', 'ຆ'), - ('ຉ', 'ຉ'), - ('ຌ', 'ຌ'), - ('ຎ', 'ຓ'), - ('ຘ', 'ຘ'), - ('ຠ', 'ຠ'), - ('ຨ', 'ຩ'), - ('ຬ', 'ຬ'), - ('\u{eba}', '\u{eba}'), - ('ᳺ', 'ᳺ'), - ('⯉', '⯉'), - ('⯿', '⯿'), - ('⹏', '⹏'), - ('Ꞻ', 'ꞿ'), - ('Ꟃ', 'Ᶎ'), - ('ꭦ', 'ꭧ'), - ('𐿠', '𐿶'), - ('𑑟', '𑑟'), - ('𑚸', '𑚸'), - ('𑦠', '𑦧'), - ('𑦪', '\u{119d7}'), - ('\u{119da}', '𑧤'), - ('𑪄', '𑪅'), - ('𑿀', '𑿱'), - ('𑿿', '𑿿'), - ('\u{13430}', '\u{13438}'), - ('𖽅', '𖽊'), - ('\u{16f4f}', '\u{16f4f}'), - ('𖽿', '𖾇'), - ('𖿢', '𖿣'), - ('𘟲', '𘟷'), - ('𛅐', '𛅒'), - ('𛅤', '𛅧'), - ('𞄀', '𞄬'), - ('\u{1e130}', '𞄽'), - ('𞅀', '𞅉'), - ('𞅎', '𞅏'), - ('𞋀', '𞋹'), - ('𞋿', '𞋿'), - ('𞥋', '𞥋'), - ('𞴁', '𞴽'), - ('🅬', '🅬'), - ('🛕', '🛕'), - ('🛺', '🛺'), - ('🟠', '🟫'), - ('🤍', '🤏'), - ('🤿', '🤿'), - ('🥱', '🥱'), - ('🥻', '🥻'), - ('🦥', '🦪'), - ('🦮', '🦯'), - ('🦺', '🦿'), - ('🧃', '🧊'), - ('🧍', '🧏'), - ('🨀', '🩓'), - ('🩰', '🩳'), - ('🩸', '🩺'), - ('🪀', '🪂'), - ('🪐', '🪕'), -]; - -pub const V12_1: &'static [(char, char)] = &[('㋿', '㋿')]; - -pub const V13_0: &'static [(char, char)] = &[ - ('ࢾ', 'ࣇ'), - ('\u{b55}', '\u{b55}'), - ('ഄ', 'ഄ'), - ('\u{d81}', '\u{d81}'), - ('\u{1abf}', '\u{1ac0}'), - ('⮗', '⮗'), - ('⹐', '⹒'), - ('ㆻ', 'ㆿ'), - ('䶶', '䶿'), - ('鿰', '鿼'), - ('Ꟈ', 'ꟊ'), - ('Ꟶ', 'ꟶ'), - ('\u{a82c}', '\u{a82c}'), - ('ꭨ', '꭫'), - ('𐆜', '𐆜'), - ('𐺀', '𐺩'), - ('\u{10eab}', '𐺭'), - ('𐺰', '𐺱'), - ('𐾰', '𐿋'), - ('𑅇', '𑅇'), - ('𑇎', '\u{111cf}'), - ('𑑚', '𑑚'), - ('𑑠', '𑑡'), - ('𑤀', '𑤆'), - ('𑤉', '𑤉'), - ('𑤌', '𑤓'), - ('𑤕', '𑤖'), - ('𑤘', '𑤵'), - ('𑤷', '𑤸'), - ('\u{1193b}', '𑥆'), - ('𑥐', '𑥙'), - ('𑾰', '𑾰'), - ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), - ('𘫳', '𘳕'), - 
('𘴀', '𘴈'), - ('🄍', '🄏'), - ('🅭', '🅯'), - ('🆭', '🆭'), - ('🛖', '🛗'), - ('🛻', '🛼'), - ('🢰', '🢱'), - ('🤌', '🤌'), - ('🥲', '🥲'), - ('🥷', '🥸'), - ('🦣', '🦤'), - ('🦫', '🦭'), - ('🧋', '🧋'), - ('🩴', '🩴'), - ('🪃', '🪆'), - ('🪖', '🪨'), - ('🪰', '🪶'), - ('🫀', '🫂'), - ('🫐', '🫖'), - ('🬀', '🮒'), - ('🮔', '🯊'), - ('🯰', '🯹'), - ('𪛗', '𪛝'), - ('𰀀', '𱍊'), -]; - -pub const V14_0: &'static [(char, char)] = &[ - ('؝', '؝'), - ('ࡰ', 'ࢎ'), - ('\u{890}', '\u{891}'), - ('\u{898}', '\u{89f}'), - ('ࢵ', 'ࢵ'), - ('ࣈ', '\u{8d2}'), - ('\u{c3c}', '\u{c3c}'), - ('ౝ', 'ౝ'), - ('ೝ', 'ೝ'), - ('ᜍ', 'ᜍ'), - ('\u{1715}', '\u{1715}'), - ('ᜟ', 'ᜟ'), - ('\u{180f}', '\u{180f}'), - ('\u{1ac1}', '\u{1ace}'), - ('ᭌ', 'ᭌ'), - ('᭽', '᭾'), - ('\u{1dfa}', '\u{1dfa}'), - ('⃀', '⃀'), - ('Ⱟ', 'Ⱟ'), - ('ⱟ', 'ⱟ'), - ('⹓', '⹝'), - ('鿽', '鿿'), - ('Ꟁ', 'ꟁ'), - ('Ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), - ('ꟲ', 'ꟴ'), - ('﯂', '﯂'), - ('﵀', '﵏'), - ('﷏', '﷏'), - ('﷾', '﷿'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𐽰', '𐾉'), - ('\u{11070}', '𑁵'), - ('\u{110c2}', '\u{110c2}'), - ('𑚹', '𑚹'), - ('𑝀', '𑝆'), - ('𑪰', '𑪿'), - ('𒾐', '𒿲'), - ('𖩰', '𖪾'), - ('𖫀', '𖫉'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('𛄟', '𛄢'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('𜽐', '𜿃'), - ('𝇩', '𝇪'), - ('𝼀', '𝼞'), - ('𞊐', '\u{1e2ae}'), - ('𞟠', '𞟦'), - ('𞟨', '𞟫'), - ('𞟭', '𞟮'), - ('𞟰', '𞟾'), - ('🛝', '🛟'), - ('🟰', '🟰'), - ('🥹', '🥹'), - ('🧌', '🧌'), - ('🩻', '🩼'), - ('🪩', '🪬'), - ('🪷', '🪺'), - ('🫃', '🫅'), - ('🫗', '🫙'), - ('🫠', '🫧'), - ('🫰', '🫶'), - ('𪛞', '𪛟'), - ('𫜵', '𫜸'), -]; - -pub const V15_0: &'static [(char, char)] = &[ - ('ೳ', 'ೳ'), - ('\u{ece}', '\u{ece}'), - ('\u{10efd}', '\u{10eff}'), - ('𑈿', '\u{11241}'), - ('𑬀', '𑬉'), - ('\u{11f00}', '𑼐'), - ('𑼒', '\u{11f3a}'), - ('𑼾', '𑽙'), - ('𓐯', '𓐯'), - ('\u{13439}', '\u{13455}'), - ('𛄲', '𛄲'), - ('𛅕', '𛅕'), - ('𝋀', '𝋓'), - ('𝼥', '𝼪'), - ('𞀰', '𞁭'), - ('\u{1e08f}', 
'\u{1e08f}'), - ('𞓐', '𞓹'), - ('🛜', '🛜'), - ('🝴', '🝶'), - ('🝻', '🝿'), - ('🟙', '🟙'), - ('🩵', '🩷'), - ('🪇', '🪈'), - ('🪭', '🪯'), - ('🪻', '🪽'), - ('🪿', '🪿'), - ('🫎', '🫏'), - ('🫚', '🫛'), - ('🫨', '🫨'), - ('🫷', '🫸'), - ('𫜹', '𫜹'), - ('𱍐', '𲎯'), -]; - -pub const V15_1: &'static [(char, char)] = - &[('⿼', '⿿'), ('㇯', '㇯'), ('𮯰', '𮹝')]; - -pub const V16_0: &'static [(char, char)] = &[ - ('\u{897}', '\u{897}'), - ('᭎', '᭏'), - ('᭿', '᭿'), - ('Ᲊ', 'ᲊ'), - ('␧', '␩'), - ('㇤', '㇥'), - ('Ɤ', 'ꟍ'), - ('Ꟛ', 'Ƛ'), - ('𐗀', '𐗳'), - ('𐵀', '𐵥'), - ('\u{10d69}', '𐶅'), - ('𐶎', '𐶏'), - ('𐻂', '𐻄'), - ('\u{10efc}', '\u{10efc}'), - ('𑎀', '𑎉'), - ('𑎋', '𑎋'), - ('𑎎', '𑎎'), - ('𑎐', '𑎵'), - ('𑎷', '\u{113c0}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '𑏊'), - ('𑏌', '𑏕'), - ('𑏗', '𑏘'), - ('\u{113e1}', '\u{113e2}'), - ('𑛐', '𑛣'), - ('𑯀', '𑯡'), - ('𑯰', '𑯹'), - ('\u{11f5a}', '\u{11f5a}'), - ('𓑠', '𔏺'), - ('𖄀', '𖄹'), - ('𖵀', '𖵹'), - ('𘳿', '𘳿'), - ('𜰀', '𜳹'), - ('𜴀', '𜺳'), - ('𞗐', '𞗺'), - ('𞗿', '𞗿'), - ('🢲', '🢻'), - ('🣀', '🣁'), - ('🪉', '🪉'), - ('🪏', '🪏'), - ('🪾', '🪾'), - ('🫆', '🫆'), - ('🫜', '🫜'), - ('🫟', '🫟'), - ('🫩', '🫩'), - ('🯋', '🯯'), -]; - -pub const V1_1: &'static [(char, char)] = &[ - ('\0', 'ǵ'), - ('Ǻ', 'ȗ'), - ('ɐ', 'ʨ'), - ('ʰ', '˞'), - ('ˠ', '˩'), - ('\u{300}', '\u{345}'), - ('\u{360}', '\u{361}'), - ('ʹ', '͵'), - ('ͺ', 'ͺ'), - (';', ';'), - ('΄', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', 'ώ'), - ('ϐ', 'ϖ'), - ('Ϛ', 'Ϛ'), - ('Ϝ', 'Ϝ'), - ('Ϟ', 'Ϟ'), - ('Ϡ', 'Ϡ'), - ('Ϣ', 'ϳ'), - ('Ё', 'Ќ'), - ('Ў', 'я'), - ('ё', 'ќ'), - ('ў', '\u{486}'), - ('Ґ', 'ӄ'), - ('Ӈ', 'ӈ'), - ('Ӌ', 'ӌ'), - ('Ӑ', 'ӫ'), - ('Ӯ', 'ӵ'), - ('Ӹ', 'ӹ'), - ('Ա', 'Ֆ'), - ('ՙ', '՟'), - ('ա', 'և'), - ('։', '։'), - ('\u{5b0}', '\u{5b9}'), - ('\u{5bb}', '׃'), - ('א', 'ת'), - ('װ', '״'), - ('،', '،'), - ('؛', '؛'), - ('؟', '؟'), - ('ء', 'غ'), - ('ـ', '\u{652}'), - ('٠', '٭'), - ('\u{670}', 'ڷ'), - ('ں', 'ھ'), - ('ۀ', 'ێ'), - ('ې', '\u{6ed}'), - ('۰', '۹'), - ('\u{901}', 'ः'), - ('अ', 'ह'), - 
('\u{93c}', '\u{94d}'), - ('ॐ', '\u{954}'), - ('क़', '॰'), - ('\u{981}', 'ঃ'), - ('অ', 'ঌ'), - ('এ', 'ঐ'), - ('ও', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('\u{9bc}', '\u{9bc}'), - ('\u{9be}', '\u{9c4}'), - ('ে', 'ৈ'), - ('ো', '\u{9cd}'), - ('\u{9d7}', '\u{9d7}'), - ('ড়', 'ঢ়'), - ('য়', '\u{9e3}'), - ('০', '৺'), - ('\u{a02}', '\u{a02}'), - ('ਅ', 'ਊ'), - ('ਏ', 'ਐ'), - ('ਓ', 'ਨ'), - ('ਪ', 'ਰ'), - ('ਲ', 'ਲ਼'), - ('ਵ', 'ਸ਼'), - ('ਸ', 'ਹ'), - ('\u{a3c}', '\u{a3c}'), - ('ਾ', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4d}'), - ('ਖ਼', 'ੜ'), - ('ਫ਼', 'ਫ਼'), - ('੦', 'ੴ'), - ('\u{a81}', 'ઃ'), - ('અ', 'ઋ'), - ('ઍ', 'ઍ'), - ('એ', 'ઑ'), - ('ઓ', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('\u{abc}', '\u{ac5}'), - ('\u{ac7}', 'ૉ'), - ('ો', '\u{acd}'), - ('ૐ', 'ૐ'), - ('ૠ', 'ૠ'), - ('૦', '૯'), - ('\u{b01}', 'ଃ'), - ('ଅ', 'ଌ'), - ('ଏ', 'ଐ'), - ('ଓ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 'ଳ'), - ('ଶ', 'ହ'), - ('\u{b3c}', '\u{b43}'), - ('େ', 'ୈ'), - ('ୋ', '\u{b4d}'), - ('\u{b56}', '\u{b57}'), - ('ଡ଼', 'ଢ଼'), - ('ୟ', 'ୡ'), - ('୦', '୰'), - ('\u{b82}', 'ஃ'), - ('அ', 'ஊ'), - ('எ', 'ஐ'), - ('ஒ', 'க'), - ('ங', 'ச'), - ('ஜ', 'ஜ'), - ('ஞ', 'ட'), - ('ண', 'த'), - ('ந', 'ப'), - ('ம', 'வ'), - ('ஷ', 'ஹ'), - ('\u{bbe}', 'ூ'), - ('ெ', 'ை'), - ('ொ', '\u{bcd}'), - ('\u{bd7}', '\u{bd7}'), - ('௧', '௲'), - ('ఁ', 'ః'), - ('అ', 'ఌ'), - ('ఎ', 'ఐ'), - ('ఒ', 'న'), - ('ప', 'ళ'), - ('వ', 'హ'), - ('\u{c3e}', 'ౄ'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4d}'), - ('\u{c55}', '\u{c56}'), - ('ౠ', 'ౡ'), - ('౦', '౯'), - ('ಂ', 'ಃ'), - ('ಅ', 'ಌ'), - ('ಎ', 'ಐ'), - ('ಒ', 'ನ'), - ('ಪ', 'ಳ'), - ('ವ', 'ಹ'), - ('ಾ', 'ೄ'), - ('\u{cc6}', '\u{cc8}'), - ('\u{cca}', '\u{ccd}'), - ('\u{cd5}', '\u{cd6}'), - ('ೞ', 'ೞ'), - ('ೠ', 'ೡ'), - ('೦', '೯'), - ('ം', 'ഃ'), - ('അ', 'ഌ'), - ('എ', 'ഐ'), - ('ഒ', 'ന'), - ('പ', 'ഹ'), - ('\u{d3e}', '\u{d43}'), - ('െ', 'ൈ'), - ('ൊ', '\u{d4d}'), - ('\u{d57}', '\u{d57}'), - ('ൠ', 'ൡ'), - ('൦', '൯'), - ('ก', '\u{e3a}'), - ('฿', '๛'), - ('ກ', 'ຂ'), - ('ຄ', 'ຄ'), - ('ງ', 'ຈ'), - 
('ຊ', 'ຊ'), - ('ຍ', 'ຍ'), - ('ດ', 'ທ'), - ('ນ', 'ຟ'), - ('ມ', 'ຣ'), - ('ລ', 'ລ'), - ('ວ', 'ວ'), - ('ສ', 'ຫ'), - ('ອ', '\u{eb9}'), - ('\u{ebb}', 'ຽ'), - ('ເ', 'ໄ'), - ('ໆ', 'ໆ'), - ('\u{ec8}', '\u{ecd}'), - ('໐', '໙'), - ('ໜ', 'ໝ'), - ('Ⴀ', 'Ⴥ'), - ('ა', 'ჶ'), - ('჻', '჻'), - ('ᄀ', 'ᅙ'), - ('ᅟ', 'ᆢ'), - ('ᆨ', 'ᇹ'), - ('Ḁ', 'ẚ'), - ('Ạ', 'ỹ'), - ('ἀ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), - ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ῄ'), - ('ῆ', 'ΐ'), - ('ῖ', 'Ί'), - ('῝', '`'), - ('ῲ', 'ῴ'), - ('ῶ', '῾'), - ('\u{2000}', '\u{202e}'), - ('‰', '⁆'), - ('\u{206a}', '⁰'), - ('⁴', '₎'), - ('₠', '₪'), - ('\u{20d0}', '\u{20e1}'), - ('℀', 'ℸ'), - ('⅓', 'ↂ'), - ('←', '⇪'), - ('∀', '⋱'), - ('⌀', '⌀'), - ('⌂', '⍺'), - ('␀', '␤'), - ('⑀', '⑊'), - ('①', '⓪'), - ('─', '▕'), - ('■', '◯'), - ('☀', '☓'), - ('☚', '♯'), - ('✁', '✄'), - ('✆', '✉'), - ('✌', '✧'), - ('✩', '❋'), - ('❍', '❍'), - ('❏', '❒'), - ('❖', '❖'), - ('❘', '❞'), - ('❡', '❧'), - ('❶', '➔'), - ('➘', '➯'), - ('➱', '➾'), - ('\u{3000}', '〷'), - ('〿', '〿'), - ('ぁ', 'ゔ'), - ('\u{3099}', 'ゞ'), - ('ァ', 'ヾ'), - ('ㄅ', 'ㄬ'), - ('ㄱ', 'ㆎ'), - ('㆐', '㆟'), - ('㈀', '㈜'), - ('㈠', '㉃'), - ('㉠', '㉻'), - ('㉿', '㊰'), - ('㋀', '㋋'), - ('㋐', '㋾'), - ('㌀', '㍶'), - ('㍻', '㏝'), - ('㏠', '㏾'), - ('一', '龥'), - ('\u{e000}', '鶴'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('\u{fb1e}', 'זּ'), - ('טּ', 'לּ'), - ('מּ', 'מּ'), - ('נּ', 'סּ'), - ('ףּ', 'פּ'), - ('צּ', 'ﮱ'), - ('ﯓ', '﴿'), - ('ﵐ', 'ﶏ'), - ('ﶒ', 'ﷇ'), - ('ﷰ', 'ﷻ'), - ('\u{fe20}', '\u{fe23}'), - ('︰', '﹄'), - ('﹉', '﹒'), - ('﹔', '﹦'), - ('﹨', '﹫'), - ('ﹰ', 'ﹲ'), - ('ﹴ', 'ﹴ'), - ('ﹶ', 'ﻼ'), - ('\u{feff}', '\u{feff}'), - ('!', '~'), - ('。', 'ᄒ'), - ('ᅡ', 'ᅦ'), - ('ᅧ', 'ᅬ'), - ('ᅭ', 'ᅲ'), - ('ᅳ', 'ᅵ'), - ('¢', '₩'), - ('│', '○'), - ('�', '\u{ffff}'), -]; - -pub const V2_0: &'static [(char, char)] = &[ - ('\u{591}', '\u{5a1}'), - ('\u{5a3}', '\u{5af}'), - ('\u{5c4}', '\u{5c4}'), - ('ༀ', 'ཇ'), - ('ཉ', 'ཀྵ'), - ('\u{f71}', 'ྋ'), - ('\u{f90}', 
'\u{f95}'), - ('\u{f97}', '\u{f97}'), - ('\u{f99}', '\u{fad}'), - ('\u{fb1}', '\u{fb7}'), - ('\u{fb9}', '\u{fb9}'), - ('ẛ', 'ẛ'), - ('₫', '₫'), - ('가', '힣'), - ('\u{1fffe}', '\u{1ffff}'), - ('\u{2fffe}', '\u{2ffff}'), - ('\u{3fffe}', '\u{3ffff}'), - ('\u{4fffe}', '\u{4ffff}'), - ('\u{5fffe}', '\u{5ffff}'), - ('\u{6fffe}', '\u{6ffff}'), - ('\u{7fffe}', '\u{7ffff}'), - ('\u{8fffe}', '\u{8ffff}'), - ('\u{9fffe}', '\u{9ffff}'), - ('\u{afffe}', '\u{affff}'), - ('\u{bfffe}', '\u{bffff}'), - ('\u{cfffe}', '\u{cffff}'), - ('\u{dfffe}', '\u{dffff}'), - ('\u{efffe}', '\u{10ffff}'), -]; - -pub const V2_1: &'static [(char, char)] = &[('€', '€'), ('', '')]; - -pub const V3_0: &'static [(char, char)] = &[ - ('Ƕ', 'ǹ'), - ('Ș', 'ȟ'), - ('Ȣ', 'ȳ'), - ('ʩ', 'ʭ'), - ('˟', '˟'), - ('˪', 'ˮ'), - ('\u{346}', '\u{34e}'), - ('\u{362}', '\u{362}'), - ('ϗ', 'ϗ'), - ('ϛ', 'ϛ'), - ('ϝ', 'ϝ'), - ('ϟ', 'ϟ'), - ('ϡ', 'ϡ'), - ('Ѐ', 'Ѐ'), - ('Ѝ', 'Ѝ'), - ('ѐ', 'ѐ'), - ('ѝ', 'ѝ'), - ('\u{488}', '\u{489}'), - ('Ҍ', 'ҏ'), - ('Ӭ', 'ӭ'), - ('֊', '֊'), - ('\u{653}', '\u{655}'), - ('ڸ', 'ڹ'), - ('ڿ', 'ڿ'), - ('ۏ', 'ۏ'), - ('ۺ', '۾'), - ('܀', '܍'), - ('\u{70f}', 'ܬ'), - ('\u{730}', '\u{74a}'), - ('ހ', '\u{7b0}'), - ('ං', 'ඃ'), - ('අ', 'ඖ'), - ('ක', 'න'), - ('ඳ', 'ර'), - ('ල', 'ල'), - ('ව', 'ෆ'), - ('\u{dca}', '\u{dca}'), - ('\u{dcf}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('ෘ', '\u{ddf}'), - ('ෲ', '෴'), - ('ཪ', 'ཪ'), - ('\u{f96}', '\u{f96}'), - ('\u{fae}', '\u{fb0}'), - ('\u{fb8}', '\u{fb8}'), - ('\u{fba}', '\u{fbc}'), - ('྾', '࿌'), - ('࿏', '࿏'), - ('က', 'အ'), - ('ဣ', 'ဧ'), - ('ဩ', 'ဪ'), - ('ာ', '\u{1032}'), - ('\u{1036}', '\u{1039}'), - ('၀', '\u{1059}'), - ('ሀ', 'ሆ'), - ('ለ', 'ቆ'), - ('ቈ', 'ቈ'), - ('ቊ', 'ቍ'), - ('ቐ', 'ቖ'), - ('ቘ', 'ቘ'), - ('ቚ', 'ቝ'), - ('በ', 'ኆ'), - ('ኈ', 'ኈ'), - ('ኊ', 'ኍ'), - ('ነ', 'ኮ'), - ('ኰ', 'ኰ'), - ('ኲ', 'ኵ'), - ('ኸ', 'ኾ'), - ('ዀ', 'ዀ'), - ('ዂ', 'ዅ'), - ('ወ', 'ዎ'), - ('ዐ', 'ዖ'), - ('ዘ', 'ዮ'), - ('ደ', 'ጎ'), - ('ጐ', 'ጐ'), - ('ጒ', 'ጕ'), - ('ጘ', 'ጞ'), - ('ጠ', 'ፆ'), - ('ፈ', 'ፚ'), - 
('፡', '፼'), - ('Ꭰ', 'Ᏼ'), - ('ᐁ', 'ᙶ'), - ('\u{1680}', '᚜'), - ('ᚠ', 'ᛰ'), - ('ក', 'ៜ'), - ('០', '៩'), - ('᠀', '\u{180e}'), - ('᠐', '᠙'), - ('ᠠ', 'ᡷ'), - ('ᢀ', '\u{18a9}'), - ('\u{202f}', '\u{202f}'), - ('⁈', '⁍'), - ('₭', '₯'), - ('\u{20e2}', '\u{20e3}'), - ('ℹ', '℺'), - ('Ↄ', 'Ↄ'), - ('⇫', '⇳'), - ('⌁', '⌁'), - ('⍻', '⍻'), - ('⍽', '⎚'), - ('␥', '␦'), - ('◰', '◷'), - ('☙', '☙'), - ('♰', '♱'), - ('⠀', '⣿'), - ('⺀', '⺙'), - ('⺛', '⻳'), - ('⼀', '⿕'), - ('⿰', '⿻'), - ('〸', '〺'), - ('〾', '〾'), - ('ㆠ', 'ㆷ'), - ('㐀', '䶵'), - ('ꀀ', 'ꒌ'), - ('꒐', '꒡'), - ('꒤', '꒳'), - ('꒵', '꓀'), - ('꓂', '꓄'), - ('꓆', '꓆'), - ('יִ', 'יִ'), - ('\u{fff9}', '\u{fffb}'), -]; - -pub const V3_1: &'static [(char, char)] = &[ - ('ϴ', 'ϵ'), - ('\u{fdd0}', '\u{fdef}'), - ('𐌀', '𐌞'), - ('𐌠', '𐌣'), - ('𐌰', '𐍊'), - ('𐐀', '𐐥'), - ('𐐨', '𐑍'), - ('𝀀', '𝃵'), - ('𝄀', '𝄦'), - ('𝄪', '𝇝'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓀'), - ('𝓂', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚣'), - ('𝚨', '𝟉'), - ('𝟎', '𝟿'), - ('𠀀', '𪛖'), - ('丽', '𪘀'), - ('\u{e0001}', '\u{e0001}'), - ('\u{e0020}', '\u{e007f}'), -]; - -pub const V3_2: &'static [(char, char)] = &[ - ('Ƞ', 'Ƞ'), - ('\u{34f}', '\u{34f}'), - ('\u{363}', '\u{36f}'), - ('Ϙ', 'ϙ'), - ('϶', '϶'), - ('Ҋ', 'ҋ'), - ('Ӆ', 'ӆ'), - ('Ӊ', 'ӊ'), - ('Ӎ', 'ӎ'), - ('Ԁ', 'ԏ'), - ('ٮ', 'ٯ'), - ('ޱ', 'ޱ'), - ('ჷ', 'ჸ'), - ('ᜀ', 'ᜌ'), - ('ᜎ', '\u{1714}'), - ('ᜠ', '᜶'), - ('ᝀ', '\u{1753}'), - ('ᝠ', 'ᝬ'), - ('ᝮ', 'ᝰ'), - ('\u{1772}', '\u{1773}'), - ('⁇', '⁇'), - ('⁎', '⁒'), - ('⁗', '⁗'), - ('\u{205f}', '\u{2063}'), - ('ⁱ', 'ⁱ'), - ('₰', '₱'), - ('\u{20e4}', '\u{20ea}'), - ('ℽ', '⅋'), - ('⇴', '⇿'), - ('⋲', '⋿'), - ('⍼', '⍼'), - ('⎛', '⏎'), - ('⓫', '⓾'), - ('▖', '▟'), - ('◸', '◿'), - ('☖', '☗'), - ('♲', '♽'), - ('⚀', '⚉'), - ('❨', '❵'), - ('⟐', '⟫'), - ('⟰', '⟿'), - ('⤀', '⫿'), - ('〻', '〽'), - ('ゕ', 
'ゖ'), - ('ゟ', '゠'), - ('ヿ', 'ヿ'), - ('ㇰ', 'ㇿ'), - ('㉑', '㉟'), - ('㊱', '㊿'), - ('꒢', '꒣'), - ('꒴', '꒴'), - ('꓁', '꓁'), - ('꓅', '꓅'), - ('侮', '頻'), - ('﷼', '﷼'), - ('\u{fe00}', '\u{fe0f}'), - ('﹅', '﹆'), - ('ﹳ', 'ﹳ'), - ('⦅', '⦆'), -]; - -pub const V4_0: &'static [(char, char)] = &[ - ('ȡ', 'ȡ'), - ('ȴ', 'ȶ'), - ('ʮ', 'ʯ'), - ('˯', '˿'), - ('\u{350}', '\u{357}'), - ('\u{35d}', '\u{35f}'), - ('Ϸ', 'ϻ'), - ('\u{600}', '\u{603}'), - ('؍', '\u{615}'), - ('\u{656}', '\u{658}'), - ('ۮ', 'ۯ'), - ('ۿ', 'ۿ'), - ('ܭ', 'ܯ'), - ('ݍ', 'ݏ'), - ('ऄ', 'ऄ'), - ('ঽ', 'ঽ'), - ('\u{a01}', '\u{a01}'), - ('ਃ', 'ਃ'), - ('ઌ', 'ઌ'), - ('ૡ', '\u{ae3}'), - ('૱', '૱'), - ('ଵ', 'ଵ'), - ('ୱ', 'ୱ'), - ('௳', '௺'), - ('\u{cbc}', 'ಽ'), - ('\u{17dd}', '\u{17dd}'), - ('៰', '៹'), - ('ᤀ', 'ᤜ'), - ('\u{1920}', 'ᤫ'), - ('ᤰ', '\u{193b}'), - ('᥀', '᥀'), - ('᥄', 'ᥭ'), - ('ᥰ', 'ᥴ'), - ('᧠', '᧿'), - ('ᴀ', 'ᵫ'), - ('⁓', '⁔'), - ('℻', '℻'), - ('⏏', '⏐'), - ('⓿', '⓿'), - ('☔', '☕'), - ('⚊', '⚑'), - ('⚠', '⚡'), - ('⬀', '⬍'), - ('㈝', '㈞'), - ('㉐', '㉐'), - ('㉼', '㉽'), - ('㋌', '㋏'), - ('㍷', '㍺'), - ('㏞', '㏟'), - ('㏿', '㏿'), - ('䷀', '䷿'), - ('﷽', '﷽'), - ('﹇', '﹈'), - ('𐀀', '𐀋'), - ('𐀍', '𐀦'), - ('𐀨', '𐀺'), - ('𐀼', '𐀽'), - ('𐀿', '𐁍'), - ('𐁐', '𐁝'), - ('𐂀', '𐃺'), - ('𐄀', '𐄂'), - ('𐄇', '𐄳'), - ('𐄷', '𐄿'), - ('𐎀', '𐎝'), - ('𐎟', '𐎟'), - ('𐐦', '𐐧'), - ('𐑎', '𐒝'), - ('𐒠', '𐒩'), - ('𐠀', '𐠅'), - ('𐠈', '𐠈'), - ('𐠊', '𐠵'), - ('𐠷', '𐠸'), - ('𐠼', '𐠼'), - ('𐠿', '𐠿'), - ('𝌀', '𝍖'), - ('𝓁', '𝓁'), - ('\u{e0100}', '\u{e01ef}'), -]; - -pub const V4_1: &'static [(char, char)] = &[ - ('ȷ', 'Ɂ'), - ('\u{358}', '\u{35c}'), - ('ϼ', 'Ͽ'), - ('Ӷ', 'ӷ'), - ('\u{5a2}', '\u{5a2}'), - ('\u{5c5}', '\u{5c7}'), - ('؋', '؋'), - ('؞', '؞'), - ('\u{659}', '\u{65e}'), - ('ݐ', 'ݭ'), - ('ॽ', 'ॽ'), - ('ৎ', 'ৎ'), - ('ஶ', 'ஶ'), - ('௦', '௦'), - ('࿐', '࿑'), - ('ჹ', 'ჺ'), - ('ჼ', 'ჼ'), - ('ሇ', 'ሇ'), - ('ቇ', 'ቇ'), - ('ኇ', 'ኇ'), - ('ኯ', 'ኯ'), - ('ዏ', 'ዏ'), - ('ዯ', 'ዯ'), - ('ጏ', 'ጏ'), - ('ጟ', 'ጟ'), - ('ፇ', 'ፇ'), - ('\u{135f}', '፠'), - ('ᎀ', '᎙'), - ('ᦀ', 'ᦩ'), - 
('ᦰ', 'ᧉ'), - ('᧐', '᧙'), - ('᧞', '᧟'), - ('ᨀ', '\u{1a1b}'), - ('᨞', '᨟'), - ('ᵬ', '\u{1dc3}'), - ('⁕', '⁖'), - ('⁘', '⁞'), - ('ₐ', 'ₔ'), - ('₲', '₵'), - ('\u{20eb}', '\u{20eb}'), - ('ℼ', 'ℼ'), - ('⅌', '⅌'), - ('⏑', '⏛'), - ('☘', '☘'), - ('♾', '♿'), - ('⚒', '⚜'), - ('⚢', '⚱'), - ('⟀', '⟆'), - ('⬎', '⬓'), - ('Ⰰ', 'Ⱞ'), - ('ⰰ', 'ⱞ'), - ('Ⲁ', '⳪'), - ('⳹', 'ⴥ'), - ('ⴰ', 'ⵥ'), - ('ⵯ', 'ⵯ'), - ('ⶀ', 'ⶖ'), - ('ⶠ', 'ⶦ'), - ('ⶨ', 'ⶮ'), - ('ⶰ', 'ⶶ'), - ('ⶸ', 'ⶾ'), - ('ⷀ', 'ⷆ'), - ('ⷈ', 'ⷎ'), - ('ⷐ', 'ⷖ'), - ('ⷘ', 'ⷞ'), - ('⸀', '⸗'), - ('⸜', '⸝'), - ('㇀', '㇏'), - ('㉾', '㉾'), - ('龦', '龻'), - ('꜀', '꜖'), - ('ꠀ', '꠫'), - ('並', '龎'), - ('︐', '︙'), - ('𐅀', '𐆊'), - ('𐎠', '𐏃'), - ('𐏈', '𐏕'), - ('𐨀', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '𐨓'), - ('𐨕', '𐨗'), - ('𐨙', '𐨳'), - ('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '𐩇'), - ('𐩐', '𐩘'), - ('𝈀', '𝉅'), - ('𝚤', '𝚥'), -]; - -pub const V5_0: &'static [(char, char)] = &[ - ('ɂ', 'ɏ'), - ('ͻ', 'ͽ'), - ('ӏ', 'ӏ'), - ('Ӻ', 'ӿ'), - ('Ԑ', 'ԓ'), - ('\u{5ba}', '\u{5ba}'), - ('߀', 'ߺ'), - ('ॻ', 'ॼ'), - ('ॾ', 'ॿ'), - ('\u{ce2}', '\u{ce3}'), - ('ೱ', 'ೲ'), - ('\u{1b00}', 'ᭋ'), - ('᭐', '᭼'), - ('\u{1dc4}', '\u{1dca}'), - ('\u{1dfe}', '\u{1dff}'), - ('\u{20ec}', '\u{20ef}'), - ('⅍', 'ⅎ'), - ('ↄ', 'ↄ'), - ('⏜', '⏧'), - ('⚲', '⚲'), - ('⟇', '⟊'), - ('⬔', '⬚'), - ('⬠', '⬣'), - ('Ⱡ', 'ⱬ'), - ('ⱴ', 'ⱷ'), - ('ꜗ', 'ꜚ'), - ('꜠', '꜡'), - ('ꡀ', '꡷'), - ('𐤀', '𐤙'), - ('𐤟', '𐤟'), - ('𒀀', '𒍮'), - ('𒐀', '𒑢'), - ('𒑰', '𒑳'), - ('𝍠', '𝍱'), - ('𝟊', '𝟋'), -]; - -pub const V5_1: &'static [(char, char)] = &[ - ('Ͱ', 'ͳ'), - ('Ͷ', 'ͷ'), - ('Ϗ', 'Ϗ'), - ('\u{487}', '\u{487}'), - ('Ԕ', 'ԣ'), - ('؆', '؊'), - ('\u{616}', '\u{61a}'), - ('ػ', 'ؿ'), - ('ݮ', 'ݿ'), - ('ॱ', 'ॲ'), - ('\u{a51}', '\u{a51}'), - ('\u{a75}', '\u{a75}'), - ('\u{b44}', '\u{b44}'), - ('\u{b62}', '\u{b63}'), - ('ௐ', 'ௐ'), - ('ఽ', 'ఽ'), - ('ౘ', 'ౙ'), - ('\u{c62}', '\u{c63}'), - ('౸', '౿'), - ('ഽ', 'ഽ'), - ('\u{d44}', '\u{d44}'), - ('\u{d62}', '\u{d63}'), - ('൰', '൵'), - ('൹', 'ൿ'), - ('ཫ', 
'ཬ'), - ('࿎', '࿎'), - ('࿒', '࿔'), - ('ဢ', 'ဢ'), - ('ဨ', 'ဨ'), - ('ါ', 'ါ'), - ('\u{1033}', '\u{1035}'), - ('\u{103a}', 'ဿ'), - ('ၚ', '႙'), - ('႞', '႟'), - ('ᢪ', 'ᢪ'), - ('\u{1b80}', '\u{1baa}'), - ('ᮮ', '᮹'), - ('ᰀ', '\u{1c37}'), - ('᰻', '᱉'), - ('ᱍ', '᱿'), - ('\u{1dcb}', '\u{1de6}'), - ('ẜ', 'ẟ'), - ('Ỻ', 'ỿ'), - ('\u{2064}', '\u{2064}'), - ('\u{20f0}', '\u{20f0}'), - ('⅏', '⅏'), - ('ↅ', 'ↈ'), - ('⚝', '⚝'), - ('⚳', '⚼'), - ('⛀', '⛃'), - ('⟌', '⟌'), - ('⟬', '⟯'), - ('⬛', '⬟'), - ('⬤', '⭌'), - ('⭐', '⭔'), - ('Ɑ', 'Ɐ'), - ('ⱱ', 'ⱳ'), - ('ⱸ', 'ⱽ'), - ('\u{2de0}', '\u{2dff}'), - ('⸘', '⸛'), - ('⸞', '⸰'), - ('ㄭ', 'ㄭ'), - ('㇐', '㇣'), - ('龼', '鿃'), - ('ꔀ', 'ꘫ'), - ('Ꙁ', 'ꙟ'), - ('Ꙣ', '꙳'), - ('\u{a67c}', 'ꚗ'), - ('ꜛ', 'ꜟ'), - ('Ꜣ', 'ꞌ'), - ('ꟻ', 'ꟿ'), - ('ꢀ', '\u{a8c4}'), - ('꣎', '꣙'), - ('꤀', '\u{a953}'), - ('꥟', '꥟'), - ('ꨀ', '\u{aa36}'), - ('ꩀ', 'ꩍ'), - ('꩐', '꩙'), - ('꩜', '꩟'), - ('\u{fe24}', '\u{fe26}'), - ('𐆐', '𐆛'), - ('𐇐', '\u{101fd}'), - ('𐊀', '𐊜'), - ('𐊠', '𐋐'), - ('𐤠', '𐤹'), - ('𐤿', '𐤿'), - ('𝄩', '𝄩'), - ('🀀', '🀫'), - ('🀰', '🂓'), -]; - -pub const V5_2: &'static [(char, char)] = &[ - ('Ԥ', 'ԥ'), - ('ࠀ', '\u{82d}'), - ('࠰', '࠾'), - ('\u{900}', '\u{900}'), - ('ॎ', 'ॎ'), - ('\u{955}', '\u{955}'), - ('ॹ', 'ॺ'), - ('৻', '৻'), - ('࿕', '࿘'), - ('ႚ', '\u{109d}'), - ('ᅚ', 'ᅞ'), - ('ᆣ', 'ᆧ'), - ('ᇺ', 'ᇿ'), - ('᐀', '᐀'), - ('ᙷ', 'ᙿ'), - ('ᢰ', 'ᣵ'), - ('ᦪ', 'ᦫ'), - ('᧚', '᧚'), - ('ᨠ', '\u{1a5e}'), - ('\u{1a60}', '\u{1a7c}'), - ('\u{1a7f}', '᪉'), - ('᪐', '᪙'), - ('᪠', '᪭'), - ('\u{1cd0}', 'ᳲ'), - ('\u{1dfd}', '\u{1dfd}'), - ('₶', '₸'), - ('⅐', '⅒'), - ('↉', '↉'), - ('⏨', '⏨'), - ('⚞', '⚟'), - ('⚽', '⚿'), - ('⛄', '⛍'), - ('⛏', '⛡'), - ('⛣', '⛣'), - ('⛨', '⛿'), - ('❗', '❗'), - ('⭕', '⭙'), - ('Ɒ', 'Ɒ'), - ('Ȿ', 'Ɀ'), - ('Ⳬ', '\u{2cf1}'), - ('⸱', '⸱'), - ('㉄', '㉏'), - ('鿄', '鿋'), - ('ꓐ', '꓿'), - ('ꚠ', '꛷'), - ('꠰', '꠹'), - ('\u{a8e0}', 'ꣻ'), - ('ꥠ', 'ꥼ'), - ('\u{a980}', '꧍'), - ('ꧏ', '꧙'), - ('꧞', '꧟'), - ('ꩠ', 'ꩻ'), - ('ꪀ', 'ꫂ'), - ('ꫛ', '꫟'), - ('ꯀ', '\u{abed}'), - ('꯰', '꯹'), 
- ('ힰ', 'ퟆ'), - ('ퟋ', 'ퟻ'), - ('恵', '舘'), - ('𐡀', '𐡕'), - ('𐡗', '𐡟'), - ('𐤚', '𐤛'), - ('𐩠', '𐩿'), - ('𐬀', '𐬵'), - ('𐬹', '𐭕'), - ('𐭘', '𐭲'), - ('𐭸', '𐭿'), - ('𐰀', '𐱈'), - ('𐹠', '𐹾'), - ('\u{11080}', '𑃁'), - ('𓀀', '𓐮'), - ('🄀', '🄊'), - ('🄐', '🄮'), - ('🄱', '🄱'), - ('🄽', '🄽'), - ('🄿', '🄿'), - ('🅂', '🅂'), - ('🅆', '🅆'), - ('🅊', '🅎'), - ('🅗', '🅗'), - ('🅟', '🅟'), - ('🅹', '🅹'), - ('🅻', '🅼'), - ('🅿', '🅿'), - ('🆊', '🆍'), - ('🆐', '🆐'), - ('🈀', '🈀'), - ('🈐', '🈱'), - ('🉀', '🉈'), - ('𪜀', '𫜴'), -]; - -pub const V6_0: &'static [(char, char)] = &[ - ('Ԧ', 'ԧ'), - ('ؠ', 'ؠ'), - ('\u{65f}', '\u{65f}'), - ('ࡀ', '\u{85b}'), - ('࡞', '࡞'), - ('\u{93a}', 'ऻ'), - ('ॏ', 'ॏ'), - ('\u{956}', '\u{957}'), - ('ॳ', 'ॷ'), - ('୲', '୷'), - ('ഩ', 'ഩ'), - ('ഺ', 'ഺ'), - ('ൎ', 'ൎ'), - ('ྌ', '\u{f8f}'), - ('࿙', '࿚'), - ('\u{135d}', '\u{135e}'), - ('ᯀ', '\u{1bf3}'), - ('᯼', '᯿'), - ('\u{1dfc}', '\u{1dfc}'), - ('ₕ', 'ₜ'), - ('₹', '₹'), - ('⏩', '⏳'), - ('⛎', '⛎'), - ('⛢', '⛢'), - ('⛤', '⛧'), - ('✅', '✅'), - ('✊', '✋'), - ('✨', '✨'), - ('❌', '❌'), - ('❎', '❎'), - ('❓', '❕'), - ('❟', '❠'), - ('➕', '➗'), - ('➰', '➰'), - ('➿', '➿'), - ('⟎', '⟏'), - ('⵰', '⵰'), - ('\u{2d7f}', '\u{2d7f}'), - ('ㆸ', 'ㆺ'), - ('Ꙡ', 'ꙡ'), - ('Ɥ', 'ꞎ'), - ('Ꞑ', 'ꞑ'), - ('Ꞡ', 'ꞩ'), - ('ꟺ', 'ꟺ'), - ('ꬁ', 'ꬆ'), - ('ꬉ', 'ꬎ'), - ('ꬑ', 'ꬖ'), - ('ꬠ', 'ꬦ'), - ('ꬨ', 'ꬮ'), - ('﮲', '﯁'), - ('𑀀', '𑁍'), - ('𑁒', '𑁯'), - ('𖠀', '𖨸'), - ('𛀀', '𛀁'), - ('🂠', '🂮'), - ('🂱', '🂾'), - ('🃁', '🃏'), - ('🃑', '🃟'), - ('🄰', '🄰'), - ('🄲', '🄼'), - ('🄾', '🄾'), - ('🅀', '🅁'), - ('🅃', '🅅'), - ('🅇', '🅉'), - ('🅏', '🅖'), - ('🅘', '🅞'), - ('🅠', '🅩'), - ('🅰', '🅸'), - ('🅺', '🅺'), - ('🅽', '🅾'), - ('🆀', '🆉'), - ('🆎', '🆏'), - ('🆑', '🆚'), - ('🇦', '🇿'), - ('🈁', '🈂'), - ('🈲', '🈺'), - ('🉐', '🉑'), - ('🌀', '🌠'), - ('🌰', '🌵'), - ('🌷', '🍼'), - ('🎀', '🎓'), - ('🎠', '🏄'), - ('🏆', '🏊'), - ('🏠', '🏰'), - ('🐀', '🐾'), - ('👀', '👀'), - ('👂', '📷'), - ('📹', '📼'), - ('🔀', '🔽'), - ('🕐', '🕧'), - ('🗻', '🗿'), - ('😁', '😐'), - ('😒', '😔'), - ('😖', '😖'), - ('😘', '😘'), - ('😚', '😚'), - ('😜', '😞'), - ('😠', '😥'), 
- ('😨', '😫'), - ('😭', '😭'), - ('😰', '😳'), - ('😵', '🙀'), - ('🙅', '🙏'), - ('🚀', '🛅'), - ('🜀', '🝳'), - ('𫝀', '𫠝'), -]; - -pub const V6_1: &'static [(char, char)] = &[ - ('֏', '֏'), - ('\u{604}', '\u{604}'), - ('ࢠ', 'ࢠ'), - ('ࢢ', 'ࢬ'), - ('\u{8e4}', '\u{8fe}'), - ('૰', '૰'), - ('ໞ', 'ໟ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ჽ', 'ჿ'), - ('\u{1bab}', '\u{1bad}'), - ('ᮺ', 'ᮿ'), - ('᳀', '᳇'), - ('ᳳ', 'ᳶ'), - ('⟋', '⟋'), - ('⟍', '⟍'), - ('Ⳳ', 'ⳳ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ⵦ', 'ⵧ'), - ('⸲', '⸻'), - ('鿌', '鿌'), - ('\u{a674}', '\u{a67b}'), - ('\u{a69f}', '\u{a69f}'), - ('Ꞓ', 'ꞓ'), - ('Ɦ', 'Ɦ'), - ('ꟸ', 'ꟹ'), - ('ꫠ', '\u{aaf6}'), - ('郞', '隷'), - ('𐦀', '𐦷'), - ('𐦾', '𐦿'), - ('𑃐', '𑃨'), - ('𑃰', '𑃹'), - ('\u{11100}', '\u{11134}'), - ('𑄶', '𑅃'), - ('\u{11180}', '𑇈'), - ('𑇐', '𑇙'), - ('𑚀', '\u{116b7}'), - ('𑛀', '𑛉'), - ('𖼀', '𖽄'), - ('𖽐', '𖽾'), - ('\u{16f8f}', '𖾟'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('𞻰', '𞻱'), - ('🅪', '🅫'), - ('🕀', '🕃'), - ('😀', '😀'), - ('😑', '😑'), - ('😕', '😕'), - ('😗', '😗'), - ('😙', '😙'), - ('😛', '😛'), - ('😟', '😟'), - ('😦', '😧'), - ('😬', '😬'), - ('😮', '😯'), - ('😴', '😴'), -]; - -pub const V6_2: &'static [(char, char)] = &[('₺', '₺')]; - -pub const V6_3: &'static [(char, char)] = - &[('\u{61c}', '\u{61c}'), ('\u{2066}', '\u{2069}')]; - -pub const V7_0: &'static [(char, char)] = &[ - ('Ϳ', 'Ϳ'), - ('Ԩ', 'ԯ'), - ('֍', '֎'), - ('\u{605}', '\u{605}'), - ('ࢡ', 'ࢡ'), - ('ࢭ', 'ࢲ'), - ('\u{8ff}', '\u{8ff}'), - ('ॸ', 'ॸ'), - ('ঀ', 'ঀ'), - ('\u{c00}', '\u{c00}'), - ('ఴ', 'ఴ'), - ('\u{c81}', '\u{c81}'), - ('\u{d01}', '\u{d01}'), - ('෦', '෯'), - ('ᛱ', 
'ᛸ'), - ('ᤝ', 'ᤞ'), - ('\u{1ab0}', '\u{1abe}'), - ('\u{1cf8}', '\u{1cf9}'), - ('\u{1de7}', '\u{1df5}'), - ('₻', '₽'), - ('⏴', '⏺'), - ('✀', '✀'), - ('⭍', '⭏'), - ('⭚', '⭳'), - ('⭶', '⮕'), - ('⮘', '⮹'), - ('⮽', '⯈'), - ('⯊', '⯑'), - ('⸼', '⹂'), - ('Ꚙ', 'ꚝ'), - ('ꞔ', 'ꞟ'), - ('Ɜ', 'Ɬ'), - ('Ʞ', 'Ʇ'), - ('ꟷ', 'ꟷ'), - ('ꧠ', 'ꧾ'), - ('\u{aa7c}', 'ꩿ'), - ('ꬰ', 'ꭟ'), - ('ꭤ', 'ꭥ'), - ('\u{fe27}', '\u{fe2d}'), - ('𐆋', '𐆌'), - ('𐆠', '𐆠'), - ('\u{102e0}', '𐋻'), - ('𐌟', '𐌟'), - ('𐍐', '\u{1037a}'), - ('𐔀', '𐔧'), - ('𐔰', '𐕣'), - ('𐕯', '𐕯'), - ('𐘀', '𐜶'), - ('𐝀', '𐝕'), - ('𐝠', '𐝧'), - ('𐡠', '𐢞'), - ('𐢧', '𐢯'), - ('𐪀', '𐪟'), - ('𐫀', '\u{10ae6}'), - ('𐫫', '𐫶'), - ('𐮀', '𐮑'), - ('𐮙', '𐮜'), - ('𐮩', '𐮯'), - ('\u{1107f}', '\u{1107f}'), - ('𑅐', '𑅶'), - ('𑇍', '𑇍'), - ('𑇚', '𑇚'), - ('𑇡', '𑇴'), - ('𑈀', '𑈑'), - ('𑈓', '𑈽'), - ('𑊰', '\u{112ea}'), - ('𑋰', '𑋹'), - ('\u{11301}', '𑌃'), - ('𑌅', '𑌌'), - ('𑌏', '𑌐'), - ('𑌓', '𑌨'), - ('𑌪', '𑌰'), - ('𑌲', '𑌳'), - ('𑌵', '𑌹'), - ('\u{1133c}', '𑍄'), - ('𑍇', '𑍈'), - ('𑍋', '\u{1134d}'), - ('\u{11357}', '\u{11357}'), - ('𑍝', '𑍣'), - ('\u{11366}', '\u{1136c}'), - ('\u{11370}', '\u{11374}'), - ('𑒀', '𑓇'), - ('𑓐', '𑓙'), - ('𑖀', '\u{115b5}'), - ('𑖸', '𑗉'), - ('𑘀', '𑙄'), - ('𑙐', '𑙙'), - ('𑢠', '𑣲'), - ('𑣿', '𑣿'), - ('𑫀', '𑫸'), - ('𒍯', '𒎘'), - ('𒑣', '𒑮'), - ('𒑴', '𒑴'), - ('𖩀', '𖩞'), - ('𖩠', '𖩩'), - ('𖩮', '𖩯'), - ('𖫐', '𖫭'), - ('\u{16af0}', '𖫵'), - ('𖬀', '𖭅'), - ('𖭐', '𖭙'), - ('𖭛', '𖭡'), - ('𖭣', '𖭷'), - ('𖭽', '𖮏'), - ('𛰀', '𛱪'), - ('𛱰', '𛱼'), - ('𛲀', '𛲈'), - ('𛲐', '𛲙'), - ('𛲜', '\u{1bca3}'), - ('𞠀', '𞣄'), - ('𞣇', '\u{1e8d6}'), - ('🂿', '🂿'), - ('🃠', '🃵'), - ('🄋', '🄌'), - ('🌡', '🌬'), - ('🌶', '🌶'), - ('🍽', '🍽'), - ('🎔', '🎟'), - ('🏅', '🏅'), - ('🏋', '🏎'), - ('🏔', '🏟'), - ('🏱', '🏷'), - ('🐿', '🐿'), - ('👁', '👁'), - ('📸', '📸'), - ('📽', '📾'), - ('🔾', '🔿'), - ('🕄', '🕊'), - ('🕨', '🕹'), - ('🕻', '🖣'), - ('🖥', '🗺'), - ('🙁', '🙂'), - ('🙐', '🙿'), - ('🛆', '🛏'), - ('🛠', '🛬'), - ('🛰', '🛳'), - ('🞀', '🟔'), - ('🠀', '🠋'), - ('🠐', '🡇'), - ('🡐', '🡙'), - ('🡠', '🢇'), - ('🢐', '🢭'), -]; - -pub 
const V8_0: &'static [(char, char)] = &[ - ('ࢳ', 'ࢴ'), - ('\u{8e3}', '\u{8e3}'), - ('ૹ', 'ૹ'), - ('ౚ', 'ౚ'), - ('ൟ', 'ൟ'), - ('Ᏽ', 'Ᏽ'), - ('ᏸ', 'ᏽ'), - ('₾', '₾'), - ('↊', '↋'), - ('⯬', '⯯'), - ('鿍', '鿕'), - ('\u{a69e}', '\u{a69e}'), - ('ꞏ', 'ꞏ'), - ('Ʝ', 'ꞷ'), - ('꣼', 'ꣽ'), - ('ꭠ', 'ꭣ'), - ('ꭰ', 'ꮿ'), - ('\u{fe2e}', '\u{fe2f}'), - ('𐣠', '𐣲'), - ('𐣴', '𐣵'), - ('𐣻', '𐣿'), - ('𐦼', '𐦽'), - ('𐧀', '𐧏'), - ('𐧒', '𐧿'), - ('𐲀', '𐲲'), - ('𐳀', '𐳲'), - ('𐳺', '𐳿'), - ('\u{111c9}', '\u{111cc}'), - ('𑇛', '𑇟'), - ('𑊀', '𑊆'), - ('𑊈', '𑊈'), - ('𑊊', '𑊍'), - ('𑊏', '𑊝'), - ('𑊟', '𑊩'), - ('\u{11300}', '\u{11300}'), - ('𑍐', '𑍐'), - ('𑗊', '\u{115dd}'), - ('𑜀', '𑜙'), - ('\u{1171d}', '\u{1172b}'), - ('𑜰', '𑜿'), - ('𒎙', '𒎙'), - ('𒒀', '𒕃'), - ('𔐀', '𔙆'), - ('𝇞', '𝇨'), - ('𝠀', '𝪋'), - ('\u{1da9b}', '\u{1da9f}'), - ('\u{1daa1}', '\u{1daaf}'), - ('🌭', '🌯'), - ('🍾', '🍿'), - ('🏏', '🏓'), - ('🏸', '🏿'), - ('📿', '📿'), - ('🕋', '🕏'), - ('🙃', '🙄'), - ('🛐', '🛐'), - ('🤐', '🤘'), - ('🦀', '🦄'), - ('🧀', '🧀'), - ('𫠠', '𬺡'), -]; - -pub const V9_0: &'static [(char, char)] = &[ - ('ࢶ', 'ࢽ'), - ('\u{8d4}', '\u{8e2}'), - ('ಀ', 'ಀ'), - ('൏', '൏'), - ('ൔ', 'ൖ'), - ('൘', '൞'), - ('൶', '൸'), - ('ᲀ', 'ᲈ'), - ('\u{1dfb}', '\u{1dfb}'), - ('⏻', '⏾'), - ('⹃', '⹄'), - ('Ɪ', 'Ɪ'), - ('\u{a8c5}', '\u{a8c5}'), - ('𐆍', '𐆎'), - ('𐒰', '𐓓'), - ('𐓘', '𐓻'), - ('\u{1123e}', '\u{1123e}'), - ('𑐀', '𑑙'), - ('𑑛', '𑑛'), - ('𑑝', '𑑝'), - ('𑙠', '𑙬'), - ('𑰀', '𑰈'), - ('𑰊', '\u{11c36}'), - ('\u{11c38}', '𑱅'), - ('𑱐', '𑱬'), - ('𑱰', '𑲏'), - ('\u{11c92}', '\u{11ca7}'), - ('𑲩', '\u{11cb6}'), - ('𖿠', '𖿠'), - ('𗀀', '𘟬'), - ('𘠀', '𘫲'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), - ('𞤀', '\u{1e94a}'), - ('𞥐', '𞥙'), - ('𞥞', '𞥟'), - ('🆛', '🆬'), - ('🈻', '🈻'), - ('🕺', '🕺'), - ('🖤', '🖤'), - ('🛑', '🛒'), - ('🛴', '🛶'), - ('🤙', '🤞'), - ('🤠', '🤧'), - ('🤰', '🤰'), - ('🤳', '🤾'), - ('🥀', '🥋'), - ('🥐', '🥞'), - ('🦅', '🦑'), -]; diff --git 
a/vendor/regex-syntax/src/unicode_tables/case_folding_simple.rs b/vendor/regex-syntax/src/unicode_tables/case_folding_simple.rs deleted file mode 100644 index 07f6ff2f..00000000 --- a/vendor/regex-syntax/src/unicode_tables/case_folding_simple.rs +++ /dev/null @@ -1,2948 +0,0 @@ -// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: -// -// ucd-generate case-folding-simple ucd-16.0.0 --chars --all-pairs -// -// Unicode version: 16.0.0. -// -// ucd-generate 0.3.1 is available on crates.io. - -pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ - ('A', &['a']), - ('B', &['b']), - ('C', &['c']), - ('D', &['d']), - ('E', &['e']), - ('F', &['f']), - ('G', &['g']), - ('H', &['h']), - ('I', &['i']), - ('J', &['j']), - ('K', &['k', 'K']), - ('L', &['l']), - ('M', &['m']), - ('N', &['n']), - ('O', &['o']), - ('P', &['p']), - ('Q', &['q']), - ('R', &['r']), - ('S', &['s', 'ſ']), - ('T', &['t']), - ('U', &['u']), - ('V', &['v']), - ('W', &['w']), - ('X', &['x']), - ('Y', &['y']), - ('Z', &['z']), - ('a', &['A']), - ('b', &['B']), - ('c', &['C']), - ('d', &['D']), - ('e', &['E']), - ('f', &['F']), - ('g', &['G']), - ('h', &['H']), - ('i', &['I']), - ('j', &['J']), - ('k', &['K', 'K']), - ('l', &['L']), - ('m', &['M']), - ('n', &['N']), - ('o', &['O']), - ('p', &['P']), - ('q', &['Q']), - ('r', &['R']), - ('s', &['S', 'ſ']), - ('t', &['T']), - ('u', &['U']), - ('v', &['V']), - ('w', &['W']), - ('x', &['X']), - ('y', &['Y']), - ('z', &['Z']), - ('µ', &['Μ', 'μ']), - ('À', &['à']), - ('Á', &['á']), - ('Â', &['â']), - ('Ã', &['ã']), - ('Ä', &['ä']), - ('Å', &['å', 'Å']), - ('Æ', &['æ']), - ('Ç', &['ç']), - ('È', &['è']), - ('É', &['é']), - ('Ê', &['ê']), - ('Ë', &['ë']), - ('Ì', &['ì']), - ('Í', &['í']), - ('Î', &['î']), - ('Ï', &['ï']), - ('Ð', &['ð']), - ('Ñ', &['ñ']), - ('Ò', &['ò']), - ('Ó', &['ó']), - ('Ô', &['ô']), - ('Õ', &['õ']), - ('Ö', &['ö']), - ('Ø', &['ø']), - ('Ù', &['ù']), - ('Ú', &['ú']), - ('Û', &['û']), - ('Ü', &['ü']), - ('Ý', &['ý']), - 
('Þ', &['þ']), - ('ß', &['ẞ']), - ('à', &['À']), - ('á', &['Á']), - ('â', &['Â']), - ('ã', &['Ã']), - ('ä', &['Ä']), - ('å', &['Å', 'Å']), - ('æ', &['Æ']), - ('ç', &['Ç']), - ('è', &['È']), - ('é', &['É']), - ('ê', &['Ê']), - ('ë', &['Ë']), - ('ì', &['Ì']), - ('í', &['Í']), - ('î', &['Î']), - ('ï', &['Ï']), - ('ð', &['Ð']), - ('ñ', &['Ñ']), - ('ò', &['Ò']), - ('ó', &['Ó']), - ('ô', &['Ô']), - ('õ', &['Õ']), - ('ö', &['Ö']), - ('ø', &['Ø']), - ('ù', &['Ù']), - ('ú', &['Ú']), - ('û', &['Û']), - ('ü', &['Ü']), - ('ý', &['Ý']), - ('þ', &['Þ']), - ('ÿ', &['Ÿ']), - ('Ā', &['ā']), - ('ā', &['Ā']), - ('Ă', &['ă']), - ('ă', &['Ă']), - ('Ą', &['ą']), - ('ą', &['Ą']), - ('Ć', &['ć']), - ('ć', &['Ć']), - ('Ĉ', &['ĉ']), - ('ĉ', &['Ĉ']), - ('Ċ', &['ċ']), - ('ċ', &['Ċ']), - ('Č', &['č']), - ('č', &['Č']), - ('Ď', &['ď']), - ('ď', &['Ď']), - ('Đ', &['đ']), - ('đ', &['Đ']), - ('Ē', &['ē']), - ('ē', &['Ē']), - ('Ĕ', &['ĕ']), - ('ĕ', &['Ĕ']), - ('Ė', &['ė']), - ('ė', &['Ė']), - ('Ę', &['ę']), - ('ę', &['Ę']), - ('Ě', &['ě']), - ('ě', &['Ě']), - ('Ĝ', &['ĝ']), - ('ĝ', &['Ĝ']), - ('Ğ', &['ğ']), - ('ğ', &['Ğ']), - ('Ġ', &['ġ']), - ('ġ', &['Ġ']), - ('Ģ', &['ģ']), - ('ģ', &['Ģ']), - ('Ĥ', &['ĥ']), - ('ĥ', &['Ĥ']), - ('Ħ', &['ħ']), - ('ħ', &['Ħ']), - ('Ĩ', &['ĩ']), - ('ĩ', &['Ĩ']), - ('Ī', &['ī']), - ('ī', &['Ī']), - ('Ĭ', &['ĭ']), - ('ĭ', &['Ĭ']), - ('Į', &['į']), - ('į', &['Į']), - ('IJ', &['ij']), - ('ij', &['IJ']), - ('Ĵ', &['ĵ']), - ('ĵ', &['Ĵ']), - ('Ķ', &['ķ']), - ('ķ', &['Ķ']), - ('Ĺ', &['ĺ']), - ('ĺ', &['Ĺ']), - ('Ļ', &['ļ']), - ('ļ', &['Ļ']), - ('Ľ', &['ľ']), - ('ľ', &['Ľ']), - ('Ŀ', &['ŀ']), - ('ŀ', &['Ŀ']), - ('Ł', &['ł']), - ('ł', &['Ł']), - ('Ń', &['ń']), - ('ń', &['Ń']), - ('Ņ', &['ņ']), - ('ņ', &['Ņ']), - ('Ň', &['ň']), - ('ň', &['Ň']), - ('Ŋ', &['ŋ']), - ('ŋ', &['Ŋ']), - ('Ō', &['ō']), - ('ō', &['Ō']), - ('Ŏ', &['ŏ']), - ('ŏ', &['Ŏ']), - ('Ő', &['ő']), - ('ő', &['Ő']), - ('Œ', &['œ']), - ('œ', &['Œ']), - ('Ŕ', &['ŕ']), - ('ŕ', &['Ŕ']), - ('Ŗ', &['ŗ']), - ('ŗ', &['Ŗ']), - 
('Ř', &['ř']), - ('ř', &['Ř']), - ('Ś', &['ś']), - ('ś', &['Ś']), - ('Ŝ', &['ŝ']), - ('ŝ', &['Ŝ']), - ('Ş', &['ş']), - ('ş', &['Ş']), - ('Š', &['š']), - ('š', &['Š']), - ('Ţ', &['ţ']), - ('ţ', &['Ţ']), - ('Ť', &['ť']), - ('ť', &['Ť']), - ('Ŧ', &['ŧ']), - ('ŧ', &['Ŧ']), - ('Ũ', &['ũ']), - ('ũ', &['Ũ']), - ('Ū', &['ū']), - ('ū', &['Ū']), - ('Ŭ', &['ŭ']), - ('ŭ', &['Ŭ']), - ('Ů', &['ů']), - ('ů', &['Ů']), - ('Ű', &['ű']), - ('ű', &['Ű']), - ('Ų', &['ų']), - ('ų', &['Ų']), - ('Ŵ', &['ŵ']), - ('ŵ', &['Ŵ']), - ('Ŷ', &['ŷ']), - ('ŷ', &['Ŷ']), - ('Ÿ', &['ÿ']), - ('Ź', &['ź']), - ('ź', &['Ź']), - ('Ż', &['ż']), - ('ż', &['Ż']), - ('Ž', &['ž']), - ('ž', &['Ž']), - ('ſ', &['S', 's']), - ('ƀ', &['Ƀ']), - ('Ɓ', &['ɓ']), - ('Ƃ', &['ƃ']), - ('ƃ', &['Ƃ']), - ('Ƅ', &['ƅ']), - ('ƅ', &['Ƅ']), - ('Ɔ', &['ɔ']), - ('Ƈ', &['ƈ']), - ('ƈ', &['Ƈ']), - ('Ɖ', &['ɖ']), - ('Ɗ', &['ɗ']), - ('Ƌ', &['ƌ']), - ('ƌ', &['Ƌ']), - ('Ǝ', &['ǝ']), - ('Ə', &['ə']), - ('Ɛ', &['ɛ']), - ('Ƒ', &['ƒ']), - ('ƒ', &['Ƒ']), - ('Ɠ', &['ɠ']), - ('Ɣ', &['ɣ']), - ('ƕ', &['Ƕ']), - ('Ɩ', &['ɩ']), - ('Ɨ', &['ɨ']), - ('Ƙ', &['ƙ']), - ('ƙ', &['Ƙ']), - ('ƚ', &['Ƚ']), - ('ƛ', &['Ƛ']), - ('Ɯ', &['ɯ']), - ('Ɲ', &['ɲ']), - ('ƞ', &['Ƞ']), - ('Ɵ', &['ɵ']), - ('Ơ', &['ơ']), - ('ơ', &['Ơ']), - ('Ƣ', &['ƣ']), - ('ƣ', &['Ƣ']), - ('Ƥ', &['ƥ']), - ('ƥ', &['Ƥ']), - ('Ʀ', &['ʀ']), - ('Ƨ', &['ƨ']), - ('ƨ', &['Ƨ']), - ('Ʃ', &['ʃ']), - ('Ƭ', &['ƭ']), - ('ƭ', &['Ƭ']), - ('Ʈ', &['ʈ']), - ('Ư', &['ư']), - ('ư', &['Ư']), - ('Ʊ', &['ʊ']), - ('Ʋ', &['ʋ']), - ('Ƴ', &['ƴ']), - ('ƴ', &['Ƴ']), - ('Ƶ', &['ƶ']), - ('ƶ', &['Ƶ']), - ('Ʒ', &['ʒ']), - ('Ƹ', &['ƹ']), - ('ƹ', &['Ƹ']), - ('Ƽ', &['ƽ']), - ('ƽ', &['Ƽ']), - ('ƿ', &['Ƿ']), - ('DŽ', &['Dž', 'dž']), - ('Dž', &['DŽ', 'dž']), - ('dž', &['DŽ', 'Dž']), - ('LJ', &['Lj', 'lj']), - ('Lj', &['LJ', 'lj']), - ('lj', &['LJ', 'Lj']), - ('NJ', &['Nj', 'nj']), - ('Nj', &['NJ', 'nj']), - ('nj', &['NJ', 'Nj']), - ('Ǎ', &['ǎ']), - ('ǎ', &['Ǎ']), - ('Ǐ', &['ǐ']), - ('ǐ', &['Ǐ']), - ('Ǒ', &['ǒ']), - ('ǒ', &['Ǒ']), - 
('Ǔ', &['ǔ']), - ('ǔ', &['Ǔ']), - ('Ǖ', &['ǖ']), - ('ǖ', &['Ǖ']), - ('Ǘ', &['ǘ']), - ('ǘ', &['Ǘ']), - ('Ǚ', &['ǚ']), - ('ǚ', &['Ǚ']), - ('Ǜ', &['ǜ']), - ('ǜ', &['Ǜ']), - ('ǝ', &['Ǝ']), - ('Ǟ', &['ǟ']), - ('ǟ', &['Ǟ']), - ('Ǡ', &['ǡ']), - ('ǡ', &['Ǡ']), - ('Ǣ', &['ǣ']), - ('ǣ', &['Ǣ']), - ('Ǥ', &['ǥ']), - ('ǥ', &['Ǥ']), - ('Ǧ', &['ǧ']), - ('ǧ', &['Ǧ']), - ('Ǩ', &['ǩ']), - ('ǩ', &['Ǩ']), - ('Ǫ', &['ǫ']), - ('ǫ', &['Ǫ']), - ('Ǭ', &['ǭ']), - ('ǭ', &['Ǭ']), - ('Ǯ', &['ǯ']), - ('ǯ', &['Ǯ']), - ('DZ', &['Dz', 'dz']), - ('Dz', &['DZ', 'dz']), - ('dz', &['DZ', 'Dz']), - ('Ǵ', &['ǵ']), - ('ǵ', &['Ǵ']), - ('Ƕ', &['ƕ']), - ('Ƿ', &['ƿ']), - ('Ǹ', &['ǹ']), - ('ǹ', &['Ǹ']), - ('Ǻ', &['ǻ']), - ('ǻ', &['Ǻ']), - ('Ǽ', &['ǽ']), - ('ǽ', &['Ǽ']), - ('Ǿ', &['ǿ']), - ('ǿ', &['Ǿ']), - ('Ȁ', &['ȁ']), - ('ȁ', &['Ȁ']), - ('Ȃ', &['ȃ']), - ('ȃ', &['Ȃ']), - ('Ȅ', &['ȅ']), - ('ȅ', &['Ȅ']), - ('Ȇ', &['ȇ']), - ('ȇ', &['Ȇ']), - ('Ȉ', &['ȉ']), - ('ȉ', &['Ȉ']), - ('Ȋ', &['ȋ']), - ('ȋ', &['Ȋ']), - ('Ȍ', &['ȍ']), - ('ȍ', &['Ȍ']), - ('Ȏ', &['ȏ']), - ('ȏ', &['Ȏ']), - ('Ȑ', &['ȑ']), - ('ȑ', &['Ȑ']), - ('Ȓ', &['ȓ']), - ('ȓ', &['Ȓ']), - ('Ȕ', &['ȕ']), - ('ȕ', &['Ȕ']), - ('Ȗ', &['ȗ']), - ('ȗ', &['Ȗ']), - ('Ș', &['ș']), - ('ș', &['Ș']), - ('Ț', &['ț']), - ('ț', &['Ț']), - ('Ȝ', &['ȝ']), - ('ȝ', &['Ȝ']), - ('Ȟ', &['ȟ']), - ('ȟ', &['Ȟ']), - ('Ƞ', &['ƞ']), - ('Ȣ', &['ȣ']), - ('ȣ', &['Ȣ']), - ('Ȥ', &['ȥ']), - ('ȥ', &['Ȥ']), - ('Ȧ', &['ȧ']), - ('ȧ', &['Ȧ']), - ('Ȩ', &['ȩ']), - ('ȩ', &['Ȩ']), - ('Ȫ', &['ȫ']), - ('ȫ', &['Ȫ']), - ('Ȭ', &['ȭ']), - ('ȭ', &['Ȭ']), - ('Ȯ', &['ȯ']), - ('ȯ', &['Ȯ']), - ('Ȱ', &['ȱ']), - ('ȱ', &['Ȱ']), - ('Ȳ', &['ȳ']), - ('ȳ', &['Ȳ']), - ('Ⱥ', &['ⱥ']), - ('Ȼ', &['ȼ']), - ('ȼ', &['Ȼ']), - ('Ƚ', &['ƚ']), - ('Ⱦ', &['ⱦ']), - ('ȿ', &['Ȿ']), - ('ɀ', &['Ɀ']), - ('Ɂ', &['ɂ']), - ('ɂ', &['Ɂ']), - ('Ƀ', &['ƀ']), - ('Ʉ', &['ʉ']), - ('Ʌ', &['ʌ']), - ('Ɇ', &['ɇ']), - ('ɇ', &['Ɇ']), - ('Ɉ', &['ɉ']), - ('ɉ', &['Ɉ']), - ('Ɋ', &['ɋ']), - ('ɋ', &['Ɋ']), - ('Ɍ', &['ɍ']), - ('ɍ', &['Ɍ']), - ('Ɏ', &['ɏ']), - 
('ɏ', &['Ɏ']), - ('ɐ', &['Ɐ']), - ('ɑ', &['Ɑ']), - ('ɒ', &['Ɒ']), - ('ɓ', &['Ɓ']), - ('ɔ', &['Ɔ']), - ('ɖ', &['Ɖ']), - ('ɗ', &['Ɗ']), - ('ə', &['Ə']), - ('ɛ', &['Ɛ']), - ('ɜ', &['Ɜ']), - ('ɠ', &['Ɠ']), - ('ɡ', &['Ɡ']), - ('ɣ', &['Ɣ']), - ('ɤ', &['Ɤ']), - ('ɥ', &['Ɥ']), - ('ɦ', &['Ɦ']), - ('ɨ', &['Ɨ']), - ('ɩ', &['Ɩ']), - ('ɪ', &['Ɪ']), - ('ɫ', &['Ɫ']), - ('ɬ', &['Ɬ']), - ('ɯ', &['Ɯ']), - ('ɱ', &['Ɱ']), - ('ɲ', &['Ɲ']), - ('ɵ', &['Ɵ']), - ('ɽ', &['Ɽ']), - ('ʀ', &['Ʀ']), - ('ʂ', &['Ʂ']), - ('ʃ', &['Ʃ']), - ('ʇ', &['Ʇ']), - ('ʈ', &['Ʈ']), - ('ʉ', &['Ʉ']), - ('ʊ', &['Ʊ']), - ('ʋ', &['Ʋ']), - ('ʌ', &['Ʌ']), - ('ʒ', &['Ʒ']), - ('ʝ', &['Ʝ']), - ('ʞ', &['Ʞ']), - ('\u{345}', &['Ι', 'ι', 'ι']), - ('Ͱ', &['ͱ']), - ('ͱ', &['Ͱ']), - ('Ͳ', &['ͳ']), - ('ͳ', &['Ͳ']), - ('Ͷ', &['ͷ']), - ('ͷ', &['Ͷ']), - ('ͻ', &['Ͻ']), - ('ͼ', &['Ͼ']), - ('ͽ', &['Ͽ']), - ('Ϳ', &['ϳ']), - ('Ά', &['ά']), - ('Έ', &['έ']), - ('Ή', &['ή']), - ('Ί', &['ί']), - ('Ό', &['ό']), - ('Ύ', &['ύ']), - ('Ώ', &['ώ']), - ('ΐ', &['ΐ']), - ('Α', &['α']), - ('Β', &['β', 'ϐ']), - ('Γ', &['γ']), - ('Δ', &['δ']), - ('Ε', &['ε', 'ϵ']), - ('Ζ', &['ζ']), - ('Η', &['η']), - ('Θ', &['θ', 'ϑ', 'ϴ']), - ('Ι', &['\u{345}', 'ι', 'ι']), - ('Κ', &['κ', 'ϰ']), - ('Λ', &['λ']), - ('Μ', &['µ', 'μ']), - ('Ν', &['ν']), - ('Ξ', &['ξ']), - ('Ο', &['ο']), - ('Π', &['π', 'ϖ']), - ('Ρ', &['ρ', 'ϱ']), - ('Σ', &['ς', 'σ']), - ('Τ', &['τ']), - ('Υ', &['υ']), - ('Φ', &['φ', 'ϕ']), - ('Χ', &['χ']), - ('Ψ', &['ψ']), - ('Ω', &['ω', 'Ω']), - ('Ϊ', &['ϊ']), - ('Ϋ', &['ϋ']), - ('ά', &['Ά']), - ('έ', &['Έ']), - ('ή', &['Ή']), - ('ί', &['Ί']), - ('ΰ', &['ΰ']), - ('α', &['Α']), - ('β', &['Β', 'ϐ']), - ('γ', &['Γ']), - ('δ', &['Δ']), - ('ε', &['Ε', 'ϵ']), - ('ζ', &['Ζ']), - ('η', &['Η']), - ('θ', &['Θ', 'ϑ', 'ϴ']), - ('ι', &['\u{345}', 'Ι', 'ι']), - ('κ', &['Κ', 'ϰ']), - ('λ', &['Λ']), - ('μ', &['µ', 'Μ']), - ('ν', &['Ν']), - ('ξ', &['Ξ']), - ('ο', &['Ο']), - ('π', &['Π', 'ϖ']), - ('ρ', &['Ρ', 'ϱ']), - ('ς', &['Σ', 'σ']), - ('σ', &['Σ', 'ς']), - ('τ', 
&['Τ']), - ('υ', &['Υ']), - ('φ', &['Φ', 'ϕ']), - ('χ', &['Χ']), - ('ψ', &['Ψ']), - ('ω', &['Ω', 'Ω']), - ('ϊ', &['Ϊ']), - ('ϋ', &['Ϋ']), - ('ό', &['Ό']), - ('ύ', &['Ύ']), - ('ώ', &['Ώ']), - ('Ϗ', &['ϗ']), - ('ϐ', &['Β', 'β']), - ('ϑ', &['Θ', 'θ', 'ϴ']), - ('ϕ', &['Φ', 'φ']), - ('ϖ', &['Π', 'π']), - ('ϗ', &['Ϗ']), - ('Ϙ', &['ϙ']), - ('ϙ', &['Ϙ']), - ('Ϛ', &['ϛ']), - ('ϛ', &['Ϛ']), - ('Ϝ', &['ϝ']), - ('ϝ', &['Ϝ']), - ('Ϟ', &['ϟ']), - ('ϟ', &['Ϟ']), - ('Ϡ', &['ϡ']), - ('ϡ', &['Ϡ']), - ('Ϣ', &['ϣ']), - ('ϣ', &['Ϣ']), - ('Ϥ', &['ϥ']), - ('ϥ', &['Ϥ']), - ('Ϧ', &['ϧ']), - ('ϧ', &['Ϧ']), - ('Ϩ', &['ϩ']), - ('ϩ', &['Ϩ']), - ('Ϫ', &['ϫ']), - ('ϫ', &['Ϫ']), - ('Ϭ', &['ϭ']), - ('ϭ', &['Ϭ']), - ('Ϯ', &['ϯ']), - ('ϯ', &['Ϯ']), - ('ϰ', &['Κ', 'κ']), - ('ϱ', &['Ρ', 'ρ']), - ('ϲ', &['Ϲ']), - ('ϳ', &['Ϳ']), - ('ϴ', &['Θ', 'θ', 'ϑ']), - ('ϵ', &['Ε', 'ε']), - ('Ϸ', &['ϸ']), - ('ϸ', &['Ϸ']), - ('Ϲ', &['ϲ']), - ('Ϻ', &['ϻ']), - ('ϻ', &['Ϻ']), - ('Ͻ', &['ͻ']), - ('Ͼ', &['ͼ']), - ('Ͽ', &['ͽ']), - ('Ѐ', &['ѐ']), - ('Ё', &['ё']), - ('Ђ', &['ђ']), - ('Ѓ', &['ѓ']), - ('Є', &['є']), - ('Ѕ', &['ѕ']), - ('І', &['і']), - ('Ї', &['ї']), - ('Ј', &['ј']), - ('Љ', &['љ']), - ('Њ', &['њ']), - ('Ћ', &['ћ']), - ('Ќ', &['ќ']), - ('Ѝ', &['ѝ']), - ('Ў', &['ў']), - ('Џ', &['џ']), - ('А', &['а']), - ('Б', &['б']), - ('В', &['в', 'ᲀ']), - ('Г', &['г']), - ('Д', &['д', 'ᲁ']), - ('Е', &['е']), - ('Ж', &['ж']), - ('З', &['з']), - ('И', &['и']), - ('Й', &['й']), - ('К', &['к']), - ('Л', &['л']), - ('М', &['м']), - ('Н', &['н']), - ('О', &['о', 'ᲂ']), - ('П', &['п']), - ('Р', &['р']), - ('С', &['с', 'ᲃ']), - ('Т', &['т', 'ᲄ', 'ᲅ']), - ('У', &['у']), - ('Ф', &['ф']), - ('Х', &['х']), - ('Ц', &['ц']), - ('Ч', &['ч']), - ('Ш', &['ш']), - ('Щ', &['щ']), - ('Ъ', &['ъ', 'ᲆ']), - ('Ы', &['ы']), - ('Ь', &['ь']), - ('Э', &['э']), - ('Ю', &['ю']), - ('Я', &['я']), - ('а', &['А']), - ('б', &['Б']), - ('в', &['В', 'ᲀ']), - ('г', &['Г']), - ('д', &['Д', 'ᲁ']), - ('е', &['Е']), - ('ж', &['Ж']), - ('з', &['З']), - ('и', 
&['И']), - ('й', &['Й']), - ('к', &['К']), - ('л', &['Л']), - ('м', &['М']), - ('н', &['Н']), - ('о', &['О', 'ᲂ']), - ('п', &['П']), - ('р', &['Р']), - ('с', &['С', 'ᲃ']), - ('т', &['Т', 'ᲄ', 'ᲅ']), - ('у', &['У']), - ('ф', &['Ф']), - ('х', &['Х']), - ('ц', &['Ц']), - ('ч', &['Ч']), - ('ш', &['Ш']), - ('щ', &['Щ']), - ('ъ', &['Ъ', 'ᲆ']), - ('ы', &['Ы']), - ('ь', &['Ь']), - ('э', &['Э']), - ('ю', &['Ю']), - ('я', &['Я']), - ('ѐ', &['Ѐ']), - ('ё', &['Ё']), - ('ђ', &['Ђ']), - ('ѓ', &['Ѓ']), - ('є', &['Є']), - ('ѕ', &['Ѕ']), - ('і', &['І']), - ('ї', &['Ї']), - ('ј', &['Ј']), - ('љ', &['Љ']), - ('њ', &['Њ']), - ('ћ', &['Ћ']), - ('ќ', &['Ќ']), - ('ѝ', &['Ѝ']), - ('ў', &['Ў']), - ('џ', &['Џ']), - ('Ѡ', &['ѡ']), - ('ѡ', &['Ѡ']), - ('Ѣ', &['ѣ', 'ᲇ']), - ('ѣ', &['Ѣ', 'ᲇ']), - ('Ѥ', &['ѥ']), - ('ѥ', &['Ѥ']), - ('Ѧ', &['ѧ']), - ('ѧ', &['Ѧ']), - ('Ѩ', &['ѩ']), - ('ѩ', &['Ѩ']), - ('Ѫ', &['ѫ']), - ('ѫ', &['Ѫ']), - ('Ѭ', &['ѭ']), - ('ѭ', &['Ѭ']), - ('Ѯ', &['ѯ']), - ('ѯ', &['Ѯ']), - ('Ѱ', &['ѱ']), - ('ѱ', &['Ѱ']), - ('Ѳ', &['ѳ']), - ('ѳ', &['Ѳ']), - ('Ѵ', &['ѵ']), - ('ѵ', &['Ѵ']), - ('Ѷ', &['ѷ']), - ('ѷ', &['Ѷ']), - ('Ѹ', &['ѹ']), - ('ѹ', &['Ѹ']), - ('Ѻ', &['ѻ']), - ('ѻ', &['Ѻ']), - ('Ѽ', &['ѽ']), - ('ѽ', &['Ѽ']), - ('Ѿ', &['ѿ']), - ('ѿ', &['Ѿ']), - ('Ҁ', &['ҁ']), - ('ҁ', &['Ҁ']), - ('Ҋ', &['ҋ']), - ('ҋ', &['Ҋ']), - ('Ҍ', &['ҍ']), - ('ҍ', &['Ҍ']), - ('Ҏ', &['ҏ']), - ('ҏ', &['Ҏ']), - ('Ґ', &['ґ']), - ('ґ', &['Ґ']), - ('Ғ', &['ғ']), - ('ғ', &['Ғ']), - ('Ҕ', &['ҕ']), - ('ҕ', &['Ҕ']), - ('Җ', &['җ']), - ('җ', &['Җ']), - ('Ҙ', &['ҙ']), - ('ҙ', &['Ҙ']), - ('Қ', &['қ']), - ('қ', &['Қ']), - ('Ҝ', &['ҝ']), - ('ҝ', &['Ҝ']), - ('Ҟ', &['ҟ']), - ('ҟ', &['Ҟ']), - ('Ҡ', &['ҡ']), - ('ҡ', &['Ҡ']), - ('Ң', &['ң']), - ('ң', &['Ң']), - ('Ҥ', &['ҥ']), - ('ҥ', &['Ҥ']), - ('Ҧ', &['ҧ']), - ('ҧ', &['Ҧ']), - ('Ҩ', &['ҩ']), - ('ҩ', &['Ҩ']), - ('Ҫ', &['ҫ']), - ('ҫ', &['Ҫ']), - ('Ҭ', &['ҭ']), - ('ҭ', &['Ҭ']), - ('Ү', &['ү']), - ('ү', &['Ү']), - ('Ұ', &['ұ']), - ('ұ', &['Ұ']), - ('Ҳ', &['ҳ']), - ('ҳ', &['Ҳ']), 
- ('Ҵ', &['ҵ']), - ('ҵ', &['Ҵ']), - ('Ҷ', &['ҷ']), - ('ҷ', &['Ҷ']), - ('Ҹ', &['ҹ']), - ('ҹ', &['Ҹ']), - ('Һ', &['һ']), - ('һ', &['Һ']), - ('Ҽ', &['ҽ']), - ('ҽ', &['Ҽ']), - ('Ҿ', &['ҿ']), - ('ҿ', &['Ҿ']), - ('Ӏ', &['ӏ']), - ('Ӂ', &['ӂ']), - ('ӂ', &['Ӂ']), - ('Ӄ', &['ӄ']), - ('ӄ', &['Ӄ']), - ('Ӆ', &['ӆ']), - ('ӆ', &['Ӆ']), - ('Ӈ', &['ӈ']), - ('ӈ', &['Ӈ']), - ('Ӊ', &['ӊ']), - ('ӊ', &['Ӊ']), - ('Ӌ', &['ӌ']), - ('ӌ', &['Ӌ']), - ('Ӎ', &['ӎ']), - ('ӎ', &['Ӎ']), - ('ӏ', &['Ӏ']), - ('Ӑ', &['ӑ']), - ('ӑ', &['Ӑ']), - ('Ӓ', &['ӓ']), - ('ӓ', &['Ӓ']), - ('Ӕ', &['ӕ']), - ('ӕ', &['Ӕ']), - ('Ӗ', &['ӗ']), - ('ӗ', &['Ӗ']), - ('Ә', &['ә']), - ('ә', &['Ә']), - ('Ӛ', &['ӛ']), - ('ӛ', &['Ӛ']), - ('Ӝ', &['ӝ']), - ('ӝ', &['Ӝ']), - ('Ӟ', &['ӟ']), - ('ӟ', &['Ӟ']), - ('Ӡ', &['ӡ']), - ('ӡ', &['Ӡ']), - ('Ӣ', &['ӣ']), - ('ӣ', &['Ӣ']), - ('Ӥ', &['ӥ']), - ('ӥ', &['Ӥ']), - ('Ӧ', &['ӧ']), - ('ӧ', &['Ӧ']), - ('Ө', &['ө']), - ('ө', &['Ө']), - ('Ӫ', &['ӫ']), - ('ӫ', &['Ӫ']), - ('Ӭ', &['ӭ']), - ('ӭ', &['Ӭ']), - ('Ӯ', &['ӯ']), - ('ӯ', &['Ӯ']), - ('Ӱ', &['ӱ']), - ('ӱ', &['Ӱ']), - ('Ӳ', &['ӳ']), - ('ӳ', &['Ӳ']), - ('Ӵ', &['ӵ']), - ('ӵ', &['Ӵ']), - ('Ӷ', &['ӷ']), - ('ӷ', &['Ӷ']), - ('Ӹ', &['ӹ']), - ('ӹ', &['Ӹ']), - ('Ӻ', &['ӻ']), - ('ӻ', &['Ӻ']), - ('Ӽ', &['ӽ']), - ('ӽ', &['Ӽ']), - ('Ӿ', &['ӿ']), - ('ӿ', &['Ӿ']), - ('Ԁ', &['ԁ']), - ('ԁ', &['Ԁ']), - ('Ԃ', &['ԃ']), - ('ԃ', &['Ԃ']), - ('Ԅ', &['ԅ']), - ('ԅ', &['Ԅ']), - ('Ԇ', &['ԇ']), - ('ԇ', &['Ԇ']), - ('Ԉ', &['ԉ']), - ('ԉ', &['Ԉ']), - ('Ԋ', &['ԋ']), - ('ԋ', &['Ԋ']), - ('Ԍ', &['ԍ']), - ('ԍ', &['Ԍ']), - ('Ԏ', &['ԏ']), - ('ԏ', &['Ԏ']), - ('Ԑ', &['ԑ']), - ('ԑ', &['Ԑ']), - ('Ԓ', &['ԓ']), - ('ԓ', &['Ԓ']), - ('Ԕ', &['ԕ']), - ('ԕ', &['Ԕ']), - ('Ԗ', &['ԗ']), - ('ԗ', &['Ԗ']), - ('Ԙ', &['ԙ']), - ('ԙ', &['Ԙ']), - ('Ԛ', &['ԛ']), - ('ԛ', &['Ԛ']), - ('Ԝ', &['ԝ']), - ('ԝ', &['Ԝ']), - ('Ԟ', &['ԟ']), - ('ԟ', &['Ԟ']), - ('Ԡ', &['ԡ']), - ('ԡ', &['Ԡ']), - ('Ԣ', &['ԣ']), - ('ԣ', &['Ԣ']), - ('Ԥ', &['ԥ']), - ('ԥ', &['Ԥ']), - ('Ԧ', &['ԧ']), - ('ԧ', &['Ԧ']), - ('Ԩ', &['ԩ']), - ('ԩ', 
&['Ԩ']), - ('Ԫ', &['ԫ']), - ('ԫ', &['Ԫ']), - ('Ԭ', &['ԭ']), - ('ԭ', &['Ԭ']), - ('Ԯ', &['ԯ']), - ('ԯ', &['Ԯ']), - ('Ա', &['ա']), - ('Բ', &['բ']), - ('Գ', &['գ']), - ('Դ', &['դ']), - ('Ե', &['ե']), - ('Զ', &['զ']), - ('Է', &['է']), - ('Ը', &['ը']), - ('Թ', &['թ']), - ('Ժ', &['ժ']), - ('Ի', &['ի']), - ('Լ', &['լ']), - ('Խ', &['խ']), - ('Ծ', &['ծ']), - ('Կ', &['կ']), - ('Հ', &['հ']), - ('Ձ', &['ձ']), - ('Ղ', &['ղ']), - ('Ճ', &['ճ']), - ('Մ', &['մ']), - ('Յ', &['յ']), - ('Ն', &['ն']), - ('Շ', &['շ']), - ('Ո', &['ո']), - ('Չ', &['չ']), - ('Պ', &['պ']), - ('Ջ', &['ջ']), - ('Ռ', &['ռ']), - ('Ս', &['ս']), - ('Վ', &['վ']), - ('Տ', &['տ']), - ('Ր', &['ր']), - ('Ց', &['ց']), - ('Ւ', &['ւ']), - ('Փ', &['փ']), - ('Ք', &['ք']), - ('Օ', &['օ']), - ('Ֆ', &['ֆ']), - ('ա', &['Ա']), - ('բ', &['Բ']), - ('գ', &['Գ']), - ('դ', &['Դ']), - ('ե', &['Ե']), - ('զ', &['Զ']), - ('է', &['Է']), - ('ը', &['Ը']), - ('թ', &['Թ']), - ('ժ', &['Ժ']), - ('ի', &['Ի']), - ('լ', &['Լ']), - ('խ', &['Խ']), - ('ծ', &['Ծ']), - ('կ', &['Կ']), - ('հ', &['Հ']), - ('ձ', &['Ձ']), - ('ղ', &['Ղ']), - ('ճ', &['Ճ']), - ('մ', &['Մ']), - ('յ', &['Յ']), - ('ն', &['Ն']), - ('շ', &['Շ']), - ('ո', &['Ո']), - ('չ', &['Չ']), - ('պ', &['Պ']), - ('ջ', &['Ջ']), - ('ռ', &['Ռ']), - ('ս', &['Ս']), - ('վ', &['Վ']), - ('տ', &['Տ']), - ('ր', &['Ր']), - ('ց', &['Ց']), - ('ւ', &['Ւ']), - ('փ', &['Փ']), - ('ք', &['Ք']), - ('օ', &['Օ']), - ('ֆ', &['Ֆ']), - ('Ⴀ', &['ⴀ']), - ('Ⴁ', &['ⴁ']), - ('Ⴂ', &['ⴂ']), - ('Ⴃ', &['ⴃ']), - ('Ⴄ', &['ⴄ']), - ('Ⴅ', &['ⴅ']), - ('Ⴆ', &['ⴆ']), - ('Ⴇ', &['ⴇ']), - ('Ⴈ', &['ⴈ']), - ('Ⴉ', &['ⴉ']), - ('Ⴊ', &['ⴊ']), - ('Ⴋ', &['ⴋ']), - ('Ⴌ', &['ⴌ']), - ('Ⴍ', &['ⴍ']), - ('Ⴎ', &['ⴎ']), - ('Ⴏ', &['ⴏ']), - ('Ⴐ', &['ⴐ']), - ('Ⴑ', &['ⴑ']), - ('Ⴒ', &['ⴒ']), - ('Ⴓ', &['ⴓ']), - ('Ⴔ', &['ⴔ']), - ('Ⴕ', &['ⴕ']), - ('Ⴖ', &['ⴖ']), - ('Ⴗ', &['ⴗ']), - ('Ⴘ', &['ⴘ']), - ('Ⴙ', &['ⴙ']), - ('Ⴚ', &['ⴚ']), - ('Ⴛ', &['ⴛ']), - ('Ⴜ', &['ⴜ']), - ('Ⴝ', &['ⴝ']), - ('Ⴞ', &['ⴞ']), - ('Ⴟ', &['ⴟ']), - ('Ⴠ', &['ⴠ']), - ('Ⴡ', &['ⴡ']), - ('Ⴢ', &['ⴢ']), - 
('Ⴣ', &['ⴣ']), - ('Ⴤ', &['ⴤ']), - ('Ⴥ', &['ⴥ']), - ('Ⴧ', &['ⴧ']), - ('Ⴭ', &['ⴭ']), - ('ა', &['Ა']), - ('ბ', &['Ბ']), - ('გ', &['Გ']), - ('დ', &['Დ']), - ('ე', &['Ე']), - ('ვ', &['Ვ']), - ('ზ', &['Ზ']), - ('თ', &['Თ']), - ('ი', &['Ი']), - ('კ', &['Კ']), - ('ლ', &['Ლ']), - ('მ', &['Მ']), - ('ნ', &['Ნ']), - ('ო', &['Ო']), - ('პ', &['Პ']), - ('ჟ', &['Ჟ']), - ('რ', &['Რ']), - ('ს', &['Ს']), - ('ტ', &['Ტ']), - ('უ', &['Უ']), - ('ფ', &['Ფ']), - ('ქ', &['Ქ']), - ('ღ', &['Ღ']), - ('ყ', &['Ყ']), - ('შ', &['Შ']), - ('ჩ', &['Ჩ']), - ('ც', &['Ც']), - ('ძ', &['Ძ']), - ('წ', &['Წ']), - ('ჭ', &['Ჭ']), - ('ხ', &['Ხ']), - ('ჯ', &['Ჯ']), - ('ჰ', &['Ჰ']), - ('ჱ', &['Ჱ']), - ('ჲ', &['Ჲ']), - ('ჳ', &['Ჳ']), - ('ჴ', &['Ჴ']), - ('ჵ', &['Ჵ']), - ('ჶ', &['Ჶ']), - ('ჷ', &['Ჷ']), - ('ჸ', &['Ჸ']), - ('ჹ', &['Ჹ']), - ('ჺ', &['Ჺ']), - ('ჽ', &['Ჽ']), - ('ჾ', &['Ჾ']), - ('ჿ', &['Ჿ']), - ('Ꭰ', &['ꭰ']), - ('Ꭱ', &['ꭱ']), - ('Ꭲ', &['ꭲ']), - ('Ꭳ', &['ꭳ']), - ('Ꭴ', &['ꭴ']), - ('Ꭵ', &['ꭵ']), - ('Ꭶ', &['ꭶ']), - ('Ꭷ', &['ꭷ']), - ('Ꭸ', &['ꭸ']), - ('Ꭹ', &['ꭹ']), - ('Ꭺ', &['ꭺ']), - ('Ꭻ', &['ꭻ']), - ('Ꭼ', &['ꭼ']), - ('Ꭽ', &['ꭽ']), - ('Ꭾ', &['ꭾ']), - ('Ꭿ', &['ꭿ']), - ('Ꮀ', &['ꮀ']), - ('Ꮁ', &['ꮁ']), - ('Ꮂ', &['ꮂ']), - ('Ꮃ', &['ꮃ']), - ('Ꮄ', &['ꮄ']), - ('Ꮅ', &['ꮅ']), - ('Ꮆ', &['ꮆ']), - ('Ꮇ', &['ꮇ']), - ('Ꮈ', &['ꮈ']), - ('Ꮉ', &['ꮉ']), - ('Ꮊ', &['ꮊ']), - ('Ꮋ', &['ꮋ']), - ('Ꮌ', &['ꮌ']), - ('Ꮍ', &['ꮍ']), - ('Ꮎ', &['ꮎ']), - ('Ꮏ', &['ꮏ']), - ('Ꮐ', &['ꮐ']), - ('Ꮑ', &['ꮑ']), - ('Ꮒ', &['ꮒ']), - ('Ꮓ', &['ꮓ']), - ('Ꮔ', &['ꮔ']), - ('Ꮕ', &['ꮕ']), - ('Ꮖ', &['ꮖ']), - ('Ꮗ', &['ꮗ']), - ('Ꮘ', &['ꮘ']), - ('Ꮙ', &['ꮙ']), - ('Ꮚ', &['ꮚ']), - ('Ꮛ', &['ꮛ']), - ('Ꮜ', &['ꮜ']), - ('Ꮝ', &['ꮝ']), - ('Ꮞ', &['ꮞ']), - ('Ꮟ', &['ꮟ']), - ('Ꮠ', &['ꮠ']), - ('Ꮡ', &['ꮡ']), - ('Ꮢ', &['ꮢ']), - ('Ꮣ', &['ꮣ']), - ('Ꮤ', &['ꮤ']), - ('Ꮥ', &['ꮥ']), - ('Ꮦ', &['ꮦ']), - ('Ꮧ', &['ꮧ']), - ('Ꮨ', &['ꮨ']), - ('Ꮩ', &['ꮩ']), - ('Ꮪ', &['ꮪ']), - ('Ꮫ', &['ꮫ']), - ('Ꮬ', &['ꮬ']), - ('Ꮭ', &['ꮭ']), - ('Ꮮ', &['ꮮ']), - ('Ꮯ', &['ꮯ']), - ('Ꮰ', &['ꮰ']), - ('Ꮱ', &['ꮱ']), - ('Ꮲ', 
&['ꮲ']), - ('Ꮳ', &['ꮳ']), - ('Ꮴ', &['ꮴ']), - ('Ꮵ', &['ꮵ']), - ('Ꮶ', &['ꮶ']), - ('Ꮷ', &['ꮷ']), - ('Ꮸ', &['ꮸ']), - ('Ꮹ', &['ꮹ']), - ('Ꮺ', &['ꮺ']), - ('Ꮻ', &['ꮻ']), - ('Ꮼ', &['ꮼ']), - ('Ꮽ', &['ꮽ']), - ('Ꮾ', &['ꮾ']), - ('Ꮿ', &['ꮿ']), - ('Ᏸ', &['ᏸ']), - ('Ᏹ', &['ᏹ']), - ('Ᏺ', &['ᏺ']), - ('Ᏻ', &['ᏻ']), - ('Ᏼ', &['ᏼ']), - ('Ᏽ', &['ᏽ']), - ('ᏸ', &['Ᏸ']), - ('ᏹ', &['Ᏹ']), - ('ᏺ', &['Ᏺ']), - ('ᏻ', &['Ᏻ']), - ('ᏼ', &['Ᏼ']), - ('ᏽ', &['Ᏽ']), - ('ᲀ', &['В', 'в']), - ('ᲁ', &['Д', 'д']), - ('ᲂ', &['О', 'о']), - ('ᲃ', &['С', 'с']), - ('ᲄ', &['Т', 'т', 'ᲅ']), - ('ᲅ', &['Т', 'т', 'ᲄ']), - ('ᲆ', &['Ъ', 'ъ']), - ('ᲇ', &['Ѣ', 'ѣ']), - ('ᲈ', &['Ꙋ', 'ꙋ']), - ('Ᲊ', &['ᲊ']), - ('ᲊ', &['Ᲊ']), - ('Ა', &['ა']), - ('Ბ', &['ბ']), - ('Გ', &['გ']), - ('Დ', &['დ']), - ('Ე', &['ე']), - ('Ვ', &['ვ']), - ('Ზ', &['ზ']), - ('Თ', &['თ']), - ('Ი', &['ი']), - ('Კ', &['კ']), - ('Ლ', &['ლ']), - ('Მ', &['მ']), - ('Ნ', &['ნ']), - ('Ო', &['ო']), - ('Პ', &['პ']), - ('Ჟ', &['ჟ']), - ('Რ', &['რ']), - ('Ს', &['ს']), - ('Ტ', &['ტ']), - ('Უ', &['უ']), - ('Ფ', &['ფ']), - ('Ქ', &['ქ']), - ('Ღ', &['ღ']), - ('Ყ', &['ყ']), - ('Შ', &['შ']), - ('Ჩ', &['ჩ']), - ('Ც', &['ც']), - ('Ძ', &['ძ']), - ('Წ', &['წ']), - ('Ჭ', &['ჭ']), - ('Ხ', &['ხ']), - ('Ჯ', &['ჯ']), - ('Ჰ', &['ჰ']), - ('Ჱ', &['ჱ']), - ('Ჲ', &['ჲ']), - ('Ჳ', &['ჳ']), - ('Ჴ', &['ჴ']), - ('Ჵ', &['ჵ']), - ('Ჶ', &['ჶ']), - ('Ჷ', &['ჷ']), - ('Ჸ', &['ჸ']), - ('Ჹ', &['ჹ']), - ('Ჺ', &['ჺ']), - ('Ჽ', &['ჽ']), - ('Ჾ', &['ჾ']), - ('Ჿ', &['ჿ']), - ('ᵹ', &['Ᵹ']), - ('ᵽ', &['Ᵽ']), - ('ᶎ', &['Ᶎ']), - ('Ḁ', &['ḁ']), - ('ḁ', &['Ḁ']), - ('Ḃ', &['ḃ']), - ('ḃ', &['Ḃ']), - ('Ḅ', &['ḅ']), - ('ḅ', &['Ḅ']), - ('Ḇ', &['ḇ']), - ('ḇ', &['Ḇ']), - ('Ḉ', &['ḉ']), - ('ḉ', &['Ḉ']), - ('Ḋ', &['ḋ']), - ('ḋ', &['Ḋ']), - ('Ḍ', &['ḍ']), - ('ḍ', &['Ḍ']), - ('Ḏ', &['ḏ']), - ('ḏ', &['Ḏ']), - ('Ḑ', &['ḑ']), - ('ḑ', &['Ḑ']), - ('Ḓ', &['ḓ']), - ('ḓ', &['Ḓ']), - ('Ḕ', &['ḕ']), - ('ḕ', &['Ḕ']), - ('Ḗ', &['ḗ']), - ('ḗ', &['Ḗ']), - ('Ḙ', &['ḙ']), - ('ḙ', &['Ḙ']), - ('Ḛ', &['ḛ']), - ('ḛ', &['Ḛ']), - ('Ḝ', 
&['ḝ']), - ('ḝ', &['Ḝ']), - ('Ḟ', &['ḟ']), - ('ḟ', &['Ḟ']), - ('Ḡ', &['ḡ']), - ('ḡ', &['Ḡ']), - ('Ḣ', &['ḣ']), - ('ḣ', &['Ḣ']), - ('Ḥ', &['ḥ']), - ('ḥ', &['Ḥ']), - ('Ḧ', &['ḧ']), - ('ḧ', &['Ḧ']), - ('Ḩ', &['ḩ']), - ('ḩ', &['Ḩ']), - ('Ḫ', &['ḫ']), - ('ḫ', &['Ḫ']), - ('Ḭ', &['ḭ']), - ('ḭ', &['Ḭ']), - ('Ḯ', &['ḯ']), - ('ḯ', &['Ḯ']), - ('Ḱ', &['ḱ']), - ('ḱ', &['Ḱ']), - ('Ḳ', &['ḳ']), - ('ḳ', &['Ḳ']), - ('Ḵ', &['ḵ']), - ('ḵ', &['Ḵ']), - ('Ḷ', &['ḷ']), - ('ḷ', &['Ḷ']), - ('Ḹ', &['ḹ']), - ('ḹ', &['Ḹ']), - ('Ḻ', &['ḻ']), - ('ḻ', &['Ḻ']), - ('Ḽ', &['ḽ']), - ('ḽ', &['Ḽ']), - ('Ḿ', &['ḿ']), - ('ḿ', &['Ḿ']), - ('Ṁ', &['ṁ']), - ('ṁ', &['Ṁ']), - ('Ṃ', &['ṃ']), - ('ṃ', &['Ṃ']), - ('Ṅ', &['ṅ']), - ('ṅ', &['Ṅ']), - ('Ṇ', &['ṇ']), - ('ṇ', &['Ṇ']), - ('Ṉ', &['ṉ']), - ('ṉ', &['Ṉ']), - ('Ṋ', &['ṋ']), - ('ṋ', &['Ṋ']), - ('Ṍ', &['ṍ']), - ('ṍ', &['Ṍ']), - ('Ṏ', &['ṏ']), - ('ṏ', &['Ṏ']), - ('Ṑ', &['ṑ']), - ('ṑ', &['Ṑ']), - ('Ṓ', &['ṓ']), - ('ṓ', &['Ṓ']), - ('Ṕ', &['ṕ']), - ('ṕ', &['Ṕ']), - ('Ṗ', &['ṗ']), - ('ṗ', &['Ṗ']), - ('Ṙ', &['ṙ']), - ('ṙ', &['Ṙ']), - ('Ṛ', &['ṛ']), - ('ṛ', &['Ṛ']), - ('Ṝ', &['ṝ']), - ('ṝ', &['Ṝ']), - ('Ṟ', &['ṟ']), - ('ṟ', &['Ṟ']), - ('Ṡ', &['ṡ', 'ẛ']), - ('ṡ', &['Ṡ', 'ẛ']), - ('Ṣ', &['ṣ']), - ('ṣ', &['Ṣ']), - ('Ṥ', &['ṥ']), - ('ṥ', &['Ṥ']), - ('Ṧ', &['ṧ']), - ('ṧ', &['Ṧ']), - ('Ṩ', &['ṩ']), - ('ṩ', &['Ṩ']), - ('Ṫ', &['ṫ']), - ('ṫ', &['Ṫ']), - ('Ṭ', &['ṭ']), - ('ṭ', &['Ṭ']), - ('Ṯ', &['ṯ']), - ('ṯ', &['Ṯ']), - ('Ṱ', &['ṱ']), - ('ṱ', &['Ṱ']), - ('Ṳ', &['ṳ']), - ('ṳ', &['Ṳ']), - ('Ṵ', &['ṵ']), - ('ṵ', &['Ṵ']), - ('Ṷ', &['ṷ']), - ('ṷ', &['Ṷ']), - ('Ṹ', &['ṹ']), - ('ṹ', &['Ṹ']), - ('Ṻ', &['ṻ']), - ('ṻ', &['Ṻ']), - ('Ṽ', &['ṽ']), - ('ṽ', &['Ṽ']), - ('Ṿ', &['ṿ']), - ('ṿ', &['Ṿ']), - ('Ẁ', &['ẁ']), - ('ẁ', &['Ẁ']), - ('Ẃ', &['ẃ']), - ('ẃ', &['Ẃ']), - ('Ẅ', &['ẅ']), - ('ẅ', &['Ẅ']), - ('Ẇ', &['ẇ']), - ('ẇ', &['Ẇ']), - ('Ẉ', &['ẉ']), - ('ẉ', &['Ẉ']), - ('Ẋ', &['ẋ']), - ('ẋ', &['Ẋ']), - ('Ẍ', &['ẍ']), - ('ẍ', &['Ẍ']), - ('Ẏ', &['ẏ']), - ('ẏ', &['Ẏ']), - ('Ẑ', &['ẑ']), - ('ẑ', 
&['Ẑ']), - ('Ẓ', &['ẓ']), - ('ẓ', &['Ẓ']), - ('Ẕ', &['ẕ']), - ('ẕ', &['Ẕ']), - ('ẛ', &['Ṡ', 'ṡ']), - ('ẞ', &['ß']), - ('Ạ', &['ạ']), - ('ạ', &['Ạ']), - ('Ả', &['ả']), - ('ả', &['Ả']), - ('Ấ', &['ấ']), - ('ấ', &['Ấ']), - ('Ầ', &['ầ']), - ('ầ', &['Ầ']), - ('Ẩ', &['ẩ']), - ('ẩ', &['Ẩ']), - ('Ẫ', &['ẫ']), - ('ẫ', &['Ẫ']), - ('Ậ', &['ậ']), - ('ậ', &['Ậ']), - ('Ắ', &['ắ']), - ('ắ', &['Ắ']), - ('Ằ', &['ằ']), - ('ằ', &['Ằ']), - ('Ẳ', &['ẳ']), - ('ẳ', &['Ẳ']), - ('Ẵ', &['ẵ']), - ('ẵ', &['Ẵ']), - ('Ặ', &['ặ']), - ('ặ', &['Ặ']), - ('Ẹ', &['ẹ']), - ('ẹ', &['Ẹ']), - ('Ẻ', &['ẻ']), - ('ẻ', &['Ẻ']), - ('Ẽ', &['ẽ']), - ('ẽ', &['Ẽ']), - ('Ế', &['ế']), - ('ế', &['Ế']), - ('Ề', &['ề']), - ('ề', &['Ề']), - ('Ể', &['ể']), - ('ể', &['Ể']), - ('Ễ', &['ễ']), - ('ễ', &['Ễ']), - ('Ệ', &['ệ']), - ('ệ', &['Ệ']), - ('Ỉ', &['ỉ']), - ('ỉ', &['Ỉ']), - ('Ị', &['ị']), - ('ị', &['Ị']), - ('Ọ', &['ọ']), - ('ọ', &['Ọ']), - ('Ỏ', &['ỏ']), - ('ỏ', &['Ỏ']), - ('Ố', &['ố']), - ('ố', &['Ố']), - ('Ồ', &['ồ']), - ('ồ', &['Ồ']), - ('Ổ', &['ổ']), - ('ổ', &['Ổ']), - ('Ỗ', &['ỗ']), - ('ỗ', &['Ỗ']), - ('Ộ', &['ộ']), - ('ộ', &['Ộ']), - ('Ớ', &['ớ']), - ('ớ', &['Ớ']), - ('Ờ', &['ờ']), - ('ờ', &['Ờ']), - ('Ở', &['ở']), - ('ở', &['Ở']), - ('Ỡ', &['ỡ']), - ('ỡ', &['Ỡ']), - ('Ợ', &['ợ']), - ('ợ', &['Ợ']), - ('Ụ', &['ụ']), - ('ụ', &['Ụ']), - ('Ủ', &['ủ']), - ('ủ', &['Ủ']), - ('Ứ', &['ứ']), - ('ứ', &['Ứ']), - ('Ừ', &['ừ']), - ('ừ', &['Ừ']), - ('Ử', &['ử']), - ('ử', &['Ử']), - ('Ữ', &['ữ']), - ('ữ', &['Ữ']), - ('Ự', &['ự']), - ('ự', &['Ự']), - ('Ỳ', &['ỳ']), - ('ỳ', &['Ỳ']), - ('Ỵ', &['ỵ']), - ('ỵ', &['Ỵ']), - ('Ỷ', &['ỷ']), - ('ỷ', &['Ỷ']), - ('Ỹ', &['ỹ']), - ('ỹ', &['Ỹ']), - ('Ỻ', &['ỻ']), - ('ỻ', &['Ỻ']), - ('Ỽ', &['ỽ']), - ('ỽ', &['Ỽ']), - ('Ỿ', &['ỿ']), - ('ỿ', &['Ỿ']), - ('ἀ', &['Ἀ']), - ('ἁ', &['Ἁ']), - ('ἂ', &['Ἂ']), - ('ἃ', &['Ἃ']), - ('ἄ', &['Ἄ']), - ('ἅ', &['Ἅ']), - ('ἆ', &['Ἆ']), - ('ἇ', &['Ἇ']), - ('Ἀ', &['ἀ']), - ('Ἁ', &['ἁ']), - ('Ἂ', &['ἂ']), - ('Ἃ', &['ἃ']), - ('Ἄ', &['ἄ']), - ('Ἅ', &['ἅ']), - ('Ἆ', 
&['ἆ']), - ('Ἇ', &['ἇ']), - ('ἐ', &['Ἐ']), - ('ἑ', &['Ἑ']), - ('ἒ', &['Ἒ']), - ('ἓ', &['Ἓ']), - ('ἔ', &['Ἔ']), - ('ἕ', &['Ἕ']), - ('Ἐ', &['ἐ']), - ('Ἑ', &['ἑ']), - ('Ἒ', &['ἒ']), - ('Ἓ', &['ἓ']), - ('Ἔ', &['ἔ']), - ('Ἕ', &['ἕ']), - ('ἠ', &['Ἠ']), - ('ἡ', &['Ἡ']), - ('ἢ', &['Ἢ']), - ('ἣ', &['Ἣ']), - ('ἤ', &['Ἤ']), - ('ἥ', &['Ἥ']), - ('ἦ', &['Ἦ']), - ('ἧ', &['Ἧ']), - ('Ἠ', &['ἠ']), - ('Ἡ', &['ἡ']), - ('Ἢ', &['ἢ']), - ('Ἣ', &['ἣ']), - ('Ἤ', &['ἤ']), - ('Ἥ', &['ἥ']), - ('Ἦ', &['ἦ']), - ('Ἧ', &['ἧ']), - ('ἰ', &['Ἰ']), - ('ἱ', &['Ἱ']), - ('ἲ', &['Ἲ']), - ('ἳ', &['Ἳ']), - ('ἴ', &['Ἴ']), - ('ἵ', &['Ἵ']), - ('ἶ', &['Ἶ']), - ('ἷ', &['Ἷ']), - ('Ἰ', &['ἰ']), - ('Ἱ', &['ἱ']), - ('Ἲ', &['ἲ']), - ('Ἳ', &['ἳ']), - ('Ἴ', &['ἴ']), - ('Ἵ', &['ἵ']), - ('Ἶ', &['ἶ']), - ('Ἷ', &['ἷ']), - ('ὀ', &['Ὀ']), - ('ὁ', &['Ὁ']), - ('ὂ', &['Ὂ']), - ('ὃ', &['Ὃ']), - ('ὄ', &['Ὄ']), - ('ὅ', &['Ὅ']), - ('Ὀ', &['ὀ']), - ('Ὁ', &['ὁ']), - ('Ὂ', &['ὂ']), - ('Ὃ', &['ὃ']), - ('Ὄ', &['ὄ']), - ('Ὅ', &['ὅ']), - ('ὑ', &['Ὑ']), - ('ὓ', &['Ὓ']), - ('ὕ', &['Ὕ']), - ('ὗ', &['Ὗ']), - ('Ὑ', &['ὑ']), - ('Ὓ', &['ὓ']), - ('Ὕ', &['ὕ']), - ('Ὗ', &['ὗ']), - ('ὠ', &['Ὠ']), - ('ὡ', &['Ὡ']), - ('ὢ', &['Ὢ']), - ('ὣ', &['Ὣ']), - ('ὤ', &['Ὤ']), - ('ὥ', &['Ὥ']), - ('ὦ', &['Ὦ']), - ('ὧ', &['Ὧ']), - ('Ὠ', &['ὠ']), - ('Ὡ', &['ὡ']), - ('Ὢ', &['ὢ']), - ('Ὣ', &['ὣ']), - ('Ὤ', &['ὤ']), - ('Ὥ', &['ὥ']), - ('Ὦ', &['ὦ']), - ('Ὧ', &['ὧ']), - ('ὰ', &['Ὰ']), - ('ά', &['Ά']), - ('ὲ', &['Ὲ']), - ('έ', &['Έ']), - ('ὴ', &['Ὴ']), - ('ή', &['Ή']), - ('ὶ', &['Ὶ']), - ('ί', &['Ί']), - ('ὸ', &['Ὸ']), - ('ό', &['Ό']), - ('ὺ', &['Ὺ']), - ('ύ', &['Ύ']), - ('ὼ', &['Ὼ']), - ('ώ', &['Ώ']), - ('ᾀ', &['ᾈ']), - ('ᾁ', &['ᾉ']), - ('ᾂ', &['ᾊ']), - ('ᾃ', &['ᾋ']), - ('ᾄ', &['ᾌ']), - ('ᾅ', &['ᾍ']), - ('ᾆ', &['ᾎ']), - ('ᾇ', &['ᾏ']), - ('ᾈ', &['ᾀ']), - ('ᾉ', &['ᾁ']), - ('ᾊ', &['ᾂ']), - ('ᾋ', &['ᾃ']), - ('ᾌ', &['ᾄ']), - ('ᾍ', &['ᾅ']), - ('ᾎ', &['ᾆ']), - ('ᾏ', &['ᾇ']), - ('ᾐ', &['ᾘ']), - ('ᾑ', &['ᾙ']), - ('ᾒ', &['ᾚ']), - ('ᾓ', &['ᾛ']), - ('ᾔ', &['ᾜ']), - ('ᾕ', &['ᾝ']), - 
('ᾖ', &['ᾞ']), - ('ᾗ', &['ᾟ']), - ('ᾘ', &['ᾐ']), - ('ᾙ', &['ᾑ']), - ('ᾚ', &['ᾒ']), - ('ᾛ', &['ᾓ']), - ('ᾜ', &['ᾔ']), - ('ᾝ', &['ᾕ']), - ('ᾞ', &['ᾖ']), - ('ᾟ', &['ᾗ']), - ('ᾠ', &['ᾨ']), - ('ᾡ', &['ᾩ']), - ('ᾢ', &['ᾪ']), - ('ᾣ', &['ᾫ']), - ('ᾤ', &['ᾬ']), - ('ᾥ', &['ᾭ']), - ('ᾦ', &['ᾮ']), - ('ᾧ', &['ᾯ']), - ('ᾨ', &['ᾠ']), - ('ᾩ', &['ᾡ']), - ('ᾪ', &['ᾢ']), - ('ᾫ', &['ᾣ']), - ('ᾬ', &['ᾤ']), - ('ᾭ', &['ᾥ']), - ('ᾮ', &['ᾦ']), - ('ᾯ', &['ᾧ']), - ('ᾰ', &['Ᾰ']), - ('ᾱ', &['Ᾱ']), - ('ᾳ', &['ᾼ']), - ('Ᾰ', &['ᾰ']), - ('Ᾱ', &['ᾱ']), - ('Ὰ', &['ὰ']), - ('Ά', &['ά']), - ('ᾼ', &['ᾳ']), - ('ι', &['\u{345}', 'Ι', 'ι']), - ('ῃ', &['ῌ']), - ('Ὲ', &['ὲ']), - ('Έ', &['έ']), - ('Ὴ', &['ὴ']), - ('Ή', &['ή']), - ('ῌ', &['ῃ']), - ('ῐ', &['Ῐ']), - ('ῑ', &['Ῑ']), - ('ΐ', &['ΐ']), - ('Ῐ', &['ῐ']), - ('Ῑ', &['ῑ']), - ('Ὶ', &['ὶ']), - ('Ί', &['ί']), - ('ῠ', &['Ῠ']), - ('ῡ', &['Ῡ']), - ('ΰ', &['ΰ']), - ('ῥ', &['Ῥ']), - ('Ῠ', &['ῠ']), - ('Ῡ', &['ῡ']), - ('Ὺ', &['ὺ']), - ('Ύ', &['ύ']), - ('Ῥ', &['ῥ']), - ('ῳ', &['ῼ']), - ('Ὸ', &['ὸ']), - ('Ό', &['ό']), - ('Ὼ', &['ὼ']), - ('Ώ', &['ώ']), - ('ῼ', &['ῳ']), - ('Ω', &['Ω', 'ω']), - ('K', &['K', 'k']), - ('Å', &['Å', 'å']), - ('Ⅎ', &['ⅎ']), - ('ⅎ', &['Ⅎ']), - ('Ⅰ', &['ⅰ']), - ('Ⅱ', &['ⅱ']), - ('Ⅲ', &['ⅲ']), - ('Ⅳ', &['ⅳ']), - ('Ⅴ', &['ⅴ']), - ('Ⅵ', &['ⅵ']), - ('Ⅶ', &['ⅶ']), - ('Ⅷ', &['ⅷ']), - ('Ⅸ', &['ⅸ']), - ('Ⅹ', &['ⅹ']), - ('Ⅺ', &['ⅺ']), - ('Ⅻ', &['ⅻ']), - ('Ⅼ', &['ⅼ']), - ('Ⅽ', &['ⅽ']), - ('Ⅾ', &['ⅾ']), - ('Ⅿ', &['ⅿ']), - ('ⅰ', &['Ⅰ']), - ('ⅱ', &['Ⅱ']), - ('ⅲ', &['Ⅲ']), - ('ⅳ', &['Ⅳ']), - ('ⅴ', &['Ⅴ']), - ('ⅵ', &['Ⅵ']), - ('ⅶ', &['Ⅶ']), - ('ⅷ', &['Ⅷ']), - ('ⅸ', &['Ⅸ']), - ('ⅹ', &['Ⅹ']), - ('ⅺ', &['Ⅺ']), - ('ⅻ', &['Ⅻ']), - ('ⅼ', &['Ⅼ']), - ('ⅽ', &['Ⅽ']), - ('ⅾ', &['Ⅾ']), - ('ⅿ', &['Ⅿ']), - ('Ↄ', &['ↄ']), - ('ↄ', &['Ↄ']), - ('Ⓐ', &['ⓐ']), - ('Ⓑ', &['ⓑ']), - ('Ⓒ', &['ⓒ']), - ('Ⓓ', &['ⓓ']), - ('Ⓔ', &['ⓔ']), - ('Ⓕ', &['ⓕ']), - ('Ⓖ', &['ⓖ']), - ('Ⓗ', &['ⓗ']), - ('Ⓘ', &['ⓘ']), - ('Ⓙ', &['ⓙ']), - ('Ⓚ', &['ⓚ']), - ('Ⓛ', &['ⓛ']), - ('Ⓜ', &['ⓜ']), - ('Ⓝ', 
&['ⓝ']), - ('Ⓞ', &['ⓞ']), - ('Ⓟ', &['ⓟ']), - ('Ⓠ', &['ⓠ']), - ('Ⓡ', &['ⓡ']), - ('Ⓢ', &['ⓢ']), - ('Ⓣ', &['ⓣ']), - ('Ⓤ', &['ⓤ']), - ('Ⓥ', &['ⓥ']), - ('Ⓦ', &['ⓦ']), - ('Ⓧ', &['ⓧ']), - ('Ⓨ', &['ⓨ']), - ('Ⓩ', &['ⓩ']), - ('ⓐ', &['Ⓐ']), - ('ⓑ', &['Ⓑ']), - ('ⓒ', &['Ⓒ']), - ('ⓓ', &['Ⓓ']), - ('ⓔ', &['Ⓔ']), - ('ⓕ', &['Ⓕ']), - ('ⓖ', &['Ⓖ']), - ('ⓗ', &['Ⓗ']), - ('ⓘ', &['Ⓘ']), - ('ⓙ', &['Ⓙ']), - ('ⓚ', &['Ⓚ']), - ('ⓛ', &['Ⓛ']), - ('ⓜ', &['Ⓜ']), - ('ⓝ', &['Ⓝ']), - ('ⓞ', &['Ⓞ']), - ('ⓟ', &['Ⓟ']), - ('ⓠ', &['Ⓠ']), - ('ⓡ', &['Ⓡ']), - ('ⓢ', &['Ⓢ']), - ('ⓣ', &['Ⓣ']), - ('ⓤ', &['Ⓤ']), - ('ⓥ', &['Ⓥ']), - ('ⓦ', &['Ⓦ']), - ('ⓧ', &['Ⓧ']), - ('ⓨ', &['Ⓨ']), - ('ⓩ', &['Ⓩ']), - ('Ⰰ', &['ⰰ']), - ('Ⰱ', &['ⰱ']), - ('Ⰲ', &['ⰲ']), - ('Ⰳ', &['ⰳ']), - ('Ⰴ', &['ⰴ']), - ('Ⰵ', &['ⰵ']), - ('Ⰶ', &['ⰶ']), - ('Ⰷ', &['ⰷ']), - ('Ⰸ', &['ⰸ']), - ('Ⰹ', &['ⰹ']), - ('Ⰺ', &['ⰺ']), - ('Ⰻ', &['ⰻ']), - ('Ⰼ', &['ⰼ']), - ('Ⰽ', &['ⰽ']), - ('Ⰾ', &['ⰾ']), - ('Ⰿ', &['ⰿ']), - ('Ⱀ', &['ⱀ']), - ('Ⱁ', &['ⱁ']), - ('Ⱂ', &['ⱂ']), - ('Ⱃ', &['ⱃ']), - ('Ⱄ', &['ⱄ']), - ('Ⱅ', &['ⱅ']), - ('Ⱆ', &['ⱆ']), - ('Ⱇ', &['ⱇ']), - ('Ⱈ', &['ⱈ']), - ('Ⱉ', &['ⱉ']), - ('Ⱊ', &['ⱊ']), - ('Ⱋ', &['ⱋ']), - ('Ⱌ', &['ⱌ']), - ('Ⱍ', &['ⱍ']), - ('Ⱎ', &['ⱎ']), - ('Ⱏ', &['ⱏ']), - ('Ⱐ', &['ⱐ']), - ('Ⱑ', &['ⱑ']), - ('Ⱒ', &['ⱒ']), - ('Ⱓ', &['ⱓ']), - ('Ⱔ', &['ⱔ']), - ('Ⱕ', &['ⱕ']), - ('Ⱖ', &['ⱖ']), - ('Ⱗ', &['ⱗ']), - ('Ⱘ', &['ⱘ']), - ('Ⱙ', &['ⱙ']), - ('Ⱚ', &['ⱚ']), - ('Ⱛ', &['ⱛ']), - ('Ⱜ', &['ⱜ']), - ('Ⱝ', &['ⱝ']), - ('Ⱞ', &['ⱞ']), - ('Ⱟ', &['ⱟ']), - ('ⰰ', &['Ⰰ']), - ('ⰱ', &['Ⰱ']), - ('ⰲ', &['Ⰲ']), - ('ⰳ', &['Ⰳ']), - ('ⰴ', &['Ⰴ']), - ('ⰵ', &['Ⰵ']), - ('ⰶ', &['Ⰶ']), - ('ⰷ', &['Ⰷ']), - ('ⰸ', &['Ⰸ']), - ('ⰹ', &['Ⰹ']), - ('ⰺ', &['Ⰺ']), - ('ⰻ', &['Ⰻ']), - ('ⰼ', &['Ⰼ']), - ('ⰽ', &['Ⰽ']), - ('ⰾ', &['Ⰾ']), - ('ⰿ', &['Ⰿ']), - ('ⱀ', &['Ⱀ']), - ('ⱁ', &['Ⱁ']), - ('ⱂ', &['Ⱂ']), - ('ⱃ', &['Ⱃ']), - ('ⱄ', &['Ⱄ']), - ('ⱅ', &['Ⱅ']), - ('ⱆ', &['Ⱆ']), - ('ⱇ', &['Ⱇ']), - ('ⱈ', &['Ⱈ']), - ('ⱉ', &['Ⱉ']), - ('ⱊ', &['Ⱊ']), - ('ⱋ', &['Ⱋ']), - ('ⱌ', &['Ⱌ']), - ('ⱍ', &['Ⱍ']), - ('ⱎ', &['Ⱎ']), - 
('ⱏ', &['Ⱏ']), - ('ⱐ', &['Ⱐ']), - ('ⱑ', &['Ⱑ']), - ('ⱒ', &['Ⱒ']), - ('ⱓ', &['Ⱓ']), - ('ⱔ', &['Ⱔ']), - ('ⱕ', &['Ⱕ']), - ('ⱖ', &['Ⱖ']), - ('ⱗ', &['Ⱗ']), - ('ⱘ', &['Ⱘ']), - ('ⱙ', &['Ⱙ']), - ('ⱚ', &['Ⱚ']), - ('ⱛ', &['Ⱛ']), - ('ⱜ', &['Ⱜ']), - ('ⱝ', &['Ⱝ']), - ('ⱞ', &['Ⱞ']), - ('ⱟ', &['Ⱟ']), - ('Ⱡ', &['ⱡ']), - ('ⱡ', &['Ⱡ']), - ('Ɫ', &['ɫ']), - ('Ᵽ', &['ᵽ']), - ('Ɽ', &['ɽ']), - ('ⱥ', &['Ⱥ']), - ('ⱦ', &['Ⱦ']), - ('Ⱨ', &['ⱨ']), - ('ⱨ', &['Ⱨ']), - ('Ⱪ', &['ⱪ']), - ('ⱪ', &['Ⱪ']), - ('Ⱬ', &['ⱬ']), - ('ⱬ', &['Ⱬ']), - ('Ɑ', &['ɑ']), - ('Ɱ', &['ɱ']), - ('Ɐ', &['ɐ']), - ('Ɒ', &['ɒ']), - ('Ⱳ', &['ⱳ']), - ('ⱳ', &['Ⱳ']), - ('Ⱶ', &['ⱶ']), - ('ⱶ', &['Ⱶ']), - ('Ȿ', &['ȿ']), - ('Ɀ', &['ɀ']), - ('Ⲁ', &['ⲁ']), - ('ⲁ', &['Ⲁ']), - ('Ⲃ', &['ⲃ']), - ('ⲃ', &['Ⲃ']), - ('Ⲅ', &['ⲅ']), - ('ⲅ', &['Ⲅ']), - ('Ⲇ', &['ⲇ']), - ('ⲇ', &['Ⲇ']), - ('Ⲉ', &['ⲉ']), - ('ⲉ', &['Ⲉ']), - ('Ⲋ', &['ⲋ']), - ('ⲋ', &['Ⲋ']), - ('Ⲍ', &['ⲍ']), - ('ⲍ', &['Ⲍ']), - ('Ⲏ', &['ⲏ']), - ('ⲏ', &['Ⲏ']), - ('Ⲑ', &['ⲑ']), - ('ⲑ', &['Ⲑ']), - ('Ⲓ', &['ⲓ']), - ('ⲓ', &['Ⲓ']), - ('Ⲕ', &['ⲕ']), - ('ⲕ', &['Ⲕ']), - ('Ⲗ', &['ⲗ']), - ('ⲗ', &['Ⲗ']), - ('Ⲙ', &['ⲙ']), - ('ⲙ', &['Ⲙ']), - ('Ⲛ', &['ⲛ']), - ('ⲛ', &['Ⲛ']), - ('Ⲝ', &['ⲝ']), - ('ⲝ', &['Ⲝ']), - ('Ⲟ', &['ⲟ']), - ('ⲟ', &['Ⲟ']), - ('Ⲡ', &['ⲡ']), - ('ⲡ', &['Ⲡ']), - ('Ⲣ', &['ⲣ']), - ('ⲣ', &['Ⲣ']), - ('Ⲥ', &['ⲥ']), - ('ⲥ', &['Ⲥ']), - ('Ⲧ', &['ⲧ']), - ('ⲧ', &['Ⲧ']), - ('Ⲩ', &['ⲩ']), - ('ⲩ', &['Ⲩ']), - ('Ⲫ', &['ⲫ']), - ('ⲫ', &['Ⲫ']), - ('Ⲭ', &['ⲭ']), - ('ⲭ', &['Ⲭ']), - ('Ⲯ', &['ⲯ']), - ('ⲯ', &['Ⲯ']), - ('Ⲱ', &['ⲱ']), - ('ⲱ', &['Ⲱ']), - ('Ⲳ', &['ⲳ']), - ('ⲳ', &['Ⲳ']), - ('Ⲵ', &['ⲵ']), - ('ⲵ', &['Ⲵ']), - ('Ⲷ', &['ⲷ']), - ('ⲷ', &['Ⲷ']), - ('Ⲹ', &['ⲹ']), - ('ⲹ', &['Ⲹ']), - ('Ⲻ', &['ⲻ']), - ('ⲻ', &['Ⲻ']), - ('Ⲽ', &['ⲽ']), - ('ⲽ', &['Ⲽ']), - ('Ⲿ', &['ⲿ']), - ('ⲿ', &['Ⲿ']), - ('Ⳁ', &['ⳁ']), - ('ⳁ', &['Ⳁ']), - ('Ⳃ', &['ⳃ']), - ('ⳃ', &['Ⳃ']), - ('Ⳅ', &['ⳅ']), - ('ⳅ', &['Ⳅ']), - ('Ⳇ', &['ⳇ']), - ('ⳇ', &['Ⳇ']), - ('Ⳉ', &['ⳉ']), - ('ⳉ', &['Ⳉ']), - ('Ⳋ', &['ⳋ']), - ('ⳋ', &['Ⳋ']), - ('Ⳍ', &['ⳍ']), - ('ⳍ', 
&['Ⳍ']), - ('Ⳏ', &['ⳏ']), - ('ⳏ', &['Ⳏ']), - ('Ⳑ', &['ⳑ']), - ('ⳑ', &['Ⳑ']), - ('Ⳓ', &['ⳓ']), - ('ⳓ', &['Ⳓ']), - ('Ⳕ', &['ⳕ']), - ('ⳕ', &['Ⳕ']), - ('Ⳗ', &['ⳗ']), - ('ⳗ', &['Ⳗ']), - ('Ⳙ', &['ⳙ']), - ('ⳙ', &['Ⳙ']), - ('Ⳛ', &['ⳛ']), - ('ⳛ', &['Ⳛ']), - ('Ⳝ', &['ⳝ']), - ('ⳝ', &['Ⳝ']), - ('Ⳟ', &['ⳟ']), - ('ⳟ', &['Ⳟ']), - ('Ⳡ', &['ⳡ']), - ('ⳡ', &['Ⳡ']), - ('Ⳣ', &['ⳣ']), - ('ⳣ', &['Ⳣ']), - ('Ⳬ', &['ⳬ']), - ('ⳬ', &['Ⳬ']), - ('Ⳮ', &['ⳮ']), - ('ⳮ', &['Ⳮ']), - ('Ⳳ', &['ⳳ']), - ('ⳳ', &['Ⳳ']), - ('ⴀ', &['Ⴀ']), - ('ⴁ', &['Ⴁ']), - ('ⴂ', &['Ⴂ']), - ('ⴃ', &['Ⴃ']), - ('ⴄ', &['Ⴄ']), - ('ⴅ', &['Ⴅ']), - ('ⴆ', &['Ⴆ']), - ('ⴇ', &['Ⴇ']), - ('ⴈ', &['Ⴈ']), - ('ⴉ', &['Ⴉ']), - ('ⴊ', &['Ⴊ']), - ('ⴋ', &['Ⴋ']), - ('ⴌ', &['Ⴌ']), - ('ⴍ', &['Ⴍ']), - ('ⴎ', &['Ⴎ']), - ('ⴏ', &['Ⴏ']), - ('ⴐ', &['Ⴐ']), - ('ⴑ', &['Ⴑ']), - ('ⴒ', &['Ⴒ']), - ('ⴓ', &['Ⴓ']), - ('ⴔ', &['Ⴔ']), - ('ⴕ', &['Ⴕ']), - ('ⴖ', &['Ⴖ']), - ('ⴗ', &['Ⴗ']), - ('ⴘ', &['Ⴘ']), - ('ⴙ', &['Ⴙ']), - ('ⴚ', &['Ⴚ']), - ('ⴛ', &['Ⴛ']), - ('ⴜ', &['Ⴜ']), - ('ⴝ', &['Ⴝ']), - ('ⴞ', &['Ⴞ']), - ('ⴟ', &['Ⴟ']), - ('ⴠ', &['Ⴠ']), - ('ⴡ', &['Ⴡ']), - ('ⴢ', &['Ⴢ']), - ('ⴣ', &['Ⴣ']), - ('ⴤ', &['Ⴤ']), - ('ⴥ', &['Ⴥ']), - ('ⴧ', &['Ⴧ']), - ('ⴭ', &['Ⴭ']), - ('Ꙁ', &['ꙁ']), - ('ꙁ', &['Ꙁ']), - ('Ꙃ', &['ꙃ']), - ('ꙃ', &['Ꙃ']), - ('Ꙅ', &['ꙅ']), - ('ꙅ', &['Ꙅ']), - ('Ꙇ', &['ꙇ']), - ('ꙇ', &['Ꙇ']), - ('Ꙉ', &['ꙉ']), - ('ꙉ', &['Ꙉ']), - ('Ꙋ', &['ᲈ', 'ꙋ']), - ('ꙋ', &['ᲈ', 'Ꙋ']), - ('Ꙍ', &['ꙍ']), - ('ꙍ', &['Ꙍ']), - ('Ꙏ', &['ꙏ']), - ('ꙏ', &['Ꙏ']), - ('Ꙑ', &['ꙑ']), - ('ꙑ', &['Ꙑ']), - ('Ꙓ', &['ꙓ']), - ('ꙓ', &['Ꙓ']), - ('Ꙕ', &['ꙕ']), - ('ꙕ', &['Ꙕ']), - ('Ꙗ', &['ꙗ']), - ('ꙗ', &['Ꙗ']), - ('Ꙙ', &['ꙙ']), - ('ꙙ', &['Ꙙ']), - ('Ꙛ', &['ꙛ']), - ('ꙛ', &['Ꙛ']), - ('Ꙝ', &['ꙝ']), - ('ꙝ', &['Ꙝ']), - ('Ꙟ', &['ꙟ']), - ('ꙟ', &['Ꙟ']), - ('Ꙡ', &['ꙡ']), - ('ꙡ', &['Ꙡ']), - ('Ꙣ', &['ꙣ']), - ('ꙣ', &['Ꙣ']), - ('Ꙥ', &['ꙥ']), - ('ꙥ', &['Ꙥ']), - ('Ꙧ', &['ꙧ']), - ('ꙧ', &['Ꙧ']), - ('Ꙩ', &['ꙩ']), - ('ꙩ', &['Ꙩ']), - ('Ꙫ', &['ꙫ']), - ('ꙫ', &['Ꙫ']), - ('Ꙭ', &['ꙭ']), - ('ꙭ', &['Ꙭ']), - ('Ꚁ', &['ꚁ']), - ('ꚁ', &['Ꚁ']), - ('Ꚃ', 
&['ꚃ']), - ('ꚃ', &['Ꚃ']), - ('Ꚅ', &['ꚅ']), - ('ꚅ', &['Ꚅ']), - ('Ꚇ', &['ꚇ']), - ('ꚇ', &['Ꚇ']), - ('Ꚉ', &['ꚉ']), - ('ꚉ', &['Ꚉ']), - ('Ꚋ', &['ꚋ']), - ('ꚋ', &['Ꚋ']), - ('Ꚍ', &['ꚍ']), - ('ꚍ', &['Ꚍ']), - ('Ꚏ', &['ꚏ']), - ('ꚏ', &['Ꚏ']), - ('Ꚑ', &['ꚑ']), - ('ꚑ', &['Ꚑ']), - ('Ꚓ', &['ꚓ']), - ('ꚓ', &['Ꚓ']), - ('Ꚕ', &['ꚕ']), - ('ꚕ', &['Ꚕ']), - ('Ꚗ', &['ꚗ']), - ('ꚗ', &['Ꚗ']), - ('Ꚙ', &['ꚙ']), - ('ꚙ', &['Ꚙ']), - ('Ꚛ', &['ꚛ']), - ('ꚛ', &['Ꚛ']), - ('Ꜣ', &['ꜣ']), - ('ꜣ', &['Ꜣ']), - ('Ꜥ', &['ꜥ']), - ('ꜥ', &['Ꜥ']), - ('Ꜧ', &['ꜧ']), - ('ꜧ', &['Ꜧ']), - ('Ꜩ', &['ꜩ']), - ('ꜩ', &['Ꜩ']), - ('Ꜫ', &['ꜫ']), - ('ꜫ', &['Ꜫ']), - ('Ꜭ', &['ꜭ']), - ('ꜭ', &['Ꜭ']), - ('Ꜯ', &['ꜯ']), - ('ꜯ', &['Ꜯ']), - ('Ꜳ', &['ꜳ']), - ('ꜳ', &['Ꜳ']), - ('Ꜵ', &['ꜵ']), - ('ꜵ', &['Ꜵ']), - ('Ꜷ', &['ꜷ']), - ('ꜷ', &['Ꜷ']), - ('Ꜹ', &['ꜹ']), - ('ꜹ', &['Ꜹ']), - ('Ꜻ', &['ꜻ']), - ('ꜻ', &['Ꜻ']), - ('Ꜽ', &['ꜽ']), - ('ꜽ', &['Ꜽ']), - ('Ꜿ', &['ꜿ']), - ('ꜿ', &['Ꜿ']), - ('Ꝁ', &['ꝁ']), - ('ꝁ', &['Ꝁ']), - ('Ꝃ', &['ꝃ']), - ('ꝃ', &['Ꝃ']), - ('Ꝅ', &['ꝅ']), - ('ꝅ', &['Ꝅ']), - ('Ꝇ', &['ꝇ']), - ('ꝇ', &['Ꝇ']), - ('Ꝉ', &['ꝉ']), - ('ꝉ', &['Ꝉ']), - ('Ꝋ', &['ꝋ']), - ('ꝋ', &['Ꝋ']), - ('Ꝍ', &['ꝍ']), - ('ꝍ', &['Ꝍ']), - ('Ꝏ', &['ꝏ']), - ('ꝏ', &['Ꝏ']), - ('Ꝑ', &['ꝑ']), - ('ꝑ', &['Ꝑ']), - ('Ꝓ', &['ꝓ']), - ('ꝓ', &['Ꝓ']), - ('Ꝕ', &['ꝕ']), - ('ꝕ', &['Ꝕ']), - ('Ꝗ', &['ꝗ']), - ('ꝗ', &['Ꝗ']), - ('Ꝙ', &['ꝙ']), - ('ꝙ', &['Ꝙ']), - ('Ꝛ', &['ꝛ']), - ('ꝛ', &['Ꝛ']), - ('Ꝝ', &['ꝝ']), - ('ꝝ', &['Ꝝ']), - ('Ꝟ', &['ꝟ']), - ('ꝟ', &['Ꝟ']), - ('Ꝡ', &['ꝡ']), - ('ꝡ', &['Ꝡ']), - ('Ꝣ', &['ꝣ']), - ('ꝣ', &['Ꝣ']), - ('Ꝥ', &['ꝥ']), - ('ꝥ', &['Ꝥ']), - ('Ꝧ', &['ꝧ']), - ('ꝧ', &['Ꝧ']), - ('Ꝩ', &['ꝩ']), - ('ꝩ', &['Ꝩ']), - ('Ꝫ', &['ꝫ']), - ('ꝫ', &['Ꝫ']), - ('Ꝭ', &['ꝭ']), - ('ꝭ', &['Ꝭ']), - ('Ꝯ', &['ꝯ']), - ('ꝯ', &['Ꝯ']), - ('Ꝺ', &['ꝺ']), - ('ꝺ', &['Ꝺ']), - ('Ꝼ', &['ꝼ']), - ('ꝼ', &['Ꝼ']), - ('Ᵹ', &['ᵹ']), - ('Ꝿ', &['ꝿ']), - ('ꝿ', &['Ꝿ']), - ('Ꞁ', &['ꞁ']), - ('ꞁ', &['Ꞁ']), - ('Ꞃ', &['ꞃ']), - ('ꞃ', &['Ꞃ']), - ('Ꞅ', &['ꞅ']), - ('ꞅ', &['Ꞅ']), - ('Ꞇ', &['ꞇ']), - ('ꞇ', &['Ꞇ']), - ('Ꞌ', &['ꞌ']), - 
('ꞌ', &['Ꞌ']), - ('Ɥ', &['ɥ']), - ('Ꞑ', &['ꞑ']), - ('ꞑ', &['Ꞑ']), - ('Ꞓ', &['ꞓ']), - ('ꞓ', &['Ꞓ']), - ('ꞔ', &['Ꞔ']), - ('Ꞗ', &['ꞗ']), - ('ꞗ', &['Ꞗ']), - ('Ꞙ', &['ꞙ']), - ('ꞙ', &['Ꞙ']), - ('Ꞛ', &['ꞛ']), - ('ꞛ', &['Ꞛ']), - ('Ꞝ', &['ꞝ']), - ('ꞝ', &['Ꞝ']), - ('Ꞟ', &['ꞟ']), - ('ꞟ', &['Ꞟ']), - ('Ꞡ', &['ꞡ']), - ('ꞡ', &['Ꞡ']), - ('Ꞣ', &['ꞣ']), - ('ꞣ', &['Ꞣ']), - ('Ꞥ', &['ꞥ']), - ('ꞥ', &['Ꞥ']), - ('Ꞧ', &['ꞧ']), - ('ꞧ', &['Ꞧ']), - ('Ꞩ', &['ꞩ']), - ('ꞩ', &['Ꞩ']), - ('Ɦ', &['ɦ']), - ('Ɜ', &['ɜ']), - ('Ɡ', &['ɡ']), - ('Ɬ', &['ɬ']), - ('Ɪ', &['ɪ']), - ('Ʞ', &['ʞ']), - ('Ʇ', &['ʇ']), - ('Ʝ', &['ʝ']), - ('Ꭓ', &['ꭓ']), - ('Ꞵ', &['ꞵ']), - ('ꞵ', &['Ꞵ']), - ('Ꞷ', &['ꞷ']), - ('ꞷ', &['Ꞷ']), - ('Ꞹ', &['ꞹ']), - ('ꞹ', &['Ꞹ']), - ('Ꞻ', &['ꞻ']), - ('ꞻ', &['Ꞻ']), - ('Ꞽ', &['ꞽ']), - ('ꞽ', &['Ꞽ']), - ('Ꞿ', &['ꞿ']), - ('ꞿ', &['Ꞿ']), - ('Ꟁ', &['ꟁ']), - ('ꟁ', &['Ꟁ']), - ('Ꟃ', &['ꟃ']), - ('ꟃ', &['Ꟃ']), - ('Ꞔ', &['ꞔ']), - ('Ʂ', &['ʂ']), - ('Ᶎ', &['ᶎ']), - ('Ꟈ', &['ꟈ']), - ('ꟈ', &['Ꟈ']), - ('Ꟊ', &['ꟊ']), - ('ꟊ', &['Ꟊ']), - ('Ɤ', &['ɤ']), - ('Ꟍ', &['ꟍ']), - ('ꟍ', &['Ꟍ']), - ('Ꟑ', &['ꟑ']), - ('ꟑ', &['Ꟑ']), - ('Ꟗ', &['ꟗ']), - ('ꟗ', &['Ꟗ']), - ('Ꟙ', &['ꟙ']), - ('ꟙ', &['Ꟙ']), - ('Ꟛ', &['ꟛ']), - ('ꟛ', &['Ꟛ']), - ('Ƛ', &['ƛ']), - ('Ꟶ', &['ꟶ']), - ('ꟶ', &['Ꟶ']), - ('ꭓ', &['Ꭓ']), - ('ꭰ', &['Ꭰ']), - ('ꭱ', &['Ꭱ']), - ('ꭲ', &['Ꭲ']), - ('ꭳ', &['Ꭳ']), - ('ꭴ', &['Ꭴ']), - ('ꭵ', &['Ꭵ']), - ('ꭶ', &['Ꭶ']), - ('ꭷ', &['Ꭷ']), - ('ꭸ', &['Ꭸ']), - ('ꭹ', &['Ꭹ']), - ('ꭺ', &['Ꭺ']), - ('ꭻ', &['Ꭻ']), - ('ꭼ', &['Ꭼ']), - ('ꭽ', &['Ꭽ']), - ('ꭾ', &['Ꭾ']), - ('ꭿ', &['Ꭿ']), - ('ꮀ', &['Ꮀ']), - ('ꮁ', &['Ꮁ']), - ('ꮂ', &['Ꮂ']), - ('ꮃ', &['Ꮃ']), - ('ꮄ', &['Ꮄ']), - ('ꮅ', &['Ꮅ']), - ('ꮆ', &['Ꮆ']), - ('ꮇ', &['Ꮇ']), - ('ꮈ', &['Ꮈ']), - ('ꮉ', &['Ꮉ']), - ('ꮊ', &['Ꮊ']), - ('ꮋ', &['Ꮋ']), - ('ꮌ', &['Ꮌ']), - ('ꮍ', &['Ꮍ']), - ('ꮎ', &['Ꮎ']), - ('ꮏ', &['Ꮏ']), - ('ꮐ', &['Ꮐ']), - ('ꮑ', &['Ꮑ']), - ('ꮒ', &['Ꮒ']), - ('ꮓ', &['Ꮓ']), - ('ꮔ', &['Ꮔ']), - ('ꮕ', &['Ꮕ']), - ('ꮖ', &['Ꮖ']), - ('ꮗ', &['Ꮗ']), - ('ꮘ', &['Ꮘ']), - ('ꮙ', &['Ꮙ']), - ('ꮚ', &['Ꮚ']), - ('ꮛ', 
&['Ꮛ']), - ('ꮜ', &['Ꮜ']), - ('ꮝ', &['Ꮝ']), - ('ꮞ', &['Ꮞ']), - ('ꮟ', &['Ꮟ']), - ('ꮠ', &['Ꮠ']), - ('ꮡ', &['Ꮡ']), - ('ꮢ', &['Ꮢ']), - ('ꮣ', &['Ꮣ']), - ('ꮤ', &['Ꮤ']), - ('ꮥ', &['Ꮥ']), - ('ꮦ', &['Ꮦ']), - ('ꮧ', &['Ꮧ']), - ('ꮨ', &['Ꮨ']), - ('ꮩ', &['Ꮩ']), - ('ꮪ', &['Ꮪ']), - ('ꮫ', &['Ꮫ']), - ('ꮬ', &['Ꮬ']), - ('ꮭ', &['Ꮭ']), - ('ꮮ', &['Ꮮ']), - ('ꮯ', &['Ꮯ']), - ('ꮰ', &['Ꮰ']), - ('ꮱ', &['Ꮱ']), - ('ꮲ', &['Ꮲ']), - ('ꮳ', &['Ꮳ']), - ('ꮴ', &['Ꮴ']), - ('ꮵ', &['Ꮵ']), - ('ꮶ', &['Ꮶ']), - ('ꮷ', &['Ꮷ']), - ('ꮸ', &['Ꮸ']), - ('ꮹ', &['Ꮹ']), - ('ꮺ', &['Ꮺ']), - ('ꮻ', &['Ꮻ']), - ('ꮼ', &['Ꮼ']), - ('ꮽ', &['Ꮽ']), - ('ꮾ', &['Ꮾ']), - ('ꮿ', &['Ꮿ']), - ('ſt', &['st']), - ('st', &['ſt']), - ('A', &['a']), - ('B', &['b']), - ('C', &['c']), - ('D', &['d']), - ('E', &['e']), - ('F', &['f']), - ('G', &['g']), - ('H', &['h']), - ('I', &['i']), - ('J', &['j']), - ('K', &['k']), - ('L', &['l']), - ('M', &['m']), - ('N', &['n']), - ('O', &['o']), - ('P', &['p']), - ('Q', &['q']), - ('R', &['r']), - ('S', &['s']), - ('T', &['t']), - ('U', &['u']), - ('V', &['v']), - ('W', &['w']), - ('X', &['x']), - ('Y', &['y']), - ('Z', &['z']), - ('a', &['A']), - ('b', &['B']), - ('c', &['C']), - ('d', &['D']), - ('e', &['E']), - ('f', &['F']), - ('g', &['G']), - ('h', &['H']), - ('i', &['I']), - ('j', &['J']), - ('k', &['K']), - ('l', &['L']), - ('m', &['M']), - ('n', &['N']), - ('o', &['O']), - ('p', &['P']), - ('q', &['Q']), - ('r', &['R']), - ('s', &['S']), - ('t', &['T']), - ('u', &['U']), - ('v', &['V']), - ('w', &['W']), - ('x', &['X']), - ('y', &['Y']), - ('z', &['Z']), - ('𐐀', &['𐐨']), - ('𐐁', &['𐐩']), - ('𐐂', &['𐐪']), - ('𐐃', &['𐐫']), - ('𐐄', &['𐐬']), - ('𐐅', &['𐐭']), - ('𐐆', &['𐐮']), - ('𐐇', &['𐐯']), - ('𐐈', &['𐐰']), - ('𐐉', &['𐐱']), - ('𐐊', &['𐐲']), - ('𐐋', &['𐐳']), - ('𐐌', &['𐐴']), - ('𐐍', &['𐐵']), - ('𐐎', &['𐐶']), - ('𐐏', &['𐐷']), - ('𐐐', &['𐐸']), - ('𐐑', &['𐐹']), - ('𐐒', &['𐐺']), - ('𐐓', &['𐐻']), - ('𐐔', &['𐐼']), - ('𐐕', &['𐐽']), - ('𐐖', &['𐐾']), - ('𐐗', &['𐐿']), - ('𐐘', &['𐑀']), - ('𐐙', &['𐑁']), - ('𐐚', 
&['𐑂']), - ('𐐛', &['𐑃']), - ('𐐜', &['𐑄']), - ('𐐝', &['𐑅']), - ('𐐞', &['𐑆']), - ('𐐟', &['𐑇']), - ('𐐠', &['𐑈']), - ('𐐡', &['𐑉']), - ('𐐢', &['𐑊']), - ('𐐣', &['𐑋']), - ('𐐤', &['𐑌']), - ('𐐥', &['𐑍']), - ('𐐦', &['𐑎']), - ('𐐧', &['𐑏']), - ('𐐨', &['𐐀']), - ('𐐩', &['𐐁']), - ('𐐪', &['𐐂']), - ('𐐫', &['𐐃']), - ('𐐬', &['𐐄']), - ('𐐭', &['𐐅']), - ('𐐮', &['𐐆']), - ('𐐯', &['𐐇']), - ('𐐰', &['𐐈']), - ('𐐱', &['𐐉']), - ('𐐲', &['𐐊']), - ('𐐳', &['𐐋']), - ('𐐴', &['𐐌']), - ('𐐵', &['𐐍']), - ('𐐶', &['𐐎']), - ('𐐷', &['𐐏']), - ('𐐸', &['𐐐']), - ('𐐹', &['𐐑']), - ('𐐺', &['𐐒']), - ('𐐻', &['𐐓']), - ('𐐼', &['𐐔']), - ('𐐽', &['𐐕']), - ('𐐾', &['𐐖']), - ('𐐿', &['𐐗']), - ('𐑀', &['𐐘']), - ('𐑁', &['𐐙']), - ('𐑂', &['𐐚']), - ('𐑃', &['𐐛']), - ('𐑄', &['𐐜']), - ('𐑅', &['𐐝']), - ('𐑆', &['𐐞']), - ('𐑇', &['𐐟']), - ('𐑈', &['𐐠']), - ('𐑉', &['𐐡']), - ('𐑊', &['𐐢']), - ('𐑋', &['𐐣']), - ('𐑌', &['𐐤']), - ('𐑍', &['𐐥']), - ('𐑎', &['𐐦']), - ('𐑏', &['𐐧']), - ('𐒰', &['𐓘']), - ('𐒱', &['𐓙']), - ('𐒲', &['𐓚']), - ('𐒳', &['𐓛']), - ('𐒴', &['𐓜']), - ('𐒵', &['𐓝']), - ('𐒶', &['𐓞']), - ('𐒷', &['𐓟']), - ('𐒸', &['𐓠']), - ('𐒹', &['𐓡']), - ('𐒺', &['𐓢']), - ('𐒻', &['𐓣']), - ('𐒼', &['𐓤']), - ('𐒽', &['𐓥']), - ('𐒾', &['𐓦']), - ('𐒿', &['𐓧']), - ('𐓀', &['𐓨']), - ('𐓁', &['𐓩']), - ('𐓂', &['𐓪']), - ('𐓃', &['𐓫']), - ('𐓄', &['𐓬']), - ('𐓅', &['𐓭']), - ('𐓆', &['𐓮']), - ('𐓇', &['𐓯']), - ('𐓈', &['𐓰']), - ('𐓉', &['𐓱']), - ('𐓊', &['𐓲']), - ('𐓋', &['𐓳']), - ('𐓌', &['𐓴']), - ('𐓍', &['𐓵']), - ('𐓎', &['𐓶']), - ('𐓏', &['𐓷']), - ('𐓐', &['𐓸']), - ('𐓑', &['𐓹']), - ('𐓒', &['𐓺']), - ('𐓓', &['𐓻']), - ('𐓘', &['𐒰']), - ('𐓙', &['𐒱']), - ('𐓚', &['𐒲']), - ('𐓛', &['𐒳']), - ('𐓜', &['𐒴']), - ('𐓝', &['𐒵']), - ('𐓞', &['𐒶']), - ('𐓟', &['𐒷']), - ('𐓠', &['𐒸']), - ('𐓡', &['𐒹']), - ('𐓢', &['𐒺']), - ('𐓣', &['𐒻']), - ('𐓤', &['𐒼']), - ('𐓥', &['𐒽']), - ('𐓦', &['𐒾']), - ('𐓧', &['𐒿']), - ('𐓨', &['𐓀']), - ('𐓩', &['𐓁']), - ('𐓪', &['𐓂']), - ('𐓫', &['𐓃']), - ('𐓬', &['𐓄']), - ('𐓭', &['𐓅']), - ('𐓮', &['𐓆']), - ('𐓯', &['𐓇']), - ('𐓰', &['𐓈']), - ('𐓱', &['𐓉']), - ('𐓲', &['𐓊']), - ('𐓳', &['𐓋']), - 
('𐓴', &['𐓌']), - ('𐓵', &['𐓍']), - ('𐓶', &['𐓎']), - ('𐓷', &['𐓏']), - ('𐓸', &['𐓐']), - ('𐓹', &['𐓑']), - ('𐓺', &['𐓒']), - ('𐓻', &['𐓓']), - ('𐕰', &['𐖗']), - ('𐕱', &['𐖘']), - ('𐕲', &['𐖙']), - ('𐕳', &['𐖚']), - ('𐕴', &['𐖛']), - ('𐕵', &['𐖜']), - ('𐕶', &['𐖝']), - ('𐕷', &['𐖞']), - ('𐕸', &['𐖟']), - ('𐕹', &['𐖠']), - ('𐕺', &['𐖡']), - ('𐕼', &['𐖣']), - ('𐕽', &['𐖤']), - ('𐕾', &['𐖥']), - ('𐕿', &['𐖦']), - ('𐖀', &['𐖧']), - ('𐖁', &['𐖨']), - ('𐖂', &['𐖩']), - ('𐖃', &['𐖪']), - ('𐖄', &['𐖫']), - ('𐖅', &['𐖬']), - ('𐖆', &['𐖭']), - ('𐖇', &['𐖮']), - ('𐖈', &['𐖯']), - ('𐖉', &['𐖰']), - ('𐖊', &['𐖱']), - ('𐖌', &['𐖳']), - ('𐖍', &['𐖴']), - ('𐖎', &['𐖵']), - ('𐖏', &['𐖶']), - ('𐖐', &['𐖷']), - ('𐖑', &['𐖸']), - ('𐖒', &['𐖹']), - ('𐖔', &['𐖻']), - ('𐖕', &['𐖼']), - ('𐖗', &['𐕰']), - ('𐖘', &['𐕱']), - ('𐖙', &['𐕲']), - ('𐖚', &['𐕳']), - ('𐖛', &['𐕴']), - ('𐖜', &['𐕵']), - ('𐖝', &['𐕶']), - ('𐖞', &['𐕷']), - ('𐖟', &['𐕸']), - ('𐖠', &['𐕹']), - ('𐖡', &['𐕺']), - ('𐖣', &['𐕼']), - ('𐖤', &['𐕽']), - ('𐖥', &['𐕾']), - ('𐖦', &['𐕿']), - ('𐖧', &['𐖀']), - ('𐖨', &['𐖁']), - ('𐖩', &['𐖂']), - ('𐖪', &['𐖃']), - ('𐖫', &['𐖄']), - ('𐖬', &['𐖅']), - ('𐖭', &['𐖆']), - ('𐖮', &['𐖇']), - ('𐖯', &['𐖈']), - ('𐖰', &['𐖉']), - ('𐖱', &['𐖊']), - ('𐖳', &['𐖌']), - ('𐖴', &['𐖍']), - ('𐖵', &['𐖎']), - ('𐖶', &['𐖏']), - ('𐖷', &['𐖐']), - ('𐖸', &['𐖑']), - ('𐖹', &['𐖒']), - ('𐖻', &['𐖔']), - ('𐖼', &['𐖕']), - ('𐲀', &['𐳀']), - ('𐲁', &['𐳁']), - ('𐲂', &['𐳂']), - ('𐲃', &['𐳃']), - ('𐲄', &['𐳄']), - ('𐲅', &['𐳅']), - ('𐲆', &['𐳆']), - ('𐲇', &['𐳇']), - ('𐲈', &['𐳈']), - ('𐲉', &['𐳉']), - ('𐲊', &['𐳊']), - ('𐲋', &['𐳋']), - ('𐲌', &['𐳌']), - ('𐲍', &['𐳍']), - ('𐲎', &['𐳎']), - ('𐲏', &['𐳏']), - ('𐲐', &['𐳐']), - ('𐲑', &['𐳑']), - ('𐲒', &['𐳒']), - ('𐲓', &['𐳓']), - ('𐲔', &['𐳔']), - ('𐲕', &['𐳕']), - ('𐲖', &['𐳖']), - ('𐲗', &['𐳗']), - ('𐲘', &['𐳘']), - ('𐲙', &['𐳙']), - ('𐲚', &['𐳚']), - ('𐲛', &['𐳛']), - ('𐲜', &['𐳜']), - ('𐲝', &['𐳝']), - ('𐲞', &['𐳞']), - ('𐲟', &['𐳟']), - ('𐲠', &['𐳠']), - ('𐲡', &['𐳡']), - ('𐲢', &['𐳢']), - ('𐲣', &['𐳣']), - ('𐲤', &['𐳤']), - ('𐲥', &['𐳥']), - ('𐲦', &['𐳦']), - ('𐲧', 
&['𐳧']), - ('𐲨', &['𐳨']), - ('𐲩', &['𐳩']), - ('𐲪', &['𐳪']), - ('𐲫', &['𐳫']), - ('𐲬', &['𐳬']), - ('𐲭', &['𐳭']), - ('𐲮', &['𐳮']), - ('𐲯', &['𐳯']), - ('𐲰', &['𐳰']), - ('𐲱', &['𐳱']), - ('𐲲', &['𐳲']), - ('𐳀', &['𐲀']), - ('𐳁', &['𐲁']), - ('𐳂', &['𐲂']), - ('𐳃', &['𐲃']), - ('𐳄', &['𐲄']), - ('𐳅', &['𐲅']), - ('𐳆', &['𐲆']), - ('𐳇', &['𐲇']), - ('𐳈', &['𐲈']), - ('𐳉', &['𐲉']), - ('𐳊', &['𐲊']), - ('𐳋', &['𐲋']), - ('𐳌', &['𐲌']), - ('𐳍', &['𐲍']), - ('𐳎', &['𐲎']), - ('𐳏', &['𐲏']), - ('𐳐', &['𐲐']), - ('𐳑', &['𐲑']), - ('𐳒', &['𐲒']), - ('𐳓', &['𐲓']), - ('𐳔', &['𐲔']), - ('𐳕', &['𐲕']), - ('𐳖', &['𐲖']), - ('𐳗', &['𐲗']), - ('𐳘', &['𐲘']), - ('𐳙', &['𐲙']), - ('𐳚', &['𐲚']), - ('𐳛', &['𐲛']), - ('𐳜', &['𐲜']), - ('𐳝', &['𐲝']), - ('𐳞', &['𐲞']), - ('𐳟', &['𐲟']), - ('𐳠', &['𐲠']), - ('𐳡', &['𐲡']), - ('𐳢', &['𐲢']), - ('𐳣', &['𐲣']), - ('𐳤', &['𐲤']), - ('𐳥', &['𐲥']), - ('𐳦', &['𐲦']), - ('𐳧', &['𐲧']), - ('𐳨', &['𐲨']), - ('𐳩', &['𐲩']), - ('𐳪', &['𐲪']), - ('𐳫', &['𐲫']), - ('𐳬', &['𐲬']), - ('𐳭', &['𐲭']), - ('𐳮', &['𐲮']), - ('𐳯', &['𐲯']), - ('𐳰', &['𐲰']), - ('𐳱', &['𐲱']), - ('𐳲', &['𐲲']), - ('𐵐', &['𐵰']), - ('𐵑', &['𐵱']), - ('𐵒', &['𐵲']), - ('𐵓', &['𐵳']), - ('𐵔', &['𐵴']), - ('𐵕', &['𐵵']), - ('𐵖', &['𐵶']), - ('𐵗', &['𐵷']), - ('𐵘', &['𐵸']), - ('𐵙', &['𐵹']), - ('𐵚', &['𐵺']), - ('𐵛', &['𐵻']), - ('𐵜', &['𐵼']), - ('𐵝', &['𐵽']), - ('𐵞', &['𐵾']), - ('𐵟', &['𐵿']), - ('𐵠', &['𐶀']), - ('𐵡', &['𐶁']), - ('𐵢', &['𐶂']), - ('𐵣', &['𐶃']), - ('𐵤', &['𐶄']), - ('𐵥', &['𐶅']), - ('𐵰', &['𐵐']), - ('𐵱', &['𐵑']), - ('𐵲', &['𐵒']), - ('𐵳', &['𐵓']), - ('𐵴', &['𐵔']), - ('𐵵', &['𐵕']), - ('𐵶', &['𐵖']), - ('𐵷', &['𐵗']), - ('𐵸', &['𐵘']), - ('𐵹', &['𐵙']), - ('𐵺', &['𐵚']), - ('𐵻', &['𐵛']), - ('𐵼', &['𐵜']), - ('𐵽', &['𐵝']), - ('𐵾', &['𐵞']), - ('𐵿', &['𐵟']), - ('𐶀', &['𐵠']), - ('𐶁', &['𐵡']), - ('𐶂', &['𐵢']), - ('𐶃', &['𐵣']), - ('𐶄', &['𐵤']), - ('𐶅', &['𐵥']), - ('𑢠', &['𑣀']), - ('𑢡', &['𑣁']), - ('𑢢', &['𑣂']), - ('𑢣', &['𑣃']), - ('𑢤', &['𑣄']), - ('𑢥', &['𑣅']), - ('𑢦', &['𑣆']), - ('𑢧', &['𑣇']), - ('𑢨', &['𑣈']), - ('𑢩', &['𑣉']), - ('𑢪', &['𑣊']), - 
('𑢫', &['𑣋']), - ('𑢬', &['𑣌']), - ('𑢭', &['𑣍']), - ('𑢮', &['𑣎']), - ('𑢯', &['𑣏']), - ('𑢰', &['𑣐']), - ('𑢱', &['𑣑']), - ('𑢲', &['𑣒']), - ('𑢳', &['𑣓']), - ('𑢴', &['𑣔']), - ('𑢵', &['𑣕']), - ('𑢶', &['𑣖']), - ('𑢷', &['𑣗']), - ('𑢸', &['𑣘']), - ('𑢹', &['𑣙']), - ('𑢺', &['𑣚']), - ('𑢻', &['𑣛']), - ('𑢼', &['𑣜']), - ('𑢽', &['𑣝']), - ('𑢾', &['𑣞']), - ('𑢿', &['𑣟']), - ('𑣀', &['𑢠']), - ('𑣁', &['𑢡']), - ('𑣂', &['𑢢']), - ('𑣃', &['𑢣']), - ('𑣄', &['𑢤']), - ('𑣅', &['𑢥']), - ('𑣆', &['𑢦']), - ('𑣇', &['𑢧']), - ('𑣈', &['𑢨']), - ('𑣉', &['𑢩']), - ('𑣊', &['𑢪']), - ('𑣋', &['𑢫']), - ('𑣌', &['𑢬']), - ('𑣍', &['𑢭']), - ('𑣎', &['𑢮']), - ('𑣏', &['𑢯']), - ('𑣐', &['𑢰']), - ('𑣑', &['𑢱']), - ('𑣒', &['𑢲']), - ('𑣓', &['𑢳']), - ('𑣔', &['𑢴']), - ('𑣕', &['𑢵']), - ('𑣖', &['𑢶']), - ('𑣗', &['𑢷']), - ('𑣘', &['𑢸']), - ('𑣙', &['𑢹']), - ('𑣚', &['𑢺']), - ('𑣛', &['𑢻']), - ('𑣜', &['𑢼']), - ('𑣝', &['𑢽']), - ('𑣞', &['𑢾']), - ('𑣟', &['𑢿']), - ('𖹀', &['𖹠']), - ('𖹁', &['𖹡']), - ('𖹂', &['𖹢']), - ('𖹃', &['𖹣']), - ('𖹄', &['𖹤']), - ('𖹅', &['𖹥']), - ('𖹆', &['𖹦']), - ('𖹇', &['𖹧']), - ('𖹈', &['𖹨']), - ('𖹉', &['𖹩']), - ('𖹊', &['𖹪']), - ('𖹋', &['𖹫']), - ('𖹌', &['𖹬']), - ('𖹍', &['𖹭']), - ('𖹎', &['𖹮']), - ('𖹏', &['𖹯']), - ('𖹐', &['𖹰']), - ('𖹑', &['𖹱']), - ('𖹒', &['𖹲']), - ('𖹓', &['𖹳']), - ('𖹔', &['𖹴']), - ('𖹕', &['𖹵']), - ('𖹖', &['𖹶']), - ('𖹗', &['𖹷']), - ('𖹘', &['𖹸']), - ('𖹙', &['𖹹']), - ('𖹚', &['𖹺']), - ('𖹛', &['𖹻']), - ('𖹜', &['𖹼']), - ('𖹝', &['𖹽']), - ('𖹞', &['𖹾']), - ('𖹟', &['𖹿']), - ('𖹠', &['𖹀']), - ('𖹡', &['𖹁']), - ('𖹢', &['𖹂']), - ('𖹣', &['𖹃']), - ('𖹤', &['𖹄']), - ('𖹥', &['𖹅']), - ('𖹦', &['𖹆']), - ('𖹧', &['𖹇']), - ('𖹨', &['𖹈']), - ('𖹩', &['𖹉']), - ('𖹪', &['𖹊']), - ('𖹫', &['𖹋']), - ('𖹬', &['𖹌']), - ('𖹭', &['𖹍']), - ('𖹮', &['𖹎']), - ('𖹯', &['𖹏']), - ('𖹰', &['𖹐']), - ('𖹱', &['𖹑']), - ('𖹲', &['𖹒']), - ('𖹳', &['𖹓']), - ('𖹴', &['𖹔']), - ('𖹵', &['𖹕']), - ('𖹶', &['𖹖']), - ('𖹷', &['𖹗']), - ('𖹸', &['𖹘']), - ('𖹹', &['𖹙']), - ('𖹺', &['𖹚']), - ('𖹻', &['𖹛']), - ('𖹼', &['𖹜']), - ('𖹽', &['𖹝']), - ('𖹾', &['𖹞']), - ('𖹿', &['𖹟']), - ('𞤀', 
&['𞤢']), - ('𞤁', &['𞤣']), - ('𞤂', &['𞤤']), - ('𞤃', &['𞤥']), - ('𞤄', &['𞤦']), - ('𞤅', &['𞤧']), - ('𞤆', &['𞤨']), - ('𞤇', &['𞤩']), - ('𞤈', &['𞤪']), - ('𞤉', &['𞤫']), - ('𞤊', &['𞤬']), - ('𞤋', &['𞤭']), - ('𞤌', &['𞤮']), - ('𞤍', &['𞤯']), - ('𞤎', &['𞤰']), - ('𞤏', &['𞤱']), - ('𞤐', &['𞤲']), - ('𞤑', &['𞤳']), - ('𞤒', &['𞤴']), - ('𞤓', &['𞤵']), - ('𞤔', &['𞤶']), - ('𞤕', &['𞤷']), - ('𞤖', &['𞤸']), - ('𞤗', &['𞤹']), - ('𞤘', &['𞤺']), - ('𞤙', &['𞤻']), - ('𞤚', &['𞤼']), - ('𞤛', &['𞤽']), - ('𞤜', &['𞤾']), - ('𞤝', &['𞤿']), - ('𞤞', &['𞥀']), - ('𞤟', &['𞥁']), - ('𞤠', &['𞥂']), - ('𞤡', &['𞥃']), - ('𞤢', &['𞤀']), - ('𞤣', &['𞤁']), - ('𞤤', &['𞤂']), - ('𞤥', &['𞤃']), - ('𞤦', &['𞤄']), - ('𞤧', &['𞤅']), - ('𞤨', &['𞤆']), - ('𞤩', &['𞤇']), - ('𞤪', &['𞤈']), - ('𞤫', &['𞤉']), - ('𞤬', &['𞤊']), - ('𞤭', &['𞤋']), - ('𞤮', &['𞤌']), - ('𞤯', &['𞤍']), - ('𞤰', &['𞤎']), - ('𞤱', &['𞤏']), - ('𞤲', &['𞤐']), - ('𞤳', &['𞤑']), - ('𞤴', &['𞤒']), - ('𞤵', &['𞤓']), - ('𞤶', &['𞤔']), - ('𞤷', &['𞤕']), - ('𞤸', &['𞤖']), - ('𞤹', &['𞤗']), - ('𞤺', &['𞤘']), - ('𞤻', &['𞤙']), - ('𞤼', &['𞤚']), - ('𞤽', &['𞤛']), - ('𞤾', &['𞤜']), - ('𞤿', &['𞤝']), - ('𞥀', &['𞤞']), - ('𞥁', &['𞤟']), - ('𞥂', &['𞤠']), - ('𞥃', &['𞤡']), -]; diff --git a/vendor/regex-syntax/src/unicode_tables/general_category.rs b/vendor/regex-syntax/src/unicode_tables/general_category.rs deleted file mode 100644 index 6ff6b538..00000000 --- a/vendor/regex-syntax/src/unicode_tables/general_category.rs +++ /dev/null @@ -1,6717 +0,0 @@ -// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: -// -// ucd-generate general-category ucd-16.0.0 --chars --exclude surrogate -// -// Unicode version: 16.0.0. -// -// ucd-generate 0.3.1 is available on crates.io. 
- -pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ - ("Cased_Letter", CASED_LETTER), - ("Close_Punctuation", CLOSE_PUNCTUATION), - ("Connector_Punctuation", CONNECTOR_PUNCTUATION), - ("Control", CONTROL), - ("Currency_Symbol", CURRENCY_SYMBOL), - ("Dash_Punctuation", DASH_PUNCTUATION), - ("Decimal_Number", DECIMAL_NUMBER), - ("Enclosing_Mark", ENCLOSING_MARK), - ("Final_Punctuation", FINAL_PUNCTUATION), - ("Format", FORMAT), - ("Initial_Punctuation", INITIAL_PUNCTUATION), - ("Letter", LETTER), - ("Letter_Number", LETTER_NUMBER), - ("Line_Separator", LINE_SEPARATOR), - ("Lowercase_Letter", LOWERCASE_LETTER), - ("Mark", MARK), - ("Math_Symbol", MATH_SYMBOL), - ("Modifier_Letter", MODIFIER_LETTER), - ("Modifier_Symbol", MODIFIER_SYMBOL), - ("Nonspacing_Mark", NONSPACING_MARK), - ("Number", NUMBER), - ("Open_Punctuation", OPEN_PUNCTUATION), - ("Other", OTHER), - ("Other_Letter", OTHER_LETTER), - ("Other_Number", OTHER_NUMBER), - ("Other_Punctuation", OTHER_PUNCTUATION), - ("Other_Symbol", OTHER_SYMBOL), - ("Paragraph_Separator", PARAGRAPH_SEPARATOR), - ("Private_Use", PRIVATE_USE), - ("Punctuation", PUNCTUATION), - ("Separator", SEPARATOR), - ("Space_Separator", SPACE_SEPARATOR), - ("Spacing_Mark", SPACING_MARK), - ("Symbol", SYMBOL), - ("Titlecase_Letter", TITLECASE_LETTER), - ("Unassigned", UNASSIGNED), - ("Uppercase_Letter", UPPERCASE_LETTER), -]; - -pub const CASED_LETTER: &'static [(char, char)] = &[ - ('A', 'Z'), - ('a', 'z'), - ('µ', 'µ'), - ('À', 'Ö'), - ('Ø', 'ö'), - ('ø', 'ƺ'), - ('Ƽ', 'ƿ'), - ('DŽ', 'ʓ'), - ('ʕ', 'ʯ'), - ('Ͱ', 'ͳ'), - ('Ͷ', 'ͷ'), - ('ͻ', 'ͽ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', 'ϵ'), - ('Ϸ', 'ҁ'), - ('Ҋ', 'ԯ'), - ('Ա', 'Ֆ'), - ('ՠ', 'ֈ'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ა', 'ჺ'), - ('ჽ', 'ჿ'), - ('Ꭰ', 'Ᏽ'), - ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('ᴀ', 'ᴫ'), - ('ᵫ', 'ᵷ'), - ('ᵹ', 'ᶚ'), - ('Ḁ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), 
- ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ᾼ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῌ'), - ('ῐ', 'ΐ'), - ('ῖ', 'Ί'), - ('ῠ', 'Ῥ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῼ'), - ('ℂ', 'ℂ'), - ('ℇ', 'ℇ'), - ('ℊ', 'ℓ'), - ('ℕ', 'ℕ'), - ('ℙ', 'ℝ'), - ('ℤ', 'ℤ'), - ('Ω', 'Ω'), - ('ℨ', 'ℨ'), - ('K', 'ℭ'), - ('ℯ', 'ℴ'), - ('ℹ', 'ℹ'), - ('ℼ', 'ℿ'), - ('ⅅ', 'ⅉ'), - ('ⅎ', 'ⅎ'), - ('Ↄ', 'ↄ'), - ('Ⰰ', 'ⱻ'), - ('Ȿ', 'ⳤ'), - ('Ⳬ', 'ⳮ'), - ('Ⳳ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('Ꙁ', 'ꙭ'), - ('Ꚁ', 'ꚛ'), - ('Ꜣ', 'ꝯ'), - ('ꝱ', 'ꞇ'), - ('Ꞌ', 'ꞎ'), - ('Ꞑ', 'ꟍ'), - ('Ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'Ƛ'), - ('Ꟶ', 'ꟶ'), - ('ꟺ', 'ꟺ'), - ('ꬰ', 'ꭚ'), - ('ꭠ', 'ꭨ'), - ('ꭰ', 'ꮿ'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('A', 'Z'), - ('a', 'z'), - ('𐐀', '𐑏'), - ('𐒰', '𐓓'), - ('𐓘', '𐓻'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐲀', '𐲲'), - ('𐳀', '𐳲'), - ('𐵐', '𐵥'), - ('𐵰', '𐶅'), - ('𑢠', '𑣟'), - ('𖹀', '𖹿'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝛀'), - ('𝛂', '𝛚'), - ('𝛜', '𝛺'), - ('𝛼', '𝜔'), - ('𝜖', '𝜴'), - ('𝜶', '𝝎'), - ('𝝐', '𝝮'), - ('𝝰', '𝞈'), - ('𝞊', '𝞨'), - ('𝞪', '𝟂'), - ('𝟄', '𝟋'), - ('𝼀', '𝼉'), - ('𝼋', '𝼞'), - ('𝼥', '𝼪'), - ('𞤀', '𞥃'), -]; - -pub const CLOSE_PUNCTUATION: &'static [(char, char)] = &[ - (')', ')'), - (']', ']'), - ('}', '}'), - ('༻', '༻'), - ('༽', '༽'), - ('᚜', '᚜'), - ('⁆', '⁆'), - ('⁾', '⁾'), - ('₎', '₎'), - ('⌉', '⌉'), - ('⌋', '⌋'), - ('〉', '〉'), - ('❩', '❩'), - ('❫', '❫'), - ('❭', '❭'), - ('❯', '❯'), - ('❱', '❱'), - ('❳', '❳'), - ('❵', '❵'), - ('⟆', '⟆'), - ('⟧', '⟧'), - ('⟩', '⟩'), - ('⟫', '⟫'), - ('⟭', '⟭'), - ('⟯', '⟯'), - ('⦄', '⦄'), - ('⦆', '⦆'), - ('⦈', '⦈'), - ('⦊', '⦊'), - ('⦌', '⦌'), - ('⦎', '⦎'), - ('⦐', '⦐'), 
- ('⦒', '⦒'), - ('⦔', '⦔'), - ('⦖', '⦖'), - ('⦘', '⦘'), - ('⧙', '⧙'), - ('⧛', '⧛'), - ('⧽', '⧽'), - ('⸣', '⸣'), - ('⸥', '⸥'), - ('⸧', '⸧'), - ('⸩', '⸩'), - ('⹖', '⹖'), - ('⹘', '⹘'), - ('⹚', '⹚'), - ('⹜', '⹜'), - ('〉', '〉'), - ('》', '》'), - ('」', '」'), - ('』', '』'), - ('】', '】'), - ('〕', '〕'), - ('〗', '〗'), - ('〙', '〙'), - ('〛', '〛'), - ('〞', '〟'), - ('﴾', '﴾'), - ('︘', '︘'), - ('︶', '︶'), - ('︸', '︸'), - ('︺', '︺'), - ('︼', '︼'), - ('︾', '︾'), - ('﹀', '﹀'), - ('﹂', '﹂'), - ('﹄', '﹄'), - ('﹈', '﹈'), - ('﹚', '﹚'), - ('﹜', '﹜'), - ('﹞', '﹞'), - (')', ')'), - (']', ']'), - ('}', '}'), - ('⦆', '⦆'), - ('」', '」'), -]; - -pub const CONNECTOR_PUNCTUATION: &'static [(char, char)] = &[ - ('_', '_'), - ('‿', '⁀'), - ('⁔', '⁔'), - ('︳', '︴'), - ('﹍', '﹏'), - ('_', '_'), -]; - -pub const CONTROL: &'static [(char, char)] = - &[('\0', '\u{1f}'), ('\u{7f}', '\u{9f}')]; - -pub const CURRENCY_SYMBOL: &'static [(char, char)] = &[ - ('$', '$'), - ('¢', '¥'), - ('֏', '֏'), - ('؋', '؋'), - ('߾', '߿'), - ('৲', '৳'), - ('৻', '৻'), - ('૱', '૱'), - ('௹', '௹'), - ('฿', '฿'), - ('៛', '៛'), - ('₠', '⃀'), - ('꠸', '꠸'), - ('﷼', '﷼'), - ('﹩', '﹩'), - ('$', '$'), - ('¢', '£'), - ('¥', '₩'), - ('𑿝', '𑿠'), - ('𞋿', '𞋿'), - ('𞲰', '𞲰'), -]; - -pub const DASH_PUNCTUATION: &'static [(char, char)] = &[ - ('-', '-'), - ('֊', '֊'), - ('־', '־'), - ('᐀', '᐀'), - ('᠆', '᠆'), - ('‐', '―'), - ('⸗', '⸗'), - ('⸚', '⸚'), - ('⸺', '⸻'), - ('⹀', '⹀'), - ('⹝', '⹝'), - ('〜', '〜'), - ('〰', '〰'), - ('゠', '゠'), - ('︱', '︲'), - ('﹘', '﹘'), - ('﹣', '﹣'), - ('-', '-'), - ('𐵮', '𐵮'), - ('𐺭', '𐺭'), -]; - -pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ - ('0', '9'), - ('٠', '٩'), - ('۰', '۹'), - ('߀', '߉'), - ('०', '९'), - ('০', '৯'), - ('੦', '੯'), - ('૦', '૯'), - ('୦', '୯'), - ('௦', '௯'), - ('౦', '౯'), - ('೦', '೯'), - ('൦', '൯'), - ('෦', '෯'), - ('๐', '๙'), - ('໐', '໙'), - ('༠', '༩'), - ('၀', '၉'), - ('႐', '႙'), - ('០', '៩'), - ('᠐', '᠙'), - ('᥆', '᥏'), - ('᧐', '᧙'), - ('᪀', '᪉'), - ('᪐', '᪙'), - ('᭐', '᭙'), - ('᮰', 
'᮹'), - ('᱀', '᱉'), - ('᱐', '᱙'), - ('꘠', '꘩'), - ('꣐', '꣙'), - ('꤀', '꤉'), - ('꧐', '꧙'), - ('꧰', '꧹'), - ('꩐', '꩙'), - ('꯰', '꯹'), - ('0', '9'), - ('𐒠', '𐒩'), - ('𐴰', '𐴹'), - ('𐵀', '𐵉'), - ('𑁦', '𑁯'), - ('𑃰', '𑃹'), - ('𑄶', '𑄿'), - ('𑇐', '𑇙'), - ('𑋰', '𑋹'), - ('𑑐', '𑑙'), - ('𑓐', '𑓙'), - ('𑙐', '𑙙'), - ('𑛀', '𑛉'), - ('𑛐', '𑛣'), - ('𑜰', '𑜹'), - ('𑣠', '𑣩'), - ('𑥐', '𑥙'), - ('𑯰', '𑯹'), - ('𑱐', '𑱙'), - ('𑵐', '𑵙'), - ('𑶠', '𑶩'), - ('𑽐', '𑽙'), - ('𖄰', '𖄹'), - ('𖩠', '𖩩'), - ('𖫀', '𖫉'), - ('𖭐', '𖭙'), - ('𖵰', '𖵹'), - ('𜳰', '𜳹'), - ('𝟎', '𝟿'), - ('𞅀', '𞅉'), - ('𞋰', '𞋹'), - ('𞓰', '𞓹'), - ('𞗱', '𞗺'), - ('𞥐', '𞥙'), - ('🯰', '🯹'), -]; - -pub const ENCLOSING_MARK: &'static [(char, char)] = &[ - ('\u{488}', '\u{489}'), - ('\u{1abe}', '\u{1abe}'), - ('\u{20dd}', '\u{20e0}'), - ('\u{20e2}', '\u{20e4}'), - ('\u{a670}', '\u{a672}'), -]; - -pub const FINAL_PUNCTUATION: &'static [(char, char)] = &[ - ('»', '»'), - ('’', '’'), - ('”', '”'), - ('›', '›'), - ('⸃', '⸃'), - ('⸅', '⸅'), - ('⸊', '⸊'), - ('⸍', '⸍'), - ('⸝', '⸝'), - ('⸡', '⸡'), -]; - -pub const FORMAT: &'static [(char, char)] = &[ - ('\u{ad}', '\u{ad}'), - ('\u{600}', '\u{605}'), - ('\u{61c}', '\u{61c}'), - ('\u{6dd}', '\u{6dd}'), - ('\u{70f}', '\u{70f}'), - ('\u{890}', '\u{891}'), - ('\u{8e2}', '\u{8e2}'), - ('\u{180e}', '\u{180e}'), - ('\u{200b}', '\u{200f}'), - ('\u{202a}', '\u{202e}'), - ('\u{2060}', '\u{2064}'), - ('\u{2066}', '\u{206f}'), - ('\u{feff}', '\u{feff}'), - ('\u{fff9}', '\u{fffb}'), - ('\u{110bd}', '\u{110bd}'), - ('\u{110cd}', '\u{110cd}'), - ('\u{13430}', '\u{1343f}'), - ('\u{1bca0}', '\u{1bca3}'), - ('\u{1d173}', '\u{1d17a}'), - ('\u{e0001}', '\u{e0001}'), - ('\u{e0020}', '\u{e007f}'), -]; - -pub const INITIAL_PUNCTUATION: &'static [(char, char)] = &[ - ('«', '«'), - ('‘', '‘'), - ('‛', '“'), - ('‟', '‟'), - ('‹', '‹'), - ('⸂', '⸂'), - ('⸄', '⸄'), - ('⸉', '⸉'), - ('⸌', '⸌'), - ('⸜', '⸜'), - ('⸠', '⸠'), -]; - -pub const LETTER: &'static [(char, char)] = &[ - ('A', 'Z'), - ('a', 'z'), - ('ª', 'ª'), - ('µ', 'µ'), - 
('º', 'º'), - ('À', 'Ö'), - ('Ø', 'ö'), - ('ø', 'ˁ'), - ('ˆ', 'ˑ'), - ('ˠ', 'ˤ'), - ('ˬ', 'ˬ'), - ('ˮ', 'ˮ'), - ('Ͱ', 'ʹ'), - ('Ͷ', 'ͷ'), - ('ͺ', 'ͽ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', 'ϵ'), - ('Ϸ', 'ҁ'), - ('Ҋ', 'ԯ'), - ('Ա', 'Ֆ'), - ('ՙ', 'ՙ'), - ('ՠ', 'ֈ'), - ('א', 'ת'), - ('ׯ', 'ײ'), - ('ؠ', 'ي'), - ('ٮ', 'ٯ'), - ('ٱ', 'ۓ'), - ('ە', 'ە'), - ('ۥ', 'ۦ'), - ('ۮ', 'ۯ'), - ('ۺ', 'ۼ'), - ('ۿ', 'ۿ'), - ('ܐ', 'ܐ'), - ('ܒ', 'ܯ'), - ('ݍ', 'ޥ'), - ('ޱ', 'ޱ'), - ('ߊ', 'ߪ'), - ('ߴ', 'ߵ'), - ('ߺ', 'ߺ'), - ('ࠀ', 'ࠕ'), - ('ࠚ', 'ࠚ'), - ('ࠤ', 'ࠤ'), - ('ࠨ', 'ࠨ'), - ('ࡀ', 'ࡘ'), - ('ࡠ', 'ࡪ'), - ('ࡰ', 'ࢇ'), - ('ࢉ', 'ࢎ'), - ('ࢠ', 'ࣉ'), - ('ऄ', 'ह'), - ('ऽ', 'ऽ'), - ('ॐ', 'ॐ'), - ('क़', 'ॡ'), - ('ॱ', 'ঀ'), - ('অ', 'ঌ'), - ('এ', 'ঐ'), - ('ও', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('ঽ', 'ঽ'), - ('ৎ', 'ৎ'), - ('ড়', 'ঢ়'), - ('য়', 'ৡ'), - ('ৰ', 'ৱ'), - ('ৼ', 'ৼ'), - ('ਅ', 'ਊ'), - ('ਏ', 'ਐ'), - ('ਓ', 'ਨ'), - ('ਪ', 'ਰ'), - ('ਲ', 'ਲ਼'), - ('ਵ', 'ਸ਼'), - ('ਸ', 'ਹ'), - ('ਖ਼', 'ੜ'), - ('ਫ਼', 'ਫ਼'), - ('ੲ', 'ੴ'), - ('અ', 'ઍ'), - ('એ', 'ઑ'), - ('ઓ', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('ઽ', 'ઽ'), - ('ૐ', 'ૐ'), - ('ૠ', 'ૡ'), - ('ૹ', 'ૹ'), - ('ଅ', 'ଌ'), - ('ଏ', 'ଐ'), - ('ଓ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 'ଳ'), - ('ଵ', 'ହ'), - ('ଽ', 'ଽ'), - ('ଡ଼', 'ଢ଼'), - ('ୟ', 'ୡ'), - ('ୱ', 'ୱ'), - ('ஃ', 'ஃ'), - ('அ', 'ஊ'), - ('எ', 'ஐ'), - ('ஒ', 'க'), - ('ங', 'ச'), - ('ஜ', 'ஜ'), - ('ஞ', 'ட'), - ('ண', 'த'), - ('ந', 'ப'), - ('ம', 'ஹ'), - ('ௐ', 'ௐ'), - ('అ', 'ఌ'), - ('ఎ', 'ఐ'), - ('ఒ', 'న'), - ('ప', 'హ'), - ('ఽ', 'ఽ'), - ('ౘ', 'ౚ'), - ('ౝ', 'ౝ'), - ('ౠ', 'ౡ'), - ('ಀ', 'ಀ'), - ('ಅ', 'ಌ'), - ('ಎ', 'ಐ'), - ('ಒ', 'ನ'), - ('ಪ', 'ಳ'), - ('ವ', 'ಹ'), - ('ಽ', 'ಽ'), - ('ೝ', 'ೞ'), - ('ೠ', 'ೡ'), - ('ೱ', 'ೲ'), - ('ഄ', 'ഌ'), - ('എ', 'ഐ'), - ('ഒ', 'ഺ'), - ('ഽ', 'ഽ'), - ('ൎ', 'ൎ'), - ('ൔ', 'ൖ'), - ('ൟ', 'ൡ'), - ('ൺ', 'ൿ'), - ('අ', 'ඖ'), - ('ක', 'න'), - ('ඳ', 'ර'), - ('ල', 'ල'), - ('ව', 'ෆ'), - ('ก', 'ะ'), - ('า', 'ำ'), - ('เ', 'ๆ'), - ('ກ', 'ຂ'), - ('ຄ', 'ຄ'), - 
('ຆ', 'ຊ'), - ('ຌ', 'ຣ'), - ('ລ', 'ລ'), - ('ວ', 'ະ'), - ('າ', 'ຳ'), - ('ຽ', 'ຽ'), - ('ເ', 'ໄ'), - ('ໆ', 'ໆ'), - ('ໜ', 'ໟ'), - ('ༀ', 'ༀ'), - ('ཀ', 'ཇ'), - ('ཉ', 'ཬ'), - ('ྈ', 'ྌ'), - ('က', 'ဪ'), - ('ဿ', 'ဿ'), - ('ၐ', 'ၕ'), - ('ၚ', 'ၝ'), - ('ၡ', 'ၡ'), - ('ၥ', 'ၦ'), - ('ၮ', 'ၰ'), - ('ၵ', 'ႁ'), - ('ႎ', 'ႎ'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ა', 'ჺ'), - ('ჼ', 'ቈ'), - ('ቊ', 'ቍ'), - ('ቐ', 'ቖ'), - ('ቘ', 'ቘ'), - ('ቚ', 'ቝ'), - ('በ', 'ኈ'), - ('ኊ', 'ኍ'), - ('ነ', 'ኰ'), - ('ኲ', 'ኵ'), - ('ኸ', 'ኾ'), - ('ዀ', 'ዀ'), - ('ዂ', 'ዅ'), - ('ወ', 'ዖ'), - ('ዘ', 'ጐ'), - ('ጒ', 'ጕ'), - ('ጘ', 'ፚ'), - ('ᎀ', 'ᎏ'), - ('Ꭰ', 'Ᏽ'), - ('ᏸ', 'ᏽ'), - ('ᐁ', 'ᙬ'), - ('ᙯ', 'ᙿ'), - ('ᚁ', 'ᚚ'), - ('ᚠ', 'ᛪ'), - ('ᛱ', 'ᛸ'), - ('ᜀ', 'ᜑ'), - ('ᜟ', 'ᜱ'), - ('ᝀ', 'ᝑ'), - ('ᝠ', 'ᝬ'), - ('ᝮ', 'ᝰ'), - ('ក', 'ឳ'), - ('ៗ', 'ៗ'), - ('ៜ', 'ៜ'), - ('ᠠ', 'ᡸ'), - ('ᢀ', 'ᢄ'), - ('ᢇ', 'ᢨ'), - ('ᢪ', 'ᢪ'), - ('ᢰ', 'ᣵ'), - ('ᤀ', 'ᤞ'), - ('ᥐ', 'ᥭ'), - ('ᥰ', 'ᥴ'), - ('ᦀ', 'ᦫ'), - ('ᦰ', 'ᧉ'), - ('ᨀ', 'ᨖ'), - ('ᨠ', 'ᩔ'), - ('ᪧ', 'ᪧ'), - ('ᬅ', 'ᬳ'), - ('ᭅ', 'ᭌ'), - ('ᮃ', 'ᮠ'), - ('ᮮ', 'ᮯ'), - ('ᮺ', 'ᯥ'), - ('ᰀ', 'ᰣ'), - ('ᱍ', 'ᱏ'), - ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('ᳩ', 'ᳬ'), - ('ᳮ', 'ᳳ'), - ('ᳵ', 'ᳶ'), - ('ᳺ', 'ᳺ'), - ('ᴀ', 'ᶿ'), - ('Ḁ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), - ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ᾼ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῌ'), - ('ῐ', 'ΐ'), - ('ῖ', 'Ί'), - ('ῠ', 'Ῥ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῼ'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('ℂ', 'ℂ'), - ('ℇ', 'ℇ'), - ('ℊ', 'ℓ'), - ('ℕ', 'ℕ'), - ('ℙ', 'ℝ'), - ('ℤ', 'ℤ'), - ('Ω', 'Ω'), - ('ℨ', 'ℨ'), - ('K', 'ℭ'), - ('ℯ', 'ℹ'), - ('ℼ', 'ℿ'), - ('ⅅ', 'ⅉ'), - ('ⅎ', 'ⅎ'), - ('Ↄ', 'ↄ'), - ('Ⰰ', 'ⳤ'), - ('Ⳬ', 'ⳮ'), - ('Ⳳ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ⴰ', 'ⵧ'), - ('ⵯ', 'ⵯ'), - ('ⶀ', 'ⶖ'), - ('ⶠ', 'ⶦ'), - ('ⶨ', 'ⶮ'), - ('ⶰ', 'ⶶ'), - ('ⶸ', 'ⶾ'), - ('ⷀ', 'ⷆ'), - ('ⷈ', 'ⷎ'), - ('ⷐ', 'ⷖ'), - ('ⷘ', 'ⷞ'), - ('ⸯ', 'ⸯ'), - ('々', '〆'), - ('〱', '〵'), 
- ('〻', '〼'), - ('ぁ', 'ゖ'), - ('ゝ', 'ゟ'), - ('ァ', 'ヺ'), - ('ー', 'ヿ'), - ('ㄅ', 'ㄯ'), - ('ㄱ', 'ㆎ'), - ('ㆠ', 'ㆿ'), - ('ㇰ', 'ㇿ'), - ('㐀', '䶿'), - ('一', 'ꒌ'), - ('ꓐ', 'ꓽ'), - ('ꔀ', 'ꘌ'), - ('ꘐ', 'ꘟ'), - ('ꘪ', 'ꘫ'), - ('Ꙁ', 'ꙮ'), - ('ꙿ', 'ꚝ'), - ('ꚠ', 'ꛥ'), - ('ꜗ', 'ꜟ'), - ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟍ'), - ('Ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'Ƛ'), - ('ꟲ', 'ꠁ'), - ('ꠃ', 'ꠅ'), - ('ꠇ', 'ꠊ'), - ('ꠌ', 'ꠢ'), - ('ꡀ', 'ꡳ'), - ('ꢂ', 'ꢳ'), - ('ꣲ', 'ꣷ'), - ('ꣻ', 'ꣻ'), - ('ꣽ', 'ꣾ'), - ('ꤊ', 'ꤥ'), - ('ꤰ', 'ꥆ'), - ('ꥠ', 'ꥼ'), - ('ꦄ', 'ꦲ'), - ('ꧏ', 'ꧏ'), - ('ꧠ', 'ꧤ'), - ('ꧦ', 'ꧯ'), - ('ꧺ', 'ꧾ'), - ('ꨀ', 'ꨨ'), - ('ꩀ', 'ꩂ'), - ('ꩄ', 'ꩋ'), - ('ꩠ', 'ꩶ'), - ('ꩺ', 'ꩺ'), - ('ꩾ', 'ꪯ'), - ('ꪱ', 'ꪱ'), - ('ꪵ', 'ꪶ'), - ('ꪹ', 'ꪽ'), - ('ꫀ', 'ꫀ'), - ('ꫂ', 'ꫂ'), - ('ꫛ', 'ꫝ'), - ('ꫠ', 'ꫪ'), - ('ꫲ', 'ꫴ'), - ('ꬁ', 'ꬆ'), - ('ꬉ', 'ꬎ'), - ('ꬑ', 'ꬖ'), - ('ꬠ', 'ꬦ'), - ('ꬨ', 'ꬮ'), - ('ꬰ', 'ꭚ'), - ('ꭜ', 'ꭩ'), - ('ꭰ', 'ꯢ'), - ('가', '힣'), - ('ힰ', 'ퟆ'), - ('ퟋ', 'ퟻ'), - ('豈', '舘'), - ('並', '龎'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('יִ', 'יִ'), - ('ײַ', 'ﬨ'), - ('שׁ', 'זּ'), - ('טּ', 'לּ'), - ('מּ', 'מּ'), - ('נּ', 'סּ'), - ('ףּ', 'פּ'), - ('צּ', 'ﮱ'), - ('ﯓ', 'ﴽ'), - ('ﵐ', 'ﶏ'), - ('ﶒ', 'ﷇ'), - ('ﷰ', 'ﷻ'), - ('ﹰ', 'ﹴ'), - ('ﹶ', 'ﻼ'), - ('A', 'Z'), - ('a', 'z'), - ('ヲ', 'ᄒ'), - ('ᅡ', 'ᅦ'), - ('ᅧ', 'ᅬ'), - ('ᅭ', 'ᅲ'), - ('ᅳ', 'ᅵ'), - ('𐀀', '𐀋'), - ('𐀍', '𐀦'), - ('𐀨', '𐀺'), - ('𐀼', '𐀽'), - ('𐀿', '𐁍'), - ('𐁐', '𐁝'), - ('𐂀', '𐃺'), - ('𐊀', '𐊜'), - ('𐊠', '𐋐'), - ('𐌀', '𐌟'), - ('𐌭', '𐍀'), - ('𐍂', '𐍉'), - ('𐍐', '𐍵'), - ('𐎀', '𐎝'), - ('𐎠', '𐏃'), - ('𐏈', '𐏏'), - ('𐐀', '𐒝'), - ('𐒰', '𐓓'), - ('𐓘', '𐓻'), - ('𐔀', '𐔧'), - ('𐔰', '𐕣'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐗀', '𐗳'), - ('𐘀', '𐜶'), - ('𐝀', '𐝕'), - ('𐝠', '𐝧'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𐠀', '𐠅'), - ('𐠈', '𐠈'), - ('𐠊', '𐠵'), - ('𐠷', '𐠸'), - ('𐠼', '𐠼'), - ('𐠿', '𐡕'), - ('𐡠', '𐡶'), - ('𐢀', '𐢞'), - ('𐣠', '𐣲'), - ('𐣴', '𐣵'), - ('𐤀', '𐤕'), - ('𐤠', '𐤹'), - ('𐦀', '𐦷'), - ('𐦾', '𐦿'), - ('𐨀', 
'𐨀'), - ('𐨐', '𐨓'), - ('𐨕', '𐨗'), - ('𐨙', '𐨵'), - ('𐩠', '𐩼'), - ('𐪀', '𐪜'), - ('𐫀', '𐫇'), - ('𐫉', '𐫤'), - ('𐬀', '𐬵'), - ('𐭀', '𐭕'), - ('𐭠', '𐭲'), - ('𐮀', '𐮑'), - ('𐰀', '𐱈'), - ('𐲀', '𐲲'), - ('𐳀', '𐳲'), - ('𐴀', '𐴣'), - ('𐵊', '𐵥'), - ('𐵯', '𐶅'), - ('𐺀', '𐺩'), - ('𐺰', '𐺱'), - ('𐻂', '𐻄'), - ('𐼀', '𐼜'), - ('𐼧', '𐼧'), - ('𐼰', '𐽅'), - ('𐽰', '𐾁'), - ('𐾰', '𐿄'), - ('𐿠', '𐿶'), - ('𑀃', '𑀷'), - ('𑁱', '𑁲'), - ('𑁵', '𑁵'), - ('𑂃', '𑂯'), - ('𑃐', '𑃨'), - ('𑄃', '𑄦'), - ('𑅄', '𑅄'), - ('𑅇', '𑅇'), - ('𑅐', '𑅲'), - ('𑅶', '𑅶'), - ('𑆃', '𑆲'), - ('𑇁', '𑇄'), - ('𑇚', '𑇚'), - ('𑇜', '𑇜'), - ('𑈀', '𑈑'), - ('𑈓', '𑈫'), - ('𑈿', '𑉀'), - ('𑊀', '𑊆'), - ('𑊈', '𑊈'), - ('𑊊', '𑊍'), - ('𑊏', '𑊝'), - ('𑊟', '𑊨'), - ('𑊰', '𑋞'), - ('𑌅', '𑌌'), - ('𑌏', '𑌐'), - ('𑌓', '𑌨'), - ('𑌪', '𑌰'), - ('𑌲', '𑌳'), - ('𑌵', '𑌹'), - ('𑌽', '𑌽'), - ('𑍐', '𑍐'), - ('𑍝', '𑍡'), - ('𑎀', '𑎉'), - ('𑎋', '𑎋'), - ('𑎎', '𑎎'), - ('𑎐', '𑎵'), - ('𑎷', '𑎷'), - ('𑏑', '𑏑'), - ('𑏓', '𑏓'), - ('𑐀', '𑐴'), - ('𑑇', '𑑊'), - ('𑑟', '𑑡'), - ('𑒀', '𑒯'), - ('𑓄', '𑓅'), - ('𑓇', '𑓇'), - ('𑖀', '𑖮'), - ('𑗘', '𑗛'), - ('𑘀', '𑘯'), - ('𑙄', '𑙄'), - ('𑚀', '𑚪'), - ('𑚸', '𑚸'), - ('𑜀', '𑜚'), - ('𑝀', '𑝆'), - ('𑠀', '𑠫'), - ('𑢠', '𑣟'), - ('𑣿', '𑤆'), - ('𑤉', '𑤉'), - ('𑤌', '𑤓'), - ('𑤕', '𑤖'), - ('𑤘', '𑤯'), - ('𑤿', '𑤿'), - ('𑥁', '𑥁'), - ('𑦠', '𑦧'), - ('𑦪', '𑧐'), - ('𑧡', '𑧡'), - ('𑧣', '𑧣'), - ('𑨀', '𑨀'), - ('𑨋', '𑨲'), - ('𑨺', '𑨺'), - ('𑩐', '𑩐'), - ('𑩜', '𑪉'), - ('𑪝', '𑪝'), - ('𑪰', '𑫸'), - ('𑯀', '𑯠'), - ('𑰀', '𑰈'), - ('𑰊', '𑰮'), - ('𑱀', '𑱀'), - ('𑱲', '𑲏'), - ('𑴀', '𑴆'), - ('𑴈', '𑴉'), - ('𑴋', '𑴰'), - ('𑵆', '𑵆'), - ('𑵠', '𑵥'), - ('𑵧', '𑵨'), - ('𑵪', '𑶉'), - ('𑶘', '𑶘'), - ('𑻠', '𑻲'), - ('𑼂', '𑼂'), - ('𑼄', '𑼐'), - ('𑼒', '𑼳'), - ('𑾰', '𑾰'), - ('𒀀', '𒎙'), - ('𒒀', '𒕃'), - ('𒾐', '𒿰'), - ('𓀀', '𓐯'), - ('𓑁', '𓑆'), - ('𓑠', '𔏺'), - ('𔐀', '𔙆'), - ('𖄀', '𖄝'), - ('𖠀', '𖨸'), - ('𖩀', '𖩞'), - ('𖩰', '𖪾'), - ('𖫐', '𖫭'), - ('𖬀', '𖬯'), - ('𖭀', '𖭃'), - ('𖭣', '𖭷'), - ('𖭽', '𖮏'), - ('𖵀', '𖵬'), - ('𖹀', '𖹿'), - ('𖼀', '𖽊'), - ('𖽐', '𖽐'), - ('𖾓', '𖾟'), - ('𖿠', '𖿡'), - ('𖿣', '𖿣'), - ('𗀀', '𘟷'), - ('𘠀', '𘳕'), - 
('𘳿', '𘴈'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('𛀀', '𛄢'), - ('𛄲', '𛄲'), - ('𛅐', '𛅒'), - ('𛅕', '𛅕'), - ('𛅤', '𛅧'), - ('𛅰', '𛋻'), - ('𛰀', '𛱪'), - ('𛱰', '𛱼'), - ('𛲀', '𛲈'), - ('𛲐', '𛲙'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝛀'), - ('𝛂', '𝛚'), - ('𝛜', '𝛺'), - ('𝛼', '𝜔'), - ('𝜖', '𝜴'), - ('𝜶', '𝝎'), - ('𝝐', '𝝮'), - ('𝝰', '𝞈'), - ('𝞊', '𝞨'), - ('𝞪', '𝟂'), - ('𝟄', '𝟋'), - ('𝼀', '𝼞'), - ('𝼥', '𝼪'), - ('𞀰', '𞁭'), - ('𞄀', '𞄬'), - ('𞄷', '𞄽'), - ('𞅎', '𞅎'), - ('𞊐', '𞊭'), - ('𞋀', '𞋫'), - ('𞓐', '𞓫'), - ('𞗐', '𞗭'), - ('𞗰', '𞗰'), - ('𞟠', '𞟦'), - ('𞟨', '𞟫'), - ('𞟭', '𞟮'), - ('𞟰', '𞟾'), - ('𞠀', '𞣄'), - ('𞤀', '𞥃'), - ('𞥋', '𞥋'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('𠀀', '𪛟'), - ('𪜀', '𫜹'), - ('𫝀', '𫠝'), - ('𫠠', '𬺡'), - ('𬺰', '𮯠'), - ('𮯰', '𮹝'), - ('丽', '𪘀'), - ('𰀀', '𱍊'), - ('𱍐', '𲎯'), -]; - -pub const LETTER_NUMBER: &'static [(char, char)] = &[ - ('ᛮ', 'ᛰ'), - ('Ⅰ', 'ↂ'), - ('ↅ', 'ↈ'), - ('〇', '〇'), - ('〡', '〩'), - ('〸', '〺'), - ('ꛦ', 'ꛯ'), - ('𐅀', '𐅴'), - ('𐍁', '𐍁'), - ('𐍊', '𐍊'), - ('𐏑', '𐏕'), - ('𒐀', '𒑮'), -]; - -pub const LINE_SEPARATOR: &'static [(char, char)] = - &[('\u{2028}', '\u{2028}')]; - -pub const LOWERCASE_LETTER: &'static [(char, char)] = &[ - ('a', 'z'), - ('µ', 'µ'), - ('ß', 'ö'), - ('ø', 'ÿ'), - ('ā', 'ā'), - ('ă', 'ă'), - ('ą', 'ą'), - ('ć', 'ć'), - ('ĉ', 'ĉ'), - ('ċ', 'ċ'), - ('č', 'č'), - ('ď', 
'ď'), - ('đ', 'đ'), - ('ē', 'ē'), - ('ĕ', 'ĕ'), - ('ė', 'ė'), - ('ę', 'ę'), - ('ě', 'ě'), - ('ĝ', 'ĝ'), - ('ğ', 'ğ'), - ('ġ', 'ġ'), - ('ģ', 'ģ'), - ('ĥ', 'ĥ'), - ('ħ', 'ħ'), - ('ĩ', 'ĩ'), - ('ī', 'ī'), - ('ĭ', 'ĭ'), - ('į', 'į'), - ('ı', 'ı'), - ('ij', 'ij'), - ('ĵ', 'ĵ'), - ('ķ', 'ĸ'), - ('ĺ', 'ĺ'), - ('ļ', 'ļ'), - ('ľ', 'ľ'), - ('ŀ', 'ŀ'), - ('ł', 'ł'), - ('ń', 'ń'), - ('ņ', 'ņ'), - ('ň', 'ʼn'), - ('ŋ', 'ŋ'), - ('ō', 'ō'), - ('ŏ', 'ŏ'), - ('ő', 'ő'), - ('œ', 'œ'), - ('ŕ', 'ŕ'), - ('ŗ', 'ŗ'), - ('ř', 'ř'), - ('ś', 'ś'), - ('ŝ', 'ŝ'), - ('ş', 'ş'), - ('š', 'š'), - ('ţ', 'ţ'), - ('ť', 'ť'), - ('ŧ', 'ŧ'), - ('ũ', 'ũ'), - ('ū', 'ū'), - ('ŭ', 'ŭ'), - ('ů', 'ů'), - ('ű', 'ű'), - ('ų', 'ų'), - ('ŵ', 'ŵ'), - ('ŷ', 'ŷ'), - ('ź', 'ź'), - ('ż', 'ż'), - ('ž', 'ƀ'), - ('ƃ', 'ƃ'), - ('ƅ', 'ƅ'), - ('ƈ', 'ƈ'), - ('ƌ', 'ƍ'), - ('ƒ', 'ƒ'), - ('ƕ', 'ƕ'), - ('ƙ', 'ƛ'), - ('ƞ', 'ƞ'), - ('ơ', 'ơ'), - ('ƣ', 'ƣ'), - ('ƥ', 'ƥ'), - ('ƨ', 'ƨ'), - ('ƪ', 'ƫ'), - ('ƭ', 'ƭ'), - ('ư', 'ư'), - ('ƴ', 'ƴ'), - ('ƶ', 'ƶ'), - ('ƹ', 'ƺ'), - ('ƽ', 'ƿ'), - ('dž', 'dž'), - ('lj', 'lj'), - ('nj', 'nj'), - ('ǎ', 'ǎ'), - ('ǐ', 'ǐ'), - ('ǒ', 'ǒ'), - ('ǔ', 'ǔ'), - ('ǖ', 'ǖ'), - ('ǘ', 'ǘ'), - ('ǚ', 'ǚ'), - ('ǜ', 'ǝ'), - ('ǟ', 'ǟ'), - ('ǡ', 'ǡ'), - ('ǣ', 'ǣ'), - ('ǥ', 'ǥ'), - ('ǧ', 'ǧ'), - ('ǩ', 'ǩ'), - ('ǫ', 'ǫ'), - ('ǭ', 'ǭ'), - ('ǯ', 'ǰ'), - ('dz', 'dz'), - ('ǵ', 'ǵ'), - ('ǹ', 'ǹ'), - ('ǻ', 'ǻ'), - ('ǽ', 'ǽ'), - ('ǿ', 'ǿ'), - ('ȁ', 'ȁ'), - ('ȃ', 'ȃ'), - ('ȅ', 'ȅ'), - ('ȇ', 'ȇ'), - ('ȉ', 'ȉ'), - ('ȋ', 'ȋ'), - ('ȍ', 'ȍ'), - ('ȏ', 'ȏ'), - ('ȑ', 'ȑ'), - ('ȓ', 'ȓ'), - ('ȕ', 'ȕ'), - ('ȗ', 'ȗ'), - ('ș', 'ș'), - ('ț', 'ț'), - ('ȝ', 'ȝ'), - ('ȟ', 'ȟ'), - ('ȡ', 'ȡ'), - ('ȣ', 'ȣ'), - ('ȥ', 'ȥ'), - ('ȧ', 'ȧ'), - ('ȩ', 'ȩ'), - ('ȫ', 'ȫ'), - ('ȭ', 'ȭ'), - ('ȯ', 'ȯ'), - ('ȱ', 'ȱ'), - ('ȳ', 'ȹ'), - ('ȼ', 'ȼ'), - ('ȿ', 'ɀ'), - ('ɂ', 'ɂ'), - ('ɇ', 'ɇ'), - ('ɉ', 'ɉ'), - ('ɋ', 'ɋ'), - ('ɍ', 'ɍ'), - ('ɏ', 'ʓ'), - ('ʕ', 'ʯ'), - ('ͱ', 'ͱ'), - ('ͳ', 'ͳ'), - ('ͷ', 'ͷ'), - ('ͻ', 'ͽ'), - ('ΐ', 'ΐ'), - ('ά', 'ώ'), - ('ϐ', 'ϑ'), - ('ϕ', 
'ϗ'), - ('ϙ', 'ϙ'), - ('ϛ', 'ϛ'), - ('ϝ', 'ϝ'), - ('ϟ', 'ϟ'), - ('ϡ', 'ϡ'), - ('ϣ', 'ϣ'), - ('ϥ', 'ϥ'), - ('ϧ', 'ϧ'), - ('ϩ', 'ϩ'), - ('ϫ', 'ϫ'), - ('ϭ', 'ϭ'), - ('ϯ', 'ϳ'), - ('ϵ', 'ϵ'), - ('ϸ', 'ϸ'), - ('ϻ', 'ϼ'), - ('а', 'џ'), - ('ѡ', 'ѡ'), - ('ѣ', 'ѣ'), - ('ѥ', 'ѥ'), - ('ѧ', 'ѧ'), - ('ѩ', 'ѩ'), - ('ѫ', 'ѫ'), - ('ѭ', 'ѭ'), - ('ѯ', 'ѯ'), - ('ѱ', 'ѱ'), - ('ѳ', 'ѳ'), - ('ѵ', 'ѵ'), - ('ѷ', 'ѷ'), - ('ѹ', 'ѹ'), - ('ѻ', 'ѻ'), - ('ѽ', 'ѽ'), - ('ѿ', 'ѿ'), - ('ҁ', 'ҁ'), - ('ҋ', 'ҋ'), - ('ҍ', 'ҍ'), - ('ҏ', 'ҏ'), - ('ґ', 'ґ'), - ('ғ', 'ғ'), - ('ҕ', 'ҕ'), - ('җ', 'җ'), - ('ҙ', 'ҙ'), - ('қ', 'қ'), - ('ҝ', 'ҝ'), - ('ҟ', 'ҟ'), - ('ҡ', 'ҡ'), - ('ң', 'ң'), - ('ҥ', 'ҥ'), - ('ҧ', 'ҧ'), - ('ҩ', 'ҩ'), - ('ҫ', 'ҫ'), - ('ҭ', 'ҭ'), - ('ү', 'ү'), - ('ұ', 'ұ'), - ('ҳ', 'ҳ'), - ('ҵ', 'ҵ'), - ('ҷ', 'ҷ'), - ('ҹ', 'ҹ'), - ('һ', 'һ'), - ('ҽ', 'ҽ'), - ('ҿ', 'ҿ'), - ('ӂ', 'ӂ'), - ('ӄ', 'ӄ'), - ('ӆ', 'ӆ'), - ('ӈ', 'ӈ'), - ('ӊ', 'ӊ'), - ('ӌ', 'ӌ'), - ('ӎ', 'ӏ'), - ('ӑ', 'ӑ'), - ('ӓ', 'ӓ'), - ('ӕ', 'ӕ'), - ('ӗ', 'ӗ'), - ('ә', 'ә'), - ('ӛ', 'ӛ'), - ('ӝ', 'ӝ'), - ('ӟ', 'ӟ'), - ('ӡ', 'ӡ'), - ('ӣ', 'ӣ'), - ('ӥ', 'ӥ'), - ('ӧ', 'ӧ'), - ('ө', 'ө'), - ('ӫ', 'ӫ'), - ('ӭ', 'ӭ'), - ('ӯ', 'ӯ'), - ('ӱ', 'ӱ'), - ('ӳ', 'ӳ'), - ('ӵ', 'ӵ'), - ('ӷ', 'ӷ'), - ('ӹ', 'ӹ'), - ('ӻ', 'ӻ'), - ('ӽ', 'ӽ'), - ('ӿ', 'ӿ'), - ('ԁ', 'ԁ'), - ('ԃ', 'ԃ'), - ('ԅ', 'ԅ'), - ('ԇ', 'ԇ'), - ('ԉ', 'ԉ'), - ('ԋ', 'ԋ'), - ('ԍ', 'ԍ'), - ('ԏ', 'ԏ'), - ('ԑ', 'ԑ'), - ('ԓ', 'ԓ'), - ('ԕ', 'ԕ'), - ('ԗ', 'ԗ'), - ('ԙ', 'ԙ'), - ('ԛ', 'ԛ'), - ('ԝ', 'ԝ'), - ('ԟ', 'ԟ'), - ('ԡ', 'ԡ'), - ('ԣ', 'ԣ'), - ('ԥ', 'ԥ'), - ('ԧ', 'ԧ'), - ('ԩ', 'ԩ'), - ('ԫ', 'ԫ'), - ('ԭ', 'ԭ'), - ('ԯ', 'ԯ'), - ('ՠ', 'ֈ'), - ('ა', 'ჺ'), - ('ჽ', 'ჿ'), - ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲈ'), - ('ᲊ', 'ᲊ'), - ('ᴀ', 'ᴫ'), - ('ᵫ', 'ᵷ'), - ('ᵹ', 'ᶚ'), - ('ḁ', 'ḁ'), - ('ḃ', 'ḃ'), - ('ḅ', 'ḅ'), - ('ḇ', 'ḇ'), - ('ḉ', 'ḉ'), - ('ḋ', 'ḋ'), - ('ḍ', 'ḍ'), - ('ḏ', 'ḏ'), - ('ḑ', 'ḑ'), - ('ḓ', 'ḓ'), - ('ḕ', 'ḕ'), - ('ḗ', 'ḗ'), - ('ḙ', 'ḙ'), - ('ḛ', 'ḛ'), - ('ḝ', 'ḝ'), - ('ḟ', 'ḟ'), - ('ḡ', 'ḡ'), - ('ḣ', 'ḣ'), - 
('ḥ', 'ḥ'), - ('ḧ', 'ḧ'), - ('ḩ', 'ḩ'), - ('ḫ', 'ḫ'), - ('ḭ', 'ḭ'), - ('ḯ', 'ḯ'), - ('ḱ', 'ḱ'), - ('ḳ', 'ḳ'), - ('ḵ', 'ḵ'), - ('ḷ', 'ḷ'), - ('ḹ', 'ḹ'), - ('ḻ', 'ḻ'), - ('ḽ', 'ḽ'), - ('ḿ', 'ḿ'), - ('ṁ', 'ṁ'), - ('ṃ', 'ṃ'), - ('ṅ', 'ṅ'), - ('ṇ', 'ṇ'), - ('ṉ', 'ṉ'), - ('ṋ', 'ṋ'), - ('ṍ', 'ṍ'), - ('ṏ', 'ṏ'), - ('ṑ', 'ṑ'), - ('ṓ', 'ṓ'), - ('ṕ', 'ṕ'), - ('ṗ', 'ṗ'), - ('ṙ', 'ṙ'), - ('ṛ', 'ṛ'), - ('ṝ', 'ṝ'), - ('ṟ', 'ṟ'), - ('ṡ', 'ṡ'), - ('ṣ', 'ṣ'), - ('ṥ', 'ṥ'), - ('ṧ', 'ṧ'), - ('ṩ', 'ṩ'), - ('ṫ', 'ṫ'), - ('ṭ', 'ṭ'), - ('ṯ', 'ṯ'), - ('ṱ', 'ṱ'), - ('ṳ', 'ṳ'), - ('ṵ', 'ṵ'), - ('ṷ', 'ṷ'), - ('ṹ', 'ṹ'), - ('ṻ', 'ṻ'), - ('ṽ', 'ṽ'), - ('ṿ', 'ṿ'), - ('ẁ', 'ẁ'), - ('ẃ', 'ẃ'), - ('ẅ', 'ẅ'), - ('ẇ', 'ẇ'), - ('ẉ', 'ẉ'), - ('ẋ', 'ẋ'), - ('ẍ', 'ẍ'), - ('ẏ', 'ẏ'), - ('ẑ', 'ẑ'), - ('ẓ', 'ẓ'), - ('ẕ', 'ẝ'), - ('ẟ', 'ẟ'), - ('ạ', 'ạ'), - ('ả', 'ả'), - ('ấ', 'ấ'), - ('ầ', 'ầ'), - ('ẩ', 'ẩ'), - ('ẫ', 'ẫ'), - ('ậ', 'ậ'), - ('ắ', 'ắ'), - ('ằ', 'ằ'), - ('ẳ', 'ẳ'), - ('ẵ', 'ẵ'), - ('ặ', 'ặ'), - ('ẹ', 'ẹ'), - ('ẻ', 'ẻ'), - ('ẽ', 'ẽ'), - ('ế', 'ế'), - ('ề', 'ề'), - ('ể', 'ể'), - ('ễ', 'ễ'), - ('ệ', 'ệ'), - ('ỉ', 'ỉ'), - ('ị', 'ị'), - ('ọ', 'ọ'), - ('ỏ', 'ỏ'), - ('ố', 'ố'), - ('ồ', 'ồ'), - ('ổ', 'ổ'), - ('ỗ', 'ỗ'), - ('ộ', 'ộ'), - ('ớ', 'ớ'), - ('ờ', 'ờ'), - ('ở', 'ở'), - ('ỡ', 'ỡ'), - ('ợ', 'ợ'), - ('ụ', 'ụ'), - ('ủ', 'ủ'), - ('ứ', 'ứ'), - ('ừ', 'ừ'), - ('ử', 'ử'), - ('ữ', 'ữ'), - ('ự', 'ự'), - ('ỳ', 'ỳ'), - ('ỵ', 'ỵ'), - ('ỷ', 'ỷ'), - ('ỹ', 'ỹ'), - ('ỻ', 'ỻ'), - ('ỽ', 'ỽ'), - ('ỿ', 'ἇ'), - ('ἐ', 'ἕ'), - ('ἠ', 'ἧ'), - ('ἰ', 'ἷ'), - ('ὀ', 'ὅ'), - ('ὐ', 'ὗ'), - ('ὠ', 'ὧ'), - ('ὰ', 'ώ'), - ('ᾀ', 'ᾇ'), - ('ᾐ', 'ᾗ'), - ('ᾠ', 'ᾧ'), - ('ᾰ', 'ᾴ'), - ('ᾶ', 'ᾷ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῇ'), - ('ῐ', 'ΐ'), - ('ῖ', 'ῗ'), - ('ῠ', 'ῧ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῷ'), - ('ℊ', 'ℊ'), - ('ℎ', 'ℏ'), - ('ℓ', 'ℓ'), - ('ℯ', 'ℯ'), - ('ℴ', 'ℴ'), - ('ℹ', 'ℹ'), - ('ℼ', 'ℽ'), - ('ⅆ', 'ⅉ'), - ('ⅎ', 'ⅎ'), - ('ↄ', 'ↄ'), - ('ⰰ', 'ⱟ'), - ('ⱡ', 'ⱡ'), - ('ⱥ', 'ⱦ'), - ('ⱨ', 'ⱨ'), - ('ⱪ', 'ⱪ'), - ('ⱬ', 'ⱬ'), - ('ⱱ', 'ⱱ'), 
- ('ⱳ', 'ⱴ'), - ('ⱶ', 'ⱻ'), - ('ⲁ', 'ⲁ'), - ('ⲃ', 'ⲃ'), - ('ⲅ', 'ⲅ'), - ('ⲇ', 'ⲇ'), - ('ⲉ', 'ⲉ'), - ('ⲋ', 'ⲋ'), - ('ⲍ', 'ⲍ'), - ('ⲏ', 'ⲏ'), - ('ⲑ', 'ⲑ'), - ('ⲓ', 'ⲓ'), - ('ⲕ', 'ⲕ'), - ('ⲗ', 'ⲗ'), - ('ⲙ', 'ⲙ'), - ('ⲛ', 'ⲛ'), - ('ⲝ', 'ⲝ'), - ('ⲟ', 'ⲟ'), - ('ⲡ', 'ⲡ'), - ('ⲣ', 'ⲣ'), - ('ⲥ', 'ⲥ'), - ('ⲧ', 'ⲧ'), - ('ⲩ', 'ⲩ'), - ('ⲫ', 'ⲫ'), - ('ⲭ', 'ⲭ'), - ('ⲯ', 'ⲯ'), - ('ⲱ', 'ⲱ'), - ('ⲳ', 'ⲳ'), - ('ⲵ', 'ⲵ'), - ('ⲷ', 'ⲷ'), - ('ⲹ', 'ⲹ'), - ('ⲻ', 'ⲻ'), - ('ⲽ', 'ⲽ'), - ('ⲿ', 'ⲿ'), - ('ⳁ', 'ⳁ'), - ('ⳃ', 'ⳃ'), - ('ⳅ', 'ⳅ'), - ('ⳇ', 'ⳇ'), - ('ⳉ', 'ⳉ'), - ('ⳋ', 'ⳋ'), - ('ⳍ', 'ⳍ'), - ('ⳏ', 'ⳏ'), - ('ⳑ', 'ⳑ'), - ('ⳓ', 'ⳓ'), - ('ⳕ', 'ⳕ'), - ('ⳗ', 'ⳗ'), - ('ⳙ', 'ⳙ'), - ('ⳛ', 'ⳛ'), - ('ⳝ', 'ⳝ'), - ('ⳟ', 'ⳟ'), - ('ⳡ', 'ⳡ'), - ('ⳣ', 'ⳤ'), - ('ⳬ', 'ⳬ'), - ('ⳮ', 'ⳮ'), - ('ⳳ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ꙁ', 'ꙁ'), - ('ꙃ', 'ꙃ'), - ('ꙅ', 'ꙅ'), - ('ꙇ', 'ꙇ'), - ('ꙉ', 'ꙉ'), - ('ꙋ', 'ꙋ'), - ('ꙍ', 'ꙍ'), - ('ꙏ', 'ꙏ'), - ('ꙑ', 'ꙑ'), - ('ꙓ', 'ꙓ'), - ('ꙕ', 'ꙕ'), - ('ꙗ', 'ꙗ'), - ('ꙙ', 'ꙙ'), - ('ꙛ', 'ꙛ'), - ('ꙝ', 'ꙝ'), - ('ꙟ', 'ꙟ'), - ('ꙡ', 'ꙡ'), - ('ꙣ', 'ꙣ'), - ('ꙥ', 'ꙥ'), - ('ꙧ', 'ꙧ'), - ('ꙩ', 'ꙩ'), - ('ꙫ', 'ꙫ'), - ('ꙭ', 'ꙭ'), - ('ꚁ', 'ꚁ'), - ('ꚃ', 'ꚃ'), - ('ꚅ', 'ꚅ'), - ('ꚇ', 'ꚇ'), - ('ꚉ', 'ꚉ'), - ('ꚋ', 'ꚋ'), - ('ꚍ', 'ꚍ'), - ('ꚏ', 'ꚏ'), - ('ꚑ', 'ꚑ'), - ('ꚓ', 'ꚓ'), - ('ꚕ', 'ꚕ'), - ('ꚗ', 'ꚗ'), - ('ꚙ', 'ꚙ'), - ('ꚛ', 'ꚛ'), - ('ꜣ', 'ꜣ'), - ('ꜥ', 'ꜥ'), - ('ꜧ', 'ꜧ'), - ('ꜩ', 'ꜩ'), - ('ꜫ', 'ꜫ'), - ('ꜭ', 'ꜭ'), - ('ꜯ', 'ꜱ'), - ('ꜳ', 'ꜳ'), - ('ꜵ', 'ꜵ'), - ('ꜷ', 'ꜷ'), - ('ꜹ', 'ꜹ'), - ('ꜻ', 'ꜻ'), - ('ꜽ', 'ꜽ'), - ('ꜿ', 'ꜿ'), - ('ꝁ', 'ꝁ'), - ('ꝃ', 'ꝃ'), - ('ꝅ', 'ꝅ'), - ('ꝇ', 'ꝇ'), - ('ꝉ', 'ꝉ'), - ('ꝋ', 'ꝋ'), - ('ꝍ', 'ꝍ'), - ('ꝏ', 'ꝏ'), - ('ꝑ', 'ꝑ'), - ('ꝓ', 'ꝓ'), - ('ꝕ', 'ꝕ'), - ('ꝗ', 'ꝗ'), - ('ꝙ', 'ꝙ'), - ('ꝛ', 'ꝛ'), - ('ꝝ', 'ꝝ'), - ('ꝟ', 'ꝟ'), - ('ꝡ', 'ꝡ'), - ('ꝣ', 'ꝣ'), - ('ꝥ', 'ꝥ'), - ('ꝧ', 'ꝧ'), - ('ꝩ', 'ꝩ'), - ('ꝫ', 'ꝫ'), - ('ꝭ', 'ꝭ'), - ('ꝯ', 'ꝯ'), - ('ꝱ', 'ꝸ'), - ('ꝺ', 'ꝺ'), - ('ꝼ', 'ꝼ'), - ('ꝿ', 'ꝿ'), - ('ꞁ', 'ꞁ'), - ('ꞃ', 'ꞃ'), - ('ꞅ', 'ꞅ'), - ('ꞇ', 'ꞇ'), - ('ꞌ', 'ꞌ'), - ('ꞎ', 
'ꞎ'), - ('ꞑ', 'ꞑ'), - ('ꞓ', 'ꞕ'), - ('ꞗ', 'ꞗ'), - ('ꞙ', 'ꞙ'), - ('ꞛ', 'ꞛ'), - ('ꞝ', 'ꞝ'), - ('ꞟ', 'ꞟ'), - ('ꞡ', 'ꞡ'), - ('ꞣ', 'ꞣ'), - ('ꞥ', 'ꞥ'), - ('ꞧ', 'ꞧ'), - ('ꞩ', 'ꞩ'), - ('ꞯ', 'ꞯ'), - ('ꞵ', 'ꞵ'), - ('ꞷ', 'ꞷ'), - ('ꞹ', 'ꞹ'), - ('ꞻ', 'ꞻ'), - ('ꞽ', 'ꞽ'), - ('ꞿ', 'ꞿ'), - ('ꟁ', 'ꟁ'), - ('ꟃ', 'ꟃ'), - ('ꟈ', 'ꟈ'), - ('ꟊ', 'ꟊ'), - ('ꟍ', 'ꟍ'), - ('ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟕ'), - ('ꟗ', 'ꟗ'), - ('ꟙ', 'ꟙ'), - ('ꟛ', 'ꟛ'), - ('ꟶ', 'ꟶ'), - ('ꟺ', 'ꟺ'), - ('ꬰ', 'ꭚ'), - ('ꭠ', 'ꭨ'), - ('ꭰ', 'ꮿ'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('a', 'z'), - ('𐐨', '𐑏'), - ('𐓘', '𐓻'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐳀', '𐳲'), - ('𐵰', '𐶅'), - ('𑣀', '𑣟'), - ('𖹠', '𖹿'), - ('𝐚', '𝐳'), - ('𝑎', '𝑔'), - ('𝑖', '𝑧'), - ('𝒂', '𝒛'), - ('𝒶', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝓏'), - ('𝓪', '𝔃'), - ('𝔞', '𝔷'), - ('𝕒', '𝕫'), - ('𝖆', '𝖟'), - ('𝖺', '𝗓'), - ('𝗮', '𝘇'), - ('𝘢', '𝘻'), - ('𝙖', '𝙯'), - ('𝚊', '𝚥'), - ('𝛂', '𝛚'), - ('𝛜', '𝛡'), - ('𝛼', '𝜔'), - ('𝜖', '𝜛'), - ('𝜶', '𝝎'), - ('𝝐', '𝝕'), - ('𝝰', '𝞈'), - ('𝞊', '𝞏'), - ('𝞪', '𝟂'), - ('𝟄', '𝟉'), - ('𝟋', '𝟋'), - ('𝼀', '𝼉'), - ('𝼋', '𝼞'), - ('𝼥', '𝼪'), - ('𞤢', '𞥃'), -]; - -pub const MARK: &'static [(char, char)] = &[ - ('\u{300}', '\u{36f}'), - ('\u{483}', '\u{489}'), - ('\u{591}', '\u{5bd}'), - ('\u{5bf}', '\u{5bf}'), - ('\u{5c1}', '\u{5c2}'), - ('\u{5c4}', '\u{5c5}'), - ('\u{5c7}', '\u{5c7}'), - ('\u{610}', '\u{61a}'), - ('\u{64b}', '\u{65f}'), - ('\u{670}', '\u{670}'), - ('\u{6d6}', '\u{6dc}'), - ('\u{6df}', '\u{6e4}'), - ('\u{6e7}', '\u{6e8}'), - ('\u{6ea}', '\u{6ed}'), - ('\u{711}', '\u{711}'), - ('\u{730}', '\u{74a}'), - ('\u{7a6}', '\u{7b0}'), - ('\u{7eb}', '\u{7f3}'), - ('\u{7fd}', '\u{7fd}'), - ('\u{816}', '\u{819}'), - ('\u{81b}', '\u{823}'), - ('\u{825}', '\u{827}'), - ('\u{829}', '\u{82d}'), - ('\u{859}', '\u{85b}'), - ('\u{897}', '\u{89f}'), - ('\u{8ca}', '\u{8e1}'), - ('\u{8e3}', 'ः'), - ('\u{93a}', '\u{93c}'), - ('ा', 'ॏ'), - ('\u{951}', '\u{957}'), - ('\u{962}', '\u{963}'), - ('\u{981}', 'ঃ'), - 
('\u{9bc}', '\u{9bc}'), - ('\u{9be}', '\u{9c4}'), - ('ে', 'ৈ'), - ('ো', '\u{9cd}'), - ('\u{9d7}', '\u{9d7}'), - ('\u{9e2}', '\u{9e3}'), - ('\u{9fe}', '\u{9fe}'), - ('\u{a01}', 'ਃ'), - ('\u{a3c}', '\u{a3c}'), - ('ਾ', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4d}'), - ('\u{a51}', '\u{a51}'), - ('\u{a70}', '\u{a71}'), - ('\u{a75}', '\u{a75}'), - ('\u{a81}', 'ઃ'), - ('\u{abc}', '\u{abc}'), - ('ા', '\u{ac5}'), - ('\u{ac7}', 'ૉ'), - ('ો', '\u{acd}'), - ('\u{ae2}', '\u{ae3}'), - ('\u{afa}', '\u{aff}'), - ('\u{b01}', 'ଃ'), - ('\u{b3c}', '\u{b3c}'), - ('\u{b3e}', '\u{b44}'), - ('େ', 'ୈ'), - ('ୋ', '\u{b4d}'), - ('\u{b55}', '\u{b57}'), - ('\u{b62}', '\u{b63}'), - ('\u{b82}', '\u{b82}'), - ('\u{bbe}', 'ூ'), - ('ெ', 'ை'), - ('ொ', '\u{bcd}'), - ('\u{bd7}', '\u{bd7}'), - ('\u{c00}', '\u{c04}'), - ('\u{c3c}', '\u{c3c}'), - ('\u{c3e}', 'ౄ'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4d}'), - ('\u{c55}', '\u{c56}'), - ('\u{c62}', '\u{c63}'), - ('\u{c81}', 'ಃ'), - ('\u{cbc}', '\u{cbc}'), - ('ಾ', 'ೄ'), - ('\u{cc6}', '\u{cc8}'), - ('\u{cca}', '\u{ccd}'), - ('\u{cd5}', '\u{cd6}'), - ('\u{ce2}', '\u{ce3}'), - ('ೳ', 'ೳ'), - ('\u{d00}', 'ഃ'), - ('\u{d3b}', '\u{d3c}'), - ('\u{d3e}', '\u{d44}'), - ('െ', 'ൈ'), - ('ൊ', '\u{d4d}'), - ('\u{d57}', '\u{d57}'), - ('\u{d62}', '\u{d63}'), - ('\u{d81}', 'ඃ'), - ('\u{dca}', '\u{dca}'), - ('\u{dcf}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('ෘ', '\u{ddf}'), - ('ෲ', 'ෳ'), - ('\u{e31}', '\u{e31}'), - ('\u{e34}', '\u{e3a}'), - ('\u{e47}', '\u{e4e}'), - ('\u{eb1}', '\u{eb1}'), - ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ece}'), - ('\u{f18}', '\u{f19}'), - ('\u{f35}', '\u{f35}'), - ('\u{f37}', '\u{f37}'), - ('\u{f39}', '\u{f39}'), - ('༾', '༿'), - ('\u{f71}', '\u{f84}'), - ('\u{f86}', '\u{f87}'), - ('\u{f8d}', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('\u{fc6}', '\u{fc6}'), - ('ါ', '\u{103e}'), - ('ၖ', '\u{1059}'), - ('\u{105e}', '\u{1060}'), - ('ၢ', 'ၤ'), - ('ၧ', 'ၭ'), - ('\u{1071}', '\u{1074}'), - ('\u{1082}', '\u{108d}'), - ('ႏ', 'ႏ'), - 
('ႚ', '\u{109d}'), - ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1715}'), - ('\u{1732}', '\u{1734}'), - ('\u{1752}', '\u{1753}'), - ('\u{1772}', '\u{1773}'), - ('\u{17b4}', '\u{17d3}'), - ('\u{17dd}', '\u{17dd}'), - ('\u{180b}', '\u{180d}'), - ('\u{180f}', '\u{180f}'), - ('\u{1885}', '\u{1886}'), - ('\u{18a9}', '\u{18a9}'), - ('\u{1920}', 'ᤫ'), - ('ᤰ', '\u{193b}'), - ('\u{1a17}', '\u{1a1b}'), - ('ᩕ', '\u{1a5e}'), - ('\u{1a60}', '\u{1a7c}'), - ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ace}'), - ('\u{1b00}', 'ᬄ'), - ('\u{1b34}', '\u{1b44}'), - ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', 'ᮂ'), - ('ᮡ', '\u{1bad}'), - ('\u{1be6}', '\u{1bf3}'), - ('ᰤ', '\u{1c37}'), - ('\u{1cd0}', '\u{1cd2}'), - ('\u{1cd4}', '\u{1ce8}'), - ('\u{1ced}', '\u{1ced}'), - ('\u{1cf4}', '\u{1cf4}'), - ('᳷', '\u{1cf9}'), - ('\u{1dc0}', '\u{1dff}'), - ('\u{20d0}', '\u{20f0}'), - ('\u{2cef}', '\u{2cf1}'), - ('\u{2d7f}', '\u{2d7f}'), - ('\u{2de0}', '\u{2dff}'), - ('\u{302a}', '\u{302f}'), - ('\u{3099}', '\u{309a}'), - ('\u{a66f}', '\u{a672}'), - ('\u{a674}', '\u{a67d}'), - ('\u{a69e}', '\u{a69f}'), - ('\u{a6f0}', '\u{a6f1}'), - ('\u{a802}', '\u{a802}'), - ('\u{a806}', '\u{a806}'), - ('\u{a80b}', '\u{a80b}'), - ('ꠣ', 'ꠧ'), - ('\u{a82c}', '\u{a82c}'), - ('ꢀ', 'ꢁ'), - ('ꢴ', '\u{a8c5}'), - ('\u{a8e0}', '\u{a8f1}'), - ('\u{a8ff}', '\u{a8ff}'), - ('\u{a926}', '\u{a92d}'), - ('\u{a947}', '\u{a953}'), - ('\u{a980}', 'ꦃ'), - ('\u{a9b3}', '\u{a9c0}'), - ('\u{a9e5}', '\u{a9e5}'), - ('\u{aa29}', '\u{aa36}'), - ('\u{aa43}', '\u{aa43}'), - ('\u{aa4c}', 'ꩍ'), - ('ꩻ', 'ꩽ'), - ('\u{aab0}', '\u{aab0}'), - ('\u{aab2}', '\u{aab4}'), - ('\u{aab7}', '\u{aab8}'), - ('\u{aabe}', '\u{aabf}'), - ('\u{aac1}', '\u{aac1}'), - ('ꫫ', 'ꫯ'), - ('ꫵ', '\u{aaf6}'), - ('ꯣ', 'ꯪ'), - ('꯬', '\u{abed}'), - ('\u{fb1e}', '\u{fb1e}'), - ('\u{fe00}', '\u{fe0f}'), - ('\u{fe20}', '\u{fe2f}'), - ('\u{101fd}', '\u{101fd}'), - ('\u{102e0}', '\u{102e0}'), - ('\u{10376}', '\u{1037a}'), - ('\u{10a01}', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - 
('\u{10a0c}', '\u{10a0f}'), - ('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '\u{10a3f}'), - ('\u{10ae5}', '\u{10ae6}'), - ('\u{10d24}', '\u{10d27}'), - ('\u{10d69}', '\u{10d6d}'), - ('\u{10eab}', '\u{10eac}'), - ('\u{10efc}', '\u{10eff}'), - ('\u{10f46}', '\u{10f50}'), - ('\u{10f82}', '\u{10f85}'), - ('𑀀', '𑀂'), - ('\u{11038}', '\u{11046}'), - ('\u{11070}', '\u{11070}'), - ('\u{11073}', '\u{11074}'), - ('\u{1107f}', '𑂂'), - ('𑂰', '\u{110ba}'), - ('\u{110c2}', '\u{110c2}'), - ('\u{11100}', '\u{11102}'), - ('\u{11127}', '\u{11134}'), - ('𑅅', '𑅆'), - ('\u{11173}', '\u{11173}'), - ('\u{11180}', '𑆂'), - ('𑆳', '\u{111c0}'), - ('\u{111c9}', '\u{111cc}'), - ('𑇎', '\u{111cf}'), - ('𑈬', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), - ('\u{11241}', '\u{11241}'), - ('\u{112df}', '\u{112ea}'), - ('\u{11300}', '𑌃'), - ('\u{1133b}', '\u{1133c}'), - ('\u{1133e}', '𑍄'), - ('𑍇', '𑍈'), - ('𑍋', '\u{1134d}'), - ('\u{11357}', '\u{11357}'), - ('𑍢', '𑍣'), - ('\u{11366}', '\u{1136c}'), - ('\u{11370}', '\u{11374}'), - ('\u{113b8}', '\u{113c0}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '𑏊'), - ('𑏌', '\u{113d0}'), - ('\u{113d2}', '\u{113d2}'), - ('\u{113e1}', '\u{113e2}'), - ('𑐵', '\u{11446}'), - ('\u{1145e}', '\u{1145e}'), - ('\u{114b0}', '\u{114c3}'), - ('\u{115af}', '\u{115b5}'), - ('𑖸', '\u{115c0}'), - ('\u{115dc}', '\u{115dd}'), - ('𑘰', '\u{11640}'), - ('\u{116ab}', '\u{116b7}'), - ('\u{1171d}', '\u{1172b}'), - ('𑠬', '\u{1183a}'), - ('\u{11930}', '𑤵'), - ('𑤷', '𑤸'), - ('\u{1193b}', '\u{1193e}'), - ('𑥀', '𑥀'), - ('𑥂', '\u{11943}'), - ('𑧑', '\u{119d7}'), - ('\u{119da}', '\u{119e0}'), - ('𑧤', '𑧤'), - ('\u{11a01}', '\u{11a0a}'), - ('\u{11a33}', '𑨹'), - ('\u{11a3b}', '\u{11a3e}'), - ('\u{11a47}', '\u{11a47}'), - ('\u{11a51}', '\u{11a5b}'), - ('\u{11a8a}', '\u{11a99}'), - ('𑰯', '\u{11c36}'), - ('\u{11c38}', '\u{11c3f}'), - ('\u{11c92}', '\u{11ca7}'), - ('𑲩', '\u{11cb6}'), - ('\u{11d31}', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - 
('\u{11d3f}', '\u{11d45}'), - ('\u{11d47}', '\u{11d47}'), - ('𑶊', '𑶎'), - ('\u{11d90}', '\u{11d91}'), - ('𑶓', '\u{11d97}'), - ('\u{11ef3}', '𑻶'), - ('\u{11f00}', '\u{11f01}'), - ('𑼃', '𑼃'), - ('𑼴', '\u{11f3a}'), - ('𑼾', '\u{11f42}'), - ('\u{11f5a}', '\u{11f5a}'), - ('\u{13440}', '\u{13440}'), - ('\u{13447}', '\u{13455}'), - ('\u{1611e}', '\u{1612f}'), - ('\u{16af0}', '\u{16af4}'), - ('\u{16b30}', '\u{16b36}'), - ('\u{16f4f}', '\u{16f4f}'), - ('𖽑', '𖾇'), - ('\u{16f8f}', '\u{16f92}'), - ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), - ('\u{1bc9d}', '\u{1bc9e}'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d165}', '\u{1d169}'), - ('\u{1d16d}', '\u{1d172}'), - ('\u{1d17b}', '\u{1d182}'), - ('\u{1d185}', '\u{1d18b}'), - ('\u{1d1aa}', '\u{1d1ad}'), - ('\u{1d242}', '\u{1d244}'), - ('\u{1da00}', '\u{1da36}'), - ('\u{1da3b}', '\u{1da6c}'), - ('\u{1da75}', '\u{1da75}'), - ('\u{1da84}', '\u{1da84}'), - ('\u{1da9b}', '\u{1da9f}'), - ('\u{1daa1}', '\u{1daaf}'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), - ('\u{1e08f}', '\u{1e08f}'), - ('\u{1e130}', '\u{1e136}'), - ('\u{1e2ae}', '\u{1e2ae}'), - ('\u{1e2ec}', '\u{1e2ef}'), - ('\u{1e4ec}', '\u{1e4ef}'), - ('\u{1e5ee}', '\u{1e5ef}'), - ('\u{1e8d0}', '\u{1e8d6}'), - ('\u{1e944}', '\u{1e94a}'), - ('\u{e0100}', '\u{e01ef}'), -]; - -pub const MATH_SYMBOL: &'static [(char, char)] = &[ - ('+', '+'), - ('<', '>'), - ('|', '|'), - ('~', '~'), - ('¬', '¬'), - ('±', '±'), - ('×', '×'), - ('÷', '÷'), - ('϶', '϶'), - ('؆', '؈'), - ('⁄', '⁄'), - ('⁒', '⁒'), - ('⁺', '⁼'), - ('₊', '₌'), - ('℘', '℘'), - ('⅀', '⅄'), - ('⅋', '⅋'), - ('←', '↔'), - ('↚', '↛'), - ('↠', '↠'), - ('↣', '↣'), - ('↦', '↦'), - ('↮', '↮'), - ('⇎', '⇏'), - ('⇒', '⇒'), - ('⇔', '⇔'), - ('⇴', '⋿'), - ('⌠', '⌡'), - ('⍼', '⍼'), - ('⎛', '⎳'), - ('⏜', '⏡'), - ('▷', '▷'), - ('◁', '◁'), - ('◸', '◿'), - ('♯', '♯'), - ('⟀', '⟄'), - ('⟇', '⟥'), - 
('⟰', '⟿'), - ('⤀', '⦂'), - ('⦙', '⧗'), - ('⧜', '⧻'), - ('⧾', '⫿'), - ('⬰', '⭄'), - ('⭇', '⭌'), - ('﬩', '﬩'), - ('﹢', '﹢'), - ('﹤', '﹦'), - ('+', '+'), - ('<', '>'), - ('|', '|'), - ('~', '~'), - ('¬', '¬'), - ('←', '↓'), - ('𐶎', '𐶏'), - ('𝛁', '𝛁'), - ('𝛛', '𝛛'), - ('𝛻', '𝛻'), - ('𝜕', '𝜕'), - ('𝜵', '𝜵'), - ('𝝏', '𝝏'), - ('𝝯', '𝝯'), - ('𝞉', '𝞉'), - ('𝞩', '𝞩'), - ('𝟃', '𝟃'), - ('𞻰', '𞻱'), -]; - -pub const MODIFIER_LETTER: &'static [(char, char)] = &[ - ('ʰ', 'ˁ'), - ('ˆ', 'ˑ'), - ('ˠ', 'ˤ'), - ('ˬ', 'ˬ'), - ('ˮ', 'ˮ'), - ('ʹ', 'ʹ'), - ('ͺ', 'ͺ'), - ('ՙ', 'ՙ'), - ('ـ', 'ـ'), - ('ۥ', 'ۦ'), - ('ߴ', 'ߵ'), - ('ߺ', 'ߺ'), - ('ࠚ', 'ࠚ'), - ('ࠤ', 'ࠤ'), - ('ࠨ', 'ࠨ'), - ('ࣉ', 'ࣉ'), - ('ॱ', 'ॱ'), - ('ๆ', 'ๆ'), - ('ໆ', 'ໆ'), - ('ჼ', 'ჼ'), - ('ៗ', 'ៗ'), - ('ᡃ', 'ᡃ'), - ('ᪧ', 'ᪧ'), - ('ᱸ', 'ᱽ'), - ('ᴬ', 'ᵪ'), - ('ᵸ', 'ᵸ'), - ('ᶛ', 'ᶿ'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('ⱼ', 'ⱽ'), - ('ⵯ', 'ⵯ'), - ('ⸯ', 'ⸯ'), - ('々', '々'), - ('〱', '〵'), - ('〻', '〻'), - ('ゝ', 'ゞ'), - ('ー', 'ヾ'), - ('ꀕ', 'ꀕ'), - ('ꓸ', 'ꓽ'), - ('ꘌ', 'ꘌ'), - ('ꙿ', 'ꙿ'), - ('ꚜ', 'ꚝ'), - ('ꜗ', 'ꜟ'), - ('ꝰ', 'ꝰ'), - ('ꞈ', 'ꞈ'), - ('ꟲ', 'ꟴ'), - ('ꟸ', 'ꟹ'), - ('ꧏ', 'ꧏ'), - ('ꧦ', 'ꧦ'), - ('ꩰ', 'ꩰ'), - ('ꫝ', 'ꫝ'), - ('ꫳ', 'ꫴ'), - ('ꭜ', 'ꭟ'), - ('ꭩ', 'ꭩ'), - ('ー', 'ー'), - ('\u{ff9e}', '\u{ff9f}'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𐵎', '𐵎'), - ('𐵯', '𐵯'), - ('𖭀', '𖭃'), - ('𖵀', '𖵂'), - ('𖵫', '𖵬'), - ('𖾓', '𖾟'), - ('𖿠', '𖿡'), - ('𖿣', '𖿣'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('𞀰', '𞁭'), - ('𞄷', '𞄽'), - ('𞓫', '𞓫'), - ('𞥋', '𞥋'), -]; - -pub const MODIFIER_SYMBOL: &'static [(char, char)] = &[ - ('^', '^'), - ('`', '`'), - ('¨', '¨'), - ('¯', '¯'), - ('´', '´'), - ('¸', '¸'), - ('˂', '˅'), - ('˒', '˟'), - ('˥', '˫'), - ('˭', '˭'), - ('˯', '˿'), - ('͵', '͵'), - ('΄', '΅'), - ('࢈', '࢈'), - ('᾽', '᾽'), - ('᾿', '῁'), - ('῍', '῏'), - ('῝', '῟'), - ('῭', '`'), - ('´', '῾'), - ('゛', '゜'), - ('꜀', '꜖'), - ('꜠', '꜡'), - ('꞉', '꞊'), - ('꭛', '꭛'), - ('꭪', '꭫'), - ('﮲', '﯂'), - ('^', '^'), - ('`', '`'), - (' ̄', ' 
̄'), - ('🏻', '🏿'), -]; - -pub const NONSPACING_MARK: &'static [(char, char)] = &[ - ('\u{300}', '\u{36f}'), - ('\u{483}', '\u{487}'), - ('\u{591}', '\u{5bd}'), - ('\u{5bf}', '\u{5bf}'), - ('\u{5c1}', '\u{5c2}'), - ('\u{5c4}', '\u{5c5}'), - ('\u{5c7}', '\u{5c7}'), - ('\u{610}', '\u{61a}'), - ('\u{64b}', '\u{65f}'), - ('\u{670}', '\u{670}'), - ('\u{6d6}', '\u{6dc}'), - ('\u{6df}', '\u{6e4}'), - ('\u{6e7}', '\u{6e8}'), - ('\u{6ea}', '\u{6ed}'), - ('\u{711}', '\u{711}'), - ('\u{730}', '\u{74a}'), - ('\u{7a6}', '\u{7b0}'), - ('\u{7eb}', '\u{7f3}'), - ('\u{7fd}', '\u{7fd}'), - ('\u{816}', '\u{819}'), - ('\u{81b}', '\u{823}'), - ('\u{825}', '\u{827}'), - ('\u{829}', '\u{82d}'), - ('\u{859}', '\u{85b}'), - ('\u{897}', '\u{89f}'), - ('\u{8ca}', '\u{8e1}'), - ('\u{8e3}', '\u{902}'), - ('\u{93a}', '\u{93a}'), - ('\u{93c}', '\u{93c}'), - ('\u{941}', '\u{948}'), - ('\u{94d}', '\u{94d}'), - ('\u{951}', '\u{957}'), - ('\u{962}', '\u{963}'), - ('\u{981}', '\u{981}'), - ('\u{9bc}', '\u{9bc}'), - ('\u{9c1}', '\u{9c4}'), - ('\u{9cd}', '\u{9cd}'), - ('\u{9e2}', '\u{9e3}'), - ('\u{9fe}', '\u{9fe}'), - ('\u{a01}', '\u{a02}'), - ('\u{a3c}', '\u{a3c}'), - ('\u{a41}', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4d}'), - ('\u{a51}', '\u{a51}'), - ('\u{a70}', '\u{a71}'), - ('\u{a75}', '\u{a75}'), - ('\u{a81}', '\u{a82}'), - ('\u{abc}', '\u{abc}'), - ('\u{ac1}', '\u{ac5}'), - ('\u{ac7}', '\u{ac8}'), - ('\u{acd}', '\u{acd}'), - ('\u{ae2}', '\u{ae3}'), - ('\u{afa}', '\u{aff}'), - ('\u{b01}', '\u{b01}'), - ('\u{b3c}', '\u{b3c}'), - ('\u{b3f}', '\u{b3f}'), - ('\u{b41}', '\u{b44}'), - ('\u{b4d}', '\u{b4d}'), - ('\u{b55}', '\u{b56}'), - ('\u{b62}', '\u{b63}'), - ('\u{b82}', '\u{b82}'), - ('\u{bc0}', '\u{bc0}'), - ('\u{bcd}', '\u{bcd}'), - ('\u{c00}', '\u{c00}'), - ('\u{c04}', '\u{c04}'), - ('\u{c3c}', '\u{c3c}'), - ('\u{c3e}', '\u{c40}'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4d}'), - ('\u{c55}', '\u{c56}'), - ('\u{c62}', '\u{c63}'), - ('\u{c81}', '\u{c81}'), - ('\u{cbc}', 
'\u{cbc}'), - ('\u{cbf}', '\u{cbf}'), - ('\u{cc6}', '\u{cc6}'), - ('\u{ccc}', '\u{ccd}'), - ('\u{ce2}', '\u{ce3}'), - ('\u{d00}', '\u{d01}'), - ('\u{d3b}', '\u{d3c}'), - ('\u{d41}', '\u{d44}'), - ('\u{d4d}', '\u{d4d}'), - ('\u{d62}', '\u{d63}'), - ('\u{d81}', '\u{d81}'), - ('\u{dca}', '\u{dca}'), - ('\u{dd2}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('\u{e31}', '\u{e31}'), - ('\u{e34}', '\u{e3a}'), - ('\u{e47}', '\u{e4e}'), - ('\u{eb1}', '\u{eb1}'), - ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ece}'), - ('\u{f18}', '\u{f19}'), - ('\u{f35}', '\u{f35}'), - ('\u{f37}', '\u{f37}'), - ('\u{f39}', '\u{f39}'), - ('\u{f71}', '\u{f7e}'), - ('\u{f80}', '\u{f84}'), - ('\u{f86}', '\u{f87}'), - ('\u{f8d}', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('\u{fc6}', '\u{fc6}'), - ('\u{102d}', '\u{1030}'), - ('\u{1032}', '\u{1037}'), - ('\u{1039}', '\u{103a}'), - ('\u{103d}', '\u{103e}'), - ('\u{1058}', '\u{1059}'), - ('\u{105e}', '\u{1060}'), - ('\u{1071}', '\u{1074}'), - ('\u{1082}', '\u{1082}'), - ('\u{1085}', '\u{1086}'), - ('\u{108d}', '\u{108d}'), - ('\u{109d}', '\u{109d}'), - ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1733}'), - ('\u{1752}', '\u{1753}'), - ('\u{1772}', '\u{1773}'), - ('\u{17b4}', '\u{17b5}'), - ('\u{17b7}', '\u{17bd}'), - ('\u{17c6}', '\u{17c6}'), - ('\u{17c9}', '\u{17d3}'), - ('\u{17dd}', '\u{17dd}'), - ('\u{180b}', '\u{180d}'), - ('\u{180f}', '\u{180f}'), - ('\u{1885}', '\u{1886}'), - ('\u{18a9}', '\u{18a9}'), - ('\u{1920}', '\u{1922}'), - ('\u{1927}', '\u{1928}'), - ('\u{1932}', '\u{1932}'), - ('\u{1939}', '\u{193b}'), - ('\u{1a17}', '\u{1a18}'), - ('\u{1a1b}', '\u{1a1b}'), - ('\u{1a56}', '\u{1a56}'), - ('\u{1a58}', '\u{1a5e}'), - ('\u{1a60}', '\u{1a60}'), - ('\u{1a62}', '\u{1a62}'), - ('\u{1a65}', '\u{1a6c}'), - ('\u{1a73}', '\u{1a7c}'), - ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1abd}'), - ('\u{1abf}', '\u{1ace}'), - ('\u{1b00}', '\u{1b03}'), - ('\u{1b34}', '\u{1b34}'), - ('\u{1b36}', '\u{1b3a}'), - ('\u{1b3c}', '\u{1b3c}'), - 
('\u{1b42}', '\u{1b42}'), - ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '\u{1b81}'), - ('\u{1ba2}', '\u{1ba5}'), - ('\u{1ba8}', '\u{1ba9}'), - ('\u{1bab}', '\u{1bad}'), - ('\u{1be6}', '\u{1be6}'), - ('\u{1be8}', '\u{1be9}'), - ('\u{1bed}', '\u{1bed}'), - ('\u{1bef}', '\u{1bf1}'), - ('\u{1c2c}', '\u{1c33}'), - ('\u{1c36}', '\u{1c37}'), - ('\u{1cd0}', '\u{1cd2}'), - ('\u{1cd4}', '\u{1ce0}'), - ('\u{1ce2}', '\u{1ce8}'), - ('\u{1ced}', '\u{1ced}'), - ('\u{1cf4}', '\u{1cf4}'), - ('\u{1cf8}', '\u{1cf9}'), - ('\u{1dc0}', '\u{1dff}'), - ('\u{20d0}', '\u{20dc}'), - ('\u{20e1}', '\u{20e1}'), - ('\u{20e5}', '\u{20f0}'), - ('\u{2cef}', '\u{2cf1}'), - ('\u{2d7f}', '\u{2d7f}'), - ('\u{2de0}', '\u{2dff}'), - ('\u{302a}', '\u{302d}'), - ('\u{3099}', '\u{309a}'), - ('\u{a66f}', '\u{a66f}'), - ('\u{a674}', '\u{a67d}'), - ('\u{a69e}', '\u{a69f}'), - ('\u{a6f0}', '\u{a6f1}'), - ('\u{a802}', '\u{a802}'), - ('\u{a806}', '\u{a806}'), - ('\u{a80b}', '\u{a80b}'), - ('\u{a825}', '\u{a826}'), - ('\u{a82c}', '\u{a82c}'), - ('\u{a8c4}', '\u{a8c5}'), - ('\u{a8e0}', '\u{a8f1}'), - ('\u{a8ff}', '\u{a8ff}'), - ('\u{a926}', '\u{a92d}'), - ('\u{a947}', '\u{a951}'), - ('\u{a980}', '\u{a982}'), - ('\u{a9b3}', '\u{a9b3}'), - ('\u{a9b6}', '\u{a9b9}'), - ('\u{a9bc}', '\u{a9bd}'), - ('\u{a9e5}', '\u{a9e5}'), - ('\u{aa29}', '\u{aa2e}'), - ('\u{aa31}', '\u{aa32}'), - ('\u{aa35}', '\u{aa36}'), - ('\u{aa43}', '\u{aa43}'), - ('\u{aa4c}', '\u{aa4c}'), - ('\u{aa7c}', '\u{aa7c}'), - ('\u{aab0}', '\u{aab0}'), - ('\u{aab2}', '\u{aab4}'), - ('\u{aab7}', '\u{aab8}'), - ('\u{aabe}', '\u{aabf}'), - ('\u{aac1}', '\u{aac1}'), - ('\u{aaec}', '\u{aaed}'), - ('\u{aaf6}', '\u{aaf6}'), - ('\u{abe5}', '\u{abe5}'), - ('\u{abe8}', '\u{abe8}'), - ('\u{abed}', '\u{abed}'), - ('\u{fb1e}', '\u{fb1e}'), - ('\u{fe00}', '\u{fe0f}'), - ('\u{fe20}', '\u{fe2f}'), - ('\u{101fd}', '\u{101fd}'), - ('\u{102e0}', '\u{102e0}'), - ('\u{10376}', '\u{1037a}'), - ('\u{10a01}', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '\u{10a0f}'), - 
('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '\u{10a3f}'), - ('\u{10ae5}', '\u{10ae6}'), - ('\u{10d24}', '\u{10d27}'), - ('\u{10d69}', '\u{10d6d}'), - ('\u{10eab}', '\u{10eac}'), - ('\u{10efc}', '\u{10eff}'), - ('\u{10f46}', '\u{10f50}'), - ('\u{10f82}', '\u{10f85}'), - ('\u{11001}', '\u{11001}'), - ('\u{11038}', '\u{11046}'), - ('\u{11070}', '\u{11070}'), - ('\u{11073}', '\u{11074}'), - ('\u{1107f}', '\u{11081}'), - ('\u{110b3}', '\u{110b6}'), - ('\u{110b9}', '\u{110ba}'), - ('\u{110c2}', '\u{110c2}'), - ('\u{11100}', '\u{11102}'), - ('\u{11127}', '\u{1112b}'), - ('\u{1112d}', '\u{11134}'), - ('\u{11173}', '\u{11173}'), - ('\u{11180}', '\u{11181}'), - ('\u{111b6}', '\u{111be}'), - ('\u{111c9}', '\u{111cc}'), - ('\u{111cf}', '\u{111cf}'), - ('\u{1122f}', '\u{11231}'), - ('\u{11234}', '\u{11234}'), - ('\u{11236}', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), - ('\u{11241}', '\u{11241}'), - ('\u{112df}', '\u{112df}'), - ('\u{112e3}', '\u{112ea}'), - ('\u{11300}', '\u{11301}'), - ('\u{1133b}', '\u{1133c}'), - ('\u{11340}', '\u{11340}'), - ('\u{11366}', '\u{1136c}'), - ('\u{11370}', '\u{11374}'), - ('\u{113bb}', '\u{113c0}'), - ('\u{113ce}', '\u{113ce}'), - ('\u{113d0}', '\u{113d0}'), - ('\u{113d2}', '\u{113d2}'), - ('\u{113e1}', '\u{113e2}'), - ('\u{11438}', '\u{1143f}'), - ('\u{11442}', '\u{11444}'), - ('\u{11446}', '\u{11446}'), - ('\u{1145e}', '\u{1145e}'), - ('\u{114b3}', '\u{114b8}'), - ('\u{114ba}', '\u{114ba}'), - ('\u{114bf}', '\u{114c0}'), - ('\u{114c2}', '\u{114c3}'), - ('\u{115b2}', '\u{115b5}'), - ('\u{115bc}', '\u{115bd}'), - ('\u{115bf}', '\u{115c0}'), - ('\u{115dc}', '\u{115dd}'), - ('\u{11633}', '\u{1163a}'), - ('\u{1163d}', '\u{1163d}'), - ('\u{1163f}', '\u{11640}'), - ('\u{116ab}', '\u{116ab}'), - ('\u{116ad}', '\u{116ad}'), - ('\u{116b0}', '\u{116b5}'), - ('\u{116b7}', '\u{116b7}'), - ('\u{1171d}', '\u{1171d}'), - ('\u{1171f}', '\u{1171f}'), - ('\u{11722}', '\u{11725}'), - ('\u{11727}', '\u{1172b}'), - ('\u{1182f}', '\u{11837}'), - ('\u{11839}', 
'\u{1183a}'), - ('\u{1193b}', '\u{1193c}'), - ('\u{1193e}', '\u{1193e}'), - ('\u{11943}', '\u{11943}'), - ('\u{119d4}', '\u{119d7}'), - ('\u{119da}', '\u{119db}'), - ('\u{119e0}', '\u{119e0}'), - ('\u{11a01}', '\u{11a0a}'), - ('\u{11a33}', '\u{11a38}'), - ('\u{11a3b}', '\u{11a3e}'), - ('\u{11a47}', '\u{11a47}'), - ('\u{11a51}', '\u{11a56}'), - ('\u{11a59}', '\u{11a5b}'), - ('\u{11a8a}', '\u{11a96}'), - ('\u{11a98}', '\u{11a99}'), - ('\u{11c30}', '\u{11c36}'), - ('\u{11c38}', '\u{11c3d}'), - ('\u{11c3f}', '\u{11c3f}'), - ('\u{11c92}', '\u{11ca7}'), - ('\u{11caa}', '\u{11cb0}'), - ('\u{11cb2}', '\u{11cb3}'), - ('\u{11cb5}', '\u{11cb6}'), - ('\u{11d31}', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d45}'), - ('\u{11d47}', '\u{11d47}'), - ('\u{11d90}', '\u{11d91}'), - ('\u{11d95}', '\u{11d95}'), - ('\u{11d97}', '\u{11d97}'), - ('\u{11ef3}', '\u{11ef4}'), - ('\u{11f00}', '\u{11f01}'), - ('\u{11f36}', '\u{11f3a}'), - ('\u{11f40}', '\u{11f40}'), - ('\u{11f42}', '\u{11f42}'), - ('\u{11f5a}', '\u{11f5a}'), - ('\u{13440}', '\u{13440}'), - ('\u{13447}', '\u{13455}'), - ('\u{1611e}', '\u{16129}'), - ('\u{1612d}', '\u{1612f}'), - ('\u{16af0}', '\u{16af4}'), - ('\u{16b30}', '\u{16b36}'), - ('\u{16f4f}', '\u{16f4f}'), - ('\u{16f8f}', '\u{16f92}'), - ('\u{16fe4}', '\u{16fe4}'), - ('\u{1bc9d}', '\u{1bc9e}'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d167}', '\u{1d169}'), - ('\u{1d17b}', '\u{1d182}'), - ('\u{1d185}', '\u{1d18b}'), - ('\u{1d1aa}', '\u{1d1ad}'), - ('\u{1d242}', '\u{1d244}'), - ('\u{1da00}', '\u{1da36}'), - ('\u{1da3b}', '\u{1da6c}'), - ('\u{1da75}', '\u{1da75}'), - ('\u{1da84}', '\u{1da84}'), - ('\u{1da9b}', '\u{1da9f}'), - ('\u{1daa1}', '\u{1daaf}'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), - ('\u{1e08f}', '\u{1e08f}'), - ('\u{1e130}', '\u{1e136}'), - ('\u{1e2ae}', '\u{1e2ae}'), - 
('\u{1e2ec}', '\u{1e2ef}'), - ('\u{1e4ec}', '\u{1e4ef}'), - ('\u{1e5ee}', '\u{1e5ef}'), - ('\u{1e8d0}', '\u{1e8d6}'), - ('\u{1e944}', '\u{1e94a}'), - ('\u{e0100}', '\u{e01ef}'), -]; - -pub const NUMBER: &'static [(char, char)] = &[ - ('0', '9'), - ('²', '³'), - ('¹', '¹'), - ('¼', '¾'), - ('٠', '٩'), - ('۰', '۹'), - ('߀', '߉'), - ('०', '९'), - ('০', '৯'), - ('৴', '৹'), - ('੦', '੯'), - ('૦', '૯'), - ('୦', '୯'), - ('୲', '୷'), - ('௦', '௲'), - ('౦', '౯'), - ('౸', '౾'), - ('೦', '೯'), - ('൘', '൞'), - ('൦', '൸'), - ('෦', '෯'), - ('๐', '๙'), - ('໐', '໙'), - ('༠', '༳'), - ('၀', '၉'), - ('႐', '႙'), - ('፩', '፼'), - ('ᛮ', 'ᛰ'), - ('០', '៩'), - ('៰', '៹'), - ('᠐', '᠙'), - ('᥆', '᥏'), - ('᧐', '᧚'), - ('᪀', '᪉'), - ('᪐', '᪙'), - ('᭐', '᭙'), - ('᮰', '᮹'), - ('᱀', '᱉'), - ('᱐', '᱙'), - ('⁰', '⁰'), - ('⁴', '⁹'), - ('₀', '₉'), - ('⅐', 'ↂ'), - ('ↅ', '↉'), - ('①', '⒛'), - ('⓪', '⓿'), - ('❶', '➓'), - ('⳽', '⳽'), - ('〇', '〇'), - ('〡', '〩'), - ('〸', '〺'), - ('㆒', '㆕'), - ('㈠', '㈩'), - ('㉈', '㉏'), - ('㉑', '㉟'), - ('㊀', '㊉'), - ('㊱', '㊿'), - ('꘠', '꘩'), - ('ꛦ', 'ꛯ'), - ('꠰', '꠵'), - ('꣐', '꣙'), - ('꤀', '꤉'), - ('꧐', '꧙'), - ('꧰', '꧹'), - ('꩐', '꩙'), - ('꯰', '꯹'), - ('0', '9'), - ('𐄇', '𐄳'), - ('𐅀', '𐅸'), - ('𐆊', '𐆋'), - ('𐋡', '𐋻'), - ('𐌠', '𐌣'), - ('𐍁', '𐍁'), - ('𐍊', '𐍊'), - ('𐏑', '𐏕'), - ('𐒠', '𐒩'), - ('𐡘', '𐡟'), - ('𐡹', '𐡿'), - ('𐢧', '𐢯'), - ('𐣻', '𐣿'), - ('𐤖', '𐤛'), - ('𐦼', '𐦽'), - ('𐧀', '𐧏'), - ('𐧒', '𐧿'), - ('𐩀', '𐩈'), - ('𐩽', '𐩾'), - ('𐪝', '𐪟'), - ('𐫫', '𐫯'), - ('𐭘', '𐭟'), - ('𐭸', '𐭿'), - ('𐮩', '𐮯'), - ('𐳺', '𐳿'), - ('𐴰', '𐴹'), - ('𐵀', '𐵉'), - ('𐹠', '𐹾'), - ('𐼝', '𐼦'), - ('𐽑', '𐽔'), - ('𐿅', '𐿋'), - ('𑁒', '𑁯'), - ('𑃰', '𑃹'), - ('𑄶', '𑄿'), - ('𑇐', '𑇙'), - ('𑇡', '𑇴'), - ('𑋰', '𑋹'), - ('𑑐', '𑑙'), - ('𑓐', '𑓙'), - ('𑙐', '𑙙'), - ('𑛀', '𑛉'), - ('𑛐', '𑛣'), - ('𑜰', '𑜻'), - ('𑣠', '𑣲'), - ('𑥐', '𑥙'), - ('𑯰', '𑯹'), - ('𑱐', '𑱬'), - ('𑵐', '𑵙'), - ('𑶠', '𑶩'), - ('𑽐', '𑽙'), - ('𑿀', '𑿔'), - ('𒐀', '𒑮'), - ('𖄰', '𖄹'), - ('𖩠', '𖩩'), - ('𖫀', '𖫉'), - ('𖭐', '𖭙'), - ('𖭛', '𖭡'), - ('𖵰', '𖵹'), - ('𖺀', '𖺖'), - 
('𜳰', '𜳹'), - ('𝋀', '𝋓'), - ('𝋠', '𝋳'), - ('𝍠', '𝍸'), - ('𝟎', '𝟿'), - ('𞅀', '𞅉'), - ('𞋰', '𞋹'), - ('𞓰', '𞓹'), - ('𞗱', '𞗺'), - ('𞣇', '𞣏'), - ('𞥐', '𞥙'), - ('𞱱', '𞲫'), - ('𞲭', '𞲯'), - ('𞲱', '𞲴'), - ('𞴁', '𞴭'), - ('𞴯', '𞴽'), - ('🄀', '🄌'), - ('🯰', '🯹'), -]; - -pub const OPEN_PUNCTUATION: &'static [(char, char)] = &[ - ('(', '('), - ('[', '['), - ('{', '{'), - ('༺', '༺'), - ('༼', '༼'), - ('᚛', '᚛'), - ('‚', '‚'), - ('„', '„'), - ('⁅', '⁅'), - ('⁽', '⁽'), - ('₍', '₍'), - ('⌈', '⌈'), - ('⌊', '⌊'), - ('〈', '〈'), - ('❨', '❨'), - ('❪', '❪'), - ('❬', '❬'), - ('❮', '❮'), - ('❰', '❰'), - ('❲', '❲'), - ('❴', '❴'), - ('⟅', '⟅'), - ('⟦', '⟦'), - ('⟨', '⟨'), - ('⟪', '⟪'), - ('⟬', '⟬'), - ('⟮', '⟮'), - ('⦃', '⦃'), - ('⦅', '⦅'), - ('⦇', '⦇'), - ('⦉', '⦉'), - ('⦋', '⦋'), - ('⦍', '⦍'), - ('⦏', '⦏'), - ('⦑', '⦑'), - ('⦓', '⦓'), - ('⦕', '⦕'), - ('⦗', '⦗'), - ('⧘', '⧘'), - ('⧚', '⧚'), - ('⧼', '⧼'), - ('⸢', '⸢'), - ('⸤', '⸤'), - ('⸦', '⸦'), - ('⸨', '⸨'), - ('⹂', '⹂'), - ('⹕', '⹕'), - ('⹗', '⹗'), - ('⹙', '⹙'), - ('⹛', '⹛'), - ('〈', '〈'), - ('《', '《'), - ('「', '「'), - ('『', '『'), - ('【', '【'), - ('〔', '〔'), - ('〖', '〖'), - ('〘', '〘'), - ('〚', '〚'), - ('〝', '〝'), - ('﴿', '﴿'), - ('︗', '︗'), - ('︵', '︵'), - ('︷', '︷'), - ('︹', '︹'), - ('︻', '︻'), - ('︽', '︽'), - ('︿', '︿'), - ('﹁', '﹁'), - ('﹃', '﹃'), - ('﹇', '﹇'), - ('﹙', '﹙'), - ('﹛', '﹛'), - ('﹝', '﹝'), - ('(', '('), - ('[', '['), - ('{', '{'), - ('⦅', '⦅'), - ('「', '「'), -]; - -pub const OTHER: &'static [(char, char)] = &[ - ('\0', '\u{1f}'), - ('\u{7f}', '\u{9f}'), - ('\u{ad}', '\u{ad}'), - ('\u{378}', '\u{379}'), - ('\u{380}', '\u{383}'), - ('\u{38b}', '\u{38b}'), - ('\u{38d}', '\u{38d}'), - ('\u{3a2}', '\u{3a2}'), - ('\u{530}', '\u{530}'), - ('\u{557}', '\u{558}'), - ('\u{58b}', '\u{58c}'), - ('\u{590}', '\u{590}'), - ('\u{5c8}', '\u{5cf}'), - ('\u{5eb}', '\u{5ee}'), - ('\u{5f5}', '\u{605}'), - ('\u{61c}', '\u{61c}'), - ('\u{6dd}', '\u{6dd}'), - ('\u{70e}', '\u{70f}'), - ('\u{74b}', '\u{74c}'), - ('\u{7b2}', '\u{7bf}'), - ('\u{7fb}', 
'\u{7fc}'), - ('\u{82e}', '\u{82f}'), - ('\u{83f}', '\u{83f}'), - ('\u{85c}', '\u{85d}'), - ('\u{85f}', '\u{85f}'), - ('\u{86b}', '\u{86f}'), - ('\u{88f}', '\u{896}'), - ('\u{8e2}', '\u{8e2}'), - ('\u{984}', '\u{984}'), - ('\u{98d}', '\u{98e}'), - ('\u{991}', '\u{992}'), - ('\u{9a9}', '\u{9a9}'), - ('\u{9b1}', '\u{9b1}'), - ('\u{9b3}', '\u{9b5}'), - ('\u{9ba}', '\u{9bb}'), - ('\u{9c5}', '\u{9c6}'), - ('\u{9c9}', '\u{9ca}'), - ('\u{9cf}', '\u{9d6}'), - ('\u{9d8}', '\u{9db}'), - ('\u{9de}', '\u{9de}'), - ('\u{9e4}', '\u{9e5}'), - ('\u{9ff}', '\u{a00}'), - ('\u{a04}', '\u{a04}'), - ('\u{a0b}', '\u{a0e}'), - ('\u{a11}', '\u{a12}'), - ('\u{a29}', '\u{a29}'), - ('\u{a31}', '\u{a31}'), - ('\u{a34}', '\u{a34}'), - ('\u{a37}', '\u{a37}'), - ('\u{a3a}', '\u{a3b}'), - ('\u{a3d}', '\u{a3d}'), - ('\u{a43}', '\u{a46}'), - ('\u{a49}', '\u{a4a}'), - ('\u{a4e}', '\u{a50}'), - ('\u{a52}', '\u{a58}'), - ('\u{a5d}', '\u{a5d}'), - ('\u{a5f}', '\u{a65}'), - ('\u{a77}', '\u{a80}'), - ('\u{a84}', '\u{a84}'), - ('\u{a8e}', '\u{a8e}'), - ('\u{a92}', '\u{a92}'), - ('\u{aa9}', '\u{aa9}'), - ('\u{ab1}', '\u{ab1}'), - ('\u{ab4}', '\u{ab4}'), - ('\u{aba}', '\u{abb}'), - ('\u{ac6}', '\u{ac6}'), - ('\u{aca}', '\u{aca}'), - ('\u{ace}', '\u{acf}'), - ('\u{ad1}', '\u{adf}'), - ('\u{ae4}', '\u{ae5}'), - ('\u{af2}', '\u{af8}'), - ('\u{b00}', '\u{b00}'), - ('\u{b04}', '\u{b04}'), - ('\u{b0d}', '\u{b0e}'), - ('\u{b11}', '\u{b12}'), - ('\u{b29}', '\u{b29}'), - ('\u{b31}', '\u{b31}'), - ('\u{b34}', '\u{b34}'), - ('\u{b3a}', '\u{b3b}'), - ('\u{b45}', '\u{b46}'), - ('\u{b49}', '\u{b4a}'), - ('\u{b4e}', '\u{b54}'), - ('\u{b58}', '\u{b5b}'), - ('\u{b5e}', '\u{b5e}'), - ('\u{b64}', '\u{b65}'), - ('\u{b78}', '\u{b81}'), - ('\u{b84}', '\u{b84}'), - ('\u{b8b}', '\u{b8d}'), - ('\u{b91}', '\u{b91}'), - ('\u{b96}', '\u{b98}'), - ('\u{b9b}', '\u{b9b}'), - ('\u{b9d}', '\u{b9d}'), - ('\u{ba0}', '\u{ba2}'), - ('\u{ba5}', '\u{ba7}'), - ('\u{bab}', '\u{bad}'), - ('\u{bba}', '\u{bbd}'), - ('\u{bc3}', '\u{bc5}'), - 
('\u{bc9}', '\u{bc9}'), - ('\u{bce}', '\u{bcf}'), - ('\u{bd1}', '\u{bd6}'), - ('\u{bd8}', '\u{be5}'), - ('\u{bfb}', '\u{bff}'), - ('\u{c0d}', '\u{c0d}'), - ('\u{c11}', '\u{c11}'), - ('\u{c29}', '\u{c29}'), - ('\u{c3a}', '\u{c3b}'), - ('\u{c45}', '\u{c45}'), - ('\u{c49}', '\u{c49}'), - ('\u{c4e}', '\u{c54}'), - ('\u{c57}', '\u{c57}'), - ('\u{c5b}', '\u{c5c}'), - ('\u{c5e}', '\u{c5f}'), - ('\u{c64}', '\u{c65}'), - ('\u{c70}', '\u{c76}'), - ('\u{c8d}', '\u{c8d}'), - ('\u{c91}', '\u{c91}'), - ('\u{ca9}', '\u{ca9}'), - ('\u{cb4}', '\u{cb4}'), - ('\u{cba}', '\u{cbb}'), - ('\u{cc5}', '\u{cc5}'), - ('\u{cc9}', '\u{cc9}'), - ('\u{cce}', '\u{cd4}'), - ('\u{cd7}', '\u{cdc}'), - ('\u{cdf}', '\u{cdf}'), - ('\u{ce4}', '\u{ce5}'), - ('\u{cf0}', '\u{cf0}'), - ('\u{cf4}', '\u{cff}'), - ('\u{d0d}', '\u{d0d}'), - ('\u{d11}', '\u{d11}'), - ('\u{d45}', '\u{d45}'), - ('\u{d49}', '\u{d49}'), - ('\u{d50}', '\u{d53}'), - ('\u{d64}', '\u{d65}'), - ('\u{d80}', '\u{d80}'), - ('\u{d84}', '\u{d84}'), - ('\u{d97}', '\u{d99}'), - ('\u{db2}', '\u{db2}'), - ('\u{dbc}', '\u{dbc}'), - ('\u{dbe}', '\u{dbf}'), - ('\u{dc7}', '\u{dc9}'), - ('\u{dcb}', '\u{dce}'), - ('\u{dd5}', '\u{dd5}'), - ('\u{dd7}', '\u{dd7}'), - ('\u{de0}', '\u{de5}'), - ('\u{df0}', '\u{df1}'), - ('\u{df5}', '\u{e00}'), - ('\u{e3b}', '\u{e3e}'), - ('\u{e5c}', '\u{e80}'), - ('\u{e83}', '\u{e83}'), - ('\u{e85}', '\u{e85}'), - ('\u{e8b}', '\u{e8b}'), - ('\u{ea4}', '\u{ea4}'), - ('\u{ea6}', '\u{ea6}'), - ('\u{ebe}', '\u{ebf}'), - ('\u{ec5}', '\u{ec5}'), - ('\u{ec7}', '\u{ec7}'), - ('\u{ecf}', '\u{ecf}'), - ('\u{eda}', '\u{edb}'), - ('\u{ee0}', '\u{eff}'), - ('\u{f48}', '\u{f48}'), - ('\u{f6d}', '\u{f70}'), - ('\u{f98}', '\u{f98}'), - ('\u{fbd}', '\u{fbd}'), - ('\u{fcd}', '\u{fcd}'), - ('\u{fdb}', '\u{fff}'), - ('\u{10c6}', '\u{10c6}'), - ('\u{10c8}', '\u{10cc}'), - ('\u{10ce}', '\u{10cf}'), - ('\u{1249}', '\u{1249}'), - ('\u{124e}', '\u{124f}'), - ('\u{1257}', '\u{1257}'), - ('\u{1259}', '\u{1259}'), - ('\u{125e}', '\u{125f}'), - 
('\u{1289}', '\u{1289}'), - ('\u{128e}', '\u{128f}'), - ('\u{12b1}', '\u{12b1}'), - ('\u{12b6}', '\u{12b7}'), - ('\u{12bf}', '\u{12bf}'), - ('\u{12c1}', '\u{12c1}'), - ('\u{12c6}', '\u{12c7}'), - ('\u{12d7}', '\u{12d7}'), - ('\u{1311}', '\u{1311}'), - ('\u{1316}', '\u{1317}'), - ('\u{135b}', '\u{135c}'), - ('\u{137d}', '\u{137f}'), - ('\u{139a}', '\u{139f}'), - ('\u{13f6}', '\u{13f7}'), - ('\u{13fe}', '\u{13ff}'), - ('\u{169d}', '\u{169f}'), - ('\u{16f9}', '\u{16ff}'), - ('\u{1716}', '\u{171e}'), - ('\u{1737}', '\u{173f}'), - ('\u{1754}', '\u{175f}'), - ('\u{176d}', '\u{176d}'), - ('\u{1771}', '\u{1771}'), - ('\u{1774}', '\u{177f}'), - ('\u{17de}', '\u{17df}'), - ('\u{17ea}', '\u{17ef}'), - ('\u{17fa}', '\u{17ff}'), - ('\u{180e}', '\u{180e}'), - ('\u{181a}', '\u{181f}'), - ('\u{1879}', '\u{187f}'), - ('\u{18ab}', '\u{18af}'), - ('\u{18f6}', '\u{18ff}'), - ('\u{191f}', '\u{191f}'), - ('\u{192c}', '\u{192f}'), - ('\u{193c}', '\u{193f}'), - ('\u{1941}', '\u{1943}'), - ('\u{196e}', '\u{196f}'), - ('\u{1975}', '\u{197f}'), - ('\u{19ac}', '\u{19af}'), - ('\u{19ca}', '\u{19cf}'), - ('\u{19db}', '\u{19dd}'), - ('\u{1a1c}', '\u{1a1d}'), - ('\u{1a5f}', '\u{1a5f}'), - ('\u{1a7d}', '\u{1a7e}'), - ('\u{1a8a}', '\u{1a8f}'), - ('\u{1a9a}', '\u{1a9f}'), - ('\u{1aae}', '\u{1aaf}'), - ('\u{1acf}', '\u{1aff}'), - ('\u{1b4d}', '\u{1b4d}'), - ('\u{1bf4}', '\u{1bfb}'), - ('\u{1c38}', '\u{1c3a}'), - ('\u{1c4a}', '\u{1c4c}'), - ('\u{1c8b}', '\u{1c8f}'), - ('\u{1cbb}', '\u{1cbc}'), - ('\u{1cc8}', '\u{1ccf}'), - ('\u{1cfb}', '\u{1cff}'), - ('\u{1f16}', '\u{1f17}'), - ('\u{1f1e}', '\u{1f1f}'), - ('\u{1f46}', '\u{1f47}'), - ('\u{1f4e}', '\u{1f4f}'), - ('\u{1f58}', '\u{1f58}'), - ('\u{1f5a}', '\u{1f5a}'), - ('\u{1f5c}', '\u{1f5c}'), - ('\u{1f5e}', '\u{1f5e}'), - ('\u{1f7e}', '\u{1f7f}'), - ('\u{1fb5}', '\u{1fb5}'), - ('\u{1fc5}', '\u{1fc5}'), - ('\u{1fd4}', '\u{1fd5}'), - ('\u{1fdc}', '\u{1fdc}'), - ('\u{1ff0}', '\u{1ff1}'), - ('\u{1ff5}', '\u{1ff5}'), - ('\u{1fff}', '\u{1fff}'), - 
('\u{200b}', '\u{200f}'), - ('\u{202a}', '\u{202e}'), - ('\u{2060}', '\u{206f}'), - ('\u{2072}', '\u{2073}'), - ('\u{208f}', '\u{208f}'), - ('\u{209d}', '\u{209f}'), - ('\u{20c1}', '\u{20cf}'), - ('\u{20f1}', '\u{20ff}'), - ('\u{218c}', '\u{218f}'), - ('\u{242a}', '\u{243f}'), - ('\u{244b}', '\u{245f}'), - ('\u{2b74}', '\u{2b75}'), - ('\u{2b96}', '\u{2b96}'), - ('\u{2cf4}', '\u{2cf8}'), - ('\u{2d26}', '\u{2d26}'), - ('\u{2d28}', '\u{2d2c}'), - ('\u{2d2e}', '\u{2d2f}'), - ('\u{2d68}', '\u{2d6e}'), - ('\u{2d71}', '\u{2d7e}'), - ('\u{2d97}', '\u{2d9f}'), - ('\u{2da7}', '\u{2da7}'), - ('\u{2daf}', '\u{2daf}'), - ('\u{2db7}', '\u{2db7}'), - ('\u{2dbf}', '\u{2dbf}'), - ('\u{2dc7}', '\u{2dc7}'), - ('\u{2dcf}', '\u{2dcf}'), - ('\u{2dd7}', '\u{2dd7}'), - ('\u{2ddf}', '\u{2ddf}'), - ('\u{2e5e}', '\u{2e7f}'), - ('\u{2e9a}', '\u{2e9a}'), - ('\u{2ef4}', '\u{2eff}'), - ('\u{2fd6}', '\u{2fef}'), - ('\u{3040}', '\u{3040}'), - ('\u{3097}', '\u{3098}'), - ('\u{3100}', '\u{3104}'), - ('\u{3130}', '\u{3130}'), - ('\u{318f}', '\u{318f}'), - ('\u{31e6}', '\u{31ee}'), - ('\u{321f}', '\u{321f}'), - ('\u{a48d}', '\u{a48f}'), - ('\u{a4c7}', '\u{a4cf}'), - ('\u{a62c}', '\u{a63f}'), - ('\u{a6f8}', '\u{a6ff}'), - ('\u{a7ce}', '\u{a7cf}'), - ('\u{a7d2}', '\u{a7d2}'), - ('\u{a7d4}', '\u{a7d4}'), - ('\u{a7dd}', '\u{a7f1}'), - ('\u{a82d}', '\u{a82f}'), - ('\u{a83a}', '\u{a83f}'), - ('\u{a878}', '\u{a87f}'), - ('\u{a8c6}', '\u{a8cd}'), - ('\u{a8da}', '\u{a8df}'), - ('\u{a954}', '\u{a95e}'), - ('\u{a97d}', '\u{a97f}'), - ('\u{a9ce}', '\u{a9ce}'), - ('\u{a9da}', '\u{a9dd}'), - ('\u{a9ff}', '\u{a9ff}'), - ('\u{aa37}', '\u{aa3f}'), - ('\u{aa4e}', '\u{aa4f}'), - ('\u{aa5a}', '\u{aa5b}'), - ('\u{aac3}', '\u{aada}'), - ('\u{aaf7}', '\u{ab00}'), - ('\u{ab07}', '\u{ab08}'), - ('\u{ab0f}', '\u{ab10}'), - ('\u{ab17}', '\u{ab1f}'), - ('\u{ab27}', '\u{ab27}'), - ('\u{ab2f}', '\u{ab2f}'), - ('\u{ab6c}', '\u{ab6f}'), - ('\u{abee}', '\u{abef}'), - ('\u{abfa}', '\u{abff}'), - ('\u{d7a4}', '\u{d7af}'), - 
('\u{d7c7}', '\u{d7ca}'), - ('\u{d7fc}', '\u{f8ff}'), - ('\u{fa6e}', '\u{fa6f}'), - ('\u{fada}', '\u{faff}'), - ('\u{fb07}', '\u{fb12}'), - ('\u{fb18}', '\u{fb1c}'), - ('\u{fb37}', '\u{fb37}'), - ('\u{fb3d}', '\u{fb3d}'), - ('\u{fb3f}', '\u{fb3f}'), - ('\u{fb42}', '\u{fb42}'), - ('\u{fb45}', '\u{fb45}'), - ('\u{fbc3}', '\u{fbd2}'), - ('\u{fd90}', '\u{fd91}'), - ('\u{fdc8}', '\u{fdce}'), - ('\u{fdd0}', '\u{fdef}'), - ('\u{fe1a}', '\u{fe1f}'), - ('\u{fe53}', '\u{fe53}'), - ('\u{fe67}', '\u{fe67}'), - ('\u{fe6c}', '\u{fe6f}'), - ('\u{fe75}', '\u{fe75}'), - ('\u{fefd}', '\u{ff00}'), - ('\u{ffbf}', '\u{ffc1}'), - ('\u{ffc8}', '\u{ffc9}'), - ('\u{ffd0}', '\u{ffd1}'), - ('\u{ffd8}', '\u{ffd9}'), - ('\u{ffdd}', '\u{ffdf}'), - ('\u{ffe7}', '\u{ffe7}'), - ('\u{ffef}', '\u{fffb}'), - ('\u{fffe}', '\u{ffff}'), - ('\u{1000c}', '\u{1000c}'), - ('\u{10027}', '\u{10027}'), - ('\u{1003b}', '\u{1003b}'), - ('\u{1003e}', '\u{1003e}'), - ('\u{1004e}', '\u{1004f}'), - ('\u{1005e}', '\u{1007f}'), - ('\u{100fb}', '\u{100ff}'), - ('\u{10103}', '\u{10106}'), - ('\u{10134}', '\u{10136}'), - ('\u{1018f}', '\u{1018f}'), - ('\u{1019d}', '\u{1019f}'), - ('\u{101a1}', '\u{101cf}'), - ('\u{101fe}', '\u{1027f}'), - ('\u{1029d}', '\u{1029f}'), - ('\u{102d1}', '\u{102df}'), - ('\u{102fc}', '\u{102ff}'), - ('\u{10324}', '\u{1032c}'), - ('\u{1034b}', '\u{1034f}'), - ('\u{1037b}', '\u{1037f}'), - ('\u{1039e}', '\u{1039e}'), - ('\u{103c4}', '\u{103c7}'), - ('\u{103d6}', '\u{103ff}'), - ('\u{1049e}', '\u{1049f}'), - ('\u{104aa}', '\u{104af}'), - ('\u{104d4}', '\u{104d7}'), - ('\u{104fc}', '\u{104ff}'), - ('\u{10528}', '\u{1052f}'), - ('\u{10564}', '\u{1056e}'), - ('\u{1057b}', '\u{1057b}'), - ('\u{1058b}', '\u{1058b}'), - ('\u{10593}', '\u{10593}'), - ('\u{10596}', '\u{10596}'), - ('\u{105a2}', '\u{105a2}'), - ('\u{105b2}', '\u{105b2}'), - ('\u{105ba}', '\u{105ba}'), - ('\u{105bd}', '\u{105bf}'), - ('\u{105f4}', '\u{105ff}'), - ('\u{10737}', '\u{1073f}'), - ('\u{10756}', '\u{1075f}'), - ('\u{10768}', 
'\u{1077f}'), - ('\u{10786}', '\u{10786}'), - ('\u{107b1}', '\u{107b1}'), - ('\u{107bb}', '\u{107ff}'), - ('\u{10806}', '\u{10807}'), - ('\u{10809}', '\u{10809}'), - ('\u{10836}', '\u{10836}'), - ('\u{10839}', '\u{1083b}'), - ('\u{1083d}', '\u{1083e}'), - ('\u{10856}', '\u{10856}'), - ('\u{1089f}', '\u{108a6}'), - ('\u{108b0}', '\u{108df}'), - ('\u{108f3}', '\u{108f3}'), - ('\u{108f6}', '\u{108fa}'), - ('\u{1091c}', '\u{1091e}'), - ('\u{1093a}', '\u{1093e}'), - ('\u{10940}', '\u{1097f}'), - ('\u{109b8}', '\u{109bb}'), - ('\u{109d0}', '\u{109d1}'), - ('\u{10a04}', '\u{10a04}'), - ('\u{10a07}', '\u{10a0b}'), - ('\u{10a14}', '\u{10a14}'), - ('\u{10a18}', '\u{10a18}'), - ('\u{10a36}', '\u{10a37}'), - ('\u{10a3b}', '\u{10a3e}'), - ('\u{10a49}', '\u{10a4f}'), - ('\u{10a59}', '\u{10a5f}'), - ('\u{10aa0}', '\u{10abf}'), - ('\u{10ae7}', '\u{10aea}'), - ('\u{10af7}', '\u{10aff}'), - ('\u{10b36}', '\u{10b38}'), - ('\u{10b56}', '\u{10b57}'), - ('\u{10b73}', '\u{10b77}'), - ('\u{10b92}', '\u{10b98}'), - ('\u{10b9d}', '\u{10ba8}'), - ('\u{10bb0}', '\u{10bff}'), - ('\u{10c49}', '\u{10c7f}'), - ('\u{10cb3}', '\u{10cbf}'), - ('\u{10cf3}', '\u{10cf9}'), - ('\u{10d28}', '\u{10d2f}'), - ('\u{10d3a}', '\u{10d3f}'), - ('\u{10d66}', '\u{10d68}'), - ('\u{10d86}', '\u{10d8d}'), - ('\u{10d90}', '\u{10e5f}'), - ('\u{10e7f}', '\u{10e7f}'), - ('\u{10eaa}', '\u{10eaa}'), - ('\u{10eae}', '\u{10eaf}'), - ('\u{10eb2}', '\u{10ec1}'), - ('\u{10ec5}', '\u{10efb}'), - ('\u{10f28}', '\u{10f2f}'), - ('\u{10f5a}', '\u{10f6f}'), - ('\u{10f8a}', '\u{10faf}'), - ('\u{10fcc}', '\u{10fdf}'), - ('\u{10ff7}', '\u{10fff}'), - ('\u{1104e}', '\u{11051}'), - ('\u{11076}', '\u{1107e}'), - ('\u{110bd}', '\u{110bd}'), - ('\u{110c3}', '\u{110cf}'), - ('\u{110e9}', '\u{110ef}'), - ('\u{110fa}', '\u{110ff}'), - ('\u{11135}', '\u{11135}'), - ('\u{11148}', '\u{1114f}'), - ('\u{11177}', '\u{1117f}'), - ('\u{111e0}', '\u{111e0}'), - ('\u{111f5}', '\u{111ff}'), - ('\u{11212}', '\u{11212}'), - ('\u{11242}', '\u{1127f}'), - 
('\u{11287}', '\u{11287}'), - ('\u{11289}', '\u{11289}'), - ('\u{1128e}', '\u{1128e}'), - ('\u{1129e}', '\u{1129e}'), - ('\u{112aa}', '\u{112af}'), - ('\u{112eb}', '\u{112ef}'), - ('\u{112fa}', '\u{112ff}'), - ('\u{11304}', '\u{11304}'), - ('\u{1130d}', '\u{1130e}'), - ('\u{11311}', '\u{11312}'), - ('\u{11329}', '\u{11329}'), - ('\u{11331}', '\u{11331}'), - ('\u{11334}', '\u{11334}'), - ('\u{1133a}', '\u{1133a}'), - ('\u{11345}', '\u{11346}'), - ('\u{11349}', '\u{1134a}'), - ('\u{1134e}', '\u{1134f}'), - ('\u{11351}', '\u{11356}'), - ('\u{11358}', '\u{1135c}'), - ('\u{11364}', '\u{11365}'), - ('\u{1136d}', '\u{1136f}'), - ('\u{11375}', '\u{1137f}'), - ('\u{1138a}', '\u{1138a}'), - ('\u{1138c}', '\u{1138d}'), - ('\u{1138f}', '\u{1138f}'), - ('\u{113b6}', '\u{113b6}'), - ('\u{113c1}', '\u{113c1}'), - ('\u{113c3}', '\u{113c4}'), - ('\u{113c6}', '\u{113c6}'), - ('\u{113cb}', '\u{113cb}'), - ('\u{113d6}', '\u{113d6}'), - ('\u{113d9}', '\u{113e0}'), - ('\u{113e3}', '\u{113ff}'), - ('\u{1145c}', '\u{1145c}'), - ('\u{11462}', '\u{1147f}'), - ('\u{114c8}', '\u{114cf}'), - ('\u{114da}', '\u{1157f}'), - ('\u{115b6}', '\u{115b7}'), - ('\u{115de}', '\u{115ff}'), - ('\u{11645}', '\u{1164f}'), - ('\u{1165a}', '\u{1165f}'), - ('\u{1166d}', '\u{1167f}'), - ('\u{116ba}', '\u{116bf}'), - ('\u{116ca}', '\u{116cf}'), - ('\u{116e4}', '\u{116ff}'), - ('\u{1171b}', '\u{1171c}'), - ('\u{1172c}', '\u{1172f}'), - ('\u{11747}', '\u{117ff}'), - ('\u{1183c}', '\u{1189f}'), - ('\u{118f3}', '\u{118fe}'), - ('\u{11907}', '\u{11908}'), - ('\u{1190a}', '\u{1190b}'), - ('\u{11914}', '\u{11914}'), - ('\u{11917}', '\u{11917}'), - ('\u{11936}', '\u{11936}'), - ('\u{11939}', '\u{1193a}'), - ('\u{11947}', '\u{1194f}'), - ('\u{1195a}', '\u{1199f}'), - ('\u{119a8}', '\u{119a9}'), - ('\u{119d8}', '\u{119d9}'), - ('\u{119e5}', '\u{119ff}'), - ('\u{11a48}', '\u{11a4f}'), - ('\u{11aa3}', '\u{11aaf}'), - ('\u{11af9}', '\u{11aff}'), - ('\u{11b0a}', '\u{11bbf}'), - ('\u{11be2}', '\u{11bef}'), - ('\u{11bfa}', 
'\u{11bff}'), - ('\u{11c09}', '\u{11c09}'), - ('\u{11c37}', '\u{11c37}'), - ('\u{11c46}', '\u{11c4f}'), - ('\u{11c6d}', '\u{11c6f}'), - ('\u{11c90}', '\u{11c91}'), - ('\u{11ca8}', '\u{11ca8}'), - ('\u{11cb7}', '\u{11cff}'), - ('\u{11d07}', '\u{11d07}'), - ('\u{11d0a}', '\u{11d0a}'), - ('\u{11d37}', '\u{11d39}'), - ('\u{11d3b}', '\u{11d3b}'), - ('\u{11d3e}', '\u{11d3e}'), - ('\u{11d48}', '\u{11d4f}'), - ('\u{11d5a}', '\u{11d5f}'), - ('\u{11d66}', '\u{11d66}'), - ('\u{11d69}', '\u{11d69}'), - ('\u{11d8f}', '\u{11d8f}'), - ('\u{11d92}', '\u{11d92}'), - ('\u{11d99}', '\u{11d9f}'), - ('\u{11daa}', '\u{11edf}'), - ('\u{11ef9}', '\u{11eff}'), - ('\u{11f11}', '\u{11f11}'), - ('\u{11f3b}', '\u{11f3d}'), - ('\u{11f5b}', '\u{11faf}'), - ('\u{11fb1}', '\u{11fbf}'), - ('\u{11ff2}', '\u{11ffe}'), - ('\u{1239a}', '\u{123ff}'), - ('\u{1246f}', '\u{1246f}'), - ('\u{12475}', '\u{1247f}'), - ('\u{12544}', '\u{12f8f}'), - ('\u{12ff3}', '\u{12fff}'), - ('\u{13430}', '\u{1343f}'), - ('\u{13456}', '\u{1345f}'), - ('\u{143fb}', '\u{143ff}'), - ('\u{14647}', '\u{160ff}'), - ('\u{1613a}', '\u{167ff}'), - ('\u{16a39}', '\u{16a3f}'), - ('\u{16a5f}', '\u{16a5f}'), - ('\u{16a6a}', '\u{16a6d}'), - ('\u{16abf}', '\u{16abf}'), - ('\u{16aca}', '\u{16acf}'), - ('\u{16aee}', '\u{16aef}'), - ('\u{16af6}', '\u{16aff}'), - ('\u{16b46}', '\u{16b4f}'), - ('\u{16b5a}', '\u{16b5a}'), - ('\u{16b62}', '\u{16b62}'), - ('\u{16b78}', '\u{16b7c}'), - ('\u{16b90}', '\u{16d3f}'), - ('\u{16d7a}', '\u{16e3f}'), - ('\u{16e9b}', '\u{16eff}'), - ('\u{16f4b}', '\u{16f4e}'), - ('\u{16f88}', '\u{16f8e}'), - ('\u{16fa0}', '\u{16fdf}'), - ('\u{16fe5}', '\u{16fef}'), - ('\u{16ff2}', '\u{16fff}'), - ('\u{187f8}', '\u{187ff}'), - ('\u{18cd6}', '\u{18cfe}'), - ('\u{18d09}', '\u{1afef}'), - ('\u{1aff4}', '\u{1aff4}'), - ('\u{1affc}', '\u{1affc}'), - ('\u{1afff}', '\u{1afff}'), - ('\u{1b123}', '\u{1b131}'), - ('\u{1b133}', '\u{1b14f}'), - ('\u{1b153}', '\u{1b154}'), - ('\u{1b156}', '\u{1b163}'), - ('\u{1b168}', '\u{1b16f}'), - 
('\u{1b2fc}', '\u{1bbff}'), - ('\u{1bc6b}', '\u{1bc6f}'), - ('\u{1bc7d}', '\u{1bc7f}'), - ('\u{1bc89}', '\u{1bc8f}'), - ('\u{1bc9a}', '\u{1bc9b}'), - ('\u{1bca0}', '\u{1cbff}'), - ('\u{1ccfa}', '\u{1ccff}'), - ('\u{1ceb4}', '\u{1ceff}'), - ('\u{1cf2e}', '\u{1cf2f}'), - ('\u{1cf47}', '\u{1cf4f}'), - ('\u{1cfc4}', '\u{1cfff}'), - ('\u{1d0f6}', '\u{1d0ff}'), - ('\u{1d127}', '\u{1d128}'), - ('\u{1d173}', '\u{1d17a}'), - ('\u{1d1eb}', '\u{1d1ff}'), - ('\u{1d246}', '\u{1d2bf}'), - ('\u{1d2d4}', '\u{1d2df}'), - ('\u{1d2f4}', '\u{1d2ff}'), - ('\u{1d357}', '\u{1d35f}'), - ('\u{1d379}', '\u{1d3ff}'), - ('\u{1d455}', '\u{1d455}'), - ('\u{1d49d}', '\u{1d49d}'), - ('\u{1d4a0}', '\u{1d4a1}'), - ('\u{1d4a3}', '\u{1d4a4}'), - ('\u{1d4a7}', '\u{1d4a8}'), - ('\u{1d4ad}', '\u{1d4ad}'), - ('\u{1d4ba}', '\u{1d4ba}'), - ('\u{1d4bc}', '\u{1d4bc}'), - ('\u{1d4c4}', '\u{1d4c4}'), - ('\u{1d506}', '\u{1d506}'), - ('\u{1d50b}', '\u{1d50c}'), - ('\u{1d515}', '\u{1d515}'), - ('\u{1d51d}', '\u{1d51d}'), - ('\u{1d53a}', '\u{1d53a}'), - ('\u{1d53f}', '\u{1d53f}'), - ('\u{1d545}', '\u{1d545}'), - ('\u{1d547}', '\u{1d549}'), - ('\u{1d551}', '\u{1d551}'), - ('\u{1d6a6}', '\u{1d6a7}'), - ('\u{1d7cc}', '\u{1d7cd}'), - ('\u{1da8c}', '\u{1da9a}'), - ('\u{1daa0}', '\u{1daa0}'), - ('\u{1dab0}', '\u{1deff}'), - ('\u{1df1f}', '\u{1df24}'), - ('\u{1df2b}', '\u{1dfff}'), - ('\u{1e007}', '\u{1e007}'), - ('\u{1e019}', '\u{1e01a}'), - ('\u{1e022}', '\u{1e022}'), - ('\u{1e025}', '\u{1e025}'), - ('\u{1e02b}', '\u{1e02f}'), - ('\u{1e06e}', '\u{1e08e}'), - ('\u{1e090}', '\u{1e0ff}'), - ('\u{1e12d}', '\u{1e12f}'), - ('\u{1e13e}', '\u{1e13f}'), - ('\u{1e14a}', '\u{1e14d}'), - ('\u{1e150}', '\u{1e28f}'), - ('\u{1e2af}', '\u{1e2bf}'), - ('\u{1e2fa}', '\u{1e2fe}'), - ('\u{1e300}', '\u{1e4cf}'), - ('\u{1e4fa}', '\u{1e5cf}'), - ('\u{1e5fb}', '\u{1e5fe}'), - ('\u{1e600}', '\u{1e7df}'), - ('\u{1e7e7}', '\u{1e7e7}'), - ('\u{1e7ec}', '\u{1e7ec}'), - ('\u{1e7ef}', '\u{1e7ef}'), - ('\u{1e7ff}', '\u{1e7ff}'), - ('\u{1e8c5}', 
'\u{1e8c6}'), - ('\u{1e8d7}', '\u{1e8ff}'), - ('\u{1e94c}', '\u{1e94f}'), - ('\u{1e95a}', '\u{1e95d}'), - ('\u{1e960}', '\u{1ec70}'), - ('\u{1ecb5}', '\u{1ed00}'), - ('\u{1ed3e}', '\u{1edff}'), - ('\u{1ee04}', '\u{1ee04}'), - ('\u{1ee20}', '\u{1ee20}'), - ('\u{1ee23}', '\u{1ee23}'), - ('\u{1ee25}', '\u{1ee26}'), - ('\u{1ee28}', '\u{1ee28}'), - ('\u{1ee33}', '\u{1ee33}'), - ('\u{1ee38}', '\u{1ee38}'), - ('\u{1ee3a}', '\u{1ee3a}'), - ('\u{1ee3c}', '\u{1ee41}'), - ('\u{1ee43}', '\u{1ee46}'), - ('\u{1ee48}', '\u{1ee48}'), - ('\u{1ee4a}', '\u{1ee4a}'), - ('\u{1ee4c}', '\u{1ee4c}'), - ('\u{1ee50}', '\u{1ee50}'), - ('\u{1ee53}', '\u{1ee53}'), - ('\u{1ee55}', '\u{1ee56}'), - ('\u{1ee58}', '\u{1ee58}'), - ('\u{1ee5a}', '\u{1ee5a}'), - ('\u{1ee5c}', '\u{1ee5c}'), - ('\u{1ee5e}', '\u{1ee5e}'), - ('\u{1ee60}', '\u{1ee60}'), - ('\u{1ee63}', '\u{1ee63}'), - ('\u{1ee65}', '\u{1ee66}'), - ('\u{1ee6b}', '\u{1ee6b}'), - ('\u{1ee73}', '\u{1ee73}'), - ('\u{1ee78}', '\u{1ee78}'), - ('\u{1ee7d}', '\u{1ee7d}'), - ('\u{1ee7f}', '\u{1ee7f}'), - ('\u{1ee8a}', '\u{1ee8a}'), - ('\u{1ee9c}', '\u{1eea0}'), - ('\u{1eea4}', '\u{1eea4}'), - ('\u{1eeaa}', '\u{1eeaa}'), - ('\u{1eebc}', '\u{1eeef}'), - ('\u{1eef2}', '\u{1efff}'), - ('\u{1f02c}', '\u{1f02f}'), - ('\u{1f094}', '\u{1f09f}'), - ('\u{1f0af}', '\u{1f0b0}'), - ('\u{1f0c0}', '\u{1f0c0}'), - ('\u{1f0d0}', '\u{1f0d0}'), - ('\u{1f0f6}', '\u{1f0ff}'), - ('\u{1f1ae}', '\u{1f1e5}'), - ('\u{1f203}', '\u{1f20f}'), - ('\u{1f23c}', '\u{1f23f}'), - ('\u{1f249}', '\u{1f24f}'), - ('\u{1f252}', '\u{1f25f}'), - ('\u{1f266}', '\u{1f2ff}'), - ('\u{1f6d8}', '\u{1f6db}'), - ('\u{1f6ed}', '\u{1f6ef}'), - ('\u{1f6fd}', '\u{1f6ff}'), - ('\u{1f777}', '\u{1f77a}'), - ('\u{1f7da}', '\u{1f7df}'), - ('\u{1f7ec}', '\u{1f7ef}'), - ('\u{1f7f1}', '\u{1f7ff}'), - ('\u{1f80c}', '\u{1f80f}'), - ('\u{1f848}', '\u{1f84f}'), - ('\u{1f85a}', '\u{1f85f}'), - ('\u{1f888}', '\u{1f88f}'), - ('\u{1f8ae}', '\u{1f8af}'), - ('\u{1f8bc}', '\u{1f8bf}'), - ('\u{1f8c2}', '\u{1f8ff}'), - 
('\u{1fa54}', '\u{1fa5f}'), - ('\u{1fa6e}', '\u{1fa6f}'), - ('\u{1fa7d}', '\u{1fa7f}'), - ('\u{1fa8a}', '\u{1fa8e}'), - ('\u{1fac7}', '\u{1facd}'), - ('\u{1fadd}', '\u{1fade}'), - ('\u{1faea}', '\u{1faef}'), - ('\u{1faf9}', '\u{1faff}'), - ('\u{1fb93}', '\u{1fb93}'), - ('\u{1fbfa}', '\u{1ffff}'), - ('\u{2a6e0}', '\u{2a6ff}'), - ('\u{2b73a}', '\u{2b73f}'), - ('\u{2b81e}', '\u{2b81f}'), - ('\u{2cea2}', '\u{2ceaf}'), - ('\u{2ebe1}', '\u{2ebef}'), - ('\u{2ee5e}', '\u{2f7ff}'), - ('\u{2fa1e}', '\u{2ffff}'), - ('\u{3134b}', '\u{3134f}'), - ('\u{323b0}', '\u{e00ff}'), - ('\u{e01f0}', '\u{10ffff}'), -]; - -pub const OTHER_LETTER: &'static [(char, char)] = &[ - ('ª', 'ª'), - ('º', 'º'), - ('ƻ', 'ƻ'), - ('ǀ', 'ǃ'), - ('ʔ', 'ʔ'), - ('א', 'ת'), - ('ׯ', 'ײ'), - ('ؠ', 'ؿ'), - ('ف', 'ي'), - ('ٮ', 'ٯ'), - ('ٱ', 'ۓ'), - ('ە', 'ە'), - ('ۮ', 'ۯ'), - ('ۺ', 'ۼ'), - ('ۿ', 'ۿ'), - ('ܐ', 'ܐ'), - ('ܒ', 'ܯ'), - ('ݍ', 'ޥ'), - ('ޱ', 'ޱ'), - ('ߊ', 'ߪ'), - ('ࠀ', 'ࠕ'), - ('ࡀ', 'ࡘ'), - ('ࡠ', 'ࡪ'), - ('ࡰ', 'ࢇ'), - ('ࢉ', 'ࢎ'), - ('ࢠ', 'ࣈ'), - ('ऄ', 'ह'), - ('ऽ', 'ऽ'), - ('ॐ', 'ॐ'), - ('क़', 'ॡ'), - ('ॲ', 'ঀ'), - ('অ', 'ঌ'), - ('এ', 'ঐ'), - ('ও', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('ঽ', 'ঽ'), - ('ৎ', 'ৎ'), - ('ড়', 'ঢ়'), - ('য়', 'ৡ'), - ('ৰ', 'ৱ'), - ('ৼ', 'ৼ'), - ('ਅ', 'ਊ'), - ('ਏ', 'ਐ'), - ('ਓ', 'ਨ'), - ('ਪ', 'ਰ'), - ('ਲ', 'ਲ਼'), - ('ਵ', 'ਸ਼'), - ('ਸ', 'ਹ'), - ('ਖ਼', 'ੜ'), - ('ਫ਼', 'ਫ਼'), - ('ੲ', 'ੴ'), - ('અ', 'ઍ'), - ('એ', 'ઑ'), - ('ઓ', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('ઽ', 'ઽ'), - ('ૐ', 'ૐ'), - ('ૠ', 'ૡ'), - ('ૹ', 'ૹ'), - ('ଅ', 'ଌ'), - ('ଏ', 'ଐ'), - ('ଓ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 'ଳ'), - ('ଵ', 'ହ'), - ('ଽ', 'ଽ'), - ('ଡ଼', 'ଢ଼'), - ('ୟ', 'ୡ'), - ('ୱ', 'ୱ'), - ('ஃ', 'ஃ'), - ('அ', 'ஊ'), - ('எ', 'ஐ'), - ('ஒ', 'க'), - ('ங', 'ச'), - ('ஜ', 'ஜ'), - ('ஞ', 'ட'), - ('ண', 'த'), - ('ந', 'ப'), - ('ம', 'ஹ'), - ('ௐ', 'ௐ'), - ('అ', 'ఌ'), - ('ఎ', 'ఐ'), - ('ఒ', 'న'), - ('ప', 'హ'), - ('ఽ', 'ఽ'), - ('ౘ', 'ౚ'), - ('ౝ', 'ౝ'), - ('ౠ', 'ౡ'), - ('ಀ', 'ಀ'), - ('ಅ', 'ಌ'), - ('ಎ', 'ಐ'), 
- ('ಒ', 'ನ'), - ('ಪ', 'ಳ'), - ('ವ', 'ಹ'), - ('ಽ', 'ಽ'), - ('ೝ', 'ೞ'), - ('ೠ', 'ೡ'), - ('ೱ', 'ೲ'), - ('ഄ', 'ഌ'), - ('എ', 'ഐ'), - ('ഒ', 'ഺ'), - ('ഽ', 'ഽ'), - ('ൎ', 'ൎ'), - ('ൔ', 'ൖ'), - ('ൟ', 'ൡ'), - ('ൺ', 'ൿ'), - ('අ', 'ඖ'), - ('ක', 'න'), - ('ඳ', 'ර'), - ('ල', 'ල'), - ('ව', 'ෆ'), - ('ก', 'ะ'), - ('า', 'ำ'), - ('เ', 'ๅ'), - ('ກ', 'ຂ'), - ('ຄ', 'ຄ'), - ('ຆ', 'ຊ'), - ('ຌ', 'ຣ'), - ('ລ', 'ລ'), - ('ວ', 'ະ'), - ('າ', 'ຳ'), - ('ຽ', 'ຽ'), - ('ເ', 'ໄ'), - ('ໜ', 'ໟ'), - ('ༀ', 'ༀ'), - ('ཀ', 'ཇ'), - ('ཉ', 'ཬ'), - ('ྈ', 'ྌ'), - ('က', 'ဪ'), - ('ဿ', 'ဿ'), - ('ၐ', 'ၕ'), - ('ၚ', 'ၝ'), - ('ၡ', 'ၡ'), - ('ၥ', 'ၦ'), - ('ၮ', 'ၰ'), - ('ၵ', 'ႁ'), - ('ႎ', 'ႎ'), - ('ᄀ', 'ቈ'), - ('ቊ', 'ቍ'), - ('ቐ', 'ቖ'), - ('ቘ', 'ቘ'), - ('ቚ', 'ቝ'), - ('በ', 'ኈ'), - ('ኊ', 'ኍ'), - ('ነ', 'ኰ'), - ('ኲ', 'ኵ'), - ('ኸ', 'ኾ'), - ('ዀ', 'ዀ'), - ('ዂ', 'ዅ'), - ('ወ', 'ዖ'), - ('ዘ', 'ጐ'), - ('ጒ', 'ጕ'), - ('ጘ', 'ፚ'), - ('ᎀ', 'ᎏ'), - ('ᐁ', 'ᙬ'), - ('ᙯ', 'ᙿ'), - ('ᚁ', 'ᚚ'), - ('ᚠ', 'ᛪ'), - ('ᛱ', 'ᛸ'), - ('ᜀ', 'ᜑ'), - ('ᜟ', 'ᜱ'), - ('ᝀ', 'ᝑ'), - ('ᝠ', 'ᝬ'), - ('ᝮ', 'ᝰ'), - ('ក', 'ឳ'), - ('ៜ', 'ៜ'), - ('ᠠ', 'ᡂ'), - ('ᡄ', 'ᡸ'), - ('ᢀ', 'ᢄ'), - ('ᢇ', 'ᢨ'), - ('ᢪ', 'ᢪ'), - ('ᢰ', 'ᣵ'), - ('ᤀ', 'ᤞ'), - ('ᥐ', 'ᥭ'), - ('ᥰ', 'ᥴ'), - ('ᦀ', 'ᦫ'), - ('ᦰ', 'ᧉ'), - ('ᨀ', 'ᨖ'), - ('ᨠ', 'ᩔ'), - ('ᬅ', 'ᬳ'), - ('ᭅ', 'ᭌ'), - ('ᮃ', 'ᮠ'), - ('ᮮ', 'ᮯ'), - ('ᮺ', 'ᯥ'), - ('ᰀ', 'ᰣ'), - ('ᱍ', 'ᱏ'), - ('ᱚ', 'ᱷ'), - ('ᳩ', 'ᳬ'), - ('ᳮ', 'ᳳ'), - ('ᳵ', 'ᳶ'), - ('ᳺ', 'ᳺ'), - ('ℵ', 'ℸ'), - ('ⴰ', 'ⵧ'), - ('ⶀ', 'ⶖ'), - ('ⶠ', 'ⶦ'), - ('ⶨ', 'ⶮ'), - ('ⶰ', 'ⶶ'), - ('ⶸ', 'ⶾ'), - ('ⷀ', 'ⷆ'), - ('ⷈ', 'ⷎ'), - ('ⷐ', 'ⷖ'), - ('ⷘ', 'ⷞ'), - ('〆', '〆'), - ('〼', '〼'), - ('ぁ', 'ゖ'), - ('ゟ', 'ゟ'), - ('ァ', 'ヺ'), - ('ヿ', 'ヿ'), - ('ㄅ', 'ㄯ'), - ('ㄱ', 'ㆎ'), - ('ㆠ', 'ㆿ'), - ('ㇰ', 'ㇿ'), - ('㐀', '䶿'), - ('一', 'ꀔ'), - ('ꀖ', 'ꒌ'), - ('ꓐ', 'ꓷ'), - ('ꔀ', 'ꘋ'), - ('ꘐ', 'ꘟ'), - ('ꘪ', 'ꘫ'), - ('ꙮ', 'ꙮ'), - ('ꚠ', 'ꛥ'), - ('ꞏ', 'ꞏ'), - ('ꟷ', 'ꟷ'), - ('ꟻ', 'ꠁ'), - ('ꠃ', 'ꠅ'), - ('ꠇ', 'ꠊ'), - ('ꠌ', 'ꠢ'), - ('ꡀ', 'ꡳ'), - ('ꢂ', 'ꢳ'), - ('ꣲ', 'ꣷ'), - ('ꣻ', 'ꣻ'), - ('ꣽ', 'ꣾ'), - ('ꤊ', 'ꤥ'), - ('ꤰ', 
'ꥆ'), - ('ꥠ', 'ꥼ'), - ('ꦄ', 'ꦲ'), - ('ꧠ', 'ꧤ'), - ('ꧧ', 'ꧯ'), - ('ꧺ', 'ꧾ'), - ('ꨀ', 'ꨨ'), - ('ꩀ', 'ꩂ'), - ('ꩄ', 'ꩋ'), - ('ꩠ', 'ꩯ'), - ('ꩱ', 'ꩶ'), - ('ꩺ', 'ꩺ'), - ('ꩾ', 'ꪯ'), - ('ꪱ', 'ꪱ'), - ('ꪵ', 'ꪶ'), - ('ꪹ', 'ꪽ'), - ('ꫀ', 'ꫀ'), - ('ꫂ', 'ꫂ'), - ('ꫛ', 'ꫜ'), - ('ꫠ', 'ꫪ'), - ('ꫲ', 'ꫲ'), - ('ꬁ', 'ꬆ'), - ('ꬉ', 'ꬎ'), - ('ꬑ', 'ꬖ'), - ('ꬠ', 'ꬦ'), - ('ꬨ', 'ꬮ'), - ('ꯀ', 'ꯢ'), - ('가', '힣'), - ('ힰ', 'ퟆ'), - ('ퟋ', 'ퟻ'), - ('豈', '舘'), - ('並', '龎'), - ('יִ', 'יִ'), - ('ײַ', 'ﬨ'), - ('שׁ', 'זּ'), - ('טּ', 'לּ'), - ('מּ', 'מּ'), - ('נּ', 'סּ'), - ('ףּ', 'פּ'), - ('צּ', 'ﮱ'), - ('ﯓ', 'ﴽ'), - ('ﵐ', 'ﶏ'), - ('ﶒ', 'ﷇ'), - ('ﷰ', 'ﷻ'), - ('ﹰ', 'ﹴ'), - ('ﹶ', 'ﻼ'), - ('ヲ', 'ッ'), - ('ア', 'ン'), - ('ᅠ', 'ᄒ'), - ('ᅡ', 'ᅦ'), - ('ᅧ', 'ᅬ'), - ('ᅭ', 'ᅲ'), - ('ᅳ', 'ᅵ'), - ('𐀀', '𐀋'), - ('𐀍', '𐀦'), - ('𐀨', '𐀺'), - ('𐀼', '𐀽'), - ('𐀿', '𐁍'), - ('𐁐', '𐁝'), - ('𐂀', '𐃺'), - ('𐊀', '𐊜'), - ('𐊠', '𐋐'), - ('𐌀', '𐌟'), - ('𐌭', '𐍀'), - ('𐍂', '𐍉'), - ('𐍐', '𐍵'), - ('𐎀', '𐎝'), - ('𐎠', '𐏃'), - ('𐏈', '𐏏'), - ('𐑐', '𐒝'), - ('𐔀', '𐔧'), - ('𐔰', '𐕣'), - ('𐗀', '𐗳'), - ('𐘀', '𐜶'), - ('𐝀', '𐝕'), - ('𐝠', '𐝧'), - ('𐠀', '𐠅'), - ('𐠈', '𐠈'), - ('𐠊', '𐠵'), - ('𐠷', '𐠸'), - ('𐠼', '𐠼'), - ('𐠿', '𐡕'), - ('𐡠', '𐡶'), - ('𐢀', '𐢞'), - ('𐣠', '𐣲'), - ('𐣴', '𐣵'), - ('𐤀', '𐤕'), - ('𐤠', '𐤹'), - ('𐦀', '𐦷'), - ('𐦾', '𐦿'), - ('𐨀', '𐨀'), - ('𐨐', '𐨓'), - ('𐨕', '𐨗'), - ('𐨙', '𐨵'), - ('𐩠', '𐩼'), - ('𐪀', '𐪜'), - ('𐫀', '𐫇'), - ('𐫉', '𐫤'), - ('𐬀', '𐬵'), - ('𐭀', '𐭕'), - ('𐭠', '𐭲'), - ('𐮀', '𐮑'), - ('𐰀', '𐱈'), - ('𐴀', '𐴣'), - ('𐵊', '𐵍'), - ('𐵏', '𐵏'), - ('𐺀', '𐺩'), - ('𐺰', '𐺱'), - ('𐻂', '𐻄'), - ('𐼀', '𐼜'), - ('𐼧', '𐼧'), - ('𐼰', '𐽅'), - ('𐽰', '𐾁'), - ('𐾰', '𐿄'), - ('𐿠', '𐿶'), - ('𑀃', '𑀷'), - ('𑁱', '𑁲'), - ('𑁵', '𑁵'), - ('𑂃', '𑂯'), - ('𑃐', '𑃨'), - ('𑄃', '𑄦'), - ('𑅄', '𑅄'), - ('𑅇', '𑅇'), - ('𑅐', '𑅲'), - ('𑅶', '𑅶'), - ('𑆃', '𑆲'), - ('𑇁', '𑇄'), - ('𑇚', '𑇚'), - ('𑇜', '𑇜'), - ('𑈀', '𑈑'), - ('𑈓', '𑈫'), - ('𑈿', '𑉀'), - ('𑊀', '𑊆'), - ('𑊈', '𑊈'), - ('𑊊', '𑊍'), - ('𑊏', '𑊝'), - ('𑊟', '𑊨'), - ('𑊰', '𑋞'), - ('𑌅', '𑌌'), - ('𑌏', '𑌐'), - ('𑌓', '𑌨'), - ('𑌪', '𑌰'), - 
('𑌲', '𑌳'), - ('𑌵', '𑌹'), - ('𑌽', '𑌽'), - ('𑍐', '𑍐'), - ('𑍝', '𑍡'), - ('𑎀', '𑎉'), - ('𑎋', '𑎋'), - ('𑎎', '𑎎'), - ('𑎐', '𑎵'), - ('𑎷', '𑎷'), - ('𑏑', '𑏑'), - ('𑏓', '𑏓'), - ('𑐀', '𑐴'), - ('𑑇', '𑑊'), - ('𑑟', '𑑡'), - ('𑒀', '𑒯'), - ('𑓄', '𑓅'), - ('𑓇', '𑓇'), - ('𑖀', '𑖮'), - ('𑗘', '𑗛'), - ('𑘀', '𑘯'), - ('𑙄', '𑙄'), - ('𑚀', '𑚪'), - ('𑚸', '𑚸'), - ('𑜀', '𑜚'), - ('𑝀', '𑝆'), - ('𑠀', '𑠫'), - ('𑣿', '𑤆'), - ('𑤉', '𑤉'), - ('𑤌', '𑤓'), - ('𑤕', '𑤖'), - ('𑤘', '𑤯'), - ('𑤿', '𑤿'), - ('𑥁', '𑥁'), - ('𑦠', '𑦧'), - ('𑦪', '𑧐'), - ('𑧡', '𑧡'), - ('𑧣', '𑧣'), - ('𑨀', '𑨀'), - ('𑨋', '𑨲'), - ('𑨺', '𑨺'), - ('𑩐', '𑩐'), - ('𑩜', '𑪉'), - ('𑪝', '𑪝'), - ('𑪰', '𑫸'), - ('𑯀', '𑯠'), - ('𑰀', '𑰈'), - ('𑰊', '𑰮'), - ('𑱀', '𑱀'), - ('𑱲', '𑲏'), - ('𑴀', '𑴆'), - ('𑴈', '𑴉'), - ('𑴋', '𑴰'), - ('𑵆', '𑵆'), - ('𑵠', '𑵥'), - ('𑵧', '𑵨'), - ('𑵪', '𑶉'), - ('𑶘', '𑶘'), - ('𑻠', '𑻲'), - ('𑼂', '𑼂'), - ('𑼄', '𑼐'), - ('𑼒', '𑼳'), - ('𑾰', '𑾰'), - ('𒀀', '𒎙'), - ('𒒀', '𒕃'), - ('𒾐', '𒿰'), - ('𓀀', '𓐯'), - ('𓑁', '𓑆'), - ('𓑠', '𔏺'), - ('𔐀', '𔙆'), - ('𖄀', '𖄝'), - ('𖠀', '𖨸'), - ('𖩀', '𖩞'), - ('𖩰', '𖪾'), - ('𖫐', '𖫭'), - ('𖬀', '𖬯'), - ('𖭣', '𖭷'), - ('𖭽', '𖮏'), - ('𖵃', '𖵪'), - ('𖼀', '𖽊'), - ('𖽐', '𖽐'), - ('𗀀', '𘟷'), - ('𘠀', '𘳕'), - ('𘳿', '𘴈'), - ('𛀀', '𛄢'), - ('𛄲', '𛄲'), - ('𛅐', '𛅒'), - ('𛅕', '𛅕'), - ('𛅤', '𛅧'), - ('𛅰', '𛋻'), - ('𛰀', '𛱪'), - ('𛱰', '𛱼'), - ('𛲀', '𛲈'), - ('𛲐', '𛲙'), - ('𝼊', '𝼊'), - ('𞄀', '𞄬'), - ('𞅎', '𞅎'), - ('𞊐', '𞊭'), - ('𞋀', '𞋫'), - ('𞓐', '𞓪'), - ('𞗐', '𞗭'), - ('𞗰', '𞗰'), - ('𞟠', '𞟦'), - ('𞟨', '𞟫'), - ('𞟭', '𞟮'), - ('𞟰', '𞟾'), - ('𞠀', '𞣄'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('𠀀', '𪛟'), - ('𪜀', '𫜹'), - ('𫝀', '𫠝'), 
- ('𫠠', '𬺡'), - ('𬺰', '𮯠'), - ('𮯰', '𮹝'), - ('丽', '𪘀'), - ('𰀀', '𱍊'), - ('𱍐', '𲎯'), -]; - -pub const OTHER_NUMBER: &'static [(char, char)] = &[ - ('²', '³'), - ('¹', '¹'), - ('¼', '¾'), - ('৴', '৹'), - ('୲', '୷'), - ('௰', '௲'), - ('౸', '౾'), - ('൘', '൞'), - ('൰', '൸'), - ('༪', '༳'), - ('፩', '፼'), - ('៰', '៹'), - ('᧚', '᧚'), - ('⁰', '⁰'), - ('⁴', '⁹'), - ('₀', '₉'), - ('⅐', '⅟'), - ('↉', '↉'), - ('①', '⒛'), - ('⓪', '⓿'), - ('❶', '➓'), - ('⳽', '⳽'), - ('㆒', '㆕'), - ('㈠', '㈩'), - ('㉈', '㉏'), - ('㉑', '㉟'), - ('㊀', '㊉'), - ('㊱', '㊿'), - ('꠰', '꠵'), - ('𐄇', '𐄳'), - ('𐅵', '𐅸'), - ('𐆊', '𐆋'), - ('𐋡', '𐋻'), - ('𐌠', '𐌣'), - ('𐡘', '𐡟'), - ('𐡹', '𐡿'), - ('𐢧', '𐢯'), - ('𐣻', '𐣿'), - ('𐤖', '𐤛'), - ('𐦼', '𐦽'), - ('𐧀', '𐧏'), - ('𐧒', '𐧿'), - ('𐩀', '𐩈'), - ('𐩽', '𐩾'), - ('𐪝', '𐪟'), - ('𐫫', '𐫯'), - ('𐭘', '𐭟'), - ('𐭸', '𐭿'), - ('𐮩', '𐮯'), - ('𐳺', '𐳿'), - ('𐹠', '𐹾'), - ('𐼝', '𐼦'), - ('𐽑', '𐽔'), - ('𐿅', '𐿋'), - ('𑁒', '𑁥'), - ('𑇡', '𑇴'), - ('𑜺', '𑜻'), - ('𑣪', '𑣲'), - ('𑱚', '𑱬'), - ('𑿀', '𑿔'), - ('𖭛', '𖭡'), - ('𖺀', '𖺖'), - ('𝋀', '𝋓'), - ('𝋠', '𝋳'), - ('𝍠', '𝍸'), - ('𞣇', '𞣏'), - ('𞱱', '𞲫'), - ('𞲭', '𞲯'), - ('𞲱', '𞲴'), - ('𞴁', '𞴭'), - ('𞴯', '𞴽'), - ('🄀', '🄌'), -]; - -pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ - ('!', '#'), - ('%', '\''), - ('*', '*'), - (',', ','), - ('.', '/'), - (':', ';'), - ('?', '@'), - ('\\', '\\'), - ('¡', '¡'), - ('§', '§'), - ('¶', '·'), - ('¿', '¿'), - (';', ';'), - ('·', '·'), - ('՚', '՟'), - ('։', '։'), - ('׀', '׀'), - ('׃', '׃'), - ('׆', '׆'), - ('׳', '״'), - ('؉', '؊'), - ('،', '؍'), - ('؛', '؛'), - ('؝', '؟'), - ('٪', '٭'), - ('۔', '۔'), - ('܀', '܍'), - ('߷', '߹'), - ('࠰', '࠾'), - ('࡞', '࡞'), - ('।', '॥'), - ('॰', '॰'), - ('৽', '৽'), - ('੶', '੶'), - ('૰', '૰'), - ('౷', '౷'), - ('಄', '಄'), - ('෴', '෴'), - ('๏', '๏'), - ('๚', '๛'), - ('༄', '༒'), - ('༔', '༔'), - ('྅', '྅'), - ('࿐', '࿔'), - ('࿙', '࿚'), - ('၊', '၏'), - ('჻', '჻'), - ('፠', '፨'), - ('᙮', '᙮'), - ('᛫', '᛭'), - ('᜵', '᜶'), - ('។', '៖'), - ('៘', '៚'), - ('᠀', '᠅'), - ('᠇', '᠊'), - ('᥄', 
'᥅'), - ('᨞', '᨟'), - ('᪠', '᪦'), - ('᪨', '᪭'), - ('᭎', '᭏'), - ('᭚', '᭠'), - ('᭽', '᭿'), - ('᯼', '᯿'), - ('᰻', '᰿'), - ('᱾', '᱿'), - ('᳀', '᳇'), - ('᳓', '᳓'), - ('‖', '‗'), - ('†', '‧'), - ('‰', '‸'), - ('※', '‾'), - ('⁁', '⁃'), - ('⁇', '⁑'), - ('⁓', '⁓'), - ('⁕', '⁞'), - ('⳹', '⳼'), - ('⳾', '⳿'), - ('⵰', '⵰'), - ('⸀', '⸁'), - ('⸆', '⸈'), - ('⸋', '⸋'), - ('⸎', '⸖'), - ('⸘', '⸙'), - ('⸛', '⸛'), - ('⸞', '⸟'), - ('⸪', '⸮'), - ('⸰', '⸹'), - ('⸼', '⸿'), - ('⹁', '⹁'), - ('⹃', '⹏'), - ('⹒', '⹔'), - ('、', '〃'), - ('〽', '〽'), - ('・', '・'), - ('꓾', '꓿'), - ('꘍', '꘏'), - ('꙳', '꙳'), - ('꙾', '꙾'), - ('꛲', '꛷'), - ('꡴', '꡷'), - ('꣎', '꣏'), - ('꣸', '꣺'), - ('꣼', '꣼'), - ('꤮', '꤯'), - ('꥟', '꥟'), - ('꧁', '꧍'), - ('꧞', '꧟'), - ('꩜', '꩟'), - ('꫞', '꫟'), - ('꫰', '꫱'), - ('꯫', '꯫'), - ('︐', '︖'), - ('︙', '︙'), - ('︰', '︰'), - ('﹅', '﹆'), - ('﹉', '﹌'), - ('﹐', '﹒'), - ('﹔', '﹗'), - ('﹟', '﹡'), - ('﹨', '﹨'), - ('﹪', '﹫'), - ('!', '#'), - ('%', '''), - ('*', '*'), - (',', ','), - ('.', '/'), - (':', ';'), - ('?', '@'), - ('\', '\'), - ('。', '。'), - ('、', '・'), - ('𐄀', '𐄂'), - ('𐎟', '𐎟'), - ('𐏐', '𐏐'), - ('𐕯', '𐕯'), - ('𐡗', '𐡗'), - ('𐤟', '𐤟'), - ('𐤿', '𐤿'), - ('𐩐', '𐩘'), - ('𐩿', '𐩿'), - ('𐫰', '𐫶'), - ('𐬹', '𐬿'), - ('𐮙', '𐮜'), - ('𐽕', '𐽙'), - ('𐾆', '𐾉'), - ('𑁇', '𑁍'), - ('𑂻', '𑂼'), - ('𑂾', '𑃁'), - ('𑅀', '𑅃'), - ('𑅴', '𑅵'), - ('𑇅', '𑇈'), - ('𑇍', '𑇍'), - ('𑇛', '𑇛'), - ('𑇝', '𑇟'), - ('𑈸', '𑈽'), - ('𑊩', '𑊩'), - ('𑏔', '𑏕'), - ('𑏗', '𑏘'), - ('𑑋', '𑑏'), - ('𑑚', '𑑛'), - ('𑑝', '𑑝'), - ('𑓆', '𑓆'), - ('𑗁', '𑗗'), - ('𑙁', '𑙃'), - ('𑙠', '𑙬'), - ('𑚹', '𑚹'), - ('𑜼', '𑜾'), - ('𑠻', '𑠻'), - ('𑥄', '𑥆'), - ('𑧢', '𑧢'), - ('𑨿', '𑩆'), - ('𑪚', '𑪜'), - ('𑪞', '𑪢'), - ('𑬀', '𑬉'), - ('𑯡', '𑯡'), - ('𑱁', '𑱅'), - ('𑱰', '𑱱'), - ('𑻷', '𑻸'), - ('𑽃', '𑽏'), - ('𑿿', '𑿿'), - ('𒑰', '𒑴'), - ('𒿱', '𒿲'), - ('𖩮', '𖩯'), - ('𖫵', '𖫵'), - ('𖬷', '𖬻'), - ('𖭄', '𖭄'), - ('𖵭', '𖵯'), - ('𖺗', '𖺚'), - ('𖿢', '𖿢'), - ('𛲟', '𛲟'), - ('𝪇', '𝪋'), - ('𞗿', '𞗿'), - ('𞥞', '𞥟'), -]; - -pub const OTHER_SYMBOL: &'static [(char, char)] = &[ - ('¦', '¦'), - 
('©', '©'), - ('®', '®'), - ('°', '°'), - ('҂', '҂'), - ('֍', '֎'), - ('؎', '؏'), - ('۞', '۞'), - ('۩', '۩'), - ('۽', '۾'), - ('߶', '߶'), - ('৺', '৺'), - ('୰', '୰'), - ('௳', '௸'), - ('௺', '௺'), - ('౿', '౿'), - ('൏', '൏'), - ('൹', '൹'), - ('༁', '༃'), - ('༓', '༓'), - ('༕', '༗'), - ('༚', '༟'), - ('༴', '༴'), - ('༶', '༶'), - ('༸', '༸'), - ('྾', '࿅'), - ('࿇', '࿌'), - ('࿎', '࿏'), - ('࿕', '࿘'), - ('႞', '႟'), - ('᎐', '᎙'), - ('᙭', '᙭'), - ('᥀', '᥀'), - ('᧞', '᧿'), - ('᭡', '᭪'), - ('᭴', '᭼'), - ('℀', '℁'), - ('℃', '℆'), - ('℈', '℉'), - ('℔', '℔'), - ('№', '℗'), - ('℞', '℣'), - ('℥', '℥'), - ('℧', '℧'), - ('℩', '℩'), - ('℮', '℮'), - ('℺', '℻'), - ('⅊', '⅊'), - ('⅌', '⅍'), - ('⅏', '⅏'), - ('↊', '↋'), - ('↕', '↙'), - ('↜', '↟'), - ('↡', '↢'), - ('↤', '↥'), - ('↧', '↭'), - ('↯', '⇍'), - ('⇐', '⇑'), - ('⇓', '⇓'), - ('⇕', '⇳'), - ('⌀', '⌇'), - ('⌌', '⌟'), - ('⌢', '⌨'), - ('⌫', '⍻'), - ('⍽', '⎚'), - ('⎴', '⏛'), - ('⏢', '␩'), - ('⑀', '⑊'), - ('⒜', 'ⓩ'), - ('─', '▶'), - ('▸', '◀'), - ('◂', '◷'), - ('☀', '♮'), - ('♰', '❧'), - ('➔', '➿'), - ('⠀', '⣿'), - ('⬀', '⬯'), - ('⭅', '⭆'), - ('⭍', '⭳'), - ('⭶', '⮕'), - ('⮗', '⯿'), - ('⳥', '⳪'), - ('⹐', '⹑'), - ('⺀', '⺙'), - ('⺛', '⻳'), - ('⼀', '⿕'), - ('⿰', '⿿'), - ('〄', '〄'), - ('〒', '〓'), - ('〠', '〠'), - ('〶', '〷'), - ('〾', '〿'), - ('㆐', '㆑'), - ('㆖', '㆟'), - ('㇀', '㇥'), - ('㇯', '㇯'), - ('㈀', '㈞'), - ('㈪', '㉇'), - ('㉐', '㉐'), - ('㉠', '㉿'), - ('㊊', '㊰'), - ('㋀', '㏿'), - ('䷀', '䷿'), - ('꒐', '꓆'), - ('꠨', '꠫'), - ('꠶', '꠷'), - ('꠹', '꠹'), - ('꩷', '꩹'), - ('﵀', '﵏'), - ('﷏', '﷏'), - ('﷽', '﷿'), - ('¦', '¦'), - ('│', '│'), - ('■', '○'), - ('', '�'), - ('𐄷', '𐄿'), - ('𐅹', '𐆉'), - ('𐆌', '𐆎'), - ('𐆐', '𐆜'), - ('𐆠', '𐆠'), - ('𐇐', '𐇼'), - ('𐡷', '𐡸'), - ('𐫈', '𐫈'), - ('𑜿', '𑜿'), - ('𑿕', '𑿜'), - ('𑿡', '𑿱'), - ('𖬼', '𖬿'), - ('𖭅', '𖭅'), - ('𛲜', '𛲜'), - ('𜰀', '𜳯'), - ('𜴀', '𜺳'), - ('𜽐', '𜿃'), - ('𝀀', '𝃵'), - ('𝄀', '𝄦'), - ('𝄩', '𝅘𝅥𝅲'), - ('𝅪', '𝅬'), - ('𝆃', '𝆄'), - ('𝆌', '𝆩'), - ('𝆮', '𝇪'), - ('𝈀', '𝉁'), - ('𝉅', '𝉅'), - ('𝌀', '𝍖'), - ('𝠀', '𝧿'), - ('𝨷', 
'𝨺'), - ('𝩭', '𝩴'), - ('𝩶', '𝪃'), - ('𝪅', '𝪆'), - ('𞅏', '𞅏'), - ('𞲬', '𞲬'), - ('𞴮', '𞴮'), - ('🀀', '🀫'), - ('🀰', '🂓'), - ('🂠', '🂮'), - ('🂱', '🂿'), - ('🃁', '🃏'), - ('🃑', '🃵'), - ('🄍', '🆭'), - ('🇦', '🈂'), - ('🈐', '🈻'), - ('🉀', '🉈'), - ('🉐', '🉑'), - ('🉠', '🉥'), - ('🌀', '🏺'), - ('🐀', '🛗'), - ('🛜', '🛬'), - ('🛰', '🛼'), - ('🜀', '🝶'), - ('🝻', '🟙'), - ('🟠', '🟫'), - ('🟰', '🟰'), - ('🠀', '🠋'), - ('🠐', '🡇'), - ('🡐', '🡙'), - ('🡠', '🢇'), - ('🢐', '🢭'), - ('🢰', '🢻'), - ('🣀', '🣁'), - ('🤀', '🩓'), - ('🩠', '🩭'), - ('🩰', '🩼'), - ('🪀', '🪉'), - ('🪏', '🫆'), - ('🫎', '🫜'), - ('🫟', '🫩'), - ('🫰', '🫸'), - ('🬀', '🮒'), - ('🮔', '🯯'), -]; - -pub const PARAGRAPH_SEPARATOR: &'static [(char, char)] = - &[('\u{2029}', '\u{2029}')]; - -pub const PRIVATE_USE: &'static [(char, char)] = &[ - ('\u{e000}', '\u{f8ff}'), - ('\u{f0000}', '\u{ffffd}'), - ('\u{100000}', '\u{10fffd}'), -]; - -pub const PUNCTUATION: &'static [(char, char)] = &[ - ('!', '#'), - ('%', '*'), - (',', '/'), - (':', ';'), - ('?', '@'), - ('[', ']'), - ('_', '_'), - ('{', '{'), - ('}', '}'), - ('¡', '¡'), - ('§', '§'), - ('«', '«'), - ('¶', '·'), - ('»', '»'), - ('¿', '¿'), - (';', ';'), - ('·', '·'), - ('՚', '՟'), - ('։', '֊'), - ('־', '־'), - ('׀', '׀'), - ('׃', '׃'), - ('׆', '׆'), - ('׳', '״'), - ('؉', '؊'), - ('،', '؍'), - ('؛', '؛'), - ('؝', '؟'), - ('٪', '٭'), - ('۔', '۔'), - ('܀', '܍'), - ('߷', '߹'), - ('࠰', '࠾'), - ('࡞', '࡞'), - ('।', '॥'), - ('॰', '॰'), - ('৽', '৽'), - ('੶', '੶'), - ('૰', '૰'), - ('౷', '౷'), - ('಄', '಄'), - ('෴', '෴'), - ('๏', '๏'), - ('๚', '๛'), - ('༄', '༒'), - ('༔', '༔'), - ('༺', '༽'), - ('྅', '྅'), - ('࿐', '࿔'), - ('࿙', '࿚'), - ('၊', '၏'), - ('჻', '჻'), - ('፠', '፨'), - ('᐀', '᐀'), - ('᙮', '᙮'), - ('᚛', '᚜'), - ('᛫', '᛭'), - ('᜵', '᜶'), - ('។', '៖'), - ('៘', '៚'), - ('᠀', '᠊'), - ('᥄', '᥅'), - ('᨞', '᨟'), - ('᪠', '᪦'), - ('᪨', '᪭'), - ('᭎', '᭏'), - ('᭚', '᭠'), - ('᭽', '᭿'), - ('᯼', '᯿'), - ('᰻', '᰿'), - ('᱾', '᱿'), - ('᳀', '᳇'), - ('᳓', '᳓'), - ('‐', '‧'), - ('‰', '⁃'), - ('⁅', '⁑'), - ('⁓', '⁞'), - ('⁽', '⁾'), 
- ('₍', '₎'), - ('⌈', '⌋'), - ('〈', '〉'), - ('❨', '❵'), - ('⟅', '⟆'), - ('⟦', '⟯'), - ('⦃', '⦘'), - ('⧘', '⧛'), - ('⧼', '⧽'), - ('⳹', '⳼'), - ('⳾', '⳿'), - ('⵰', '⵰'), - ('⸀', '⸮'), - ('⸰', '⹏'), - ('⹒', '⹝'), - ('、', '〃'), - ('〈', '】'), - ('〔', '〟'), - ('〰', '〰'), - ('〽', '〽'), - ('゠', '゠'), - ('・', '・'), - ('꓾', '꓿'), - ('꘍', '꘏'), - ('꙳', '꙳'), - ('꙾', '꙾'), - ('꛲', '꛷'), - ('꡴', '꡷'), - ('꣎', '꣏'), - ('꣸', '꣺'), - ('꣼', '꣼'), - ('꤮', '꤯'), - ('꥟', '꥟'), - ('꧁', '꧍'), - ('꧞', '꧟'), - ('꩜', '꩟'), - ('꫞', '꫟'), - ('꫰', '꫱'), - ('꯫', '꯫'), - ('﴾', '﴿'), - ('︐', '︙'), - ('︰', '﹒'), - ('﹔', '﹡'), - ('﹣', '﹣'), - ('﹨', '﹨'), - ('﹪', '﹫'), - ('!', '#'), - ('%', '*'), - (',', '/'), - (':', ';'), - ('?', '@'), - ('[', ']'), - ('_', '_'), - ('{', '{'), - ('}', '}'), - ('⦅', '・'), - ('𐄀', '𐄂'), - ('𐎟', '𐎟'), - ('𐏐', '𐏐'), - ('𐕯', '𐕯'), - ('𐡗', '𐡗'), - ('𐤟', '𐤟'), - ('𐤿', '𐤿'), - ('𐩐', '𐩘'), - ('𐩿', '𐩿'), - ('𐫰', '𐫶'), - ('𐬹', '𐬿'), - ('𐮙', '𐮜'), - ('𐵮', '𐵮'), - ('𐺭', '𐺭'), - ('𐽕', '𐽙'), - ('𐾆', '𐾉'), - ('𑁇', '𑁍'), - ('𑂻', '𑂼'), - ('𑂾', '𑃁'), - ('𑅀', '𑅃'), - ('𑅴', '𑅵'), - ('𑇅', '𑇈'), - ('𑇍', '𑇍'), - ('𑇛', '𑇛'), - ('𑇝', '𑇟'), - ('𑈸', '𑈽'), - ('𑊩', '𑊩'), - ('𑏔', '𑏕'), - ('𑏗', '𑏘'), - ('𑑋', '𑑏'), - ('𑑚', '𑑛'), - ('𑑝', '𑑝'), - ('𑓆', '𑓆'), - ('𑗁', '𑗗'), - ('𑙁', '𑙃'), - ('𑙠', '𑙬'), - ('𑚹', '𑚹'), - ('𑜼', '𑜾'), - ('𑠻', '𑠻'), - ('𑥄', '𑥆'), - ('𑧢', '𑧢'), - ('𑨿', '𑩆'), - ('𑪚', '𑪜'), - ('𑪞', '𑪢'), - ('𑬀', '𑬉'), - ('𑯡', '𑯡'), - ('𑱁', '𑱅'), - ('𑱰', '𑱱'), - ('𑻷', '𑻸'), - ('𑽃', '𑽏'), - ('𑿿', '𑿿'), - ('𒑰', '𒑴'), - ('𒿱', '𒿲'), - ('𖩮', '𖩯'), - ('𖫵', '𖫵'), - ('𖬷', '𖬻'), - ('𖭄', '𖭄'), - ('𖵭', '𖵯'), - ('𖺗', '𖺚'), - ('𖿢', '𖿢'), - ('𛲟', '𛲟'), - ('𝪇', '𝪋'), - ('𞗿', '𞗿'), - ('𞥞', '𞥟'), -]; - -pub const SEPARATOR: &'static [(char, char)] = &[ - (' ', ' '), - ('\u{a0}', '\u{a0}'), - ('\u{1680}', '\u{1680}'), - ('\u{2000}', '\u{200a}'), - ('\u{2028}', '\u{2029}'), - ('\u{202f}', '\u{202f}'), - ('\u{205f}', '\u{205f}'), - ('\u{3000}', '\u{3000}'), -]; - -pub const SPACE_SEPARATOR: &'static [(char, 
char)] = &[ - (' ', ' '), - ('\u{a0}', '\u{a0}'), - ('\u{1680}', '\u{1680}'), - ('\u{2000}', '\u{200a}'), - ('\u{202f}', '\u{202f}'), - ('\u{205f}', '\u{205f}'), - ('\u{3000}', '\u{3000}'), -]; - -pub const SPACING_MARK: &'static [(char, char)] = &[ - ('ः', 'ः'), - ('ऻ', 'ऻ'), - ('ा', 'ी'), - ('ॉ', 'ौ'), - ('ॎ', 'ॏ'), - ('ং', 'ঃ'), - ('\u{9be}', 'ী'), - ('ে', 'ৈ'), - ('ো', 'ৌ'), - ('\u{9d7}', '\u{9d7}'), - ('ਃ', 'ਃ'), - ('ਾ', 'ੀ'), - ('ઃ', 'ઃ'), - ('ા', 'ી'), - ('ૉ', 'ૉ'), - ('ો', 'ૌ'), - ('ଂ', 'ଃ'), - ('\u{b3e}', '\u{b3e}'), - ('ୀ', 'ୀ'), - ('େ', 'ୈ'), - ('ୋ', 'ୌ'), - ('\u{b57}', '\u{b57}'), - ('\u{bbe}', 'ி'), - ('ு', 'ூ'), - ('ெ', 'ை'), - ('ொ', 'ௌ'), - ('\u{bd7}', '\u{bd7}'), - ('ఁ', 'ః'), - ('ు', 'ౄ'), - ('ಂ', 'ಃ'), - ('ಾ', 'ಾ'), - ('\u{cc0}', 'ೄ'), - ('\u{cc7}', '\u{cc8}'), - ('\u{cca}', '\u{ccb}'), - ('\u{cd5}', '\u{cd6}'), - ('ೳ', 'ೳ'), - ('ം', 'ഃ'), - ('\u{d3e}', 'ീ'), - ('െ', 'ൈ'), - ('ൊ', 'ൌ'), - ('\u{d57}', '\u{d57}'), - ('ං', 'ඃ'), - ('\u{dcf}', 'ෑ'), - ('ෘ', '\u{ddf}'), - ('ෲ', 'ෳ'), - ('༾', '༿'), - ('ཿ', 'ཿ'), - ('ါ', 'ာ'), - ('ေ', 'ေ'), - ('း', 'း'), - ('ျ', 'ြ'), - ('ၖ', 'ၗ'), - ('ၢ', 'ၤ'), - ('ၧ', 'ၭ'), - ('ႃ', 'ႄ'), - ('ႇ', 'ႌ'), - ('ႏ', 'ႏ'), - ('ႚ', 'ႜ'), - ('\u{1715}', '\u{1715}'), - ('\u{1734}', '\u{1734}'), - ('ា', 'ា'), - ('ើ', 'ៅ'), - ('ះ', 'ៈ'), - ('ᤣ', 'ᤦ'), - ('ᤩ', 'ᤫ'), - ('ᤰ', 'ᤱ'), - ('ᤳ', 'ᤸ'), - ('ᨙ', 'ᨚ'), - ('ᩕ', 'ᩕ'), - ('ᩗ', 'ᩗ'), - ('ᩡ', 'ᩡ'), - ('ᩣ', 'ᩤ'), - ('ᩭ', 'ᩲ'), - ('ᬄ', 'ᬄ'), - ('\u{1b35}', '\u{1b35}'), - ('\u{1b3b}', '\u{1b3b}'), - ('\u{1b3d}', 'ᭁ'), - ('\u{1b43}', '\u{1b44}'), - ('ᮂ', 'ᮂ'), - ('ᮡ', 'ᮡ'), - ('ᮦ', 'ᮧ'), - ('\u{1baa}', '\u{1baa}'), - ('ᯧ', 'ᯧ'), - ('ᯪ', 'ᯬ'), - ('ᯮ', 'ᯮ'), - ('\u{1bf2}', '\u{1bf3}'), - ('ᰤ', 'ᰫ'), - ('ᰴ', 'ᰵ'), - ('᳡', '᳡'), - ('᳷', '᳷'), - ('\u{302e}', '\u{302f}'), - ('ꠣ', 'ꠤ'), - ('ꠧ', 'ꠧ'), - ('ꢀ', 'ꢁ'), - ('ꢴ', 'ꣃ'), - ('ꥒ', '\u{a953}'), - ('ꦃ', 'ꦃ'), - ('ꦴ', 'ꦵ'), - ('ꦺ', 'ꦻ'), - ('ꦾ', '\u{a9c0}'), - ('ꨯ', 'ꨰ'), - ('ꨳ', 'ꨴ'), - ('ꩍ', 'ꩍ'), - ('ꩻ', 'ꩻ'), - ('ꩽ', 'ꩽ'), - ('ꫫ', 'ꫫ'), 
- ('ꫮ', 'ꫯ'), - ('ꫵ', 'ꫵ'), - ('ꯣ', 'ꯤ'), - ('ꯦ', 'ꯧ'), - ('ꯩ', 'ꯪ'), - ('꯬', '꯬'), - ('𑀀', '𑀀'), - ('𑀂', '𑀂'), - ('𑂂', '𑂂'), - ('𑂰', '𑂲'), - ('𑂷', '𑂸'), - ('𑄬', '𑄬'), - ('𑅅', '𑅆'), - ('𑆂', '𑆂'), - ('𑆳', '𑆵'), - ('𑆿', '\u{111c0}'), - ('𑇎', '𑇎'), - ('𑈬', '𑈮'), - ('𑈲', '𑈳'), - ('\u{11235}', '\u{11235}'), - ('𑋠', '𑋢'), - ('𑌂', '𑌃'), - ('\u{1133e}', '𑌿'), - ('𑍁', '𑍄'), - ('𑍇', '𑍈'), - ('𑍋', '\u{1134d}'), - ('\u{11357}', '\u{11357}'), - ('𑍢', '𑍣'), - ('\u{113b8}', '𑎺'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '𑏊'), - ('𑏌', '𑏍'), - ('\u{113cf}', '\u{113cf}'), - ('𑐵', '𑐷'), - ('𑑀', '𑑁'), - ('𑑅', '𑑅'), - ('\u{114b0}', '𑒲'), - ('𑒹', '𑒹'), - ('𑒻', '𑒾'), - ('𑓁', '𑓁'), - ('\u{115af}', '𑖱'), - ('𑖸', '𑖻'), - ('𑖾', '𑖾'), - ('𑘰', '𑘲'), - ('𑘻', '𑘼'), - ('𑘾', '𑘾'), - ('𑚬', '𑚬'), - ('𑚮', '𑚯'), - ('\u{116b6}', '\u{116b6}'), - ('𑜞', '𑜞'), - ('𑜠', '𑜡'), - ('𑜦', '𑜦'), - ('𑠬', '𑠮'), - ('𑠸', '𑠸'), - ('\u{11930}', '𑤵'), - ('𑤷', '𑤸'), - ('\u{1193d}', '\u{1193d}'), - ('𑥀', '𑥀'), - ('𑥂', '𑥂'), - ('𑧑', '𑧓'), - ('𑧜', '𑧟'), - ('𑧤', '𑧤'), - ('𑨹', '𑨹'), - ('𑩗', '𑩘'), - ('𑪗', '𑪗'), - ('𑰯', '𑰯'), - ('𑰾', '𑰾'), - ('𑲩', '𑲩'), - ('𑲱', '𑲱'), - ('𑲴', '𑲴'), - ('𑶊', '𑶎'), - ('𑶓', '𑶔'), - ('𑶖', '𑶖'), - ('𑻵', '𑻶'), - ('𑼃', '𑼃'), - ('𑼴', '𑼵'), - ('𑼾', '𑼿'), - ('\u{11f41}', '\u{11f41}'), - ('𖄪', '𖄬'), - ('𖽑', '𖾇'), - ('\u{16ff0}', '\u{16ff1}'), - ('\u{1d165}', '\u{1d166}'), - ('\u{1d16d}', '\u{1d172}'), -]; - -pub const SYMBOL: &'static [(char, char)] = &[ - ('$', '$'), - ('+', '+'), - ('<', '>'), - ('^', '^'), - ('`', '`'), - ('|', '|'), - ('~', '~'), - ('¢', '¦'), - ('¨', '©'), - ('¬', '¬'), - ('®', '±'), - ('´', '´'), - ('¸', '¸'), - ('×', '×'), - ('÷', '÷'), - ('˂', '˅'), - ('˒', '˟'), - ('˥', '˫'), - ('˭', '˭'), - ('˯', '˿'), - ('͵', '͵'), - ('΄', '΅'), - ('϶', '϶'), - ('҂', '҂'), - ('֍', '֏'), - ('؆', '؈'), - ('؋', '؋'), - ('؎', '؏'), - ('۞', '۞'), - ('۩', '۩'), - ('۽', '۾'), - ('߶', '߶'), - ('߾', '߿'), - ('࢈', '࢈'), - ('৲', '৳'), - ('৺', '৻'), - ('૱', '૱'), - ('୰', 
'୰'), - ('௳', '௺'), - ('౿', '౿'), - ('൏', '൏'), - ('൹', '൹'), - ('฿', '฿'), - ('༁', '༃'), - ('༓', '༓'), - ('༕', '༗'), - ('༚', '༟'), - ('༴', '༴'), - ('༶', '༶'), - ('༸', '༸'), - ('྾', '࿅'), - ('࿇', '࿌'), - ('࿎', '࿏'), - ('࿕', '࿘'), - ('႞', '႟'), - ('᎐', '᎙'), - ('᙭', '᙭'), - ('៛', '៛'), - ('᥀', '᥀'), - ('᧞', '᧿'), - ('᭡', '᭪'), - ('᭴', '᭼'), - ('᾽', '᾽'), - ('᾿', '῁'), - ('῍', '῏'), - ('῝', '῟'), - ('῭', '`'), - ('´', '῾'), - ('⁄', '⁄'), - ('⁒', '⁒'), - ('⁺', '⁼'), - ('₊', '₌'), - ('₠', '⃀'), - ('℀', '℁'), - ('℃', '℆'), - ('℈', '℉'), - ('℔', '℔'), - ('№', '℘'), - ('℞', '℣'), - ('℥', '℥'), - ('℧', '℧'), - ('℩', '℩'), - ('℮', '℮'), - ('℺', '℻'), - ('⅀', '⅄'), - ('⅊', '⅍'), - ('⅏', '⅏'), - ('↊', '↋'), - ('←', '⌇'), - ('⌌', '⌨'), - ('⌫', '␩'), - ('⑀', '⑊'), - ('⒜', 'ⓩ'), - ('─', '❧'), - ('➔', '⟄'), - ('⟇', '⟥'), - ('⟰', '⦂'), - ('⦙', '⧗'), - ('⧜', '⧻'), - ('⧾', '⭳'), - ('⭶', '⮕'), - ('⮗', '⯿'), - ('⳥', '⳪'), - ('⹐', '⹑'), - ('⺀', '⺙'), - ('⺛', '⻳'), - ('⼀', '⿕'), - ('⿰', '⿿'), - ('〄', '〄'), - ('〒', '〓'), - ('〠', '〠'), - ('〶', '〷'), - ('〾', '〿'), - ('゛', '゜'), - ('㆐', '㆑'), - ('㆖', '㆟'), - ('㇀', '㇥'), - ('㇯', '㇯'), - ('㈀', '㈞'), - ('㈪', '㉇'), - ('㉐', '㉐'), - ('㉠', '㉿'), - ('㊊', '㊰'), - ('㋀', '㏿'), - ('䷀', '䷿'), - ('꒐', '꓆'), - ('꜀', '꜖'), - ('꜠', '꜡'), - ('꞉', '꞊'), - ('꠨', '꠫'), - ('꠶', '꠹'), - ('꩷', '꩹'), - ('꭛', '꭛'), - ('꭪', '꭫'), - ('﬩', '﬩'), - ('﮲', '﯂'), - ('﵀', '﵏'), - ('﷏', '﷏'), - ('﷼', '﷿'), - ('﹢', '﹢'), - ('﹤', '﹦'), - ('﹩', '﹩'), - ('$', '$'), - ('+', '+'), - ('<', '>'), - ('^', '^'), - ('`', '`'), - ('|', '|'), - ('~', '~'), - ('¢', '₩'), - ('│', '○'), - ('', '�'), - ('𐄷', '𐄿'), - ('𐅹', '𐆉'), - ('𐆌', '𐆎'), - ('𐆐', '𐆜'), - ('𐆠', '𐆠'), - ('𐇐', '𐇼'), - ('𐡷', '𐡸'), - ('𐫈', '𐫈'), - ('𐶎', '𐶏'), - ('𑜿', '𑜿'), - ('𑿕', '𑿱'), - ('𖬼', '𖬿'), - ('𖭅', '𖭅'), - ('𛲜', '𛲜'), - ('𜰀', '𜳯'), - ('𜴀', '𜺳'), - ('𜽐', '𜿃'), - ('𝀀', '𝃵'), - ('𝄀', '𝄦'), - ('𝄩', '𝅘𝅥𝅲'), - ('𝅪', '𝅬'), - ('𝆃', '𝆄'), - ('𝆌', '𝆩'), - ('𝆮', '𝇪'), - ('𝈀', '𝉁'), - ('𝉅', '𝉅'), - ('𝌀', '𝍖'), - ('𝛁', '𝛁'), - 
('𝛛', '𝛛'), - ('𝛻', '𝛻'), - ('𝜕', '𝜕'), - ('𝜵', '𝜵'), - ('𝝏', '𝝏'), - ('𝝯', '𝝯'), - ('𝞉', '𝞉'), - ('𝞩', '𝞩'), - ('𝟃', '𝟃'), - ('𝠀', '𝧿'), - ('𝨷', '𝨺'), - ('𝩭', '𝩴'), - ('𝩶', '𝪃'), - ('𝪅', '𝪆'), - ('𞅏', '𞅏'), - ('𞋿', '𞋿'), - ('𞲬', '𞲬'), - ('𞲰', '𞲰'), - ('𞴮', '𞴮'), - ('𞻰', '𞻱'), - ('🀀', '🀫'), - ('🀰', '🂓'), - ('🂠', '🂮'), - ('🂱', '🂿'), - ('🃁', '🃏'), - ('🃑', '🃵'), - ('🄍', '🆭'), - ('🇦', '🈂'), - ('🈐', '🈻'), - ('🉀', '🉈'), - ('🉐', '🉑'), - ('🉠', '🉥'), - ('🌀', '🛗'), - ('🛜', '🛬'), - ('🛰', '🛼'), - ('🜀', '🝶'), - ('🝻', '🟙'), - ('🟠', '🟫'), - ('🟰', '🟰'), - ('🠀', '🠋'), - ('🠐', '🡇'), - ('🡐', '🡙'), - ('🡠', '🢇'), - ('🢐', '🢭'), - ('🢰', '🢻'), - ('🣀', '🣁'), - ('🤀', '🩓'), - ('🩠', '🩭'), - ('🩰', '🩼'), - ('🪀', '🪉'), - ('🪏', '🫆'), - ('🫎', '🫜'), - ('🫟', '🫩'), - ('🫰', '🫸'), - ('🬀', '🮒'), - ('🮔', '🯯'), -]; - -pub const TITLECASE_LETTER: &'static [(char, char)] = &[ - ('Dž', 'Dž'), - ('Lj', 'Lj'), - ('Nj', 'Nj'), - ('Dz', 'Dz'), - ('ᾈ', 'ᾏ'), - ('ᾘ', 'ᾟ'), - ('ᾨ', 'ᾯ'), - ('ᾼ', 'ᾼ'), - ('ῌ', 'ῌ'), - ('ῼ', 'ῼ'), -]; - -pub const UNASSIGNED: &'static [(char, char)] = &[ - ('\u{378}', '\u{379}'), - ('\u{380}', '\u{383}'), - ('\u{38b}', '\u{38b}'), - ('\u{38d}', '\u{38d}'), - ('\u{3a2}', '\u{3a2}'), - ('\u{530}', '\u{530}'), - ('\u{557}', '\u{558}'), - ('\u{58b}', '\u{58c}'), - ('\u{590}', '\u{590}'), - ('\u{5c8}', '\u{5cf}'), - ('\u{5eb}', '\u{5ee}'), - ('\u{5f5}', '\u{5ff}'), - ('\u{70e}', '\u{70e}'), - ('\u{74b}', '\u{74c}'), - ('\u{7b2}', '\u{7bf}'), - ('\u{7fb}', '\u{7fc}'), - ('\u{82e}', '\u{82f}'), - ('\u{83f}', '\u{83f}'), - ('\u{85c}', '\u{85d}'), - ('\u{85f}', '\u{85f}'), - ('\u{86b}', '\u{86f}'), - ('\u{88f}', '\u{88f}'), - ('\u{892}', '\u{896}'), - ('\u{984}', '\u{984}'), - ('\u{98d}', '\u{98e}'), - ('\u{991}', '\u{992}'), - ('\u{9a9}', '\u{9a9}'), - ('\u{9b1}', '\u{9b1}'), - ('\u{9b3}', '\u{9b5}'), - ('\u{9ba}', '\u{9bb}'), - ('\u{9c5}', '\u{9c6}'), - ('\u{9c9}', '\u{9ca}'), - ('\u{9cf}', '\u{9d6}'), - ('\u{9d8}', '\u{9db}'), - ('\u{9de}', '\u{9de}'), - ('\u{9e4}', '\u{9e5}'), - 
('\u{9ff}', '\u{a00}'), - ('\u{a04}', '\u{a04}'), - ('\u{a0b}', '\u{a0e}'), - ('\u{a11}', '\u{a12}'), - ('\u{a29}', '\u{a29}'), - ('\u{a31}', '\u{a31}'), - ('\u{a34}', '\u{a34}'), - ('\u{a37}', '\u{a37}'), - ('\u{a3a}', '\u{a3b}'), - ('\u{a3d}', '\u{a3d}'), - ('\u{a43}', '\u{a46}'), - ('\u{a49}', '\u{a4a}'), - ('\u{a4e}', '\u{a50}'), - ('\u{a52}', '\u{a58}'), - ('\u{a5d}', '\u{a5d}'), - ('\u{a5f}', '\u{a65}'), - ('\u{a77}', '\u{a80}'), - ('\u{a84}', '\u{a84}'), - ('\u{a8e}', '\u{a8e}'), - ('\u{a92}', '\u{a92}'), - ('\u{aa9}', '\u{aa9}'), - ('\u{ab1}', '\u{ab1}'), - ('\u{ab4}', '\u{ab4}'), - ('\u{aba}', '\u{abb}'), - ('\u{ac6}', '\u{ac6}'), - ('\u{aca}', '\u{aca}'), - ('\u{ace}', '\u{acf}'), - ('\u{ad1}', '\u{adf}'), - ('\u{ae4}', '\u{ae5}'), - ('\u{af2}', '\u{af8}'), - ('\u{b00}', '\u{b00}'), - ('\u{b04}', '\u{b04}'), - ('\u{b0d}', '\u{b0e}'), - ('\u{b11}', '\u{b12}'), - ('\u{b29}', '\u{b29}'), - ('\u{b31}', '\u{b31}'), - ('\u{b34}', '\u{b34}'), - ('\u{b3a}', '\u{b3b}'), - ('\u{b45}', '\u{b46}'), - ('\u{b49}', '\u{b4a}'), - ('\u{b4e}', '\u{b54}'), - ('\u{b58}', '\u{b5b}'), - ('\u{b5e}', '\u{b5e}'), - ('\u{b64}', '\u{b65}'), - ('\u{b78}', '\u{b81}'), - ('\u{b84}', '\u{b84}'), - ('\u{b8b}', '\u{b8d}'), - ('\u{b91}', '\u{b91}'), - ('\u{b96}', '\u{b98}'), - ('\u{b9b}', '\u{b9b}'), - ('\u{b9d}', '\u{b9d}'), - ('\u{ba0}', '\u{ba2}'), - ('\u{ba5}', '\u{ba7}'), - ('\u{bab}', '\u{bad}'), - ('\u{bba}', '\u{bbd}'), - ('\u{bc3}', '\u{bc5}'), - ('\u{bc9}', '\u{bc9}'), - ('\u{bce}', '\u{bcf}'), - ('\u{bd1}', '\u{bd6}'), - ('\u{bd8}', '\u{be5}'), - ('\u{bfb}', '\u{bff}'), - ('\u{c0d}', '\u{c0d}'), - ('\u{c11}', '\u{c11}'), - ('\u{c29}', '\u{c29}'), - ('\u{c3a}', '\u{c3b}'), - ('\u{c45}', '\u{c45}'), - ('\u{c49}', '\u{c49}'), - ('\u{c4e}', '\u{c54}'), - ('\u{c57}', '\u{c57}'), - ('\u{c5b}', '\u{c5c}'), - ('\u{c5e}', '\u{c5f}'), - ('\u{c64}', '\u{c65}'), - ('\u{c70}', '\u{c76}'), - ('\u{c8d}', '\u{c8d}'), - ('\u{c91}', '\u{c91}'), - ('\u{ca9}', '\u{ca9}'), - ('\u{cb4}', '\u{cb4}'), 
- ('\u{cba}', '\u{cbb}'), - ('\u{cc5}', '\u{cc5}'), - ('\u{cc9}', '\u{cc9}'), - ('\u{cce}', '\u{cd4}'), - ('\u{cd7}', '\u{cdc}'), - ('\u{cdf}', '\u{cdf}'), - ('\u{ce4}', '\u{ce5}'), - ('\u{cf0}', '\u{cf0}'), - ('\u{cf4}', '\u{cff}'), - ('\u{d0d}', '\u{d0d}'), - ('\u{d11}', '\u{d11}'), - ('\u{d45}', '\u{d45}'), - ('\u{d49}', '\u{d49}'), - ('\u{d50}', '\u{d53}'), - ('\u{d64}', '\u{d65}'), - ('\u{d80}', '\u{d80}'), - ('\u{d84}', '\u{d84}'), - ('\u{d97}', '\u{d99}'), - ('\u{db2}', '\u{db2}'), - ('\u{dbc}', '\u{dbc}'), - ('\u{dbe}', '\u{dbf}'), - ('\u{dc7}', '\u{dc9}'), - ('\u{dcb}', '\u{dce}'), - ('\u{dd5}', '\u{dd5}'), - ('\u{dd7}', '\u{dd7}'), - ('\u{de0}', '\u{de5}'), - ('\u{df0}', '\u{df1}'), - ('\u{df5}', '\u{e00}'), - ('\u{e3b}', '\u{e3e}'), - ('\u{e5c}', '\u{e80}'), - ('\u{e83}', '\u{e83}'), - ('\u{e85}', '\u{e85}'), - ('\u{e8b}', '\u{e8b}'), - ('\u{ea4}', '\u{ea4}'), - ('\u{ea6}', '\u{ea6}'), - ('\u{ebe}', '\u{ebf}'), - ('\u{ec5}', '\u{ec5}'), - ('\u{ec7}', '\u{ec7}'), - ('\u{ecf}', '\u{ecf}'), - ('\u{eda}', '\u{edb}'), - ('\u{ee0}', '\u{eff}'), - ('\u{f48}', '\u{f48}'), - ('\u{f6d}', '\u{f70}'), - ('\u{f98}', '\u{f98}'), - ('\u{fbd}', '\u{fbd}'), - ('\u{fcd}', '\u{fcd}'), - ('\u{fdb}', '\u{fff}'), - ('\u{10c6}', '\u{10c6}'), - ('\u{10c8}', '\u{10cc}'), - ('\u{10ce}', '\u{10cf}'), - ('\u{1249}', '\u{1249}'), - ('\u{124e}', '\u{124f}'), - ('\u{1257}', '\u{1257}'), - ('\u{1259}', '\u{1259}'), - ('\u{125e}', '\u{125f}'), - ('\u{1289}', '\u{1289}'), - ('\u{128e}', '\u{128f}'), - ('\u{12b1}', '\u{12b1}'), - ('\u{12b6}', '\u{12b7}'), - ('\u{12bf}', '\u{12bf}'), - ('\u{12c1}', '\u{12c1}'), - ('\u{12c6}', '\u{12c7}'), - ('\u{12d7}', '\u{12d7}'), - ('\u{1311}', '\u{1311}'), - ('\u{1316}', '\u{1317}'), - ('\u{135b}', '\u{135c}'), - ('\u{137d}', '\u{137f}'), - ('\u{139a}', '\u{139f}'), - ('\u{13f6}', '\u{13f7}'), - ('\u{13fe}', '\u{13ff}'), - ('\u{169d}', '\u{169f}'), - ('\u{16f9}', '\u{16ff}'), - ('\u{1716}', '\u{171e}'), - ('\u{1737}', '\u{173f}'), - ('\u{1754}', 
'\u{175f}'), - ('\u{176d}', '\u{176d}'), - ('\u{1771}', '\u{1771}'), - ('\u{1774}', '\u{177f}'), - ('\u{17de}', '\u{17df}'), - ('\u{17ea}', '\u{17ef}'), - ('\u{17fa}', '\u{17ff}'), - ('\u{181a}', '\u{181f}'), - ('\u{1879}', '\u{187f}'), - ('\u{18ab}', '\u{18af}'), - ('\u{18f6}', '\u{18ff}'), - ('\u{191f}', '\u{191f}'), - ('\u{192c}', '\u{192f}'), - ('\u{193c}', '\u{193f}'), - ('\u{1941}', '\u{1943}'), - ('\u{196e}', '\u{196f}'), - ('\u{1975}', '\u{197f}'), - ('\u{19ac}', '\u{19af}'), - ('\u{19ca}', '\u{19cf}'), - ('\u{19db}', '\u{19dd}'), - ('\u{1a1c}', '\u{1a1d}'), - ('\u{1a5f}', '\u{1a5f}'), - ('\u{1a7d}', '\u{1a7e}'), - ('\u{1a8a}', '\u{1a8f}'), - ('\u{1a9a}', '\u{1a9f}'), - ('\u{1aae}', '\u{1aaf}'), - ('\u{1acf}', '\u{1aff}'), - ('\u{1b4d}', '\u{1b4d}'), - ('\u{1bf4}', '\u{1bfb}'), - ('\u{1c38}', '\u{1c3a}'), - ('\u{1c4a}', '\u{1c4c}'), - ('\u{1c8b}', '\u{1c8f}'), - ('\u{1cbb}', '\u{1cbc}'), - ('\u{1cc8}', '\u{1ccf}'), - ('\u{1cfb}', '\u{1cff}'), - ('\u{1f16}', '\u{1f17}'), - ('\u{1f1e}', '\u{1f1f}'), - ('\u{1f46}', '\u{1f47}'), - ('\u{1f4e}', '\u{1f4f}'), - ('\u{1f58}', '\u{1f58}'), - ('\u{1f5a}', '\u{1f5a}'), - ('\u{1f5c}', '\u{1f5c}'), - ('\u{1f5e}', '\u{1f5e}'), - ('\u{1f7e}', '\u{1f7f}'), - ('\u{1fb5}', '\u{1fb5}'), - ('\u{1fc5}', '\u{1fc5}'), - ('\u{1fd4}', '\u{1fd5}'), - ('\u{1fdc}', '\u{1fdc}'), - ('\u{1ff0}', '\u{1ff1}'), - ('\u{1ff5}', '\u{1ff5}'), - ('\u{1fff}', '\u{1fff}'), - ('\u{2065}', '\u{2065}'), - ('\u{2072}', '\u{2073}'), - ('\u{208f}', '\u{208f}'), - ('\u{209d}', '\u{209f}'), - ('\u{20c1}', '\u{20cf}'), - ('\u{20f1}', '\u{20ff}'), - ('\u{218c}', '\u{218f}'), - ('\u{242a}', '\u{243f}'), - ('\u{244b}', '\u{245f}'), - ('\u{2b74}', '\u{2b75}'), - ('\u{2b96}', '\u{2b96}'), - ('\u{2cf4}', '\u{2cf8}'), - ('\u{2d26}', '\u{2d26}'), - ('\u{2d28}', '\u{2d2c}'), - ('\u{2d2e}', '\u{2d2f}'), - ('\u{2d68}', '\u{2d6e}'), - ('\u{2d71}', '\u{2d7e}'), - ('\u{2d97}', '\u{2d9f}'), - ('\u{2da7}', '\u{2da7}'), - ('\u{2daf}', '\u{2daf}'), - ('\u{2db7}', 
'\u{2db7}'), - ('\u{2dbf}', '\u{2dbf}'), - ('\u{2dc7}', '\u{2dc7}'), - ('\u{2dcf}', '\u{2dcf}'), - ('\u{2dd7}', '\u{2dd7}'), - ('\u{2ddf}', '\u{2ddf}'), - ('\u{2e5e}', '\u{2e7f}'), - ('\u{2e9a}', '\u{2e9a}'), - ('\u{2ef4}', '\u{2eff}'), - ('\u{2fd6}', '\u{2fef}'), - ('\u{3040}', '\u{3040}'), - ('\u{3097}', '\u{3098}'), - ('\u{3100}', '\u{3104}'), - ('\u{3130}', '\u{3130}'), - ('\u{318f}', '\u{318f}'), - ('\u{31e6}', '\u{31ee}'), - ('\u{321f}', '\u{321f}'), - ('\u{a48d}', '\u{a48f}'), - ('\u{a4c7}', '\u{a4cf}'), - ('\u{a62c}', '\u{a63f}'), - ('\u{a6f8}', '\u{a6ff}'), - ('\u{a7ce}', '\u{a7cf}'), - ('\u{a7d2}', '\u{a7d2}'), - ('\u{a7d4}', '\u{a7d4}'), - ('\u{a7dd}', '\u{a7f1}'), - ('\u{a82d}', '\u{a82f}'), - ('\u{a83a}', '\u{a83f}'), - ('\u{a878}', '\u{a87f}'), - ('\u{a8c6}', '\u{a8cd}'), - ('\u{a8da}', '\u{a8df}'), - ('\u{a954}', '\u{a95e}'), - ('\u{a97d}', '\u{a97f}'), - ('\u{a9ce}', '\u{a9ce}'), - ('\u{a9da}', '\u{a9dd}'), - ('\u{a9ff}', '\u{a9ff}'), - ('\u{aa37}', '\u{aa3f}'), - ('\u{aa4e}', '\u{aa4f}'), - ('\u{aa5a}', '\u{aa5b}'), - ('\u{aac3}', '\u{aada}'), - ('\u{aaf7}', '\u{ab00}'), - ('\u{ab07}', '\u{ab08}'), - ('\u{ab0f}', '\u{ab10}'), - ('\u{ab17}', '\u{ab1f}'), - ('\u{ab27}', '\u{ab27}'), - ('\u{ab2f}', '\u{ab2f}'), - ('\u{ab6c}', '\u{ab6f}'), - ('\u{abee}', '\u{abef}'), - ('\u{abfa}', '\u{abff}'), - ('\u{d7a4}', '\u{d7af}'), - ('\u{d7c7}', '\u{d7ca}'), - ('\u{d7fc}', '\u{d7ff}'), - ('\u{fa6e}', '\u{fa6f}'), - ('\u{fada}', '\u{faff}'), - ('\u{fb07}', '\u{fb12}'), - ('\u{fb18}', '\u{fb1c}'), - ('\u{fb37}', '\u{fb37}'), - ('\u{fb3d}', '\u{fb3d}'), - ('\u{fb3f}', '\u{fb3f}'), - ('\u{fb42}', '\u{fb42}'), - ('\u{fb45}', '\u{fb45}'), - ('\u{fbc3}', '\u{fbd2}'), - ('\u{fd90}', '\u{fd91}'), - ('\u{fdc8}', '\u{fdce}'), - ('\u{fdd0}', '\u{fdef}'), - ('\u{fe1a}', '\u{fe1f}'), - ('\u{fe53}', '\u{fe53}'), - ('\u{fe67}', '\u{fe67}'), - ('\u{fe6c}', '\u{fe6f}'), - ('\u{fe75}', '\u{fe75}'), - ('\u{fefd}', '\u{fefe}'), - ('\u{ff00}', '\u{ff00}'), - ('\u{ffbf}', 
'\u{ffc1}'), - ('\u{ffc8}', '\u{ffc9}'), - ('\u{ffd0}', '\u{ffd1}'), - ('\u{ffd8}', '\u{ffd9}'), - ('\u{ffdd}', '\u{ffdf}'), - ('\u{ffe7}', '\u{ffe7}'), - ('\u{ffef}', '\u{fff8}'), - ('\u{fffe}', '\u{ffff}'), - ('\u{1000c}', '\u{1000c}'), - ('\u{10027}', '\u{10027}'), - ('\u{1003b}', '\u{1003b}'), - ('\u{1003e}', '\u{1003e}'), - ('\u{1004e}', '\u{1004f}'), - ('\u{1005e}', '\u{1007f}'), - ('\u{100fb}', '\u{100ff}'), - ('\u{10103}', '\u{10106}'), - ('\u{10134}', '\u{10136}'), - ('\u{1018f}', '\u{1018f}'), - ('\u{1019d}', '\u{1019f}'), - ('\u{101a1}', '\u{101cf}'), - ('\u{101fe}', '\u{1027f}'), - ('\u{1029d}', '\u{1029f}'), - ('\u{102d1}', '\u{102df}'), - ('\u{102fc}', '\u{102ff}'), - ('\u{10324}', '\u{1032c}'), - ('\u{1034b}', '\u{1034f}'), - ('\u{1037b}', '\u{1037f}'), - ('\u{1039e}', '\u{1039e}'), - ('\u{103c4}', '\u{103c7}'), - ('\u{103d6}', '\u{103ff}'), - ('\u{1049e}', '\u{1049f}'), - ('\u{104aa}', '\u{104af}'), - ('\u{104d4}', '\u{104d7}'), - ('\u{104fc}', '\u{104ff}'), - ('\u{10528}', '\u{1052f}'), - ('\u{10564}', '\u{1056e}'), - ('\u{1057b}', '\u{1057b}'), - ('\u{1058b}', '\u{1058b}'), - ('\u{10593}', '\u{10593}'), - ('\u{10596}', '\u{10596}'), - ('\u{105a2}', '\u{105a2}'), - ('\u{105b2}', '\u{105b2}'), - ('\u{105ba}', '\u{105ba}'), - ('\u{105bd}', '\u{105bf}'), - ('\u{105f4}', '\u{105ff}'), - ('\u{10737}', '\u{1073f}'), - ('\u{10756}', '\u{1075f}'), - ('\u{10768}', '\u{1077f}'), - ('\u{10786}', '\u{10786}'), - ('\u{107b1}', '\u{107b1}'), - ('\u{107bb}', '\u{107ff}'), - ('\u{10806}', '\u{10807}'), - ('\u{10809}', '\u{10809}'), - ('\u{10836}', '\u{10836}'), - ('\u{10839}', '\u{1083b}'), - ('\u{1083d}', '\u{1083e}'), - ('\u{10856}', '\u{10856}'), - ('\u{1089f}', '\u{108a6}'), - ('\u{108b0}', '\u{108df}'), - ('\u{108f3}', '\u{108f3}'), - ('\u{108f6}', '\u{108fa}'), - ('\u{1091c}', '\u{1091e}'), - ('\u{1093a}', '\u{1093e}'), - ('\u{10940}', '\u{1097f}'), - ('\u{109b8}', '\u{109bb}'), - ('\u{109d0}', '\u{109d1}'), - ('\u{10a04}', '\u{10a04}'), - ('\u{10a07}', 
'\u{10a0b}'), - ('\u{10a14}', '\u{10a14}'), - ('\u{10a18}', '\u{10a18}'), - ('\u{10a36}', '\u{10a37}'), - ('\u{10a3b}', '\u{10a3e}'), - ('\u{10a49}', '\u{10a4f}'), - ('\u{10a59}', '\u{10a5f}'), - ('\u{10aa0}', '\u{10abf}'), - ('\u{10ae7}', '\u{10aea}'), - ('\u{10af7}', '\u{10aff}'), - ('\u{10b36}', '\u{10b38}'), - ('\u{10b56}', '\u{10b57}'), - ('\u{10b73}', '\u{10b77}'), - ('\u{10b92}', '\u{10b98}'), - ('\u{10b9d}', '\u{10ba8}'), - ('\u{10bb0}', '\u{10bff}'), - ('\u{10c49}', '\u{10c7f}'), - ('\u{10cb3}', '\u{10cbf}'), - ('\u{10cf3}', '\u{10cf9}'), - ('\u{10d28}', '\u{10d2f}'), - ('\u{10d3a}', '\u{10d3f}'), - ('\u{10d66}', '\u{10d68}'), - ('\u{10d86}', '\u{10d8d}'), - ('\u{10d90}', '\u{10e5f}'), - ('\u{10e7f}', '\u{10e7f}'), - ('\u{10eaa}', '\u{10eaa}'), - ('\u{10eae}', '\u{10eaf}'), - ('\u{10eb2}', '\u{10ec1}'), - ('\u{10ec5}', '\u{10efb}'), - ('\u{10f28}', '\u{10f2f}'), - ('\u{10f5a}', '\u{10f6f}'), - ('\u{10f8a}', '\u{10faf}'), - ('\u{10fcc}', '\u{10fdf}'), - ('\u{10ff7}', '\u{10fff}'), - ('\u{1104e}', '\u{11051}'), - ('\u{11076}', '\u{1107e}'), - ('\u{110c3}', '\u{110cc}'), - ('\u{110ce}', '\u{110cf}'), - ('\u{110e9}', '\u{110ef}'), - ('\u{110fa}', '\u{110ff}'), - ('\u{11135}', '\u{11135}'), - ('\u{11148}', '\u{1114f}'), - ('\u{11177}', '\u{1117f}'), - ('\u{111e0}', '\u{111e0}'), - ('\u{111f5}', '\u{111ff}'), - ('\u{11212}', '\u{11212}'), - ('\u{11242}', '\u{1127f}'), - ('\u{11287}', '\u{11287}'), - ('\u{11289}', '\u{11289}'), - ('\u{1128e}', '\u{1128e}'), - ('\u{1129e}', '\u{1129e}'), - ('\u{112aa}', '\u{112af}'), - ('\u{112eb}', '\u{112ef}'), - ('\u{112fa}', '\u{112ff}'), - ('\u{11304}', '\u{11304}'), - ('\u{1130d}', '\u{1130e}'), - ('\u{11311}', '\u{11312}'), - ('\u{11329}', '\u{11329}'), - ('\u{11331}', '\u{11331}'), - ('\u{11334}', '\u{11334}'), - ('\u{1133a}', '\u{1133a}'), - ('\u{11345}', '\u{11346}'), - ('\u{11349}', '\u{1134a}'), - ('\u{1134e}', '\u{1134f}'), - ('\u{11351}', '\u{11356}'), - ('\u{11358}', '\u{1135c}'), - ('\u{11364}', '\u{11365}'), - 
('\u{1136d}', '\u{1136f}'), - ('\u{11375}', '\u{1137f}'), - ('\u{1138a}', '\u{1138a}'), - ('\u{1138c}', '\u{1138d}'), - ('\u{1138f}', '\u{1138f}'), - ('\u{113b6}', '\u{113b6}'), - ('\u{113c1}', '\u{113c1}'), - ('\u{113c3}', '\u{113c4}'), - ('\u{113c6}', '\u{113c6}'), - ('\u{113cb}', '\u{113cb}'), - ('\u{113d6}', '\u{113d6}'), - ('\u{113d9}', '\u{113e0}'), - ('\u{113e3}', '\u{113ff}'), - ('\u{1145c}', '\u{1145c}'), - ('\u{11462}', '\u{1147f}'), - ('\u{114c8}', '\u{114cf}'), - ('\u{114da}', '\u{1157f}'), - ('\u{115b6}', '\u{115b7}'), - ('\u{115de}', '\u{115ff}'), - ('\u{11645}', '\u{1164f}'), - ('\u{1165a}', '\u{1165f}'), - ('\u{1166d}', '\u{1167f}'), - ('\u{116ba}', '\u{116bf}'), - ('\u{116ca}', '\u{116cf}'), - ('\u{116e4}', '\u{116ff}'), - ('\u{1171b}', '\u{1171c}'), - ('\u{1172c}', '\u{1172f}'), - ('\u{11747}', '\u{117ff}'), - ('\u{1183c}', '\u{1189f}'), - ('\u{118f3}', '\u{118fe}'), - ('\u{11907}', '\u{11908}'), - ('\u{1190a}', '\u{1190b}'), - ('\u{11914}', '\u{11914}'), - ('\u{11917}', '\u{11917}'), - ('\u{11936}', '\u{11936}'), - ('\u{11939}', '\u{1193a}'), - ('\u{11947}', '\u{1194f}'), - ('\u{1195a}', '\u{1199f}'), - ('\u{119a8}', '\u{119a9}'), - ('\u{119d8}', '\u{119d9}'), - ('\u{119e5}', '\u{119ff}'), - ('\u{11a48}', '\u{11a4f}'), - ('\u{11aa3}', '\u{11aaf}'), - ('\u{11af9}', '\u{11aff}'), - ('\u{11b0a}', '\u{11bbf}'), - ('\u{11be2}', '\u{11bef}'), - ('\u{11bfa}', '\u{11bff}'), - ('\u{11c09}', '\u{11c09}'), - ('\u{11c37}', '\u{11c37}'), - ('\u{11c46}', '\u{11c4f}'), - ('\u{11c6d}', '\u{11c6f}'), - ('\u{11c90}', '\u{11c91}'), - ('\u{11ca8}', '\u{11ca8}'), - ('\u{11cb7}', '\u{11cff}'), - ('\u{11d07}', '\u{11d07}'), - ('\u{11d0a}', '\u{11d0a}'), - ('\u{11d37}', '\u{11d39}'), - ('\u{11d3b}', '\u{11d3b}'), - ('\u{11d3e}', '\u{11d3e}'), - ('\u{11d48}', '\u{11d4f}'), - ('\u{11d5a}', '\u{11d5f}'), - ('\u{11d66}', '\u{11d66}'), - ('\u{11d69}', '\u{11d69}'), - ('\u{11d8f}', '\u{11d8f}'), - ('\u{11d92}', '\u{11d92}'), - ('\u{11d99}', '\u{11d9f}'), - ('\u{11daa}', 
'\u{11edf}'), - ('\u{11ef9}', '\u{11eff}'), - ('\u{11f11}', '\u{11f11}'), - ('\u{11f3b}', '\u{11f3d}'), - ('\u{11f5b}', '\u{11faf}'), - ('\u{11fb1}', '\u{11fbf}'), - ('\u{11ff2}', '\u{11ffe}'), - ('\u{1239a}', '\u{123ff}'), - ('\u{1246f}', '\u{1246f}'), - ('\u{12475}', '\u{1247f}'), - ('\u{12544}', '\u{12f8f}'), - ('\u{12ff3}', '\u{12fff}'), - ('\u{13456}', '\u{1345f}'), - ('\u{143fb}', '\u{143ff}'), - ('\u{14647}', '\u{160ff}'), - ('\u{1613a}', '\u{167ff}'), - ('\u{16a39}', '\u{16a3f}'), - ('\u{16a5f}', '\u{16a5f}'), - ('\u{16a6a}', '\u{16a6d}'), - ('\u{16abf}', '\u{16abf}'), - ('\u{16aca}', '\u{16acf}'), - ('\u{16aee}', '\u{16aef}'), - ('\u{16af6}', '\u{16aff}'), - ('\u{16b46}', '\u{16b4f}'), - ('\u{16b5a}', '\u{16b5a}'), - ('\u{16b62}', '\u{16b62}'), - ('\u{16b78}', '\u{16b7c}'), - ('\u{16b90}', '\u{16d3f}'), - ('\u{16d7a}', '\u{16e3f}'), - ('\u{16e9b}', '\u{16eff}'), - ('\u{16f4b}', '\u{16f4e}'), - ('\u{16f88}', '\u{16f8e}'), - ('\u{16fa0}', '\u{16fdf}'), - ('\u{16fe5}', '\u{16fef}'), - ('\u{16ff2}', '\u{16fff}'), - ('\u{187f8}', '\u{187ff}'), - ('\u{18cd6}', '\u{18cfe}'), - ('\u{18d09}', '\u{1afef}'), - ('\u{1aff4}', '\u{1aff4}'), - ('\u{1affc}', '\u{1affc}'), - ('\u{1afff}', '\u{1afff}'), - ('\u{1b123}', '\u{1b131}'), - ('\u{1b133}', '\u{1b14f}'), - ('\u{1b153}', '\u{1b154}'), - ('\u{1b156}', '\u{1b163}'), - ('\u{1b168}', '\u{1b16f}'), - ('\u{1b2fc}', '\u{1bbff}'), - ('\u{1bc6b}', '\u{1bc6f}'), - ('\u{1bc7d}', '\u{1bc7f}'), - ('\u{1bc89}', '\u{1bc8f}'), - ('\u{1bc9a}', '\u{1bc9b}'), - ('\u{1bca4}', '\u{1cbff}'), - ('\u{1ccfa}', '\u{1ccff}'), - ('\u{1ceb4}', '\u{1ceff}'), - ('\u{1cf2e}', '\u{1cf2f}'), - ('\u{1cf47}', '\u{1cf4f}'), - ('\u{1cfc4}', '\u{1cfff}'), - ('\u{1d0f6}', '\u{1d0ff}'), - ('\u{1d127}', '\u{1d128}'), - ('\u{1d1eb}', '\u{1d1ff}'), - ('\u{1d246}', '\u{1d2bf}'), - ('\u{1d2d4}', '\u{1d2df}'), - ('\u{1d2f4}', '\u{1d2ff}'), - ('\u{1d357}', '\u{1d35f}'), - ('\u{1d379}', '\u{1d3ff}'), - ('\u{1d455}', '\u{1d455}'), - ('\u{1d49d}', '\u{1d49d}'), - 
('\u{1d4a0}', '\u{1d4a1}'), - ('\u{1d4a3}', '\u{1d4a4}'), - ('\u{1d4a7}', '\u{1d4a8}'), - ('\u{1d4ad}', '\u{1d4ad}'), - ('\u{1d4ba}', '\u{1d4ba}'), - ('\u{1d4bc}', '\u{1d4bc}'), - ('\u{1d4c4}', '\u{1d4c4}'), - ('\u{1d506}', '\u{1d506}'), - ('\u{1d50b}', '\u{1d50c}'), - ('\u{1d515}', '\u{1d515}'), - ('\u{1d51d}', '\u{1d51d}'), - ('\u{1d53a}', '\u{1d53a}'), - ('\u{1d53f}', '\u{1d53f}'), - ('\u{1d545}', '\u{1d545}'), - ('\u{1d547}', '\u{1d549}'), - ('\u{1d551}', '\u{1d551}'), - ('\u{1d6a6}', '\u{1d6a7}'), - ('\u{1d7cc}', '\u{1d7cd}'), - ('\u{1da8c}', '\u{1da9a}'), - ('\u{1daa0}', '\u{1daa0}'), - ('\u{1dab0}', '\u{1deff}'), - ('\u{1df1f}', '\u{1df24}'), - ('\u{1df2b}', '\u{1dfff}'), - ('\u{1e007}', '\u{1e007}'), - ('\u{1e019}', '\u{1e01a}'), - ('\u{1e022}', '\u{1e022}'), - ('\u{1e025}', '\u{1e025}'), - ('\u{1e02b}', '\u{1e02f}'), - ('\u{1e06e}', '\u{1e08e}'), - ('\u{1e090}', '\u{1e0ff}'), - ('\u{1e12d}', '\u{1e12f}'), - ('\u{1e13e}', '\u{1e13f}'), - ('\u{1e14a}', '\u{1e14d}'), - ('\u{1e150}', '\u{1e28f}'), - ('\u{1e2af}', '\u{1e2bf}'), - ('\u{1e2fa}', '\u{1e2fe}'), - ('\u{1e300}', '\u{1e4cf}'), - ('\u{1e4fa}', '\u{1e5cf}'), - ('\u{1e5fb}', '\u{1e5fe}'), - ('\u{1e600}', '\u{1e7df}'), - ('\u{1e7e7}', '\u{1e7e7}'), - ('\u{1e7ec}', '\u{1e7ec}'), - ('\u{1e7ef}', '\u{1e7ef}'), - ('\u{1e7ff}', '\u{1e7ff}'), - ('\u{1e8c5}', '\u{1e8c6}'), - ('\u{1e8d7}', '\u{1e8ff}'), - ('\u{1e94c}', '\u{1e94f}'), - ('\u{1e95a}', '\u{1e95d}'), - ('\u{1e960}', '\u{1ec70}'), - ('\u{1ecb5}', '\u{1ed00}'), - ('\u{1ed3e}', '\u{1edff}'), - ('\u{1ee04}', '\u{1ee04}'), - ('\u{1ee20}', '\u{1ee20}'), - ('\u{1ee23}', '\u{1ee23}'), - ('\u{1ee25}', '\u{1ee26}'), - ('\u{1ee28}', '\u{1ee28}'), - ('\u{1ee33}', '\u{1ee33}'), - ('\u{1ee38}', '\u{1ee38}'), - ('\u{1ee3a}', '\u{1ee3a}'), - ('\u{1ee3c}', '\u{1ee41}'), - ('\u{1ee43}', '\u{1ee46}'), - ('\u{1ee48}', '\u{1ee48}'), - ('\u{1ee4a}', '\u{1ee4a}'), - ('\u{1ee4c}', '\u{1ee4c}'), - ('\u{1ee50}', '\u{1ee50}'), - ('\u{1ee53}', '\u{1ee53}'), - ('\u{1ee55}', 
'\u{1ee56}'), - ('\u{1ee58}', '\u{1ee58}'), - ('\u{1ee5a}', '\u{1ee5a}'), - ('\u{1ee5c}', '\u{1ee5c}'), - ('\u{1ee5e}', '\u{1ee5e}'), - ('\u{1ee60}', '\u{1ee60}'), - ('\u{1ee63}', '\u{1ee63}'), - ('\u{1ee65}', '\u{1ee66}'), - ('\u{1ee6b}', '\u{1ee6b}'), - ('\u{1ee73}', '\u{1ee73}'), - ('\u{1ee78}', '\u{1ee78}'), - ('\u{1ee7d}', '\u{1ee7d}'), - ('\u{1ee7f}', '\u{1ee7f}'), - ('\u{1ee8a}', '\u{1ee8a}'), - ('\u{1ee9c}', '\u{1eea0}'), - ('\u{1eea4}', '\u{1eea4}'), - ('\u{1eeaa}', '\u{1eeaa}'), - ('\u{1eebc}', '\u{1eeef}'), - ('\u{1eef2}', '\u{1efff}'), - ('\u{1f02c}', '\u{1f02f}'), - ('\u{1f094}', '\u{1f09f}'), - ('\u{1f0af}', '\u{1f0b0}'), - ('\u{1f0c0}', '\u{1f0c0}'), - ('\u{1f0d0}', '\u{1f0d0}'), - ('\u{1f0f6}', '\u{1f0ff}'), - ('\u{1f1ae}', '\u{1f1e5}'), - ('\u{1f203}', '\u{1f20f}'), - ('\u{1f23c}', '\u{1f23f}'), - ('\u{1f249}', '\u{1f24f}'), - ('\u{1f252}', '\u{1f25f}'), - ('\u{1f266}', '\u{1f2ff}'), - ('\u{1f6d8}', '\u{1f6db}'), - ('\u{1f6ed}', '\u{1f6ef}'), - ('\u{1f6fd}', '\u{1f6ff}'), - ('\u{1f777}', '\u{1f77a}'), - ('\u{1f7da}', '\u{1f7df}'), - ('\u{1f7ec}', '\u{1f7ef}'), - ('\u{1f7f1}', '\u{1f7ff}'), - ('\u{1f80c}', '\u{1f80f}'), - ('\u{1f848}', '\u{1f84f}'), - ('\u{1f85a}', '\u{1f85f}'), - ('\u{1f888}', '\u{1f88f}'), - ('\u{1f8ae}', '\u{1f8af}'), - ('\u{1f8bc}', '\u{1f8bf}'), - ('\u{1f8c2}', '\u{1f8ff}'), - ('\u{1fa54}', '\u{1fa5f}'), - ('\u{1fa6e}', '\u{1fa6f}'), - ('\u{1fa7d}', '\u{1fa7f}'), - ('\u{1fa8a}', '\u{1fa8e}'), - ('\u{1fac7}', '\u{1facd}'), - ('\u{1fadd}', '\u{1fade}'), - ('\u{1faea}', '\u{1faef}'), - ('\u{1faf9}', '\u{1faff}'), - ('\u{1fb93}', '\u{1fb93}'), - ('\u{1fbfa}', '\u{1ffff}'), - ('\u{2a6e0}', '\u{2a6ff}'), - ('\u{2b73a}', '\u{2b73f}'), - ('\u{2b81e}', '\u{2b81f}'), - ('\u{2cea2}', '\u{2ceaf}'), - ('\u{2ebe1}', '\u{2ebef}'), - ('\u{2ee5e}', '\u{2f7ff}'), - ('\u{2fa1e}', '\u{2ffff}'), - ('\u{3134b}', '\u{3134f}'), - ('\u{323b0}', '\u{e0000}'), - ('\u{e0002}', '\u{e001f}'), - ('\u{e0080}', '\u{e00ff}'), - ('\u{e01f0}', '\u{effff}'), - 
('\u{ffffe}', '\u{fffff}'), - ('\u{10fffe}', '\u{10ffff}'), -]; - -pub const UPPERCASE_LETTER: &'static [(char, char)] = &[ - ('A', 'Z'), - ('À', 'Ö'), - ('Ø', 'Þ'), - ('Ā', 'Ā'), - ('Ă', 'Ă'), - ('Ą', 'Ą'), - ('Ć', 'Ć'), - ('Ĉ', 'Ĉ'), - ('Ċ', 'Ċ'), - ('Č', 'Č'), - ('Ď', 'Ď'), - ('Đ', 'Đ'), - ('Ē', 'Ē'), - ('Ĕ', 'Ĕ'), - ('Ė', 'Ė'), - ('Ę', 'Ę'), - ('Ě', 'Ě'), - ('Ĝ', 'Ĝ'), - ('Ğ', 'Ğ'), - ('Ġ', 'Ġ'), - ('Ģ', 'Ģ'), - ('Ĥ', 'Ĥ'), - ('Ħ', 'Ħ'), - ('Ĩ', 'Ĩ'), - ('Ī', 'Ī'), - ('Ĭ', 'Ĭ'), - ('Į', 'Į'), - ('İ', 'İ'), - ('IJ', 'IJ'), - ('Ĵ', 'Ĵ'), - ('Ķ', 'Ķ'), - ('Ĺ', 'Ĺ'), - ('Ļ', 'Ļ'), - ('Ľ', 'Ľ'), - ('Ŀ', 'Ŀ'), - ('Ł', 'Ł'), - ('Ń', 'Ń'), - ('Ņ', 'Ņ'), - ('Ň', 'Ň'), - ('Ŋ', 'Ŋ'), - ('Ō', 'Ō'), - ('Ŏ', 'Ŏ'), - ('Ő', 'Ő'), - ('Œ', 'Œ'), - ('Ŕ', 'Ŕ'), - ('Ŗ', 'Ŗ'), - ('Ř', 'Ř'), - ('Ś', 'Ś'), - ('Ŝ', 'Ŝ'), - ('Ş', 'Ş'), - ('Š', 'Š'), - ('Ţ', 'Ţ'), - ('Ť', 'Ť'), - ('Ŧ', 'Ŧ'), - ('Ũ', 'Ũ'), - ('Ū', 'Ū'), - ('Ŭ', 'Ŭ'), - ('Ů', 'Ů'), - ('Ű', 'Ű'), - ('Ų', 'Ų'), - ('Ŵ', 'Ŵ'), - ('Ŷ', 'Ŷ'), - ('Ÿ', 'Ź'), - ('Ż', 'Ż'), - ('Ž', 'Ž'), - ('Ɓ', 'Ƃ'), - ('Ƅ', 'Ƅ'), - ('Ɔ', 'Ƈ'), - ('Ɖ', 'Ƌ'), - ('Ǝ', 'Ƒ'), - ('Ɠ', 'Ɣ'), - ('Ɩ', 'Ƙ'), - ('Ɯ', 'Ɲ'), - ('Ɵ', 'Ơ'), - ('Ƣ', 'Ƣ'), - ('Ƥ', 'Ƥ'), - ('Ʀ', 'Ƨ'), - ('Ʃ', 'Ʃ'), - ('Ƭ', 'Ƭ'), - ('Ʈ', 'Ư'), - ('Ʊ', 'Ƴ'), - ('Ƶ', 'Ƶ'), - ('Ʒ', 'Ƹ'), - ('Ƽ', 'Ƽ'), - ('DŽ', 'DŽ'), - ('LJ', 'LJ'), - ('NJ', 'NJ'), - ('Ǎ', 'Ǎ'), - ('Ǐ', 'Ǐ'), - ('Ǒ', 'Ǒ'), - ('Ǔ', 'Ǔ'), - ('Ǖ', 'Ǖ'), - ('Ǘ', 'Ǘ'), - ('Ǚ', 'Ǚ'), - ('Ǜ', 'Ǜ'), - ('Ǟ', 'Ǟ'), - ('Ǡ', 'Ǡ'), - ('Ǣ', 'Ǣ'), - ('Ǥ', 'Ǥ'), - ('Ǧ', 'Ǧ'), - ('Ǩ', 'Ǩ'), - ('Ǫ', 'Ǫ'), - ('Ǭ', 'Ǭ'), - ('Ǯ', 'Ǯ'), - ('DZ', 'DZ'), - ('Ǵ', 'Ǵ'), - ('Ƕ', 'Ǹ'), - ('Ǻ', 'Ǻ'), - ('Ǽ', 'Ǽ'), - ('Ǿ', 'Ǿ'), - ('Ȁ', 'Ȁ'), - ('Ȃ', 'Ȃ'), - ('Ȅ', 'Ȅ'), - ('Ȇ', 'Ȇ'), - ('Ȉ', 'Ȉ'), - ('Ȋ', 'Ȋ'), - ('Ȍ', 'Ȍ'), - ('Ȏ', 'Ȏ'), - ('Ȑ', 'Ȑ'), - ('Ȓ', 'Ȓ'), - ('Ȕ', 'Ȕ'), - ('Ȗ', 'Ȗ'), - ('Ș', 'Ș'), - ('Ț', 'Ț'), - ('Ȝ', 'Ȝ'), - ('Ȟ', 'Ȟ'), - ('Ƞ', 'Ƞ'), - ('Ȣ', 'Ȣ'), - ('Ȥ', 'Ȥ'), - ('Ȧ', 'Ȧ'), - ('Ȩ', 'Ȩ'), - ('Ȫ', 'Ȫ'), - ('Ȭ', 'Ȭ'), - 
('Ȯ', 'Ȯ'), - ('Ȱ', 'Ȱ'), - ('Ȳ', 'Ȳ'), - ('Ⱥ', 'Ȼ'), - ('Ƚ', 'Ⱦ'), - ('Ɂ', 'Ɂ'), - ('Ƀ', 'Ɇ'), - ('Ɉ', 'Ɉ'), - ('Ɋ', 'Ɋ'), - ('Ɍ', 'Ɍ'), - ('Ɏ', 'Ɏ'), - ('Ͱ', 'Ͱ'), - ('Ͳ', 'Ͳ'), - ('Ͷ', 'Ͷ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ώ'), - ('Α', 'Ρ'), - ('Σ', 'Ϋ'), - ('Ϗ', 'Ϗ'), - ('ϒ', 'ϔ'), - ('Ϙ', 'Ϙ'), - ('Ϛ', 'Ϛ'), - ('Ϝ', 'Ϝ'), - ('Ϟ', 'Ϟ'), - ('Ϡ', 'Ϡ'), - ('Ϣ', 'Ϣ'), - ('Ϥ', 'Ϥ'), - ('Ϧ', 'Ϧ'), - ('Ϩ', 'Ϩ'), - ('Ϫ', 'Ϫ'), - ('Ϭ', 'Ϭ'), - ('Ϯ', 'Ϯ'), - ('ϴ', 'ϴ'), - ('Ϸ', 'Ϸ'), - ('Ϲ', 'Ϻ'), - ('Ͻ', 'Я'), - ('Ѡ', 'Ѡ'), - ('Ѣ', 'Ѣ'), - ('Ѥ', 'Ѥ'), - ('Ѧ', 'Ѧ'), - ('Ѩ', 'Ѩ'), - ('Ѫ', 'Ѫ'), - ('Ѭ', 'Ѭ'), - ('Ѯ', 'Ѯ'), - ('Ѱ', 'Ѱ'), - ('Ѳ', 'Ѳ'), - ('Ѵ', 'Ѵ'), - ('Ѷ', 'Ѷ'), - ('Ѹ', 'Ѹ'), - ('Ѻ', 'Ѻ'), - ('Ѽ', 'Ѽ'), - ('Ѿ', 'Ѿ'), - ('Ҁ', 'Ҁ'), - ('Ҋ', 'Ҋ'), - ('Ҍ', 'Ҍ'), - ('Ҏ', 'Ҏ'), - ('Ґ', 'Ґ'), - ('Ғ', 'Ғ'), - ('Ҕ', 'Ҕ'), - ('Җ', 'Җ'), - ('Ҙ', 'Ҙ'), - ('Қ', 'Қ'), - ('Ҝ', 'Ҝ'), - ('Ҟ', 'Ҟ'), - ('Ҡ', 'Ҡ'), - ('Ң', 'Ң'), - ('Ҥ', 'Ҥ'), - ('Ҧ', 'Ҧ'), - ('Ҩ', 'Ҩ'), - ('Ҫ', 'Ҫ'), - ('Ҭ', 'Ҭ'), - ('Ү', 'Ү'), - ('Ұ', 'Ұ'), - ('Ҳ', 'Ҳ'), - ('Ҵ', 'Ҵ'), - ('Ҷ', 'Ҷ'), - ('Ҹ', 'Ҹ'), - ('Һ', 'Һ'), - ('Ҽ', 'Ҽ'), - ('Ҿ', 'Ҿ'), - ('Ӏ', 'Ӂ'), - ('Ӄ', 'Ӄ'), - ('Ӆ', 'Ӆ'), - ('Ӈ', 'Ӈ'), - ('Ӊ', 'Ӊ'), - ('Ӌ', 'Ӌ'), - ('Ӎ', 'Ӎ'), - ('Ӑ', 'Ӑ'), - ('Ӓ', 'Ӓ'), - ('Ӕ', 'Ӕ'), - ('Ӗ', 'Ӗ'), - ('Ә', 'Ә'), - ('Ӛ', 'Ӛ'), - ('Ӝ', 'Ӝ'), - ('Ӟ', 'Ӟ'), - ('Ӡ', 'Ӡ'), - ('Ӣ', 'Ӣ'), - ('Ӥ', 'Ӥ'), - ('Ӧ', 'Ӧ'), - ('Ө', 'Ө'), - ('Ӫ', 'Ӫ'), - ('Ӭ', 'Ӭ'), - ('Ӯ', 'Ӯ'), - ('Ӱ', 'Ӱ'), - ('Ӳ', 'Ӳ'), - ('Ӵ', 'Ӵ'), - ('Ӷ', 'Ӷ'), - ('Ӹ', 'Ӹ'), - ('Ӻ', 'Ӻ'), - ('Ӽ', 'Ӽ'), - ('Ӿ', 'Ӿ'), - ('Ԁ', 'Ԁ'), - ('Ԃ', 'Ԃ'), - ('Ԅ', 'Ԅ'), - ('Ԇ', 'Ԇ'), - ('Ԉ', 'Ԉ'), - ('Ԋ', 'Ԋ'), - ('Ԍ', 'Ԍ'), - ('Ԏ', 'Ԏ'), - ('Ԑ', 'Ԑ'), - ('Ԓ', 'Ԓ'), - ('Ԕ', 'Ԕ'), - ('Ԗ', 'Ԗ'), - ('Ԙ', 'Ԙ'), - ('Ԛ', 'Ԛ'), - ('Ԝ', 'Ԝ'), - ('Ԟ', 'Ԟ'), - ('Ԡ', 'Ԡ'), - ('Ԣ', 'Ԣ'), - ('Ԥ', 'Ԥ'), - ('Ԧ', 'Ԧ'), - ('Ԩ', 'Ԩ'), - ('Ԫ', 'Ԫ'), - ('Ԭ', 'Ԭ'), - ('Ԯ', 'Ԯ'), - ('Ա', 'Ֆ'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('Ꭰ', 'Ᏽ'), 
- ('Ᲊ', 'Ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('Ḁ', 'Ḁ'), - ('Ḃ', 'Ḃ'), - ('Ḅ', 'Ḅ'), - ('Ḇ', 'Ḇ'), - ('Ḉ', 'Ḉ'), - ('Ḋ', 'Ḋ'), - ('Ḍ', 'Ḍ'), - ('Ḏ', 'Ḏ'), - ('Ḑ', 'Ḑ'), - ('Ḓ', 'Ḓ'), - ('Ḕ', 'Ḕ'), - ('Ḗ', 'Ḗ'), - ('Ḙ', 'Ḙ'), - ('Ḛ', 'Ḛ'), - ('Ḝ', 'Ḝ'), - ('Ḟ', 'Ḟ'), - ('Ḡ', 'Ḡ'), - ('Ḣ', 'Ḣ'), - ('Ḥ', 'Ḥ'), - ('Ḧ', 'Ḧ'), - ('Ḩ', 'Ḩ'), - ('Ḫ', 'Ḫ'), - ('Ḭ', 'Ḭ'), - ('Ḯ', 'Ḯ'), - ('Ḱ', 'Ḱ'), - ('Ḳ', 'Ḳ'), - ('Ḵ', 'Ḵ'), - ('Ḷ', 'Ḷ'), - ('Ḹ', 'Ḹ'), - ('Ḻ', 'Ḻ'), - ('Ḽ', 'Ḽ'), - ('Ḿ', 'Ḿ'), - ('Ṁ', 'Ṁ'), - ('Ṃ', 'Ṃ'), - ('Ṅ', 'Ṅ'), - ('Ṇ', 'Ṇ'), - ('Ṉ', 'Ṉ'), - ('Ṋ', 'Ṋ'), - ('Ṍ', 'Ṍ'), - ('Ṏ', 'Ṏ'), - ('Ṑ', 'Ṑ'), - ('Ṓ', 'Ṓ'), - ('Ṕ', 'Ṕ'), - ('Ṗ', 'Ṗ'), - ('Ṙ', 'Ṙ'), - ('Ṛ', 'Ṛ'), - ('Ṝ', 'Ṝ'), - ('Ṟ', 'Ṟ'), - ('Ṡ', 'Ṡ'), - ('Ṣ', 'Ṣ'), - ('Ṥ', 'Ṥ'), - ('Ṧ', 'Ṧ'), - ('Ṩ', 'Ṩ'), - ('Ṫ', 'Ṫ'), - ('Ṭ', 'Ṭ'), - ('Ṯ', 'Ṯ'), - ('Ṱ', 'Ṱ'), - ('Ṳ', 'Ṳ'), - ('Ṵ', 'Ṵ'), - ('Ṷ', 'Ṷ'), - ('Ṹ', 'Ṹ'), - ('Ṻ', 'Ṻ'), - ('Ṽ', 'Ṽ'), - ('Ṿ', 'Ṿ'), - ('Ẁ', 'Ẁ'), - ('Ẃ', 'Ẃ'), - ('Ẅ', 'Ẅ'), - ('Ẇ', 'Ẇ'), - ('Ẉ', 'Ẉ'), - ('Ẋ', 'Ẋ'), - ('Ẍ', 'Ẍ'), - ('Ẏ', 'Ẏ'), - ('Ẑ', 'Ẑ'), - ('Ẓ', 'Ẓ'), - ('Ẕ', 'Ẕ'), - ('ẞ', 'ẞ'), - ('Ạ', 'Ạ'), - ('Ả', 'Ả'), - ('Ấ', 'Ấ'), - ('Ầ', 'Ầ'), - ('Ẩ', 'Ẩ'), - ('Ẫ', 'Ẫ'), - ('Ậ', 'Ậ'), - ('Ắ', 'Ắ'), - ('Ằ', 'Ằ'), - ('Ẳ', 'Ẳ'), - ('Ẵ', 'Ẵ'), - ('Ặ', 'Ặ'), - ('Ẹ', 'Ẹ'), - ('Ẻ', 'Ẻ'), - ('Ẽ', 'Ẽ'), - ('Ế', 'Ế'), - ('Ề', 'Ề'), - ('Ể', 'Ể'), - ('Ễ', 'Ễ'), - ('Ệ', 'Ệ'), - ('Ỉ', 'Ỉ'), - ('Ị', 'Ị'), - ('Ọ', 'Ọ'), - ('Ỏ', 'Ỏ'), - ('Ố', 'Ố'), - ('Ồ', 'Ồ'), - ('Ổ', 'Ổ'), - ('Ỗ', 'Ỗ'), - ('Ộ', 'Ộ'), - ('Ớ', 'Ớ'), - ('Ờ', 'Ờ'), - ('Ở', 'Ở'), - ('Ỡ', 'Ỡ'), - ('Ợ', 'Ợ'), - ('Ụ', 'Ụ'), - ('Ủ', 'Ủ'), - ('Ứ', 'Ứ'), - ('Ừ', 'Ừ'), - ('Ử', 'Ử'), - ('Ữ', 'Ữ'), - ('Ự', 'Ự'), - ('Ỳ', 'Ỳ'), - ('Ỵ', 'Ỵ'), - ('Ỷ', 'Ỷ'), - ('Ỹ', 'Ỹ'), - ('Ỻ', 'Ỻ'), - ('Ỽ', 'Ỽ'), - ('Ỿ', 'Ỿ'), - ('Ἀ', 'Ἇ'), - ('Ἐ', 'Ἕ'), - ('Ἠ', 'Ἧ'), - ('Ἰ', 'Ἷ'), - ('Ὀ', 'Ὅ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'Ὗ'), - ('Ὠ', 'Ὧ'), - ('Ᾰ', 'Ά'), - ('Ὲ', 'Ή'), - ('Ῐ', 'Ί'), - ('Ῠ', 'Ῥ'), - ('Ὸ', 'Ώ'), - ('ℂ', 
'ℂ'), - ('ℇ', 'ℇ'), - ('ℋ', 'ℍ'), - ('ℐ', 'ℒ'), - ('ℕ', 'ℕ'), - ('ℙ', 'ℝ'), - ('ℤ', 'ℤ'), - ('Ω', 'Ω'), - ('ℨ', 'ℨ'), - ('K', 'ℭ'), - ('ℰ', 'ℳ'), - ('ℾ', 'ℿ'), - ('ⅅ', 'ⅅ'), - ('Ↄ', 'Ↄ'), - ('Ⰰ', 'Ⱟ'), - ('Ⱡ', 'Ⱡ'), - ('Ɫ', 'Ɽ'), - ('Ⱨ', 'Ⱨ'), - ('Ⱪ', 'Ⱪ'), - ('Ⱬ', 'Ⱬ'), - ('Ɑ', 'Ɒ'), - ('Ⱳ', 'Ⱳ'), - ('Ⱶ', 'Ⱶ'), - ('Ȿ', 'Ⲁ'), - ('Ⲃ', 'Ⲃ'), - ('Ⲅ', 'Ⲅ'), - ('Ⲇ', 'Ⲇ'), - ('Ⲉ', 'Ⲉ'), - ('Ⲋ', 'Ⲋ'), - ('Ⲍ', 'Ⲍ'), - ('Ⲏ', 'Ⲏ'), - ('Ⲑ', 'Ⲑ'), - ('Ⲓ', 'Ⲓ'), - ('Ⲕ', 'Ⲕ'), - ('Ⲗ', 'Ⲗ'), - ('Ⲙ', 'Ⲙ'), - ('Ⲛ', 'Ⲛ'), - ('Ⲝ', 'Ⲝ'), - ('Ⲟ', 'Ⲟ'), - ('Ⲡ', 'Ⲡ'), - ('Ⲣ', 'Ⲣ'), - ('Ⲥ', 'Ⲥ'), - ('Ⲧ', 'Ⲧ'), - ('Ⲩ', 'Ⲩ'), - ('Ⲫ', 'Ⲫ'), - ('Ⲭ', 'Ⲭ'), - ('Ⲯ', 'Ⲯ'), - ('Ⲱ', 'Ⲱ'), - ('Ⲳ', 'Ⲳ'), - ('Ⲵ', 'Ⲵ'), - ('Ⲷ', 'Ⲷ'), - ('Ⲹ', 'Ⲹ'), - ('Ⲻ', 'Ⲻ'), - ('Ⲽ', 'Ⲽ'), - ('Ⲿ', 'Ⲿ'), - ('Ⳁ', 'Ⳁ'), - ('Ⳃ', 'Ⳃ'), - ('Ⳅ', 'Ⳅ'), - ('Ⳇ', 'Ⳇ'), - ('Ⳉ', 'Ⳉ'), - ('Ⳋ', 'Ⳋ'), - ('Ⳍ', 'Ⳍ'), - ('Ⳏ', 'Ⳏ'), - ('Ⳑ', 'Ⳑ'), - ('Ⳓ', 'Ⳓ'), - ('Ⳕ', 'Ⳕ'), - ('Ⳗ', 'Ⳗ'), - ('Ⳙ', 'Ⳙ'), - ('Ⳛ', 'Ⳛ'), - ('Ⳝ', 'Ⳝ'), - ('Ⳟ', 'Ⳟ'), - ('Ⳡ', 'Ⳡ'), - ('Ⳣ', 'Ⳣ'), - ('Ⳬ', 'Ⳬ'), - ('Ⳮ', 'Ⳮ'), - ('Ⳳ', 'Ⳳ'), - ('Ꙁ', 'Ꙁ'), - ('Ꙃ', 'Ꙃ'), - ('Ꙅ', 'Ꙅ'), - ('Ꙇ', 'Ꙇ'), - ('Ꙉ', 'Ꙉ'), - ('Ꙋ', 'Ꙋ'), - ('Ꙍ', 'Ꙍ'), - ('Ꙏ', 'Ꙏ'), - ('Ꙑ', 'Ꙑ'), - ('Ꙓ', 'Ꙓ'), - ('Ꙕ', 'Ꙕ'), - ('Ꙗ', 'Ꙗ'), - ('Ꙙ', 'Ꙙ'), - ('Ꙛ', 'Ꙛ'), - ('Ꙝ', 'Ꙝ'), - ('Ꙟ', 'Ꙟ'), - ('Ꙡ', 'Ꙡ'), - ('Ꙣ', 'Ꙣ'), - ('Ꙥ', 'Ꙥ'), - ('Ꙧ', 'Ꙧ'), - ('Ꙩ', 'Ꙩ'), - ('Ꙫ', 'Ꙫ'), - ('Ꙭ', 'Ꙭ'), - ('Ꚁ', 'Ꚁ'), - ('Ꚃ', 'Ꚃ'), - ('Ꚅ', 'Ꚅ'), - ('Ꚇ', 'Ꚇ'), - ('Ꚉ', 'Ꚉ'), - ('Ꚋ', 'Ꚋ'), - ('Ꚍ', 'Ꚍ'), - ('Ꚏ', 'Ꚏ'), - ('Ꚑ', 'Ꚑ'), - ('Ꚓ', 'Ꚓ'), - ('Ꚕ', 'Ꚕ'), - ('Ꚗ', 'Ꚗ'), - ('Ꚙ', 'Ꚙ'), - ('Ꚛ', 'Ꚛ'), - ('Ꜣ', 'Ꜣ'), - ('Ꜥ', 'Ꜥ'), - ('Ꜧ', 'Ꜧ'), - ('Ꜩ', 'Ꜩ'), - ('Ꜫ', 'Ꜫ'), - ('Ꜭ', 'Ꜭ'), - ('Ꜯ', 'Ꜯ'), - ('Ꜳ', 'Ꜳ'), - ('Ꜵ', 'Ꜵ'), - ('Ꜷ', 'Ꜷ'), - ('Ꜹ', 'Ꜹ'), - ('Ꜻ', 'Ꜻ'), - ('Ꜽ', 'Ꜽ'), - ('Ꜿ', 'Ꜿ'), - ('Ꝁ', 'Ꝁ'), - ('Ꝃ', 'Ꝃ'), - ('Ꝅ', 'Ꝅ'), - ('Ꝇ', 'Ꝇ'), - ('Ꝉ', 'Ꝉ'), - ('Ꝋ', 'Ꝋ'), - ('Ꝍ', 'Ꝍ'), - ('Ꝏ', 'Ꝏ'), - ('Ꝑ', 'Ꝑ'), - ('Ꝓ', 'Ꝓ'), - ('Ꝕ', 'Ꝕ'), - ('Ꝗ', 'Ꝗ'), - ('Ꝙ', 'Ꝙ'), - ('Ꝛ', 'Ꝛ'), - ('Ꝝ', 'Ꝝ'), - ('Ꝟ', 'Ꝟ'), - 
('Ꝡ', 'Ꝡ'), - ('Ꝣ', 'Ꝣ'), - ('Ꝥ', 'Ꝥ'), - ('Ꝧ', 'Ꝧ'), - ('Ꝩ', 'Ꝩ'), - ('Ꝫ', 'Ꝫ'), - ('Ꝭ', 'Ꝭ'), - ('Ꝯ', 'Ꝯ'), - ('Ꝺ', 'Ꝺ'), - ('Ꝼ', 'Ꝼ'), - ('Ᵹ', 'Ꝿ'), - ('Ꞁ', 'Ꞁ'), - ('Ꞃ', 'Ꞃ'), - ('Ꞅ', 'Ꞅ'), - ('Ꞇ', 'Ꞇ'), - ('Ꞌ', 'Ꞌ'), - ('Ɥ', 'Ɥ'), - ('Ꞑ', 'Ꞑ'), - ('Ꞓ', 'Ꞓ'), - ('Ꞗ', 'Ꞗ'), - ('Ꞙ', 'Ꞙ'), - ('Ꞛ', 'Ꞛ'), - ('Ꞝ', 'Ꞝ'), - ('Ꞟ', 'Ꞟ'), - ('Ꞡ', 'Ꞡ'), - ('Ꞣ', 'Ꞣ'), - ('Ꞥ', 'Ꞥ'), - ('Ꞧ', 'Ꞧ'), - ('Ꞩ', 'Ꞩ'), - ('Ɦ', 'Ɪ'), - ('Ʞ', 'Ꞵ'), - ('Ꞷ', 'Ꞷ'), - ('Ꞹ', 'Ꞹ'), - ('Ꞻ', 'Ꞻ'), - ('Ꞽ', 'Ꞽ'), - ('Ꞿ', 'Ꞿ'), - ('Ꟁ', 'Ꟁ'), - ('Ꟃ', 'Ꟃ'), - ('Ꞔ', 'Ꟈ'), - ('Ꟊ', 'Ꟊ'), - ('Ɤ', 'Ꟍ'), - ('Ꟑ', 'Ꟑ'), - ('Ꟗ', 'Ꟗ'), - ('Ꟙ', 'Ꟙ'), - ('Ꟛ', 'Ꟛ'), - ('Ƛ', 'Ƛ'), - ('Ꟶ', 'Ꟶ'), - ('A', 'Z'), - ('𐐀', '𐐧'), - ('𐒰', '𐓓'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐲀', '𐲲'), - ('𐵐', '𐵥'), - ('𑢠', '𑢿'), - ('𖹀', '𖹟'), - ('𝐀', '𝐙'), - ('𝐴', '𝑍'), - ('𝑨', '𝒁'), - ('𝒜', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒵'), - ('𝓐', '𝓩'), - ('𝔄', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔸', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕬', '𝖅'), - ('𝖠', '𝖹'), - ('𝗔', '𝗭'), - ('𝘈', '𝘡'), - ('𝘼', '𝙕'), - ('𝙰', '𝚉'), - ('𝚨', '𝛀'), - ('𝛢', '𝛺'), - ('𝜜', '𝜴'), - ('𝝖', '𝝮'), - ('𝞐', '𝞨'), - ('𝟊', '𝟊'), - ('𞤀', '𞤡'), -]; diff --git a/vendor/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs b/vendor/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs deleted file mode 100644 index 6a6ec2af..00000000 --- a/vendor/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs +++ /dev/null @@ -1,1420 +0,0 @@ -// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: -// -// ucd-generate grapheme-cluster-break ucd-16.0.0 --chars -// -// Unicode version: 16.0.0. -// -// ucd-generate 0.3.1 is available on crates.io. 
- -pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ - ("CR", CR), - ("Control", CONTROL), - ("Extend", EXTEND), - ("L", L), - ("LF", LF), - ("LV", LV), - ("LVT", LVT), - ("Prepend", PREPEND), - ("Regional_Indicator", REGIONAL_INDICATOR), - ("SpacingMark", SPACINGMARK), - ("T", T), - ("V", V), - ("ZWJ", ZWJ), -]; - -pub const CR: &'static [(char, char)] = &[('\r', '\r')]; - -pub const CONTROL: &'static [(char, char)] = &[ - ('\0', '\t'), - ('\u{b}', '\u{c}'), - ('\u{e}', '\u{1f}'), - ('\u{7f}', '\u{9f}'), - ('\u{ad}', '\u{ad}'), - ('\u{61c}', '\u{61c}'), - ('\u{180e}', '\u{180e}'), - ('\u{200b}', '\u{200b}'), - ('\u{200e}', '\u{200f}'), - ('\u{2028}', '\u{202e}'), - ('\u{2060}', '\u{206f}'), - ('\u{feff}', '\u{feff}'), - ('\u{fff0}', '\u{fffb}'), - ('\u{13430}', '\u{1343f}'), - ('\u{1bca0}', '\u{1bca3}'), - ('\u{1d173}', '\u{1d17a}'), - ('\u{e0000}', '\u{e001f}'), - ('\u{e0080}', '\u{e00ff}'), - ('\u{e01f0}', '\u{e0fff}'), -]; - -pub const EXTEND: &'static [(char, char)] = &[ - ('\u{300}', '\u{36f}'), - ('\u{483}', '\u{489}'), - ('\u{591}', '\u{5bd}'), - ('\u{5bf}', '\u{5bf}'), - ('\u{5c1}', '\u{5c2}'), - ('\u{5c4}', '\u{5c5}'), - ('\u{5c7}', '\u{5c7}'), - ('\u{610}', '\u{61a}'), - ('\u{64b}', '\u{65f}'), - ('\u{670}', '\u{670}'), - ('\u{6d6}', '\u{6dc}'), - ('\u{6df}', '\u{6e4}'), - ('\u{6e7}', '\u{6e8}'), - ('\u{6ea}', '\u{6ed}'), - ('\u{711}', '\u{711}'), - ('\u{730}', '\u{74a}'), - ('\u{7a6}', '\u{7b0}'), - ('\u{7eb}', '\u{7f3}'), - ('\u{7fd}', '\u{7fd}'), - ('\u{816}', '\u{819}'), - ('\u{81b}', '\u{823}'), - ('\u{825}', '\u{827}'), - ('\u{829}', '\u{82d}'), - ('\u{859}', '\u{85b}'), - ('\u{897}', '\u{89f}'), - ('\u{8ca}', '\u{8e1}'), - ('\u{8e3}', '\u{902}'), - ('\u{93a}', '\u{93a}'), - ('\u{93c}', '\u{93c}'), - ('\u{941}', '\u{948}'), - ('\u{94d}', '\u{94d}'), - ('\u{951}', '\u{957}'), - ('\u{962}', '\u{963}'), - ('\u{981}', '\u{981}'), - ('\u{9bc}', '\u{9bc}'), - ('\u{9be}', '\u{9be}'), - ('\u{9c1}', '\u{9c4}'), - ('\u{9cd}', 
'\u{9cd}'), - ('\u{9d7}', '\u{9d7}'), - ('\u{9e2}', '\u{9e3}'), - ('\u{9fe}', '\u{9fe}'), - ('\u{a01}', '\u{a02}'), - ('\u{a3c}', '\u{a3c}'), - ('\u{a41}', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4d}'), - ('\u{a51}', '\u{a51}'), - ('\u{a70}', '\u{a71}'), - ('\u{a75}', '\u{a75}'), - ('\u{a81}', '\u{a82}'), - ('\u{abc}', '\u{abc}'), - ('\u{ac1}', '\u{ac5}'), - ('\u{ac7}', '\u{ac8}'), - ('\u{acd}', '\u{acd}'), - ('\u{ae2}', '\u{ae3}'), - ('\u{afa}', '\u{aff}'), - ('\u{b01}', '\u{b01}'), - ('\u{b3c}', '\u{b3c}'), - ('\u{b3e}', '\u{b3f}'), - ('\u{b41}', '\u{b44}'), - ('\u{b4d}', '\u{b4d}'), - ('\u{b55}', '\u{b57}'), - ('\u{b62}', '\u{b63}'), - ('\u{b82}', '\u{b82}'), - ('\u{bbe}', '\u{bbe}'), - ('\u{bc0}', '\u{bc0}'), - ('\u{bcd}', '\u{bcd}'), - ('\u{bd7}', '\u{bd7}'), - ('\u{c00}', '\u{c00}'), - ('\u{c04}', '\u{c04}'), - ('\u{c3c}', '\u{c3c}'), - ('\u{c3e}', '\u{c40}'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4d}'), - ('\u{c55}', '\u{c56}'), - ('\u{c62}', '\u{c63}'), - ('\u{c81}', '\u{c81}'), - ('\u{cbc}', '\u{cbc}'), - ('\u{cbf}', '\u{cc0}'), - ('\u{cc2}', '\u{cc2}'), - ('\u{cc6}', '\u{cc8}'), - ('\u{cca}', '\u{ccd}'), - ('\u{cd5}', '\u{cd6}'), - ('\u{ce2}', '\u{ce3}'), - ('\u{d00}', '\u{d01}'), - ('\u{d3b}', '\u{d3c}'), - ('\u{d3e}', '\u{d3e}'), - ('\u{d41}', '\u{d44}'), - ('\u{d4d}', '\u{d4d}'), - ('\u{d57}', '\u{d57}'), - ('\u{d62}', '\u{d63}'), - ('\u{d81}', '\u{d81}'), - ('\u{dca}', '\u{dca}'), - ('\u{dcf}', '\u{dcf}'), - ('\u{dd2}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('\u{ddf}', '\u{ddf}'), - ('\u{e31}', '\u{e31}'), - ('\u{e34}', '\u{e3a}'), - ('\u{e47}', '\u{e4e}'), - ('\u{eb1}', '\u{eb1}'), - ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ece}'), - ('\u{f18}', '\u{f19}'), - ('\u{f35}', '\u{f35}'), - ('\u{f37}', '\u{f37}'), - ('\u{f39}', '\u{f39}'), - ('\u{f71}', '\u{f7e}'), - ('\u{f80}', '\u{f84}'), - ('\u{f86}', '\u{f87}'), - ('\u{f8d}', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('\u{fc6}', '\u{fc6}'), - ('\u{102d}', '\u{1030}'), - 
('\u{1032}', '\u{1037}'), - ('\u{1039}', '\u{103a}'), - ('\u{103d}', '\u{103e}'), - ('\u{1058}', '\u{1059}'), - ('\u{105e}', '\u{1060}'), - ('\u{1071}', '\u{1074}'), - ('\u{1082}', '\u{1082}'), - ('\u{1085}', '\u{1086}'), - ('\u{108d}', '\u{108d}'), - ('\u{109d}', '\u{109d}'), - ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1715}'), - ('\u{1732}', '\u{1734}'), - ('\u{1752}', '\u{1753}'), - ('\u{1772}', '\u{1773}'), - ('\u{17b4}', '\u{17b5}'), - ('\u{17b7}', '\u{17bd}'), - ('\u{17c6}', '\u{17c6}'), - ('\u{17c9}', '\u{17d3}'), - ('\u{17dd}', '\u{17dd}'), - ('\u{180b}', '\u{180d}'), - ('\u{180f}', '\u{180f}'), - ('\u{1885}', '\u{1886}'), - ('\u{18a9}', '\u{18a9}'), - ('\u{1920}', '\u{1922}'), - ('\u{1927}', '\u{1928}'), - ('\u{1932}', '\u{1932}'), - ('\u{1939}', '\u{193b}'), - ('\u{1a17}', '\u{1a18}'), - ('\u{1a1b}', '\u{1a1b}'), - ('\u{1a56}', '\u{1a56}'), - ('\u{1a58}', '\u{1a5e}'), - ('\u{1a60}', '\u{1a60}'), - ('\u{1a62}', '\u{1a62}'), - ('\u{1a65}', '\u{1a6c}'), - ('\u{1a73}', '\u{1a7c}'), - ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ace}'), - ('\u{1b00}', '\u{1b03}'), - ('\u{1b34}', '\u{1b3d}'), - ('\u{1b42}', '\u{1b44}'), - ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '\u{1b81}'), - ('\u{1ba2}', '\u{1ba5}'), - ('\u{1ba8}', '\u{1bad}'), - ('\u{1be6}', '\u{1be6}'), - ('\u{1be8}', '\u{1be9}'), - ('\u{1bed}', '\u{1bed}'), - ('\u{1bef}', '\u{1bf3}'), - ('\u{1c2c}', '\u{1c33}'), - ('\u{1c36}', '\u{1c37}'), - ('\u{1cd0}', '\u{1cd2}'), - ('\u{1cd4}', '\u{1ce0}'), - ('\u{1ce2}', '\u{1ce8}'), - ('\u{1ced}', '\u{1ced}'), - ('\u{1cf4}', '\u{1cf4}'), - ('\u{1cf8}', '\u{1cf9}'), - ('\u{1dc0}', '\u{1dff}'), - ('\u{200c}', '\u{200c}'), - ('\u{20d0}', '\u{20f0}'), - ('\u{2cef}', '\u{2cf1}'), - ('\u{2d7f}', '\u{2d7f}'), - ('\u{2de0}', '\u{2dff}'), - ('\u{302a}', '\u{302f}'), - ('\u{3099}', '\u{309a}'), - ('\u{a66f}', '\u{a672}'), - ('\u{a674}', '\u{a67d}'), - ('\u{a69e}', '\u{a69f}'), - ('\u{a6f0}', '\u{a6f1}'), - ('\u{a802}', '\u{a802}'), - ('\u{a806}', '\u{a806}'), - 
('\u{a80b}', '\u{a80b}'), - ('\u{a825}', '\u{a826}'), - ('\u{a82c}', '\u{a82c}'), - ('\u{a8c4}', '\u{a8c5}'), - ('\u{a8e0}', '\u{a8f1}'), - ('\u{a8ff}', '\u{a8ff}'), - ('\u{a926}', '\u{a92d}'), - ('\u{a947}', '\u{a951}'), - ('\u{a953}', '\u{a953}'), - ('\u{a980}', '\u{a982}'), - ('\u{a9b3}', '\u{a9b3}'), - ('\u{a9b6}', '\u{a9b9}'), - ('\u{a9bc}', '\u{a9bd}'), - ('\u{a9c0}', '\u{a9c0}'), - ('\u{a9e5}', '\u{a9e5}'), - ('\u{aa29}', '\u{aa2e}'), - ('\u{aa31}', '\u{aa32}'), - ('\u{aa35}', '\u{aa36}'), - ('\u{aa43}', '\u{aa43}'), - ('\u{aa4c}', '\u{aa4c}'), - ('\u{aa7c}', '\u{aa7c}'), - ('\u{aab0}', '\u{aab0}'), - ('\u{aab2}', '\u{aab4}'), - ('\u{aab7}', '\u{aab8}'), - ('\u{aabe}', '\u{aabf}'), - ('\u{aac1}', '\u{aac1}'), - ('\u{aaec}', '\u{aaed}'), - ('\u{aaf6}', '\u{aaf6}'), - ('\u{abe5}', '\u{abe5}'), - ('\u{abe8}', '\u{abe8}'), - ('\u{abed}', '\u{abed}'), - ('\u{fb1e}', '\u{fb1e}'), - ('\u{fe00}', '\u{fe0f}'), - ('\u{fe20}', '\u{fe2f}'), - ('\u{ff9e}', '\u{ff9f}'), - ('\u{101fd}', '\u{101fd}'), - ('\u{102e0}', '\u{102e0}'), - ('\u{10376}', '\u{1037a}'), - ('\u{10a01}', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '\u{10a0f}'), - ('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '\u{10a3f}'), - ('\u{10ae5}', '\u{10ae6}'), - ('\u{10d24}', '\u{10d27}'), - ('\u{10d69}', '\u{10d6d}'), - ('\u{10eab}', '\u{10eac}'), - ('\u{10efc}', '\u{10eff}'), - ('\u{10f46}', '\u{10f50}'), - ('\u{10f82}', '\u{10f85}'), - ('\u{11001}', '\u{11001}'), - ('\u{11038}', '\u{11046}'), - ('\u{11070}', '\u{11070}'), - ('\u{11073}', '\u{11074}'), - ('\u{1107f}', '\u{11081}'), - ('\u{110b3}', '\u{110b6}'), - ('\u{110b9}', '\u{110ba}'), - ('\u{110c2}', '\u{110c2}'), - ('\u{11100}', '\u{11102}'), - ('\u{11127}', '\u{1112b}'), - ('\u{1112d}', '\u{11134}'), - ('\u{11173}', '\u{11173}'), - ('\u{11180}', '\u{11181}'), - ('\u{111b6}', '\u{111be}'), - ('\u{111c0}', '\u{111c0}'), - ('\u{111c9}', '\u{111cc}'), - ('\u{111cf}', '\u{111cf}'), - ('\u{1122f}', '\u{11231}'), - ('\u{11234}', '\u{11237}'), - 
('\u{1123e}', '\u{1123e}'), - ('\u{11241}', '\u{11241}'), - ('\u{112df}', '\u{112df}'), - ('\u{112e3}', '\u{112ea}'), - ('\u{11300}', '\u{11301}'), - ('\u{1133b}', '\u{1133c}'), - ('\u{1133e}', '\u{1133e}'), - ('\u{11340}', '\u{11340}'), - ('\u{1134d}', '\u{1134d}'), - ('\u{11357}', '\u{11357}'), - ('\u{11366}', '\u{1136c}'), - ('\u{11370}', '\u{11374}'), - ('\u{113b8}', '\u{113b8}'), - ('\u{113bb}', '\u{113c0}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '\u{113c9}'), - ('\u{113ce}', '\u{113d0}'), - ('\u{113d2}', '\u{113d2}'), - ('\u{113e1}', '\u{113e2}'), - ('\u{11438}', '\u{1143f}'), - ('\u{11442}', '\u{11444}'), - ('\u{11446}', '\u{11446}'), - ('\u{1145e}', '\u{1145e}'), - ('\u{114b0}', '\u{114b0}'), - ('\u{114b3}', '\u{114b8}'), - ('\u{114ba}', '\u{114ba}'), - ('\u{114bd}', '\u{114bd}'), - ('\u{114bf}', '\u{114c0}'), - ('\u{114c2}', '\u{114c3}'), - ('\u{115af}', '\u{115af}'), - ('\u{115b2}', '\u{115b5}'), - ('\u{115bc}', '\u{115bd}'), - ('\u{115bf}', '\u{115c0}'), - ('\u{115dc}', '\u{115dd}'), - ('\u{11633}', '\u{1163a}'), - ('\u{1163d}', '\u{1163d}'), - ('\u{1163f}', '\u{11640}'), - ('\u{116ab}', '\u{116ab}'), - ('\u{116ad}', '\u{116ad}'), - ('\u{116b0}', '\u{116b7}'), - ('\u{1171d}', '\u{1171d}'), - ('\u{1171f}', '\u{1171f}'), - ('\u{11722}', '\u{11725}'), - ('\u{11727}', '\u{1172b}'), - ('\u{1182f}', '\u{11837}'), - ('\u{11839}', '\u{1183a}'), - ('\u{11930}', '\u{11930}'), - ('\u{1193b}', '\u{1193e}'), - ('\u{11943}', '\u{11943}'), - ('\u{119d4}', '\u{119d7}'), - ('\u{119da}', '\u{119db}'), - ('\u{119e0}', '\u{119e0}'), - ('\u{11a01}', '\u{11a0a}'), - ('\u{11a33}', '\u{11a38}'), - ('\u{11a3b}', '\u{11a3e}'), - ('\u{11a47}', '\u{11a47}'), - ('\u{11a51}', '\u{11a56}'), - ('\u{11a59}', '\u{11a5b}'), - ('\u{11a8a}', '\u{11a96}'), - ('\u{11a98}', '\u{11a99}'), - ('\u{11c30}', '\u{11c36}'), - ('\u{11c38}', '\u{11c3d}'), - ('\u{11c3f}', '\u{11c3f}'), - ('\u{11c92}', '\u{11ca7}'), - ('\u{11caa}', '\u{11cb0}'), - ('\u{11cb2}', 
'\u{11cb3}'), - ('\u{11cb5}', '\u{11cb6}'), - ('\u{11d31}', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d45}'), - ('\u{11d47}', '\u{11d47}'), - ('\u{11d90}', '\u{11d91}'), - ('\u{11d95}', '\u{11d95}'), - ('\u{11d97}', '\u{11d97}'), - ('\u{11ef3}', '\u{11ef4}'), - ('\u{11f00}', '\u{11f01}'), - ('\u{11f36}', '\u{11f3a}'), - ('\u{11f40}', '\u{11f42}'), - ('\u{11f5a}', '\u{11f5a}'), - ('\u{13440}', '\u{13440}'), - ('\u{13447}', '\u{13455}'), - ('\u{1611e}', '\u{16129}'), - ('\u{1612d}', '\u{1612f}'), - ('\u{16af0}', '\u{16af4}'), - ('\u{16b30}', '\u{16b36}'), - ('\u{16f4f}', '\u{16f4f}'), - ('\u{16f8f}', '\u{16f92}'), - ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), - ('\u{1bc9d}', '\u{1bc9e}'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d165}', '\u{1d169}'), - ('\u{1d16d}', '\u{1d172}'), - ('\u{1d17b}', '\u{1d182}'), - ('\u{1d185}', '\u{1d18b}'), - ('\u{1d1aa}', '\u{1d1ad}'), - ('\u{1d242}', '\u{1d244}'), - ('\u{1da00}', '\u{1da36}'), - ('\u{1da3b}', '\u{1da6c}'), - ('\u{1da75}', '\u{1da75}'), - ('\u{1da84}', '\u{1da84}'), - ('\u{1da9b}', '\u{1da9f}'), - ('\u{1daa1}', '\u{1daaf}'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), - ('\u{1e08f}', '\u{1e08f}'), - ('\u{1e130}', '\u{1e136}'), - ('\u{1e2ae}', '\u{1e2ae}'), - ('\u{1e2ec}', '\u{1e2ef}'), - ('\u{1e4ec}', '\u{1e4ef}'), - ('\u{1e5ee}', '\u{1e5ef}'), - ('\u{1e8d0}', '\u{1e8d6}'), - ('\u{1e944}', '\u{1e94a}'), - ('🏻', '🏿'), - ('\u{e0020}', '\u{e007f}'), - ('\u{e0100}', '\u{e01ef}'), -]; - -pub const L: &'static [(char, char)] = &[('ᄀ', 'ᅟ'), ('ꥠ', 'ꥼ')]; - -pub const LF: &'static [(char, char)] = &[('\n', '\n')]; - -pub const LV: &'static [(char, char)] = &[ - ('가', '가'), - ('개', '개'), - ('갸', '갸'), - ('걔', '걔'), - ('거', '거'), - ('게', '게'), - ('겨', '겨'), - ('계', '계'), - ('고', '고'), - ('과', '과'), - ('괘', '괘'), - ('괴', '괴'), - 
('교', '교'), - ('구', '구'), - ('궈', '궈'), - ('궤', '궤'), - ('귀', '귀'), - ('규', '규'), - ('그', '그'), - ('긔', '긔'), - ('기', '기'), - ('까', '까'), - ('깨', '깨'), - ('꺄', '꺄'), - ('꺠', '꺠'), - ('꺼', '꺼'), - ('께', '께'), - ('껴', '껴'), - ('꼐', '꼐'), - ('꼬', '꼬'), - ('꽈', '꽈'), - ('꽤', '꽤'), - ('꾀', '꾀'), - ('꾜', '꾜'), - ('꾸', '꾸'), - ('꿔', '꿔'), - ('꿰', '꿰'), - ('뀌', '뀌'), - ('뀨', '뀨'), - ('끄', '끄'), - ('끠', '끠'), - ('끼', '끼'), - ('나', '나'), - ('내', '내'), - ('냐', '냐'), - ('냬', '냬'), - ('너', '너'), - ('네', '네'), - ('녀', '녀'), - ('녜', '녜'), - ('노', '노'), - ('놔', '놔'), - ('놰', '놰'), - ('뇌', '뇌'), - ('뇨', '뇨'), - ('누', '누'), - ('눠', '눠'), - ('눼', '눼'), - ('뉘', '뉘'), - ('뉴', '뉴'), - ('느', '느'), - ('늬', '늬'), - ('니', '니'), - ('다', '다'), - ('대', '대'), - ('댜', '댜'), - ('댸', '댸'), - ('더', '더'), - ('데', '데'), - ('뎌', '뎌'), - ('뎨', '뎨'), - ('도', '도'), - ('돠', '돠'), - ('돼', '돼'), - ('되', '되'), - ('됴', '됴'), - ('두', '두'), - ('둬', '둬'), - ('뒈', '뒈'), - ('뒤', '뒤'), - ('듀', '듀'), - ('드', '드'), - ('듸', '듸'), - ('디', '디'), - ('따', '따'), - ('때', '때'), - ('땨', '땨'), - ('떄', '떄'), - ('떠', '떠'), - ('떼', '떼'), - ('뗘', '뗘'), - ('뗴', '뗴'), - ('또', '또'), - ('똬', '똬'), - ('뙈', '뙈'), - ('뙤', '뙤'), - ('뚀', '뚀'), - ('뚜', '뚜'), - ('뚸', '뚸'), - ('뛔', '뛔'), - ('뛰', '뛰'), - ('뜌', '뜌'), - ('뜨', '뜨'), - ('띄', '띄'), - ('띠', '띠'), - ('라', '라'), - ('래', '래'), - ('랴', '랴'), - ('럐', '럐'), - ('러', '러'), - ('레', '레'), - ('려', '려'), - ('례', '례'), - ('로', '로'), - ('롸', '롸'), - ('뢔', '뢔'), - ('뢰', '뢰'), - ('료', '료'), - ('루', '루'), - ('뤄', '뤄'), - ('뤠', '뤠'), - ('뤼', '뤼'), - ('류', '류'), - ('르', '르'), - ('릐', '릐'), - ('리', '리'), - ('마', '마'), - ('매', '매'), - ('먀', '먀'), - ('먜', '먜'), - ('머', '머'), - ('메', '메'), - ('며', '며'), - ('몌', '몌'), - ('모', '모'), - ('뫄', '뫄'), - ('뫠', '뫠'), - ('뫼', '뫼'), - ('묘', '묘'), - ('무', '무'), - ('뭐', '뭐'), - ('뭬', '뭬'), - ('뮈', '뮈'), - ('뮤', '뮤'), - ('므', '므'), - ('믜', '믜'), - ('미', '미'), - ('바', '바'), - ('배', '배'), - ('뱌', '뱌'), - ('뱨', '뱨'), - ('버', '버'), - ('베', '베'), - ('벼', '벼'), - ('볘', '볘'), 
- ('보', '보'), - ('봐', '봐'), - ('봬', '봬'), - ('뵈', '뵈'), - ('뵤', '뵤'), - ('부', '부'), - ('붜', '붜'), - ('붸', '붸'), - ('뷔', '뷔'), - ('뷰', '뷰'), - ('브', '브'), - ('븨', '븨'), - ('비', '비'), - ('빠', '빠'), - ('빼', '빼'), - ('뺘', '뺘'), - ('뺴', '뺴'), - ('뻐', '뻐'), - ('뻬', '뻬'), - ('뼈', '뼈'), - ('뼤', '뼤'), - ('뽀', '뽀'), - ('뽜', '뽜'), - ('뽸', '뽸'), - ('뾔', '뾔'), - ('뾰', '뾰'), - ('뿌', '뿌'), - ('뿨', '뿨'), - ('쀄', '쀄'), - ('쀠', '쀠'), - ('쀼', '쀼'), - ('쁘', '쁘'), - ('쁴', '쁴'), - ('삐', '삐'), - ('사', '사'), - ('새', '새'), - ('샤', '샤'), - ('섀', '섀'), - ('서', '서'), - ('세', '세'), - ('셔', '셔'), - ('셰', '셰'), - ('소', '소'), - ('솨', '솨'), - ('쇄', '쇄'), - ('쇠', '쇠'), - ('쇼', '쇼'), - ('수', '수'), - ('숴', '숴'), - ('쉐', '쉐'), - ('쉬', '쉬'), - ('슈', '슈'), - ('스', '스'), - ('싀', '싀'), - ('시', '시'), - ('싸', '싸'), - ('쌔', '쌔'), - ('쌰', '쌰'), - ('썌', '썌'), - ('써', '써'), - ('쎄', '쎄'), - ('쎠', '쎠'), - ('쎼', '쎼'), - ('쏘', '쏘'), - ('쏴', '쏴'), - ('쐐', '쐐'), - ('쐬', '쐬'), - ('쑈', '쑈'), - ('쑤', '쑤'), - ('쒀', '쒀'), - ('쒜', '쒜'), - ('쒸', '쒸'), - ('쓔', '쓔'), - ('쓰', '쓰'), - ('씌', '씌'), - ('씨', '씨'), - ('아', '아'), - ('애', '애'), - ('야', '야'), - ('얘', '얘'), - ('어', '어'), - ('에', '에'), - ('여', '여'), - ('예', '예'), - ('오', '오'), - ('와', '와'), - ('왜', '왜'), - ('외', '외'), - ('요', '요'), - ('우', '우'), - ('워', '워'), - ('웨', '웨'), - ('위', '위'), - ('유', '유'), - ('으', '으'), - ('의', '의'), - ('이', '이'), - ('자', '자'), - ('재', '재'), - ('쟈', '쟈'), - ('쟤', '쟤'), - ('저', '저'), - ('제', '제'), - ('져', '져'), - ('졔', '졔'), - ('조', '조'), - ('좌', '좌'), - ('좨', '좨'), - ('죄', '죄'), - ('죠', '죠'), - ('주', '주'), - ('줘', '줘'), - ('줴', '줴'), - ('쥐', '쥐'), - ('쥬', '쥬'), - ('즈', '즈'), - ('즤', '즤'), - ('지', '지'), - ('짜', '짜'), - ('째', '째'), - ('쨔', '쨔'), - ('쨰', '쨰'), - ('쩌', '쩌'), - ('쩨', '쩨'), - ('쪄', '쪄'), - ('쪠', '쪠'), - ('쪼', '쪼'), - ('쫘', '쫘'), - ('쫴', '쫴'), - ('쬐', '쬐'), - ('쬬', '쬬'), - ('쭈', '쭈'), - ('쭤', '쭤'), - ('쮀', '쮀'), - ('쮜', '쮜'), - ('쮸', '쮸'), - ('쯔', '쯔'), - ('쯰', '쯰'), - ('찌', '찌'), - ('차', '차'), - ('채', '채'), - ('챠', '챠'), - ('챼', 
'챼'), - ('처', '처'), - ('체', '체'), - ('쳐', '쳐'), - ('쳬', '쳬'), - ('초', '초'), - ('촤', '촤'), - ('쵀', '쵀'), - ('최', '최'), - ('쵸', '쵸'), - ('추', '추'), - ('춰', '춰'), - ('췌', '췌'), - ('취', '취'), - ('츄', '츄'), - ('츠', '츠'), - ('츼', '츼'), - ('치', '치'), - ('카', '카'), - ('캐', '캐'), - ('캬', '캬'), - ('컈', '컈'), - ('커', '커'), - ('케', '케'), - ('켜', '켜'), - ('켸', '켸'), - ('코', '코'), - ('콰', '콰'), - ('쾌', '쾌'), - ('쾨', '쾨'), - ('쿄', '쿄'), - ('쿠', '쿠'), - ('쿼', '쿼'), - ('퀘', '퀘'), - ('퀴', '퀴'), - ('큐', '큐'), - ('크', '크'), - ('킈', '킈'), - ('키', '키'), - ('타', '타'), - ('태', '태'), - ('탸', '탸'), - ('턔', '턔'), - ('터', '터'), - ('테', '테'), - ('텨', '텨'), - ('톄', '톄'), - ('토', '토'), - ('톼', '톼'), - ('퇘', '퇘'), - ('퇴', '퇴'), - ('툐', '툐'), - ('투', '투'), - ('퉈', '퉈'), - ('퉤', '퉤'), - ('튀', '튀'), - ('튜', '튜'), - ('트', '트'), - ('틔', '틔'), - ('티', '티'), - ('파', '파'), - ('패', '패'), - ('퍄', '퍄'), - ('퍠', '퍠'), - ('퍼', '퍼'), - ('페', '페'), - ('펴', '펴'), - ('폐', '폐'), - ('포', '포'), - ('퐈', '퐈'), - ('퐤', '퐤'), - ('푀', '푀'), - ('표', '표'), - ('푸', '푸'), - ('풔', '풔'), - ('풰', '풰'), - ('퓌', '퓌'), - ('퓨', '퓨'), - ('프', '프'), - ('픠', '픠'), - ('피', '피'), - ('하', '하'), - ('해', '해'), - ('햐', '햐'), - ('햬', '햬'), - ('허', '허'), - ('헤', '헤'), - ('혀', '혀'), - ('혜', '혜'), - ('호', '호'), - ('화', '화'), - ('홰', '홰'), - ('회', '회'), - ('효', '효'), - ('후', '후'), - ('훠', '훠'), - ('훼', '훼'), - ('휘', '휘'), - ('휴', '휴'), - ('흐', '흐'), - ('희', '희'), - ('히', '히'), -]; - -pub const LVT: &'static [(char, char)] = &[ - ('각', '갛'), - ('객', '갷'), - ('갹', '걓'), - ('걕', '걯'), - ('걱', '겋'), - ('겍', '겧'), - ('격', '곃'), - ('곅', '곟'), - ('곡', '곻'), - ('곽', '괗'), - ('괙', '괳'), - ('괵', '굏'), - ('굑', '굫'), - ('국', '궇'), - ('궉', '궣'), - ('궥', '궿'), - ('귁', '귛'), - ('귝', '귷'), - ('극', '긓'), - ('긕', '긯'), - ('긱', '깋'), - ('깍', '깧'), - ('깩', '꺃'), - ('꺅', '꺟'), - ('꺡', '꺻'), - ('꺽', '껗'), - ('껙', '껳'), - ('껵', '꼏'), - ('꼑', '꼫'), - ('꼭', '꽇'), - ('꽉', '꽣'), - ('꽥', '꽿'), - ('꾁', '꾛'), - ('꾝', '꾷'), - ('꾹', '꿓'), - ('꿕', '꿯'), - ('꿱', '뀋'), - ('뀍', 
'뀧'), - ('뀩', '끃'), - ('끅', '끟'), - ('끡', '끻'), - ('끽', '낗'), - ('낙', '낳'), - ('낵', '냏'), - ('냑', '냫'), - ('냭', '넇'), - ('넉', '넣'), - ('넥', '넿'), - ('녁', '녛'), - ('녝', '녷'), - ('녹', '놓'), - ('놕', '놯'), - ('놱', '뇋'), - ('뇍', '뇧'), - ('뇩', '눃'), - ('눅', '눟'), - ('눡', '눻'), - ('눽', '뉗'), - ('뉙', '뉳'), - ('뉵', '늏'), - ('늑', '늫'), - ('늭', '닇'), - ('닉', '닣'), - ('닥', '닿'), - ('댁', '댛'), - ('댝', '댷'), - ('댹', '덓'), - ('덕', '덯'), - ('덱', '뎋'), - ('뎍', '뎧'), - ('뎩', '돃'), - ('독', '돟'), - ('돡', '돻'), - ('돽', '됗'), - ('됙', '됳'), - ('됵', '둏'), - ('둑', '둫'), - ('둭', '뒇'), - ('뒉', '뒣'), - ('뒥', '뒿'), - ('듁', '듛'), - ('득', '듷'), - ('듹', '딓'), - ('딕', '딯'), - ('딱', '땋'), - ('땍', '땧'), - ('땩', '떃'), - ('떅', '떟'), - ('떡', '떻'), - ('떽', '뗗'), - ('뗙', '뗳'), - ('뗵', '똏'), - ('똑', '똫'), - ('똭', '뙇'), - ('뙉', '뙣'), - ('뙥', '뙿'), - ('뚁', '뚛'), - ('뚝', '뚷'), - ('뚹', '뛓'), - ('뛕', '뛯'), - ('뛱', '뜋'), - ('뜍', '뜧'), - ('뜩', '띃'), - ('띅', '띟'), - ('띡', '띻'), - ('락', '랗'), - ('랙', '랳'), - ('략', '럏'), - ('럑', '럫'), - ('럭', '렇'), - ('렉', '렣'), - ('력', '렿'), - ('롁', '롛'), - ('록', '롷'), - ('롹', '뢓'), - ('뢕', '뢯'), - ('뢱', '룋'), - ('룍', '룧'), - ('룩', '뤃'), - ('뤅', '뤟'), - ('뤡', '뤻'), - ('뤽', '륗'), - ('륙', '륳'), - ('륵', '릏'), - ('릑', '릫'), - ('릭', '맇'), - ('막', '맣'), - ('맥', '맿'), - ('먁', '먛'), - ('먝', '먷'), - ('먹', '멓'), - ('멕', '멯'), - ('멱', '몋'), - ('몍', '몧'), - ('목', '뫃'), - ('뫅', '뫟'), - ('뫡', '뫻'), - ('뫽', '묗'), - ('묙', '묳'), - ('묵', '뭏'), - ('뭑', '뭫'), - ('뭭', '뮇'), - ('뮉', '뮣'), - ('뮥', '뮿'), - ('믁', '믛'), - ('믝', '믷'), - ('믹', '밓'), - ('박', '밯'), - ('백', '뱋'), - ('뱍', '뱧'), - ('뱩', '벃'), - ('벅', '벟'), - ('벡', '벻'), - ('벽', '볗'), - ('볙', '볳'), - ('복', '봏'), - ('봑', '봫'), - ('봭', '뵇'), - ('뵉', '뵣'), - ('뵥', '뵿'), - ('북', '붛'), - ('붝', '붷'), - ('붹', '뷓'), - ('뷕', '뷯'), - ('뷱', '븋'), - ('븍', '븧'), - ('븩', '빃'), - ('빅', '빟'), - ('빡', '빻'), - ('빽', '뺗'), - ('뺙', '뺳'), - ('뺵', '뻏'), - ('뻑', '뻫'), - ('뻭', '뼇'), - ('뼉', '뼣'), - ('뼥', '뼿'), - ('뽁', '뽛'), - ('뽝', '뽷'), - ('뽹', '뾓'), - ('뾕', '뾯'), - 
('뾱', '뿋'), - ('뿍', '뿧'), - ('뿩', '쀃'), - ('쀅', '쀟'), - ('쀡', '쀻'), - ('쀽', '쁗'), - ('쁙', '쁳'), - ('쁵', '삏'), - ('삑', '삫'), - ('삭', '샇'), - ('색', '샣'), - ('샥', '샿'), - ('섁', '섛'), - ('석', '섷'), - ('섹', '셓'), - ('셕', '셯'), - ('셱', '솋'), - ('속', '솧'), - ('솩', '쇃'), - ('쇅', '쇟'), - ('쇡', '쇻'), - ('쇽', '숗'), - ('숙', '숳'), - ('숵', '쉏'), - ('쉑', '쉫'), - ('쉭', '슇'), - ('슉', '슣'), - ('슥', '슿'), - ('싁', '싛'), - ('식', '싷'), - ('싹', '쌓'), - ('쌕', '쌯'), - ('쌱', '썋'), - ('썍', '썧'), - ('썩', '쎃'), - ('쎅', '쎟'), - ('쎡', '쎻'), - ('쎽', '쏗'), - ('쏙', '쏳'), - ('쏵', '쐏'), - ('쐑', '쐫'), - ('쐭', '쑇'), - ('쑉', '쑣'), - ('쑥', '쑿'), - ('쒁', '쒛'), - ('쒝', '쒷'), - ('쒹', '쓓'), - ('쓕', '쓯'), - ('쓱', '씋'), - ('씍', '씧'), - ('씩', '앃'), - ('악', '앟'), - ('액', '앻'), - ('약', '얗'), - ('얙', '얳'), - ('억', '엏'), - ('엑', '엫'), - ('역', '옇'), - ('옉', '옣'), - ('옥', '옿'), - ('왁', '왛'), - ('왝', '왷'), - ('왹', '욓'), - ('욕', '욯'), - ('욱', '웋'), - ('웍', '웧'), - ('웩', '윃'), - ('윅', '윟'), - ('육', '윻'), - ('윽', '읗'), - ('읙', '읳'), - ('익', '잏'), - ('작', '잫'), - ('잭', '쟇'), - ('쟉', '쟣'), - ('쟥', '쟿'), - ('적', '젛'), - ('젝', '젷'), - ('젹', '졓'), - ('졕', '졯'), - ('족', '좋'), - ('좍', '좧'), - ('좩', '죃'), - ('죅', '죟'), - ('죡', '죻'), - ('죽', '줗'), - ('줙', '줳'), - ('줵', '쥏'), - ('쥑', '쥫'), - ('쥭', '즇'), - ('즉', '즣'), - ('즥', '즿'), - ('직', '짛'), - ('짝', '짷'), - ('짹', '쨓'), - ('쨕', '쨯'), - ('쨱', '쩋'), - ('쩍', '쩧'), - ('쩩', '쪃'), - ('쪅', '쪟'), - ('쪡', '쪻'), - ('쪽', '쫗'), - ('쫙', '쫳'), - ('쫵', '쬏'), - ('쬑', '쬫'), - ('쬭', '쭇'), - ('쭉', '쭣'), - ('쭥', '쭿'), - ('쮁', '쮛'), - ('쮝', '쮷'), - ('쮹', '쯓'), - ('쯕', '쯯'), - ('쯱', '찋'), - ('찍', '찧'), - ('착', '챃'), - ('책', '챟'), - ('챡', '챻'), - ('챽', '첗'), - ('척', '첳'), - ('첵', '쳏'), - ('쳑', '쳫'), - ('쳭', '촇'), - ('촉', '촣'), - ('촥', '촿'), - ('쵁', '쵛'), - ('쵝', '쵷'), - ('쵹', '춓'), - ('축', '춯'), - ('춱', '췋'), - ('췍', '췧'), - ('췩', '츃'), - ('츅', '츟'), - ('측', '츻'), - ('츽', '칗'), - ('칙', '칳'), - ('칵', '캏'), - ('캑', '캫'), - ('캭', '컇'), - ('컉', '컣'), - ('컥', '컿'), - ('켁', '켛'), - ('켝', '켷'), - ('켹', '콓'), 
- ('콕', '콯'), - ('콱', '쾋'), - ('쾍', '쾧'), - ('쾩', '쿃'), - ('쿅', '쿟'), - ('쿡', '쿻'), - ('쿽', '퀗'), - ('퀙', '퀳'), - ('퀵', '큏'), - ('큑', '큫'), - ('큭', '킇'), - ('킉', '킣'), - ('킥', '킿'), - ('탁', '탛'), - ('택', '탷'), - ('탹', '턓'), - ('턕', '턯'), - ('턱', '텋'), - ('텍', '텧'), - ('텩', '톃'), - ('톅', '톟'), - ('톡', '톻'), - ('톽', '퇗'), - ('퇙', '퇳'), - ('퇵', '툏'), - ('툑', '툫'), - ('툭', '퉇'), - ('퉉', '퉣'), - ('퉥', '퉿'), - ('튁', '튛'), - ('튝', '튷'), - ('특', '틓'), - ('틕', '틯'), - ('틱', '팋'), - ('팍', '팧'), - ('팩', '퍃'), - ('퍅', '퍟'), - ('퍡', '퍻'), - ('퍽', '펗'), - ('펙', '펳'), - ('펵', '폏'), - ('폑', '폫'), - ('폭', '퐇'), - ('퐉', '퐣'), - ('퐥', '퐿'), - ('푁', '푛'), - ('푝', '푷'), - ('푹', '풓'), - ('풕', '풯'), - ('풱', '퓋'), - ('퓍', '퓧'), - ('퓩', '픃'), - ('픅', '픟'), - ('픡', '픻'), - ('픽', '핗'), - ('학', '핳'), - ('핵', '햏'), - ('햑', '햫'), - ('햭', '헇'), - ('헉', '헣'), - ('헥', '헿'), - ('혁', '혛'), - ('혝', '혷'), - ('혹', '홓'), - ('확', '홯'), - ('홱', '횋'), - ('획', '횧'), - ('횩', '훃'), - ('훅', '훟'), - ('훡', '훻'), - ('훽', '휗'), - ('휙', '휳'), - ('휵', '흏'), - ('흑', '흫'), - ('흭', '힇'), - ('힉', '힣'), -]; - -pub const PREPEND: &'static [(char, char)] = &[ - ('\u{600}', '\u{605}'), - ('\u{6dd}', '\u{6dd}'), - ('\u{70f}', '\u{70f}'), - ('\u{890}', '\u{891}'), - ('\u{8e2}', '\u{8e2}'), - ('ൎ', 'ൎ'), - ('\u{110bd}', '\u{110bd}'), - ('\u{110cd}', '\u{110cd}'), - ('𑇂', '𑇃'), - ('𑏑', '𑏑'), - ('𑤿', '𑤿'), - ('𑥁', '𑥁'), - ('𑨺', '𑨺'), - ('𑪄', '𑪉'), - ('𑵆', '𑵆'), - ('𑼂', '𑼂'), -]; - -pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('🇦', '🇿')]; - -pub const SPACINGMARK: &'static [(char, char)] = &[ - ('ः', 'ः'), - ('ऻ', 'ऻ'), - ('ा', 'ी'), - ('ॉ', 'ौ'), - ('ॎ', 'ॏ'), - ('ং', 'ঃ'), - ('ি', 'ী'), - ('ে', 'ৈ'), - ('ো', 'ৌ'), - ('ਃ', 'ਃ'), - ('ਾ', 'ੀ'), - ('ઃ', 'ઃ'), - ('ા', 'ી'), - ('ૉ', 'ૉ'), - ('ો', 'ૌ'), - ('ଂ', 'ଃ'), - ('ୀ', 'ୀ'), - ('େ', 'ୈ'), - ('ୋ', 'ୌ'), - ('ி', 'ி'), - ('ு', 'ூ'), - ('ெ', 'ை'), - ('ொ', 'ௌ'), - ('ఁ', 'ః'), - ('ు', 'ౄ'), - ('ಂ', 'ಃ'), - ('ಾ', 'ಾ'), - ('ು', 'ು'), - ('ೃ', 'ೄ'), - ('ೳ', 'ೳ'), - ('ം', 
'ഃ'), - ('ി', 'ീ'), - ('െ', 'ൈ'), - ('ൊ', 'ൌ'), - ('ං', 'ඃ'), - ('ැ', 'ෑ'), - ('ෘ', 'ෞ'), - ('ෲ', 'ෳ'), - ('ำ', 'ำ'), - ('ຳ', 'ຳ'), - ('༾', '༿'), - ('ཿ', 'ཿ'), - ('ေ', 'ေ'), - ('ျ', 'ြ'), - ('ၖ', 'ၗ'), - ('ႄ', 'ႄ'), - ('ា', 'ា'), - ('ើ', 'ៅ'), - ('ះ', 'ៈ'), - ('ᤣ', 'ᤦ'), - ('ᤩ', 'ᤫ'), - ('ᤰ', 'ᤱ'), - ('ᤳ', 'ᤸ'), - ('ᨙ', 'ᨚ'), - ('ᩕ', 'ᩕ'), - ('ᩗ', 'ᩗ'), - ('ᩭ', 'ᩲ'), - ('ᬄ', 'ᬄ'), - ('ᬾ', 'ᭁ'), - ('ᮂ', 'ᮂ'), - ('ᮡ', 'ᮡ'), - ('ᮦ', 'ᮧ'), - ('ᯧ', 'ᯧ'), - ('ᯪ', 'ᯬ'), - ('ᯮ', 'ᯮ'), - ('ᰤ', 'ᰫ'), - ('ᰴ', 'ᰵ'), - ('᳡', '᳡'), - ('᳷', '᳷'), - ('ꠣ', 'ꠤ'), - ('ꠧ', 'ꠧ'), - ('ꢀ', 'ꢁ'), - ('ꢴ', 'ꣃ'), - ('ꥒ', 'ꥒ'), - ('ꦃ', 'ꦃ'), - ('ꦴ', 'ꦵ'), - ('ꦺ', 'ꦻ'), - ('ꦾ', 'ꦿ'), - ('ꨯ', 'ꨰ'), - ('ꨳ', 'ꨴ'), - ('ꩍ', 'ꩍ'), - ('ꫫ', 'ꫫ'), - ('ꫮ', 'ꫯ'), - ('ꫵ', 'ꫵ'), - ('ꯣ', 'ꯤ'), - ('ꯦ', 'ꯧ'), - ('ꯩ', 'ꯪ'), - ('꯬', '꯬'), - ('𑀀', '𑀀'), - ('𑀂', '𑀂'), - ('𑂂', '𑂂'), - ('𑂰', '𑂲'), - ('𑂷', '𑂸'), - ('𑄬', '𑄬'), - ('𑅅', '𑅆'), - ('𑆂', '𑆂'), - ('𑆳', '𑆵'), - ('𑆿', '𑆿'), - ('𑇎', '𑇎'), - ('𑈬', '𑈮'), - ('𑈲', '𑈳'), - ('𑋠', '𑋢'), - ('𑌂', '𑌃'), - ('𑌿', '𑌿'), - ('𑍁', '𑍄'), - ('𑍇', '𑍈'), - ('𑍋', '𑍌'), - ('𑍢', '𑍣'), - ('𑎹', '𑎺'), - ('𑏊', '𑏊'), - ('𑏌', '𑏍'), - ('𑐵', '𑐷'), - ('𑑀', '𑑁'), - ('𑑅', '𑑅'), - ('𑒱', '𑒲'), - ('𑒹', '𑒹'), - ('𑒻', '𑒼'), - ('𑒾', '𑒾'), - ('𑓁', '𑓁'), - ('𑖰', '𑖱'), - ('𑖸', '𑖻'), - ('𑖾', '𑖾'), - ('𑘰', '𑘲'), - ('𑘻', '𑘼'), - ('𑘾', '𑘾'), - ('𑚬', '𑚬'), - ('𑚮', '𑚯'), - ('𑜞', '𑜞'), - ('𑜦', '𑜦'), - ('𑠬', '𑠮'), - ('𑠸', '𑠸'), - ('𑤱', '𑤵'), - ('𑤷', '𑤸'), - ('𑥀', '𑥀'), - ('𑥂', '𑥂'), - ('𑧑', '𑧓'), - ('𑧜', '𑧟'), - ('𑧤', '𑧤'), - ('𑨹', '𑨹'), - ('𑩗', '𑩘'), - ('𑪗', '𑪗'), - ('𑰯', '𑰯'), - ('𑰾', '𑰾'), - ('𑲩', '𑲩'), - ('𑲱', '𑲱'), - ('𑲴', '𑲴'), - ('𑶊', '𑶎'), - ('𑶓', '𑶔'), - ('𑶖', '𑶖'), - ('𑻵', '𑻶'), - ('𑼃', '𑼃'), - ('𑼴', '𑼵'), - ('𑼾', '𑼿'), - ('𖄪', '𖄬'), - ('𖽑', '𖾇'), -]; - -pub const T: &'static [(char, char)] = &[('ᆨ', 'ᇿ'), ('ퟋ', 'ퟻ')]; - -pub const V: &'static [(char, char)] = - &[('ᅠ', 'ᆧ'), ('ힰ', 'ퟆ'), ('𖵣', '𖵣'), ('𖵧', '𖵪')]; - -pub const ZWJ: &'static [(char, char)] = &[('\u{200d}', '\u{200d}')]; diff --git 
a/vendor/regex-syntax/src/unicode_tables/mod.rs b/vendor/regex-syntax/src/unicode_tables/mod.rs deleted file mode 100644 index 20736c7a..00000000 --- a/vendor/regex-syntax/src/unicode_tables/mod.rs +++ /dev/null @@ -1,57 +0,0 @@ -#[cfg(feature = "unicode-age")] -pub mod age; - -#[cfg(feature = "unicode-case")] -pub mod case_folding_simple; - -#[cfg(feature = "unicode-gencat")] -pub mod general_category; - -#[cfg(feature = "unicode-segment")] -pub mod grapheme_cluster_break; - -#[cfg(all(feature = "unicode-perl", not(feature = "unicode-gencat")))] -#[allow(dead_code)] -pub mod perl_decimal; - -#[cfg(all(feature = "unicode-perl", not(feature = "unicode-bool")))] -#[allow(dead_code)] -pub mod perl_space; - -#[cfg(feature = "unicode-perl")] -pub mod perl_word; - -#[cfg(feature = "unicode-bool")] -pub mod property_bool; - -#[cfg(any( - feature = "unicode-age", - feature = "unicode-bool", - feature = "unicode-gencat", - feature = "unicode-perl", - feature = "unicode-script", - feature = "unicode-segment", -))] -pub mod property_names; - -#[cfg(any( - feature = "unicode-age", - feature = "unicode-bool", - feature = "unicode-gencat", - feature = "unicode-perl", - feature = "unicode-script", - feature = "unicode-segment", -))] -pub mod property_values; - -#[cfg(feature = "unicode-script")] -pub mod script; - -#[cfg(feature = "unicode-script")] -pub mod script_extension; - -#[cfg(feature = "unicode-segment")] -pub mod sentence_break; - -#[cfg(feature = "unicode-segment")] -pub mod word_break; diff --git a/vendor/regex-syntax/src/unicode_tables/perl_decimal.rs b/vendor/regex-syntax/src/unicode_tables/perl_decimal.rs deleted file mode 100644 index 18996c2b..00000000 --- a/vendor/regex-syntax/src/unicode_tables/perl_decimal.rs +++ /dev/null @@ -1,84 +0,0 @@ -// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: -// -// ucd-generate general-category ucd-16.0.0 --chars --include decimalnumber -// -// Unicode version: 16.0.0. 
-// -// ucd-generate 0.3.1 is available on crates.io. - -pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = - &[("Decimal_Number", DECIMAL_NUMBER)]; - -pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ - ('0', '9'), - ('٠', '٩'), - ('۰', '۹'), - ('߀', '߉'), - ('०', '९'), - ('০', '৯'), - ('੦', '੯'), - ('૦', '૯'), - ('୦', '୯'), - ('௦', '௯'), - ('౦', '౯'), - ('೦', '೯'), - ('൦', '൯'), - ('෦', '෯'), - ('๐', '๙'), - ('໐', '໙'), - ('༠', '༩'), - ('၀', '၉'), - ('႐', '႙'), - ('០', '៩'), - ('᠐', '᠙'), - ('᥆', '᥏'), - ('᧐', '᧙'), - ('᪀', '᪉'), - ('᪐', '᪙'), - ('᭐', '᭙'), - ('᮰', '᮹'), - ('᱀', '᱉'), - ('᱐', '᱙'), - ('꘠', '꘩'), - ('꣐', '꣙'), - ('꤀', '꤉'), - ('꧐', '꧙'), - ('꧰', '꧹'), - ('꩐', '꩙'), - ('꯰', '꯹'), - ('0', '9'), - ('𐒠', '𐒩'), - ('𐴰', '𐴹'), - ('𐵀', '𐵉'), - ('𑁦', '𑁯'), - ('𑃰', '𑃹'), - ('𑄶', '𑄿'), - ('𑇐', '𑇙'), - ('𑋰', '𑋹'), - ('𑑐', '𑑙'), - ('𑓐', '𑓙'), - ('𑙐', '𑙙'), - ('𑛀', '𑛉'), - ('𑛐', '𑛣'), - ('𑜰', '𑜹'), - ('𑣠', '𑣩'), - ('𑥐', '𑥙'), - ('𑯰', '𑯹'), - ('𑱐', '𑱙'), - ('𑵐', '𑵙'), - ('𑶠', '𑶩'), - ('𑽐', '𑽙'), - ('𖄰', '𖄹'), - ('𖩠', '𖩩'), - ('𖫀', '𖫉'), - ('𖭐', '𖭙'), - ('𖵰', '𖵹'), - ('𜳰', '𜳹'), - ('𝟎', '𝟿'), - ('𞅀', '𞅉'), - ('𞋰', '𞋹'), - ('𞓰', '𞓹'), - ('𞗱', '𞗺'), - ('𞥐', '𞥙'), - ('🯰', '🯹'), -]; diff --git a/vendor/regex-syntax/src/unicode_tables/perl_space.rs b/vendor/regex-syntax/src/unicode_tables/perl_space.rs deleted file mode 100644 index c969e373..00000000 --- a/vendor/regex-syntax/src/unicode_tables/perl_space.rs +++ /dev/null @@ -1,23 +0,0 @@ -// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: -// -// ucd-generate property-bool ucd-16.0.0 --chars --include whitespace -// -// Unicode version: 16.0.0. -// -// ucd-generate 0.3.1 is available on crates.io. 
- -pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = - &[("White_Space", WHITE_SPACE)]; - -pub const WHITE_SPACE: &'static [(char, char)] = &[ - ('\t', '\r'), - (' ', ' '), - ('\u{85}', '\u{85}'), - ('\u{a0}', '\u{a0}'), - ('\u{1680}', '\u{1680}'), - ('\u{2000}', '\u{200a}'), - ('\u{2028}', '\u{2029}'), - ('\u{202f}', '\u{202f}'), - ('\u{205f}', '\u{205f}'), - ('\u{3000}', '\u{3000}'), -]; diff --git a/vendor/regex-syntax/src/unicode_tables/perl_word.rs b/vendor/regex-syntax/src/unicode_tables/perl_word.rs deleted file mode 100644 index 21c8c0f9..00000000 --- a/vendor/regex-syntax/src/unicode_tables/perl_word.rs +++ /dev/null @@ -1,806 +0,0 @@ -// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: -// -// ucd-generate perl-word ucd-16.0.0 --chars -// -// Unicode version: 16.0.0. -// -// ucd-generate 0.3.1 is available on crates.io. - -pub const PERL_WORD: &'static [(char, char)] = &[ - ('0', '9'), - ('A', 'Z'), - ('_', '_'), - ('a', 'z'), - ('ª', 'ª'), - ('µ', 'µ'), - ('º', 'º'), - ('À', 'Ö'), - ('Ø', 'ö'), - ('ø', 'ˁ'), - ('ˆ', 'ˑ'), - ('ˠ', 'ˤ'), - ('ˬ', 'ˬ'), - ('ˮ', 'ˮ'), - ('\u{300}', 'ʹ'), - ('Ͷ', 'ͷ'), - ('ͺ', 'ͽ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', 'ϵ'), - ('Ϸ', 'ҁ'), - ('\u{483}', 'ԯ'), - ('Ա', 'Ֆ'), - ('ՙ', 'ՙ'), - ('ՠ', 'ֈ'), - ('\u{591}', '\u{5bd}'), - ('\u{5bf}', '\u{5bf}'), - ('\u{5c1}', '\u{5c2}'), - ('\u{5c4}', '\u{5c5}'), - ('\u{5c7}', '\u{5c7}'), - ('א', 'ת'), - ('ׯ', 'ײ'), - ('\u{610}', '\u{61a}'), - ('ؠ', '٩'), - ('ٮ', 'ۓ'), - ('ە', '\u{6dc}'), - ('\u{6df}', '\u{6e8}'), - ('\u{6ea}', 'ۼ'), - ('ۿ', 'ۿ'), - ('ܐ', '\u{74a}'), - ('ݍ', 'ޱ'), - ('߀', 'ߵ'), - ('ߺ', 'ߺ'), - ('\u{7fd}', '\u{7fd}'), - ('ࠀ', '\u{82d}'), - ('ࡀ', '\u{85b}'), - ('ࡠ', 'ࡪ'), - ('ࡰ', 'ࢇ'), - ('ࢉ', 'ࢎ'), - ('\u{897}', '\u{8e1}'), - ('\u{8e3}', '\u{963}'), - ('०', '९'), - ('ॱ', 'ঃ'), - ('অ', 'ঌ'), - ('এ', 'ঐ'), - ('ও', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('\u{9bc}', '\u{9c4}'), - ('ে', 'ৈ'), - 
('ো', 'ৎ'), - ('\u{9d7}', '\u{9d7}'), - ('ড়', 'ঢ়'), - ('য়', '\u{9e3}'), - ('০', 'ৱ'), - ('ৼ', 'ৼ'), - ('\u{9fe}', '\u{9fe}'), - ('\u{a01}', 'ਃ'), - ('ਅ', 'ਊ'), - ('ਏ', 'ਐ'), - ('ਓ', 'ਨ'), - ('ਪ', 'ਰ'), - ('ਲ', 'ਲ਼'), - ('ਵ', 'ਸ਼'), - ('ਸ', 'ਹ'), - ('\u{a3c}', '\u{a3c}'), - ('ਾ', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4d}'), - ('\u{a51}', '\u{a51}'), - ('ਖ਼', 'ੜ'), - ('ਫ਼', 'ਫ਼'), - ('੦', '\u{a75}'), - ('\u{a81}', 'ઃ'), - ('અ', 'ઍ'), - ('એ', 'ઑ'), - ('ઓ', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('\u{abc}', '\u{ac5}'), - ('\u{ac7}', 'ૉ'), - ('ો', '\u{acd}'), - ('ૐ', 'ૐ'), - ('ૠ', '\u{ae3}'), - ('૦', '૯'), - ('ૹ', '\u{aff}'), - ('\u{b01}', 'ଃ'), - ('ଅ', 'ଌ'), - ('ଏ', 'ଐ'), - ('ଓ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 'ଳ'), - ('ଵ', 'ହ'), - ('\u{b3c}', '\u{b44}'), - ('େ', 'ୈ'), - ('ୋ', '\u{b4d}'), - ('\u{b55}', '\u{b57}'), - ('ଡ଼', 'ଢ଼'), - ('ୟ', '\u{b63}'), - ('୦', '୯'), - ('ୱ', 'ୱ'), - ('\u{b82}', 'ஃ'), - ('அ', 'ஊ'), - ('எ', 'ஐ'), - ('ஒ', 'க'), - ('ங', 'ச'), - ('ஜ', 'ஜ'), - ('ஞ', 'ட'), - ('ண', 'த'), - ('ந', 'ப'), - ('ம', 'ஹ'), - ('\u{bbe}', 'ூ'), - ('ெ', 'ை'), - ('ொ', '\u{bcd}'), - ('ௐ', 'ௐ'), - ('\u{bd7}', '\u{bd7}'), - ('௦', '௯'), - ('\u{c00}', 'ఌ'), - ('ఎ', 'ఐ'), - ('ఒ', 'న'), - ('ప', 'హ'), - ('\u{c3c}', 'ౄ'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4d}'), - ('\u{c55}', '\u{c56}'), - ('ౘ', 'ౚ'), - ('ౝ', 'ౝ'), - ('ౠ', '\u{c63}'), - ('౦', '౯'), - ('ಀ', 'ಃ'), - ('ಅ', 'ಌ'), - ('ಎ', 'ಐ'), - ('ಒ', 'ನ'), - ('ಪ', 'ಳ'), - ('ವ', 'ಹ'), - ('\u{cbc}', 'ೄ'), - ('\u{cc6}', '\u{cc8}'), - ('\u{cca}', '\u{ccd}'), - ('\u{cd5}', '\u{cd6}'), - ('ೝ', 'ೞ'), - ('ೠ', '\u{ce3}'), - ('೦', '೯'), - ('ೱ', 'ೳ'), - ('\u{d00}', 'ഌ'), - ('എ', 'ഐ'), - ('ഒ', '\u{d44}'), - ('െ', 'ൈ'), - ('ൊ', 'ൎ'), - ('ൔ', '\u{d57}'), - ('ൟ', '\u{d63}'), - ('൦', '൯'), - ('ൺ', 'ൿ'), - ('\u{d81}', 'ඃ'), - ('අ', 'ඖ'), - ('ක', 'න'), - ('ඳ', 'ර'), - ('ල', 'ල'), - ('ව', 'ෆ'), - ('\u{dca}', '\u{dca}'), - ('\u{dcf}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('ෘ', '\u{ddf}'), - ('෦', '෯'), - ('ෲ', 
'ෳ'), - ('ก', '\u{e3a}'), - ('เ', '\u{e4e}'), - ('๐', '๙'), - ('ກ', 'ຂ'), - ('ຄ', 'ຄ'), - ('ຆ', 'ຊ'), - ('ຌ', 'ຣ'), - ('ລ', 'ລ'), - ('ວ', 'ຽ'), - ('ເ', 'ໄ'), - ('ໆ', 'ໆ'), - ('\u{ec8}', '\u{ece}'), - ('໐', '໙'), - ('ໜ', 'ໟ'), - ('ༀ', 'ༀ'), - ('\u{f18}', '\u{f19}'), - ('༠', '༩'), - ('\u{f35}', '\u{f35}'), - ('\u{f37}', '\u{f37}'), - ('\u{f39}', '\u{f39}'), - ('༾', 'ཇ'), - ('ཉ', 'ཬ'), - ('\u{f71}', '\u{f84}'), - ('\u{f86}', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('\u{fc6}', '\u{fc6}'), - ('က', '၉'), - ('ၐ', '\u{109d}'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ა', 'ჺ'), - ('ჼ', 'ቈ'), - ('ቊ', 'ቍ'), - ('ቐ', 'ቖ'), - ('ቘ', 'ቘ'), - ('ቚ', 'ቝ'), - ('በ', 'ኈ'), - ('ኊ', 'ኍ'), - ('ነ', 'ኰ'), - ('ኲ', 'ኵ'), - ('ኸ', 'ኾ'), - ('ዀ', 'ዀ'), - ('ዂ', 'ዅ'), - ('ወ', 'ዖ'), - ('ዘ', 'ጐ'), - ('ጒ', 'ጕ'), - ('ጘ', 'ፚ'), - ('\u{135d}', '\u{135f}'), - ('ᎀ', 'ᎏ'), - ('Ꭰ', 'Ᏽ'), - ('ᏸ', 'ᏽ'), - ('ᐁ', 'ᙬ'), - ('ᙯ', 'ᙿ'), - ('ᚁ', 'ᚚ'), - ('ᚠ', 'ᛪ'), - ('ᛮ', 'ᛸ'), - ('ᜀ', '\u{1715}'), - ('ᜟ', '\u{1734}'), - ('ᝀ', '\u{1753}'), - ('ᝠ', 'ᝬ'), - ('ᝮ', 'ᝰ'), - ('\u{1772}', '\u{1773}'), - ('ក', '\u{17d3}'), - ('ៗ', 'ៗ'), - ('ៜ', '\u{17dd}'), - ('០', '៩'), - ('\u{180b}', '\u{180d}'), - ('\u{180f}', '᠙'), - ('ᠠ', 'ᡸ'), - ('ᢀ', 'ᢪ'), - ('ᢰ', 'ᣵ'), - ('ᤀ', 'ᤞ'), - ('\u{1920}', 'ᤫ'), - ('ᤰ', '\u{193b}'), - ('᥆', 'ᥭ'), - ('ᥰ', 'ᥴ'), - ('ᦀ', 'ᦫ'), - ('ᦰ', 'ᧉ'), - ('᧐', '᧙'), - ('ᨀ', '\u{1a1b}'), - ('ᨠ', '\u{1a5e}'), - ('\u{1a60}', '\u{1a7c}'), - ('\u{1a7f}', '᪉'), - ('᪐', '᪙'), - ('ᪧ', 'ᪧ'), - ('\u{1ab0}', '\u{1ace}'), - ('\u{1b00}', 'ᭌ'), - ('᭐', '᭙'), - ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '\u{1bf3}'), - ('ᰀ', '\u{1c37}'), - ('᱀', '᱉'), - ('ᱍ', 'ᱽ'), - ('ᲀ', 'ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('\u{1cd0}', '\u{1cd2}'), - ('\u{1cd4}', 'ᳺ'), - ('ᴀ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), - ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ᾼ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῌ'), - ('ῐ', 'ΐ'), - ('ῖ', 'Ί'), - ('ῠ', 'Ῥ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῼ'), - 
('\u{200c}', '\u{200d}'), - ('‿', '⁀'), - ('⁔', '⁔'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('\u{20d0}', '\u{20f0}'), - ('ℂ', 'ℂ'), - ('ℇ', 'ℇ'), - ('ℊ', 'ℓ'), - ('ℕ', 'ℕ'), - ('ℙ', 'ℝ'), - ('ℤ', 'ℤ'), - ('Ω', 'Ω'), - ('ℨ', 'ℨ'), - ('K', 'ℭ'), - ('ℯ', 'ℹ'), - ('ℼ', 'ℿ'), - ('ⅅ', 'ⅉ'), - ('ⅎ', 'ⅎ'), - ('Ⅰ', 'ↈ'), - ('Ⓐ', 'ⓩ'), - ('Ⰰ', 'ⳤ'), - ('Ⳬ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ⴰ', 'ⵧ'), - ('ⵯ', 'ⵯ'), - ('\u{2d7f}', 'ⶖ'), - ('ⶠ', 'ⶦ'), - ('ⶨ', 'ⶮ'), - ('ⶰ', 'ⶶ'), - ('ⶸ', 'ⶾ'), - ('ⷀ', 'ⷆ'), - ('ⷈ', 'ⷎ'), - ('ⷐ', 'ⷖ'), - ('ⷘ', 'ⷞ'), - ('\u{2de0}', '\u{2dff}'), - ('ⸯ', 'ⸯ'), - ('々', '〇'), - ('〡', '\u{302f}'), - ('〱', '〵'), - ('〸', '〼'), - ('ぁ', 'ゖ'), - ('\u{3099}', '\u{309a}'), - ('ゝ', 'ゟ'), - ('ァ', 'ヺ'), - ('ー', 'ヿ'), - ('ㄅ', 'ㄯ'), - ('ㄱ', 'ㆎ'), - ('ㆠ', 'ㆿ'), - ('ㇰ', 'ㇿ'), - ('㐀', '䶿'), - ('一', 'ꒌ'), - ('ꓐ', 'ꓽ'), - ('ꔀ', 'ꘌ'), - ('ꘐ', 'ꘫ'), - ('Ꙁ', '\u{a672}'), - ('\u{a674}', '\u{a67d}'), - ('ꙿ', '\u{a6f1}'), - ('ꜗ', 'ꜟ'), - ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟍ'), - ('Ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'Ƛ'), - ('ꟲ', 'ꠧ'), - ('\u{a82c}', '\u{a82c}'), - ('ꡀ', 'ꡳ'), - ('ꢀ', '\u{a8c5}'), - ('꣐', '꣙'), - ('\u{a8e0}', 'ꣷ'), - ('ꣻ', 'ꣻ'), - ('ꣽ', '\u{a92d}'), - ('ꤰ', '\u{a953}'), - ('ꥠ', 'ꥼ'), - ('\u{a980}', '\u{a9c0}'), - ('ꧏ', '꧙'), - ('ꧠ', 'ꧾ'), - ('ꨀ', '\u{aa36}'), - ('ꩀ', 'ꩍ'), - ('꩐', '꩙'), - ('ꩠ', 'ꩶ'), - ('ꩺ', 'ꫂ'), - ('ꫛ', 'ꫝ'), - ('ꫠ', 'ꫯ'), - ('ꫲ', '\u{aaf6}'), - ('ꬁ', 'ꬆ'), - ('ꬉ', 'ꬎ'), - ('ꬑ', 'ꬖ'), - ('ꬠ', 'ꬦ'), - ('ꬨ', 'ꬮ'), - ('ꬰ', 'ꭚ'), - ('ꭜ', 'ꭩ'), - ('ꭰ', 'ꯪ'), - ('꯬', '\u{abed}'), - ('꯰', '꯹'), - ('가', '힣'), - ('ힰ', 'ퟆ'), - ('ퟋ', 'ퟻ'), - ('豈', '舘'), - ('並', '龎'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('יִ', 'ﬨ'), - ('שׁ', 'זּ'), - ('טּ', 'לּ'), - ('מּ', 'מּ'), - ('נּ', 'סּ'), - ('ףּ', 'פּ'), - ('צּ', 'ﮱ'), - ('ﯓ', 'ﴽ'), - ('ﵐ', 'ﶏ'), - ('ﶒ', 'ﷇ'), - ('ﷰ', 'ﷻ'), - ('\u{fe00}', '\u{fe0f}'), - ('\u{fe20}', '\u{fe2f}'), - ('︳', '︴'), - ('﹍', '﹏'), - ('ﹰ', 'ﹴ'), - ('ﹶ', 'ﻼ'), - ('0', '9'), - ('A', 'Z'), - ('_', '_'), - ('a', 'z'), - ('ヲ', 'ᄒ'), - 
('ᅡ', 'ᅦ'), - ('ᅧ', 'ᅬ'), - ('ᅭ', 'ᅲ'), - ('ᅳ', 'ᅵ'), - ('𐀀', '𐀋'), - ('𐀍', '𐀦'), - ('𐀨', '𐀺'), - ('𐀼', '𐀽'), - ('𐀿', '𐁍'), - ('𐁐', '𐁝'), - ('𐂀', '𐃺'), - ('𐅀', '𐅴'), - ('\u{101fd}', '\u{101fd}'), - ('𐊀', '𐊜'), - ('𐊠', '𐋐'), - ('\u{102e0}', '\u{102e0}'), - ('𐌀', '𐌟'), - ('𐌭', '𐍊'), - ('𐍐', '\u{1037a}'), - ('𐎀', '𐎝'), - ('𐎠', '𐏃'), - ('𐏈', '𐏏'), - ('𐏑', '𐏕'), - ('𐐀', '𐒝'), - ('𐒠', '𐒩'), - ('𐒰', '𐓓'), - ('𐓘', '𐓻'), - ('𐔀', '𐔧'), - ('𐔰', '𐕣'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐗀', '𐗳'), - ('𐘀', '𐜶'), - ('𐝀', '𐝕'), - ('𐝠', '𐝧'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𐠀', '𐠅'), - ('𐠈', '𐠈'), - ('𐠊', '𐠵'), - ('𐠷', '𐠸'), - ('𐠼', '𐠼'), - ('𐠿', '𐡕'), - ('𐡠', '𐡶'), - ('𐢀', '𐢞'), - ('𐣠', '𐣲'), - ('𐣴', '𐣵'), - ('𐤀', '𐤕'), - ('𐤠', '𐤹'), - ('𐦀', '𐦷'), - ('𐦾', '𐦿'), - ('𐨀', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '𐨓'), - ('𐨕', '𐨗'), - ('𐨙', '𐨵'), - ('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '\u{10a3f}'), - ('𐩠', '𐩼'), - ('𐪀', '𐪜'), - ('𐫀', '𐫇'), - ('𐫉', '\u{10ae6}'), - ('𐬀', '𐬵'), - ('𐭀', '𐭕'), - ('𐭠', '𐭲'), - ('𐮀', '𐮑'), - ('𐰀', '𐱈'), - ('𐲀', '𐲲'), - ('𐳀', '𐳲'), - ('𐴀', '\u{10d27}'), - ('𐴰', '𐴹'), - ('𐵀', '𐵥'), - ('\u{10d69}', '\u{10d6d}'), - ('𐵯', '𐶅'), - ('𐺀', '𐺩'), - ('\u{10eab}', '\u{10eac}'), - ('𐺰', '𐺱'), - ('𐻂', '𐻄'), - ('\u{10efc}', '𐼜'), - ('𐼧', '𐼧'), - ('𐼰', '\u{10f50}'), - ('𐽰', '\u{10f85}'), - ('𐾰', '𐿄'), - ('𐿠', '𐿶'), - ('𑀀', '\u{11046}'), - ('𑁦', '𑁵'), - ('\u{1107f}', '\u{110ba}'), - ('\u{110c2}', '\u{110c2}'), - ('𑃐', '𑃨'), - ('𑃰', '𑃹'), - ('\u{11100}', '\u{11134}'), - ('𑄶', '𑄿'), - ('𑅄', '𑅇'), - ('𑅐', '\u{11173}'), - ('𑅶', '𑅶'), - ('\u{11180}', '𑇄'), - ('\u{111c9}', '\u{111cc}'), - ('𑇎', '𑇚'), - ('𑇜', '𑇜'), - ('𑈀', '𑈑'), - ('𑈓', '\u{11237}'), - ('\u{1123e}', '\u{11241}'), - ('𑊀', '𑊆'), - ('𑊈', '𑊈'), - ('𑊊', '𑊍'), - ('𑊏', '𑊝'), - ('𑊟', '𑊨'), - ('𑊰', '\u{112ea}'), - ('𑋰', '𑋹'), - ('\u{11300}', '𑌃'), - ('𑌅', '𑌌'), - ('𑌏', '𑌐'), - ('𑌓', '𑌨'), - ('𑌪', '𑌰'), - 
('𑌲', '𑌳'), - ('𑌵', '𑌹'), - ('\u{1133b}', '𑍄'), - ('𑍇', '𑍈'), - ('𑍋', '\u{1134d}'), - ('𑍐', '𑍐'), - ('\u{11357}', '\u{11357}'), - ('𑍝', '𑍣'), - ('\u{11366}', '\u{1136c}'), - ('\u{11370}', '\u{11374}'), - ('𑎀', '𑎉'), - ('𑎋', '𑎋'), - ('𑎎', '𑎎'), - ('𑎐', '𑎵'), - ('𑎷', '\u{113c0}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '𑏊'), - ('𑏌', '𑏓'), - ('\u{113e1}', '\u{113e2}'), - ('𑐀', '𑑊'), - ('𑑐', '𑑙'), - ('\u{1145e}', '𑑡'), - ('𑒀', '𑓅'), - ('𑓇', '𑓇'), - ('𑓐', '𑓙'), - ('𑖀', '\u{115b5}'), - ('𑖸', '\u{115c0}'), - ('𑗘', '\u{115dd}'), - ('𑘀', '\u{11640}'), - ('𑙄', '𑙄'), - ('𑙐', '𑙙'), - ('𑚀', '𑚸'), - ('𑛀', '𑛉'), - ('𑛐', '𑛣'), - ('𑜀', '𑜚'), - ('\u{1171d}', '\u{1172b}'), - ('𑜰', '𑜹'), - ('𑝀', '𑝆'), - ('𑠀', '\u{1183a}'), - ('𑢠', '𑣩'), - ('𑣿', '𑤆'), - ('𑤉', '𑤉'), - ('𑤌', '𑤓'), - ('𑤕', '𑤖'), - ('𑤘', '𑤵'), - ('𑤷', '𑤸'), - ('\u{1193b}', '\u{11943}'), - ('𑥐', '𑥙'), - ('𑦠', '𑦧'), - ('𑦪', '\u{119d7}'), - ('\u{119da}', '𑧡'), - ('𑧣', '𑧤'), - ('𑨀', '\u{11a3e}'), - ('\u{11a47}', '\u{11a47}'), - ('𑩐', '\u{11a99}'), - ('𑪝', '𑪝'), - ('𑪰', '𑫸'), - ('𑯀', '𑯠'), - ('𑯰', '𑯹'), - ('𑰀', '𑰈'), - ('𑰊', '\u{11c36}'), - ('\u{11c38}', '𑱀'), - ('𑱐', '𑱙'), - ('𑱲', '𑲏'), - ('\u{11c92}', '\u{11ca7}'), - ('𑲩', '\u{11cb6}'), - ('𑴀', '𑴆'), - ('𑴈', '𑴉'), - ('𑴋', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d47}'), - ('𑵐', '𑵙'), - ('𑵠', '𑵥'), - ('𑵧', '𑵨'), - ('𑵪', '𑶎'), - ('\u{11d90}', '\u{11d91}'), - ('𑶓', '𑶘'), - ('𑶠', '𑶩'), - ('𑻠', '𑻶'), - ('\u{11f00}', '𑼐'), - ('𑼒', '\u{11f3a}'), - ('𑼾', '\u{11f42}'), - ('𑽐', '\u{11f5a}'), - ('𑾰', '𑾰'), - ('𒀀', '𒎙'), - ('𒐀', '𒑮'), - ('𒒀', '𒕃'), - ('𒾐', '𒿰'), - ('𓀀', '𓐯'), - ('\u{13440}', '\u{13455}'), - ('𓑠', '𔏺'), - ('𔐀', '𔙆'), - ('𖄀', '𖄹'), - ('𖠀', '𖨸'), - ('𖩀', '𖩞'), - ('𖩠', '𖩩'), - ('𖩰', '𖪾'), - ('𖫀', '𖫉'), - ('𖫐', '𖫭'), - ('\u{16af0}', '\u{16af4}'), - ('𖬀', '\u{16b36}'), - ('𖭀', '𖭃'), - ('𖭐', '𖭙'), - ('𖭣', '𖭷'), - ('𖭽', '𖮏'), - ('𖵀', '𖵬'), - ('𖵰', '𖵹'), - ('𖹀', '𖹿'), - ('𖼀', '𖽊'), - 
('\u{16f4f}', '𖾇'), - ('\u{16f8f}', '𖾟'), - ('𖿠', '𖿡'), - ('𖿣', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), - ('𗀀', '𘟷'), - ('𘠀', '𘳕'), - ('𘳿', '𘴈'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('𛀀', '𛄢'), - ('𛄲', '𛄲'), - ('𛅐', '𛅒'), - ('𛅕', '𛅕'), - ('𛅤', '𛅧'), - ('𛅰', '𛋻'), - ('𛰀', '𛱪'), - ('𛱰', '𛱼'), - ('𛲀', '𛲈'), - ('𛲐', '𛲙'), - ('\u{1bc9d}', '\u{1bc9e}'), - ('𜳰', '𜳹'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d165}', '\u{1d169}'), - ('\u{1d16d}', '\u{1d172}'), - ('\u{1d17b}', '\u{1d182}'), - ('\u{1d185}', '\u{1d18b}'), - ('\u{1d1aa}', '\u{1d1ad}'), - ('\u{1d242}', '\u{1d244}'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝛀'), - ('𝛂', '𝛚'), - ('𝛜', '𝛺'), - ('𝛼', '𝜔'), - ('𝜖', '𝜴'), - ('𝜶', '𝝎'), - ('𝝐', '𝝮'), - ('𝝰', '𝞈'), - ('𝞊', '𝞨'), - ('𝞪', '𝟂'), - ('𝟄', '𝟋'), - ('𝟎', '𝟿'), - ('\u{1da00}', '\u{1da36}'), - ('\u{1da3b}', '\u{1da6c}'), - ('\u{1da75}', '\u{1da75}'), - ('\u{1da84}', '\u{1da84}'), - ('\u{1da9b}', '\u{1da9f}'), - ('\u{1daa1}', '\u{1daaf}'), - ('𝼀', '𝼞'), - ('𝼥', '𝼪'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), - ('𞀰', '𞁭'), - ('\u{1e08f}', '\u{1e08f}'), - ('𞄀', '𞄬'), - ('\u{1e130}', '𞄽'), - ('𞅀', '𞅉'), - ('𞅎', '𞅎'), - ('𞊐', '\u{1e2ae}'), - ('𞋀', '𞋹'), - ('𞓐', '𞓹'), - ('𞗐', '𞗺'), - ('𞟠', '𞟦'), - ('𞟨', '𞟫'), - ('𞟭', '𞟮'), - ('𞟰', '𞟾'), - ('𞠀', '𞣄'), - ('\u{1e8d0}', '\u{1e8d6}'), - ('𞤀', '𞥋'), - ('𞥐', '𞥙'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', 
'𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('🄰', '🅉'), - ('🅐', '🅩'), - ('🅰', '🆉'), - ('🯰', '🯹'), - ('𠀀', '𪛟'), - ('𪜀', '𫜹'), - ('𫝀', '𫠝'), - ('𫠠', '𬺡'), - ('𬺰', '𮯠'), - ('𮯰', '𮹝'), - ('丽', '𪘀'), - ('𰀀', '𱍊'), - ('𱍐', '𲎯'), - ('\u{e0100}', '\u{e01ef}'), -]; diff --git a/vendor/regex-syntax/src/unicode_tables/property_bool.rs b/vendor/regex-syntax/src/unicode_tables/property_bool.rs deleted file mode 100644 index 3d62edc4..00000000 --- a/vendor/regex-syntax/src/unicode_tables/property_bool.rs +++ /dev/null @@ -1,12095 +0,0 @@ -// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: -// -// ucd-generate property-bool ucd-16.0.0 --chars -// -// Unicode version: 16.0.0. -// -// ucd-generate 0.3.1 is available on crates.io. - -pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ - ("ASCII_Hex_Digit", ASCII_HEX_DIGIT), - ("Alphabetic", ALPHABETIC), - ("Bidi_Control", BIDI_CONTROL), - ("Bidi_Mirrored", BIDI_MIRRORED), - ("Case_Ignorable", CASE_IGNORABLE), - ("Cased", CASED), - ("Changes_When_Casefolded", CHANGES_WHEN_CASEFOLDED), - ("Changes_When_Casemapped", CHANGES_WHEN_CASEMAPPED), - ("Changes_When_Lowercased", CHANGES_WHEN_LOWERCASED), - ("Changes_When_Titlecased", CHANGES_WHEN_TITLECASED), - ("Changes_When_Uppercased", CHANGES_WHEN_UPPERCASED), - ("Dash", DASH), - ("Default_Ignorable_Code_Point", DEFAULT_IGNORABLE_CODE_POINT), - ("Deprecated", DEPRECATED), - ("Diacritic", DIACRITIC), - ("Emoji", EMOJI), - ("Emoji_Component", EMOJI_COMPONENT), - ("Emoji_Modifier", EMOJI_MODIFIER), - ("Emoji_Modifier_Base", EMOJI_MODIFIER_BASE), - ("Emoji_Presentation", EMOJI_PRESENTATION), - ("Extended_Pictographic", EXTENDED_PICTOGRAPHIC), - ("Extender", EXTENDER), - ("Grapheme_Base", GRAPHEME_BASE), - ("Grapheme_Extend", GRAPHEME_EXTEND), - ("Grapheme_Link", GRAPHEME_LINK), - ("Hex_Digit", HEX_DIGIT), - ("Hyphen", HYPHEN), - 
("IDS_Binary_Operator", IDS_BINARY_OPERATOR), - ("IDS_Trinary_Operator", IDS_TRINARY_OPERATOR), - ("IDS_Unary_Operator", IDS_UNARY_OPERATOR), - ("ID_Compat_Math_Continue", ID_COMPAT_MATH_CONTINUE), - ("ID_Compat_Math_Start", ID_COMPAT_MATH_START), - ("ID_Continue", ID_CONTINUE), - ("ID_Start", ID_START), - ("Ideographic", IDEOGRAPHIC), - ("InCB", INCB), - ("Join_Control", JOIN_CONTROL), - ("Logical_Order_Exception", LOGICAL_ORDER_EXCEPTION), - ("Lowercase", LOWERCASE), - ("Math", MATH), - ("Modifier_Combining_Mark", MODIFIER_COMBINING_MARK), - ("Noncharacter_Code_Point", NONCHARACTER_CODE_POINT), - ("Other_Alphabetic", OTHER_ALPHABETIC), - ("Other_Default_Ignorable_Code_Point", OTHER_DEFAULT_IGNORABLE_CODE_POINT), - ("Other_Grapheme_Extend", OTHER_GRAPHEME_EXTEND), - ("Other_ID_Continue", OTHER_ID_CONTINUE), - ("Other_ID_Start", OTHER_ID_START), - ("Other_Lowercase", OTHER_LOWERCASE), - ("Other_Math", OTHER_MATH), - ("Other_Uppercase", OTHER_UPPERCASE), - ("Pattern_Syntax", PATTERN_SYNTAX), - ("Pattern_White_Space", PATTERN_WHITE_SPACE), - ("Prepended_Concatenation_Mark", PREPENDED_CONCATENATION_MARK), - ("Quotation_Mark", QUOTATION_MARK), - ("Radical", RADICAL), - ("Regional_Indicator", REGIONAL_INDICATOR), - ("Sentence_Terminal", SENTENCE_TERMINAL), - ("Soft_Dotted", SOFT_DOTTED), - ("Terminal_Punctuation", TERMINAL_PUNCTUATION), - ("Unified_Ideograph", UNIFIED_IDEOGRAPH), - ("Uppercase", UPPERCASE), - ("Variation_Selector", VARIATION_SELECTOR), - ("White_Space", WHITE_SPACE), - ("XID_Continue", XID_CONTINUE), - ("XID_Start", XID_START), -]; - -pub const ASCII_HEX_DIGIT: &'static [(char, char)] = - &[('0', '9'), ('A', 'F'), ('a', 'f')]; - -pub const ALPHABETIC: &'static [(char, char)] = &[ - ('A', 'Z'), - ('a', 'z'), - ('ª', 'ª'), - ('µ', 'µ'), - ('º', 'º'), - ('À', 'Ö'), - ('Ø', 'ö'), - ('ø', 'ˁ'), - ('ˆ', 'ˑ'), - ('ˠ', 'ˤ'), - ('ˬ', 'ˬ'), - ('ˮ', 'ˮ'), - ('\u{345}', '\u{345}'), - ('\u{363}', 'ʹ'), - ('Ͷ', 'ͷ'), - ('ͺ', 'ͽ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ά'), - ('Έ', 
'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', 'ϵ'), - ('Ϸ', 'ҁ'), - ('Ҋ', 'ԯ'), - ('Ա', 'Ֆ'), - ('ՙ', 'ՙ'), - ('ՠ', 'ֈ'), - ('\u{5b0}', '\u{5bd}'), - ('\u{5bf}', '\u{5bf}'), - ('\u{5c1}', '\u{5c2}'), - ('\u{5c4}', '\u{5c5}'), - ('\u{5c7}', '\u{5c7}'), - ('א', 'ת'), - ('ׯ', 'ײ'), - ('\u{610}', '\u{61a}'), - ('ؠ', '\u{657}'), - ('\u{659}', '\u{65f}'), - ('ٮ', 'ۓ'), - ('ە', '\u{6dc}'), - ('\u{6e1}', '\u{6e8}'), - ('\u{6ed}', 'ۯ'), - ('ۺ', 'ۼ'), - ('ۿ', 'ۿ'), - ('ܐ', '\u{73f}'), - ('ݍ', 'ޱ'), - ('ߊ', 'ߪ'), - ('ߴ', 'ߵ'), - ('ߺ', 'ߺ'), - ('ࠀ', '\u{817}'), - ('ࠚ', '\u{82c}'), - ('ࡀ', 'ࡘ'), - ('ࡠ', 'ࡪ'), - ('ࡰ', 'ࢇ'), - ('ࢉ', 'ࢎ'), - ('\u{897}', '\u{897}'), - ('ࢠ', 'ࣉ'), - ('\u{8d4}', '\u{8df}'), - ('\u{8e3}', '\u{8e9}'), - ('\u{8f0}', 'ऻ'), - ('ऽ', 'ौ'), - ('ॎ', 'ॐ'), - ('\u{955}', '\u{963}'), - ('ॱ', 'ঃ'), - ('অ', 'ঌ'), - ('এ', 'ঐ'), - ('ও', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('ঽ', '\u{9c4}'), - ('ে', 'ৈ'), - ('ো', 'ৌ'), - ('ৎ', 'ৎ'), - ('\u{9d7}', '\u{9d7}'), - ('ড়', 'ঢ়'), - ('য়', '\u{9e3}'), - ('ৰ', 'ৱ'), - ('ৼ', 'ৼ'), - ('\u{a01}', 'ਃ'), - ('ਅ', 'ਊ'), - ('ਏ', 'ਐ'), - ('ਓ', 'ਨ'), - ('ਪ', 'ਰ'), - ('ਲ', 'ਲ਼'), - ('ਵ', 'ਸ਼'), - ('ਸ', 'ਹ'), - ('ਾ', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4c}'), - ('\u{a51}', '\u{a51}'), - ('ਖ਼', 'ੜ'), - ('ਫ਼', 'ਫ਼'), - ('\u{a70}', '\u{a75}'), - ('\u{a81}', 'ઃ'), - ('અ', 'ઍ'), - ('એ', 'ઑ'), - ('ઓ', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('ઽ', '\u{ac5}'), - ('\u{ac7}', 'ૉ'), - ('ો', 'ૌ'), - ('ૐ', 'ૐ'), - ('ૠ', '\u{ae3}'), - ('ૹ', '\u{afc}'), - ('\u{b01}', 'ଃ'), - ('ଅ', 'ଌ'), - ('ଏ', 'ଐ'), - ('ଓ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 'ଳ'), - ('ଵ', 'ହ'), - ('ଽ', '\u{b44}'), - ('େ', 'ୈ'), - ('ୋ', 'ୌ'), - ('\u{b56}', '\u{b57}'), - ('ଡ଼', 'ଢ଼'), - ('ୟ', '\u{b63}'), - ('ୱ', 'ୱ'), - ('\u{b82}', 'ஃ'), - ('அ', 'ஊ'), - ('எ', 'ஐ'), - ('ஒ', 'க'), - ('ங', 'ச'), - ('ஜ', 'ஜ'), - ('ஞ', 'ட'), - ('ண', 'த'), - ('ந', 'ப'), - ('ம', 'ஹ'), - ('\u{bbe}', 'ூ'), - ('ெ', 'ை'), - ('ொ', 'ௌ'), - ('ௐ', 'ௐ'), - ('\u{bd7}', '\u{bd7}'), - 
('\u{c00}', 'ఌ'), - ('ఎ', 'ఐ'), - ('ఒ', 'న'), - ('ప', 'హ'), - ('ఽ', 'ౄ'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4c}'), - ('\u{c55}', '\u{c56}'), - ('ౘ', 'ౚ'), - ('ౝ', 'ౝ'), - ('ౠ', '\u{c63}'), - ('ಀ', 'ಃ'), - ('ಅ', 'ಌ'), - ('ಎ', 'ಐ'), - ('ಒ', 'ನ'), - ('ಪ', 'ಳ'), - ('ವ', 'ಹ'), - ('ಽ', 'ೄ'), - ('\u{cc6}', '\u{cc8}'), - ('\u{cca}', '\u{ccc}'), - ('\u{cd5}', '\u{cd6}'), - ('ೝ', 'ೞ'), - ('ೠ', '\u{ce3}'), - ('ೱ', 'ೳ'), - ('\u{d00}', 'ഌ'), - ('എ', 'ഐ'), - ('ഒ', 'ഺ'), - ('ഽ', '\u{d44}'), - ('െ', 'ൈ'), - ('ൊ', 'ൌ'), - ('ൎ', 'ൎ'), - ('ൔ', '\u{d57}'), - ('ൟ', '\u{d63}'), - ('ൺ', 'ൿ'), - ('\u{d81}', 'ඃ'), - ('අ', 'ඖ'), - ('ක', 'න'), - ('ඳ', 'ර'), - ('ල', 'ල'), - ('ව', 'ෆ'), - ('\u{dcf}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('ෘ', '\u{ddf}'), - ('ෲ', 'ෳ'), - ('ก', '\u{e3a}'), - ('เ', 'ๆ'), - ('\u{e4d}', '\u{e4d}'), - ('ກ', 'ຂ'), - ('ຄ', 'ຄ'), - ('ຆ', 'ຊ'), - ('ຌ', 'ຣ'), - ('ລ', 'ລ'), - ('ວ', '\u{eb9}'), - ('\u{ebb}', 'ຽ'), - ('ເ', 'ໄ'), - ('ໆ', 'ໆ'), - ('\u{ecd}', '\u{ecd}'), - ('ໜ', 'ໟ'), - ('ༀ', 'ༀ'), - ('ཀ', 'ཇ'), - ('ཉ', 'ཬ'), - ('\u{f71}', '\u{f83}'), - ('ྈ', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('က', '\u{1036}'), - ('း', 'း'), - ('ျ', 'ဿ'), - ('ၐ', 'ႏ'), - ('ႚ', '\u{109d}'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ა', 'ჺ'), - ('ჼ', 'ቈ'), - ('ቊ', 'ቍ'), - ('ቐ', 'ቖ'), - ('ቘ', 'ቘ'), - ('ቚ', 'ቝ'), - ('በ', 'ኈ'), - ('ኊ', 'ኍ'), - ('ነ', 'ኰ'), - ('ኲ', 'ኵ'), - ('ኸ', 'ኾ'), - ('ዀ', 'ዀ'), - ('ዂ', 'ዅ'), - ('ወ', 'ዖ'), - ('ዘ', 'ጐ'), - ('ጒ', 'ጕ'), - ('ጘ', 'ፚ'), - ('ᎀ', 'ᎏ'), - ('Ꭰ', 'Ᏽ'), - ('ᏸ', 'ᏽ'), - ('ᐁ', 'ᙬ'), - ('ᙯ', 'ᙿ'), - ('ᚁ', 'ᚚ'), - ('ᚠ', 'ᛪ'), - ('ᛮ', 'ᛸ'), - ('ᜀ', '\u{1713}'), - ('ᜟ', '\u{1733}'), - ('ᝀ', '\u{1753}'), - ('ᝠ', 'ᝬ'), - ('ᝮ', 'ᝰ'), - ('\u{1772}', '\u{1773}'), - ('ក', 'ឳ'), - ('ា', 'ៈ'), - ('ៗ', 'ៗ'), - ('ៜ', 'ៜ'), - ('ᠠ', 'ᡸ'), - ('ᢀ', 'ᢪ'), - ('ᢰ', 'ᣵ'), - ('ᤀ', 'ᤞ'), - ('\u{1920}', 'ᤫ'), - ('ᤰ', 'ᤸ'), - ('ᥐ', 'ᥭ'), - ('ᥰ', 'ᥴ'), - ('ᦀ', 'ᦫ'), - ('ᦰ', 'ᧉ'), - ('ᨀ', '\u{1a1b}'), - ('ᨠ', '\u{1a5e}'), - ('ᩡ', '\u{1a74}'), - ('ᪧ', 'ᪧ'), - 
('\u{1abf}', '\u{1ac0}'), - ('\u{1acc}', '\u{1ace}'), - ('\u{1b00}', 'ᬳ'), - ('\u{1b35}', '\u{1b43}'), - ('ᭅ', 'ᭌ'), - ('\u{1b80}', '\u{1ba9}'), - ('\u{1bac}', 'ᮯ'), - ('ᮺ', 'ᯥ'), - ('ᯧ', '\u{1bf1}'), - ('ᰀ', '\u{1c36}'), - ('ᱍ', 'ᱏ'), - ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('ᳩ', 'ᳬ'), - ('ᳮ', 'ᳳ'), - ('ᳵ', 'ᳶ'), - ('ᳺ', 'ᳺ'), - ('ᴀ', 'ᶿ'), - ('\u{1dd3}', '\u{1df4}'), - ('Ḁ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), - ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ᾼ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῌ'), - ('ῐ', 'ΐ'), - ('ῖ', 'Ί'), - ('ῠ', 'Ῥ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῼ'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('ℂ', 'ℂ'), - ('ℇ', 'ℇ'), - ('ℊ', 'ℓ'), - ('ℕ', 'ℕ'), - ('ℙ', 'ℝ'), - ('ℤ', 'ℤ'), - ('Ω', 'Ω'), - ('ℨ', 'ℨ'), - ('K', 'ℭ'), - ('ℯ', 'ℹ'), - ('ℼ', 'ℿ'), - ('ⅅ', 'ⅉ'), - ('ⅎ', 'ⅎ'), - ('Ⅰ', 'ↈ'), - ('Ⓐ', 'ⓩ'), - ('Ⰰ', 'ⳤ'), - ('Ⳬ', 'ⳮ'), - ('Ⳳ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ⴰ', 'ⵧ'), - ('ⵯ', 'ⵯ'), - ('ⶀ', 'ⶖ'), - ('ⶠ', 'ⶦ'), - ('ⶨ', 'ⶮ'), - ('ⶰ', 'ⶶ'), - ('ⶸ', 'ⶾ'), - ('ⷀ', 'ⷆ'), - ('ⷈ', 'ⷎ'), - ('ⷐ', 'ⷖ'), - ('ⷘ', 'ⷞ'), - ('\u{2de0}', '\u{2dff}'), - ('ⸯ', 'ⸯ'), - ('々', '〇'), - ('〡', '〩'), - ('〱', '〵'), - ('〸', '〼'), - ('ぁ', 'ゖ'), - ('ゝ', 'ゟ'), - ('ァ', 'ヺ'), - ('ー', 'ヿ'), - ('ㄅ', 'ㄯ'), - ('ㄱ', 'ㆎ'), - ('ㆠ', 'ㆿ'), - ('ㇰ', 'ㇿ'), - ('㐀', '䶿'), - ('一', 'ꒌ'), - ('ꓐ', 'ꓽ'), - ('ꔀ', 'ꘌ'), - ('ꘐ', 'ꘟ'), - ('ꘪ', 'ꘫ'), - ('Ꙁ', 'ꙮ'), - ('\u{a674}', '\u{a67b}'), - ('ꙿ', 'ꛯ'), - ('ꜗ', 'ꜟ'), - ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟍ'), - ('Ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'Ƛ'), - ('ꟲ', 'ꠅ'), - ('ꠇ', 'ꠧ'), - ('ꡀ', 'ꡳ'), - ('ꢀ', 'ꣃ'), - ('\u{a8c5}', '\u{a8c5}'), - ('ꣲ', 'ꣷ'), - ('ꣻ', 'ꣻ'), - ('ꣽ', '\u{a8ff}'), - ('ꤊ', '\u{a92a}'), - ('ꤰ', 'ꥒ'), - ('ꥠ', 'ꥼ'), - ('\u{a980}', 'ꦲ'), - ('ꦴ', 'ꦿ'), - ('ꧏ', 'ꧏ'), - ('ꧠ', 'ꧯ'), - ('ꧺ', 'ꧾ'), - ('ꨀ', '\u{aa36}'), - ('ꩀ', 'ꩍ'), - ('ꩠ', 'ꩶ'), - ('ꩺ', '\u{aabe}'), - ('ꫀ', 'ꫀ'), - ('ꫂ', 'ꫂ'), - ('ꫛ', 'ꫝ'), - ('ꫠ', 'ꫯ'), - ('ꫲ', 'ꫵ'), - ('ꬁ', 'ꬆ'), - 
('ꬉ', 'ꬎ'), - ('ꬑ', 'ꬖ'), - ('ꬠ', 'ꬦ'), - ('ꬨ', 'ꬮ'), - ('ꬰ', 'ꭚ'), - ('ꭜ', 'ꭩ'), - ('ꭰ', 'ꯪ'), - ('가', '힣'), - ('ힰ', 'ퟆ'), - ('ퟋ', 'ퟻ'), - ('豈', '舘'), - ('並', '龎'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('יִ', 'ﬨ'), - ('שׁ', 'זּ'), - ('טּ', 'לּ'), - ('מּ', 'מּ'), - ('נּ', 'סּ'), - ('ףּ', 'פּ'), - ('צּ', 'ﮱ'), - ('ﯓ', 'ﴽ'), - ('ﵐ', 'ﶏ'), - ('ﶒ', 'ﷇ'), - ('ﷰ', 'ﷻ'), - ('ﹰ', 'ﹴ'), - ('ﹶ', 'ﻼ'), - ('A', 'Z'), - ('a', 'z'), - ('ヲ', 'ᄒ'), - ('ᅡ', 'ᅦ'), - ('ᅧ', 'ᅬ'), - ('ᅭ', 'ᅲ'), - ('ᅳ', 'ᅵ'), - ('𐀀', '𐀋'), - ('𐀍', '𐀦'), - ('𐀨', '𐀺'), - ('𐀼', '𐀽'), - ('𐀿', '𐁍'), - ('𐁐', '𐁝'), - ('𐂀', '𐃺'), - ('𐅀', '𐅴'), - ('𐊀', '𐊜'), - ('𐊠', '𐋐'), - ('𐌀', '𐌟'), - ('𐌭', '𐍊'), - ('𐍐', '\u{1037a}'), - ('𐎀', '𐎝'), - ('𐎠', '𐏃'), - ('𐏈', '𐏏'), - ('𐏑', '𐏕'), - ('𐐀', '𐒝'), - ('𐒰', '𐓓'), - ('𐓘', '𐓻'), - ('𐔀', '𐔧'), - ('𐔰', '𐕣'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐗀', '𐗳'), - ('𐘀', '𐜶'), - ('𐝀', '𐝕'), - ('𐝠', '𐝧'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𐠀', '𐠅'), - ('𐠈', '𐠈'), - ('𐠊', '𐠵'), - ('𐠷', '𐠸'), - ('𐠼', '𐠼'), - ('𐠿', '𐡕'), - ('𐡠', '𐡶'), - ('𐢀', '𐢞'), - ('𐣠', '𐣲'), - ('𐣴', '𐣵'), - ('𐤀', '𐤕'), - ('𐤠', '𐤹'), - ('𐦀', '𐦷'), - ('𐦾', '𐦿'), - ('𐨀', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '𐨓'), - ('𐨕', '𐨗'), - ('𐨙', '𐨵'), - ('𐩠', '𐩼'), - ('𐪀', '𐪜'), - ('𐫀', '𐫇'), - ('𐫉', '𐫤'), - ('𐬀', '𐬵'), - ('𐭀', '𐭕'), - ('𐭠', '𐭲'), - ('𐮀', '𐮑'), - ('𐰀', '𐱈'), - ('𐲀', '𐲲'), - ('𐳀', '𐳲'), - ('𐴀', '\u{10d27}'), - ('𐵊', '𐵥'), - ('\u{10d69}', '\u{10d69}'), - ('𐵯', '𐶅'), - ('𐺀', '𐺩'), - ('\u{10eab}', '\u{10eac}'), - ('𐺰', '𐺱'), - ('𐻂', '𐻄'), - ('\u{10efc}', '\u{10efc}'), - ('𐼀', '𐼜'), - ('𐼧', '𐼧'), - ('𐼰', '𐽅'), - ('𐽰', '𐾁'), - ('𐾰', '𐿄'), - ('𐿠', '𐿶'), - ('𑀀', '\u{11045}'), - ('𑁱', '𑁵'), - ('\u{11080}', '𑂸'), - ('\u{110c2}', '\u{110c2}'), - ('𑃐', '𑃨'), - ('\u{11100}', '\u{11132}'), - ('𑅄', '𑅇'), - ('𑅐', '𑅲'), - ('𑅶', '𑅶'), - ('\u{11180}', '𑆿'), - ('𑇁', '𑇄'), - ('𑇎', '\u{111cf}'), - ('𑇚', '𑇚'), - ('𑇜', '𑇜'), - ('𑈀', 
'𑈑'), - ('𑈓', '\u{11234}'), - ('\u{11237}', '\u{11237}'), - ('\u{1123e}', '\u{11241}'), - ('𑊀', '𑊆'), - ('𑊈', '𑊈'), - ('𑊊', '𑊍'), - ('𑊏', '𑊝'), - ('𑊟', '𑊨'), - ('𑊰', '\u{112e8}'), - ('\u{11300}', '𑌃'), - ('𑌅', '𑌌'), - ('𑌏', '𑌐'), - ('𑌓', '𑌨'), - ('𑌪', '𑌰'), - ('𑌲', '𑌳'), - ('𑌵', '𑌹'), - ('𑌽', '𑍄'), - ('𑍇', '𑍈'), - ('𑍋', '𑍌'), - ('𑍐', '𑍐'), - ('\u{11357}', '\u{11357}'), - ('𑍝', '𑍣'), - ('𑎀', '𑎉'), - ('𑎋', '𑎋'), - ('𑎎', '𑎎'), - ('𑎐', '𑎵'), - ('𑎷', '\u{113c0}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '𑏊'), - ('𑏌', '𑏍'), - ('𑏑', '𑏑'), - ('𑏓', '𑏓'), - ('𑐀', '𑑁'), - ('\u{11443}', '𑑅'), - ('𑑇', '𑑊'), - ('𑑟', '𑑡'), - ('𑒀', '𑓁'), - ('𑓄', '𑓅'), - ('𑓇', '𑓇'), - ('𑖀', '\u{115b5}'), - ('𑖸', '𑖾'), - ('𑗘', '\u{115dd}'), - ('𑘀', '𑘾'), - ('\u{11640}', '\u{11640}'), - ('𑙄', '𑙄'), - ('𑚀', '\u{116b5}'), - ('𑚸', '𑚸'), - ('𑜀', '𑜚'), - ('\u{1171d}', '\u{1172a}'), - ('𑝀', '𑝆'), - ('𑠀', '𑠸'), - ('𑢠', '𑣟'), - ('𑣿', '𑤆'), - ('𑤉', '𑤉'), - ('𑤌', '𑤓'), - ('𑤕', '𑤖'), - ('𑤘', '𑤵'), - ('𑤷', '𑤸'), - ('\u{1193b}', '\u{1193c}'), - ('𑤿', '𑥂'), - ('𑦠', '𑦧'), - ('𑦪', '\u{119d7}'), - ('\u{119da}', '𑧟'), - ('𑧡', '𑧡'), - ('𑧣', '𑧤'), - ('𑨀', '𑨲'), - ('\u{11a35}', '\u{11a3e}'), - ('𑩐', '𑪗'), - ('𑪝', '𑪝'), - ('𑪰', '𑫸'), - ('𑯀', '𑯠'), - ('𑰀', '𑰈'), - ('𑰊', '\u{11c36}'), - ('\u{11c38}', '𑰾'), - ('𑱀', '𑱀'), - ('𑱲', '𑲏'), - ('\u{11c92}', '\u{11ca7}'), - ('𑲩', '\u{11cb6}'), - ('𑴀', '𑴆'), - ('𑴈', '𑴉'), - ('𑴋', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d41}'), - ('\u{11d43}', '\u{11d43}'), - ('𑵆', '\u{11d47}'), - ('𑵠', '𑵥'), - ('𑵧', '𑵨'), - ('𑵪', '𑶎'), - ('\u{11d90}', '\u{11d91}'), - ('𑶓', '𑶖'), - ('𑶘', '𑶘'), - ('𑻠', '𑻶'), - ('\u{11f00}', '𑼐'), - ('𑼒', '\u{11f3a}'), - ('𑼾', '\u{11f40}'), - ('𑾰', '𑾰'), - ('𒀀', '𒎙'), - ('𒐀', '𒑮'), - ('𒒀', '𒕃'), - ('𒾐', '𒿰'), - ('𓀀', '𓐯'), - ('𓑁', '𓑆'), - ('𓑠', '𔏺'), - ('𔐀', '𔙆'), - ('𖄀', '\u{1612e}'), - ('𖠀', '𖨸'), - ('𖩀', '𖩞'), - ('𖩰', '𖪾'), - ('𖫐', '𖫭'), - ('𖬀', '𖬯'), - ('𖭀', '𖭃'), - ('𖭣', 
'𖭷'), - ('𖭽', '𖮏'), - ('𖵀', '𖵬'), - ('𖹀', '𖹿'), - ('𖼀', '𖽊'), - ('\u{16f4f}', '𖾇'), - ('\u{16f8f}', '𖾟'), - ('𖿠', '𖿡'), - ('𖿣', '𖿣'), - ('\u{16ff0}', '\u{16ff1}'), - ('𗀀', '𘟷'), - ('𘠀', '𘳕'), - ('𘳿', '𘴈'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('𛀀', '𛄢'), - ('𛄲', '𛄲'), - ('𛅐', '𛅒'), - ('𛅕', '𛅕'), - ('𛅤', '𛅧'), - ('𛅰', '𛋻'), - ('𛰀', '𛱪'), - ('𛱰', '𛱼'), - ('𛲀', '𛲈'), - ('𛲐', '𛲙'), - ('\u{1bc9e}', '\u{1bc9e}'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝛀'), - ('𝛂', '𝛚'), - ('𝛜', '𝛺'), - ('𝛼', '𝜔'), - ('𝜖', '𝜴'), - ('𝜶', '𝝎'), - ('𝝐', '𝝮'), - ('𝝰', '𝞈'), - ('𝞊', '𝞨'), - ('𝞪', '𝟂'), - ('𝟄', '𝟋'), - ('𝼀', '𝼞'), - ('𝼥', '𝼪'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), - ('𞀰', '𞁭'), - ('\u{1e08f}', '\u{1e08f}'), - ('𞄀', '𞄬'), - ('𞄷', '𞄽'), - ('𞅎', '𞅎'), - ('𞊐', '𞊭'), - ('𞋀', '𞋫'), - ('𞓐', '𞓫'), - ('𞗐', '𞗭'), - ('𞗰', '𞗰'), - ('𞟠', '𞟦'), - ('𞟨', '𞟫'), - ('𞟭', '𞟮'), - ('𞟰', '𞟾'), - ('𞠀', '𞣄'), - ('𞤀', '𞥃'), - ('\u{1e947}', '\u{1e947}'), - ('𞥋', '𞥋'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('🄰', '🅉'), - ('🅐', '🅩'), - ('🅰', '🆉'), - ('𠀀', '𪛟'), - ('𪜀', '𫜹'), - ('𫝀', '𫠝'), - ('𫠠', '𬺡'), - ('𬺰', '𮯠'), - ('𮯰', '𮹝'), - ('丽', '𪘀'), - ('𰀀', '𱍊'), - ('𱍐', '𲎯'), -]; - -pub const BIDI_CONTROL: &'static [(char, char)] = &[ - 
('\u{61c}', '\u{61c}'), - ('\u{200e}', '\u{200f}'), - ('\u{202a}', '\u{202e}'), - ('\u{2066}', '\u{2069}'), -]; - -pub const BIDI_MIRRORED: &'static [(char, char)] = &[ - ('(', ')'), - ('<', '<'), - ('>', '>'), - ('[', '['), - (']', ']'), - ('{', '{'), - ('}', '}'), - ('«', '«'), - ('»', '»'), - ('༺', '༽'), - ('᚛', '᚜'), - ('‹', '›'), - ('⁅', '⁆'), - ('⁽', '⁾'), - ('₍', '₎'), - ('⅀', '⅀'), - ('∁', '∄'), - ('∈', '∍'), - ('∑', '∑'), - ('∕', '∖'), - ('√', '∝'), - ('∟', '∢'), - ('∤', '∤'), - ('∦', '∦'), - ('∫', '∳'), - ('∹', '∹'), - ('∻', '≌'), - ('≒', '≕'), - ('≟', '≠'), - ('≢', '≢'), - ('≤', '≫'), - ('≭', '⊌'), - ('⊏', '⊒'), - ('⊘', '⊘'), - ('⊢', '⊣'), - ('⊦', '⊸'), - ('⊾', '⊿'), - ('⋉', '⋍'), - ('⋐', '⋑'), - ('⋖', '⋭'), - ('⋰', '⋿'), - ('⌈', '⌋'), - ('⌠', '⌡'), - ('〈', '〉'), - ('❨', '❵'), - ('⟀', '⟀'), - ('⟃', '⟆'), - ('⟈', '⟉'), - ('⟋', '⟍'), - ('⟓', '⟖'), - ('⟜', '⟞'), - ('⟢', '⟯'), - ('⦃', '⦘'), - ('⦛', '⦠'), - ('⦢', '⦯'), - ('⦸', '⦸'), - ('⧀', '⧅'), - ('⧉', '⧉'), - ('⧎', '⧒'), - ('⧔', '⧕'), - ('⧘', '⧜'), - ('⧡', '⧡'), - ('⧣', '⧥'), - ('⧨', '⧩'), - ('⧴', '⧹'), - ('⧼', '⧽'), - ('⨊', '⨜'), - ('⨞', '⨡'), - ('⨤', '⨤'), - ('⨦', '⨦'), - ('⨩', '⨩'), - ('⨫', '⨮'), - ('⨴', '⨵'), - ('⨼', '⨾'), - ('⩗', '⩘'), - ('⩤', '⩥'), - ('⩪', '⩭'), - ('⩯', '⩰'), - ('⩳', '⩴'), - ('⩹', '⪣'), - ('⪦', '⪭'), - ('⪯', '⫖'), - ('⫝̸', '⫝̸'), - ('⫞', '⫞'), - ('⫢', '⫦'), - ('⫬', '⫮'), - ('⫳', '⫳'), - ('⫷', '⫻'), - ('⫽', '⫽'), - ('⯾', '⯾'), - ('⸂', '⸅'), - ('⸉', '⸊'), - ('⸌', '⸍'), - ('⸜', '⸝'), - ('⸠', '⸩'), - ('⹕', '⹜'), - ('〈', '】'), - ('〔', '〛'), - ('﹙', '﹞'), - ('﹤', '﹥'), - ('(', ')'), - ('<', '<'), - ('>', '>'), - ('[', '['), - (']', ']'), - ('{', '{'), - ('}', '}'), - ('⦅', '⦆'), - ('「', '」'), - ('𝛛', '𝛛'), - ('𝜕', '𝜕'), - ('𝝏', '𝝏'), - ('𝞉', '𝞉'), - ('𝟃', '𝟃'), -]; - -pub const CASE_IGNORABLE: &'static [(char, char)] = &[ - ('\'', '\''), - ('.', '.'), - (':', ':'), - ('^', '^'), - ('`', '`'), - ('¨', '¨'), - ('\u{ad}', '\u{ad}'), - ('¯', '¯'), - ('´', '´'), - ('·', '¸'), - ('ʰ', 
'\u{36f}'), - ('ʹ', '͵'), - ('ͺ', 'ͺ'), - ('΄', '΅'), - ('·', '·'), - ('\u{483}', '\u{489}'), - ('ՙ', 'ՙ'), - ('՟', '՟'), - ('\u{591}', '\u{5bd}'), - ('\u{5bf}', '\u{5bf}'), - ('\u{5c1}', '\u{5c2}'), - ('\u{5c4}', '\u{5c5}'), - ('\u{5c7}', '\u{5c7}'), - ('״', '״'), - ('\u{600}', '\u{605}'), - ('\u{610}', '\u{61a}'), - ('\u{61c}', '\u{61c}'), - ('ـ', 'ـ'), - ('\u{64b}', '\u{65f}'), - ('\u{670}', '\u{670}'), - ('\u{6d6}', '\u{6dd}'), - ('\u{6df}', '\u{6e8}'), - ('\u{6ea}', '\u{6ed}'), - ('\u{70f}', '\u{70f}'), - ('\u{711}', '\u{711}'), - ('\u{730}', '\u{74a}'), - ('\u{7a6}', '\u{7b0}'), - ('\u{7eb}', 'ߵ'), - ('ߺ', 'ߺ'), - ('\u{7fd}', '\u{7fd}'), - ('\u{816}', '\u{82d}'), - ('\u{859}', '\u{85b}'), - ('࢈', '࢈'), - ('\u{890}', '\u{891}'), - ('\u{897}', '\u{89f}'), - ('ࣉ', '\u{902}'), - ('\u{93a}', '\u{93a}'), - ('\u{93c}', '\u{93c}'), - ('\u{941}', '\u{948}'), - ('\u{94d}', '\u{94d}'), - ('\u{951}', '\u{957}'), - ('\u{962}', '\u{963}'), - ('ॱ', 'ॱ'), - ('\u{981}', '\u{981}'), - ('\u{9bc}', '\u{9bc}'), - ('\u{9c1}', '\u{9c4}'), - ('\u{9cd}', '\u{9cd}'), - ('\u{9e2}', '\u{9e3}'), - ('\u{9fe}', '\u{9fe}'), - ('\u{a01}', '\u{a02}'), - ('\u{a3c}', '\u{a3c}'), - ('\u{a41}', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4d}'), - ('\u{a51}', '\u{a51}'), - ('\u{a70}', '\u{a71}'), - ('\u{a75}', '\u{a75}'), - ('\u{a81}', '\u{a82}'), - ('\u{abc}', '\u{abc}'), - ('\u{ac1}', '\u{ac5}'), - ('\u{ac7}', '\u{ac8}'), - ('\u{acd}', '\u{acd}'), - ('\u{ae2}', '\u{ae3}'), - ('\u{afa}', '\u{aff}'), - ('\u{b01}', '\u{b01}'), - ('\u{b3c}', '\u{b3c}'), - ('\u{b3f}', '\u{b3f}'), - ('\u{b41}', '\u{b44}'), - ('\u{b4d}', '\u{b4d}'), - ('\u{b55}', '\u{b56}'), - ('\u{b62}', '\u{b63}'), - ('\u{b82}', '\u{b82}'), - ('\u{bc0}', '\u{bc0}'), - ('\u{bcd}', '\u{bcd}'), - ('\u{c00}', '\u{c00}'), - ('\u{c04}', '\u{c04}'), - ('\u{c3c}', '\u{c3c}'), - ('\u{c3e}', '\u{c40}'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4d}'), - ('\u{c55}', '\u{c56}'), - ('\u{c62}', '\u{c63}'), - ('\u{c81}', '\u{c81}'), 
- ('\u{cbc}', '\u{cbc}'), - ('\u{cbf}', '\u{cbf}'), - ('\u{cc6}', '\u{cc6}'), - ('\u{ccc}', '\u{ccd}'), - ('\u{ce2}', '\u{ce3}'), - ('\u{d00}', '\u{d01}'), - ('\u{d3b}', '\u{d3c}'), - ('\u{d41}', '\u{d44}'), - ('\u{d4d}', '\u{d4d}'), - ('\u{d62}', '\u{d63}'), - ('\u{d81}', '\u{d81}'), - ('\u{dca}', '\u{dca}'), - ('\u{dd2}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('\u{e31}', '\u{e31}'), - ('\u{e34}', '\u{e3a}'), - ('ๆ', '\u{e4e}'), - ('\u{eb1}', '\u{eb1}'), - ('\u{eb4}', '\u{ebc}'), - ('ໆ', 'ໆ'), - ('\u{ec8}', '\u{ece}'), - ('\u{f18}', '\u{f19}'), - ('\u{f35}', '\u{f35}'), - ('\u{f37}', '\u{f37}'), - ('\u{f39}', '\u{f39}'), - ('\u{f71}', '\u{f7e}'), - ('\u{f80}', '\u{f84}'), - ('\u{f86}', '\u{f87}'), - ('\u{f8d}', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('\u{fc6}', '\u{fc6}'), - ('\u{102d}', '\u{1030}'), - ('\u{1032}', '\u{1037}'), - ('\u{1039}', '\u{103a}'), - ('\u{103d}', '\u{103e}'), - ('\u{1058}', '\u{1059}'), - ('\u{105e}', '\u{1060}'), - ('\u{1071}', '\u{1074}'), - ('\u{1082}', '\u{1082}'), - ('\u{1085}', '\u{1086}'), - ('\u{108d}', '\u{108d}'), - ('\u{109d}', '\u{109d}'), - ('ჼ', 'ჼ'), - ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1733}'), - ('\u{1752}', '\u{1753}'), - ('\u{1772}', '\u{1773}'), - ('\u{17b4}', '\u{17b5}'), - ('\u{17b7}', '\u{17bd}'), - ('\u{17c6}', '\u{17c6}'), - ('\u{17c9}', '\u{17d3}'), - ('ៗ', 'ៗ'), - ('\u{17dd}', '\u{17dd}'), - ('\u{180b}', '\u{180f}'), - ('ᡃ', 'ᡃ'), - ('\u{1885}', '\u{1886}'), - ('\u{18a9}', '\u{18a9}'), - ('\u{1920}', '\u{1922}'), - ('\u{1927}', '\u{1928}'), - ('\u{1932}', '\u{1932}'), - ('\u{1939}', '\u{193b}'), - ('\u{1a17}', '\u{1a18}'), - ('\u{1a1b}', '\u{1a1b}'), - ('\u{1a56}', '\u{1a56}'), - ('\u{1a58}', '\u{1a5e}'), - ('\u{1a60}', '\u{1a60}'), - ('\u{1a62}', '\u{1a62}'), - ('\u{1a65}', '\u{1a6c}'), - ('\u{1a73}', '\u{1a7c}'), - ('\u{1a7f}', '\u{1a7f}'), - ('ᪧ', 'ᪧ'), - ('\u{1ab0}', '\u{1ace}'), - ('\u{1b00}', '\u{1b03}'), - ('\u{1b34}', '\u{1b34}'), - ('\u{1b36}', '\u{1b3a}'), - 
('\u{1b3c}', '\u{1b3c}'), - ('\u{1b42}', '\u{1b42}'), - ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '\u{1b81}'), - ('\u{1ba2}', '\u{1ba5}'), - ('\u{1ba8}', '\u{1ba9}'), - ('\u{1bab}', '\u{1bad}'), - ('\u{1be6}', '\u{1be6}'), - ('\u{1be8}', '\u{1be9}'), - ('\u{1bed}', '\u{1bed}'), - ('\u{1bef}', '\u{1bf1}'), - ('\u{1c2c}', '\u{1c33}'), - ('\u{1c36}', '\u{1c37}'), - ('ᱸ', 'ᱽ'), - ('\u{1cd0}', '\u{1cd2}'), - ('\u{1cd4}', '\u{1ce0}'), - ('\u{1ce2}', '\u{1ce8}'), - ('\u{1ced}', '\u{1ced}'), - ('\u{1cf4}', '\u{1cf4}'), - ('\u{1cf8}', '\u{1cf9}'), - ('ᴬ', 'ᵪ'), - ('ᵸ', 'ᵸ'), - ('ᶛ', '\u{1dff}'), - ('᾽', '᾽'), - ('᾿', '῁'), - ('῍', '῏'), - ('῝', '῟'), - ('῭', '`'), - ('´', '῾'), - ('\u{200b}', '\u{200f}'), - ('‘', '’'), - ('․', '․'), - ('‧', '‧'), - ('\u{202a}', '\u{202e}'), - ('\u{2060}', '\u{2064}'), - ('\u{2066}', '\u{206f}'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('\u{20d0}', '\u{20f0}'), - ('ⱼ', 'ⱽ'), - ('\u{2cef}', '\u{2cf1}'), - ('ⵯ', 'ⵯ'), - ('\u{2d7f}', '\u{2d7f}'), - ('\u{2de0}', '\u{2dff}'), - ('ⸯ', 'ⸯ'), - ('々', '々'), - ('\u{302a}', '\u{302d}'), - ('〱', '〵'), - ('〻', '〻'), - ('\u{3099}', 'ゞ'), - ('ー', 'ヾ'), - ('ꀕ', 'ꀕ'), - ('ꓸ', 'ꓽ'), - ('ꘌ', 'ꘌ'), - ('\u{a66f}', '\u{a672}'), - ('\u{a674}', '\u{a67d}'), - ('ꙿ', 'ꙿ'), - ('ꚜ', '\u{a69f}'), - ('\u{a6f0}', '\u{a6f1}'), - ('꜀', '꜡'), - ('ꝰ', 'ꝰ'), - ('ꞈ', '꞊'), - ('ꟲ', 'ꟴ'), - ('ꟸ', 'ꟹ'), - ('\u{a802}', '\u{a802}'), - ('\u{a806}', '\u{a806}'), - ('\u{a80b}', '\u{a80b}'), - ('\u{a825}', '\u{a826}'), - ('\u{a82c}', '\u{a82c}'), - ('\u{a8c4}', '\u{a8c5}'), - ('\u{a8e0}', '\u{a8f1}'), - ('\u{a8ff}', '\u{a8ff}'), - ('\u{a926}', '\u{a92d}'), - ('\u{a947}', '\u{a951}'), - ('\u{a980}', '\u{a982}'), - ('\u{a9b3}', '\u{a9b3}'), - ('\u{a9b6}', '\u{a9b9}'), - ('\u{a9bc}', '\u{a9bd}'), - ('ꧏ', 'ꧏ'), - ('\u{a9e5}', 'ꧦ'), - ('\u{aa29}', '\u{aa2e}'), - ('\u{aa31}', '\u{aa32}'), - ('\u{aa35}', '\u{aa36}'), - ('\u{aa43}', '\u{aa43}'), - ('\u{aa4c}', '\u{aa4c}'), - ('ꩰ', 'ꩰ'), - ('\u{aa7c}', '\u{aa7c}'), - ('\u{aab0}', '\u{aab0}'), 
- ('\u{aab2}', '\u{aab4}'), - ('\u{aab7}', '\u{aab8}'), - ('\u{aabe}', '\u{aabf}'), - ('\u{aac1}', '\u{aac1}'), - ('ꫝ', 'ꫝ'), - ('\u{aaec}', '\u{aaed}'), - ('ꫳ', 'ꫴ'), - ('\u{aaf6}', '\u{aaf6}'), - ('꭛', 'ꭟ'), - ('ꭩ', '꭫'), - ('\u{abe5}', '\u{abe5}'), - ('\u{abe8}', '\u{abe8}'), - ('\u{abed}', '\u{abed}'), - ('\u{fb1e}', '\u{fb1e}'), - ('﮲', '﯂'), - ('\u{fe00}', '\u{fe0f}'), - ('︓', '︓'), - ('\u{fe20}', '\u{fe2f}'), - ('﹒', '﹒'), - ('﹕', '﹕'), - ('\u{feff}', '\u{feff}'), - (''', '''), - ('.', '.'), - (':', ':'), - ('^', '^'), - ('`', '`'), - ('ー', 'ー'), - ('\u{ff9e}', '\u{ff9f}'), - (' ̄', ' ̄'), - ('\u{fff9}', '\u{fffb}'), - ('\u{101fd}', '\u{101fd}'), - ('\u{102e0}', '\u{102e0}'), - ('\u{10376}', '\u{1037a}'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('\u{10a01}', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '\u{10a0f}'), - ('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '\u{10a3f}'), - ('\u{10ae5}', '\u{10ae6}'), - ('\u{10d24}', '\u{10d27}'), - ('𐵎', '𐵎'), - ('\u{10d69}', '\u{10d6d}'), - ('𐵯', '𐵯'), - ('\u{10eab}', '\u{10eac}'), - ('\u{10efc}', '\u{10eff}'), - ('\u{10f46}', '\u{10f50}'), - ('\u{10f82}', '\u{10f85}'), - ('\u{11001}', '\u{11001}'), - ('\u{11038}', '\u{11046}'), - ('\u{11070}', '\u{11070}'), - ('\u{11073}', '\u{11074}'), - ('\u{1107f}', '\u{11081}'), - ('\u{110b3}', '\u{110b6}'), - ('\u{110b9}', '\u{110ba}'), - ('\u{110bd}', '\u{110bd}'), - ('\u{110c2}', '\u{110c2}'), - ('\u{110cd}', '\u{110cd}'), - ('\u{11100}', '\u{11102}'), - ('\u{11127}', '\u{1112b}'), - ('\u{1112d}', '\u{11134}'), - ('\u{11173}', '\u{11173}'), - ('\u{11180}', '\u{11181}'), - ('\u{111b6}', '\u{111be}'), - ('\u{111c9}', '\u{111cc}'), - ('\u{111cf}', '\u{111cf}'), - ('\u{1122f}', '\u{11231}'), - ('\u{11234}', '\u{11234}'), - ('\u{11236}', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), - ('\u{11241}', '\u{11241}'), - ('\u{112df}', '\u{112df}'), - ('\u{112e3}', '\u{112ea}'), - ('\u{11300}', '\u{11301}'), - ('\u{1133b}', '\u{1133c}'), - ('\u{11340}', '\u{11340}'), - 
('\u{11366}', '\u{1136c}'), - ('\u{11370}', '\u{11374}'), - ('\u{113bb}', '\u{113c0}'), - ('\u{113ce}', '\u{113ce}'), - ('\u{113d0}', '\u{113d0}'), - ('\u{113d2}', '\u{113d2}'), - ('\u{113e1}', '\u{113e2}'), - ('\u{11438}', '\u{1143f}'), - ('\u{11442}', '\u{11444}'), - ('\u{11446}', '\u{11446}'), - ('\u{1145e}', '\u{1145e}'), - ('\u{114b3}', '\u{114b8}'), - ('\u{114ba}', '\u{114ba}'), - ('\u{114bf}', '\u{114c0}'), - ('\u{114c2}', '\u{114c3}'), - ('\u{115b2}', '\u{115b5}'), - ('\u{115bc}', '\u{115bd}'), - ('\u{115bf}', '\u{115c0}'), - ('\u{115dc}', '\u{115dd}'), - ('\u{11633}', '\u{1163a}'), - ('\u{1163d}', '\u{1163d}'), - ('\u{1163f}', '\u{11640}'), - ('\u{116ab}', '\u{116ab}'), - ('\u{116ad}', '\u{116ad}'), - ('\u{116b0}', '\u{116b5}'), - ('\u{116b7}', '\u{116b7}'), - ('\u{1171d}', '\u{1171d}'), - ('\u{1171f}', '\u{1171f}'), - ('\u{11722}', '\u{11725}'), - ('\u{11727}', '\u{1172b}'), - ('\u{1182f}', '\u{11837}'), - ('\u{11839}', '\u{1183a}'), - ('\u{1193b}', '\u{1193c}'), - ('\u{1193e}', '\u{1193e}'), - ('\u{11943}', '\u{11943}'), - ('\u{119d4}', '\u{119d7}'), - ('\u{119da}', '\u{119db}'), - ('\u{119e0}', '\u{119e0}'), - ('\u{11a01}', '\u{11a0a}'), - ('\u{11a33}', '\u{11a38}'), - ('\u{11a3b}', '\u{11a3e}'), - ('\u{11a47}', '\u{11a47}'), - ('\u{11a51}', '\u{11a56}'), - ('\u{11a59}', '\u{11a5b}'), - ('\u{11a8a}', '\u{11a96}'), - ('\u{11a98}', '\u{11a99}'), - ('\u{11c30}', '\u{11c36}'), - ('\u{11c38}', '\u{11c3d}'), - ('\u{11c3f}', '\u{11c3f}'), - ('\u{11c92}', '\u{11ca7}'), - ('\u{11caa}', '\u{11cb0}'), - ('\u{11cb2}', '\u{11cb3}'), - ('\u{11cb5}', '\u{11cb6}'), - ('\u{11d31}', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d45}'), - ('\u{11d47}', '\u{11d47}'), - ('\u{11d90}', '\u{11d91}'), - ('\u{11d95}', '\u{11d95}'), - ('\u{11d97}', '\u{11d97}'), - ('\u{11ef3}', '\u{11ef4}'), - ('\u{11f00}', '\u{11f01}'), - ('\u{11f36}', '\u{11f3a}'), - ('\u{11f40}', '\u{11f40}'), - ('\u{11f42}', '\u{11f42}'), - ('\u{11f5a}', 
'\u{11f5a}'), - ('\u{13430}', '\u{13440}'), - ('\u{13447}', '\u{13455}'), - ('\u{1611e}', '\u{16129}'), - ('\u{1612d}', '\u{1612f}'), - ('\u{16af0}', '\u{16af4}'), - ('\u{16b30}', '\u{16b36}'), - ('𖭀', '𖭃'), - ('𖵀', '𖵂'), - ('𖵫', '𖵬'), - ('\u{16f4f}', '\u{16f4f}'), - ('\u{16f8f}', '𖾟'), - ('𖿠', '𖿡'), - ('𖿣', '\u{16fe4}'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('\u{1bc9d}', '\u{1bc9e}'), - ('\u{1bca0}', '\u{1bca3}'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d167}', '\u{1d169}'), - ('\u{1d173}', '\u{1d182}'), - ('\u{1d185}', '\u{1d18b}'), - ('\u{1d1aa}', '\u{1d1ad}'), - ('\u{1d242}', '\u{1d244}'), - ('\u{1da00}', '\u{1da36}'), - ('\u{1da3b}', '\u{1da6c}'), - ('\u{1da75}', '\u{1da75}'), - ('\u{1da84}', '\u{1da84}'), - ('\u{1da9b}', '\u{1da9f}'), - ('\u{1daa1}', '\u{1daaf}'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), - ('𞀰', '𞁭'), - ('\u{1e08f}', '\u{1e08f}'), - ('\u{1e130}', '𞄽'), - ('\u{1e2ae}', '\u{1e2ae}'), - ('\u{1e2ec}', '\u{1e2ef}'), - ('𞓫', '\u{1e4ef}'), - ('\u{1e5ee}', '\u{1e5ef}'), - ('\u{1e8d0}', '\u{1e8d6}'), - ('\u{1e944}', '𞥋'), - ('🏻', '🏿'), - ('\u{e0001}', '\u{e0001}'), - ('\u{e0020}', '\u{e007f}'), - ('\u{e0100}', '\u{e01ef}'), -]; - -pub const CASED: &'static [(char, char)] = &[ - ('A', 'Z'), - ('a', 'z'), - ('ª', 'ª'), - ('µ', 'µ'), - ('º', 'º'), - ('À', 'Ö'), - ('Ø', 'ö'), - ('ø', 'ƺ'), - ('Ƽ', 'ƿ'), - ('DŽ', 'ʓ'), - ('ʕ', 'ʸ'), - ('ˀ', 'ˁ'), - ('ˠ', 'ˤ'), - ('\u{345}', '\u{345}'), - ('Ͱ', 'ͳ'), - ('Ͷ', 'ͷ'), - ('ͺ', 'ͽ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', 'ϵ'), - ('Ϸ', 'ҁ'), - ('Ҋ', 'ԯ'), - ('Ա', 'Ֆ'), - ('ՠ', 'ֈ'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ა', 'ჺ'), - ('ჼ', 'ჿ'), - ('Ꭰ', 'Ᏽ'), - ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('ᴀ', 'ᶿ'), - ('Ḁ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), - ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - 
('Ὕ', 'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ᾼ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῌ'), - ('ῐ', 'ΐ'), - ('ῖ', 'Ί'), - ('ῠ', 'Ῥ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῼ'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('ℂ', 'ℂ'), - ('ℇ', 'ℇ'), - ('ℊ', 'ℓ'), - ('ℕ', 'ℕ'), - ('ℙ', 'ℝ'), - ('ℤ', 'ℤ'), - ('Ω', 'Ω'), - ('ℨ', 'ℨ'), - ('K', 'ℭ'), - ('ℯ', 'ℴ'), - ('ℹ', 'ℹ'), - ('ℼ', 'ℿ'), - ('ⅅ', 'ⅉ'), - ('ⅎ', 'ⅎ'), - ('Ⅰ', 'ⅿ'), - ('Ↄ', 'ↄ'), - ('Ⓐ', 'ⓩ'), - ('Ⰰ', 'ⳤ'), - ('Ⳬ', 'ⳮ'), - ('Ⳳ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('Ꙁ', 'ꙭ'), - ('Ꚁ', 'ꚝ'), - ('Ꜣ', 'ꞇ'), - ('Ꞌ', 'ꞎ'), - ('Ꞑ', 'ꟍ'), - ('Ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'Ƛ'), - ('ꟲ', 'ꟶ'), - ('ꟸ', 'ꟺ'), - ('ꬰ', 'ꭚ'), - ('ꭜ', 'ꭩ'), - ('ꭰ', 'ꮿ'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('A', 'Z'), - ('a', 'z'), - ('𐐀', '𐑏'), - ('𐒰', '𐓓'), - ('𐓘', '𐓻'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐞀', '𐞀'), - ('𐞃', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𐲀', '𐲲'), - ('𐳀', '𐳲'), - ('𐵐', '𐵥'), - ('𐵰', '𐶅'), - ('𑢠', '𑣟'), - ('𖹀', '𖹿'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝛀'), - ('𝛂', '𝛚'), - ('𝛜', '𝛺'), - ('𝛼', '𝜔'), - ('𝜖', '𝜴'), - ('𝜶', '𝝎'), - ('𝝐', '𝝮'), - ('𝝰', '𝞈'), - ('𝞊', '𝞨'), - ('𝞪', '𝟂'), - ('𝟄', '𝟋'), - ('𝼀', '𝼉'), - ('𝼋', '𝼞'), - ('𝼥', '𝼪'), - ('𞀰', '𞁭'), - ('𞤀', '𞥃'), - ('🄰', '🅉'), - ('🅐', '🅩'), - ('🅰', '🆉'), -]; - -pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[ - ('A', 'Z'), - ('µ', 'µ'), - ('À', 'Ö'), - ('Ø', 'ß'), - ('Ā', 'Ā'), - ('Ă', 'Ă'), - ('Ą', 'Ą'), - ('Ć', 'Ć'), - ('Ĉ', 'Ĉ'), - ('Ċ', 'Ċ'), - ('Č', 'Č'), - ('Ď', 'Ď'), - ('Đ', 'Đ'), - ('Ē', 'Ē'), - ('Ĕ', 'Ĕ'), - ('Ė', 'Ė'), - ('Ę', 'Ę'), - ('Ě', 'Ě'), - ('Ĝ', 'Ĝ'), - ('Ğ', 'Ğ'), - ('Ġ', 'Ġ'), - ('Ģ', 'Ģ'), - ('Ĥ', 'Ĥ'), - ('Ħ', 
'Ħ'), - ('Ĩ', 'Ĩ'), - ('Ī', 'Ī'), - ('Ĭ', 'Ĭ'), - ('Į', 'Į'), - ('İ', 'İ'), - ('IJ', 'IJ'), - ('Ĵ', 'Ĵ'), - ('Ķ', 'Ķ'), - ('Ĺ', 'Ĺ'), - ('Ļ', 'Ļ'), - ('Ľ', 'Ľ'), - ('Ŀ', 'Ŀ'), - ('Ł', 'Ł'), - ('Ń', 'Ń'), - ('Ņ', 'Ņ'), - ('Ň', 'Ň'), - ('ʼn', 'Ŋ'), - ('Ō', 'Ō'), - ('Ŏ', 'Ŏ'), - ('Ő', 'Ő'), - ('Œ', 'Œ'), - ('Ŕ', 'Ŕ'), - ('Ŗ', 'Ŗ'), - ('Ř', 'Ř'), - ('Ś', 'Ś'), - ('Ŝ', 'Ŝ'), - ('Ş', 'Ş'), - ('Š', 'Š'), - ('Ţ', 'Ţ'), - ('Ť', 'Ť'), - ('Ŧ', 'Ŧ'), - ('Ũ', 'Ũ'), - ('Ū', 'Ū'), - ('Ŭ', 'Ŭ'), - ('Ů', 'Ů'), - ('Ű', 'Ű'), - ('Ų', 'Ų'), - ('Ŵ', 'Ŵ'), - ('Ŷ', 'Ŷ'), - ('Ÿ', 'Ź'), - ('Ż', 'Ż'), - ('Ž', 'Ž'), - ('ſ', 'ſ'), - ('Ɓ', 'Ƃ'), - ('Ƅ', 'Ƅ'), - ('Ɔ', 'Ƈ'), - ('Ɖ', 'Ƌ'), - ('Ǝ', 'Ƒ'), - ('Ɠ', 'Ɣ'), - ('Ɩ', 'Ƙ'), - ('Ɯ', 'Ɲ'), - ('Ɵ', 'Ơ'), - ('Ƣ', 'Ƣ'), - ('Ƥ', 'Ƥ'), - ('Ʀ', 'Ƨ'), - ('Ʃ', 'Ʃ'), - ('Ƭ', 'Ƭ'), - ('Ʈ', 'Ư'), - ('Ʊ', 'Ƴ'), - ('Ƶ', 'Ƶ'), - ('Ʒ', 'Ƹ'), - ('Ƽ', 'Ƽ'), - ('DŽ', 'Dž'), - ('LJ', 'Lj'), - ('NJ', 'Nj'), - ('Ǎ', 'Ǎ'), - ('Ǐ', 'Ǐ'), - ('Ǒ', 'Ǒ'), - ('Ǔ', 'Ǔ'), - ('Ǖ', 'Ǖ'), - ('Ǘ', 'Ǘ'), - ('Ǚ', 'Ǚ'), - ('Ǜ', 'Ǜ'), - ('Ǟ', 'Ǟ'), - ('Ǡ', 'Ǡ'), - ('Ǣ', 'Ǣ'), - ('Ǥ', 'Ǥ'), - ('Ǧ', 'Ǧ'), - ('Ǩ', 'Ǩ'), - ('Ǫ', 'Ǫ'), - ('Ǭ', 'Ǭ'), - ('Ǯ', 'Ǯ'), - ('DZ', 'Dz'), - ('Ǵ', 'Ǵ'), - ('Ƕ', 'Ǹ'), - ('Ǻ', 'Ǻ'), - ('Ǽ', 'Ǽ'), - ('Ǿ', 'Ǿ'), - ('Ȁ', 'Ȁ'), - ('Ȃ', 'Ȃ'), - ('Ȅ', 'Ȅ'), - ('Ȇ', 'Ȇ'), - ('Ȉ', 'Ȉ'), - ('Ȋ', 'Ȋ'), - ('Ȍ', 'Ȍ'), - ('Ȏ', 'Ȏ'), - ('Ȑ', 'Ȑ'), - ('Ȓ', 'Ȓ'), - ('Ȕ', 'Ȕ'), - ('Ȗ', 'Ȗ'), - ('Ș', 'Ș'), - ('Ț', 'Ț'), - ('Ȝ', 'Ȝ'), - ('Ȟ', 'Ȟ'), - ('Ƞ', 'Ƞ'), - ('Ȣ', 'Ȣ'), - ('Ȥ', 'Ȥ'), - ('Ȧ', 'Ȧ'), - ('Ȩ', 'Ȩ'), - ('Ȫ', 'Ȫ'), - ('Ȭ', 'Ȭ'), - ('Ȯ', 'Ȯ'), - ('Ȱ', 'Ȱ'), - ('Ȳ', 'Ȳ'), - ('Ⱥ', 'Ȼ'), - ('Ƚ', 'Ⱦ'), - ('Ɂ', 'Ɂ'), - ('Ƀ', 'Ɇ'), - ('Ɉ', 'Ɉ'), - ('Ɋ', 'Ɋ'), - ('Ɍ', 'Ɍ'), - ('Ɏ', 'Ɏ'), - ('\u{345}', '\u{345}'), - ('Ͱ', 'Ͱ'), - ('Ͳ', 'Ͳ'), - ('Ͷ', 'Ͷ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ώ'), - ('Α', 'Ρ'), - ('Σ', 'Ϋ'), - ('ς', 'ς'), - ('Ϗ', 'ϑ'), - ('ϕ', 'ϖ'), - ('Ϙ', 'Ϙ'), - ('Ϛ', 'Ϛ'), - ('Ϝ', 'Ϝ'), - ('Ϟ', 'Ϟ'), - ('Ϡ', 
'Ϡ'), - ('Ϣ', 'Ϣ'), - ('Ϥ', 'Ϥ'), - ('Ϧ', 'Ϧ'), - ('Ϩ', 'Ϩ'), - ('Ϫ', 'Ϫ'), - ('Ϭ', 'Ϭ'), - ('Ϯ', 'Ϯ'), - ('ϰ', 'ϱ'), - ('ϴ', 'ϵ'), - ('Ϸ', 'Ϸ'), - ('Ϲ', 'Ϻ'), - ('Ͻ', 'Я'), - ('Ѡ', 'Ѡ'), - ('Ѣ', 'Ѣ'), - ('Ѥ', 'Ѥ'), - ('Ѧ', 'Ѧ'), - ('Ѩ', 'Ѩ'), - ('Ѫ', 'Ѫ'), - ('Ѭ', 'Ѭ'), - ('Ѯ', 'Ѯ'), - ('Ѱ', 'Ѱ'), - ('Ѳ', 'Ѳ'), - ('Ѵ', 'Ѵ'), - ('Ѷ', 'Ѷ'), - ('Ѹ', 'Ѹ'), - ('Ѻ', 'Ѻ'), - ('Ѽ', 'Ѽ'), - ('Ѿ', 'Ѿ'), - ('Ҁ', 'Ҁ'), - ('Ҋ', 'Ҋ'), - ('Ҍ', 'Ҍ'), - ('Ҏ', 'Ҏ'), - ('Ґ', 'Ґ'), - ('Ғ', 'Ғ'), - ('Ҕ', 'Ҕ'), - ('Җ', 'Җ'), - ('Ҙ', 'Ҙ'), - ('Қ', 'Қ'), - ('Ҝ', 'Ҝ'), - ('Ҟ', 'Ҟ'), - ('Ҡ', 'Ҡ'), - ('Ң', 'Ң'), - ('Ҥ', 'Ҥ'), - ('Ҧ', 'Ҧ'), - ('Ҩ', 'Ҩ'), - ('Ҫ', 'Ҫ'), - ('Ҭ', 'Ҭ'), - ('Ү', 'Ү'), - ('Ұ', 'Ұ'), - ('Ҳ', 'Ҳ'), - ('Ҵ', 'Ҵ'), - ('Ҷ', 'Ҷ'), - ('Ҹ', 'Ҹ'), - ('Һ', 'Һ'), - ('Ҽ', 'Ҽ'), - ('Ҿ', 'Ҿ'), - ('Ӏ', 'Ӂ'), - ('Ӄ', 'Ӄ'), - ('Ӆ', 'Ӆ'), - ('Ӈ', 'Ӈ'), - ('Ӊ', 'Ӊ'), - ('Ӌ', 'Ӌ'), - ('Ӎ', 'Ӎ'), - ('Ӑ', 'Ӑ'), - ('Ӓ', 'Ӓ'), - ('Ӕ', 'Ӕ'), - ('Ӗ', 'Ӗ'), - ('Ә', 'Ә'), - ('Ӛ', 'Ӛ'), - ('Ӝ', 'Ӝ'), - ('Ӟ', 'Ӟ'), - ('Ӡ', 'Ӡ'), - ('Ӣ', 'Ӣ'), - ('Ӥ', 'Ӥ'), - ('Ӧ', 'Ӧ'), - ('Ө', 'Ө'), - ('Ӫ', 'Ӫ'), - ('Ӭ', 'Ӭ'), - ('Ӯ', 'Ӯ'), - ('Ӱ', 'Ӱ'), - ('Ӳ', 'Ӳ'), - ('Ӵ', 'Ӵ'), - ('Ӷ', 'Ӷ'), - ('Ӹ', 'Ӹ'), - ('Ӻ', 'Ӻ'), - ('Ӽ', 'Ӽ'), - ('Ӿ', 'Ӿ'), - ('Ԁ', 'Ԁ'), - ('Ԃ', 'Ԃ'), - ('Ԅ', 'Ԅ'), - ('Ԇ', 'Ԇ'), - ('Ԉ', 'Ԉ'), - ('Ԋ', 'Ԋ'), - ('Ԍ', 'Ԍ'), - ('Ԏ', 'Ԏ'), - ('Ԑ', 'Ԑ'), - ('Ԓ', 'Ԓ'), - ('Ԕ', 'Ԕ'), - ('Ԗ', 'Ԗ'), - ('Ԙ', 'Ԙ'), - ('Ԛ', 'Ԛ'), - ('Ԝ', 'Ԝ'), - ('Ԟ', 'Ԟ'), - ('Ԡ', 'Ԡ'), - ('Ԣ', 'Ԣ'), - ('Ԥ', 'Ԥ'), - ('Ԧ', 'Ԧ'), - ('Ԩ', 'Ԩ'), - ('Ԫ', 'Ԫ'), - ('Ԭ', 'Ԭ'), - ('Ԯ', 'Ԯ'), - ('Ա', 'Ֆ'), - ('և', 'և'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ᏸ', 'ᏽ'), - ('ᲀ', 'Ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('Ḁ', 'Ḁ'), - ('Ḃ', 'Ḃ'), - ('Ḅ', 'Ḅ'), - ('Ḇ', 'Ḇ'), - ('Ḉ', 'Ḉ'), - ('Ḋ', 'Ḋ'), - ('Ḍ', 'Ḍ'), - ('Ḏ', 'Ḏ'), - ('Ḑ', 'Ḑ'), - ('Ḓ', 'Ḓ'), - ('Ḕ', 'Ḕ'), - ('Ḗ', 'Ḗ'), - ('Ḙ', 'Ḙ'), - ('Ḛ', 'Ḛ'), - ('Ḝ', 'Ḝ'), - ('Ḟ', 'Ḟ'), - ('Ḡ', 'Ḡ'), - ('Ḣ', 'Ḣ'), - ('Ḥ', 'Ḥ'), - ('Ḧ', 'Ḧ'), - ('Ḩ', 'Ḩ'), - ('Ḫ', 'Ḫ'), - 
('Ḭ', 'Ḭ'), - ('Ḯ', 'Ḯ'), - ('Ḱ', 'Ḱ'), - ('Ḳ', 'Ḳ'), - ('Ḵ', 'Ḵ'), - ('Ḷ', 'Ḷ'), - ('Ḹ', 'Ḹ'), - ('Ḻ', 'Ḻ'), - ('Ḽ', 'Ḽ'), - ('Ḿ', 'Ḿ'), - ('Ṁ', 'Ṁ'), - ('Ṃ', 'Ṃ'), - ('Ṅ', 'Ṅ'), - ('Ṇ', 'Ṇ'), - ('Ṉ', 'Ṉ'), - ('Ṋ', 'Ṋ'), - ('Ṍ', 'Ṍ'), - ('Ṏ', 'Ṏ'), - ('Ṑ', 'Ṑ'), - ('Ṓ', 'Ṓ'), - ('Ṕ', 'Ṕ'), - ('Ṗ', 'Ṗ'), - ('Ṙ', 'Ṙ'), - ('Ṛ', 'Ṛ'), - ('Ṝ', 'Ṝ'), - ('Ṟ', 'Ṟ'), - ('Ṡ', 'Ṡ'), - ('Ṣ', 'Ṣ'), - ('Ṥ', 'Ṥ'), - ('Ṧ', 'Ṧ'), - ('Ṩ', 'Ṩ'), - ('Ṫ', 'Ṫ'), - ('Ṭ', 'Ṭ'), - ('Ṯ', 'Ṯ'), - ('Ṱ', 'Ṱ'), - ('Ṳ', 'Ṳ'), - ('Ṵ', 'Ṵ'), - ('Ṷ', 'Ṷ'), - ('Ṹ', 'Ṹ'), - ('Ṻ', 'Ṻ'), - ('Ṽ', 'Ṽ'), - ('Ṿ', 'Ṿ'), - ('Ẁ', 'Ẁ'), - ('Ẃ', 'Ẃ'), - ('Ẅ', 'Ẅ'), - ('Ẇ', 'Ẇ'), - ('Ẉ', 'Ẉ'), - ('Ẋ', 'Ẋ'), - ('Ẍ', 'Ẍ'), - ('Ẏ', 'Ẏ'), - ('Ẑ', 'Ẑ'), - ('Ẓ', 'Ẓ'), - ('Ẕ', 'Ẕ'), - ('ẚ', 'ẛ'), - ('ẞ', 'ẞ'), - ('Ạ', 'Ạ'), - ('Ả', 'Ả'), - ('Ấ', 'Ấ'), - ('Ầ', 'Ầ'), - ('Ẩ', 'Ẩ'), - ('Ẫ', 'Ẫ'), - ('Ậ', 'Ậ'), - ('Ắ', 'Ắ'), - ('Ằ', 'Ằ'), - ('Ẳ', 'Ẳ'), - ('Ẵ', 'Ẵ'), - ('Ặ', 'Ặ'), - ('Ẹ', 'Ẹ'), - ('Ẻ', 'Ẻ'), - ('Ẽ', 'Ẽ'), - ('Ế', 'Ế'), - ('Ề', 'Ề'), - ('Ể', 'Ể'), - ('Ễ', 'Ễ'), - ('Ệ', 'Ệ'), - ('Ỉ', 'Ỉ'), - ('Ị', 'Ị'), - ('Ọ', 'Ọ'), - ('Ỏ', 'Ỏ'), - ('Ố', 'Ố'), - ('Ồ', 'Ồ'), - ('Ổ', 'Ổ'), - ('Ỗ', 'Ỗ'), - ('Ộ', 'Ộ'), - ('Ớ', 'Ớ'), - ('Ờ', 'Ờ'), - ('Ở', 'Ở'), - ('Ỡ', 'Ỡ'), - ('Ợ', 'Ợ'), - ('Ụ', 'Ụ'), - ('Ủ', 'Ủ'), - ('Ứ', 'Ứ'), - ('Ừ', 'Ừ'), - ('Ử', 'Ử'), - ('Ữ', 'Ữ'), - ('Ự', 'Ự'), - ('Ỳ', 'Ỳ'), - ('Ỵ', 'Ỵ'), - ('Ỷ', 'Ỷ'), - ('Ỹ', 'Ỹ'), - ('Ỻ', 'Ỻ'), - ('Ỽ', 'Ỽ'), - ('Ỿ', 'Ỿ'), - ('Ἀ', 'Ἇ'), - ('Ἐ', 'Ἕ'), - ('Ἠ', 'Ἧ'), - ('Ἰ', 'Ἷ'), - ('Ὀ', 'Ὅ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'Ὗ'), - ('Ὠ', 'Ὧ'), - ('ᾀ', 'ᾯ'), - ('ᾲ', 'ᾴ'), - ('ᾷ', 'ᾼ'), - ('ῂ', 'ῄ'), - ('ῇ', 'ῌ'), - ('Ῐ', 'Ί'), - ('Ῠ', 'Ῥ'), - ('ῲ', 'ῴ'), - ('ῷ', 'ῼ'), - ('Ω', 'Ω'), - ('K', 'Å'), - ('Ⅎ', 'Ⅎ'), - ('Ⅰ', 'Ⅿ'), - ('Ↄ', 'Ↄ'), - ('Ⓐ', 'Ⓩ'), - ('Ⰰ', 'Ⱟ'), - ('Ⱡ', 'Ⱡ'), - ('Ɫ', 'Ɽ'), - ('Ⱨ', 'Ⱨ'), - ('Ⱪ', 'Ⱪ'), - ('Ⱬ', 'Ⱬ'), - ('Ɑ', 'Ɒ'), - ('Ⱳ', 'Ⱳ'), - ('Ⱶ', 'Ⱶ'), - ('Ȿ', 'Ⲁ'), - ('Ⲃ', 'Ⲃ'), - ('Ⲅ', 'Ⲅ'), - ('Ⲇ', 'Ⲇ'), - ('Ⲉ', 'Ⲉ'), - ('Ⲋ', 'Ⲋ'), 
- ('Ⲍ', 'Ⲍ'), - ('Ⲏ', 'Ⲏ'), - ('Ⲑ', 'Ⲑ'), - ('Ⲓ', 'Ⲓ'), - ('Ⲕ', 'Ⲕ'), - ('Ⲗ', 'Ⲗ'), - ('Ⲙ', 'Ⲙ'), - ('Ⲛ', 'Ⲛ'), - ('Ⲝ', 'Ⲝ'), - ('Ⲟ', 'Ⲟ'), - ('Ⲡ', 'Ⲡ'), - ('Ⲣ', 'Ⲣ'), - ('Ⲥ', 'Ⲥ'), - ('Ⲧ', 'Ⲧ'), - ('Ⲩ', 'Ⲩ'), - ('Ⲫ', 'Ⲫ'), - ('Ⲭ', 'Ⲭ'), - ('Ⲯ', 'Ⲯ'), - ('Ⲱ', 'Ⲱ'), - ('Ⲳ', 'Ⲳ'), - ('Ⲵ', 'Ⲵ'), - ('Ⲷ', 'Ⲷ'), - ('Ⲹ', 'Ⲹ'), - ('Ⲻ', 'Ⲻ'), - ('Ⲽ', 'Ⲽ'), - ('Ⲿ', 'Ⲿ'), - ('Ⳁ', 'Ⳁ'), - ('Ⳃ', 'Ⳃ'), - ('Ⳅ', 'Ⳅ'), - ('Ⳇ', 'Ⳇ'), - ('Ⳉ', 'Ⳉ'), - ('Ⳋ', 'Ⳋ'), - ('Ⳍ', 'Ⳍ'), - ('Ⳏ', 'Ⳏ'), - ('Ⳑ', 'Ⳑ'), - ('Ⳓ', 'Ⳓ'), - ('Ⳕ', 'Ⳕ'), - ('Ⳗ', 'Ⳗ'), - ('Ⳙ', 'Ⳙ'), - ('Ⳛ', 'Ⳛ'), - ('Ⳝ', 'Ⳝ'), - ('Ⳟ', 'Ⳟ'), - ('Ⳡ', 'Ⳡ'), - ('Ⳣ', 'Ⳣ'), - ('Ⳬ', 'Ⳬ'), - ('Ⳮ', 'Ⳮ'), - ('Ⳳ', 'Ⳳ'), - ('Ꙁ', 'Ꙁ'), - ('Ꙃ', 'Ꙃ'), - ('Ꙅ', 'Ꙅ'), - ('Ꙇ', 'Ꙇ'), - ('Ꙉ', 'Ꙉ'), - ('Ꙋ', 'Ꙋ'), - ('Ꙍ', 'Ꙍ'), - ('Ꙏ', 'Ꙏ'), - ('Ꙑ', 'Ꙑ'), - ('Ꙓ', 'Ꙓ'), - ('Ꙕ', 'Ꙕ'), - ('Ꙗ', 'Ꙗ'), - ('Ꙙ', 'Ꙙ'), - ('Ꙛ', 'Ꙛ'), - ('Ꙝ', 'Ꙝ'), - ('Ꙟ', 'Ꙟ'), - ('Ꙡ', 'Ꙡ'), - ('Ꙣ', 'Ꙣ'), - ('Ꙥ', 'Ꙥ'), - ('Ꙧ', 'Ꙧ'), - ('Ꙩ', 'Ꙩ'), - ('Ꙫ', 'Ꙫ'), - ('Ꙭ', 'Ꙭ'), - ('Ꚁ', 'Ꚁ'), - ('Ꚃ', 'Ꚃ'), - ('Ꚅ', 'Ꚅ'), - ('Ꚇ', 'Ꚇ'), - ('Ꚉ', 'Ꚉ'), - ('Ꚋ', 'Ꚋ'), - ('Ꚍ', 'Ꚍ'), - ('Ꚏ', 'Ꚏ'), - ('Ꚑ', 'Ꚑ'), - ('Ꚓ', 'Ꚓ'), - ('Ꚕ', 'Ꚕ'), - ('Ꚗ', 'Ꚗ'), - ('Ꚙ', 'Ꚙ'), - ('Ꚛ', 'Ꚛ'), - ('Ꜣ', 'Ꜣ'), - ('Ꜥ', 'Ꜥ'), - ('Ꜧ', 'Ꜧ'), - ('Ꜩ', 'Ꜩ'), - ('Ꜫ', 'Ꜫ'), - ('Ꜭ', 'Ꜭ'), - ('Ꜯ', 'Ꜯ'), - ('Ꜳ', 'Ꜳ'), - ('Ꜵ', 'Ꜵ'), - ('Ꜷ', 'Ꜷ'), - ('Ꜹ', 'Ꜹ'), - ('Ꜻ', 'Ꜻ'), - ('Ꜽ', 'Ꜽ'), - ('Ꜿ', 'Ꜿ'), - ('Ꝁ', 'Ꝁ'), - ('Ꝃ', 'Ꝃ'), - ('Ꝅ', 'Ꝅ'), - ('Ꝇ', 'Ꝇ'), - ('Ꝉ', 'Ꝉ'), - ('Ꝋ', 'Ꝋ'), - ('Ꝍ', 'Ꝍ'), - ('Ꝏ', 'Ꝏ'), - ('Ꝑ', 'Ꝑ'), - ('Ꝓ', 'Ꝓ'), - ('Ꝕ', 'Ꝕ'), - ('Ꝗ', 'Ꝗ'), - ('Ꝙ', 'Ꝙ'), - ('Ꝛ', 'Ꝛ'), - ('Ꝝ', 'Ꝝ'), - ('Ꝟ', 'Ꝟ'), - ('Ꝡ', 'Ꝡ'), - ('Ꝣ', 'Ꝣ'), - ('Ꝥ', 'Ꝥ'), - ('Ꝧ', 'Ꝧ'), - ('Ꝩ', 'Ꝩ'), - ('Ꝫ', 'Ꝫ'), - ('Ꝭ', 'Ꝭ'), - ('Ꝯ', 'Ꝯ'), - ('Ꝺ', 'Ꝺ'), - ('Ꝼ', 'Ꝼ'), - ('Ᵹ', 'Ꝿ'), - ('Ꞁ', 'Ꞁ'), - ('Ꞃ', 'Ꞃ'), - ('Ꞅ', 'Ꞅ'), - ('Ꞇ', 'Ꞇ'), - ('Ꞌ', 'Ꞌ'), - ('Ɥ', 'Ɥ'), - ('Ꞑ', 'Ꞑ'), - ('Ꞓ', 'Ꞓ'), - ('Ꞗ', 'Ꞗ'), - ('Ꞙ', 'Ꞙ'), - ('Ꞛ', 'Ꞛ'), - ('Ꞝ', 'Ꞝ'), - ('Ꞟ', 'Ꞟ'), - ('Ꞡ', 'Ꞡ'), - ('Ꞣ', 'Ꞣ'), - ('Ꞥ', 'Ꞥ'), - ('Ꞧ', 'Ꞧ'), - ('Ꞩ', 
'Ꞩ'), - ('Ɦ', 'Ɪ'), - ('Ʞ', 'Ꞵ'), - ('Ꞷ', 'Ꞷ'), - ('Ꞹ', 'Ꞹ'), - ('Ꞻ', 'Ꞻ'), - ('Ꞽ', 'Ꞽ'), - ('Ꞿ', 'Ꞿ'), - ('Ꟁ', 'Ꟁ'), - ('Ꟃ', 'Ꟃ'), - ('Ꞔ', 'Ꟈ'), - ('Ꟊ', 'Ꟊ'), - ('Ɤ', 'Ꟍ'), - ('Ꟑ', 'Ꟑ'), - ('Ꟗ', 'Ꟗ'), - ('Ꟙ', 'Ꟙ'), - ('Ꟛ', 'Ꟛ'), - ('Ƛ', 'Ƛ'), - ('Ꟶ', 'Ꟶ'), - ('ꭰ', 'ꮿ'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('A', 'Z'), - ('𐐀', '𐐧'), - ('𐒰', '𐓓'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐲀', '𐲲'), - ('𐵐', '𐵥'), - ('𑢠', '𑢿'), - ('𖹀', '𖹟'), - ('𞤀', '𞤡'), -]; - -pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ - ('A', 'Z'), - ('a', 'z'), - ('µ', 'µ'), - ('À', 'Ö'), - ('Ø', 'ö'), - ('ø', 'ķ'), - ('Ĺ', 'ƌ'), - ('Ǝ', 'Ʃ'), - ('Ƭ', 'ƹ'), - ('Ƽ', 'ƽ'), - ('ƿ', 'ƿ'), - ('DŽ', 'Ƞ'), - ('Ȣ', 'ȳ'), - ('Ⱥ', 'ɔ'), - ('ɖ', 'ɗ'), - ('ə', 'ə'), - ('ɛ', 'ɜ'), - ('ɠ', 'ɡ'), - ('ɣ', 'ɦ'), - ('ɨ', 'ɬ'), - ('ɯ', 'ɯ'), - ('ɱ', 'ɲ'), - ('ɵ', 'ɵ'), - ('ɽ', 'ɽ'), - ('ʀ', 'ʀ'), - ('ʂ', 'ʃ'), - ('ʇ', 'ʌ'), - ('ʒ', 'ʒ'), - ('ʝ', 'ʞ'), - ('\u{345}', '\u{345}'), - ('Ͱ', 'ͳ'), - ('Ͷ', 'ͷ'), - ('ͻ', 'ͽ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', 'ϑ'), - ('ϕ', 'ϵ'), - ('Ϸ', 'ϻ'), - ('Ͻ', 'ҁ'), - ('Ҋ', 'ԯ'), - ('Ա', 'Ֆ'), - ('ա', 'և'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ა', 'ჺ'), - ('ჽ', 'ჿ'), - ('Ꭰ', 'Ᏽ'), - ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('ᵹ', 'ᵹ'), - ('ᵽ', 'ᵽ'), - ('ᶎ', 'ᶎ'), - ('Ḁ', 'ẛ'), - ('ẞ', 'ẞ'), - ('Ạ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), - ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ᾼ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῌ'), - ('ῐ', 'ΐ'), - ('ῖ', 'Ί'), - ('ῠ', 'Ῥ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῼ'), - ('Ω', 'Ω'), - ('K', 'Å'), - ('Ⅎ', 'Ⅎ'), - ('ⅎ', 'ⅎ'), - ('Ⅰ', 'ⅿ'), - ('Ↄ', 'ↄ'), - ('Ⓐ', 'ⓩ'), - ('Ⰰ', 'Ɒ'), - ('Ⱳ', 'ⱳ'), - ('Ⱶ', 'ⱶ'), - ('Ȿ', 'ⳣ'), - ('Ⳬ', 'ⳮ'), - ('Ⳳ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('Ꙁ', 'ꙭ'), - ('Ꚁ', 'ꚛ'), - ('Ꜣ', 'ꜯ'), - ('Ꜳ', 'ꝯ'), - ('Ꝺ', 'ꞇ'), - ('Ꞌ', 'Ɥ'), - ('Ꞑ', 'ꞔ'), - ('Ꞗ', 'Ɪ'), - 
('Ʞ', 'ꟍ'), - ('Ꟑ', 'ꟑ'), - ('Ꟗ', 'Ƛ'), - ('Ꟶ', 'ꟶ'), - ('ꭓ', 'ꭓ'), - ('ꭰ', 'ꮿ'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('A', 'Z'), - ('a', 'z'), - ('𐐀', '𐑏'), - ('𐒰', '𐓓'), - ('𐓘', '𐓻'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐲀', '𐲲'), - ('𐳀', '𐳲'), - ('𐵐', '𐵥'), - ('𐵰', '𐶅'), - ('𑢠', '𑣟'), - ('𖹀', '𖹿'), - ('𞤀', '𞥃'), -]; - -pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[ - ('A', 'Z'), - ('À', 'Ö'), - ('Ø', 'Þ'), - ('Ā', 'Ā'), - ('Ă', 'Ă'), - ('Ą', 'Ą'), - ('Ć', 'Ć'), - ('Ĉ', 'Ĉ'), - ('Ċ', 'Ċ'), - ('Č', 'Č'), - ('Ď', 'Ď'), - ('Đ', 'Đ'), - ('Ē', 'Ē'), - ('Ĕ', 'Ĕ'), - ('Ė', 'Ė'), - ('Ę', 'Ę'), - ('Ě', 'Ě'), - ('Ĝ', 'Ĝ'), - ('Ğ', 'Ğ'), - ('Ġ', 'Ġ'), - ('Ģ', 'Ģ'), - ('Ĥ', 'Ĥ'), - ('Ħ', 'Ħ'), - ('Ĩ', 'Ĩ'), - ('Ī', 'Ī'), - ('Ĭ', 'Ĭ'), - ('Į', 'Į'), - ('İ', 'İ'), - ('IJ', 'IJ'), - ('Ĵ', 'Ĵ'), - ('Ķ', 'Ķ'), - ('Ĺ', 'Ĺ'), - ('Ļ', 'Ļ'), - ('Ľ', 'Ľ'), - ('Ŀ', 'Ŀ'), - ('Ł', 'Ł'), - ('Ń', 'Ń'), - ('Ņ', 'Ņ'), - ('Ň', 'Ň'), - ('Ŋ', 'Ŋ'), - ('Ō', 'Ō'), - ('Ŏ', 'Ŏ'), - ('Ő', 'Ő'), - ('Œ', 'Œ'), - ('Ŕ', 'Ŕ'), - ('Ŗ', 'Ŗ'), - ('Ř', 'Ř'), - ('Ś', 'Ś'), - ('Ŝ', 'Ŝ'), - ('Ş', 'Ş'), - ('Š', 'Š'), - ('Ţ', 'Ţ'), - ('Ť', 'Ť'), - ('Ŧ', 'Ŧ'), - ('Ũ', 'Ũ'), - ('Ū', 'Ū'), - ('Ŭ', 'Ŭ'), - ('Ů', 'Ů'), - ('Ű', 'Ű'), - ('Ų', 'Ų'), - ('Ŵ', 'Ŵ'), - ('Ŷ', 'Ŷ'), - ('Ÿ', 'Ź'), - ('Ż', 'Ż'), - ('Ž', 'Ž'), - ('Ɓ', 'Ƃ'), - ('Ƅ', 'Ƅ'), - ('Ɔ', 'Ƈ'), - ('Ɖ', 'Ƌ'), - ('Ǝ', 'Ƒ'), - ('Ɠ', 'Ɣ'), - ('Ɩ', 'Ƙ'), - ('Ɯ', 'Ɲ'), - ('Ɵ', 'Ơ'), - ('Ƣ', 'Ƣ'), - ('Ƥ', 'Ƥ'), - ('Ʀ', 'Ƨ'), - ('Ʃ', 'Ʃ'), - ('Ƭ', 'Ƭ'), - ('Ʈ', 'Ư'), - ('Ʊ', 'Ƴ'), - ('Ƶ', 'Ƶ'), - ('Ʒ', 'Ƹ'), - ('Ƽ', 'Ƽ'), - ('DŽ', 'Dž'), - ('LJ', 'Lj'), - ('NJ', 'Nj'), - ('Ǎ', 'Ǎ'), - ('Ǐ', 'Ǐ'), - ('Ǒ', 'Ǒ'), - ('Ǔ', 'Ǔ'), - ('Ǖ', 'Ǖ'), - ('Ǘ', 'Ǘ'), - ('Ǚ', 'Ǚ'), - ('Ǜ', 'Ǜ'), - ('Ǟ', 'Ǟ'), - ('Ǡ', 'Ǡ'), - ('Ǣ', 'Ǣ'), - ('Ǥ', 'Ǥ'), - ('Ǧ', 'Ǧ'), - ('Ǩ', 'Ǩ'), - ('Ǫ', 'Ǫ'), - ('Ǭ', 'Ǭ'), - ('Ǯ', 'Ǯ'), - ('DZ', 'Dz'), - ('Ǵ', 'Ǵ'), - ('Ƕ', 'Ǹ'), - ('Ǻ', 'Ǻ'), - ('Ǽ', 'Ǽ'), 
- ('Ǿ', 'Ǿ'), - ('Ȁ', 'Ȁ'), - ('Ȃ', 'Ȃ'), - ('Ȅ', 'Ȅ'), - ('Ȇ', 'Ȇ'), - ('Ȉ', 'Ȉ'), - ('Ȋ', 'Ȋ'), - ('Ȍ', 'Ȍ'), - ('Ȏ', 'Ȏ'), - ('Ȑ', 'Ȑ'), - ('Ȓ', 'Ȓ'), - ('Ȕ', 'Ȕ'), - ('Ȗ', 'Ȗ'), - ('Ș', 'Ș'), - ('Ț', 'Ț'), - ('Ȝ', 'Ȝ'), - ('Ȟ', 'Ȟ'), - ('Ƞ', 'Ƞ'), - ('Ȣ', 'Ȣ'), - ('Ȥ', 'Ȥ'), - ('Ȧ', 'Ȧ'), - ('Ȩ', 'Ȩ'), - ('Ȫ', 'Ȫ'), - ('Ȭ', 'Ȭ'), - ('Ȯ', 'Ȯ'), - ('Ȱ', 'Ȱ'), - ('Ȳ', 'Ȳ'), - ('Ⱥ', 'Ȼ'), - ('Ƚ', 'Ⱦ'), - ('Ɂ', 'Ɂ'), - ('Ƀ', 'Ɇ'), - ('Ɉ', 'Ɉ'), - ('Ɋ', 'Ɋ'), - ('Ɍ', 'Ɍ'), - ('Ɏ', 'Ɏ'), - ('Ͱ', 'Ͱ'), - ('Ͳ', 'Ͳ'), - ('Ͷ', 'Ͷ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ώ'), - ('Α', 'Ρ'), - ('Σ', 'Ϋ'), - ('Ϗ', 'Ϗ'), - ('Ϙ', 'Ϙ'), - ('Ϛ', 'Ϛ'), - ('Ϝ', 'Ϝ'), - ('Ϟ', 'Ϟ'), - ('Ϡ', 'Ϡ'), - ('Ϣ', 'Ϣ'), - ('Ϥ', 'Ϥ'), - ('Ϧ', 'Ϧ'), - ('Ϩ', 'Ϩ'), - ('Ϫ', 'Ϫ'), - ('Ϭ', 'Ϭ'), - ('Ϯ', 'Ϯ'), - ('ϴ', 'ϴ'), - ('Ϸ', 'Ϸ'), - ('Ϲ', 'Ϻ'), - ('Ͻ', 'Я'), - ('Ѡ', 'Ѡ'), - ('Ѣ', 'Ѣ'), - ('Ѥ', 'Ѥ'), - ('Ѧ', 'Ѧ'), - ('Ѩ', 'Ѩ'), - ('Ѫ', 'Ѫ'), - ('Ѭ', 'Ѭ'), - ('Ѯ', 'Ѯ'), - ('Ѱ', 'Ѱ'), - ('Ѳ', 'Ѳ'), - ('Ѵ', 'Ѵ'), - ('Ѷ', 'Ѷ'), - ('Ѹ', 'Ѹ'), - ('Ѻ', 'Ѻ'), - ('Ѽ', 'Ѽ'), - ('Ѿ', 'Ѿ'), - ('Ҁ', 'Ҁ'), - ('Ҋ', 'Ҋ'), - ('Ҍ', 'Ҍ'), - ('Ҏ', 'Ҏ'), - ('Ґ', 'Ґ'), - ('Ғ', 'Ғ'), - ('Ҕ', 'Ҕ'), - ('Җ', 'Җ'), - ('Ҙ', 'Ҙ'), - ('Қ', 'Қ'), - ('Ҝ', 'Ҝ'), - ('Ҟ', 'Ҟ'), - ('Ҡ', 'Ҡ'), - ('Ң', 'Ң'), - ('Ҥ', 'Ҥ'), - ('Ҧ', 'Ҧ'), - ('Ҩ', 'Ҩ'), - ('Ҫ', 'Ҫ'), - ('Ҭ', 'Ҭ'), - ('Ү', 'Ү'), - ('Ұ', 'Ұ'), - ('Ҳ', 'Ҳ'), - ('Ҵ', 'Ҵ'), - ('Ҷ', 'Ҷ'), - ('Ҹ', 'Ҹ'), - ('Һ', 'Һ'), - ('Ҽ', 'Ҽ'), - ('Ҿ', 'Ҿ'), - ('Ӏ', 'Ӂ'), - ('Ӄ', 'Ӄ'), - ('Ӆ', 'Ӆ'), - ('Ӈ', 'Ӈ'), - ('Ӊ', 'Ӊ'), - ('Ӌ', 'Ӌ'), - ('Ӎ', 'Ӎ'), - ('Ӑ', 'Ӑ'), - ('Ӓ', 'Ӓ'), - ('Ӕ', 'Ӕ'), - ('Ӗ', 'Ӗ'), - ('Ә', 'Ә'), - ('Ӛ', 'Ӛ'), - ('Ӝ', 'Ӝ'), - ('Ӟ', 'Ӟ'), - ('Ӡ', 'Ӡ'), - ('Ӣ', 'Ӣ'), - ('Ӥ', 'Ӥ'), - ('Ӧ', 'Ӧ'), - ('Ө', 'Ө'), - ('Ӫ', 'Ӫ'), - ('Ӭ', 'Ӭ'), - ('Ӯ', 'Ӯ'), - ('Ӱ', 'Ӱ'), - ('Ӳ', 'Ӳ'), - ('Ӵ', 'Ӵ'), - ('Ӷ', 'Ӷ'), - ('Ӹ', 'Ӹ'), - ('Ӻ', 'Ӻ'), - ('Ӽ', 'Ӽ'), - ('Ӿ', 'Ӿ'), - ('Ԁ', 'Ԁ'), - ('Ԃ', 'Ԃ'), - ('Ԅ', 'Ԅ'), - ('Ԇ', 'Ԇ'), - ('Ԉ', 'Ԉ'), - ('Ԋ', 
'Ԋ'), - ('Ԍ', 'Ԍ'), - ('Ԏ', 'Ԏ'), - ('Ԑ', 'Ԑ'), - ('Ԓ', 'Ԓ'), - ('Ԕ', 'Ԕ'), - ('Ԗ', 'Ԗ'), - ('Ԙ', 'Ԙ'), - ('Ԛ', 'Ԛ'), - ('Ԝ', 'Ԝ'), - ('Ԟ', 'Ԟ'), - ('Ԡ', 'Ԡ'), - ('Ԣ', 'Ԣ'), - ('Ԥ', 'Ԥ'), - ('Ԧ', 'Ԧ'), - ('Ԩ', 'Ԩ'), - ('Ԫ', 'Ԫ'), - ('Ԭ', 'Ԭ'), - ('Ԯ', 'Ԯ'), - ('Ա', 'Ֆ'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('Ꭰ', 'Ᏽ'), - ('Ᲊ', 'Ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('Ḁ', 'Ḁ'), - ('Ḃ', 'Ḃ'), - ('Ḅ', 'Ḅ'), - ('Ḇ', 'Ḇ'), - ('Ḉ', 'Ḉ'), - ('Ḋ', 'Ḋ'), - ('Ḍ', 'Ḍ'), - ('Ḏ', 'Ḏ'), - ('Ḑ', 'Ḑ'), - ('Ḓ', 'Ḓ'), - ('Ḕ', 'Ḕ'), - ('Ḗ', 'Ḗ'), - ('Ḙ', 'Ḙ'), - ('Ḛ', 'Ḛ'), - ('Ḝ', 'Ḝ'), - ('Ḟ', 'Ḟ'), - ('Ḡ', 'Ḡ'), - ('Ḣ', 'Ḣ'), - ('Ḥ', 'Ḥ'), - ('Ḧ', 'Ḧ'), - ('Ḩ', 'Ḩ'), - ('Ḫ', 'Ḫ'), - ('Ḭ', 'Ḭ'), - ('Ḯ', 'Ḯ'), - ('Ḱ', 'Ḱ'), - ('Ḳ', 'Ḳ'), - ('Ḵ', 'Ḵ'), - ('Ḷ', 'Ḷ'), - ('Ḹ', 'Ḹ'), - ('Ḻ', 'Ḻ'), - ('Ḽ', 'Ḽ'), - ('Ḿ', 'Ḿ'), - ('Ṁ', 'Ṁ'), - ('Ṃ', 'Ṃ'), - ('Ṅ', 'Ṅ'), - ('Ṇ', 'Ṇ'), - ('Ṉ', 'Ṉ'), - ('Ṋ', 'Ṋ'), - ('Ṍ', 'Ṍ'), - ('Ṏ', 'Ṏ'), - ('Ṑ', 'Ṑ'), - ('Ṓ', 'Ṓ'), - ('Ṕ', 'Ṕ'), - ('Ṗ', 'Ṗ'), - ('Ṙ', 'Ṙ'), - ('Ṛ', 'Ṛ'), - ('Ṝ', 'Ṝ'), - ('Ṟ', 'Ṟ'), - ('Ṡ', 'Ṡ'), - ('Ṣ', 'Ṣ'), - ('Ṥ', 'Ṥ'), - ('Ṧ', 'Ṧ'), - ('Ṩ', 'Ṩ'), - ('Ṫ', 'Ṫ'), - ('Ṭ', 'Ṭ'), - ('Ṯ', 'Ṯ'), - ('Ṱ', 'Ṱ'), - ('Ṳ', 'Ṳ'), - ('Ṵ', 'Ṵ'), - ('Ṷ', 'Ṷ'), - ('Ṹ', 'Ṹ'), - ('Ṻ', 'Ṻ'), - ('Ṽ', 'Ṽ'), - ('Ṿ', 'Ṿ'), - ('Ẁ', 'Ẁ'), - ('Ẃ', 'Ẃ'), - ('Ẅ', 'Ẅ'), - ('Ẇ', 'Ẇ'), - ('Ẉ', 'Ẉ'), - ('Ẋ', 'Ẋ'), - ('Ẍ', 'Ẍ'), - ('Ẏ', 'Ẏ'), - ('Ẑ', 'Ẑ'), - ('Ẓ', 'Ẓ'), - ('Ẕ', 'Ẕ'), - ('ẞ', 'ẞ'), - ('Ạ', 'Ạ'), - ('Ả', 'Ả'), - ('Ấ', 'Ấ'), - ('Ầ', 'Ầ'), - ('Ẩ', 'Ẩ'), - ('Ẫ', 'Ẫ'), - ('Ậ', 'Ậ'), - ('Ắ', 'Ắ'), - ('Ằ', 'Ằ'), - ('Ẳ', 'Ẳ'), - ('Ẵ', 'Ẵ'), - ('Ặ', 'Ặ'), - ('Ẹ', 'Ẹ'), - ('Ẻ', 'Ẻ'), - ('Ẽ', 'Ẽ'), - ('Ế', 'Ế'), - ('Ề', 'Ề'), - ('Ể', 'Ể'), - ('Ễ', 'Ễ'), - ('Ệ', 'Ệ'), - ('Ỉ', 'Ỉ'), - ('Ị', 'Ị'), - ('Ọ', 'Ọ'), - ('Ỏ', 'Ỏ'), - ('Ố', 'Ố'), - ('Ồ', 'Ồ'), - ('Ổ', 'Ổ'), - ('Ỗ', 'Ỗ'), - ('Ộ', 'Ộ'), - ('Ớ', 'Ớ'), - ('Ờ', 'Ờ'), - ('Ở', 'Ở'), - ('Ỡ', 'Ỡ'), - ('Ợ', 'Ợ'), - ('Ụ', 'Ụ'), - ('Ủ', 'Ủ'), - ('Ứ', 'Ứ'), - ('Ừ', 'Ừ'), - ('Ử', 'Ử'), - ('Ữ', 'Ữ'), - 
('Ự', 'Ự'), - ('Ỳ', 'Ỳ'), - ('Ỵ', 'Ỵ'), - ('Ỷ', 'Ỷ'), - ('Ỹ', 'Ỹ'), - ('Ỻ', 'Ỻ'), - ('Ỽ', 'Ỽ'), - ('Ỿ', 'Ỿ'), - ('Ἀ', 'Ἇ'), - ('Ἐ', 'Ἕ'), - ('Ἠ', 'Ἧ'), - ('Ἰ', 'Ἷ'), - ('Ὀ', 'Ὅ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'Ὗ'), - ('Ὠ', 'Ὧ'), - ('ᾈ', 'ᾏ'), - ('ᾘ', 'ᾟ'), - ('ᾨ', 'ᾯ'), - ('Ᾰ', 'ᾼ'), - ('Ὲ', 'ῌ'), - ('Ῐ', 'Ί'), - ('Ῠ', 'Ῥ'), - ('Ὸ', 'ῼ'), - ('Ω', 'Ω'), - ('K', 'Å'), - ('Ⅎ', 'Ⅎ'), - ('Ⅰ', 'Ⅿ'), - ('Ↄ', 'Ↄ'), - ('Ⓐ', 'Ⓩ'), - ('Ⰰ', 'Ⱟ'), - ('Ⱡ', 'Ⱡ'), - ('Ɫ', 'Ɽ'), - ('Ⱨ', 'Ⱨ'), - ('Ⱪ', 'Ⱪ'), - ('Ⱬ', 'Ⱬ'), - ('Ɑ', 'Ɒ'), - ('Ⱳ', 'Ⱳ'), - ('Ⱶ', 'Ⱶ'), - ('Ȿ', 'Ⲁ'), - ('Ⲃ', 'Ⲃ'), - ('Ⲅ', 'Ⲅ'), - ('Ⲇ', 'Ⲇ'), - ('Ⲉ', 'Ⲉ'), - ('Ⲋ', 'Ⲋ'), - ('Ⲍ', 'Ⲍ'), - ('Ⲏ', 'Ⲏ'), - ('Ⲑ', 'Ⲑ'), - ('Ⲓ', 'Ⲓ'), - ('Ⲕ', 'Ⲕ'), - ('Ⲗ', 'Ⲗ'), - ('Ⲙ', 'Ⲙ'), - ('Ⲛ', 'Ⲛ'), - ('Ⲝ', 'Ⲝ'), - ('Ⲟ', 'Ⲟ'), - ('Ⲡ', 'Ⲡ'), - ('Ⲣ', 'Ⲣ'), - ('Ⲥ', 'Ⲥ'), - ('Ⲧ', 'Ⲧ'), - ('Ⲩ', 'Ⲩ'), - ('Ⲫ', 'Ⲫ'), - ('Ⲭ', 'Ⲭ'), - ('Ⲯ', 'Ⲯ'), - ('Ⲱ', 'Ⲱ'), - ('Ⲳ', 'Ⲳ'), - ('Ⲵ', 'Ⲵ'), - ('Ⲷ', 'Ⲷ'), - ('Ⲹ', 'Ⲹ'), - ('Ⲻ', 'Ⲻ'), - ('Ⲽ', 'Ⲽ'), - ('Ⲿ', 'Ⲿ'), - ('Ⳁ', 'Ⳁ'), - ('Ⳃ', 'Ⳃ'), - ('Ⳅ', 'Ⳅ'), - ('Ⳇ', 'Ⳇ'), - ('Ⳉ', 'Ⳉ'), - ('Ⳋ', 'Ⳋ'), - ('Ⳍ', 'Ⳍ'), - ('Ⳏ', 'Ⳏ'), - ('Ⳑ', 'Ⳑ'), - ('Ⳓ', 'Ⳓ'), - ('Ⳕ', 'Ⳕ'), - ('Ⳗ', 'Ⳗ'), - ('Ⳙ', 'Ⳙ'), - ('Ⳛ', 'Ⳛ'), - ('Ⳝ', 'Ⳝ'), - ('Ⳟ', 'Ⳟ'), - ('Ⳡ', 'Ⳡ'), - ('Ⳣ', 'Ⳣ'), - ('Ⳬ', 'Ⳬ'), - ('Ⳮ', 'Ⳮ'), - ('Ⳳ', 'Ⳳ'), - ('Ꙁ', 'Ꙁ'), - ('Ꙃ', 'Ꙃ'), - ('Ꙅ', 'Ꙅ'), - ('Ꙇ', 'Ꙇ'), - ('Ꙉ', 'Ꙉ'), - ('Ꙋ', 'Ꙋ'), - ('Ꙍ', 'Ꙍ'), - ('Ꙏ', 'Ꙏ'), - ('Ꙑ', 'Ꙑ'), - ('Ꙓ', 'Ꙓ'), - ('Ꙕ', 'Ꙕ'), - ('Ꙗ', 'Ꙗ'), - ('Ꙙ', 'Ꙙ'), - ('Ꙛ', 'Ꙛ'), - ('Ꙝ', 'Ꙝ'), - ('Ꙟ', 'Ꙟ'), - ('Ꙡ', 'Ꙡ'), - ('Ꙣ', 'Ꙣ'), - ('Ꙥ', 'Ꙥ'), - ('Ꙧ', 'Ꙧ'), - ('Ꙩ', 'Ꙩ'), - ('Ꙫ', 'Ꙫ'), - ('Ꙭ', 'Ꙭ'), - ('Ꚁ', 'Ꚁ'), - ('Ꚃ', 'Ꚃ'), - ('Ꚅ', 'Ꚅ'), - ('Ꚇ', 'Ꚇ'), - ('Ꚉ', 'Ꚉ'), - ('Ꚋ', 'Ꚋ'), - ('Ꚍ', 'Ꚍ'), - ('Ꚏ', 'Ꚏ'), - ('Ꚑ', 'Ꚑ'), - ('Ꚓ', 'Ꚓ'), - ('Ꚕ', 'Ꚕ'), - ('Ꚗ', 'Ꚗ'), - ('Ꚙ', 'Ꚙ'), - ('Ꚛ', 'Ꚛ'), - ('Ꜣ', 'Ꜣ'), - ('Ꜥ', 'Ꜥ'), - ('Ꜧ', 'Ꜧ'), - ('Ꜩ', 'Ꜩ'), - ('Ꜫ', 'Ꜫ'), - ('Ꜭ', 'Ꜭ'), - ('Ꜯ', 'Ꜯ'), - ('Ꜳ', 'Ꜳ'), - ('Ꜵ', 'Ꜵ'), - ('Ꜷ', 'Ꜷ'), - ('Ꜹ', 'Ꜹ'), - ('Ꜻ', 'Ꜻ'), 
- ('Ꜽ', 'Ꜽ'), - ('Ꜿ', 'Ꜿ'), - ('Ꝁ', 'Ꝁ'), - ('Ꝃ', 'Ꝃ'), - ('Ꝅ', 'Ꝅ'), - ('Ꝇ', 'Ꝇ'), - ('Ꝉ', 'Ꝉ'), - ('Ꝋ', 'Ꝋ'), - ('Ꝍ', 'Ꝍ'), - ('Ꝏ', 'Ꝏ'), - ('Ꝑ', 'Ꝑ'), - ('Ꝓ', 'Ꝓ'), - ('Ꝕ', 'Ꝕ'), - ('Ꝗ', 'Ꝗ'), - ('Ꝙ', 'Ꝙ'), - ('Ꝛ', 'Ꝛ'), - ('Ꝝ', 'Ꝝ'), - ('Ꝟ', 'Ꝟ'), - ('Ꝡ', 'Ꝡ'), - ('Ꝣ', 'Ꝣ'), - ('Ꝥ', 'Ꝥ'), - ('Ꝧ', 'Ꝧ'), - ('Ꝩ', 'Ꝩ'), - ('Ꝫ', 'Ꝫ'), - ('Ꝭ', 'Ꝭ'), - ('Ꝯ', 'Ꝯ'), - ('Ꝺ', 'Ꝺ'), - ('Ꝼ', 'Ꝼ'), - ('Ᵹ', 'Ꝿ'), - ('Ꞁ', 'Ꞁ'), - ('Ꞃ', 'Ꞃ'), - ('Ꞅ', 'Ꞅ'), - ('Ꞇ', 'Ꞇ'), - ('Ꞌ', 'Ꞌ'), - ('Ɥ', 'Ɥ'), - ('Ꞑ', 'Ꞑ'), - ('Ꞓ', 'Ꞓ'), - ('Ꞗ', 'Ꞗ'), - ('Ꞙ', 'Ꞙ'), - ('Ꞛ', 'Ꞛ'), - ('Ꞝ', 'Ꞝ'), - ('Ꞟ', 'Ꞟ'), - ('Ꞡ', 'Ꞡ'), - ('Ꞣ', 'Ꞣ'), - ('Ꞥ', 'Ꞥ'), - ('Ꞧ', 'Ꞧ'), - ('Ꞩ', 'Ꞩ'), - ('Ɦ', 'Ɪ'), - ('Ʞ', 'Ꞵ'), - ('Ꞷ', 'Ꞷ'), - ('Ꞹ', 'Ꞹ'), - ('Ꞻ', 'Ꞻ'), - ('Ꞽ', 'Ꞽ'), - ('Ꞿ', 'Ꞿ'), - ('Ꟁ', 'Ꟁ'), - ('Ꟃ', 'Ꟃ'), - ('Ꞔ', 'Ꟈ'), - ('Ꟊ', 'Ꟊ'), - ('Ɤ', 'Ꟍ'), - ('Ꟑ', 'Ꟑ'), - ('Ꟗ', 'Ꟗ'), - ('Ꟙ', 'Ꟙ'), - ('Ꟛ', 'Ꟛ'), - ('Ƛ', 'Ƛ'), - ('Ꟶ', 'Ꟶ'), - ('A', 'Z'), - ('𐐀', '𐐧'), - ('𐒰', '𐓓'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐲀', '𐲲'), - ('𐵐', '𐵥'), - ('𑢠', '𑢿'), - ('𖹀', '𖹟'), - ('𞤀', '𞤡'), -]; - -pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ - ('a', 'z'), - ('µ', 'µ'), - ('ß', 'ö'), - ('ø', 'ÿ'), - ('ā', 'ā'), - ('ă', 'ă'), - ('ą', 'ą'), - ('ć', 'ć'), - ('ĉ', 'ĉ'), - ('ċ', 'ċ'), - ('č', 'č'), - ('ď', 'ď'), - ('đ', 'đ'), - ('ē', 'ē'), - ('ĕ', 'ĕ'), - ('ė', 'ė'), - ('ę', 'ę'), - ('ě', 'ě'), - ('ĝ', 'ĝ'), - ('ğ', 'ğ'), - ('ġ', 'ġ'), - ('ģ', 'ģ'), - ('ĥ', 'ĥ'), - ('ħ', 'ħ'), - ('ĩ', 'ĩ'), - ('ī', 'ī'), - ('ĭ', 'ĭ'), - ('į', 'į'), - ('ı', 'ı'), - ('ij', 'ij'), - ('ĵ', 'ĵ'), - ('ķ', 'ķ'), - ('ĺ', 'ĺ'), - ('ļ', 'ļ'), - ('ľ', 'ľ'), - ('ŀ', 'ŀ'), - ('ł', 'ł'), - ('ń', 'ń'), - ('ņ', 'ņ'), - ('ň', 'ʼn'), - ('ŋ', 'ŋ'), - ('ō', 'ō'), - ('ŏ', 'ŏ'), - ('ő', 'ő'), - ('œ', 'œ'), - ('ŕ', 'ŕ'), - ('ŗ', 'ŗ'), - ('ř', 'ř'), - ('ś', 'ś'), - ('ŝ', 'ŝ'), - ('ş', 'ş'), - ('š', 'š'), - ('ţ', 'ţ'), - ('ť', 'ť'), - ('ŧ', 'ŧ'), - ('ũ', 'ũ'), - ('ū', 'ū'), - ('ŭ', 'ŭ'), - ('ů', 'ů'), - ('ű', 'ű'), - ('ų', 
'ų'), - ('ŵ', 'ŵ'), - ('ŷ', 'ŷ'), - ('ź', 'ź'), - ('ż', 'ż'), - ('ž', 'ƀ'), - ('ƃ', 'ƃ'), - ('ƅ', 'ƅ'), - ('ƈ', 'ƈ'), - ('ƌ', 'ƌ'), - ('ƒ', 'ƒ'), - ('ƕ', 'ƕ'), - ('ƙ', 'ƛ'), - ('ƞ', 'ƞ'), - ('ơ', 'ơ'), - ('ƣ', 'ƣ'), - ('ƥ', 'ƥ'), - ('ƨ', 'ƨ'), - ('ƭ', 'ƭ'), - ('ư', 'ư'), - ('ƴ', 'ƴ'), - ('ƶ', 'ƶ'), - ('ƹ', 'ƹ'), - ('ƽ', 'ƽ'), - ('ƿ', 'ƿ'), - ('DŽ', 'DŽ'), - ('dž', 'LJ'), - ('lj', 'NJ'), - ('nj', 'nj'), - ('ǎ', 'ǎ'), - ('ǐ', 'ǐ'), - ('ǒ', 'ǒ'), - ('ǔ', 'ǔ'), - ('ǖ', 'ǖ'), - ('ǘ', 'ǘ'), - ('ǚ', 'ǚ'), - ('ǜ', 'ǝ'), - ('ǟ', 'ǟ'), - ('ǡ', 'ǡ'), - ('ǣ', 'ǣ'), - ('ǥ', 'ǥ'), - ('ǧ', 'ǧ'), - ('ǩ', 'ǩ'), - ('ǫ', 'ǫ'), - ('ǭ', 'ǭ'), - ('ǯ', 'DZ'), - ('dz', 'dz'), - ('ǵ', 'ǵ'), - ('ǹ', 'ǹ'), - ('ǻ', 'ǻ'), - ('ǽ', 'ǽ'), - ('ǿ', 'ǿ'), - ('ȁ', 'ȁ'), - ('ȃ', 'ȃ'), - ('ȅ', 'ȅ'), - ('ȇ', 'ȇ'), - ('ȉ', 'ȉ'), - ('ȋ', 'ȋ'), - ('ȍ', 'ȍ'), - ('ȏ', 'ȏ'), - ('ȑ', 'ȑ'), - ('ȓ', 'ȓ'), - ('ȕ', 'ȕ'), - ('ȗ', 'ȗ'), - ('ș', 'ș'), - ('ț', 'ț'), - ('ȝ', 'ȝ'), - ('ȟ', 'ȟ'), - ('ȣ', 'ȣ'), - ('ȥ', 'ȥ'), - ('ȧ', 'ȧ'), - ('ȩ', 'ȩ'), - ('ȫ', 'ȫ'), - ('ȭ', 'ȭ'), - ('ȯ', 'ȯ'), - ('ȱ', 'ȱ'), - ('ȳ', 'ȳ'), - ('ȼ', 'ȼ'), - ('ȿ', 'ɀ'), - ('ɂ', 'ɂ'), - ('ɇ', 'ɇ'), - ('ɉ', 'ɉ'), - ('ɋ', 'ɋ'), - ('ɍ', 'ɍ'), - ('ɏ', 'ɔ'), - ('ɖ', 'ɗ'), - ('ə', 'ə'), - ('ɛ', 'ɜ'), - ('ɠ', 'ɡ'), - ('ɣ', 'ɦ'), - ('ɨ', 'ɬ'), - ('ɯ', 'ɯ'), - ('ɱ', 'ɲ'), - ('ɵ', 'ɵ'), - ('ɽ', 'ɽ'), - ('ʀ', 'ʀ'), - ('ʂ', 'ʃ'), - ('ʇ', 'ʌ'), - ('ʒ', 'ʒ'), - ('ʝ', 'ʞ'), - ('\u{345}', '\u{345}'), - ('ͱ', 'ͱ'), - ('ͳ', 'ͳ'), - ('ͷ', 'ͷ'), - ('ͻ', 'ͽ'), - ('ΐ', 'ΐ'), - ('ά', 'ώ'), - ('ϐ', 'ϑ'), - ('ϕ', 'ϗ'), - ('ϙ', 'ϙ'), - ('ϛ', 'ϛ'), - ('ϝ', 'ϝ'), - ('ϟ', 'ϟ'), - ('ϡ', 'ϡ'), - ('ϣ', 'ϣ'), - ('ϥ', 'ϥ'), - ('ϧ', 'ϧ'), - ('ϩ', 'ϩ'), - ('ϫ', 'ϫ'), - ('ϭ', 'ϭ'), - ('ϯ', 'ϳ'), - ('ϵ', 'ϵ'), - ('ϸ', 'ϸ'), - ('ϻ', 'ϻ'), - ('а', 'џ'), - ('ѡ', 'ѡ'), - ('ѣ', 'ѣ'), - ('ѥ', 'ѥ'), - ('ѧ', 'ѧ'), - ('ѩ', 'ѩ'), - ('ѫ', 'ѫ'), - ('ѭ', 'ѭ'), - ('ѯ', 'ѯ'), - ('ѱ', 'ѱ'), - ('ѳ', 'ѳ'), - ('ѵ', 'ѵ'), - ('ѷ', 'ѷ'), - ('ѹ', 'ѹ'), - ('ѻ', 'ѻ'), - ('ѽ', 'ѽ'), - ('ѿ', 'ѿ'), - ('ҁ', 
'ҁ'), - ('ҋ', 'ҋ'), - ('ҍ', 'ҍ'), - ('ҏ', 'ҏ'), - ('ґ', 'ґ'), - ('ғ', 'ғ'), - ('ҕ', 'ҕ'), - ('җ', 'җ'), - ('ҙ', 'ҙ'), - ('қ', 'қ'), - ('ҝ', 'ҝ'), - ('ҟ', 'ҟ'), - ('ҡ', 'ҡ'), - ('ң', 'ң'), - ('ҥ', 'ҥ'), - ('ҧ', 'ҧ'), - ('ҩ', 'ҩ'), - ('ҫ', 'ҫ'), - ('ҭ', 'ҭ'), - ('ү', 'ү'), - ('ұ', 'ұ'), - ('ҳ', 'ҳ'), - ('ҵ', 'ҵ'), - ('ҷ', 'ҷ'), - ('ҹ', 'ҹ'), - ('һ', 'һ'), - ('ҽ', 'ҽ'), - ('ҿ', 'ҿ'), - ('ӂ', 'ӂ'), - ('ӄ', 'ӄ'), - ('ӆ', 'ӆ'), - ('ӈ', 'ӈ'), - ('ӊ', 'ӊ'), - ('ӌ', 'ӌ'), - ('ӎ', 'ӏ'), - ('ӑ', 'ӑ'), - ('ӓ', 'ӓ'), - ('ӕ', 'ӕ'), - ('ӗ', 'ӗ'), - ('ә', 'ә'), - ('ӛ', 'ӛ'), - ('ӝ', 'ӝ'), - ('ӟ', 'ӟ'), - ('ӡ', 'ӡ'), - ('ӣ', 'ӣ'), - ('ӥ', 'ӥ'), - ('ӧ', 'ӧ'), - ('ө', 'ө'), - ('ӫ', 'ӫ'), - ('ӭ', 'ӭ'), - ('ӯ', 'ӯ'), - ('ӱ', 'ӱ'), - ('ӳ', 'ӳ'), - ('ӵ', 'ӵ'), - ('ӷ', 'ӷ'), - ('ӹ', 'ӹ'), - ('ӻ', 'ӻ'), - ('ӽ', 'ӽ'), - ('ӿ', 'ӿ'), - ('ԁ', 'ԁ'), - ('ԃ', 'ԃ'), - ('ԅ', 'ԅ'), - ('ԇ', 'ԇ'), - ('ԉ', 'ԉ'), - ('ԋ', 'ԋ'), - ('ԍ', 'ԍ'), - ('ԏ', 'ԏ'), - ('ԑ', 'ԑ'), - ('ԓ', 'ԓ'), - ('ԕ', 'ԕ'), - ('ԗ', 'ԗ'), - ('ԙ', 'ԙ'), - ('ԛ', 'ԛ'), - ('ԝ', 'ԝ'), - ('ԟ', 'ԟ'), - ('ԡ', 'ԡ'), - ('ԣ', 'ԣ'), - ('ԥ', 'ԥ'), - ('ԧ', 'ԧ'), - ('ԩ', 'ԩ'), - ('ԫ', 'ԫ'), - ('ԭ', 'ԭ'), - ('ԯ', 'ԯ'), - ('ա', 'և'), - ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲈ'), - ('ᲊ', 'ᲊ'), - ('ᵹ', 'ᵹ'), - ('ᵽ', 'ᵽ'), - ('ᶎ', 'ᶎ'), - ('ḁ', 'ḁ'), - ('ḃ', 'ḃ'), - ('ḅ', 'ḅ'), - ('ḇ', 'ḇ'), - ('ḉ', 'ḉ'), - ('ḋ', 'ḋ'), - ('ḍ', 'ḍ'), - ('ḏ', 'ḏ'), - ('ḑ', 'ḑ'), - ('ḓ', 'ḓ'), - ('ḕ', 'ḕ'), - ('ḗ', 'ḗ'), - ('ḙ', 'ḙ'), - ('ḛ', 'ḛ'), - ('ḝ', 'ḝ'), - ('ḟ', 'ḟ'), - ('ḡ', 'ḡ'), - ('ḣ', 'ḣ'), - ('ḥ', 'ḥ'), - ('ḧ', 'ḧ'), - ('ḩ', 'ḩ'), - ('ḫ', 'ḫ'), - ('ḭ', 'ḭ'), - ('ḯ', 'ḯ'), - ('ḱ', 'ḱ'), - ('ḳ', 'ḳ'), - ('ḵ', 'ḵ'), - ('ḷ', 'ḷ'), - ('ḹ', 'ḹ'), - ('ḻ', 'ḻ'), - ('ḽ', 'ḽ'), - ('ḿ', 'ḿ'), - ('ṁ', 'ṁ'), - ('ṃ', 'ṃ'), - ('ṅ', 'ṅ'), - ('ṇ', 'ṇ'), - ('ṉ', 'ṉ'), - ('ṋ', 'ṋ'), - ('ṍ', 'ṍ'), - ('ṏ', 'ṏ'), - ('ṑ', 'ṑ'), - ('ṓ', 'ṓ'), - ('ṕ', 'ṕ'), - ('ṗ', 'ṗ'), - ('ṙ', 'ṙ'), - ('ṛ', 'ṛ'), - ('ṝ', 'ṝ'), - ('ṟ', 'ṟ'), - ('ṡ', 'ṡ'), - ('ṣ', 'ṣ'), - ('ṥ', 'ṥ'), - ('ṧ', 'ṧ'), - ('ṩ', 'ṩ'), - 
('ṫ', 'ṫ'), - ('ṭ', 'ṭ'), - ('ṯ', 'ṯ'), - ('ṱ', 'ṱ'), - ('ṳ', 'ṳ'), - ('ṵ', 'ṵ'), - ('ṷ', 'ṷ'), - ('ṹ', 'ṹ'), - ('ṻ', 'ṻ'), - ('ṽ', 'ṽ'), - ('ṿ', 'ṿ'), - ('ẁ', 'ẁ'), - ('ẃ', 'ẃ'), - ('ẅ', 'ẅ'), - ('ẇ', 'ẇ'), - ('ẉ', 'ẉ'), - ('ẋ', 'ẋ'), - ('ẍ', 'ẍ'), - ('ẏ', 'ẏ'), - ('ẑ', 'ẑ'), - ('ẓ', 'ẓ'), - ('ẕ', 'ẛ'), - ('ạ', 'ạ'), - ('ả', 'ả'), - ('ấ', 'ấ'), - ('ầ', 'ầ'), - ('ẩ', 'ẩ'), - ('ẫ', 'ẫ'), - ('ậ', 'ậ'), - ('ắ', 'ắ'), - ('ằ', 'ằ'), - ('ẳ', 'ẳ'), - ('ẵ', 'ẵ'), - ('ặ', 'ặ'), - ('ẹ', 'ẹ'), - ('ẻ', 'ẻ'), - ('ẽ', 'ẽ'), - ('ế', 'ế'), - ('ề', 'ề'), - ('ể', 'ể'), - ('ễ', 'ễ'), - ('ệ', 'ệ'), - ('ỉ', 'ỉ'), - ('ị', 'ị'), - ('ọ', 'ọ'), - ('ỏ', 'ỏ'), - ('ố', 'ố'), - ('ồ', 'ồ'), - ('ổ', 'ổ'), - ('ỗ', 'ỗ'), - ('ộ', 'ộ'), - ('ớ', 'ớ'), - ('ờ', 'ờ'), - ('ở', 'ở'), - ('ỡ', 'ỡ'), - ('ợ', 'ợ'), - ('ụ', 'ụ'), - ('ủ', 'ủ'), - ('ứ', 'ứ'), - ('ừ', 'ừ'), - ('ử', 'ử'), - ('ữ', 'ữ'), - ('ự', 'ự'), - ('ỳ', 'ỳ'), - ('ỵ', 'ỵ'), - ('ỷ', 'ỷ'), - ('ỹ', 'ỹ'), - ('ỻ', 'ỻ'), - ('ỽ', 'ỽ'), - ('ỿ', 'ἇ'), - ('ἐ', 'ἕ'), - ('ἠ', 'ἧ'), - ('ἰ', 'ἷ'), - ('ὀ', 'ὅ'), - ('ὐ', 'ὗ'), - ('ὠ', 'ὧ'), - ('ὰ', 'ώ'), - ('ᾀ', 'ᾇ'), - ('ᾐ', 'ᾗ'), - ('ᾠ', 'ᾧ'), - ('ᾰ', 'ᾴ'), - ('ᾶ', 'ᾷ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῇ'), - ('ῐ', 'ΐ'), - ('ῖ', 'ῗ'), - ('ῠ', 'ῧ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῷ'), - ('ⅎ', 'ⅎ'), - ('ⅰ', 'ⅿ'), - ('ↄ', 'ↄ'), - ('ⓐ', 'ⓩ'), - ('ⰰ', 'ⱟ'), - ('ⱡ', 'ⱡ'), - ('ⱥ', 'ⱦ'), - ('ⱨ', 'ⱨ'), - ('ⱪ', 'ⱪ'), - ('ⱬ', 'ⱬ'), - ('ⱳ', 'ⱳ'), - ('ⱶ', 'ⱶ'), - ('ⲁ', 'ⲁ'), - ('ⲃ', 'ⲃ'), - ('ⲅ', 'ⲅ'), - ('ⲇ', 'ⲇ'), - ('ⲉ', 'ⲉ'), - ('ⲋ', 'ⲋ'), - ('ⲍ', 'ⲍ'), - ('ⲏ', 'ⲏ'), - ('ⲑ', 'ⲑ'), - ('ⲓ', 'ⲓ'), - ('ⲕ', 'ⲕ'), - ('ⲗ', 'ⲗ'), - ('ⲙ', 'ⲙ'), - ('ⲛ', 'ⲛ'), - ('ⲝ', 'ⲝ'), - ('ⲟ', 'ⲟ'), - ('ⲡ', 'ⲡ'), - ('ⲣ', 'ⲣ'), - ('ⲥ', 'ⲥ'), - ('ⲧ', 'ⲧ'), - ('ⲩ', 'ⲩ'), - ('ⲫ', 'ⲫ'), - ('ⲭ', 'ⲭ'), - ('ⲯ', 'ⲯ'), - ('ⲱ', 'ⲱ'), - ('ⲳ', 'ⲳ'), - ('ⲵ', 'ⲵ'), - ('ⲷ', 'ⲷ'), - ('ⲹ', 'ⲹ'), - ('ⲻ', 'ⲻ'), - ('ⲽ', 'ⲽ'), - ('ⲿ', 'ⲿ'), - ('ⳁ', 'ⳁ'), - ('ⳃ', 'ⳃ'), - ('ⳅ', 'ⳅ'), - ('ⳇ', 'ⳇ'), - ('ⳉ', 'ⳉ'), - ('ⳋ', 'ⳋ'), - ('ⳍ', 'ⳍ'), - ('ⳏ', 'ⳏ'), - ('ⳑ', 'ⳑ'), 
- ('ⳓ', 'ⳓ'), - ('ⳕ', 'ⳕ'), - ('ⳗ', 'ⳗ'), - ('ⳙ', 'ⳙ'), - ('ⳛ', 'ⳛ'), - ('ⳝ', 'ⳝ'), - ('ⳟ', 'ⳟ'), - ('ⳡ', 'ⳡ'), - ('ⳣ', 'ⳣ'), - ('ⳬ', 'ⳬ'), - ('ⳮ', 'ⳮ'), - ('ⳳ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ꙁ', 'ꙁ'), - ('ꙃ', 'ꙃ'), - ('ꙅ', 'ꙅ'), - ('ꙇ', 'ꙇ'), - ('ꙉ', 'ꙉ'), - ('ꙋ', 'ꙋ'), - ('ꙍ', 'ꙍ'), - ('ꙏ', 'ꙏ'), - ('ꙑ', 'ꙑ'), - ('ꙓ', 'ꙓ'), - ('ꙕ', 'ꙕ'), - ('ꙗ', 'ꙗ'), - ('ꙙ', 'ꙙ'), - ('ꙛ', 'ꙛ'), - ('ꙝ', 'ꙝ'), - ('ꙟ', 'ꙟ'), - ('ꙡ', 'ꙡ'), - ('ꙣ', 'ꙣ'), - ('ꙥ', 'ꙥ'), - ('ꙧ', 'ꙧ'), - ('ꙩ', 'ꙩ'), - ('ꙫ', 'ꙫ'), - ('ꙭ', 'ꙭ'), - ('ꚁ', 'ꚁ'), - ('ꚃ', 'ꚃ'), - ('ꚅ', 'ꚅ'), - ('ꚇ', 'ꚇ'), - ('ꚉ', 'ꚉ'), - ('ꚋ', 'ꚋ'), - ('ꚍ', 'ꚍ'), - ('ꚏ', 'ꚏ'), - ('ꚑ', 'ꚑ'), - ('ꚓ', 'ꚓ'), - ('ꚕ', 'ꚕ'), - ('ꚗ', 'ꚗ'), - ('ꚙ', 'ꚙ'), - ('ꚛ', 'ꚛ'), - ('ꜣ', 'ꜣ'), - ('ꜥ', 'ꜥ'), - ('ꜧ', 'ꜧ'), - ('ꜩ', 'ꜩ'), - ('ꜫ', 'ꜫ'), - ('ꜭ', 'ꜭ'), - ('ꜯ', 'ꜯ'), - ('ꜳ', 'ꜳ'), - ('ꜵ', 'ꜵ'), - ('ꜷ', 'ꜷ'), - ('ꜹ', 'ꜹ'), - ('ꜻ', 'ꜻ'), - ('ꜽ', 'ꜽ'), - ('ꜿ', 'ꜿ'), - ('ꝁ', 'ꝁ'), - ('ꝃ', 'ꝃ'), - ('ꝅ', 'ꝅ'), - ('ꝇ', 'ꝇ'), - ('ꝉ', 'ꝉ'), - ('ꝋ', 'ꝋ'), - ('ꝍ', 'ꝍ'), - ('ꝏ', 'ꝏ'), - ('ꝑ', 'ꝑ'), - ('ꝓ', 'ꝓ'), - ('ꝕ', 'ꝕ'), - ('ꝗ', 'ꝗ'), - ('ꝙ', 'ꝙ'), - ('ꝛ', 'ꝛ'), - ('ꝝ', 'ꝝ'), - ('ꝟ', 'ꝟ'), - ('ꝡ', 'ꝡ'), - ('ꝣ', 'ꝣ'), - ('ꝥ', 'ꝥ'), - ('ꝧ', 'ꝧ'), - ('ꝩ', 'ꝩ'), - ('ꝫ', 'ꝫ'), - ('ꝭ', 'ꝭ'), - ('ꝯ', 'ꝯ'), - ('ꝺ', 'ꝺ'), - ('ꝼ', 'ꝼ'), - ('ꝿ', 'ꝿ'), - ('ꞁ', 'ꞁ'), - ('ꞃ', 'ꞃ'), - ('ꞅ', 'ꞅ'), - ('ꞇ', 'ꞇ'), - ('ꞌ', 'ꞌ'), - ('ꞑ', 'ꞑ'), - ('ꞓ', 'ꞔ'), - ('ꞗ', 'ꞗ'), - ('ꞙ', 'ꞙ'), - ('ꞛ', 'ꞛ'), - ('ꞝ', 'ꞝ'), - ('ꞟ', 'ꞟ'), - ('ꞡ', 'ꞡ'), - ('ꞣ', 'ꞣ'), - ('ꞥ', 'ꞥ'), - ('ꞧ', 'ꞧ'), - ('ꞩ', 'ꞩ'), - ('ꞵ', 'ꞵ'), - ('ꞷ', 'ꞷ'), - ('ꞹ', 'ꞹ'), - ('ꞻ', 'ꞻ'), - ('ꞽ', 'ꞽ'), - ('ꞿ', 'ꞿ'), - ('ꟁ', 'ꟁ'), - ('ꟃ', 'ꟃ'), - ('ꟈ', 'ꟈ'), - ('ꟊ', 'ꟊ'), - ('ꟍ', 'ꟍ'), - ('ꟑ', 'ꟑ'), - ('ꟗ', 'ꟗ'), - ('ꟙ', 'ꟙ'), - ('ꟛ', 'ꟛ'), - ('ꟶ', 'ꟶ'), - ('ꭓ', 'ꭓ'), - ('ꭰ', 'ꮿ'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('a', 'z'), - ('𐐨', '𐑏'), - ('𐓘', '𐓻'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐳀', '𐳲'), - ('𐵰', '𐶅'), - ('𑣀', '𑣟'), - ('𖹠', '𖹿'), - ('𞤢', '𞥃'), -]; - 
-pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ - ('a', 'z'), - ('µ', 'µ'), - ('ß', 'ö'), - ('ø', 'ÿ'), - ('ā', 'ā'), - ('ă', 'ă'), - ('ą', 'ą'), - ('ć', 'ć'), - ('ĉ', 'ĉ'), - ('ċ', 'ċ'), - ('č', 'č'), - ('ď', 'ď'), - ('đ', 'đ'), - ('ē', 'ē'), - ('ĕ', 'ĕ'), - ('ė', 'ė'), - ('ę', 'ę'), - ('ě', 'ě'), - ('ĝ', 'ĝ'), - ('ğ', 'ğ'), - ('ġ', 'ġ'), - ('ģ', 'ģ'), - ('ĥ', 'ĥ'), - ('ħ', 'ħ'), - ('ĩ', 'ĩ'), - ('ī', 'ī'), - ('ĭ', 'ĭ'), - ('į', 'į'), - ('ı', 'ı'), - ('ij', 'ij'), - ('ĵ', 'ĵ'), - ('ķ', 'ķ'), - ('ĺ', 'ĺ'), - ('ļ', 'ļ'), - ('ľ', 'ľ'), - ('ŀ', 'ŀ'), - ('ł', 'ł'), - ('ń', 'ń'), - ('ņ', 'ņ'), - ('ň', 'ʼn'), - ('ŋ', 'ŋ'), - ('ō', 'ō'), - ('ŏ', 'ŏ'), - ('ő', 'ő'), - ('œ', 'œ'), - ('ŕ', 'ŕ'), - ('ŗ', 'ŗ'), - ('ř', 'ř'), - ('ś', 'ś'), - ('ŝ', 'ŝ'), - ('ş', 'ş'), - ('š', 'š'), - ('ţ', 'ţ'), - ('ť', 'ť'), - ('ŧ', 'ŧ'), - ('ũ', 'ũ'), - ('ū', 'ū'), - ('ŭ', 'ŭ'), - ('ů', 'ů'), - ('ű', 'ű'), - ('ų', 'ų'), - ('ŵ', 'ŵ'), - ('ŷ', 'ŷ'), - ('ź', 'ź'), - ('ż', 'ż'), - ('ž', 'ƀ'), - ('ƃ', 'ƃ'), - ('ƅ', 'ƅ'), - ('ƈ', 'ƈ'), - ('ƌ', 'ƌ'), - ('ƒ', 'ƒ'), - ('ƕ', 'ƕ'), - ('ƙ', 'ƛ'), - ('ƞ', 'ƞ'), - ('ơ', 'ơ'), - ('ƣ', 'ƣ'), - ('ƥ', 'ƥ'), - ('ƨ', 'ƨ'), - ('ƭ', 'ƭ'), - ('ư', 'ư'), - ('ƴ', 'ƴ'), - ('ƶ', 'ƶ'), - ('ƹ', 'ƹ'), - ('ƽ', 'ƽ'), - ('ƿ', 'ƿ'), - ('Dž', 'dž'), - ('Lj', 'lj'), - ('Nj', 'nj'), - ('ǎ', 'ǎ'), - ('ǐ', 'ǐ'), - ('ǒ', 'ǒ'), - ('ǔ', 'ǔ'), - ('ǖ', 'ǖ'), - ('ǘ', 'ǘ'), - ('ǚ', 'ǚ'), - ('ǜ', 'ǝ'), - ('ǟ', 'ǟ'), - ('ǡ', 'ǡ'), - ('ǣ', 'ǣ'), - ('ǥ', 'ǥ'), - ('ǧ', 'ǧ'), - ('ǩ', 'ǩ'), - ('ǫ', 'ǫ'), - ('ǭ', 'ǭ'), - ('ǯ', 'ǰ'), - ('Dz', 'dz'), - ('ǵ', 'ǵ'), - ('ǹ', 'ǹ'), - ('ǻ', 'ǻ'), - ('ǽ', 'ǽ'), - ('ǿ', 'ǿ'), - ('ȁ', 'ȁ'), - ('ȃ', 'ȃ'), - ('ȅ', 'ȅ'), - ('ȇ', 'ȇ'), - ('ȉ', 'ȉ'), - ('ȋ', 'ȋ'), - ('ȍ', 'ȍ'), - ('ȏ', 'ȏ'), - ('ȑ', 'ȑ'), - ('ȓ', 'ȓ'), - ('ȕ', 'ȕ'), - ('ȗ', 'ȗ'), - ('ș', 'ș'), - ('ț', 'ț'), - ('ȝ', 'ȝ'), - ('ȟ', 'ȟ'), - ('ȣ', 'ȣ'), - ('ȥ', 'ȥ'), - ('ȧ', 'ȧ'), - ('ȩ', 'ȩ'), - ('ȫ', 'ȫ'), - ('ȭ', 'ȭ'), - ('ȯ', 'ȯ'), - ('ȱ', 'ȱ'), - ('ȳ', 'ȳ'), - ('ȼ', 'ȼ'), - 
('ȿ', 'ɀ'), - ('ɂ', 'ɂ'), - ('ɇ', 'ɇ'), - ('ɉ', 'ɉ'), - ('ɋ', 'ɋ'), - ('ɍ', 'ɍ'), - ('ɏ', 'ɔ'), - ('ɖ', 'ɗ'), - ('ə', 'ə'), - ('ɛ', 'ɜ'), - ('ɠ', 'ɡ'), - ('ɣ', 'ɦ'), - ('ɨ', 'ɬ'), - ('ɯ', 'ɯ'), - ('ɱ', 'ɲ'), - ('ɵ', 'ɵ'), - ('ɽ', 'ɽ'), - ('ʀ', 'ʀ'), - ('ʂ', 'ʃ'), - ('ʇ', 'ʌ'), - ('ʒ', 'ʒ'), - ('ʝ', 'ʞ'), - ('\u{345}', '\u{345}'), - ('ͱ', 'ͱ'), - ('ͳ', 'ͳ'), - ('ͷ', 'ͷ'), - ('ͻ', 'ͽ'), - ('ΐ', 'ΐ'), - ('ά', 'ώ'), - ('ϐ', 'ϑ'), - ('ϕ', 'ϗ'), - ('ϙ', 'ϙ'), - ('ϛ', 'ϛ'), - ('ϝ', 'ϝ'), - ('ϟ', 'ϟ'), - ('ϡ', 'ϡ'), - ('ϣ', 'ϣ'), - ('ϥ', 'ϥ'), - ('ϧ', 'ϧ'), - ('ϩ', 'ϩ'), - ('ϫ', 'ϫ'), - ('ϭ', 'ϭ'), - ('ϯ', 'ϳ'), - ('ϵ', 'ϵ'), - ('ϸ', 'ϸ'), - ('ϻ', 'ϻ'), - ('а', 'џ'), - ('ѡ', 'ѡ'), - ('ѣ', 'ѣ'), - ('ѥ', 'ѥ'), - ('ѧ', 'ѧ'), - ('ѩ', 'ѩ'), - ('ѫ', 'ѫ'), - ('ѭ', 'ѭ'), - ('ѯ', 'ѯ'), - ('ѱ', 'ѱ'), - ('ѳ', 'ѳ'), - ('ѵ', 'ѵ'), - ('ѷ', 'ѷ'), - ('ѹ', 'ѹ'), - ('ѻ', 'ѻ'), - ('ѽ', 'ѽ'), - ('ѿ', 'ѿ'), - ('ҁ', 'ҁ'), - ('ҋ', 'ҋ'), - ('ҍ', 'ҍ'), - ('ҏ', 'ҏ'), - ('ґ', 'ґ'), - ('ғ', 'ғ'), - ('ҕ', 'ҕ'), - ('җ', 'җ'), - ('ҙ', 'ҙ'), - ('қ', 'қ'), - ('ҝ', 'ҝ'), - ('ҟ', 'ҟ'), - ('ҡ', 'ҡ'), - ('ң', 'ң'), - ('ҥ', 'ҥ'), - ('ҧ', 'ҧ'), - ('ҩ', 'ҩ'), - ('ҫ', 'ҫ'), - ('ҭ', 'ҭ'), - ('ү', 'ү'), - ('ұ', 'ұ'), - ('ҳ', 'ҳ'), - ('ҵ', 'ҵ'), - ('ҷ', 'ҷ'), - ('ҹ', 'ҹ'), - ('һ', 'һ'), - ('ҽ', 'ҽ'), - ('ҿ', 'ҿ'), - ('ӂ', 'ӂ'), - ('ӄ', 'ӄ'), - ('ӆ', 'ӆ'), - ('ӈ', 'ӈ'), - ('ӊ', 'ӊ'), - ('ӌ', 'ӌ'), - ('ӎ', 'ӏ'), - ('ӑ', 'ӑ'), - ('ӓ', 'ӓ'), - ('ӕ', 'ӕ'), - ('ӗ', 'ӗ'), - ('ә', 'ә'), - ('ӛ', 'ӛ'), - ('ӝ', 'ӝ'), - ('ӟ', 'ӟ'), - ('ӡ', 'ӡ'), - ('ӣ', 'ӣ'), - ('ӥ', 'ӥ'), - ('ӧ', 'ӧ'), - ('ө', 'ө'), - ('ӫ', 'ӫ'), - ('ӭ', 'ӭ'), - ('ӯ', 'ӯ'), - ('ӱ', 'ӱ'), - ('ӳ', 'ӳ'), - ('ӵ', 'ӵ'), - ('ӷ', 'ӷ'), - ('ӹ', 'ӹ'), - ('ӻ', 'ӻ'), - ('ӽ', 'ӽ'), - ('ӿ', 'ӿ'), - ('ԁ', 'ԁ'), - ('ԃ', 'ԃ'), - ('ԅ', 'ԅ'), - ('ԇ', 'ԇ'), - ('ԉ', 'ԉ'), - ('ԋ', 'ԋ'), - ('ԍ', 'ԍ'), - ('ԏ', 'ԏ'), - ('ԑ', 'ԑ'), - ('ԓ', 'ԓ'), - ('ԕ', 'ԕ'), - ('ԗ', 'ԗ'), - ('ԙ', 'ԙ'), - ('ԛ', 'ԛ'), - ('ԝ', 'ԝ'), - ('ԟ', 'ԟ'), - ('ԡ', 'ԡ'), - ('ԣ', 'ԣ'), - ('ԥ', 'ԥ'), - ('ԧ', 'ԧ'), - 
('ԩ', 'ԩ'), - ('ԫ', 'ԫ'), - ('ԭ', 'ԭ'), - ('ԯ', 'ԯ'), - ('ա', 'և'), - ('ა', 'ჺ'), - ('ჽ', 'ჿ'), - ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲈ'), - ('ᲊ', 'ᲊ'), - ('ᵹ', 'ᵹ'), - ('ᵽ', 'ᵽ'), - ('ᶎ', 'ᶎ'), - ('ḁ', 'ḁ'), - ('ḃ', 'ḃ'), - ('ḅ', 'ḅ'), - ('ḇ', 'ḇ'), - ('ḉ', 'ḉ'), - ('ḋ', 'ḋ'), - ('ḍ', 'ḍ'), - ('ḏ', 'ḏ'), - ('ḑ', 'ḑ'), - ('ḓ', 'ḓ'), - ('ḕ', 'ḕ'), - ('ḗ', 'ḗ'), - ('ḙ', 'ḙ'), - ('ḛ', 'ḛ'), - ('ḝ', 'ḝ'), - ('ḟ', 'ḟ'), - ('ḡ', 'ḡ'), - ('ḣ', 'ḣ'), - ('ḥ', 'ḥ'), - ('ḧ', 'ḧ'), - ('ḩ', 'ḩ'), - ('ḫ', 'ḫ'), - ('ḭ', 'ḭ'), - ('ḯ', 'ḯ'), - ('ḱ', 'ḱ'), - ('ḳ', 'ḳ'), - ('ḵ', 'ḵ'), - ('ḷ', 'ḷ'), - ('ḹ', 'ḹ'), - ('ḻ', 'ḻ'), - ('ḽ', 'ḽ'), - ('ḿ', 'ḿ'), - ('ṁ', 'ṁ'), - ('ṃ', 'ṃ'), - ('ṅ', 'ṅ'), - ('ṇ', 'ṇ'), - ('ṉ', 'ṉ'), - ('ṋ', 'ṋ'), - ('ṍ', 'ṍ'), - ('ṏ', 'ṏ'), - ('ṑ', 'ṑ'), - ('ṓ', 'ṓ'), - ('ṕ', 'ṕ'), - ('ṗ', 'ṗ'), - ('ṙ', 'ṙ'), - ('ṛ', 'ṛ'), - ('ṝ', 'ṝ'), - ('ṟ', 'ṟ'), - ('ṡ', 'ṡ'), - ('ṣ', 'ṣ'), - ('ṥ', 'ṥ'), - ('ṧ', 'ṧ'), - ('ṩ', 'ṩ'), - ('ṫ', 'ṫ'), - ('ṭ', 'ṭ'), - ('ṯ', 'ṯ'), - ('ṱ', 'ṱ'), - ('ṳ', 'ṳ'), - ('ṵ', 'ṵ'), - ('ṷ', 'ṷ'), - ('ṹ', 'ṹ'), - ('ṻ', 'ṻ'), - ('ṽ', 'ṽ'), - ('ṿ', 'ṿ'), - ('ẁ', 'ẁ'), - ('ẃ', 'ẃ'), - ('ẅ', 'ẅ'), - ('ẇ', 'ẇ'), - ('ẉ', 'ẉ'), - ('ẋ', 'ẋ'), - ('ẍ', 'ẍ'), - ('ẏ', 'ẏ'), - ('ẑ', 'ẑ'), - ('ẓ', 'ẓ'), - ('ẕ', 'ẛ'), - ('ạ', 'ạ'), - ('ả', 'ả'), - ('ấ', 'ấ'), - ('ầ', 'ầ'), - ('ẩ', 'ẩ'), - ('ẫ', 'ẫ'), - ('ậ', 'ậ'), - ('ắ', 'ắ'), - ('ằ', 'ằ'), - ('ẳ', 'ẳ'), - ('ẵ', 'ẵ'), - ('ặ', 'ặ'), - ('ẹ', 'ẹ'), - ('ẻ', 'ẻ'), - ('ẽ', 'ẽ'), - ('ế', 'ế'), - ('ề', 'ề'), - ('ể', 'ể'), - ('ễ', 'ễ'), - ('ệ', 'ệ'), - ('ỉ', 'ỉ'), - ('ị', 'ị'), - ('ọ', 'ọ'), - ('ỏ', 'ỏ'), - ('ố', 'ố'), - ('ồ', 'ồ'), - ('ổ', 'ổ'), - ('ỗ', 'ỗ'), - ('ộ', 'ộ'), - ('ớ', 'ớ'), - ('ờ', 'ờ'), - ('ở', 'ở'), - ('ỡ', 'ỡ'), - ('ợ', 'ợ'), - ('ụ', 'ụ'), - ('ủ', 'ủ'), - ('ứ', 'ứ'), - ('ừ', 'ừ'), - ('ử', 'ử'), - ('ữ', 'ữ'), - ('ự', 'ự'), - ('ỳ', 'ỳ'), - ('ỵ', 'ỵ'), - ('ỷ', 'ỷ'), - ('ỹ', 'ỹ'), - ('ỻ', 'ỻ'), - ('ỽ', 'ỽ'), - ('ỿ', 'ἇ'), - ('ἐ', 'ἕ'), - ('ἠ', 'ἧ'), - ('ἰ', 'ἷ'), - ('ὀ', 'ὅ'), - ('ὐ', 'ὗ'), - ('ὠ', 'ὧ'), - ('ὰ', 'ώ'), 
- ('ᾀ', 'ᾴ'), - ('ᾶ', 'ᾷ'), - ('ᾼ', 'ᾼ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῇ'), - ('ῌ', 'ῌ'), - ('ῐ', 'ΐ'), - ('ῖ', 'ῗ'), - ('ῠ', 'ῧ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῷ'), - ('ῼ', 'ῼ'), - ('ⅎ', 'ⅎ'), - ('ⅰ', 'ⅿ'), - ('ↄ', 'ↄ'), - ('ⓐ', 'ⓩ'), - ('ⰰ', 'ⱟ'), - ('ⱡ', 'ⱡ'), - ('ⱥ', 'ⱦ'), - ('ⱨ', 'ⱨ'), - ('ⱪ', 'ⱪ'), - ('ⱬ', 'ⱬ'), - ('ⱳ', 'ⱳ'), - ('ⱶ', 'ⱶ'), - ('ⲁ', 'ⲁ'), - ('ⲃ', 'ⲃ'), - ('ⲅ', 'ⲅ'), - ('ⲇ', 'ⲇ'), - ('ⲉ', 'ⲉ'), - ('ⲋ', 'ⲋ'), - ('ⲍ', 'ⲍ'), - ('ⲏ', 'ⲏ'), - ('ⲑ', 'ⲑ'), - ('ⲓ', 'ⲓ'), - ('ⲕ', 'ⲕ'), - ('ⲗ', 'ⲗ'), - ('ⲙ', 'ⲙ'), - ('ⲛ', 'ⲛ'), - ('ⲝ', 'ⲝ'), - ('ⲟ', 'ⲟ'), - ('ⲡ', 'ⲡ'), - ('ⲣ', 'ⲣ'), - ('ⲥ', 'ⲥ'), - ('ⲧ', 'ⲧ'), - ('ⲩ', 'ⲩ'), - ('ⲫ', 'ⲫ'), - ('ⲭ', 'ⲭ'), - ('ⲯ', 'ⲯ'), - ('ⲱ', 'ⲱ'), - ('ⲳ', 'ⲳ'), - ('ⲵ', 'ⲵ'), - ('ⲷ', 'ⲷ'), - ('ⲹ', 'ⲹ'), - ('ⲻ', 'ⲻ'), - ('ⲽ', 'ⲽ'), - ('ⲿ', 'ⲿ'), - ('ⳁ', 'ⳁ'), - ('ⳃ', 'ⳃ'), - ('ⳅ', 'ⳅ'), - ('ⳇ', 'ⳇ'), - ('ⳉ', 'ⳉ'), - ('ⳋ', 'ⳋ'), - ('ⳍ', 'ⳍ'), - ('ⳏ', 'ⳏ'), - ('ⳑ', 'ⳑ'), - ('ⳓ', 'ⳓ'), - ('ⳕ', 'ⳕ'), - ('ⳗ', 'ⳗ'), - ('ⳙ', 'ⳙ'), - ('ⳛ', 'ⳛ'), - ('ⳝ', 'ⳝ'), - ('ⳟ', 'ⳟ'), - ('ⳡ', 'ⳡ'), - ('ⳣ', 'ⳣ'), - ('ⳬ', 'ⳬ'), - ('ⳮ', 'ⳮ'), - ('ⳳ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ꙁ', 'ꙁ'), - ('ꙃ', 'ꙃ'), - ('ꙅ', 'ꙅ'), - ('ꙇ', 'ꙇ'), - ('ꙉ', 'ꙉ'), - ('ꙋ', 'ꙋ'), - ('ꙍ', 'ꙍ'), - ('ꙏ', 'ꙏ'), - ('ꙑ', 'ꙑ'), - ('ꙓ', 'ꙓ'), - ('ꙕ', 'ꙕ'), - ('ꙗ', 'ꙗ'), - ('ꙙ', 'ꙙ'), - ('ꙛ', 'ꙛ'), - ('ꙝ', 'ꙝ'), - ('ꙟ', 'ꙟ'), - ('ꙡ', 'ꙡ'), - ('ꙣ', 'ꙣ'), - ('ꙥ', 'ꙥ'), - ('ꙧ', 'ꙧ'), - ('ꙩ', 'ꙩ'), - ('ꙫ', 'ꙫ'), - ('ꙭ', 'ꙭ'), - ('ꚁ', 'ꚁ'), - ('ꚃ', 'ꚃ'), - ('ꚅ', 'ꚅ'), - ('ꚇ', 'ꚇ'), - ('ꚉ', 'ꚉ'), - ('ꚋ', 'ꚋ'), - ('ꚍ', 'ꚍ'), - ('ꚏ', 'ꚏ'), - ('ꚑ', 'ꚑ'), - ('ꚓ', 'ꚓ'), - ('ꚕ', 'ꚕ'), - ('ꚗ', 'ꚗ'), - ('ꚙ', 'ꚙ'), - ('ꚛ', 'ꚛ'), - ('ꜣ', 'ꜣ'), - ('ꜥ', 'ꜥ'), - ('ꜧ', 'ꜧ'), - ('ꜩ', 'ꜩ'), - ('ꜫ', 'ꜫ'), - ('ꜭ', 'ꜭ'), - ('ꜯ', 'ꜯ'), - ('ꜳ', 'ꜳ'), - ('ꜵ', 'ꜵ'), - ('ꜷ', 'ꜷ'), - ('ꜹ', 'ꜹ'), - ('ꜻ', 'ꜻ'), - ('ꜽ', 'ꜽ'), - ('ꜿ', 'ꜿ'), - ('ꝁ', 'ꝁ'), - ('ꝃ', 'ꝃ'), - ('ꝅ', 'ꝅ'), - ('ꝇ', 'ꝇ'), - ('ꝉ', 'ꝉ'), - ('ꝋ', 'ꝋ'), - ('ꝍ', 'ꝍ'), - ('ꝏ', 'ꝏ'), - ('ꝑ', 'ꝑ'), - ('ꝓ', 'ꝓ'), - ('ꝕ', 
'ꝕ'), - ('ꝗ', 'ꝗ'), - ('ꝙ', 'ꝙ'), - ('ꝛ', 'ꝛ'), - ('ꝝ', 'ꝝ'), - ('ꝟ', 'ꝟ'), - ('ꝡ', 'ꝡ'), - ('ꝣ', 'ꝣ'), - ('ꝥ', 'ꝥ'), - ('ꝧ', 'ꝧ'), - ('ꝩ', 'ꝩ'), - ('ꝫ', 'ꝫ'), - ('ꝭ', 'ꝭ'), - ('ꝯ', 'ꝯ'), - ('ꝺ', 'ꝺ'), - ('ꝼ', 'ꝼ'), - ('ꝿ', 'ꝿ'), - ('ꞁ', 'ꞁ'), - ('ꞃ', 'ꞃ'), - ('ꞅ', 'ꞅ'), - ('ꞇ', 'ꞇ'), - ('ꞌ', 'ꞌ'), - ('ꞑ', 'ꞑ'), - ('ꞓ', 'ꞔ'), - ('ꞗ', 'ꞗ'), - ('ꞙ', 'ꞙ'), - ('ꞛ', 'ꞛ'), - ('ꞝ', 'ꞝ'), - ('ꞟ', 'ꞟ'), - ('ꞡ', 'ꞡ'), - ('ꞣ', 'ꞣ'), - ('ꞥ', 'ꞥ'), - ('ꞧ', 'ꞧ'), - ('ꞩ', 'ꞩ'), - ('ꞵ', 'ꞵ'), - ('ꞷ', 'ꞷ'), - ('ꞹ', 'ꞹ'), - ('ꞻ', 'ꞻ'), - ('ꞽ', 'ꞽ'), - ('ꞿ', 'ꞿ'), - ('ꟁ', 'ꟁ'), - ('ꟃ', 'ꟃ'), - ('ꟈ', 'ꟈ'), - ('ꟊ', 'ꟊ'), - ('ꟍ', 'ꟍ'), - ('ꟑ', 'ꟑ'), - ('ꟗ', 'ꟗ'), - ('ꟙ', 'ꟙ'), - ('ꟛ', 'ꟛ'), - ('ꟶ', 'ꟶ'), - ('ꭓ', 'ꭓ'), - ('ꭰ', 'ꮿ'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('a', 'z'), - ('𐐨', '𐑏'), - ('𐓘', '𐓻'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐳀', '𐳲'), - ('𐵰', '𐶅'), - ('𑣀', '𑣟'), - ('𖹠', '𖹿'), - ('𞤢', '𞥃'), -]; - -pub const DASH: &'static [(char, char)] = &[ - ('-', '-'), - ('֊', '֊'), - ('־', '־'), - ('᐀', '᐀'), - ('᠆', '᠆'), - ('‐', '―'), - ('⁓', '⁓'), - ('⁻', '⁻'), - ('₋', '₋'), - ('−', '−'), - ('⸗', '⸗'), - ('⸚', '⸚'), - ('⸺', '⸻'), - ('⹀', '⹀'), - ('⹝', '⹝'), - ('〜', '〜'), - ('〰', '〰'), - ('゠', '゠'), - ('︱', '︲'), - ('﹘', '﹘'), - ('﹣', '﹣'), - ('-', '-'), - ('𐵮', '𐵮'), - ('𐺭', '𐺭'), -]; - -pub const DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[ - ('\u{ad}', '\u{ad}'), - ('\u{34f}', '\u{34f}'), - ('\u{61c}', '\u{61c}'), - ('ᅟ', 'ᅠ'), - ('\u{17b4}', '\u{17b5}'), - ('\u{180b}', '\u{180f}'), - ('\u{200b}', '\u{200f}'), - ('\u{202a}', '\u{202e}'), - ('\u{2060}', '\u{206f}'), - ('ㅤ', 'ㅤ'), - ('\u{fe00}', '\u{fe0f}'), - ('\u{feff}', '\u{feff}'), - ('ᅠ', 'ᅠ'), - ('\u{fff0}', '\u{fff8}'), - ('\u{1bca0}', '\u{1bca3}'), - ('\u{1d173}', '\u{1d17a}'), - ('\u{e0000}', '\u{e0fff}'), -]; - -pub const DEPRECATED: &'static [(char, char)] = &[ - ('ʼn', 'ʼn'), - ('ٳ', 'ٳ'), - ('\u{f77}', '\u{f77}'), - ('\u{f79}', '\u{f79}'), - ('ឣ', 'ឤ'), - ('\u{206a}', '\u{206f}'), - 
('〈', '〉'), - ('\u{e0001}', '\u{e0001}'), -]; - -pub const DIACRITIC: &'static [(char, char)] = &[ - ('^', '^'), - ('`', '`'), - ('¨', '¨'), - ('¯', '¯'), - ('´', '´'), - ('·', '¸'), - ('ʰ', '\u{34e}'), - ('\u{350}', '\u{357}'), - ('\u{35d}', '\u{362}'), - ('ʹ', '͵'), - ('ͺ', 'ͺ'), - ('΄', '΅'), - ('\u{483}', '\u{487}'), - ('ՙ', 'ՙ'), - ('\u{591}', '\u{5a1}'), - ('\u{5a3}', '\u{5bd}'), - ('\u{5bf}', '\u{5bf}'), - ('\u{5c1}', '\u{5c2}'), - ('\u{5c4}', '\u{5c4}'), - ('\u{64b}', '\u{652}'), - ('\u{657}', '\u{658}'), - ('\u{6df}', '\u{6e0}'), - ('ۥ', 'ۦ'), - ('\u{6ea}', '\u{6ec}'), - ('\u{730}', '\u{74a}'), - ('\u{7a6}', '\u{7b0}'), - ('\u{7eb}', 'ߵ'), - ('\u{818}', '\u{819}'), - ('\u{898}', '\u{89f}'), - ('ࣉ', '\u{8d2}'), - ('\u{8e3}', '\u{8fe}'), - ('\u{93c}', '\u{93c}'), - ('\u{94d}', '\u{94d}'), - ('\u{951}', '\u{954}'), - ('ॱ', 'ॱ'), - ('\u{9bc}', '\u{9bc}'), - ('\u{9cd}', '\u{9cd}'), - ('\u{a3c}', '\u{a3c}'), - ('\u{a4d}', '\u{a4d}'), - ('\u{abc}', '\u{abc}'), - ('\u{acd}', '\u{acd}'), - ('\u{afd}', '\u{aff}'), - ('\u{b3c}', '\u{b3c}'), - ('\u{b4d}', '\u{b4d}'), - ('\u{b55}', '\u{b55}'), - ('\u{bcd}', '\u{bcd}'), - ('\u{c3c}', '\u{c3c}'), - ('\u{c4d}', '\u{c4d}'), - ('\u{cbc}', '\u{cbc}'), - ('\u{ccd}', '\u{ccd}'), - ('\u{d3b}', '\u{d3c}'), - ('\u{d4d}', '\u{d4d}'), - ('\u{dca}', '\u{dca}'), - ('\u{e3a}', '\u{e3a}'), - ('\u{e47}', '\u{e4c}'), - ('\u{e4e}', '\u{e4e}'), - ('\u{eba}', '\u{eba}'), - ('\u{ec8}', '\u{ecc}'), - ('\u{f18}', '\u{f19}'), - ('\u{f35}', '\u{f35}'), - ('\u{f37}', '\u{f37}'), - ('\u{f39}', '\u{f39}'), - ('༾', '༿'), - ('\u{f82}', '\u{f84}'), - ('\u{f86}', '\u{f87}'), - ('\u{fc6}', '\u{fc6}'), - ('\u{1037}', '\u{1037}'), - ('\u{1039}', '\u{103a}'), - ('ၣ', 'ၤ'), - ('ၩ', 'ၭ'), - ('ႇ', '\u{108d}'), - ('ႏ', 'ႏ'), - ('ႚ', 'ႛ'), - ('\u{135d}', '\u{135f}'), - ('\u{1714}', '\u{1715}'), - ('\u{1734}', '\u{1734}'), - ('\u{17c9}', '\u{17d3}'), - ('\u{17dd}', '\u{17dd}'), - ('\u{1939}', '\u{193b}'), - ('\u{1a60}', '\u{1a60}'), - ('\u{1a75}', '\u{1a7c}'), - 
('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1abe}'), - ('\u{1ac1}', '\u{1acb}'), - ('\u{1b34}', '\u{1b34}'), - ('\u{1b44}', '\u{1b44}'), - ('\u{1b6b}', '\u{1b73}'), - ('\u{1baa}', '\u{1bab}'), - ('\u{1be6}', '\u{1be6}'), - ('\u{1bf2}', '\u{1bf3}'), - ('\u{1c36}', '\u{1c37}'), - ('ᱸ', 'ᱽ'), - ('\u{1cd0}', '\u{1ce8}'), - ('\u{1ced}', '\u{1ced}'), - ('\u{1cf4}', '\u{1cf4}'), - ('᳷', '\u{1cf9}'), - ('ᴬ', 'ᵪ'), - ('\u{1dc4}', '\u{1dcf}'), - ('\u{1df5}', '\u{1dff}'), - ('᾽', '᾽'), - ('᾿', '῁'), - ('῍', '῏'), - ('῝', '῟'), - ('῭', '`'), - ('´', '῾'), - ('\u{2cef}', '\u{2cf1}'), - ('ⸯ', 'ⸯ'), - ('\u{302a}', '\u{302f}'), - ('\u{3099}', '゜'), - ('ー', 'ー'), - ('\u{a66f}', '\u{a66f}'), - ('\u{a67c}', '\u{a67d}'), - ('ꙿ', 'ꙿ'), - ('ꚜ', 'ꚝ'), - ('\u{a6f0}', '\u{a6f1}'), - ('꜀', '꜡'), - ('ꞈ', '꞊'), - ('ꟸ', 'ꟹ'), - ('\u{a806}', '\u{a806}'), - ('\u{a82c}', '\u{a82c}'), - ('\u{a8c4}', '\u{a8c4}'), - ('\u{a8e0}', '\u{a8f1}'), - ('\u{a92b}', '꤮'), - ('\u{a953}', '\u{a953}'), - ('\u{a9b3}', '\u{a9b3}'), - ('\u{a9c0}', '\u{a9c0}'), - ('\u{a9e5}', '\u{a9e5}'), - ('ꩻ', 'ꩽ'), - ('\u{aabf}', 'ꫂ'), - ('\u{aaf6}', '\u{aaf6}'), - ('꭛', 'ꭟ'), - ('ꭩ', '꭫'), - ('꯬', '\u{abed}'), - ('\u{fb1e}', '\u{fb1e}'), - ('\u{fe20}', '\u{fe2f}'), - ('^', '^'), - ('`', '`'), - ('ー', 'ー'), - ('\u{ff9e}', '\u{ff9f}'), - (' ̄', ' ̄'), - ('\u{102e0}', '\u{102e0}'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '\u{10a3f}'), - ('\u{10ae5}', '\u{10ae6}'), - ('𐴢', '\u{10d27}'), - ('𐵎', '𐵎'), - ('\u{10d69}', '\u{10d6d}'), - ('\u{10efd}', '\u{10eff}'), - ('\u{10f46}', '\u{10f50}'), - ('\u{10f82}', '\u{10f85}'), - ('\u{11046}', '\u{11046}'), - ('\u{11070}', '\u{11070}'), - ('\u{110b9}', '\u{110ba}'), - ('\u{11133}', '\u{11134}'), - ('\u{11173}', '\u{11173}'), - ('\u{111c0}', '\u{111c0}'), - ('\u{111ca}', '\u{111cc}'), - ('\u{11235}', '\u{11236}'), - ('\u{112e9}', '\u{112ea}'), - ('\u{1133b}', '\u{1133c}'), - ('\u{1134d}', '\u{1134d}'), - ('\u{11366}', '\u{1136c}'), - 
('\u{11370}', '\u{11374}'), - ('\u{113ce}', '\u{113d0}'), - ('\u{113d2}', '𑏓'), - ('\u{113e1}', '\u{113e2}'), - ('\u{11442}', '\u{11442}'), - ('\u{11446}', '\u{11446}'), - ('\u{114c2}', '\u{114c3}'), - ('\u{115bf}', '\u{115c0}'), - ('\u{1163f}', '\u{1163f}'), - ('\u{116b6}', '\u{116b7}'), - ('\u{1172b}', '\u{1172b}'), - ('\u{11839}', '\u{1183a}'), - ('\u{1193d}', '\u{1193e}'), - ('\u{11943}', '\u{11943}'), - ('\u{119e0}', '\u{119e0}'), - ('\u{11a34}', '\u{11a34}'), - ('\u{11a47}', '\u{11a47}'), - ('\u{11a99}', '\u{11a99}'), - ('\u{11c3f}', '\u{11c3f}'), - ('\u{11d42}', '\u{11d42}'), - ('\u{11d44}', '\u{11d45}'), - ('\u{11d97}', '\u{11d97}'), - ('\u{11f41}', '\u{11f42}'), - ('\u{11f5a}', '\u{11f5a}'), - ('\u{13447}', '\u{13455}'), - ('\u{1612f}', '\u{1612f}'), - ('\u{16af0}', '\u{16af4}'), - ('\u{16b30}', '\u{16b36}'), - ('𖵫', '𖵬'), - ('\u{16f8f}', '𖾟'), - ('\u{16ff0}', '\u{16ff1}'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d167}', '\u{1d169}'), - ('\u{1d16d}', '\u{1d172}'), - ('\u{1d17b}', '\u{1d182}'), - ('\u{1d185}', '\u{1d18b}'), - ('\u{1d1aa}', '\u{1d1ad}'), - ('𞀰', '𞁭'), - ('\u{1e130}', '\u{1e136}'), - ('\u{1e2ae}', '\u{1e2ae}'), - ('\u{1e2ec}', '\u{1e2ef}'), - ('\u{1e5ee}', '\u{1e5ef}'), - ('\u{1e8d0}', '\u{1e8d6}'), - ('\u{1e944}', '\u{1e946}'), - ('\u{1e948}', '\u{1e94a}'), -]; - -pub const EMOJI: &'static [(char, char)] = &[ - ('#', '#'), - ('*', '*'), - ('0', '9'), - ('©', '©'), - ('®', '®'), - ('‼', '‼'), - ('⁉', '⁉'), - ('™', '™'), - ('ℹ', 'ℹ'), - ('↔', '↙'), - ('↩', '↪'), - ('⌚', '⌛'), - ('⌨', '⌨'), - ('⏏', '⏏'), - ('⏩', '⏳'), - ('⏸', '⏺'), - ('Ⓜ', 'Ⓜ'), - ('▪', '▫'), - ('▶', '▶'), - ('◀', '◀'), - ('◻', '◾'), - ('☀', '☄'), - ('☎', '☎'), - ('☑', '☑'), - ('☔', '☕'), - ('☘', '☘'), - ('☝', '☝'), - ('☠', '☠'), - ('☢', '☣'), - ('☦', '☦'), - ('☪', '☪'), - ('☮', '☯'), - ('☸', '☺'), - ('♀', '♀'), - ('♂', '♂'), - ('♈', '♓'), - ('♟', '♠'), - ('♣', '♣'), - ('♥', '♦'), - ('♨', '♨'), - ('♻', '♻'), 
- ('♾', '♿'), - ('⚒', '⚗'), - ('⚙', '⚙'), - ('⚛', '⚜'), - ('⚠', '⚡'), - ('⚧', '⚧'), - ('⚪', '⚫'), - ('⚰', '⚱'), - ('⚽', '⚾'), - ('⛄', '⛅'), - ('⛈', '⛈'), - ('⛎', '⛏'), - ('⛑', '⛑'), - ('⛓', '⛔'), - ('⛩', '⛪'), - ('⛰', '⛵'), - ('⛷', '⛺'), - ('⛽', '⛽'), - ('✂', '✂'), - ('✅', '✅'), - ('✈', '✍'), - ('✏', '✏'), - ('✒', '✒'), - ('✔', '✔'), - ('✖', '✖'), - ('✝', '✝'), - ('✡', '✡'), - ('✨', '✨'), - ('✳', '✴'), - ('❄', '❄'), - ('❇', '❇'), - ('❌', '❌'), - ('❎', '❎'), - ('❓', '❕'), - ('❗', '❗'), - ('❣', '❤'), - ('➕', '➗'), - ('➡', '➡'), - ('➰', '➰'), - ('➿', '➿'), - ('⤴', '⤵'), - ('⬅', '⬇'), - ('⬛', '⬜'), - ('⭐', '⭐'), - ('⭕', '⭕'), - ('〰', '〰'), - ('〽', '〽'), - ('㊗', '㊗'), - ('㊙', '㊙'), - ('🀄', '🀄'), - ('🃏', '🃏'), - ('🅰', '🅱'), - ('🅾', '🅿'), - ('🆎', '🆎'), - ('🆑', '🆚'), - ('🇦', '🇿'), - ('🈁', '🈂'), - ('🈚', '🈚'), - ('🈯', '🈯'), - ('🈲', '🈺'), - ('🉐', '🉑'), - ('🌀', '🌡'), - ('🌤', '🎓'), - ('🎖', '🎗'), - ('🎙', '🎛'), - ('🎞', '🏰'), - ('🏳', '🏵'), - ('🏷', '📽'), - ('📿', '🔽'), - ('🕉', '🕎'), - ('🕐', '🕧'), - ('🕯', '🕰'), - ('🕳', '🕺'), - ('🖇', '🖇'), - ('🖊', '🖍'), - ('🖐', '🖐'), - ('🖕', '🖖'), - ('🖤', '🖥'), - ('🖨', '🖨'), - ('🖱', '🖲'), - ('🖼', '🖼'), - ('🗂', '🗄'), - ('🗑', '🗓'), - ('🗜', '🗞'), - ('🗡', '🗡'), - ('🗣', '🗣'), - ('🗨', '🗨'), - ('🗯', '🗯'), - ('🗳', '🗳'), - ('🗺', '🙏'), - ('🚀', '🛅'), - ('🛋', '🛒'), - ('🛕', '🛗'), - ('🛜', '🛥'), - ('🛩', '🛩'), - ('🛫', '🛬'), - ('🛰', '🛰'), - ('🛳', '🛼'), - ('🟠', '🟫'), - ('🟰', '🟰'), - ('🤌', '🤺'), - ('🤼', '🥅'), - ('🥇', '🧿'), - ('🩰', '🩼'), - ('🪀', '🪉'), - ('🪏', '🫆'), - ('🫎', '🫜'), - ('🫟', '🫩'), - ('🫰', '🫸'), -]; - -pub const EMOJI_COMPONENT: &'static [(char, char)] = &[ - ('#', '#'), - ('*', '*'), - ('0', '9'), - ('\u{200d}', '\u{200d}'), - ('\u{20e3}', '\u{20e3}'), - ('\u{fe0f}', '\u{fe0f}'), - ('🇦', '🇿'), - ('🏻', '🏿'), - ('🦰', '🦳'), - ('\u{e0020}', '\u{e007f}'), -]; - -pub const EMOJI_MODIFIER: &'static [(char, char)] = &[('🏻', '🏿')]; - -pub const EMOJI_MODIFIER_BASE: &'static [(char, char)] = &[ - ('☝', '☝'), - ('⛹', '⛹'), - ('✊', '✍'), - ('🎅', '🎅'), - ('🏂', '🏄'), - 
('🏇', '🏇'), - ('🏊', '🏌'), - ('👂', '👃'), - ('👆', '👐'), - ('👦', '👸'), - ('👼', '👼'), - ('💁', '💃'), - ('💅', '💇'), - ('💏', '💏'), - ('💑', '💑'), - ('💪', '💪'), - ('🕴', '🕵'), - ('🕺', '🕺'), - ('🖐', '🖐'), - ('🖕', '🖖'), - ('🙅', '🙇'), - ('🙋', '🙏'), - ('🚣', '🚣'), - ('🚴', '🚶'), - ('🛀', '🛀'), - ('🛌', '🛌'), - ('🤌', '🤌'), - ('🤏', '🤏'), - ('🤘', '🤟'), - ('🤦', '🤦'), - ('🤰', '🤹'), - ('🤼', '🤾'), - ('🥷', '🥷'), - ('🦵', '🦶'), - ('🦸', '🦹'), - ('🦻', '🦻'), - ('🧍', '🧏'), - ('🧑', '🧝'), - ('🫃', '🫅'), - ('🫰', '🫸'), -]; - -pub const EMOJI_PRESENTATION: &'static [(char, char)] = &[ - ('⌚', '⌛'), - ('⏩', '⏬'), - ('⏰', '⏰'), - ('⏳', '⏳'), - ('◽', '◾'), - ('☔', '☕'), - ('♈', '♓'), - ('♿', '♿'), - ('⚓', '⚓'), - ('⚡', '⚡'), - ('⚪', '⚫'), - ('⚽', '⚾'), - ('⛄', '⛅'), - ('⛎', '⛎'), - ('⛔', '⛔'), - ('⛪', '⛪'), - ('⛲', '⛳'), - ('⛵', '⛵'), - ('⛺', '⛺'), - ('⛽', '⛽'), - ('✅', '✅'), - ('✊', '✋'), - ('✨', '✨'), - ('❌', '❌'), - ('❎', '❎'), - ('❓', '❕'), - ('❗', '❗'), - ('➕', '➗'), - ('➰', '➰'), - ('➿', '➿'), - ('⬛', '⬜'), - ('⭐', '⭐'), - ('⭕', '⭕'), - ('🀄', '🀄'), - ('🃏', '🃏'), - ('🆎', '🆎'), - ('🆑', '🆚'), - ('🇦', '🇿'), - ('🈁', '🈁'), - ('🈚', '🈚'), - ('🈯', '🈯'), - ('🈲', '🈶'), - ('🈸', '🈺'), - ('🉐', '🉑'), - ('🌀', '🌠'), - ('🌭', '🌵'), - ('🌷', '🍼'), - ('🍾', '🎓'), - ('🎠', '🏊'), - ('🏏', '🏓'), - ('🏠', '🏰'), - ('🏴', '🏴'), - ('🏸', '🐾'), - ('👀', '👀'), - ('👂', '📼'), - ('📿', '🔽'), - ('🕋', '🕎'), - ('🕐', '🕧'), - ('🕺', '🕺'), - ('🖕', '🖖'), - ('🖤', '🖤'), - ('🗻', '🙏'), - ('🚀', '🛅'), - ('🛌', '🛌'), - ('🛐', '🛒'), - ('🛕', '🛗'), - ('🛜', '🛟'), - ('🛫', '🛬'), - ('🛴', '🛼'), - ('🟠', '🟫'), - ('🟰', '🟰'), - ('🤌', '🤺'), - ('🤼', '🥅'), - ('🥇', '🧿'), - ('🩰', '🩼'), - ('🪀', '🪉'), - ('🪏', '🫆'), - ('🫎', '🫜'), - ('🫟', '🫩'), - ('🫰', '🫸'), -]; - -pub const EXTENDED_PICTOGRAPHIC: &'static [(char, char)] = &[ - ('©', '©'), - ('®', '®'), - ('‼', '‼'), - ('⁉', '⁉'), - ('™', '™'), - ('ℹ', 'ℹ'), - ('↔', '↙'), - ('↩', '↪'), - ('⌚', '⌛'), - ('⌨', '⌨'), - ('⎈', '⎈'), - ('⏏', '⏏'), - ('⏩', '⏳'), - ('⏸', '⏺'), - ('Ⓜ', 'Ⓜ'), - ('▪', '▫'), - ('▶', '▶'), - ('◀', '◀'), - 
('◻', '◾'), - ('☀', '★'), - ('☇', '☒'), - ('☔', '⚅'), - ('⚐', '✅'), - ('✈', '✒'), - ('✔', '✔'), - ('✖', '✖'), - ('✝', '✝'), - ('✡', '✡'), - ('✨', '✨'), - ('✳', '✴'), - ('❄', '❄'), - ('❇', '❇'), - ('❌', '❌'), - ('❎', '❎'), - ('❓', '❕'), - ('❗', '❗'), - ('❣', '❧'), - ('➕', '➗'), - ('➡', '➡'), - ('➰', '➰'), - ('➿', '➿'), - ('⤴', '⤵'), - ('⬅', '⬇'), - ('⬛', '⬜'), - ('⭐', '⭐'), - ('⭕', '⭕'), - ('〰', '〰'), - ('〽', '〽'), - ('㊗', '㊗'), - ('㊙', '㊙'), - ('🀀', '\u{1f0ff}'), - ('🄍', '🄏'), - ('🄯', '🄯'), - ('🅬', '🅱'), - ('🅾', '🅿'), - ('🆎', '🆎'), - ('🆑', '🆚'), - ('🆭', '\u{1f1e5}'), - ('🈁', '\u{1f20f}'), - ('🈚', '🈚'), - ('🈯', '🈯'), - ('🈲', '🈺'), - ('\u{1f23c}', '\u{1f23f}'), - ('\u{1f249}', '🏺'), - ('🐀', '🔽'), - ('🕆', '🙏'), - ('🚀', '\u{1f6ff}'), - ('🝴', '🝿'), - ('🟕', '\u{1f7ff}'), - ('\u{1f80c}', '\u{1f80f}'), - ('\u{1f848}', '\u{1f84f}'), - ('\u{1f85a}', '\u{1f85f}'), - ('\u{1f888}', '\u{1f88f}'), - ('\u{1f8ae}', '\u{1f8ff}'), - ('🤌', '🤺'), - ('🤼', '🥅'), - ('🥇', '\u{1faff}'), - ('\u{1fc00}', '\u{1fffd}'), -]; - -pub const EXTENDER: &'static [(char, char)] = &[ - ('·', '·'), - ('ː', 'ˑ'), - ('ـ', 'ـ'), - ('ߺ', 'ߺ'), - ('\u{a71}', '\u{a71}'), - ('\u{afb}', '\u{afb}'), - ('\u{b55}', '\u{b55}'), - ('ๆ', 'ๆ'), - ('ໆ', 'ໆ'), - ('᠊', '᠊'), - ('ᡃ', 'ᡃ'), - ('ᪧ', 'ᪧ'), - ('\u{1c36}', '\u{1c36}'), - ('ᱻ', 'ᱻ'), - ('々', '々'), - ('〱', '〵'), - ('ゝ', 'ゞ'), - ('ー', 'ヾ'), - ('ꀕ', 'ꀕ'), - ('ꘌ', 'ꘌ'), - ('ꧏ', 'ꧏ'), - ('ꧦ', 'ꧦ'), - ('ꩰ', 'ꩰ'), - ('ꫝ', 'ꫝ'), - ('ꫳ', 'ꫴ'), - ('ー', 'ー'), - ('𐞁', '𐞂'), - ('𐵎', '𐵎'), - ('\u{10d6a}', '\u{10d6a}'), - ('𐵯', '𐵯'), - ('\u{11237}', '\u{11237}'), - ('𑍝', '𑍝'), - ('\u{113d2}', '𑏓'), - ('𑗆', '𑗈'), - ('\u{11a98}', '\u{11a98}'), - ('𖭂', '𖭃'), - ('𖿠', '𖿡'), - ('𖿣', '𖿣'), - ('𞄼', '𞄽'), - ('\u{1e5ef}', '\u{1e5ef}'), - ('\u{1e944}', '\u{1e946}'), -]; - -pub const GRAPHEME_BASE: &'static [(char, char)] = &[ - (' ', '~'), - ('\u{a0}', '¬'), - ('®', '˿'), - ('Ͱ', 'ͷ'), - ('ͺ', 'Ϳ'), - ('΄', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', '҂'), - ('Ҋ', 'ԯ'), - ('Ա', 'Ֆ'), - 
('ՙ', '֊'), - ('֍', '֏'), - ('־', '־'), - ('׀', '׀'), - ('׃', '׃'), - ('׆', '׆'), - ('א', 'ת'), - ('ׯ', '״'), - ('؆', '؏'), - ('؛', '؛'), - ('؝', 'ي'), - ('٠', 'ٯ'), - ('ٱ', 'ە'), - ('۞', '۞'), - ('ۥ', 'ۦ'), - ('۩', '۩'), - ('ۮ', '܍'), - ('ܐ', 'ܐ'), - ('ܒ', 'ܯ'), - ('ݍ', 'ޥ'), - ('ޱ', 'ޱ'), - ('߀', 'ߪ'), - ('ߴ', 'ߺ'), - ('߾', 'ࠕ'), - ('ࠚ', 'ࠚ'), - ('ࠤ', 'ࠤ'), - ('ࠨ', 'ࠨ'), - ('࠰', '࠾'), - ('ࡀ', 'ࡘ'), - ('࡞', '࡞'), - ('ࡠ', 'ࡪ'), - ('ࡰ', 'ࢎ'), - ('ࢠ', 'ࣉ'), - ('ः', 'ह'), - ('ऻ', 'ऻ'), - ('ऽ', 'ी'), - ('ॉ', 'ौ'), - ('ॎ', 'ॐ'), - ('क़', 'ॡ'), - ('।', 'ঀ'), - ('ং', 'ঃ'), - ('অ', 'ঌ'), - ('এ', 'ঐ'), - ('ও', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('ঽ', 'ঽ'), - ('ি', 'ী'), - ('ে', 'ৈ'), - ('ো', 'ৌ'), - ('ৎ', 'ৎ'), - ('ড়', 'ঢ়'), - ('য়', 'ৡ'), - ('০', '৽'), - ('ਃ', 'ਃ'), - ('ਅ', 'ਊ'), - ('ਏ', 'ਐ'), - ('ਓ', 'ਨ'), - ('ਪ', 'ਰ'), - ('ਲ', 'ਲ਼'), - ('ਵ', 'ਸ਼'), - ('ਸ', 'ਹ'), - ('ਾ', 'ੀ'), - ('ਖ਼', 'ੜ'), - ('ਫ਼', 'ਫ਼'), - ('੦', '੯'), - ('ੲ', 'ੴ'), - ('੶', '੶'), - ('ઃ', 'ઃ'), - ('અ', 'ઍ'), - ('એ', 'ઑ'), - ('ઓ', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('ઽ', 'ી'), - ('ૉ', 'ૉ'), - ('ો', 'ૌ'), - ('ૐ', 'ૐ'), - ('ૠ', 'ૡ'), - ('૦', '૱'), - ('ૹ', 'ૹ'), - ('ଂ', 'ଃ'), - ('ଅ', 'ଌ'), - ('ଏ', 'ଐ'), - ('ଓ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 'ଳ'), - ('ଵ', 'ହ'), - ('ଽ', 'ଽ'), - ('ୀ', 'ୀ'), - ('େ', 'ୈ'), - ('ୋ', 'ୌ'), - ('ଡ଼', 'ଢ଼'), - ('ୟ', 'ୡ'), - ('୦', '୷'), - ('ஃ', 'ஃ'), - ('அ', 'ஊ'), - ('எ', 'ஐ'), - ('ஒ', 'க'), - ('ங', 'ச'), - ('ஜ', 'ஜ'), - ('ஞ', 'ட'), - ('ண', 'த'), - ('ந', 'ப'), - ('ம', 'ஹ'), - ('ி', 'ி'), - ('ு', 'ூ'), - ('ெ', 'ை'), - ('ொ', 'ௌ'), - ('ௐ', 'ௐ'), - ('௦', '௺'), - ('ఁ', 'ః'), - ('అ', 'ఌ'), - ('ఎ', 'ఐ'), - ('ఒ', 'న'), - ('ప', 'హ'), - ('ఽ', 'ఽ'), - ('ు', 'ౄ'), - ('ౘ', 'ౚ'), - ('ౝ', 'ౝ'), - ('ౠ', 'ౡ'), - ('౦', '౯'), - ('౷', 'ಀ'), - ('ಂ', 'ಌ'), - ('ಎ', 'ಐ'), - ('ಒ', 'ನ'), - ('ಪ', 'ಳ'), - ('ವ', 'ಹ'), - ('ಽ', 'ಾ'), - ('ು', 'ು'), - ('ೃ', 'ೄ'), - ('ೝ', 'ೞ'), - ('ೠ', 'ೡ'), - ('೦', '೯'), - ('ೱ', 'ೳ'), - ('ം', 'ഌ'), - ('എ', 'ഐ'), - ('ഒ', 'ഺ'), - ('ഽ', 'ഽ'), - ('ി', 'ീ'), - 
('െ', 'ൈ'), - ('ൊ', 'ൌ'), - ('ൎ', '൏'), - ('ൔ', 'ൖ'), - ('൘', 'ൡ'), - ('൦', 'ൿ'), - ('ං', 'ඃ'), - ('අ', 'ඖ'), - ('ක', 'න'), - ('ඳ', 'ර'), - ('ල', 'ල'), - ('ව', 'ෆ'), - ('ැ', 'ෑ'), - ('ෘ', 'ෞ'), - ('෦', '෯'), - ('ෲ', '෴'), - ('ก', 'ะ'), - ('า', 'ำ'), - ('฿', 'ๆ'), - ('๏', '๛'), - ('ກ', 'ຂ'), - ('ຄ', 'ຄ'), - ('ຆ', 'ຊ'), - ('ຌ', 'ຣ'), - ('ລ', 'ລ'), - ('ວ', 'ະ'), - ('າ', 'ຳ'), - ('ຽ', 'ຽ'), - ('ເ', 'ໄ'), - ('ໆ', 'ໆ'), - ('໐', '໙'), - ('ໜ', 'ໟ'), - ('ༀ', '༗'), - ('༚', '༴'), - ('༶', '༶'), - ('༸', '༸'), - ('༺', 'ཇ'), - ('ཉ', 'ཬ'), - ('ཿ', 'ཿ'), - ('྅', '྅'), - ('ྈ', 'ྌ'), - ('྾', '࿅'), - ('࿇', '࿌'), - ('࿎', '࿚'), - ('က', 'ာ'), - ('ေ', 'ေ'), - ('း', 'း'), - ('ျ', 'ြ'), - ('ဿ', 'ၗ'), - ('ၚ', 'ၝ'), - ('ၡ', 'ၰ'), - ('ၵ', 'ႁ'), - ('ႃ', 'ႄ'), - ('ႇ', 'ႌ'), - ('ႎ', 'ႜ'), - ('႞', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ა', 'ቈ'), - ('ቊ', 'ቍ'), - ('ቐ', 'ቖ'), - ('ቘ', 'ቘ'), - ('ቚ', 'ቝ'), - ('በ', 'ኈ'), - ('ኊ', 'ኍ'), - ('ነ', 'ኰ'), - ('ኲ', 'ኵ'), - ('ኸ', 'ኾ'), - ('ዀ', 'ዀ'), - ('ዂ', 'ዅ'), - ('ወ', 'ዖ'), - ('ዘ', 'ጐ'), - ('ጒ', 'ጕ'), - ('ጘ', 'ፚ'), - ('፠', '፼'), - ('ᎀ', '᎙'), - ('Ꭰ', 'Ᏽ'), - ('ᏸ', 'ᏽ'), - ('᐀', '᚜'), - ('ᚠ', 'ᛸ'), - ('ᜀ', 'ᜑ'), - ('ᜟ', 'ᜱ'), - ('᜵', '᜶'), - ('ᝀ', 'ᝑ'), - ('ᝠ', 'ᝬ'), - ('ᝮ', 'ᝰ'), - ('ក', 'ឳ'), - ('ា', 'ា'), - ('ើ', 'ៅ'), - ('ះ', 'ៈ'), - ('។', 'ៜ'), - ('០', '៩'), - ('៰', '៹'), - ('᠀', '᠊'), - ('᠐', '᠙'), - ('ᠠ', 'ᡸ'), - ('ᢀ', 'ᢄ'), - ('ᢇ', 'ᢨ'), - ('ᢪ', 'ᢪ'), - ('ᢰ', 'ᣵ'), - ('ᤀ', 'ᤞ'), - ('ᤣ', 'ᤦ'), - ('ᤩ', 'ᤫ'), - ('ᤰ', 'ᤱ'), - ('ᤳ', 'ᤸ'), - ('᥀', '᥀'), - ('᥄', 'ᥭ'), - ('ᥰ', 'ᥴ'), - ('ᦀ', 'ᦫ'), - ('ᦰ', 'ᧉ'), - ('᧐', '᧚'), - ('᧞', 'ᨖ'), - ('ᨙ', 'ᨚ'), - ('᨞', 'ᩕ'), - ('ᩗ', 'ᩗ'), - ('ᩡ', 'ᩡ'), - ('ᩣ', 'ᩤ'), - ('ᩭ', 'ᩲ'), - ('᪀', '᪉'), - ('᪐', '᪙'), - ('᪠', '᪭'), - ('ᬄ', 'ᬳ'), - ('ᬾ', 'ᭁ'), - ('ᭅ', 'ᭌ'), - ('᭎', '᭪'), - ('᭴', '᭿'), - ('ᮂ', 'ᮡ'), - ('ᮦ', 'ᮧ'), - ('ᮮ', 'ᯥ'), - ('ᯧ', 'ᯧ'), - ('ᯪ', 'ᯬ'), - ('ᯮ', 'ᯮ'), - ('᯼', 'ᰫ'), - ('ᰴ', 'ᰵ'), - ('᰻', '᱉'), - ('ᱍ', 'ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', '᳇'), - ('᳓', '᳓'), - ('᳡', '᳡'), - ('ᳩ', 'ᳬ'), - ('ᳮ', 'ᳳ'), - ('ᳵ', '᳷'), 
- ('ᳺ', 'ᳺ'), - ('ᴀ', 'ᶿ'), - ('Ḁ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), - ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ῄ'), - ('ῆ', 'ΐ'), - ('ῖ', 'Ί'), - ('῝', '`'), - ('ῲ', 'ῴ'), - ('ῶ', '῾'), - ('\u{2000}', '\u{200a}'), - ('‐', '‧'), - ('\u{202f}', '\u{205f}'), - ('⁰', 'ⁱ'), - ('⁴', '₎'), - ('ₐ', 'ₜ'), - ('₠', '⃀'), - ('℀', '↋'), - ('←', '␩'), - ('⑀', '⑊'), - ('①', '⭳'), - ('⭶', '⮕'), - ('⮗', 'ⳮ'), - ('Ⳳ', 'ⳳ'), - ('⳹', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ⴰ', 'ⵧ'), - ('ⵯ', '⵰'), - ('ⶀ', 'ⶖ'), - ('ⶠ', 'ⶦ'), - ('ⶨ', 'ⶮ'), - ('ⶰ', 'ⶶ'), - ('ⶸ', 'ⶾ'), - ('ⷀ', 'ⷆ'), - ('ⷈ', 'ⷎ'), - ('ⷐ', 'ⷖ'), - ('ⷘ', 'ⷞ'), - ('⸀', '⹝'), - ('⺀', '⺙'), - ('⺛', '⻳'), - ('⼀', '⿕'), - ('⿰', '〩'), - ('〰', '〿'), - ('ぁ', 'ゖ'), - ('゛', 'ヿ'), - ('ㄅ', 'ㄯ'), - ('ㄱ', 'ㆎ'), - ('㆐', '㇥'), - ('㇯', '㈞'), - ('㈠', 'ꒌ'), - ('꒐', '꓆'), - ('ꓐ', 'ꘫ'), - ('Ꙁ', 'ꙮ'), - ('꙳', '꙳'), - ('꙾', 'ꚝ'), - ('ꚠ', 'ꛯ'), - ('꛲', '꛷'), - ('꜀', 'ꟍ'), - ('Ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'Ƛ'), - ('ꟲ', 'ꠁ'), - ('ꠃ', 'ꠅ'), - ('ꠇ', 'ꠊ'), - ('ꠌ', 'ꠤ'), - ('ꠧ', '꠫'), - ('꠰', '꠹'), - ('ꡀ', '꡷'), - ('ꢀ', 'ꣃ'), - ('꣎', '꣙'), - ('ꣲ', 'ꣾ'), - ('꤀', 'ꤥ'), - ('꤮', 'ꥆ'), - ('ꥒ', 'ꥒ'), - ('꥟', 'ꥼ'), - ('ꦃ', 'ꦲ'), - ('ꦴ', 'ꦵ'), - ('ꦺ', 'ꦻ'), - ('ꦾ', 'ꦿ'), - ('꧁', '꧍'), - ('ꧏ', '꧙'), - ('꧞', 'ꧤ'), - ('ꧦ', 'ꧾ'), - ('ꨀ', 'ꨨ'), - ('ꨯ', 'ꨰ'), - ('ꨳ', 'ꨴ'), - ('ꩀ', 'ꩂ'), - ('ꩄ', 'ꩋ'), - ('ꩍ', 'ꩍ'), - ('꩐', '꩙'), - ('꩜', 'ꩻ'), - ('ꩽ', 'ꪯ'), - ('ꪱ', 'ꪱ'), - ('ꪵ', 'ꪶ'), - ('ꪹ', 'ꪽ'), - ('ꫀ', 'ꫀ'), - ('ꫂ', 'ꫂ'), - ('ꫛ', 'ꫫ'), - ('ꫮ', 'ꫵ'), - ('ꬁ', 'ꬆ'), - ('ꬉ', 'ꬎ'), - ('ꬑ', 'ꬖ'), - ('ꬠ', 'ꬦ'), - ('ꬨ', 'ꬮ'), - ('ꬰ', '꭫'), - ('ꭰ', 'ꯤ'), - ('ꯦ', 'ꯧ'), - ('ꯩ', '꯬'), - ('꯰', '꯹'), - ('가', '힣'), - ('ힰ', 'ퟆ'), - ('ퟋ', 'ퟻ'), - ('豈', '舘'), - ('並', '龎'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('יִ', 'יִ'), - ('ײַ', 'זּ'), - ('טּ', 'לּ'), - ('מּ', 'מּ'), - ('נּ', 'סּ'), - ('ףּ', 'פּ'), - ('צּ', '﯂'), - ('ﯓ', 'ﶏ'), - ('ﶒ', 'ﷇ'), - ('﷏', '﷏'), - ('ﷰ', '﷿'), - ('︐', '︙'), - ('︰', '﹒'), - ('﹔', '﹦'), - ('﹨', 
'﹫'), - ('ﹰ', 'ﹴ'), - ('ﹶ', 'ﻼ'), - ('!', 'ン'), - ('ᅠ', 'ᄒ'), - ('ᅡ', 'ᅦ'), - ('ᅧ', 'ᅬ'), - ('ᅭ', 'ᅲ'), - ('ᅳ', 'ᅵ'), - ('¢', '₩'), - ('│', '○'), - ('', '�'), - ('𐀀', '𐀋'), - ('𐀍', '𐀦'), - ('𐀨', '𐀺'), - ('𐀼', '𐀽'), - ('𐀿', '𐁍'), - ('𐁐', '𐁝'), - ('𐂀', '𐃺'), - ('𐄀', '𐄂'), - ('𐄇', '𐄳'), - ('𐄷', '𐆎'), - ('𐆐', '𐆜'), - ('𐆠', '𐆠'), - ('𐇐', '𐇼'), - ('𐊀', '𐊜'), - ('𐊠', '𐋐'), - ('𐋡', '𐋻'), - ('𐌀', '𐌣'), - ('𐌭', '𐍊'), - ('𐍐', '𐍵'), - ('𐎀', '𐎝'), - ('𐎟', '𐏃'), - ('𐏈', '𐏕'), - ('𐐀', '𐒝'), - ('𐒠', '𐒩'), - ('𐒰', '𐓓'), - ('𐓘', '𐓻'), - ('𐔀', '𐔧'), - ('𐔰', '𐕣'), - ('𐕯', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐗀', '𐗳'), - ('𐘀', '𐜶'), - ('𐝀', '𐝕'), - ('𐝠', '𐝧'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𐠀', '𐠅'), - ('𐠈', '𐠈'), - ('𐠊', '𐠵'), - ('𐠷', '𐠸'), - ('𐠼', '𐠼'), - ('𐠿', '𐡕'), - ('𐡗', '𐢞'), - ('𐢧', '𐢯'), - ('𐣠', '𐣲'), - ('𐣴', '𐣵'), - ('𐣻', '𐤛'), - ('𐤟', '𐤹'), - ('𐤿', '𐤿'), - ('𐦀', '𐦷'), - ('𐦼', '𐧏'), - ('𐧒', '𐨀'), - ('𐨐', '𐨓'), - ('𐨕', '𐨗'), - ('𐨙', '𐨵'), - ('𐩀', '𐩈'), - ('𐩐', '𐩘'), - ('𐩠', '𐪟'), - ('𐫀', '𐫤'), - ('𐫫', '𐫶'), - ('𐬀', '𐬵'), - ('𐬹', '𐭕'), - ('𐭘', '𐭲'), - ('𐭸', '𐮑'), - ('𐮙', '𐮜'), - ('𐮩', '𐮯'), - ('𐰀', '𐱈'), - ('𐲀', '𐲲'), - ('𐳀', '𐳲'), - ('𐳺', '𐴣'), - ('𐴰', '𐴹'), - ('𐵀', '𐵥'), - ('𐵮', '𐶅'), - ('𐶎', '𐶏'), - ('𐹠', '𐹾'), - ('𐺀', '𐺩'), - ('𐺭', '𐺭'), - ('𐺰', '𐺱'), - ('𐻂', '𐻄'), - ('𐼀', '𐼧'), - ('𐼰', '𐽅'), - ('𐽑', '𐽙'), - ('𐽰', '𐾁'), - ('𐾆', '𐾉'), - ('𐾰', '𐿋'), - ('𐿠', '𐿶'), - ('𑀀', '𑀀'), - ('𑀂', '𑀷'), - ('𑁇', '𑁍'), - ('𑁒', '𑁯'), - ('𑁱', '𑁲'), - ('𑁵', '𑁵'), - ('𑂂', '𑂲'), - ('𑂷', '𑂸'), - ('𑂻', '𑂼'), - ('𑂾', '𑃁'), - ('𑃐', '𑃨'), - ('𑃰', '𑃹'), - ('𑄃', '𑄦'), - ('𑄬', '𑄬'), - ('𑄶', '𑅇'), - ('𑅐', '𑅲'), - ('𑅴', '𑅶'), - ('𑆂', '𑆵'), - ('𑆿', '𑆿'), - ('𑇁', '𑇈'), - ('𑇍', '𑇎'), - ('𑇐', '𑇟'), - ('𑇡', '𑇴'), - ('𑈀', '𑈑'), - ('𑈓', '𑈮'), - ('𑈲', '𑈳'), - ('𑈸', '𑈽'), - ('𑈿', '𑉀'), - ('𑊀', '𑊆'), - ('𑊈', '𑊈'), - ('𑊊', '𑊍'), - ('𑊏', '𑊝'), - ('𑊟', '𑊩'), - ('𑊰', '𑋞'), - ('𑋠', '𑋢'), - ('𑋰', '𑋹'), - ('𑌂', '𑌃'), - ('𑌅', '𑌌'), - 
('𑌏', '𑌐'), - ('𑌓', '𑌨'), - ('𑌪', '𑌰'), - ('𑌲', '𑌳'), - ('𑌵', '𑌹'), - ('𑌽', '𑌽'), - ('𑌿', '𑌿'), - ('𑍁', '𑍄'), - ('𑍇', '𑍈'), - ('𑍋', '𑍌'), - ('𑍐', '𑍐'), - ('𑍝', '𑍣'), - ('𑎀', '𑎉'), - ('𑎋', '𑎋'), - ('𑎎', '𑎎'), - ('𑎐', '𑎵'), - ('𑎷', '𑎷'), - ('𑎹', '𑎺'), - ('𑏊', '𑏊'), - ('𑏌', '𑏍'), - ('𑏑', '𑏑'), - ('𑏓', '𑏕'), - ('𑏗', '𑏘'), - ('𑐀', '𑐷'), - ('𑑀', '𑑁'), - ('𑑅', '𑑅'), - ('𑑇', '𑑛'), - ('𑑝', '𑑝'), - ('𑑟', '𑑡'), - ('𑒀', '𑒯'), - ('𑒱', '𑒲'), - ('𑒹', '𑒹'), - ('𑒻', '𑒼'), - ('𑒾', '𑒾'), - ('𑓁', '𑓁'), - ('𑓄', '𑓇'), - ('𑓐', '𑓙'), - ('𑖀', '𑖮'), - ('𑖰', '𑖱'), - ('𑖸', '𑖻'), - ('𑖾', '𑖾'), - ('𑗁', '𑗛'), - ('𑘀', '𑘲'), - ('𑘻', '𑘼'), - ('𑘾', '𑘾'), - ('𑙁', '𑙄'), - ('𑙐', '𑙙'), - ('𑙠', '𑙬'), - ('𑚀', '𑚪'), - ('𑚬', '𑚬'), - ('𑚮', '𑚯'), - ('𑚸', '𑚹'), - ('𑛀', '𑛉'), - ('𑛐', '𑛣'), - ('𑜀', '𑜚'), - ('𑜞', '𑜞'), - ('𑜠', '𑜡'), - ('𑜦', '𑜦'), - ('𑜰', '𑝆'), - ('𑠀', '𑠮'), - ('𑠸', '𑠸'), - ('𑠻', '𑠻'), - ('𑢠', '𑣲'), - ('𑣿', '𑤆'), - ('𑤉', '𑤉'), - ('𑤌', '𑤓'), - ('𑤕', '𑤖'), - ('𑤘', '𑤯'), - ('𑤱', '𑤵'), - ('𑤷', '𑤸'), - ('𑤿', '𑥂'), - ('𑥄', '𑥆'), - ('𑥐', '𑥙'), - ('𑦠', '𑦧'), - ('𑦪', '𑧓'), - ('𑧜', '𑧟'), - ('𑧡', '𑧤'), - ('𑨀', '𑨀'), - ('𑨋', '𑨲'), - ('𑨹', '𑨺'), - ('𑨿', '𑩆'), - ('𑩐', '𑩐'), - ('𑩗', '𑩘'), - ('𑩜', '𑪉'), - ('𑪗', '𑪗'), - ('𑪚', '𑪢'), - ('𑪰', '𑫸'), - ('𑬀', '𑬉'), - ('𑯀', '𑯡'), - ('𑯰', '𑯹'), - ('𑰀', '𑰈'), - ('𑰊', '𑰯'), - ('𑰾', '𑰾'), - ('𑱀', '𑱅'), - ('𑱐', '𑱬'), - ('𑱰', '𑲏'), - ('𑲩', '𑲩'), - ('𑲱', '𑲱'), - ('𑲴', '𑲴'), - ('𑴀', '𑴆'), - ('𑴈', '𑴉'), - ('𑴋', '𑴰'), - ('𑵆', '𑵆'), - ('𑵐', '𑵙'), - ('𑵠', '𑵥'), - ('𑵧', '𑵨'), - ('𑵪', '𑶎'), - ('𑶓', '𑶔'), - ('𑶖', '𑶖'), - ('𑶘', '𑶘'), - ('𑶠', '𑶩'), - ('𑻠', '𑻲'), - ('𑻵', '𑻸'), - ('𑼂', '𑼐'), - ('𑼒', '𑼵'), - ('𑼾', '𑼿'), - ('𑽃', '𑽙'), - ('𑾰', '𑾰'), - ('𑿀', '𑿱'), - ('𑿿', '𒎙'), - ('𒐀', '𒑮'), - ('𒑰', '𒑴'), - ('𒒀', '𒕃'), - ('𒾐', '𒿲'), - ('𓀀', '𓐯'), - ('𓑁', '𓑆'), - ('𓑠', '𔏺'), - ('𔐀', '𔙆'), - ('𖄀', '𖄝'), - ('𖄪', '𖄬'), - ('𖄰', '𖄹'), - ('𖠀', '𖨸'), - ('𖩀', '𖩞'), - ('𖩠', '𖩩'), - ('𖩮', '𖪾'), - ('𖫀', '𖫉'), - ('𖫐', '𖫭'), - ('𖫵', '𖫵'), - ('𖬀', '𖬯'), - ('𖬷', '𖭅'), - ('𖭐', '𖭙'), - ('𖭛', '𖭡'), - ('𖭣', '𖭷'), 
- ('𖭽', '𖮏'), - ('𖵀', '𖵹'), - ('𖹀', '𖺚'), - ('𖼀', '𖽊'), - ('𖽐', '𖾇'), - ('𖾓', '𖾟'), - ('𖿠', '𖿣'), - ('𗀀', '𘟷'), - ('𘠀', '𘳕'), - ('𘳿', '𘴈'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('𛀀', '𛄢'), - ('𛄲', '𛄲'), - ('𛅐', '𛅒'), - ('𛅕', '𛅕'), - ('𛅤', '𛅧'), - ('𛅰', '𛋻'), - ('𛰀', '𛱪'), - ('𛱰', '𛱼'), - ('𛲀', '𛲈'), - ('𛲐', '𛲙'), - ('𛲜', '𛲜'), - ('𛲟', '𛲟'), - ('𜰀', '𜳹'), - ('𜴀', '𜺳'), - ('𜽐', '𜿃'), - ('𝀀', '𝃵'), - ('𝄀', '𝄦'), - ('𝄩', '𝅘𝅥𝅲'), - ('𝅪', '𝅬'), - ('𝆃', '𝆄'), - ('𝆌', '𝆩'), - ('𝆮', '𝇪'), - ('𝈀', '𝉁'), - ('𝉅', '𝉅'), - ('𝋀', '𝋓'), - ('𝋠', '𝋳'), - ('𝌀', '𝍖'), - ('𝍠', '𝍸'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝟋'), - ('𝟎', '𝧿'), - ('𝨷', '𝨺'), - ('𝩭', '𝩴'), - ('𝩶', '𝪃'), - ('𝪅', '𝪋'), - ('𝼀', '𝼞'), - ('𝼥', '𝼪'), - ('𞀰', '𞁭'), - ('𞄀', '𞄬'), - ('𞄷', '𞄽'), - ('𞅀', '𞅉'), - ('𞅎', '𞅏'), - ('𞊐', '𞊭'), - ('𞋀', '𞋫'), - ('𞋰', '𞋹'), - ('𞋿', '𞋿'), - ('𞓐', '𞓫'), - ('𞓰', '𞓹'), - ('𞗐', '𞗭'), - ('𞗰', '𞗺'), - ('𞗿', '𞗿'), - ('𞟠', '𞟦'), - ('𞟨', '𞟫'), - ('𞟭', '𞟮'), - ('𞟰', '𞟾'), - ('𞠀', '𞣄'), - ('𞣇', '𞣏'), - ('𞤀', '𞥃'), - ('𞥋', '𞥋'), - ('𞥐', '𞥙'), - ('𞥞', '𞥟'), - ('𞱱', '𞲴'), - ('𞴁', '𞴽'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('𞻰', '𞻱'), - ('🀀', '🀫'), - ('🀰', '🂓'), - ('🂠', '🂮'), - ('🂱', '🂿'), - ('🃁', '🃏'), - ('🃑', '🃵'), - ('🄀', '🆭'), - ('🇦', '🈂'), - ('🈐', '🈻'), - ('🉀', '🉈'), - ('🉐', '🉑'), - ('🉠', '🉥'), - ('🌀', '🛗'), - ('🛜', '🛬'), - ('🛰', 
'🛼'), - ('🜀', '🝶'), - ('🝻', '🟙'), - ('🟠', '🟫'), - ('🟰', '🟰'), - ('🠀', '🠋'), - ('🠐', '🡇'), - ('🡐', '🡙'), - ('🡠', '🢇'), - ('🢐', '🢭'), - ('🢰', '🢻'), - ('🣀', '🣁'), - ('🤀', '🩓'), - ('🩠', '🩭'), - ('🩰', '🩼'), - ('🪀', '🪉'), - ('🪏', '🫆'), - ('🫎', '🫜'), - ('🫟', '🫩'), - ('🫰', '🫸'), - ('🬀', '🮒'), - ('🮔', '🯹'), - ('𠀀', '𪛟'), - ('𪜀', '𫜹'), - ('𫝀', '𫠝'), - ('𫠠', '𬺡'), - ('𬺰', '𮯠'), - ('𮯰', '𮹝'), - ('丽', '𪘀'), - ('𰀀', '𱍊'), - ('𱍐', '𲎯'), -]; - -pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ - ('\u{300}', '\u{36f}'), - ('\u{483}', '\u{489}'), - ('\u{591}', '\u{5bd}'), - ('\u{5bf}', '\u{5bf}'), - ('\u{5c1}', '\u{5c2}'), - ('\u{5c4}', '\u{5c5}'), - ('\u{5c7}', '\u{5c7}'), - ('\u{610}', '\u{61a}'), - ('\u{64b}', '\u{65f}'), - ('\u{670}', '\u{670}'), - ('\u{6d6}', '\u{6dc}'), - ('\u{6df}', '\u{6e4}'), - ('\u{6e7}', '\u{6e8}'), - ('\u{6ea}', '\u{6ed}'), - ('\u{711}', '\u{711}'), - ('\u{730}', '\u{74a}'), - ('\u{7a6}', '\u{7b0}'), - ('\u{7eb}', '\u{7f3}'), - ('\u{7fd}', '\u{7fd}'), - ('\u{816}', '\u{819}'), - ('\u{81b}', '\u{823}'), - ('\u{825}', '\u{827}'), - ('\u{829}', '\u{82d}'), - ('\u{859}', '\u{85b}'), - ('\u{897}', '\u{89f}'), - ('\u{8ca}', '\u{8e1}'), - ('\u{8e3}', '\u{902}'), - ('\u{93a}', '\u{93a}'), - ('\u{93c}', '\u{93c}'), - ('\u{941}', '\u{948}'), - ('\u{94d}', '\u{94d}'), - ('\u{951}', '\u{957}'), - ('\u{962}', '\u{963}'), - ('\u{981}', '\u{981}'), - ('\u{9bc}', '\u{9bc}'), - ('\u{9be}', '\u{9be}'), - ('\u{9c1}', '\u{9c4}'), - ('\u{9cd}', '\u{9cd}'), - ('\u{9d7}', '\u{9d7}'), - ('\u{9e2}', '\u{9e3}'), - ('\u{9fe}', '\u{9fe}'), - ('\u{a01}', '\u{a02}'), - ('\u{a3c}', '\u{a3c}'), - ('\u{a41}', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4d}'), - ('\u{a51}', '\u{a51}'), - ('\u{a70}', '\u{a71}'), - ('\u{a75}', '\u{a75}'), - ('\u{a81}', '\u{a82}'), - ('\u{abc}', '\u{abc}'), - ('\u{ac1}', '\u{ac5}'), - ('\u{ac7}', '\u{ac8}'), - ('\u{acd}', '\u{acd}'), - ('\u{ae2}', '\u{ae3}'), - ('\u{afa}', '\u{aff}'), - ('\u{b01}', '\u{b01}'), - ('\u{b3c}', '\u{b3c}'), - 
('\u{b3e}', '\u{b3f}'), - ('\u{b41}', '\u{b44}'), - ('\u{b4d}', '\u{b4d}'), - ('\u{b55}', '\u{b57}'), - ('\u{b62}', '\u{b63}'), - ('\u{b82}', '\u{b82}'), - ('\u{bbe}', '\u{bbe}'), - ('\u{bc0}', '\u{bc0}'), - ('\u{bcd}', '\u{bcd}'), - ('\u{bd7}', '\u{bd7}'), - ('\u{c00}', '\u{c00}'), - ('\u{c04}', '\u{c04}'), - ('\u{c3c}', '\u{c3c}'), - ('\u{c3e}', '\u{c40}'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4d}'), - ('\u{c55}', '\u{c56}'), - ('\u{c62}', '\u{c63}'), - ('\u{c81}', '\u{c81}'), - ('\u{cbc}', '\u{cbc}'), - ('\u{cbf}', '\u{cc0}'), - ('\u{cc2}', '\u{cc2}'), - ('\u{cc6}', '\u{cc8}'), - ('\u{cca}', '\u{ccd}'), - ('\u{cd5}', '\u{cd6}'), - ('\u{ce2}', '\u{ce3}'), - ('\u{d00}', '\u{d01}'), - ('\u{d3b}', '\u{d3c}'), - ('\u{d3e}', '\u{d3e}'), - ('\u{d41}', '\u{d44}'), - ('\u{d4d}', '\u{d4d}'), - ('\u{d57}', '\u{d57}'), - ('\u{d62}', '\u{d63}'), - ('\u{d81}', '\u{d81}'), - ('\u{dca}', '\u{dca}'), - ('\u{dcf}', '\u{dcf}'), - ('\u{dd2}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('\u{ddf}', '\u{ddf}'), - ('\u{e31}', '\u{e31}'), - ('\u{e34}', '\u{e3a}'), - ('\u{e47}', '\u{e4e}'), - ('\u{eb1}', '\u{eb1}'), - ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ece}'), - ('\u{f18}', '\u{f19}'), - ('\u{f35}', '\u{f35}'), - ('\u{f37}', '\u{f37}'), - ('\u{f39}', '\u{f39}'), - ('\u{f71}', '\u{f7e}'), - ('\u{f80}', '\u{f84}'), - ('\u{f86}', '\u{f87}'), - ('\u{f8d}', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('\u{fc6}', '\u{fc6}'), - ('\u{102d}', '\u{1030}'), - ('\u{1032}', '\u{1037}'), - ('\u{1039}', '\u{103a}'), - ('\u{103d}', '\u{103e}'), - ('\u{1058}', '\u{1059}'), - ('\u{105e}', '\u{1060}'), - ('\u{1071}', '\u{1074}'), - ('\u{1082}', '\u{1082}'), - ('\u{1085}', '\u{1086}'), - ('\u{108d}', '\u{108d}'), - ('\u{109d}', '\u{109d}'), - ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1715}'), - ('\u{1732}', '\u{1734}'), - ('\u{1752}', '\u{1753}'), - ('\u{1772}', '\u{1773}'), - ('\u{17b4}', '\u{17b5}'), - ('\u{17b7}', '\u{17bd}'), - ('\u{17c6}', '\u{17c6}'), - ('\u{17c9}', '\u{17d3}'), - 
('\u{17dd}', '\u{17dd}'), - ('\u{180b}', '\u{180d}'), - ('\u{180f}', '\u{180f}'), - ('\u{1885}', '\u{1886}'), - ('\u{18a9}', '\u{18a9}'), - ('\u{1920}', '\u{1922}'), - ('\u{1927}', '\u{1928}'), - ('\u{1932}', '\u{1932}'), - ('\u{1939}', '\u{193b}'), - ('\u{1a17}', '\u{1a18}'), - ('\u{1a1b}', '\u{1a1b}'), - ('\u{1a56}', '\u{1a56}'), - ('\u{1a58}', '\u{1a5e}'), - ('\u{1a60}', '\u{1a60}'), - ('\u{1a62}', '\u{1a62}'), - ('\u{1a65}', '\u{1a6c}'), - ('\u{1a73}', '\u{1a7c}'), - ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ace}'), - ('\u{1b00}', '\u{1b03}'), - ('\u{1b34}', '\u{1b3d}'), - ('\u{1b42}', '\u{1b44}'), - ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '\u{1b81}'), - ('\u{1ba2}', '\u{1ba5}'), - ('\u{1ba8}', '\u{1bad}'), - ('\u{1be6}', '\u{1be6}'), - ('\u{1be8}', '\u{1be9}'), - ('\u{1bed}', '\u{1bed}'), - ('\u{1bef}', '\u{1bf3}'), - ('\u{1c2c}', '\u{1c33}'), - ('\u{1c36}', '\u{1c37}'), - ('\u{1cd0}', '\u{1cd2}'), - ('\u{1cd4}', '\u{1ce0}'), - ('\u{1ce2}', '\u{1ce8}'), - ('\u{1ced}', '\u{1ced}'), - ('\u{1cf4}', '\u{1cf4}'), - ('\u{1cf8}', '\u{1cf9}'), - ('\u{1dc0}', '\u{1dff}'), - ('\u{200c}', '\u{200c}'), - ('\u{20d0}', '\u{20f0}'), - ('\u{2cef}', '\u{2cf1}'), - ('\u{2d7f}', '\u{2d7f}'), - ('\u{2de0}', '\u{2dff}'), - ('\u{302a}', '\u{302f}'), - ('\u{3099}', '\u{309a}'), - ('\u{a66f}', '\u{a672}'), - ('\u{a674}', '\u{a67d}'), - ('\u{a69e}', '\u{a69f}'), - ('\u{a6f0}', '\u{a6f1}'), - ('\u{a802}', '\u{a802}'), - ('\u{a806}', '\u{a806}'), - ('\u{a80b}', '\u{a80b}'), - ('\u{a825}', '\u{a826}'), - ('\u{a82c}', '\u{a82c}'), - ('\u{a8c4}', '\u{a8c5}'), - ('\u{a8e0}', '\u{a8f1}'), - ('\u{a8ff}', '\u{a8ff}'), - ('\u{a926}', '\u{a92d}'), - ('\u{a947}', '\u{a951}'), - ('\u{a953}', '\u{a953}'), - ('\u{a980}', '\u{a982}'), - ('\u{a9b3}', '\u{a9b3}'), - ('\u{a9b6}', '\u{a9b9}'), - ('\u{a9bc}', '\u{a9bd}'), - ('\u{a9c0}', '\u{a9c0}'), - ('\u{a9e5}', '\u{a9e5}'), - ('\u{aa29}', '\u{aa2e}'), - ('\u{aa31}', '\u{aa32}'), - ('\u{aa35}', '\u{aa36}'), - ('\u{aa43}', '\u{aa43}'), - 
('\u{aa4c}', '\u{aa4c}'), - ('\u{aa7c}', '\u{aa7c}'), - ('\u{aab0}', '\u{aab0}'), - ('\u{aab2}', '\u{aab4}'), - ('\u{aab7}', '\u{aab8}'), - ('\u{aabe}', '\u{aabf}'), - ('\u{aac1}', '\u{aac1}'), - ('\u{aaec}', '\u{aaed}'), - ('\u{aaf6}', '\u{aaf6}'), - ('\u{abe5}', '\u{abe5}'), - ('\u{abe8}', '\u{abe8}'), - ('\u{abed}', '\u{abed}'), - ('\u{fb1e}', '\u{fb1e}'), - ('\u{fe00}', '\u{fe0f}'), - ('\u{fe20}', '\u{fe2f}'), - ('\u{ff9e}', '\u{ff9f}'), - ('\u{101fd}', '\u{101fd}'), - ('\u{102e0}', '\u{102e0}'), - ('\u{10376}', '\u{1037a}'), - ('\u{10a01}', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '\u{10a0f}'), - ('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '\u{10a3f}'), - ('\u{10ae5}', '\u{10ae6}'), - ('\u{10d24}', '\u{10d27}'), - ('\u{10d69}', '\u{10d6d}'), - ('\u{10eab}', '\u{10eac}'), - ('\u{10efc}', '\u{10eff}'), - ('\u{10f46}', '\u{10f50}'), - ('\u{10f82}', '\u{10f85}'), - ('\u{11001}', '\u{11001}'), - ('\u{11038}', '\u{11046}'), - ('\u{11070}', '\u{11070}'), - ('\u{11073}', '\u{11074}'), - ('\u{1107f}', '\u{11081}'), - ('\u{110b3}', '\u{110b6}'), - ('\u{110b9}', '\u{110ba}'), - ('\u{110c2}', '\u{110c2}'), - ('\u{11100}', '\u{11102}'), - ('\u{11127}', '\u{1112b}'), - ('\u{1112d}', '\u{11134}'), - ('\u{11173}', '\u{11173}'), - ('\u{11180}', '\u{11181}'), - ('\u{111b6}', '\u{111be}'), - ('\u{111c0}', '\u{111c0}'), - ('\u{111c9}', '\u{111cc}'), - ('\u{111cf}', '\u{111cf}'), - ('\u{1122f}', '\u{11231}'), - ('\u{11234}', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), - ('\u{11241}', '\u{11241}'), - ('\u{112df}', '\u{112df}'), - ('\u{112e3}', '\u{112ea}'), - ('\u{11300}', '\u{11301}'), - ('\u{1133b}', '\u{1133c}'), - ('\u{1133e}', '\u{1133e}'), - ('\u{11340}', '\u{11340}'), - ('\u{1134d}', '\u{1134d}'), - ('\u{11357}', '\u{11357}'), - ('\u{11366}', '\u{1136c}'), - ('\u{11370}', '\u{11374}'), - ('\u{113b8}', '\u{113b8}'), - ('\u{113bb}', '\u{113c0}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '\u{113c9}'), - ('\u{113ce}', 
'\u{113d0}'), - ('\u{113d2}', '\u{113d2}'), - ('\u{113e1}', '\u{113e2}'), - ('\u{11438}', '\u{1143f}'), - ('\u{11442}', '\u{11444}'), - ('\u{11446}', '\u{11446}'), - ('\u{1145e}', '\u{1145e}'), - ('\u{114b0}', '\u{114b0}'), - ('\u{114b3}', '\u{114b8}'), - ('\u{114ba}', '\u{114ba}'), - ('\u{114bd}', '\u{114bd}'), - ('\u{114bf}', '\u{114c0}'), - ('\u{114c2}', '\u{114c3}'), - ('\u{115af}', '\u{115af}'), - ('\u{115b2}', '\u{115b5}'), - ('\u{115bc}', '\u{115bd}'), - ('\u{115bf}', '\u{115c0}'), - ('\u{115dc}', '\u{115dd}'), - ('\u{11633}', '\u{1163a}'), - ('\u{1163d}', '\u{1163d}'), - ('\u{1163f}', '\u{11640}'), - ('\u{116ab}', '\u{116ab}'), - ('\u{116ad}', '\u{116ad}'), - ('\u{116b0}', '\u{116b7}'), - ('\u{1171d}', '\u{1171d}'), - ('\u{1171f}', '\u{1171f}'), - ('\u{11722}', '\u{11725}'), - ('\u{11727}', '\u{1172b}'), - ('\u{1182f}', '\u{11837}'), - ('\u{11839}', '\u{1183a}'), - ('\u{11930}', '\u{11930}'), - ('\u{1193b}', '\u{1193e}'), - ('\u{11943}', '\u{11943}'), - ('\u{119d4}', '\u{119d7}'), - ('\u{119da}', '\u{119db}'), - ('\u{119e0}', '\u{119e0}'), - ('\u{11a01}', '\u{11a0a}'), - ('\u{11a33}', '\u{11a38}'), - ('\u{11a3b}', '\u{11a3e}'), - ('\u{11a47}', '\u{11a47}'), - ('\u{11a51}', '\u{11a56}'), - ('\u{11a59}', '\u{11a5b}'), - ('\u{11a8a}', '\u{11a96}'), - ('\u{11a98}', '\u{11a99}'), - ('\u{11c30}', '\u{11c36}'), - ('\u{11c38}', '\u{11c3d}'), - ('\u{11c3f}', '\u{11c3f}'), - ('\u{11c92}', '\u{11ca7}'), - ('\u{11caa}', '\u{11cb0}'), - ('\u{11cb2}', '\u{11cb3}'), - ('\u{11cb5}', '\u{11cb6}'), - ('\u{11d31}', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d45}'), - ('\u{11d47}', '\u{11d47}'), - ('\u{11d90}', '\u{11d91}'), - ('\u{11d95}', '\u{11d95}'), - ('\u{11d97}', '\u{11d97}'), - ('\u{11ef3}', '\u{11ef4}'), - ('\u{11f00}', '\u{11f01}'), - ('\u{11f36}', '\u{11f3a}'), - ('\u{11f40}', '\u{11f42}'), - ('\u{11f5a}', '\u{11f5a}'), - ('\u{13440}', '\u{13440}'), - ('\u{13447}', '\u{13455}'), - ('\u{1611e}', '\u{16129}'), - 
('\u{1612d}', '\u{1612f}'), - ('\u{16af0}', '\u{16af4}'), - ('\u{16b30}', '\u{16b36}'), - ('\u{16f4f}', '\u{16f4f}'), - ('\u{16f8f}', '\u{16f92}'), - ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), - ('\u{1bc9d}', '\u{1bc9e}'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d165}', '\u{1d169}'), - ('\u{1d16d}', '\u{1d172}'), - ('\u{1d17b}', '\u{1d182}'), - ('\u{1d185}', '\u{1d18b}'), - ('\u{1d1aa}', '\u{1d1ad}'), - ('\u{1d242}', '\u{1d244}'), - ('\u{1da00}', '\u{1da36}'), - ('\u{1da3b}', '\u{1da6c}'), - ('\u{1da75}', '\u{1da75}'), - ('\u{1da84}', '\u{1da84}'), - ('\u{1da9b}', '\u{1da9f}'), - ('\u{1daa1}', '\u{1daaf}'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), - ('\u{1e08f}', '\u{1e08f}'), - ('\u{1e130}', '\u{1e136}'), - ('\u{1e2ae}', '\u{1e2ae}'), - ('\u{1e2ec}', '\u{1e2ef}'), - ('\u{1e4ec}', '\u{1e4ef}'), - ('\u{1e5ee}', '\u{1e5ef}'), - ('\u{1e8d0}', '\u{1e8d6}'), - ('\u{1e944}', '\u{1e94a}'), - ('\u{e0020}', '\u{e007f}'), - ('\u{e0100}', '\u{e01ef}'), -]; - -pub const GRAPHEME_LINK: &'static [(char, char)] = &[ - ('\u{94d}', '\u{94d}'), - ('\u{9cd}', '\u{9cd}'), - ('\u{a4d}', '\u{a4d}'), - ('\u{acd}', '\u{acd}'), - ('\u{b4d}', '\u{b4d}'), - ('\u{bcd}', '\u{bcd}'), - ('\u{c4d}', '\u{c4d}'), - ('\u{ccd}', '\u{ccd}'), - ('\u{d3b}', '\u{d3c}'), - ('\u{d4d}', '\u{d4d}'), - ('\u{dca}', '\u{dca}'), - ('\u{e3a}', '\u{e3a}'), - ('\u{eba}', '\u{eba}'), - ('\u{f84}', '\u{f84}'), - ('\u{1039}', '\u{103a}'), - ('\u{1714}', '\u{1715}'), - ('\u{1734}', '\u{1734}'), - ('\u{17d2}', '\u{17d2}'), - ('\u{1a60}', '\u{1a60}'), - ('\u{1b44}', '\u{1b44}'), - ('\u{1baa}', '\u{1bab}'), - ('\u{1bf2}', '\u{1bf3}'), - ('\u{2d7f}', '\u{2d7f}'), - ('\u{a806}', '\u{a806}'), - ('\u{a82c}', '\u{a82c}'), - ('\u{a8c4}', '\u{a8c4}'), - ('\u{a953}', '\u{a953}'), - ('\u{a9c0}', '\u{a9c0}'), - ('\u{aaf6}', '\u{aaf6}'), - ('\u{abed}', '\u{abed}'), - ('\u{10a3f}', 
'\u{10a3f}'), - ('\u{11046}', '\u{11046}'), - ('\u{11070}', '\u{11070}'), - ('\u{1107f}', '\u{1107f}'), - ('\u{110b9}', '\u{110b9}'), - ('\u{11133}', '\u{11134}'), - ('\u{111c0}', '\u{111c0}'), - ('\u{11235}', '\u{11235}'), - ('\u{112ea}', '\u{112ea}'), - ('\u{1134d}', '\u{1134d}'), - ('\u{113ce}', '\u{113d0}'), - ('\u{11442}', '\u{11442}'), - ('\u{114c2}', '\u{114c2}'), - ('\u{115bf}', '\u{115bf}'), - ('\u{1163f}', '\u{1163f}'), - ('\u{116b6}', '\u{116b6}'), - ('\u{1172b}', '\u{1172b}'), - ('\u{11839}', '\u{11839}'), - ('\u{1193d}', '\u{1193e}'), - ('\u{119e0}', '\u{119e0}'), - ('\u{11a34}', '\u{11a34}'), - ('\u{11a47}', '\u{11a47}'), - ('\u{11a99}', '\u{11a99}'), - ('\u{11c3f}', '\u{11c3f}'), - ('\u{11d44}', '\u{11d45}'), - ('\u{11d97}', '\u{11d97}'), - ('\u{11f41}', '\u{11f42}'), - ('\u{1612f}', '\u{1612f}'), -]; - -pub const HEX_DIGIT: &'static [(char, char)] = &[ - ('0', '9'), - ('A', 'F'), - ('a', 'f'), - ('0', '9'), - ('A', 'F'), - ('a', 'f'), -]; - -pub const HYPHEN: &'static [(char, char)] = &[ - ('-', '-'), - ('\u{ad}', '\u{ad}'), - ('֊', '֊'), - ('᠆', '᠆'), - ('‐', '‑'), - ('⸗', '⸗'), - ('・', '・'), - ('﹣', '﹣'), - ('-', '-'), - ('・', '・'), -]; - -pub const IDS_BINARY_OPERATOR: &'static [(char, char)] = - &[('⿰', '⿱'), ('⿴', '⿽'), ('㇯', '㇯')]; - -pub const IDS_TRINARY_OPERATOR: &'static [(char, char)] = &[('⿲', '⿳')]; - -pub const IDS_UNARY_OPERATOR: &'static [(char, char)] = &[('⿾', '⿿')]; - -pub const ID_COMPAT_MATH_CONTINUE: &'static [(char, char)] = &[ - ('²', '³'), - ('¹', '¹'), - ('⁰', '⁰'), - ('⁴', '⁾'), - ('₀', '₎'), - ('∂', '∂'), - ('∇', '∇'), - ('∞', '∞'), - ('𝛁', '𝛁'), - ('𝛛', '𝛛'), - ('𝛻', '𝛻'), - ('𝜕', '𝜕'), - ('𝜵', '𝜵'), - ('𝝏', '𝝏'), - ('𝝯', '𝝯'), - ('𝞉', '𝞉'), - ('𝞩', '𝞩'), - ('𝟃', '𝟃'), -]; - -pub const ID_COMPAT_MATH_START: &'static [(char, char)] = &[ - ('∂', '∂'), - ('∇', '∇'), - ('∞', '∞'), - ('𝛁', '𝛁'), - ('𝛛', '𝛛'), - ('𝛻', '𝛻'), - ('𝜕', '𝜕'), - ('𝜵', '𝜵'), - ('𝝏', '𝝏'), - ('𝝯', '𝝯'), - ('𝞉', '𝞉'), - ('𝞩', '𝞩'), - ('𝟃', '𝟃'), -]; - 
-pub const ID_CONTINUE: &'static [(char, char)] = &[ - ('0', '9'), - ('A', 'Z'), - ('_', '_'), - ('a', 'z'), - ('ª', 'ª'), - ('µ', 'µ'), - ('·', '·'), - ('º', 'º'), - ('À', 'Ö'), - ('Ø', 'ö'), - ('ø', 'ˁ'), - ('ˆ', 'ˑ'), - ('ˠ', 'ˤ'), - ('ˬ', 'ˬ'), - ('ˮ', 'ˮ'), - ('\u{300}', 'ʹ'), - ('Ͷ', 'ͷ'), - ('ͺ', 'ͽ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', 'ϵ'), - ('Ϸ', 'ҁ'), - ('\u{483}', '\u{487}'), - ('Ҋ', 'ԯ'), - ('Ա', 'Ֆ'), - ('ՙ', 'ՙ'), - ('ՠ', 'ֈ'), - ('\u{591}', '\u{5bd}'), - ('\u{5bf}', '\u{5bf}'), - ('\u{5c1}', '\u{5c2}'), - ('\u{5c4}', '\u{5c5}'), - ('\u{5c7}', '\u{5c7}'), - ('א', 'ת'), - ('ׯ', 'ײ'), - ('\u{610}', '\u{61a}'), - ('ؠ', '٩'), - ('ٮ', 'ۓ'), - ('ە', '\u{6dc}'), - ('\u{6df}', '\u{6e8}'), - ('\u{6ea}', 'ۼ'), - ('ۿ', 'ۿ'), - ('ܐ', '\u{74a}'), - ('ݍ', 'ޱ'), - ('߀', 'ߵ'), - ('ߺ', 'ߺ'), - ('\u{7fd}', '\u{7fd}'), - ('ࠀ', '\u{82d}'), - ('ࡀ', '\u{85b}'), - ('ࡠ', 'ࡪ'), - ('ࡰ', 'ࢇ'), - ('ࢉ', 'ࢎ'), - ('\u{897}', '\u{8e1}'), - ('\u{8e3}', '\u{963}'), - ('०', '९'), - ('ॱ', 'ঃ'), - ('অ', 'ঌ'), - ('এ', 'ঐ'), - ('ও', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('\u{9bc}', '\u{9c4}'), - ('ে', 'ৈ'), - ('ো', 'ৎ'), - ('\u{9d7}', '\u{9d7}'), - ('ড়', 'ঢ়'), - ('য়', '\u{9e3}'), - ('০', 'ৱ'), - ('ৼ', 'ৼ'), - ('\u{9fe}', '\u{9fe}'), - ('\u{a01}', 'ਃ'), - ('ਅ', 'ਊ'), - ('ਏ', 'ਐ'), - ('ਓ', 'ਨ'), - ('ਪ', 'ਰ'), - ('ਲ', 'ਲ਼'), - ('ਵ', 'ਸ਼'), - ('ਸ', 'ਹ'), - ('\u{a3c}', '\u{a3c}'), - ('ਾ', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4d}'), - ('\u{a51}', '\u{a51}'), - ('ਖ਼', 'ੜ'), - ('ਫ਼', 'ਫ਼'), - ('੦', '\u{a75}'), - ('\u{a81}', 'ઃ'), - ('અ', 'ઍ'), - ('એ', 'ઑ'), - ('ઓ', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('\u{abc}', '\u{ac5}'), - ('\u{ac7}', 'ૉ'), - ('ો', '\u{acd}'), - ('ૐ', 'ૐ'), - ('ૠ', '\u{ae3}'), - ('૦', '૯'), - ('ૹ', '\u{aff}'), - ('\u{b01}', 'ଃ'), - ('ଅ', 'ଌ'), - ('ଏ', 'ଐ'), - ('ଓ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 'ଳ'), - ('ଵ', 'ହ'), - ('\u{b3c}', '\u{b44}'), - ('େ', 'ୈ'), - ('ୋ', '\u{b4d}'), - ('\u{b55}', '\u{b57}'), - 
('ଡ଼', 'ଢ଼'), - ('ୟ', '\u{b63}'), - ('୦', '୯'), - ('ୱ', 'ୱ'), - ('\u{b82}', 'ஃ'), - ('அ', 'ஊ'), - ('எ', 'ஐ'), - ('ஒ', 'க'), - ('ங', 'ச'), - ('ஜ', 'ஜ'), - ('ஞ', 'ட'), - ('ண', 'த'), - ('ந', 'ப'), - ('ம', 'ஹ'), - ('\u{bbe}', 'ூ'), - ('ெ', 'ை'), - ('ொ', '\u{bcd}'), - ('ௐ', 'ௐ'), - ('\u{bd7}', '\u{bd7}'), - ('௦', '௯'), - ('\u{c00}', 'ఌ'), - ('ఎ', 'ఐ'), - ('ఒ', 'న'), - ('ప', 'హ'), - ('\u{c3c}', 'ౄ'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4d}'), - ('\u{c55}', '\u{c56}'), - ('ౘ', 'ౚ'), - ('ౝ', 'ౝ'), - ('ౠ', '\u{c63}'), - ('౦', '౯'), - ('ಀ', 'ಃ'), - ('ಅ', 'ಌ'), - ('ಎ', 'ಐ'), - ('ಒ', 'ನ'), - ('ಪ', 'ಳ'), - ('ವ', 'ಹ'), - ('\u{cbc}', 'ೄ'), - ('\u{cc6}', '\u{cc8}'), - ('\u{cca}', '\u{ccd}'), - ('\u{cd5}', '\u{cd6}'), - ('ೝ', 'ೞ'), - ('ೠ', '\u{ce3}'), - ('೦', '೯'), - ('ೱ', 'ೳ'), - ('\u{d00}', 'ഌ'), - ('എ', 'ഐ'), - ('ഒ', '\u{d44}'), - ('െ', 'ൈ'), - ('ൊ', 'ൎ'), - ('ൔ', '\u{d57}'), - ('ൟ', '\u{d63}'), - ('൦', '൯'), - ('ൺ', 'ൿ'), - ('\u{d81}', 'ඃ'), - ('අ', 'ඖ'), - ('ක', 'න'), - ('ඳ', 'ර'), - ('ල', 'ල'), - ('ව', 'ෆ'), - ('\u{dca}', '\u{dca}'), - ('\u{dcf}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('ෘ', '\u{ddf}'), - ('෦', '෯'), - ('ෲ', 'ෳ'), - ('ก', '\u{e3a}'), - ('เ', '\u{e4e}'), - ('๐', '๙'), - ('ກ', 'ຂ'), - ('ຄ', 'ຄ'), - ('ຆ', 'ຊ'), - ('ຌ', 'ຣ'), - ('ລ', 'ລ'), - ('ວ', 'ຽ'), - ('ເ', 'ໄ'), - ('ໆ', 'ໆ'), - ('\u{ec8}', '\u{ece}'), - ('໐', '໙'), - ('ໜ', 'ໟ'), - ('ༀ', 'ༀ'), - ('\u{f18}', '\u{f19}'), - ('༠', '༩'), - ('\u{f35}', '\u{f35}'), - ('\u{f37}', '\u{f37}'), - ('\u{f39}', '\u{f39}'), - ('༾', 'ཇ'), - ('ཉ', 'ཬ'), - ('\u{f71}', '\u{f84}'), - ('\u{f86}', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('\u{fc6}', '\u{fc6}'), - ('က', '၉'), - ('ၐ', '\u{109d}'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ა', 'ჺ'), - ('ჼ', 'ቈ'), - ('ቊ', 'ቍ'), - ('ቐ', 'ቖ'), - ('ቘ', 'ቘ'), - ('ቚ', 'ቝ'), - ('በ', 'ኈ'), - ('ኊ', 'ኍ'), - ('ነ', 'ኰ'), - ('ኲ', 'ኵ'), - ('ኸ', 'ኾ'), - ('ዀ', 'ዀ'), - ('ዂ', 'ዅ'), - ('ወ', 'ዖ'), - ('ዘ', 'ጐ'), - ('ጒ', 'ጕ'), - ('ጘ', 'ፚ'), - ('\u{135d}', '\u{135f}'), - ('፩', '፱'), - ('ᎀ', 
'ᎏ'), - ('Ꭰ', 'Ᏽ'), - ('ᏸ', 'ᏽ'), - ('ᐁ', 'ᙬ'), - ('ᙯ', 'ᙿ'), - ('ᚁ', 'ᚚ'), - ('ᚠ', 'ᛪ'), - ('ᛮ', 'ᛸ'), - ('ᜀ', '\u{1715}'), - ('ᜟ', '\u{1734}'), - ('ᝀ', '\u{1753}'), - ('ᝠ', 'ᝬ'), - ('ᝮ', 'ᝰ'), - ('\u{1772}', '\u{1773}'), - ('ក', '\u{17d3}'), - ('ៗ', 'ៗ'), - ('ៜ', '\u{17dd}'), - ('០', '៩'), - ('\u{180b}', '\u{180d}'), - ('\u{180f}', '᠙'), - ('ᠠ', 'ᡸ'), - ('ᢀ', 'ᢪ'), - ('ᢰ', 'ᣵ'), - ('ᤀ', 'ᤞ'), - ('\u{1920}', 'ᤫ'), - ('ᤰ', '\u{193b}'), - ('᥆', 'ᥭ'), - ('ᥰ', 'ᥴ'), - ('ᦀ', 'ᦫ'), - ('ᦰ', 'ᧉ'), - ('᧐', '᧚'), - ('ᨀ', '\u{1a1b}'), - ('ᨠ', '\u{1a5e}'), - ('\u{1a60}', '\u{1a7c}'), - ('\u{1a7f}', '᪉'), - ('᪐', '᪙'), - ('ᪧ', 'ᪧ'), - ('\u{1ab0}', '\u{1abd}'), - ('\u{1abf}', '\u{1ace}'), - ('\u{1b00}', 'ᭌ'), - ('᭐', '᭙'), - ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '\u{1bf3}'), - ('ᰀ', '\u{1c37}'), - ('᱀', '᱉'), - ('ᱍ', 'ᱽ'), - ('ᲀ', 'ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('\u{1cd0}', '\u{1cd2}'), - ('\u{1cd4}', 'ᳺ'), - ('ᴀ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), - ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ᾼ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῌ'), - ('ῐ', 'ΐ'), - ('ῖ', 'Ί'), - ('ῠ', 'Ῥ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῼ'), - ('\u{200c}', '\u{200d}'), - ('‿', '⁀'), - ('⁔', '⁔'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('\u{20d0}', '\u{20dc}'), - ('\u{20e1}', '\u{20e1}'), - ('\u{20e5}', '\u{20f0}'), - ('ℂ', 'ℂ'), - ('ℇ', 'ℇ'), - ('ℊ', 'ℓ'), - ('ℕ', 'ℕ'), - ('℘', 'ℝ'), - ('ℤ', 'ℤ'), - ('Ω', 'Ω'), - ('ℨ', 'ℨ'), - ('K', 'ℹ'), - ('ℼ', 'ℿ'), - ('ⅅ', 'ⅉ'), - ('ⅎ', 'ⅎ'), - ('Ⅰ', 'ↈ'), - ('Ⰰ', 'ⳤ'), - ('Ⳬ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ⴰ', 'ⵧ'), - ('ⵯ', 'ⵯ'), - ('\u{2d7f}', 'ⶖ'), - ('ⶠ', 'ⶦ'), - ('ⶨ', 'ⶮ'), - ('ⶰ', 'ⶶ'), - ('ⶸ', 'ⶾ'), - ('ⷀ', 'ⷆ'), - ('ⷈ', 'ⷎ'), - ('ⷐ', 'ⷖ'), - ('ⷘ', 'ⷞ'), - ('\u{2de0}', '\u{2dff}'), - ('々', '〇'), - ('〡', '\u{302f}'), - ('〱', '〵'), - ('〸', '〼'), - ('ぁ', 'ゖ'), - ('\u{3099}', 'ゟ'), - ('ァ', 'ヿ'), - ('ㄅ', 'ㄯ'), - ('ㄱ', 'ㆎ'), - ('ㆠ', 'ㆿ'), - ('ㇰ', 'ㇿ'), - ('㐀', '䶿'), - ('一', 
'ꒌ'), - ('ꓐ', 'ꓽ'), - ('ꔀ', 'ꘌ'), - ('ꘐ', 'ꘫ'), - ('Ꙁ', '\u{a66f}'), - ('\u{a674}', '\u{a67d}'), - ('ꙿ', '\u{a6f1}'), - ('ꜗ', 'ꜟ'), - ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟍ'), - ('Ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'Ƛ'), - ('ꟲ', 'ꠧ'), - ('\u{a82c}', '\u{a82c}'), - ('ꡀ', 'ꡳ'), - ('ꢀ', '\u{a8c5}'), - ('꣐', '꣙'), - ('\u{a8e0}', 'ꣷ'), - ('ꣻ', 'ꣻ'), - ('ꣽ', '\u{a92d}'), - ('ꤰ', '\u{a953}'), - ('ꥠ', 'ꥼ'), - ('\u{a980}', '\u{a9c0}'), - ('ꧏ', '꧙'), - ('ꧠ', 'ꧾ'), - ('ꨀ', '\u{aa36}'), - ('ꩀ', 'ꩍ'), - ('꩐', '꩙'), - ('ꩠ', 'ꩶ'), - ('ꩺ', 'ꫂ'), - ('ꫛ', 'ꫝ'), - ('ꫠ', 'ꫯ'), - ('ꫲ', '\u{aaf6}'), - ('ꬁ', 'ꬆ'), - ('ꬉ', 'ꬎ'), - ('ꬑ', 'ꬖ'), - ('ꬠ', 'ꬦ'), - ('ꬨ', 'ꬮ'), - ('ꬰ', 'ꭚ'), - ('ꭜ', 'ꭩ'), - ('ꭰ', 'ꯪ'), - ('꯬', '\u{abed}'), - ('꯰', '꯹'), - ('가', '힣'), - ('ힰ', 'ퟆ'), - ('ퟋ', 'ퟻ'), - ('豈', '舘'), - ('並', '龎'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('יִ', 'ﬨ'), - ('שׁ', 'זּ'), - ('טּ', 'לּ'), - ('מּ', 'מּ'), - ('נּ', 'סּ'), - ('ףּ', 'פּ'), - ('צּ', 'ﮱ'), - ('ﯓ', 'ﴽ'), - ('ﵐ', 'ﶏ'), - ('ﶒ', 'ﷇ'), - ('ﷰ', 'ﷻ'), - ('\u{fe00}', '\u{fe0f}'), - ('\u{fe20}', '\u{fe2f}'), - ('︳', '︴'), - ('﹍', '﹏'), - ('ﹰ', 'ﹴ'), - ('ﹶ', 'ﻼ'), - ('0', '9'), - ('A', 'Z'), - ('_', '_'), - ('a', 'z'), - ('・', 'ᄒ'), - ('ᅡ', 'ᅦ'), - ('ᅧ', 'ᅬ'), - ('ᅭ', 'ᅲ'), - ('ᅳ', 'ᅵ'), - ('𐀀', '𐀋'), - ('𐀍', '𐀦'), - ('𐀨', '𐀺'), - ('𐀼', '𐀽'), - ('𐀿', '𐁍'), - ('𐁐', '𐁝'), - ('𐂀', '𐃺'), - ('𐅀', '𐅴'), - ('\u{101fd}', '\u{101fd}'), - ('𐊀', '𐊜'), - ('𐊠', '𐋐'), - ('\u{102e0}', '\u{102e0}'), - ('𐌀', '𐌟'), - ('𐌭', '𐍊'), - ('𐍐', '\u{1037a}'), - ('𐎀', '𐎝'), - ('𐎠', '𐏃'), - ('𐏈', '𐏏'), - ('𐏑', '𐏕'), - ('𐐀', '𐒝'), - ('𐒠', '𐒩'), - ('𐒰', '𐓓'), - ('𐓘', '𐓻'), - ('𐔀', '𐔧'), - ('𐔰', '𐕣'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐗀', '𐗳'), - ('𐘀', '𐜶'), - ('𐝀', '𐝕'), - ('𐝠', '𐝧'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𐠀', '𐠅'), - ('𐠈', '𐠈'), - ('𐠊', '𐠵'), - ('𐠷', '𐠸'), - ('𐠼', '𐠼'), - ('𐠿', '𐡕'), - ('𐡠', '𐡶'), - ('𐢀', '𐢞'), - ('𐣠', '𐣲'), - ('𐣴', '𐣵'), - ('𐤀', '𐤕'), - ('𐤠', '𐤹'), - ('𐦀', '𐦷'), 
- ('𐦾', '𐦿'), - ('𐨀', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '𐨓'), - ('𐨕', '𐨗'), - ('𐨙', '𐨵'), - ('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '\u{10a3f}'), - ('𐩠', '𐩼'), - ('𐪀', '𐪜'), - ('𐫀', '𐫇'), - ('𐫉', '\u{10ae6}'), - ('𐬀', '𐬵'), - ('𐭀', '𐭕'), - ('𐭠', '𐭲'), - ('𐮀', '𐮑'), - ('𐰀', '𐱈'), - ('𐲀', '𐲲'), - ('𐳀', '𐳲'), - ('𐴀', '\u{10d27}'), - ('𐴰', '𐴹'), - ('𐵀', '𐵥'), - ('\u{10d69}', '\u{10d6d}'), - ('𐵯', '𐶅'), - ('𐺀', '𐺩'), - ('\u{10eab}', '\u{10eac}'), - ('𐺰', '𐺱'), - ('𐻂', '𐻄'), - ('\u{10efc}', '𐼜'), - ('𐼧', '𐼧'), - ('𐼰', '\u{10f50}'), - ('𐽰', '\u{10f85}'), - ('𐾰', '𐿄'), - ('𐿠', '𐿶'), - ('𑀀', '\u{11046}'), - ('𑁦', '𑁵'), - ('\u{1107f}', '\u{110ba}'), - ('\u{110c2}', '\u{110c2}'), - ('𑃐', '𑃨'), - ('𑃰', '𑃹'), - ('\u{11100}', '\u{11134}'), - ('𑄶', '𑄿'), - ('𑅄', '𑅇'), - ('𑅐', '\u{11173}'), - ('𑅶', '𑅶'), - ('\u{11180}', '𑇄'), - ('\u{111c9}', '\u{111cc}'), - ('𑇎', '𑇚'), - ('𑇜', '𑇜'), - ('𑈀', '𑈑'), - ('𑈓', '\u{11237}'), - ('\u{1123e}', '\u{11241}'), - ('𑊀', '𑊆'), - ('𑊈', '𑊈'), - ('𑊊', '𑊍'), - ('𑊏', '𑊝'), - ('𑊟', '𑊨'), - ('𑊰', '\u{112ea}'), - ('𑋰', '𑋹'), - ('\u{11300}', '𑌃'), - ('𑌅', '𑌌'), - ('𑌏', '𑌐'), - ('𑌓', '𑌨'), - ('𑌪', '𑌰'), - ('𑌲', '𑌳'), - ('𑌵', '𑌹'), - ('\u{1133b}', '𑍄'), - ('𑍇', '𑍈'), - ('𑍋', '\u{1134d}'), - ('𑍐', '𑍐'), - ('\u{11357}', '\u{11357}'), - ('𑍝', '𑍣'), - ('\u{11366}', '\u{1136c}'), - ('\u{11370}', '\u{11374}'), - ('𑎀', '𑎉'), - ('𑎋', '𑎋'), - ('𑎎', '𑎎'), - ('𑎐', '𑎵'), - ('𑎷', '\u{113c0}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '𑏊'), - ('𑏌', '𑏓'), - ('\u{113e1}', '\u{113e2}'), - ('𑐀', '𑑊'), - ('𑑐', '𑑙'), - ('\u{1145e}', '𑑡'), - ('𑒀', '𑓅'), - ('𑓇', '𑓇'), - ('𑓐', '𑓙'), - ('𑖀', '\u{115b5}'), - ('𑖸', '\u{115c0}'), - ('𑗘', '\u{115dd}'), - ('𑘀', '\u{11640}'), - ('𑙄', '𑙄'), - ('𑙐', '𑙙'), - ('𑚀', '𑚸'), - ('𑛀', '𑛉'), - ('𑛐', '𑛣'), - ('𑜀', '𑜚'), - ('\u{1171d}', '\u{1172b}'), - ('𑜰', '𑜹'), - ('𑝀', '𑝆'), - ('𑠀', '\u{1183a}'), - ('𑢠', '𑣩'), - ('𑣿', '𑤆'), - ('𑤉', '𑤉'), - ('𑤌', '𑤓'), - ('𑤕', '𑤖'), - ('𑤘', '𑤵'), - 
('𑤷', '𑤸'), - ('\u{1193b}', '\u{11943}'), - ('𑥐', '𑥙'), - ('𑦠', '𑦧'), - ('𑦪', '\u{119d7}'), - ('\u{119da}', '𑧡'), - ('𑧣', '𑧤'), - ('𑨀', '\u{11a3e}'), - ('\u{11a47}', '\u{11a47}'), - ('𑩐', '\u{11a99}'), - ('𑪝', '𑪝'), - ('𑪰', '𑫸'), - ('𑯀', '𑯠'), - ('𑯰', '𑯹'), - ('𑰀', '𑰈'), - ('𑰊', '\u{11c36}'), - ('\u{11c38}', '𑱀'), - ('𑱐', '𑱙'), - ('𑱲', '𑲏'), - ('\u{11c92}', '\u{11ca7}'), - ('𑲩', '\u{11cb6}'), - ('𑴀', '𑴆'), - ('𑴈', '𑴉'), - ('𑴋', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d47}'), - ('𑵐', '𑵙'), - ('𑵠', '𑵥'), - ('𑵧', '𑵨'), - ('𑵪', '𑶎'), - ('\u{11d90}', '\u{11d91}'), - ('𑶓', '𑶘'), - ('𑶠', '𑶩'), - ('𑻠', '𑻶'), - ('\u{11f00}', '𑼐'), - ('𑼒', '\u{11f3a}'), - ('𑼾', '\u{11f42}'), - ('𑽐', '\u{11f5a}'), - ('𑾰', '𑾰'), - ('𒀀', '𒎙'), - ('𒐀', '𒑮'), - ('𒒀', '𒕃'), - ('𒾐', '𒿰'), - ('𓀀', '𓐯'), - ('\u{13440}', '\u{13455}'), - ('𓑠', '𔏺'), - ('𔐀', '𔙆'), - ('𖄀', '𖄹'), - ('𖠀', '𖨸'), - ('𖩀', '𖩞'), - ('𖩠', '𖩩'), - ('𖩰', '𖪾'), - ('𖫀', '𖫉'), - ('𖫐', '𖫭'), - ('\u{16af0}', '\u{16af4}'), - ('𖬀', '\u{16b36}'), - ('𖭀', '𖭃'), - ('𖭐', '𖭙'), - ('𖭣', '𖭷'), - ('𖭽', '𖮏'), - ('𖵀', '𖵬'), - ('𖵰', '𖵹'), - ('𖹀', '𖹿'), - ('𖼀', '𖽊'), - ('\u{16f4f}', '𖾇'), - ('\u{16f8f}', '𖾟'), - ('𖿠', '𖿡'), - ('𖿣', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), - ('𗀀', '𘟷'), - ('𘠀', '𘳕'), - ('𘳿', '𘴈'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('𛀀', '𛄢'), - ('𛄲', '𛄲'), - ('𛅐', '𛅒'), - ('𛅕', '𛅕'), - ('𛅤', '𛅧'), - ('𛅰', '𛋻'), - ('𛰀', '𛱪'), - ('𛱰', '𛱼'), - ('𛲀', '𛲈'), - ('𛲐', '𛲙'), - ('\u{1bc9d}', '\u{1bc9e}'), - ('𜳰', '𜳹'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d165}', '\u{1d169}'), - ('\u{1d16d}', '\u{1d172}'), - ('\u{1d17b}', '\u{1d182}'), - ('\u{1d185}', '\u{1d18b}'), - ('\u{1d1aa}', '\u{1d1ad}'), - ('\u{1d242}', '\u{1d244}'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - 
('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝛀'), - ('𝛂', '𝛚'), - ('𝛜', '𝛺'), - ('𝛼', '𝜔'), - ('𝜖', '𝜴'), - ('𝜶', '𝝎'), - ('𝝐', '𝝮'), - ('𝝰', '𝞈'), - ('𝞊', '𝞨'), - ('𝞪', '𝟂'), - ('𝟄', '𝟋'), - ('𝟎', '𝟿'), - ('\u{1da00}', '\u{1da36}'), - ('\u{1da3b}', '\u{1da6c}'), - ('\u{1da75}', '\u{1da75}'), - ('\u{1da84}', '\u{1da84}'), - ('\u{1da9b}', '\u{1da9f}'), - ('\u{1daa1}', '\u{1daaf}'), - ('𝼀', '𝼞'), - ('𝼥', '𝼪'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), - ('𞀰', '𞁭'), - ('\u{1e08f}', '\u{1e08f}'), - ('𞄀', '𞄬'), - ('\u{1e130}', '𞄽'), - ('𞅀', '𞅉'), - ('𞅎', '𞅎'), - ('𞊐', '\u{1e2ae}'), - ('𞋀', '𞋹'), - ('𞓐', '𞓹'), - ('𞗐', '𞗺'), - ('𞟠', '𞟦'), - ('𞟨', '𞟫'), - ('𞟭', '𞟮'), - ('𞟰', '𞟾'), - ('𞠀', '𞣄'), - ('\u{1e8d0}', '\u{1e8d6}'), - ('𞤀', '𞥋'), - ('𞥐', '𞥙'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('🯰', '🯹'), - ('𠀀', '𪛟'), - ('𪜀', '𫜹'), - ('𫝀', '𫠝'), - ('𫠠', '𬺡'), - ('𬺰', '𮯠'), - ('𮯰', '𮹝'), - ('丽', '𪘀'), - ('𰀀', '𱍊'), - ('𱍐', '𲎯'), - ('\u{e0100}', '\u{e01ef}'), -]; - -pub const ID_START: &'static [(char, char)] = &[ - ('A', 'Z'), - ('a', 'z'), - ('ª', 'ª'), - ('µ', 'µ'), - ('º', 'º'), - ('À', 'Ö'), - ('Ø', 'ö'), - ('ø', 'ˁ'), - ('ˆ', 'ˑ'), - ('ˠ', 'ˤ'), - ('ˬ', 'ˬ'), - ('ˮ', 'ˮ'), - ('Ͱ', 'ʹ'), - ('Ͷ', 'ͷ'), - ('ͺ', 'ͽ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', 'ϵ'), - ('Ϸ', 'ҁ'), - ('Ҋ', 'ԯ'), - ('Ա', 'Ֆ'), - ('ՙ', 'ՙ'), - ('ՠ', 'ֈ'), - ('א', 'ת'), - ('ׯ', 'ײ'), - ('ؠ', 'ي'), - ('ٮ', 'ٯ'), - ('ٱ', 'ۓ'), - ('ە', 
'ە'), - ('ۥ', 'ۦ'), - ('ۮ', 'ۯ'), - ('ۺ', 'ۼ'), - ('ۿ', 'ۿ'), - ('ܐ', 'ܐ'), - ('ܒ', 'ܯ'), - ('ݍ', 'ޥ'), - ('ޱ', 'ޱ'), - ('ߊ', 'ߪ'), - ('ߴ', 'ߵ'), - ('ߺ', 'ߺ'), - ('ࠀ', 'ࠕ'), - ('ࠚ', 'ࠚ'), - ('ࠤ', 'ࠤ'), - ('ࠨ', 'ࠨ'), - ('ࡀ', 'ࡘ'), - ('ࡠ', 'ࡪ'), - ('ࡰ', 'ࢇ'), - ('ࢉ', 'ࢎ'), - ('ࢠ', 'ࣉ'), - ('ऄ', 'ह'), - ('ऽ', 'ऽ'), - ('ॐ', 'ॐ'), - ('क़', 'ॡ'), - ('ॱ', 'ঀ'), - ('অ', 'ঌ'), - ('এ', 'ঐ'), - ('ও', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('ঽ', 'ঽ'), - ('ৎ', 'ৎ'), - ('ড়', 'ঢ়'), - ('য়', 'ৡ'), - ('ৰ', 'ৱ'), - ('ৼ', 'ৼ'), - ('ਅ', 'ਊ'), - ('ਏ', 'ਐ'), - ('ਓ', 'ਨ'), - ('ਪ', 'ਰ'), - ('ਲ', 'ਲ਼'), - ('ਵ', 'ਸ਼'), - ('ਸ', 'ਹ'), - ('ਖ਼', 'ੜ'), - ('ਫ਼', 'ਫ਼'), - ('ੲ', 'ੴ'), - ('અ', 'ઍ'), - ('એ', 'ઑ'), - ('ઓ', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('ઽ', 'ઽ'), - ('ૐ', 'ૐ'), - ('ૠ', 'ૡ'), - ('ૹ', 'ૹ'), - ('ଅ', 'ଌ'), - ('ଏ', 'ଐ'), - ('ଓ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 'ଳ'), - ('ଵ', 'ହ'), - ('ଽ', 'ଽ'), - ('ଡ଼', 'ଢ଼'), - ('ୟ', 'ୡ'), - ('ୱ', 'ୱ'), - ('ஃ', 'ஃ'), - ('அ', 'ஊ'), - ('எ', 'ஐ'), - ('ஒ', 'க'), - ('ங', 'ச'), - ('ஜ', 'ஜ'), - ('ஞ', 'ட'), - ('ண', 'த'), - ('ந', 'ப'), - ('ம', 'ஹ'), - ('ௐ', 'ௐ'), - ('అ', 'ఌ'), - ('ఎ', 'ఐ'), - ('ఒ', 'న'), - ('ప', 'హ'), - ('ఽ', 'ఽ'), - ('ౘ', 'ౚ'), - ('ౝ', 'ౝ'), - ('ౠ', 'ౡ'), - ('ಀ', 'ಀ'), - ('ಅ', 'ಌ'), - ('ಎ', 'ಐ'), - ('ಒ', 'ನ'), - ('ಪ', 'ಳ'), - ('ವ', 'ಹ'), - ('ಽ', 'ಽ'), - ('ೝ', 'ೞ'), - ('ೠ', 'ೡ'), - ('ೱ', 'ೲ'), - ('ഄ', 'ഌ'), - ('എ', 'ഐ'), - ('ഒ', 'ഺ'), - ('ഽ', 'ഽ'), - ('ൎ', 'ൎ'), - ('ൔ', 'ൖ'), - ('ൟ', 'ൡ'), - ('ൺ', 'ൿ'), - ('අ', 'ඖ'), - ('ක', 'න'), - ('ඳ', 'ර'), - ('ල', 'ල'), - ('ව', 'ෆ'), - ('ก', 'ะ'), - ('า', 'ำ'), - ('เ', 'ๆ'), - ('ກ', 'ຂ'), - ('ຄ', 'ຄ'), - ('ຆ', 'ຊ'), - ('ຌ', 'ຣ'), - ('ລ', 'ລ'), - ('ວ', 'ະ'), - ('າ', 'ຳ'), - ('ຽ', 'ຽ'), - ('ເ', 'ໄ'), - ('ໆ', 'ໆ'), - ('ໜ', 'ໟ'), - ('ༀ', 'ༀ'), - ('ཀ', 'ཇ'), - ('ཉ', 'ཬ'), - ('ྈ', 'ྌ'), - ('က', 'ဪ'), - ('ဿ', 'ဿ'), - ('ၐ', 'ၕ'), - ('ၚ', 'ၝ'), - ('ၡ', 'ၡ'), - ('ၥ', 'ၦ'), - ('ၮ', 'ၰ'), - ('ၵ', 'ႁ'), - ('ႎ', 'ႎ'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ა', 'ჺ'), - ('ჼ', 'ቈ'), - ('ቊ', 
'ቍ'), - ('ቐ', 'ቖ'), - ('ቘ', 'ቘ'), - ('ቚ', 'ቝ'), - ('በ', 'ኈ'), - ('ኊ', 'ኍ'), - ('ነ', 'ኰ'), - ('ኲ', 'ኵ'), - ('ኸ', 'ኾ'), - ('ዀ', 'ዀ'), - ('ዂ', 'ዅ'), - ('ወ', 'ዖ'), - ('ዘ', 'ጐ'), - ('ጒ', 'ጕ'), - ('ጘ', 'ፚ'), - ('ᎀ', 'ᎏ'), - ('Ꭰ', 'Ᏽ'), - ('ᏸ', 'ᏽ'), - ('ᐁ', 'ᙬ'), - ('ᙯ', 'ᙿ'), - ('ᚁ', 'ᚚ'), - ('ᚠ', 'ᛪ'), - ('ᛮ', 'ᛸ'), - ('ᜀ', 'ᜑ'), - ('ᜟ', 'ᜱ'), - ('ᝀ', 'ᝑ'), - ('ᝠ', 'ᝬ'), - ('ᝮ', 'ᝰ'), - ('ក', 'ឳ'), - ('ៗ', 'ៗ'), - ('ៜ', 'ៜ'), - ('ᠠ', 'ᡸ'), - ('ᢀ', 'ᢨ'), - ('ᢪ', 'ᢪ'), - ('ᢰ', 'ᣵ'), - ('ᤀ', 'ᤞ'), - ('ᥐ', 'ᥭ'), - ('ᥰ', 'ᥴ'), - ('ᦀ', 'ᦫ'), - ('ᦰ', 'ᧉ'), - ('ᨀ', 'ᨖ'), - ('ᨠ', 'ᩔ'), - ('ᪧ', 'ᪧ'), - ('ᬅ', 'ᬳ'), - ('ᭅ', 'ᭌ'), - ('ᮃ', 'ᮠ'), - ('ᮮ', 'ᮯ'), - ('ᮺ', 'ᯥ'), - ('ᰀ', 'ᰣ'), - ('ᱍ', 'ᱏ'), - ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('ᳩ', 'ᳬ'), - ('ᳮ', 'ᳳ'), - ('ᳵ', 'ᳶ'), - ('ᳺ', 'ᳺ'), - ('ᴀ', 'ᶿ'), - ('Ḁ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), - ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ᾼ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῌ'), - ('ῐ', 'ΐ'), - ('ῖ', 'Ί'), - ('ῠ', 'Ῥ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῼ'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('ℂ', 'ℂ'), - ('ℇ', 'ℇ'), - ('ℊ', 'ℓ'), - ('ℕ', 'ℕ'), - ('℘', 'ℝ'), - ('ℤ', 'ℤ'), - ('Ω', 'Ω'), - ('ℨ', 'ℨ'), - ('K', 'ℹ'), - ('ℼ', 'ℿ'), - ('ⅅ', 'ⅉ'), - ('ⅎ', 'ⅎ'), - ('Ⅰ', 'ↈ'), - ('Ⰰ', 'ⳤ'), - ('Ⳬ', 'ⳮ'), - ('Ⳳ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ⴰ', 'ⵧ'), - ('ⵯ', 'ⵯ'), - ('ⶀ', 'ⶖ'), - ('ⶠ', 'ⶦ'), - ('ⶨ', 'ⶮ'), - ('ⶰ', 'ⶶ'), - ('ⶸ', 'ⶾ'), - ('ⷀ', 'ⷆ'), - ('ⷈ', 'ⷎ'), - ('ⷐ', 'ⷖ'), - ('ⷘ', 'ⷞ'), - ('々', '〇'), - ('〡', '〩'), - ('〱', '〵'), - ('〸', '〼'), - ('ぁ', 'ゖ'), - ('゛', 'ゟ'), - ('ァ', 'ヺ'), - ('ー', 'ヿ'), - ('ㄅ', 'ㄯ'), - ('ㄱ', 'ㆎ'), - ('ㆠ', 'ㆿ'), - ('ㇰ', 'ㇿ'), - ('㐀', '䶿'), - ('一', 'ꒌ'), - ('ꓐ', 'ꓽ'), - ('ꔀ', 'ꘌ'), - ('ꘐ', 'ꘟ'), - ('ꘪ', 'ꘫ'), - ('Ꙁ', 'ꙮ'), - ('ꙿ', 'ꚝ'), - ('ꚠ', 'ꛯ'), - ('ꜗ', 'ꜟ'), - ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟍ'), - ('Ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'Ƛ'), - ('ꟲ', 'ꠁ'), - ('ꠃ', 'ꠅ'), - ('ꠇ', 'ꠊ'), - ('ꠌ', 'ꠢ'), - ('ꡀ', 'ꡳ'), - 
('ꢂ', 'ꢳ'), - ('ꣲ', 'ꣷ'), - ('ꣻ', 'ꣻ'), - ('ꣽ', 'ꣾ'), - ('ꤊ', 'ꤥ'), - ('ꤰ', 'ꥆ'), - ('ꥠ', 'ꥼ'), - ('ꦄ', 'ꦲ'), - ('ꧏ', 'ꧏ'), - ('ꧠ', 'ꧤ'), - ('ꧦ', 'ꧯ'), - ('ꧺ', 'ꧾ'), - ('ꨀ', 'ꨨ'), - ('ꩀ', 'ꩂ'), - ('ꩄ', 'ꩋ'), - ('ꩠ', 'ꩶ'), - ('ꩺ', 'ꩺ'), - ('ꩾ', 'ꪯ'), - ('ꪱ', 'ꪱ'), - ('ꪵ', 'ꪶ'), - ('ꪹ', 'ꪽ'), - ('ꫀ', 'ꫀ'), - ('ꫂ', 'ꫂ'), - ('ꫛ', 'ꫝ'), - ('ꫠ', 'ꫪ'), - ('ꫲ', 'ꫴ'), - ('ꬁ', 'ꬆ'), - ('ꬉ', 'ꬎ'), - ('ꬑ', 'ꬖ'), - ('ꬠ', 'ꬦ'), - ('ꬨ', 'ꬮ'), - ('ꬰ', 'ꭚ'), - ('ꭜ', 'ꭩ'), - ('ꭰ', 'ꯢ'), - ('가', '힣'), - ('ힰ', 'ퟆ'), - ('ퟋ', 'ퟻ'), - ('豈', '舘'), - ('並', '龎'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('יִ', 'יִ'), - ('ײַ', 'ﬨ'), - ('שׁ', 'זּ'), - ('טּ', 'לּ'), - ('מּ', 'מּ'), - ('נּ', 'סּ'), - ('ףּ', 'פּ'), - ('צּ', 'ﮱ'), - ('ﯓ', 'ﴽ'), - ('ﵐ', 'ﶏ'), - ('ﶒ', 'ﷇ'), - ('ﷰ', 'ﷻ'), - ('ﹰ', 'ﹴ'), - ('ﹶ', 'ﻼ'), - ('A', 'Z'), - ('a', 'z'), - ('ヲ', 'ᄒ'), - ('ᅡ', 'ᅦ'), - ('ᅧ', 'ᅬ'), - ('ᅭ', 'ᅲ'), - ('ᅳ', 'ᅵ'), - ('𐀀', '𐀋'), - ('𐀍', '𐀦'), - ('𐀨', '𐀺'), - ('𐀼', '𐀽'), - ('𐀿', '𐁍'), - ('𐁐', '𐁝'), - ('𐂀', '𐃺'), - ('𐅀', '𐅴'), - ('𐊀', '𐊜'), - ('𐊠', '𐋐'), - ('𐌀', '𐌟'), - ('𐌭', '𐍊'), - ('𐍐', '𐍵'), - ('𐎀', '𐎝'), - ('𐎠', '𐏃'), - ('𐏈', '𐏏'), - ('𐏑', '𐏕'), - ('𐐀', '𐒝'), - ('𐒰', '𐓓'), - ('𐓘', '𐓻'), - ('𐔀', '𐔧'), - ('𐔰', '𐕣'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐗀', '𐗳'), - ('𐘀', '𐜶'), - ('𐝀', '𐝕'), - ('𐝠', '𐝧'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𐠀', '𐠅'), - ('𐠈', '𐠈'), - ('𐠊', '𐠵'), - ('𐠷', '𐠸'), - ('𐠼', '𐠼'), - ('𐠿', '𐡕'), - ('𐡠', '𐡶'), - ('𐢀', '𐢞'), - ('𐣠', '𐣲'), - ('𐣴', '𐣵'), - ('𐤀', '𐤕'), - ('𐤠', '𐤹'), - ('𐦀', '𐦷'), - ('𐦾', '𐦿'), - ('𐨀', '𐨀'), - ('𐨐', '𐨓'), - ('𐨕', '𐨗'), - ('𐨙', '𐨵'), - ('𐩠', '𐩼'), - ('𐪀', '𐪜'), - ('𐫀', '𐫇'), - ('𐫉', '𐫤'), - ('𐬀', '𐬵'), - ('𐭀', '𐭕'), - ('𐭠', '𐭲'), - ('𐮀', '𐮑'), - ('𐰀', '𐱈'), - ('𐲀', '𐲲'), - ('𐳀', '𐳲'), - ('𐴀', '𐴣'), - ('𐵊', '𐵥'), - ('𐵯', '𐶅'), - ('𐺀', '𐺩'), - ('𐺰', '𐺱'), - ('𐻂', '𐻄'), - ('𐼀', '𐼜'), - ('𐼧', '𐼧'), - ('𐼰', '𐽅'), - ('𐽰', '𐾁'), - ('𐾰', '𐿄'), - ('𐿠', '𐿶'), - ('𑀃', '𑀷'), - ('𑁱', 
'𑁲'), - ('𑁵', '𑁵'), - ('𑂃', '𑂯'), - ('𑃐', '𑃨'), - ('𑄃', '𑄦'), - ('𑅄', '𑅄'), - ('𑅇', '𑅇'), - ('𑅐', '𑅲'), - ('𑅶', '𑅶'), - ('𑆃', '𑆲'), - ('𑇁', '𑇄'), - ('𑇚', '𑇚'), - ('𑇜', '𑇜'), - ('𑈀', '𑈑'), - ('𑈓', '𑈫'), - ('𑈿', '𑉀'), - ('𑊀', '𑊆'), - ('𑊈', '𑊈'), - ('𑊊', '𑊍'), - ('𑊏', '𑊝'), - ('𑊟', '𑊨'), - ('𑊰', '𑋞'), - ('𑌅', '𑌌'), - ('𑌏', '𑌐'), - ('𑌓', '𑌨'), - ('𑌪', '𑌰'), - ('𑌲', '𑌳'), - ('𑌵', '𑌹'), - ('𑌽', '𑌽'), - ('𑍐', '𑍐'), - ('𑍝', '𑍡'), - ('𑎀', '𑎉'), - ('𑎋', '𑎋'), - ('𑎎', '𑎎'), - ('𑎐', '𑎵'), - ('𑎷', '𑎷'), - ('𑏑', '𑏑'), - ('𑏓', '𑏓'), - ('𑐀', '𑐴'), - ('𑑇', '𑑊'), - ('𑑟', '𑑡'), - ('𑒀', '𑒯'), - ('𑓄', '𑓅'), - ('𑓇', '𑓇'), - ('𑖀', '𑖮'), - ('𑗘', '𑗛'), - ('𑘀', '𑘯'), - ('𑙄', '𑙄'), - ('𑚀', '𑚪'), - ('𑚸', '𑚸'), - ('𑜀', '𑜚'), - ('𑝀', '𑝆'), - ('𑠀', '𑠫'), - ('𑢠', '𑣟'), - ('𑣿', '𑤆'), - ('𑤉', '𑤉'), - ('𑤌', '𑤓'), - ('𑤕', '𑤖'), - ('𑤘', '𑤯'), - ('𑤿', '𑤿'), - ('𑥁', '𑥁'), - ('𑦠', '𑦧'), - ('𑦪', '𑧐'), - ('𑧡', '𑧡'), - ('𑧣', '𑧣'), - ('𑨀', '𑨀'), - ('𑨋', '𑨲'), - ('𑨺', '𑨺'), - ('𑩐', '𑩐'), - ('𑩜', '𑪉'), - ('𑪝', '𑪝'), - ('𑪰', '𑫸'), - ('𑯀', '𑯠'), - ('𑰀', '𑰈'), - ('𑰊', '𑰮'), - ('𑱀', '𑱀'), - ('𑱲', '𑲏'), - ('𑴀', '𑴆'), - ('𑴈', '𑴉'), - ('𑴋', '𑴰'), - ('𑵆', '𑵆'), - ('𑵠', '𑵥'), - ('𑵧', '𑵨'), - ('𑵪', '𑶉'), - ('𑶘', '𑶘'), - ('𑻠', '𑻲'), - ('𑼂', '𑼂'), - ('𑼄', '𑼐'), - ('𑼒', '𑼳'), - ('𑾰', '𑾰'), - ('𒀀', '𒎙'), - ('𒐀', '𒑮'), - ('𒒀', '𒕃'), - ('𒾐', '𒿰'), - ('𓀀', '𓐯'), - ('𓑁', '𓑆'), - ('𓑠', '𔏺'), - ('𔐀', '𔙆'), - ('𖄀', '𖄝'), - ('𖠀', '𖨸'), - ('𖩀', '𖩞'), - ('𖩰', '𖪾'), - ('𖫐', '𖫭'), - ('𖬀', '𖬯'), - ('𖭀', '𖭃'), - ('𖭣', '𖭷'), - ('𖭽', '𖮏'), - ('𖵀', '𖵬'), - ('𖹀', '𖹿'), - ('𖼀', '𖽊'), - ('𖽐', '𖽐'), - ('𖾓', '𖾟'), - ('𖿠', '𖿡'), - ('𖿣', '𖿣'), - ('𗀀', '𘟷'), - ('𘠀', '𘳕'), - ('𘳿', '𘴈'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('𛀀', '𛄢'), - ('𛄲', '𛄲'), - ('𛅐', '𛅒'), - ('𛅕', '𛅕'), - ('𛅤', '𛅧'), - ('𛅰', '𛋻'), - ('𛰀', '𛱪'), - ('𛱰', '𛱼'), - ('𛲀', '𛲈'), - ('𛲐', '𛲙'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - 
('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝛀'), - ('𝛂', '𝛚'), - ('𝛜', '𝛺'), - ('𝛼', '𝜔'), - ('𝜖', '𝜴'), - ('𝜶', '𝝎'), - ('𝝐', '𝝮'), - ('𝝰', '𝞈'), - ('𝞊', '𝞨'), - ('𝞪', '𝟂'), - ('𝟄', '𝟋'), - ('𝼀', '𝼞'), - ('𝼥', '𝼪'), - ('𞀰', '𞁭'), - ('𞄀', '𞄬'), - ('𞄷', '𞄽'), - ('𞅎', '𞅎'), - ('𞊐', '𞊭'), - ('𞋀', '𞋫'), - ('𞓐', '𞓫'), - ('𞗐', '𞗭'), - ('𞗰', '𞗰'), - ('𞟠', '𞟦'), - ('𞟨', '𞟫'), - ('𞟭', '𞟮'), - ('𞟰', '𞟾'), - ('𞠀', '𞣄'), - ('𞤀', '𞥃'), - ('𞥋', '𞥋'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('𠀀', '𪛟'), - ('𪜀', '𫜹'), - ('𫝀', '𫠝'), - ('𫠠', '𬺡'), - ('𬺰', '𮯠'), - ('𮯰', '𮹝'), - ('丽', '𪘀'), - ('𰀀', '𱍊'), - ('𱍐', '𲎯'), -]; - -pub const IDEOGRAPHIC: &'static [(char, char)] = &[ - ('〆', '〇'), - ('〡', '〩'), - ('〸', '〺'), - ('㐀', '䶿'), - ('一', '鿿'), - ('豈', '舘'), - ('並', '龎'), - ('\u{16fe4}', '\u{16fe4}'), - ('𗀀', '𘟷'), - ('𘠀', '𘳕'), - ('𘳿', '𘴈'), - ('𛅰', '𛋻'), - ('𠀀', '𪛟'), - ('𪜀', '𫜹'), - ('𫝀', '𫠝'), - ('𫠠', '𬺡'), - ('𬺰', '𮯠'), - ('𮯰', '𮹝'), - ('丽', '𪘀'), - ('𰀀', '𱍊'), - ('𱍐', '𲎯'), -]; - -pub const INCB: &'static [(char, char)] = &[ - ('\u{300}', '\u{36f}'), - ('\u{483}', '\u{489}'), - ('\u{591}', '\u{5bd}'), - ('\u{5bf}', '\u{5bf}'), - ('\u{5c1}', '\u{5c2}'), - ('\u{5c4}', '\u{5c5}'), - ('\u{5c7}', '\u{5c7}'), - ('\u{610}', '\u{61a}'), - ('\u{64b}', '\u{65f}'), - ('\u{670}', '\u{670}'), - ('\u{6d6}', '\u{6dc}'), - ('\u{6df}', '\u{6e4}'), - ('\u{6e7}', '\u{6e8}'), - ('\u{6ea}', '\u{6ed}'), - ('\u{711}', '\u{711}'), - ('\u{730}', '\u{74a}'), - ('\u{7a6}', '\u{7b0}'), - ('\u{7eb}', '\u{7f3}'), - ('\u{7fd}', '\u{7fd}'), - 
('\u{816}', '\u{819}'), - ('\u{81b}', '\u{823}'), - ('\u{825}', '\u{827}'), - ('\u{829}', '\u{82d}'), - ('\u{859}', '\u{85b}'), - ('\u{897}', '\u{89f}'), - ('\u{8ca}', '\u{8e1}'), - ('\u{8e3}', '\u{902}'), - ('क', '\u{93a}'), - ('\u{93c}', '\u{93c}'), - ('\u{941}', '\u{948}'), - ('\u{94d}', '\u{94d}'), - ('\u{951}', 'य़'), - ('\u{962}', '\u{963}'), - ('ॸ', 'ॿ'), - ('\u{981}', '\u{981}'), - ('ক', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('\u{9bc}', '\u{9bc}'), - ('\u{9be}', '\u{9be}'), - ('\u{9c1}', '\u{9c4}'), - ('\u{9cd}', '\u{9cd}'), - ('\u{9d7}', '\u{9d7}'), - ('ড়', 'ঢ়'), - ('য়', 'য়'), - ('\u{9e2}', '\u{9e3}'), - ('ৰ', 'ৱ'), - ('\u{9fe}', '\u{9fe}'), - ('\u{a01}', '\u{a02}'), - ('\u{a3c}', '\u{a3c}'), - ('\u{a41}', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4d}'), - ('\u{a51}', '\u{a51}'), - ('\u{a70}', '\u{a71}'), - ('\u{a75}', '\u{a75}'), - ('\u{a81}', '\u{a82}'), - ('ક', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('\u{abc}', '\u{abc}'), - ('\u{ac1}', '\u{ac5}'), - ('\u{ac7}', '\u{ac8}'), - ('\u{acd}', '\u{acd}'), - ('\u{ae2}', '\u{ae3}'), - ('ૹ', '\u{aff}'), - ('\u{b01}', '\u{b01}'), - ('କ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 'ଳ'), - ('ଵ', 'ହ'), - ('\u{b3c}', '\u{b3c}'), - ('\u{b3e}', '\u{b3f}'), - ('\u{b41}', '\u{b44}'), - ('\u{b4d}', '\u{b4d}'), - ('\u{b55}', '\u{b57}'), - ('ଡ଼', 'ଢ଼'), - ('ୟ', 'ୟ'), - ('\u{b62}', '\u{b63}'), - ('ୱ', 'ୱ'), - ('\u{b82}', '\u{b82}'), - ('\u{bbe}', '\u{bbe}'), - ('\u{bc0}', '\u{bc0}'), - ('\u{bcd}', '\u{bcd}'), - ('\u{bd7}', '\u{bd7}'), - ('\u{c00}', '\u{c00}'), - ('\u{c04}', '\u{c04}'), - ('క', 'న'), - ('ప', 'హ'), - ('\u{c3c}', '\u{c3c}'), - ('\u{c3e}', '\u{c40}'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4d}'), - ('\u{c55}', '\u{c56}'), - ('ౘ', 'ౚ'), - ('\u{c62}', '\u{c63}'), - ('\u{c81}', '\u{c81}'), - ('\u{cbc}', '\u{cbc}'), - ('\u{cbf}', '\u{cc0}'), - ('\u{cc2}', '\u{cc2}'), - ('\u{cc6}', '\u{cc8}'), - ('\u{cca}', '\u{ccd}'), - ('\u{cd5}', '\u{cd6}'), - ('\u{ce2}', '\u{ce3}'), - ('\u{d00}', 
'\u{d01}'), - ('ക', '\u{d3c}'), - ('\u{d3e}', '\u{d3e}'), - ('\u{d41}', '\u{d44}'), - ('\u{d4d}', '\u{d4d}'), - ('\u{d57}', '\u{d57}'), - ('\u{d62}', '\u{d63}'), - ('\u{d81}', '\u{d81}'), - ('\u{dca}', '\u{dca}'), - ('\u{dcf}', '\u{dcf}'), - ('\u{dd2}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('\u{ddf}', '\u{ddf}'), - ('\u{e31}', '\u{e31}'), - ('\u{e34}', '\u{e3a}'), - ('\u{e47}', '\u{e4e}'), - ('\u{eb1}', '\u{eb1}'), - ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ece}'), - ('\u{f18}', '\u{f19}'), - ('\u{f35}', '\u{f35}'), - ('\u{f37}', '\u{f37}'), - ('\u{f39}', '\u{f39}'), - ('\u{f71}', '\u{f7e}'), - ('\u{f80}', '\u{f84}'), - ('\u{f86}', '\u{f87}'), - ('\u{f8d}', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('\u{fc6}', '\u{fc6}'), - ('\u{102d}', '\u{1030}'), - ('\u{1032}', '\u{1037}'), - ('\u{1039}', '\u{103a}'), - ('\u{103d}', '\u{103e}'), - ('\u{1058}', '\u{1059}'), - ('\u{105e}', '\u{1060}'), - ('\u{1071}', '\u{1074}'), - ('\u{1082}', '\u{1082}'), - ('\u{1085}', '\u{1086}'), - ('\u{108d}', '\u{108d}'), - ('\u{109d}', '\u{109d}'), - ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1715}'), - ('\u{1732}', '\u{1734}'), - ('\u{1752}', '\u{1753}'), - ('\u{1772}', '\u{1773}'), - ('\u{17b4}', '\u{17b5}'), - ('\u{17b7}', '\u{17bd}'), - ('\u{17c6}', '\u{17c6}'), - ('\u{17c9}', '\u{17d3}'), - ('\u{17dd}', '\u{17dd}'), - ('\u{180b}', '\u{180d}'), - ('\u{180f}', '\u{180f}'), - ('\u{1885}', '\u{1886}'), - ('\u{18a9}', '\u{18a9}'), - ('\u{1920}', '\u{1922}'), - ('\u{1927}', '\u{1928}'), - ('\u{1932}', '\u{1932}'), - ('\u{1939}', '\u{193b}'), - ('\u{1a17}', '\u{1a18}'), - ('\u{1a1b}', '\u{1a1b}'), - ('\u{1a56}', '\u{1a56}'), - ('\u{1a58}', '\u{1a5e}'), - ('\u{1a60}', '\u{1a60}'), - ('\u{1a62}', '\u{1a62}'), - ('\u{1a65}', '\u{1a6c}'), - ('\u{1a73}', '\u{1a7c}'), - ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ace}'), - ('\u{1b00}', '\u{1b03}'), - ('\u{1b34}', '\u{1b3d}'), - ('\u{1b42}', '\u{1b44}'), - ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '\u{1b81}'), - ('\u{1ba2}', '\u{1ba5}'), - 
('\u{1ba8}', '\u{1bad}'), - ('\u{1be6}', '\u{1be6}'), - ('\u{1be8}', '\u{1be9}'), - ('\u{1bed}', '\u{1bed}'), - ('\u{1bef}', '\u{1bf3}'), - ('\u{1c2c}', '\u{1c33}'), - ('\u{1c36}', '\u{1c37}'), - ('\u{1cd0}', '\u{1cd2}'), - ('\u{1cd4}', '\u{1ce0}'), - ('\u{1ce2}', '\u{1ce8}'), - ('\u{1ced}', '\u{1ced}'), - ('\u{1cf4}', '\u{1cf4}'), - ('\u{1cf8}', '\u{1cf9}'), - ('\u{1dc0}', '\u{1dff}'), - ('\u{200d}', '\u{200d}'), - ('\u{20d0}', '\u{20f0}'), - ('\u{2cef}', '\u{2cf1}'), - ('\u{2d7f}', '\u{2d7f}'), - ('\u{2de0}', '\u{2dff}'), - ('\u{302a}', '\u{302f}'), - ('\u{3099}', '\u{309a}'), - ('\u{a66f}', '\u{a672}'), - ('\u{a674}', '\u{a67d}'), - ('\u{a69e}', '\u{a69f}'), - ('\u{a6f0}', '\u{a6f1}'), - ('\u{a802}', '\u{a802}'), - ('\u{a806}', '\u{a806}'), - ('\u{a80b}', '\u{a80b}'), - ('\u{a825}', '\u{a826}'), - ('\u{a82c}', '\u{a82c}'), - ('\u{a8c4}', '\u{a8c5}'), - ('\u{a8e0}', '\u{a8f1}'), - ('\u{a8ff}', '\u{a8ff}'), - ('\u{a926}', '\u{a92d}'), - ('\u{a947}', '\u{a951}'), - ('\u{a953}', '\u{a953}'), - ('\u{a980}', '\u{a982}'), - ('\u{a9b3}', '\u{a9b3}'), - ('\u{a9b6}', '\u{a9b9}'), - ('\u{a9bc}', '\u{a9bd}'), - ('\u{a9c0}', '\u{a9c0}'), - ('\u{a9e5}', '\u{a9e5}'), - ('\u{aa29}', '\u{aa2e}'), - ('\u{aa31}', '\u{aa32}'), - ('\u{aa35}', '\u{aa36}'), - ('\u{aa43}', '\u{aa43}'), - ('\u{aa4c}', '\u{aa4c}'), - ('\u{aa7c}', '\u{aa7c}'), - ('\u{aab0}', '\u{aab0}'), - ('\u{aab2}', '\u{aab4}'), - ('\u{aab7}', '\u{aab8}'), - ('\u{aabe}', '\u{aabf}'), - ('\u{aac1}', '\u{aac1}'), - ('\u{aaec}', '\u{aaed}'), - ('\u{aaf6}', '\u{aaf6}'), - ('\u{abe5}', '\u{abe5}'), - ('\u{abe8}', '\u{abe8}'), - ('\u{abed}', '\u{abed}'), - ('\u{fb1e}', '\u{fb1e}'), - ('\u{fe00}', '\u{fe0f}'), - ('\u{fe20}', '\u{fe2f}'), - ('\u{ff9e}', '\u{ff9f}'), - ('\u{101fd}', '\u{101fd}'), - ('\u{102e0}', '\u{102e0}'), - ('\u{10376}', '\u{1037a}'), - ('\u{10a01}', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '\u{10a0f}'), - ('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '\u{10a3f}'), - ('\u{10ae5}', 
'\u{10ae6}'), - ('\u{10d24}', '\u{10d27}'), - ('\u{10d69}', '\u{10d6d}'), - ('\u{10eab}', '\u{10eac}'), - ('\u{10efc}', '\u{10eff}'), - ('\u{10f46}', '\u{10f50}'), - ('\u{10f82}', '\u{10f85}'), - ('\u{11001}', '\u{11001}'), - ('\u{11038}', '\u{11046}'), - ('\u{11070}', '\u{11070}'), - ('\u{11073}', '\u{11074}'), - ('\u{1107f}', '\u{11081}'), - ('\u{110b3}', '\u{110b6}'), - ('\u{110b9}', '\u{110ba}'), - ('\u{110c2}', '\u{110c2}'), - ('\u{11100}', '\u{11102}'), - ('\u{11127}', '\u{1112b}'), - ('\u{1112d}', '\u{11134}'), - ('\u{11173}', '\u{11173}'), - ('\u{11180}', '\u{11181}'), - ('\u{111b6}', '\u{111be}'), - ('\u{111c0}', '\u{111c0}'), - ('\u{111c9}', '\u{111cc}'), - ('\u{111cf}', '\u{111cf}'), - ('\u{1122f}', '\u{11231}'), - ('\u{11234}', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), - ('\u{11241}', '\u{11241}'), - ('\u{112df}', '\u{112df}'), - ('\u{112e3}', '\u{112ea}'), - ('\u{11300}', '\u{11301}'), - ('\u{1133b}', '\u{1133c}'), - ('\u{1133e}', '\u{1133e}'), - ('\u{11340}', '\u{11340}'), - ('\u{1134d}', '\u{1134d}'), - ('\u{11357}', '\u{11357}'), - ('\u{11366}', '\u{1136c}'), - ('\u{11370}', '\u{11374}'), - ('\u{113b8}', '\u{113b8}'), - ('\u{113bb}', '\u{113c0}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '\u{113c9}'), - ('\u{113ce}', '\u{113d0}'), - ('\u{113d2}', '\u{113d2}'), - ('\u{113e1}', '\u{113e2}'), - ('\u{11438}', '\u{1143f}'), - ('\u{11442}', '\u{11444}'), - ('\u{11446}', '\u{11446}'), - ('\u{1145e}', '\u{1145e}'), - ('\u{114b0}', '\u{114b0}'), - ('\u{114b3}', '\u{114b8}'), - ('\u{114ba}', '\u{114ba}'), - ('\u{114bd}', '\u{114bd}'), - ('\u{114bf}', '\u{114c0}'), - ('\u{114c2}', '\u{114c3}'), - ('\u{115af}', '\u{115af}'), - ('\u{115b2}', '\u{115b5}'), - ('\u{115bc}', '\u{115bd}'), - ('\u{115bf}', '\u{115c0}'), - ('\u{115dc}', '\u{115dd}'), - ('\u{11633}', '\u{1163a}'), - ('\u{1163d}', '\u{1163d}'), - ('\u{1163f}', '\u{11640}'), - ('\u{116ab}', '\u{116ab}'), - ('\u{116ad}', '\u{116ad}'), - ('\u{116b0}', '\u{116b7}'), - 
('\u{1171d}', '\u{1171d}'), - ('\u{1171f}', '\u{1171f}'), - ('\u{11722}', '\u{11725}'), - ('\u{11727}', '\u{1172b}'), - ('\u{1182f}', '\u{11837}'), - ('\u{11839}', '\u{1183a}'), - ('\u{11930}', '\u{11930}'), - ('\u{1193b}', '\u{1193e}'), - ('\u{11943}', '\u{11943}'), - ('\u{119d4}', '\u{119d7}'), - ('\u{119da}', '\u{119db}'), - ('\u{119e0}', '\u{119e0}'), - ('\u{11a01}', '\u{11a0a}'), - ('\u{11a33}', '\u{11a38}'), - ('\u{11a3b}', '\u{11a3e}'), - ('\u{11a47}', '\u{11a47}'), - ('\u{11a51}', '\u{11a56}'), - ('\u{11a59}', '\u{11a5b}'), - ('\u{11a8a}', '\u{11a96}'), - ('\u{11a98}', '\u{11a99}'), - ('\u{11c30}', '\u{11c36}'), - ('\u{11c38}', '\u{11c3d}'), - ('\u{11c3f}', '\u{11c3f}'), - ('\u{11c92}', '\u{11ca7}'), - ('\u{11caa}', '\u{11cb0}'), - ('\u{11cb2}', '\u{11cb3}'), - ('\u{11cb5}', '\u{11cb6}'), - ('\u{11d31}', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d45}'), - ('\u{11d47}', '\u{11d47}'), - ('\u{11d90}', '\u{11d91}'), - ('\u{11d95}', '\u{11d95}'), - ('\u{11d97}', '\u{11d97}'), - ('\u{11ef3}', '\u{11ef4}'), - ('\u{11f00}', '\u{11f01}'), - ('\u{11f36}', '\u{11f3a}'), - ('\u{11f40}', '\u{11f42}'), - ('\u{11f5a}', '\u{11f5a}'), - ('\u{13440}', '\u{13440}'), - ('\u{13447}', '\u{13455}'), - ('\u{1611e}', '\u{16129}'), - ('\u{1612d}', '\u{1612f}'), - ('\u{16af0}', '\u{16af4}'), - ('\u{16b30}', '\u{16b36}'), - ('\u{16f4f}', '\u{16f4f}'), - ('\u{16f8f}', '\u{16f92}'), - ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), - ('\u{1bc9d}', '\u{1bc9e}'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d165}', '\u{1d169}'), - ('\u{1d16d}', '\u{1d172}'), - ('\u{1d17b}', '\u{1d182}'), - ('\u{1d185}', '\u{1d18b}'), - ('\u{1d1aa}', '\u{1d1ad}'), - ('\u{1d242}', '\u{1d244}'), - ('\u{1da00}', '\u{1da36}'), - ('\u{1da3b}', '\u{1da6c}'), - ('\u{1da75}', '\u{1da75}'), - ('\u{1da84}', '\u{1da84}'), - ('\u{1da9b}', '\u{1da9f}'), - ('\u{1daa1}', '\u{1daaf}'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', 
'\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), - ('\u{1e08f}', '\u{1e08f}'), - ('\u{1e130}', '\u{1e136}'), - ('\u{1e2ae}', '\u{1e2ae}'), - ('\u{1e2ec}', '\u{1e2ef}'), - ('\u{1e4ec}', '\u{1e4ef}'), - ('\u{1e5ee}', '\u{1e5ef}'), - ('\u{1e8d0}', '\u{1e8d6}'), - ('\u{1e944}', '\u{1e94a}'), - ('🏻', '🏿'), - ('\u{e0020}', '\u{e007f}'), - ('\u{e0100}', '\u{e01ef}'), -]; - -pub const JOIN_CONTROL: &'static [(char, char)] = &[('\u{200c}', '\u{200d}')]; - -pub const LOGICAL_ORDER_EXCEPTION: &'static [(char, char)] = &[ - ('เ', 'ไ'), - ('ເ', 'ໄ'), - ('ᦵ', 'ᦷ'), - ('ᦺ', 'ᦺ'), - ('ꪵ', 'ꪶ'), - ('ꪹ', 'ꪹ'), - ('ꪻ', 'ꪼ'), -]; - -pub const LOWERCASE: &'static [(char, char)] = &[ - ('a', 'z'), - ('ª', 'ª'), - ('µ', 'µ'), - ('º', 'º'), - ('ß', 'ö'), - ('ø', 'ÿ'), - ('ā', 'ā'), - ('ă', 'ă'), - ('ą', 'ą'), - ('ć', 'ć'), - ('ĉ', 'ĉ'), - ('ċ', 'ċ'), - ('č', 'č'), - ('ď', 'ď'), - ('đ', 'đ'), - ('ē', 'ē'), - ('ĕ', 'ĕ'), - ('ė', 'ė'), - ('ę', 'ę'), - ('ě', 'ě'), - ('ĝ', 'ĝ'), - ('ğ', 'ğ'), - ('ġ', 'ġ'), - ('ģ', 'ģ'), - ('ĥ', 'ĥ'), - ('ħ', 'ħ'), - ('ĩ', 'ĩ'), - ('ī', 'ī'), - ('ĭ', 'ĭ'), - ('į', 'į'), - ('ı', 'ı'), - ('ij', 'ij'), - ('ĵ', 'ĵ'), - ('ķ', 'ĸ'), - ('ĺ', 'ĺ'), - ('ļ', 'ļ'), - ('ľ', 'ľ'), - ('ŀ', 'ŀ'), - ('ł', 'ł'), - ('ń', 'ń'), - ('ņ', 'ņ'), - ('ň', 'ʼn'), - ('ŋ', 'ŋ'), - ('ō', 'ō'), - ('ŏ', 'ŏ'), - ('ő', 'ő'), - ('œ', 'œ'), - ('ŕ', 'ŕ'), - ('ŗ', 'ŗ'), - ('ř', 'ř'), - ('ś', 'ś'), - ('ŝ', 'ŝ'), - ('ş', 'ş'), - ('š', 'š'), - ('ţ', 'ţ'), - ('ť', 'ť'), - ('ŧ', 'ŧ'), - ('ũ', 'ũ'), - ('ū', 'ū'), - ('ŭ', 'ŭ'), - ('ů', 'ů'), - ('ű', 'ű'), - ('ų', 'ų'), - ('ŵ', 'ŵ'), - ('ŷ', 'ŷ'), - ('ź', 'ź'), - ('ż', 'ż'), - ('ž', 'ƀ'), - ('ƃ', 'ƃ'), - ('ƅ', 'ƅ'), - ('ƈ', 'ƈ'), - ('ƌ', 'ƍ'), - ('ƒ', 'ƒ'), - ('ƕ', 'ƕ'), - ('ƙ', 'ƛ'), - ('ƞ', 'ƞ'), - ('ơ', 'ơ'), - ('ƣ', 'ƣ'), - ('ƥ', 'ƥ'), - ('ƨ', 'ƨ'), - ('ƪ', 'ƫ'), - ('ƭ', 'ƭ'), - ('ư', 'ư'), - ('ƴ', 'ƴ'), - ('ƶ', 'ƶ'), - ('ƹ', 'ƺ'), - ('ƽ', 'ƿ'), - ('dž', 'dž'), - ('lj', 'lj'), - ('nj', 'nj'), - 
('ǎ', 'ǎ'), - ('ǐ', 'ǐ'), - ('ǒ', 'ǒ'), - ('ǔ', 'ǔ'), - ('ǖ', 'ǖ'), - ('ǘ', 'ǘ'), - ('ǚ', 'ǚ'), - ('ǜ', 'ǝ'), - ('ǟ', 'ǟ'), - ('ǡ', 'ǡ'), - ('ǣ', 'ǣ'), - ('ǥ', 'ǥ'), - ('ǧ', 'ǧ'), - ('ǩ', 'ǩ'), - ('ǫ', 'ǫ'), - ('ǭ', 'ǭ'), - ('ǯ', 'ǰ'), - ('dz', 'dz'), - ('ǵ', 'ǵ'), - ('ǹ', 'ǹ'), - ('ǻ', 'ǻ'), - ('ǽ', 'ǽ'), - ('ǿ', 'ǿ'), - ('ȁ', 'ȁ'), - ('ȃ', 'ȃ'), - ('ȅ', 'ȅ'), - ('ȇ', 'ȇ'), - ('ȉ', 'ȉ'), - ('ȋ', 'ȋ'), - ('ȍ', 'ȍ'), - ('ȏ', 'ȏ'), - ('ȑ', 'ȑ'), - ('ȓ', 'ȓ'), - ('ȕ', 'ȕ'), - ('ȗ', 'ȗ'), - ('ș', 'ș'), - ('ț', 'ț'), - ('ȝ', 'ȝ'), - ('ȟ', 'ȟ'), - ('ȡ', 'ȡ'), - ('ȣ', 'ȣ'), - ('ȥ', 'ȥ'), - ('ȧ', 'ȧ'), - ('ȩ', 'ȩ'), - ('ȫ', 'ȫ'), - ('ȭ', 'ȭ'), - ('ȯ', 'ȯ'), - ('ȱ', 'ȱ'), - ('ȳ', 'ȹ'), - ('ȼ', 'ȼ'), - ('ȿ', 'ɀ'), - ('ɂ', 'ɂ'), - ('ɇ', 'ɇ'), - ('ɉ', 'ɉ'), - ('ɋ', 'ɋ'), - ('ɍ', 'ɍ'), - ('ɏ', 'ʓ'), - ('ʕ', 'ʸ'), - ('ˀ', 'ˁ'), - ('ˠ', 'ˤ'), - ('\u{345}', '\u{345}'), - ('ͱ', 'ͱ'), - ('ͳ', 'ͳ'), - ('ͷ', 'ͷ'), - ('ͺ', 'ͽ'), - ('ΐ', 'ΐ'), - ('ά', 'ώ'), - ('ϐ', 'ϑ'), - ('ϕ', 'ϗ'), - ('ϙ', 'ϙ'), - ('ϛ', 'ϛ'), - ('ϝ', 'ϝ'), - ('ϟ', 'ϟ'), - ('ϡ', 'ϡ'), - ('ϣ', 'ϣ'), - ('ϥ', 'ϥ'), - ('ϧ', 'ϧ'), - ('ϩ', 'ϩ'), - ('ϫ', 'ϫ'), - ('ϭ', 'ϭ'), - ('ϯ', 'ϳ'), - ('ϵ', 'ϵ'), - ('ϸ', 'ϸ'), - ('ϻ', 'ϼ'), - ('а', 'џ'), - ('ѡ', 'ѡ'), - ('ѣ', 'ѣ'), - ('ѥ', 'ѥ'), - ('ѧ', 'ѧ'), - ('ѩ', 'ѩ'), - ('ѫ', 'ѫ'), - ('ѭ', 'ѭ'), - ('ѯ', 'ѯ'), - ('ѱ', 'ѱ'), - ('ѳ', 'ѳ'), - ('ѵ', 'ѵ'), - ('ѷ', 'ѷ'), - ('ѹ', 'ѹ'), - ('ѻ', 'ѻ'), - ('ѽ', 'ѽ'), - ('ѿ', 'ѿ'), - ('ҁ', 'ҁ'), - ('ҋ', 'ҋ'), - ('ҍ', 'ҍ'), - ('ҏ', 'ҏ'), - ('ґ', 'ґ'), - ('ғ', 'ғ'), - ('ҕ', 'ҕ'), - ('җ', 'җ'), - ('ҙ', 'ҙ'), - ('қ', 'қ'), - ('ҝ', 'ҝ'), - ('ҟ', 'ҟ'), - ('ҡ', 'ҡ'), - ('ң', 'ң'), - ('ҥ', 'ҥ'), - ('ҧ', 'ҧ'), - ('ҩ', 'ҩ'), - ('ҫ', 'ҫ'), - ('ҭ', 'ҭ'), - ('ү', 'ү'), - ('ұ', 'ұ'), - ('ҳ', 'ҳ'), - ('ҵ', 'ҵ'), - ('ҷ', 'ҷ'), - ('ҹ', 'ҹ'), - ('һ', 'һ'), - ('ҽ', 'ҽ'), - ('ҿ', 'ҿ'), - ('ӂ', 'ӂ'), - ('ӄ', 'ӄ'), - ('ӆ', 'ӆ'), - ('ӈ', 'ӈ'), - ('ӊ', 'ӊ'), - ('ӌ', 'ӌ'), - ('ӎ', 'ӏ'), - ('ӑ', 'ӑ'), - ('ӓ', 'ӓ'), - ('ӕ', 'ӕ'), - ('ӗ', 'ӗ'), - ('ә', 'ә'), - ('ӛ', 'ӛ'), 
- ('ӝ', 'ӝ'), - ('ӟ', 'ӟ'), - ('ӡ', 'ӡ'), - ('ӣ', 'ӣ'), - ('ӥ', 'ӥ'), - ('ӧ', 'ӧ'), - ('ө', 'ө'), - ('ӫ', 'ӫ'), - ('ӭ', 'ӭ'), - ('ӯ', 'ӯ'), - ('ӱ', 'ӱ'), - ('ӳ', 'ӳ'), - ('ӵ', 'ӵ'), - ('ӷ', 'ӷ'), - ('ӹ', 'ӹ'), - ('ӻ', 'ӻ'), - ('ӽ', 'ӽ'), - ('ӿ', 'ӿ'), - ('ԁ', 'ԁ'), - ('ԃ', 'ԃ'), - ('ԅ', 'ԅ'), - ('ԇ', 'ԇ'), - ('ԉ', 'ԉ'), - ('ԋ', 'ԋ'), - ('ԍ', 'ԍ'), - ('ԏ', 'ԏ'), - ('ԑ', 'ԑ'), - ('ԓ', 'ԓ'), - ('ԕ', 'ԕ'), - ('ԗ', 'ԗ'), - ('ԙ', 'ԙ'), - ('ԛ', 'ԛ'), - ('ԝ', 'ԝ'), - ('ԟ', 'ԟ'), - ('ԡ', 'ԡ'), - ('ԣ', 'ԣ'), - ('ԥ', 'ԥ'), - ('ԧ', 'ԧ'), - ('ԩ', 'ԩ'), - ('ԫ', 'ԫ'), - ('ԭ', 'ԭ'), - ('ԯ', 'ԯ'), - ('ՠ', 'ֈ'), - ('ა', 'ჺ'), - ('ჼ', 'ჿ'), - ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲈ'), - ('ᲊ', 'ᲊ'), - ('ᴀ', 'ᶿ'), - ('ḁ', 'ḁ'), - ('ḃ', 'ḃ'), - ('ḅ', 'ḅ'), - ('ḇ', 'ḇ'), - ('ḉ', 'ḉ'), - ('ḋ', 'ḋ'), - ('ḍ', 'ḍ'), - ('ḏ', 'ḏ'), - ('ḑ', 'ḑ'), - ('ḓ', 'ḓ'), - ('ḕ', 'ḕ'), - ('ḗ', 'ḗ'), - ('ḙ', 'ḙ'), - ('ḛ', 'ḛ'), - ('ḝ', 'ḝ'), - ('ḟ', 'ḟ'), - ('ḡ', 'ḡ'), - ('ḣ', 'ḣ'), - ('ḥ', 'ḥ'), - ('ḧ', 'ḧ'), - ('ḩ', 'ḩ'), - ('ḫ', 'ḫ'), - ('ḭ', 'ḭ'), - ('ḯ', 'ḯ'), - ('ḱ', 'ḱ'), - ('ḳ', 'ḳ'), - ('ḵ', 'ḵ'), - ('ḷ', 'ḷ'), - ('ḹ', 'ḹ'), - ('ḻ', 'ḻ'), - ('ḽ', 'ḽ'), - ('ḿ', 'ḿ'), - ('ṁ', 'ṁ'), - ('ṃ', 'ṃ'), - ('ṅ', 'ṅ'), - ('ṇ', 'ṇ'), - ('ṉ', 'ṉ'), - ('ṋ', 'ṋ'), - ('ṍ', 'ṍ'), - ('ṏ', 'ṏ'), - ('ṑ', 'ṑ'), - ('ṓ', 'ṓ'), - ('ṕ', 'ṕ'), - ('ṗ', 'ṗ'), - ('ṙ', 'ṙ'), - ('ṛ', 'ṛ'), - ('ṝ', 'ṝ'), - ('ṟ', 'ṟ'), - ('ṡ', 'ṡ'), - ('ṣ', 'ṣ'), - ('ṥ', 'ṥ'), - ('ṧ', 'ṧ'), - ('ṩ', 'ṩ'), - ('ṫ', 'ṫ'), - ('ṭ', 'ṭ'), - ('ṯ', 'ṯ'), - ('ṱ', 'ṱ'), - ('ṳ', 'ṳ'), - ('ṵ', 'ṵ'), - ('ṷ', 'ṷ'), - ('ṹ', 'ṹ'), - ('ṻ', 'ṻ'), - ('ṽ', 'ṽ'), - ('ṿ', 'ṿ'), - ('ẁ', 'ẁ'), - ('ẃ', 'ẃ'), - ('ẅ', 'ẅ'), - ('ẇ', 'ẇ'), - ('ẉ', 'ẉ'), - ('ẋ', 'ẋ'), - ('ẍ', 'ẍ'), - ('ẏ', 'ẏ'), - ('ẑ', 'ẑ'), - ('ẓ', 'ẓ'), - ('ẕ', 'ẝ'), - ('ẟ', 'ẟ'), - ('ạ', 'ạ'), - ('ả', 'ả'), - ('ấ', 'ấ'), - ('ầ', 'ầ'), - ('ẩ', 'ẩ'), - ('ẫ', 'ẫ'), - ('ậ', 'ậ'), - ('ắ', 'ắ'), - ('ằ', 'ằ'), - ('ẳ', 'ẳ'), - ('ẵ', 'ẵ'), - ('ặ', 'ặ'), - ('ẹ', 'ẹ'), - ('ẻ', 'ẻ'), - ('ẽ', 'ẽ'), - ('ế', 'ế'), - ('ề', 'ề'), - ('ể', 
'ể'), - ('ễ', 'ễ'), - ('ệ', 'ệ'), - ('ỉ', 'ỉ'), - ('ị', 'ị'), - ('ọ', 'ọ'), - ('ỏ', 'ỏ'), - ('ố', 'ố'), - ('ồ', 'ồ'), - ('ổ', 'ổ'), - ('ỗ', 'ỗ'), - ('ộ', 'ộ'), - ('ớ', 'ớ'), - ('ờ', 'ờ'), - ('ở', 'ở'), - ('ỡ', 'ỡ'), - ('ợ', 'ợ'), - ('ụ', 'ụ'), - ('ủ', 'ủ'), - ('ứ', 'ứ'), - ('ừ', 'ừ'), - ('ử', 'ử'), - ('ữ', 'ữ'), - ('ự', 'ự'), - ('ỳ', 'ỳ'), - ('ỵ', 'ỵ'), - ('ỷ', 'ỷ'), - ('ỹ', 'ỹ'), - ('ỻ', 'ỻ'), - ('ỽ', 'ỽ'), - ('ỿ', 'ἇ'), - ('ἐ', 'ἕ'), - ('ἠ', 'ἧ'), - ('ἰ', 'ἷ'), - ('ὀ', 'ὅ'), - ('ὐ', 'ὗ'), - ('ὠ', 'ὧ'), - ('ὰ', 'ώ'), - ('ᾀ', 'ᾇ'), - ('ᾐ', 'ᾗ'), - ('ᾠ', 'ᾧ'), - ('ᾰ', 'ᾴ'), - ('ᾶ', 'ᾷ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῇ'), - ('ῐ', 'ΐ'), - ('ῖ', 'ῗ'), - ('ῠ', 'ῧ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῷ'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('ℊ', 'ℊ'), - ('ℎ', 'ℏ'), - ('ℓ', 'ℓ'), - ('ℯ', 'ℯ'), - ('ℴ', 'ℴ'), - ('ℹ', 'ℹ'), - ('ℼ', 'ℽ'), - ('ⅆ', 'ⅉ'), - ('ⅎ', 'ⅎ'), - ('ⅰ', 'ⅿ'), - ('ↄ', 'ↄ'), - ('ⓐ', 'ⓩ'), - ('ⰰ', 'ⱟ'), - ('ⱡ', 'ⱡ'), - ('ⱥ', 'ⱦ'), - ('ⱨ', 'ⱨ'), - ('ⱪ', 'ⱪ'), - ('ⱬ', 'ⱬ'), - ('ⱱ', 'ⱱ'), - ('ⱳ', 'ⱴ'), - ('ⱶ', 'ⱽ'), - ('ⲁ', 'ⲁ'), - ('ⲃ', 'ⲃ'), - ('ⲅ', 'ⲅ'), - ('ⲇ', 'ⲇ'), - ('ⲉ', 'ⲉ'), - ('ⲋ', 'ⲋ'), - ('ⲍ', 'ⲍ'), - ('ⲏ', 'ⲏ'), - ('ⲑ', 'ⲑ'), - ('ⲓ', 'ⲓ'), - ('ⲕ', 'ⲕ'), - ('ⲗ', 'ⲗ'), - ('ⲙ', 'ⲙ'), - ('ⲛ', 'ⲛ'), - ('ⲝ', 'ⲝ'), - ('ⲟ', 'ⲟ'), - ('ⲡ', 'ⲡ'), - ('ⲣ', 'ⲣ'), - ('ⲥ', 'ⲥ'), - ('ⲧ', 'ⲧ'), - ('ⲩ', 'ⲩ'), - ('ⲫ', 'ⲫ'), - ('ⲭ', 'ⲭ'), - ('ⲯ', 'ⲯ'), - ('ⲱ', 'ⲱ'), - ('ⲳ', 'ⲳ'), - ('ⲵ', 'ⲵ'), - ('ⲷ', 'ⲷ'), - ('ⲹ', 'ⲹ'), - ('ⲻ', 'ⲻ'), - ('ⲽ', 'ⲽ'), - ('ⲿ', 'ⲿ'), - ('ⳁ', 'ⳁ'), - ('ⳃ', 'ⳃ'), - ('ⳅ', 'ⳅ'), - ('ⳇ', 'ⳇ'), - ('ⳉ', 'ⳉ'), - ('ⳋ', 'ⳋ'), - ('ⳍ', 'ⳍ'), - ('ⳏ', 'ⳏ'), - ('ⳑ', 'ⳑ'), - ('ⳓ', 'ⳓ'), - ('ⳕ', 'ⳕ'), - ('ⳗ', 'ⳗ'), - ('ⳙ', 'ⳙ'), - ('ⳛ', 'ⳛ'), - ('ⳝ', 'ⳝ'), - ('ⳟ', 'ⳟ'), - ('ⳡ', 'ⳡ'), - ('ⳣ', 'ⳤ'), - ('ⳬ', 'ⳬ'), - ('ⳮ', 'ⳮ'), - ('ⳳ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ꙁ', 'ꙁ'), - ('ꙃ', 'ꙃ'), - ('ꙅ', 'ꙅ'), - ('ꙇ', 'ꙇ'), - ('ꙉ', 'ꙉ'), - ('ꙋ', 'ꙋ'), - ('ꙍ', 'ꙍ'), - ('ꙏ', 'ꙏ'), - ('ꙑ', 'ꙑ'), - ('ꙓ', 'ꙓ'), - ('ꙕ', 'ꙕ'), - ('ꙗ', 'ꙗ'), - 
('ꙙ', 'ꙙ'), - ('ꙛ', 'ꙛ'), - ('ꙝ', 'ꙝ'), - ('ꙟ', 'ꙟ'), - ('ꙡ', 'ꙡ'), - ('ꙣ', 'ꙣ'), - ('ꙥ', 'ꙥ'), - ('ꙧ', 'ꙧ'), - ('ꙩ', 'ꙩ'), - ('ꙫ', 'ꙫ'), - ('ꙭ', 'ꙭ'), - ('ꚁ', 'ꚁ'), - ('ꚃ', 'ꚃ'), - ('ꚅ', 'ꚅ'), - ('ꚇ', 'ꚇ'), - ('ꚉ', 'ꚉ'), - ('ꚋ', 'ꚋ'), - ('ꚍ', 'ꚍ'), - ('ꚏ', 'ꚏ'), - ('ꚑ', 'ꚑ'), - ('ꚓ', 'ꚓ'), - ('ꚕ', 'ꚕ'), - ('ꚗ', 'ꚗ'), - ('ꚙ', 'ꚙ'), - ('ꚛ', 'ꚝ'), - ('ꜣ', 'ꜣ'), - ('ꜥ', 'ꜥ'), - ('ꜧ', 'ꜧ'), - ('ꜩ', 'ꜩ'), - ('ꜫ', 'ꜫ'), - ('ꜭ', 'ꜭ'), - ('ꜯ', 'ꜱ'), - ('ꜳ', 'ꜳ'), - ('ꜵ', 'ꜵ'), - ('ꜷ', 'ꜷ'), - ('ꜹ', 'ꜹ'), - ('ꜻ', 'ꜻ'), - ('ꜽ', 'ꜽ'), - ('ꜿ', 'ꜿ'), - ('ꝁ', 'ꝁ'), - ('ꝃ', 'ꝃ'), - ('ꝅ', 'ꝅ'), - ('ꝇ', 'ꝇ'), - ('ꝉ', 'ꝉ'), - ('ꝋ', 'ꝋ'), - ('ꝍ', 'ꝍ'), - ('ꝏ', 'ꝏ'), - ('ꝑ', 'ꝑ'), - ('ꝓ', 'ꝓ'), - ('ꝕ', 'ꝕ'), - ('ꝗ', 'ꝗ'), - ('ꝙ', 'ꝙ'), - ('ꝛ', 'ꝛ'), - ('ꝝ', 'ꝝ'), - ('ꝟ', 'ꝟ'), - ('ꝡ', 'ꝡ'), - ('ꝣ', 'ꝣ'), - ('ꝥ', 'ꝥ'), - ('ꝧ', 'ꝧ'), - ('ꝩ', 'ꝩ'), - ('ꝫ', 'ꝫ'), - ('ꝭ', 'ꝭ'), - ('ꝯ', 'ꝸ'), - ('ꝺ', 'ꝺ'), - ('ꝼ', 'ꝼ'), - ('ꝿ', 'ꝿ'), - ('ꞁ', 'ꞁ'), - ('ꞃ', 'ꞃ'), - ('ꞅ', 'ꞅ'), - ('ꞇ', 'ꞇ'), - ('ꞌ', 'ꞌ'), - ('ꞎ', 'ꞎ'), - ('ꞑ', 'ꞑ'), - ('ꞓ', 'ꞕ'), - ('ꞗ', 'ꞗ'), - ('ꞙ', 'ꞙ'), - ('ꞛ', 'ꞛ'), - ('ꞝ', 'ꞝ'), - ('ꞟ', 'ꞟ'), - ('ꞡ', 'ꞡ'), - ('ꞣ', 'ꞣ'), - ('ꞥ', 'ꞥ'), - ('ꞧ', 'ꞧ'), - ('ꞩ', 'ꞩ'), - ('ꞯ', 'ꞯ'), - ('ꞵ', 'ꞵ'), - ('ꞷ', 'ꞷ'), - ('ꞹ', 'ꞹ'), - ('ꞻ', 'ꞻ'), - ('ꞽ', 'ꞽ'), - ('ꞿ', 'ꞿ'), - ('ꟁ', 'ꟁ'), - ('ꟃ', 'ꟃ'), - ('ꟈ', 'ꟈ'), - ('ꟊ', 'ꟊ'), - ('ꟍ', 'ꟍ'), - ('ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟕ'), - ('ꟗ', 'ꟗ'), - ('ꟙ', 'ꟙ'), - ('ꟛ', 'ꟛ'), - ('ꟲ', 'ꟴ'), - ('ꟶ', 'ꟶ'), - ('ꟸ', 'ꟺ'), - ('ꬰ', 'ꭚ'), - ('ꭜ', 'ꭩ'), - ('ꭰ', 'ꮿ'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('a', 'z'), - ('𐐨', '𐑏'), - ('𐓘', '𐓻'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐞀', '𐞀'), - ('𐞃', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𐳀', '𐳲'), - ('𐵰', '𐶅'), - ('𑣀', '𑣟'), - ('𖹠', '𖹿'), - ('𝐚', '𝐳'), - ('𝑎', '𝑔'), - ('𝑖', '𝑧'), - ('𝒂', '𝒛'), - ('𝒶', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝓏'), - ('𝓪', '𝔃'), - ('𝔞', '𝔷'), - ('𝕒', '𝕫'), - ('𝖆', '𝖟'), - ('𝖺', '𝗓'), - ('𝗮', '𝘇'), - ('𝘢', '𝘻'), - ('𝙖', '𝙯'), - ('𝚊', '𝚥'), - ('𝛂', 
'𝛚'), - ('𝛜', '𝛡'), - ('𝛼', '𝜔'), - ('𝜖', '𝜛'), - ('𝜶', '𝝎'), - ('𝝐', '𝝕'), - ('𝝰', '𝞈'), - ('𝞊', '𝞏'), - ('𝞪', '𝟂'), - ('𝟄', '𝟉'), - ('𝟋', '𝟋'), - ('𝼀', '𝼉'), - ('𝼋', '𝼞'), - ('𝼥', '𝼪'), - ('𞀰', '𞁭'), - ('𞤢', '𞥃'), -]; - -pub const MATH: &'static [(char, char)] = &[ - ('+', '+'), - ('<', '>'), - ('^', '^'), - ('|', '|'), - ('~', '~'), - ('¬', '¬'), - ('±', '±'), - ('×', '×'), - ('÷', '÷'), - ('ϐ', 'ϒ'), - ('ϕ', 'ϕ'), - ('ϰ', 'ϱ'), - ('ϴ', '϶'), - ('؆', '؈'), - ('‖', '‖'), - ('′', '‴'), - ('⁀', '⁀'), - ('⁄', '⁄'), - ('⁒', '⁒'), - ('\u{2061}', '\u{2064}'), - ('⁺', '⁾'), - ('₊', '₎'), - ('\u{20d0}', '\u{20dc}'), - ('\u{20e1}', '\u{20e1}'), - ('\u{20e5}', '\u{20e6}'), - ('\u{20eb}', '\u{20ef}'), - ('ℂ', 'ℂ'), - ('ℇ', 'ℇ'), - ('ℊ', 'ℓ'), - ('ℕ', 'ℕ'), - ('℘', 'ℝ'), - ('ℤ', 'ℤ'), - ('ℨ', '℩'), - ('ℬ', 'ℭ'), - ('ℯ', 'ℱ'), - ('ℳ', 'ℸ'), - ('ℼ', 'ⅉ'), - ('⅋', '⅋'), - ('←', '↧'), - ('↩', '↮'), - ('↰', '↱'), - ('↶', '↷'), - ('↼', '⇛'), - ('⇝', '⇝'), - ('⇤', '⇥'), - ('⇴', '⋿'), - ('⌈', '⌋'), - ('⌠', '⌡'), - ('⍼', '⍼'), - ('⎛', '⎵'), - ('⎷', '⎷'), - ('⏐', '⏐'), - ('⏜', '⏢'), - ('■', '□'), - ('▮', '▷'), - ('▼', '◁'), - ('◆', '◇'), - ('◊', '○'), - ('●', '◓'), - ('◢', '◢'), - ('◤', '◤'), - ('◧', '◬'), - ('◸', '◿'), - ('★', '☆'), - ('♀', '♀'), - ('♂', '♂'), - ('♠', '♣'), - ('♭', '♯'), - ('⟀', '⟿'), - ('⤀', '⫿'), - ('⬰', '⭄'), - ('⭇', '⭌'), - ('﬩', '﬩'), - ('﹡', '﹦'), - ('﹨', '﹨'), - ('+', '+'), - ('<', '>'), - ('\', '\'), - ('^', '^'), - ('|', '|'), - ('~', '~'), - ('¬', '¬'), - ('←', '↓'), - ('𐶎', '𐶏'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝟋'), - ('𝟎', '𝟿'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', 
'𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('𞻰', '𞻱'), -]; - -pub const MODIFIER_COMBINING_MARK: &'static [(char, char)] = &[ - ('\u{654}', '\u{655}'), - ('\u{658}', '\u{658}'), - ('\u{6dc}', '\u{6dc}'), - ('\u{6e3}', '\u{6e3}'), - ('\u{6e7}', '\u{6e8}'), - ('\u{8ca}', '\u{8cb}'), - ('\u{8cd}', '\u{8cf}'), - ('\u{8d3}', '\u{8d3}'), - ('\u{8f3}', '\u{8f3}'), -]; - -pub const NONCHARACTER_CODE_POINT: &'static [(char, char)] = &[ - ('\u{fdd0}', '\u{fdef}'), - ('\u{fffe}', '\u{ffff}'), - ('\u{1fffe}', '\u{1ffff}'), - ('\u{2fffe}', '\u{2ffff}'), - ('\u{3fffe}', '\u{3ffff}'), - ('\u{4fffe}', '\u{4ffff}'), - ('\u{5fffe}', '\u{5ffff}'), - ('\u{6fffe}', '\u{6ffff}'), - ('\u{7fffe}', '\u{7ffff}'), - ('\u{8fffe}', '\u{8ffff}'), - ('\u{9fffe}', '\u{9ffff}'), - ('\u{afffe}', '\u{affff}'), - ('\u{bfffe}', '\u{bffff}'), - ('\u{cfffe}', '\u{cffff}'), - ('\u{dfffe}', '\u{dffff}'), - ('\u{efffe}', '\u{effff}'), - ('\u{ffffe}', '\u{fffff}'), - ('\u{10fffe}', '\u{10ffff}'), -]; - -pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ - ('\u{345}', '\u{345}'), - ('\u{363}', '\u{36f}'), - ('\u{5b0}', '\u{5bd}'), - ('\u{5bf}', '\u{5bf}'), - ('\u{5c1}', '\u{5c2}'), - ('\u{5c4}', '\u{5c5}'), - ('\u{5c7}', '\u{5c7}'), - ('\u{610}', '\u{61a}'), - ('\u{64b}', '\u{657}'), - ('\u{659}', '\u{65f}'), - ('\u{670}', '\u{670}'), - ('\u{6d6}', '\u{6dc}'), - ('\u{6e1}', '\u{6e4}'), - ('\u{6e7}', '\u{6e8}'), - ('\u{6ed}', '\u{6ed}'), - ('\u{711}', '\u{711}'), - ('\u{730}', '\u{73f}'), - ('\u{7a6}', '\u{7b0}'), - ('\u{816}', '\u{817}'), - ('\u{81b}', '\u{823}'), - ('\u{825}', '\u{827}'), - ('\u{829}', '\u{82c}'), - ('\u{897}', '\u{897}'), - ('\u{8d4}', '\u{8df}'), - ('\u{8e3}', '\u{8e9}'), - ('\u{8f0}', 'ः'), - ('\u{93a}', 'ऻ'), - ('ा', 'ौ'), - ('ॎ', 'ॏ'), - ('\u{955}', 
'\u{957}'), - ('\u{962}', '\u{963}'), - ('\u{981}', 'ঃ'), - ('\u{9be}', '\u{9c4}'), - ('ে', 'ৈ'), - ('ো', 'ৌ'), - ('\u{9d7}', '\u{9d7}'), - ('\u{9e2}', '\u{9e3}'), - ('\u{a01}', 'ਃ'), - ('ਾ', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4c}'), - ('\u{a51}', '\u{a51}'), - ('\u{a70}', '\u{a71}'), - ('\u{a75}', '\u{a75}'), - ('\u{a81}', 'ઃ'), - ('ા', '\u{ac5}'), - ('\u{ac7}', 'ૉ'), - ('ો', 'ૌ'), - ('\u{ae2}', '\u{ae3}'), - ('\u{afa}', '\u{afc}'), - ('\u{b01}', 'ଃ'), - ('\u{b3e}', '\u{b44}'), - ('େ', 'ୈ'), - ('ୋ', 'ୌ'), - ('\u{b56}', '\u{b57}'), - ('\u{b62}', '\u{b63}'), - ('\u{b82}', '\u{b82}'), - ('\u{bbe}', 'ூ'), - ('ெ', 'ை'), - ('ொ', 'ௌ'), - ('\u{bd7}', '\u{bd7}'), - ('\u{c00}', '\u{c04}'), - ('\u{c3e}', 'ౄ'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4c}'), - ('\u{c55}', '\u{c56}'), - ('\u{c62}', '\u{c63}'), - ('\u{c81}', 'ಃ'), - ('ಾ', 'ೄ'), - ('\u{cc6}', '\u{cc8}'), - ('\u{cca}', '\u{ccc}'), - ('\u{cd5}', '\u{cd6}'), - ('\u{ce2}', '\u{ce3}'), - ('ೳ', 'ೳ'), - ('\u{d00}', 'ഃ'), - ('\u{d3e}', '\u{d44}'), - ('െ', 'ൈ'), - ('ൊ', 'ൌ'), - ('\u{d57}', '\u{d57}'), - ('\u{d62}', '\u{d63}'), - ('\u{d81}', 'ඃ'), - ('\u{dcf}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('ෘ', '\u{ddf}'), - ('ෲ', 'ෳ'), - ('\u{e31}', '\u{e31}'), - ('\u{e34}', '\u{e3a}'), - ('\u{e4d}', '\u{e4d}'), - ('\u{eb1}', '\u{eb1}'), - ('\u{eb4}', '\u{eb9}'), - ('\u{ebb}', '\u{ebc}'), - ('\u{ecd}', '\u{ecd}'), - ('\u{f71}', '\u{f83}'), - ('\u{f8d}', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('ါ', '\u{1036}'), - ('း', 'း'), - ('ျ', '\u{103e}'), - ('ၖ', '\u{1059}'), - ('\u{105e}', '\u{1060}'), - ('ၢ', 'ၤ'), - ('ၧ', 'ၭ'), - ('\u{1071}', '\u{1074}'), - ('\u{1082}', '\u{108d}'), - ('ႏ', 'ႏ'), - ('ႚ', '\u{109d}'), - ('\u{1712}', '\u{1713}'), - ('\u{1732}', '\u{1733}'), - ('\u{1752}', '\u{1753}'), - ('\u{1772}', '\u{1773}'), - ('ា', 'ៈ'), - ('\u{1885}', '\u{1886}'), - ('\u{18a9}', '\u{18a9}'), - ('\u{1920}', 'ᤫ'), - ('ᤰ', 'ᤸ'), - ('\u{1a17}', '\u{1a1b}'), - ('ᩕ', '\u{1a5e}'), - ('ᩡ', '\u{1a74}'), - 
('\u{1abf}', '\u{1ac0}'), - ('\u{1acc}', '\u{1ace}'), - ('\u{1b00}', 'ᬄ'), - ('\u{1b35}', '\u{1b43}'), - ('\u{1b80}', 'ᮂ'), - ('ᮡ', '\u{1ba9}'), - ('\u{1bac}', '\u{1bad}'), - ('ᯧ', '\u{1bf1}'), - ('ᰤ', '\u{1c36}'), - ('\u{1dd3}', '\u{1df4}'), - ('Ⓐ', 'ⓩ'), - ('\u{2de0}', '\u{2dff}'), - ('\u{a674}', '\u{a67b}'), - ('\u{a69e}', '\u{a69f}'), - ('\u{a802}', '\u{a802}'), - ('\u{a80b}', '\u{a80b}'), - ('ꠣ', 'ꠧ'), - ('ꢀ', 'ꢁ'), - ('ꢴ', 'ꣃ'), - ('\u{a8c5}', '\u{a8c5}'), - ('\u{a8ff}', '\u{a8ff}'), - ('\u{a926}', '\u{a92a}'), - ('\u{a947}', 'ꥒ'), - ('\u{a980}', 'ꦃ'), - ('ꦴ', 'ꦿ'), - ('\u{a9e5}', '\u{a9e5}'), - ('\u{aa29}', '\u{aa36}'), - ('\u{aa43}', '\u{aa43}'), - ('\u{aa4c}', 'ꩍ'), - ('ꩻ', 'ꩽ'), - ('\u{aab0}', '\u{aab0}'), - ('\u{aab2}', '\u{aab4}'), - ('\u{aab7}', '\u{aab8}'), - ('\u{aabe}', '\u{aabe}'), - ('ꫫ', 'ꫯ'), - ('ꫵ', 'ꫵ'), - ('ꯣ', 'ꯪ'), - ('\u{fb1e}', '\u{fb1e}'), - ('\u{10376}', '\u{1037a}'), - ('\u{10a01}', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '\u{10a0f}'), - ('\u{10d24}', '\u{10d27}'), - ('\u{10d69}', '\u{10d69}'), - ('\u{10eab}', '\u{10eac}'), - ('\u{10efc}', '\u{10efc}'), - ('𑀀', '𑀂'), - ('\u{11038}', '\u{11045}'), - ('\u{11073}', '\u{11074}'), - ('\u{11080}', '𑂂'), - ('𑂰', '𑂸'), - ('\u{110c2}', '\u{110c2}'), - ('\u{11100}', '\u{11102}'), - ('\u{11127}', '\u{11132}'), - ('𑅅', '𑅆'), - ('\u{11180}', '𑆂'), - ('𑆳', '𑆿'), - ('𑇎', '\u{111cf}'), - ('𑈬', '\u{11234}'), - ('\u{11237}', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), - ('\u{11241}', '\u{11241}'), - ('\u{112df}', '\u{112e8}'), - ('\u{11300}', '𑌃'), - ('\u{1133e}', '𑍄'), - ('𑍇', '𑍈'), - ('𑍋', '𑍌'), - ('\u{11357}', '\u{11357}'), - ('𑍢', '𑍣'), - ('\u{113b8}', '\u{113c0}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '𑏊'), - ('𑏌', '𑏍'), - ('𑐵', '𑑁'), - ('\u{11443}', '𑑅'), - ('\u{114b0}', '𑓁'), - ('\u{115af}', '\u{115b5}'), - ('𑖸', '𑖾'), - ('\u{115dc}', '\u{115dd}'), - ('𑘰', '𑘾'), - ('\u{11640}', '\u{11640}'), - ('\u{116ab}', '\u{116b5}'), - ('\u{1171d}', 
'\u{1172a}'), - ('𑠬', '𑠸'), - ('\u{11930}', '𑤵'), - ('𑤷', '𑤸'), - ('\u{1193b}', '\u{1193c}'), - ('𑥀', '𑥀'), - ('𑥂', '𑥂'), - ('𑧑', '\u{119d7}'), - ('\u{119da}', '𑧟'), - ('𑧤', '𑧤'), - ('\u{11a01}', '\u{11a0a}'), - ('\u{11a35}', '𑨹'), - ('\u{11a3b}', '\u{11a3e}'), - ('\u{11a51}', '\u{11a5b}'), - ('\u{11a8a}', '𑪗'), - ('𑰯', '\u{11c36}'), - ('\u{11c38}', '𑰾'), - ('\u{11c92}', '\u{11ca7}'), - ('𑲩', '\u{11cb6}'), - ('\u{11d31}', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d41}'), - ('\u{11d43}', '\u{11d43}'), - ('\u{11d47}', '\u{11d47}'), - ('𑶊', '𑶎'), - ('\u{11d90}', '\u{11d91}'), - ('𑶓', '𑶖'), - ('\u{11ef3}', '𑻶'), - ('\u{11f00}', '\u{11f01}'), - ('𑼃', '𑼃'), - ('𑼴', '\u{11f3a}'), - ('𑼾', '\u{11f40}'), - ('\u{1611e}', '\u{1612e}'), - ('\u{16f4f}', '\u{16f4f}'), - ('𖽑', '𖾇'), - ('\u{16f8f}', '\u{16f92}'), - ('\u{16ff0}', '\u{16ff1}'), - ('\u{1bc9e}', '\u{1bc9e}'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), - ('\u{1e08f}', '\u{1e08f}'), - ('\u{1e947}', '\u{1e947}'), - ('🄰', '🅉'), - ('🅐', '🅩'), - ('🅰', '🆉'), -]; - -pub const OTHER_DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[ - ('\u{34f}', '\u{34f}'), - ('ᅟ', 'ᅠ'), - ('\u{17b4}', '\u{17b5}'), - ('\u{2065}', '\u{2065}'), - ('ㅤ', 'ㅤ'), - ('ᅠ', 'ᅠ'), - ('\u{fff0}', '\u{fff8}'), - ('\u{e0000}', '\u{e0000}'), - ('\u{e0002}', '\u{e001f}'), - ('\u{e0080}', '\u{e00ff}'), - ('\u{e01f0}', '\u{e0fff}'), -]; - -pub const OTHER_GRAPHEME_EXTEND: &'static [(char, char)] = &[ - ('\u{9be}', '\u{9be}'), - ('\u{9d7}', '\u{9d7}'), - ('\u{b3e}', '\u{b3e}'), - ('\u{b57}', '\u{b57}'), - ('\u{bbe}', '\u{bbe}'), - ('\u{bd7}', '\u{bd7}'), - ('\u{cc0}', '\u{cc0}'), - ('\u{cc2}', '\u{cc2}'), - ('\u{cc7}', '\u{cc8}'), - ('\u{cca}', '\u{ccb}'), - ('\u{cd5}', '\u{cd6}'), - ('\u{d3e}', '\u{d3e}'), - ('\u{d57}', '\u{d57}'), - ('\u{dcf}', '\u{dcf}'), - ('\u{ddf}', '\u{ddf}'), - 
('\u{1715}', '\u{1715}'), - ('\u{1734}', '\u{1734}'), - ('\u{1b35}', '\u{1b35}'), - ('\u{1b3b}', '\u{1b3b}'), - ('\u{1b3d}', '\u{1b3d}'), - ('\u{1b43}', '\u{1b44}'), - ('\u{1baa}', '\u{1baa}'), - ('\u{1bf2}', '\u{1bf3}'), - ('\u{200c}', '\u{200c}'), - ('\u{302e}', '\u{302f}'), - ('\u{a953}', '\u{a953}'), - ('\u{a9c0}', '\u{a9c0}'), - ('\u{ff9e}', '\u{ff9f}'), - ('\u{111c0}', '\u{111c0}'), - ('\u{11235}', '\u{11235}'), - ('\u{1133e}', '\u{1133e}'), - ('\u{1134d}', '\u{1134d}'), - ('\u{11357}', '\u{11357}'), - ('\u{113b8}', '\u{113b8}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '\u{113c9}'), - ('\u{113cf}', '\u{113cf}'), - ('\u{114b0}', '\u{114b0}'), - ('\u{114bd}', '\u{114bd}'), - ('\u{115af}', '\u{115af}'), - ('\u{116b6}', '\u{116b6}'), - ('\u{11930}', '\u{11930}'), - ('\u{1193d}', '\u{1193d}'), - ('\u{11f41}', '\u{11f41}'), - ('\u{16ff0}', '\u{16ff1}'), - ('\u{1d165}', '\u{1d166}'), - ('\u{1d16d}', '\u{1d172}'), - ('\u{e0020}', '\u{e007f}'), -]; - -pub const OTHER_ID_CONTINUE: &'static [(char, char)] = &[ - ('·', '·'), - ('·', '·'), - ('፩', '፱'), - ('᧚', '᧚'), - ('\u{200c}', '\u{200d}'), - ('・', '・'), - ('・', '・'), -]; - -pub const OTHER_ID_START: &'static [(char, char)] = - &[('\u{1885}', '\u{1886}'), ('℘', '℘'), ('℮', '℮'), ('゛', '゜')]; - -pub const OTHER_LOWERCASE: &'static [(char, char)] = &[ - ('ª', 'ª'), - ('º', 'º'), - ('ʰ', 'ʸ'), - ('ˀ', 'ˁ'), - ('ˠ', 'ˤ'), - ('\u{345}', '\u{345}'), - ('ͺ', 'ͺ'), - ('ჼ', 'ჼ'), - ('ᴬ', 'ᵪ'), - ('ᵸ', 'ᵸ'), - ('ᶛ', 'ᶿ'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('ⅰ', 'ⅿ'), - ('ⓐ', 'ⓩ'), - ('ⱼ', 'ⱽ'), - ('ꚜ', 'ꚝ'), - ('ꝰ', 'ꝰ'), - ('ꟲ', 'ꟴ'), - ('ꟸ', 'ꟹ'), - ('ꭜ', 'ꭟ'), - ('ꭩ', 'ꭩ'), - ('𐞀', '𐞀'), - ('𐞃', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𞀰', '𞁭'), -]; - -pub const OTHER_MATH: &'static [(char, char)] = &[ - ('^', '^'), - ('ϐ', 'ϒ'), - ('ϕ', 'ϕ'), - ('ϰ', 'ϱ'), - ('ϴ', 'ϵ'), - ('‖', '‖'), - ('′', '‴'), - ('⁀', '⁀'), - ('\u{2061}', '\u{2064}'), - ('⁽', '⁾'), - ('₍', '₎'), - ('\u{20d0}', 
'\u{20dc}'), - ('\u{20e1}', '\u{20e1}'), - ('\u{20e5}', '\u{20e6}'), - ('\u{20eb}', '\u{20ef}'), - ('ℂ', 'ℂ'), - ('ℇ', 'ℇ'), - ('ℊ', 'ℓ'), - ('ℕ', 'ℕ'), - ('ℙ', 'ℝ'), - ('ℤ', 'ℤ'), - ('ℨ', '℩'), - ('ℬ', 'ℭ'), - ('ℯ', 'ℱ'), - ('ℳ', 'ℸ'), - ('ℼ', 'ℿ'), - ('ⅅ', 'ⅉ'), - ('↕', '↙'), - ('↜', '↟'), - ('↡', '↢'), - ('↤', '↥'), - ('↧', '↧'), - ('↩', '↭'), - ('↰', '↱'), - ('↶', '↷'), - ('↼', '⇍'), - ('⇐', '⇑'), - ('⇓', '⇓'), - ('⇕', '⇛'), - ('⇝', '⇝'), - ('⇤', '⇥'), - ('⌈', '⌋'), - ('⎴', '⎵'), - ('⎷', '⎷'), - ('⏐', '⏐'), - ('⏢', '⏢'), - ('■', '□'), - ('▮', '▶'), - ('▼', '◀'), - ('◆', '◇'), - ('◊', '○'), - ('●', '◓'), - ('◢', '◢'), - ('◤', '◤'), - ('◧', '◬'), - ('★', '☆'), - ('♀', '♀'), - ('♂', '♂'), - ('♠', '♣'), - ('♭', '♮'), - ('⟅', '⟆'), - ('⟦', '⟯'), - ('⦃', '⦘'), - ('⧘', '⧛'), - ('⧼', '⧽'), - ('﹡', '﹡'), - ('﹣', '﹣'), - ('﹨', '﹨'), - ('\', '\'), - ('^', '^'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝛀'), - ('𝛂', '𝛚'), - ('𝛜', '𝛺'), - ('𝛼', '𝜔'), - ('𝜖', '𝜴'), - ('𝜶', '𝝎'), - ('𝝐', '𝝮'), - ('𝝰', '𝞈'), - ('𝞊', '𝞨'), - ('𝞪', '𝟂'), - ('𝟄', '𝟋'), - ('𝟎', '𝟿'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), -]; - -pub const OTHER_UPPERCASE: &'static [(char, char)] = - &[('Ⅰ', 'Ⅿ'), ('Ⓐ', 'Ⓩ'), ('🄰', '🅉'), ('🅐', '🅩'), ('🅰', '🆉')]; - -pub const PATTERN_SYNTAX: &'static [(char, char)] = &[ - ('!', '/'), - (':', '@'), - ('[', '^'), - ('`', 
'`'), - ('{', '~'), - ('¡', '§'), - ('©', '©'), - ('«', '¬'), - ('®', '®'), - ('°', '±'), - ('¶', '¶'), - ('»', '»'), - ('¿', '¿'), - ('×', '×'), - ('÷', '÷'), - ('‐', '‧'), - ('‰', '‾'), - ('⁁', '⁓'), - ('⁕', '⁞'), - ('←', '\u{245f}'), - ('─', '❵'), - ('➔', '⯿'), - ('⸀', '\u{2e7f}'), - ('、', '〃'), - ('〈', '〠'), - ('〰', '〰'), - ('﴾', '﴿'), - ('﹅', '﹆'), -]; - -pub const PATTERN_WHITE_SPACE: &'static [(char, char)] = &[ - ('\t', '\r'), - (' ', ' '), - ('\u{85}', '\u{85}'), - ('\u{200e}', '\u{200f}'), - ('\u{2028}', '\u{2029}'), -]; - -pub const PREPENDED_CONCATENATION_MARK: &'static [(char, char)] = &[ - ('\u{600}', '\u{605}'), - ('\u{6dd}', '\u{6dd}'), - ('\u{70f}', '\u{70f}'), - ('\u{890}', '\u{891}'), - ('\u{8e2}', '\u{8e2}'), - ('\u{110bd}', '\u{110bd}'), - ('\u{110cd}', '\u{110cd}'), -]; - -pub const QUOTATION_MARK: &'static [(char, char)] = &[ - ('"', '"'), - ('\'', '\''), - ('«', '«'), - ('»', '»'), - ('‘', '‟'), - ('‹', '›'), - ('⹂', '⹂'), - ('「', '』'), - ('〝', '〟'), - ('﹁', '﹄'), - ('"', '"'), - (''', '''), - ('「', '」'), -]; - -pub const RADICAL: &'static [(char, char)] = - &[('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕')]; - -pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('🇦', '🇿')]; - -pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ - ('!', '!'), - ('.', '.'), - ('?', '?'), - ('։', '։'), - ('؝', '؟'), - ('۔', '۔'), - ('܀', '܂'), - ('߹', '߹'), - ('࠷', '࠷'), - ('࠹', '࠹'), - ('࠽', '࠾'), - ('।', '॥'), - ('၊', '။'), - ('።', '።'), - ('፧', '፨'), - ('᙮', '᙮'), - ('᜵', '᜶'), - ('។', '៕'), - ('᠃', '᠃'), - ('᠉', '᠉'), - ('᥄', '᥅'), - ('᪨', '᪫'), - ('᭎', '᭏'), - ('᭚', '᭛'), - ('᭞', '᭟'), - ('᭽', '᭿'), - ('᰻', '᰼'), - ('᱾', '᱿'), - ('․', '․'), - ('‼', '‽'), - ('⁇', '⁉'), - ('⳹', '⳻'), - ('⸮', '⸮'), - ('⸼', '⸼'), - ('⹓', '⹔'), - ('。', '。'), - ('꓿', '꓿'), - ('꘎', '꘏'), - ('꛳', '꛳'), - ('꛷', '꛷'), - ('꡶', '꡷'), - ('꣎', '꣏'), - ('꤯', '꤯'), - ('꧈', '꧉'), - ('꩝', '꩟'), - ('꫰', '꫱'), - ('꯫', '꯫'), - ('︒', '︒'), - ('︕', '︖'), - ('﹒', '﹒'), - ('﹖', '﹗'), - ('!', 
'!'), - ('.', '.'), - ('?', '?'), - ('。', '。'), - ('𐩖', '𐩗'), - ('𐽕', '𐽙'), - ('𐾆', '𐾉'), - ('𑁇', '𑁈'), - ('𑂾', '𑃁'), - ('𑅁', '𑅃'), - ('𑇅', '𑇆'), - ('𑇍', '𑇍'), - ('𑇞', '𑇟'), - ('𑈸', '𑈹'), - ('𑈻', '𑈼'), - ('𑊩', '𑊩'), - ('𑏔', '𑏕'), - ('𑑋', '𑑌'), - ('𑗂', '𑗃'), - ('𑗉', '𑗗'), - ('𑙁', '𑙂'), - ('𑜼', '𑜾'), - ('𑥄', '𑥄'), - ('𑥆', '𑥆'), - ('𑩂', '𑩃'), - ('𑪛', '𑪜'), - ('𑱁', '𑱂'), - ('𑻷', '𑻸'), - ('𑽃', '𑽄'), - ('𖩮', '𖩯'), - ('𖫵', '𖫵'), - ('𖬷', '𖬸'), - ('𖭄', '𖭄'), - ('𖵮', '𖵯'), - ('𖺘', '𖺘'), - ('𛲟', '𛲟'), - ('𝪈', '𝪈'), -]; - -pub const SOFT_DOTTED: &'static [(char, char)] = &[ - ('i', 'j'), - ('į', 'į'), - ('ɉ', 'ɉ'), - ('ɨ', 'ɨ'), - ('ʝ', 'ʝ'), - ('ʲ', 'ʲ'), - ('ϳ', 'ϳ'), - ('і', 'і'), - ('ј', 'ј'), - ('ᵢ', 'ᵢ'), - ('ᶖ', 'ᶖ'), - ('ᶤ', 'ᶤ'), - ('ᶨ', 'ᶨ'), - ('ḭ', 'ḭ'), - ('ị', 'ị'), - ('ⁱ', 'ⁱ'), - ('ⅈ', 'ⅉ'), - ('ⱼ', 'ⱼ'), - ('𝐢', '𝐣'), - ('𝑖', '𝑗'), - ('𝒊', '𝒋'), - ('𝒾', '𝒿'), - ('𝓲', '𝓳'), - ('𝔦', '𝔧'), - ('𝕚', '𝕛'), - ('𝖎', '𝖏'), - ('𝗂', '𝗃'), - ('𝗶', '𝗷'), - ('𝘪', '𝘫'), - ('𝙞', '𝙟'), - ('𝚒', '𝚓'), - ('𝼚', '𝼚'), - ('𞁌', '𞁍'), - ('𞁨', '𞁨'), -]; - -pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ - ('!', '!'), - (',', ','), - ('.', '.'), - (':', ';'), - ('?', '?'), - (';', ';'), - ('·', '·'), - ('։', '։'), - ('׃', '׃'), - ('،', '،'), - ('؛', '؛'), - ('؝', '؟'), - ('۔', '۔'), - ('܀', '܊'), - ('܌', '܌'), - ('߸', '߹'), - ('࠰', '࠵'), - ('࠷', '࠾'), - ('࡞', '࡞'), - ('।', '॥'), - ('๚', '๛'), - ('༈', '༈'), - ('།', '༒'), - ('၊', '။'), - ('፡', '፨'), - ('᙮', '᙮'), - ('᛫', '᛭'), - ('᜵', '᜶'), - ('។', '៖'), - ('៚', '៚'), - ('᠂', '᠅'), - ('᠈', '᠉'), - ('᥄', '᥅'), - ('᪨', '᪫'), - ('᭎', '᭏'), - ('᭚', '᭛'), - ('᭝', '᭟'), - ('᭽', '᭿'), - ('᰻', '᰿'), - ('᱾', '᱿'), - ('․', '․'), - ('‼', '‽'), - ('⁇', '⁉'), - ('⳹', '⳻'), - ('⸮', '⸮'), - ('⸼', '⸼'), - ('⹁', '⹁'), - ('⹌', '⹌'), - ('⹎', '⹏'), - ('⹓', '⹔'), - ('、', '。'), - ('꓾', '꓿'), - ('꘍', '꘏'), - ('꛳', '꛷'), - ('꡶', '꡷'), - ('꣎', '꣏'), - ('꤯', '꤯'), - ('꧇', '꧉'), - ('꩝', '꩟'), - ('꫟', '꫟'), - ('꫰', '꫱'), - ('꯫', '꯫'), - ('︒', '︒'), - 
('︕', '︖'), - ('﹐', '﹒'), - ('﹔', '﹗'), - ('!', '!'), - (',', ','), - ('.', '.'), - (':', ';'), - ('?', '?'), - ('。', '。'), - ('、', '、'), - ('𐎟', '𐎟'), - ('𐏐', '𐏐'), - ('𐡗', '𐡗'), - ('𐤟', '𐤟'), - ('𐩖', '𐩗'), - ('𐫰', '𐫵'), - ('𐬺', '𐬿'), - ('𐮙', '𐮜'), - ('𐽕', '𐽙'), - ('𐾆', '𐾉'), - ('𑁇', '𑁍'), - ('𑂾', '𑃁'), - ('𑅁', '𑅃'), - ('𑇅', '𑇆'), - ('𑇍', '𑇍'), - ('𑇞', '𑇟'), - ('𑈸', '𑈼'), - ('𑊩', '𑊩'), - ('𑏔', '𑏕'), - ('𑑋', '𑑍'), - ('𑑚', '𑑛'), - ('𑗂', '𑗅'), - ('𑗉', '𑗗'), - ('𑙁', '𑙂'), - ('𑜼', '𑜾'), - ('𑥄', '𑥄'), - ('𑥆', '𑥆'), - ('𑩂', '𑩃'), - ('𑪛', '𑪜'), - ('𑪡', '𑪢'), - ('𑱁', '𑱃'), - ('𑱱', '𑱱'), - ('𑻷', '𑻸'), - ('𑽃', '𑽄'), - ('𒑰', '𒑴'), - ('𖩮', '𖩯'), - ('𖫵', '𖫵'), - ('𖬷', '𖬹'), - ('𖭄', '𖭄'), - ('𖵮', '𖵯'), - ('𖺗', '𖺘'), - ('𛲟', '𛲟'), - ('𝪇', '𝪊'), -]; - -pub const UNIFIED_IDEOGRAPH: &'static [(char, char)] = &[ - ('㐀', '䶿'), - ('一', '鿿'), - ('﨎', '﨏'), - ('﨑', '﨑'), - ('﨓', '﨔'), - ('﨟', '﨟'), - ('﨡', '﨡'), - ('﨣', '﨤'), - ('﨧', '﨩'), - ('𠀀', '𪛟'), - ('𪜀', '𫜹'), - ('𫝀', '𫠝'), - ('𫠠', '𬺡'), - ('𬺰', '𮯠'), - ('𮯰', '𮹝'), - ('𰀀', '𱍊'), - ('𱍐', '𲎯'), -]; - -pub const UPPERCASE: &'static [(char, char)] = &[ - ('A', 'Z'), - ('À', 'Ö'), - ('Ø', 'Þ'), - ('Ā', 'Ā'), - ('Ă', 'Ă'), - ('Ą', 'Ą'), - ('Ć', 'Ć'), - ('Ĉ', 'Ĉ'), - ('Ċ', 'Ċ'), - ('Č', 'Č'), - ('Ď', 'Ď'), - ('Đ', 'Đ'), - ('Ē', 'Ē'), - ('Ĕ', 'Ĕ'), - ('Ė', 'Ė'), - ('Ę', 'Ę'), - ('Ě', 'Ě'), - ('Ĝ', 'Ĝ'), - ('Ğ', 'Ğ'), - ('Ġ', 'Ġ'), - ('Ģ', 'Ģ'), - ('Ĥ', 'Ĥ'), - ('Ħ', 'Ħ'), - ('Ĩ', 'Ĩ'), - ('Ī', 'Ī'), - ('Ĭ', 'Ĭ'), - ('Į', 'Į'), - ('İ', 'İ'), - ('IJ', 'IJ'), - ('Ĵ', 'Ĵ'), - ('Ķ', 'Ķ'), - ('Ĺ', 'Ĺ'), - ('Ļ', 'Ļ'), - ('Ľ', 'Ľ'), - ('Ŀ', 'Ŀ'), - ('Ł', 'Ł'), - ('Ń', 'Ń'), - ('Ņ', 'Ņ'), - ('Ň', 'Ň'), - ('Ŋ', 'Ŋ'), - ('Ō', 'Ō'), - ('Ŏ', 'Ŏ'), - ('Ő', 'Ő'), - ('Œ', 'Œ'), - ('Ŕ', 'Ŕ'), - ('Ŗ', 'Ŗ'), - ('Ř', 'Ř'), - ('Ś', 'Ś'), - ('Ŝ', 'Ŝ'), - ('Ş', 'Ş'), - ('Š', 'Š'), - ('Ţ', 'Ţ'), - ('Ť', 'Ť'), - ('Ŧ', 'Ŧ'), - ('Ũ', 'Ũ'), - ('Ū', 'Ū'), - ('Ŭ', 'Ŭ'), - ('Ů', 'Ů'), - ('Ű', 'Ű'), - ('Ų', 'Ų'), - ('Ŵ', 'Ŵ'), - ('Ŷ', 'Ŷ'), - ('Ÿ', 'Ź'), - ('Ż', 'Ż'), - 
('Ž', 'Ž'), - ('Ɓ', 'Ƃ'), - ('Ƅ', 'Ƅ'), - ('Ɔ', 'Ƈ'), - ('Ɖ', 'Ƌ'), - ('Ǝ', 'Ƒ'), - ('Ɠ', 'Ɣ'), - ('Ɩ', 'Ƙ'), - ('Ɯ', 'Ɲ'), - ('Ɵ', 'Ơ'), - ('Ƣ', 'Ƣ'), - ('Ƥ', 'Ƥ'), - ('Ʀ', 'Ƨ'), - ('Ʃ', 'Ʃ'), - ('Ƭ', 'Ƭ'), - ('Ʈ', 'Ư'), - ('Ʊ', 'Ƴ'), - ('Ƶ', 'Ƶ'), - ('Ʒ', 'Ƹ'), - ('Ƽ', 'Ƽ'), - ('DŽ', 'DŽ'), - ('LJ', 'LJ'), - ('NJ', 'NJ'), - ('Ǎ', 'Ǎ'), - ('Ǐ', 'Ǐ'), - ('Ǒ', 'Ǒ'), - ('Ǔ', 'Ǔ'), - ('Ǖ', 'Ǖ'), - ('Ǘ', 'Ǘ'), - ('Ǚ', 'Ǚ'), - ('Ǜ', 'Ǜ'), - ('Ǟ', 'Ǟ'), - ('Ǡ', 'Ǡ'), - ('Ǣ', 'Ǣ'), - ('Ǥ', 'Ǥ'), - ('Ǧ', 'Ǧ'), - ('Ǩ', 'Ǩ'), - ('Ǫ', 'Ǫ'), - ('Ǭ', 'Ǭ'), - ('Ǯ', 'Ǯ'), - ('DZ', 'DZ'), - ('Ǵ', 'Ǵ'), - ('Ƕ', 'Ǹ'), - ('Ǻ', 'Ǻ'), - ('Ǽ', 'Ǽ'), - ('Ǿ', 'Ǿ'), - ('Ȁ', 'Ȁ'), - ('Ȃ', 'Ȃ'), - ('Ȅ', 'Ȅ'), - ('Ȇ', 'Ȇ'), - ('Ȉ', 'Ȉ'), - ('Ȋ', 'Ȋ'), - ('Ȍ', 'Ȍ'), - ('Ȏ', 'Ȏ'), - ('Ȑ', 'Ȑ'), - ('Ȓ', 'Ȓ'), - ('Ȕ', 'Ȕ'), - ('Ȗ', 'Ȗ'), - ('Ș', 'Ș'), - ('Ț', 'Ț'), - ('Ȝ', 'Ȝ'), - ('Ȟ', 'Ȟ'), - ('Ƞ', 'Ƞ'), - ('Ȣ', 'Ȣ'), - ('Ȥ', 'Ȥ'), - ('Ȧ', 'Ȧ'), - ('Ȩ', 'Ȩ'), - ('Ȫ', 'Ȫ'), - ('Ȭ', 'Ȭ'), - ('Ȯ', 'Ȯ'), - ('Ȱ', 'Ȱ'), - ('Ȳ', 'Ȳ'), - ('Ⱥ', 'Ȼ'), - ('Ƚ', 'Ⱦ'), - ('Ɂ', 'Ɂ'), - ('Ƀ', 'Ɇ'), - ('Ɉ', 'Ɉ'), - ('Ɋ', 'Ɋ'), - ('Ɍ', 'Ɍ'), - ('Ɏ', 'Ɏ'), - ('Ͱ', 'Ͱ'), - ('Ͳ', 'Ͳ'), - ('Ͷ', 'Ͷ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ώ'), - ('Α', 'Ρ'), - ('Σ', 'Ϋ'), - ('Ϗ', 'Ϗ'), - ('ϒ', 'ϔ'), - ('Ϙ', 'Ϙ'), - ('Ϛ', 'Ϛ'), - ('Ϝ', 'Ϝ'), - ('Ϟ', 'Ϟ'), - ('Ϡ', 'Ϡ'), - ('Ϣ', 'Ϣ'), - ('Ϥ', 'Ϥ'), - ('Ϧ', 'Ϧ'), - ('Ϩ', 'Ϩ'), - ('Ϫ', 'Ϫ'), - ('Ϭ', 'Ϭ'), - ('Ϯ', 'Ϯ'), - ('ϴ', 'ϴ'), - ('Ϸ', 'Ϸ'), - ('Ϲ', 'Ϻ'), - ('Ͻ', 'Я'), - ('Ѡ', 'Ѡ'), - ('Ѣ', 'Ѣ'), - ('Ѥ', 'Ѥ'), - ('Ѧ', 'Ѧ'), - ('Ѩ', 'Ѩ'), - ('Ѫ', 'Ѫ'), - ('Ѭ', 'Ѭ'), - ('Ѯ', 'Ѯ'), - ('Ѱ', 'Ѱ'), - ('Ѳ', 'Ѳ'), - ('Ѵ', 'Ѵ'), - ('Ѷ', 'Ѷ'), - ('Ѹ', 'Ѹ'), - ('Ѻ', 'Ѻ'), - ('Ѽ', 'Ѽ'), - ('Ѿ', 'Ѿ'), - ('Ҁ', 'Ҁ'), - ('Ҋ', 'Ҋ'), - ('Ҍ', 'Ҍ'), - ('Ҏ', 'Ҏ'), - ('Ґ', 'Ґ'), - ('Ғ', 'Ғ'), - ('Ҕ', 'Ҕ'), - ('Җ', 'Җ'), - ('Ҙ', 'Ҙ'), - ('Қ', 'Қ'), - ('Ҝ', 'Ҝ'), - ('Ҟ', 'Ҟ'), - ('Ҡ', 'Ҡ'), - ('Ң', 'Ң'), - ('Ҥ', 'Ҥ'), - ('Ҧ', 'Ҧ'), - ('Ҩ', 'Ҩ'), - ('Ҫ', 'Ҫ'), - 
('Ҭ', 'Ҭ'), - ('Ү', 'Ү'), - ('Ұ', 'Ұ'), - ('Ҳ', 'Ҳ'), - ('Ҵ', 'Ҵ'), - ('Ҷ', 'Ҷ'), - ('Ҹ', 'Ҹ'), - ('Һ', 'Һ'), - ('Ҽ', 'Ҽ'), - ('Ҿ', 'Ҿ'), - ('Ӏ', 'Ӂ'), - ('Ӄ', 'Ӄ'), - ('Ӆ', 'Ӆ'), - ('Ӈ', 'Ӈ'), - ('Ӊ', 'Ӊ'), - ('Ӌ', 'Ӌ'), - ('Ӎ', 'Ӎ'), - ('Ӑ', 'Ӑ'), - ('Ӓ', 'Ӓ'), - ('Ӕ', 'Ӕ'), - ('Ӗ', 'Ӗ'), - ('Ә', 'Ә'), - ('Ӛ', 'Ӛ'), - ('Ӝ', 'Ӝ'), - ('Ӟ', 'Ӟ'), - ('Ӡ', 'Ӡ'), - ('Ӣ', 'Ӣ'), - ('Ӥ', 'Ӥ'), - ('Ӧ', 'Ӧ'), - ('Ө', 'Ө'), - ('Ӫ', 'Ӫ'), - ('Ӭ', 'Ӭ'), - ('Ӯ', 'Ӯ'), - ('Ӱ', 'Ӱ'), - ('Ӳ', 'Ӳ'), - ('Ӵ', 'Ӵ'), - ('Ӷ', 'Ӷ'), - ('Ӹ', 'Ӹ'), - ('Ӻ', 'Ӻ'), - ('Ӽ', 'Ӽ'), - ('Ӿ', 'Ӿ'), - ('Ԁ', 'Ԁ'), - ('Ԃ', 'Ԃ'), - ('Ԅ', 'Ԅ'), - ('Ԇ', 'Ԇ'), - ('Ԉ', 'Ԉ'), - ('Ԋ', 'Ԋ'), - ('Ԍ', 'Ԍ'), - ('Ԏ', 'Ԏ'), - ('Ԑ', 'Ԑ'), - ('Ԓ', 'Ԓ'), - ('Ԕ', 'Ԕ'), - ('Ԗ', 'Ԗ'), - ('Ԙ', 'Ԙ'), - ('Ԛ', 'Ԛ'), - ('Ԝ', 'Ԝ'), - ('Ԟ', 'Ԟ'), - ('Ԡ', 'Ԡ'), - ('Ԣ', 'Ԣ'), - ('Ԥ', 'Ԥ'), - ('Ԧ', 'Ԧ'), - ('Ԩ', 'Ԩ'), - ('Ԫ', 'Ԫ'), - ('Ԭ', 'Ԭ'), - ('Ԯ', 'Ԯ'), - ('Ա', 'Ֆ'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('Ꭰ', 'Ᏽ'), - ('Ᲊ', 'Ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('Ḁ', 'Ḁ'), - ('Ḃ', 'Ḃ'), - ('Ḅ', 'Ḅ'), - ('Ḇ', 'Ḇ'), - ('Ḉ', 'Ḉ'), - ('Ḋ', 'Ḋ'), - ('Ḍ', 'Ḍ'), - ('Ḏ', 'Ḏ'), - ('Ḑ', 'Ḑ'), - ('Ḓ', 'Ḓ'), - ('Ḕ', 'Ḕ'), - ('Ḗ', 'Ḗ'), - ('Ḙ', 'Ḙ'), - ('Ḛ', 'Ḛ'), - ('Ḝ', 'Ḝ'), - ('Ḟ', 'Ḟ'), - ('Ḡ', 'Ḡ'), - ('Ḣ', 'Ḣ'), - ('Ḥ', 'Ḥ'), - ('Ḧ', 'Ḧ'), - ('Ḩ', 'Ḩ'), - ('Ḫ', 'Ḫ'), - ('Ḭ', 'Ḭ'), - ('Ḯ', 'Ḯ'), - ('Ḱ', 'Ḱ'), - ('Ḳ', 'Ḳ'), - ('Ḵ', 'Ḵ'), - ('Ḷ', 'Ḷ'), - ('Ḹ', 'Ḹ'), - ('Ḻ', 'Ḻ'), - ('Ḽ', 'Ḽ'), - ('Ḿ', 'Ḿ'), - ('Ṁ', 'Ṁ'), - ('Ṃ', 'Ṃ'), - ('Ṅ', 'Ṅ'), - ('Ṇ', 'Ṇ'), - ('Ṉ', 'Ṉ'), - ('Ṋ', 'Ṋ'), - ('Ṍ', 'Ṍ'), - ('Ṏ', 'Ṏ'), - ('Ṑ', 'Ṑ'), - ('Ṓ', 'Ṓ'), - ('Ṕ', 'Ṕ'), - ('Ṗ', 'Ṗ'), - ('Ṙ', 'Ṙ'), - ('Ṛ', 'Ṛ'), - ('Ṝ', 'Ṝ'), - ('Ṟ', 'Ṟ'), - ('Ṡ', 'Ṡ'), - ('Ṣ', 'Ṣ'), - ('Ṥ', 'Ṥ'), - ('Ṧ', 'Ṧ'), - ('Ṩ', 'Ṩ'), - ('Ṫ', 'Ṫ'), - ('Ṭ', 'Ṭ'), - ('Ṯ', 'Ṯ'), - ('Ṱ', 'Ṱ'), - ('Ṳ', 'Ṳ'), - ('Ṵ', 'Ṵ'), - ('Ṷ', 'Ṷ'), - ('Ṹ', 'Ṹ'), - ('Ṻ', 'Ṻ'), - ('Ṽ', 'Ṽ'), - ('Ṿ', 'Ṿ'), - ('Ẁ', 'Ẁ'), - ('Ẃ', 'Ẃ'), - ('Ẅ', 'Ẅ'), - ('Ẇ', 'Ẇ'), - ('Ẉ', 'Ẉ'), - ('Ẋ', 'Ẋ'), 
- ('Ẍ', 'Ẍ'), - ('Ẏ', 'Ẏ'), - ('Ẑ', 'Ẑ'), - ('Ẓ', 'Ẓ'), - ('Ẕ', 'Ẕ'), - ('ẞ', 'ẞ'), - ('Ạ', 'Ạ'), - ('Ả', 'Ả'), - ('Ấ', 'Ấ'), - ('Ầ', 'Ầ'), - ('Ẩ', 'Ẩ'), - ('Ẫ', 'Ẫ'), - ('Ậ', 'Ậ'), - ('Ắ', 'Ắ'), - ('Ằ', 'Ằ'), - ('Ẳ', 'Ẳ'), - ('Ẵ', 'Ẵ'), - ('Ặ', 'Ặ'), - ('Ẹ', 'Ẹ'), - ('Ẻ', 'Ẻ'), - ('Ẽ', 'Ẽ'), - ('Ế', 'Ế'), - ('Ề', 'Ề'), - ('Ể', 'Ể'), - ('Ễ', 'Ễ'), - ('Ệ', 'Ệ'), - ('Ỉ', 'Ỉ'), - ('Ị', 'Ị'), - ('Ọ', 'Ọ'), - ('Ỏ', 'Ỏ'), - ('Ố', 'Ố'), - ('Ồ', 'Ồ'), - ('Ổ', 'Ổ'), - ('Ỗ', 'Ỗ'), - ('Ộ', 'Ộ'), - ('Ớ', 'Ớ'), - ('Ờ', 'Ờ'), - ('Ở', 'Ở'), - ('Ỡ', 'Ỡ'), - ('Ợ', 'Ợ'), - ('Ụ', 'Ụ'), - ('Ủ', 'Ủ'), - ('Ứ', 'Ứ'), - ('Ừ', 'Ừ'), - ('Ử', 'Ử'), - ('Ữ', 'Ữ'), - ('Ự', 'Ự'), - ('Ỳ', 'Ỳ'), - ('Ỵ', 'Ỵ'), - ('Ỷ', 'Ỷ'), - ('Ỹ', 'Ỹ'), - ('Ỻ', 'Ỻ'), - ('Ỽ', 'Ỽ'), - ('Ỿ', 'Ỿ'), - ('Ἀ', 'Ἇ'), - ('Ἐ', 'Ἕ'), - ('Ἠ', 'Ἧ'), - ('Ἰ', 'Ἷ'), - ('Ὀ', 'Ὅ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'Ὗ'), - ('Ὠ', 'Ὧ'), - ('Ᾰ', 'Ά'), - ('Ὲ', 'Ή'), - ('Ῐ', 'Ί'), - ('Ῠ', 'Ῥ'), - ('Ὸ', 'Ώ'), - ('ℂ', 'ℂ'), - ('ℇ', 'ℇ'), - ('ℋ', 'ℍ'), - ('ℐ', 'ℒ'), - ('ℕ', 'ℕ'), - ('ℙ', 'ℝ'), - ('ℤ', 'ℤ'), - ('Ω', 'Ω'), - ('ℨ', 'ℨ'), - ('K', 'ℭ'), - ('ℰ', 'ℳ'), - ('ℾ', 'ℿ'), - ('ⅅ', 'ⅅ'), - ('Ⅰ', 'Ⅿ'), - ('Ↄ', 'Ↄ'), - ('Ⓐ', 'Ⓩ'), - ('Ⰰ', 'Ⱟ'), - ('Ⱡ', 'Ⱡ'), - ('Ɫ', 'Ɽ'), - ('Ⱨ', 'Ⱨ'), - ('Ⱪ', 'Ⱪ'), - ('Ⱬ', 'Ⱬ'), - ('Ɑ', 'Ɒ'), - ('Ⱳ', 'Ⱳ'), - ('Ⱶ', 'Ⱶ'), - ('Ȿ', 'Ⲁ'), - ('Ⲃ', 'Ⲃ'), - ('Ⲅ', 'Ⲅ'), - ('Ⲇ', 'Ⲇ'), - ('Ⲉ', 'Ⲉ'), - ('Ⲋ', 'Ⲋ'), - ('Ⲍ', 'Ⲍ'), - ('Ⲏ', 'Ⲏ'), - ('Ⲑ', 'Ⲑ'), - ('Ⲓ', 'Ⲓ'), - ('Ⲕ', 'Ⲕ'), - ('Ⲗ', 'Ⲗ'), - ('Ⲙ', 'Ⲙ'), - ('Ⲛ', 'Ⲛ'), - ('Ⲝ', 'Ⲝ'), - ('Ⲟ', 'Ⲟ'), - ('Ⲡ', 'Ⲡ'), - ('Ⲣ', 'Ⲣ'), - ('Ⲥ', 'Ⲥ'), - ('Ⲧ', 'Ⲧ'), - ('Ⲩ', 'Ⲩ'), - ('Ⲫ', 'Ⲫ'), - ('Ⲭ', 'Ⲭ'), - ('Ⲯ', 'Ⲯ'), - ('Ⲱ', 'Ⲱ'), - ('Ⲳ', 'Ⲳ'), - ('Ⲵ', 'Ⲵ'), - ('Ⲷ', 'Ⲷ'), - ('Ⲹ', 'Ⲹ'), - ('Ⲻ', 'Ⲻ'), - ('Ⲽ', 'Ⲽ'), - ('Ⲿ', 'Ⲿ'), - ('Ⳁ', 'Ⳁ'), - ('Ⳃ', 'Ⳃ'), - ('Ⳅ', 'Ⳅ'), - ('Ⳇ', 'Ⳇ'), - ('Ⳉ', 'Ⳉ'), - ('Ⳋ', 'Ⳋ'), - ('Ⳍ', 'Ⳍ'), - ('Ⳏ', 'Ⳏ'), - ('Ⳑ', 'Ⳑ'), - ('Ⳓ', 'Ⳓ'), - ('Ⳕ', 'Ⳕ'), - ('Ⳗ', 'Ⳗ'), - ('Ⳙ', 'Ⳙ'), - ('Ⳛ', 'Ⳛ'), - ('Ⳝ', 'Ⳝ'), - ('Ⳟ', 'Ⳟ'), - ('Ⳡ', 
'Ⳡ'), - ('Ⳣ', 'Ⳣ'), - ('Ⳬ', 'Ⳬ'), - ('Ⳮ', 'Ⳮ'), - ('Ⳳ', 'Ⳳ'), - ('Ꙁ', 'Ꙁ'), - ('Ꙃ', 'Ꙃ'), - ('Ꙅ', 'Ꙅ'), - ('Ꙇ', 'Ꙇ'), - ('Ꙉ', 'Ꙉ'), - ('Ꙋ', 'Ꙋ'), - ('Ꙍ', 'Ꙍ'), - ('Ꙏ', 'Ꙏ'), - ('Ꙑ', 'Ꙑ'), - ('Ꙓ', 'Ꙓ'), - ('Ꙕ', 'Ꙕ'), - ('Ꙗ', 'Ꙗ'), - ('Ꙙ', 'Ꙙ'), - ('Ꙛ', 'Ꙛ'), - ('Ꙝ', 'Ꙝ'), - ('Ꙟ', 'Ꙟ'), - ('Ꙡ', 'Ꙡ'), - ('Ꙣ', 'Ꙣ'), - ('Ꙥ', 'Ꙥ'), - ('Ꙧ', 'Ꙧ'), - ('Ꙩ', 'Ꙩ'), - ('Ꙫ', 'Ꙫ'), - ('Ꙭ', 'Ꙭ'), - ('Ꚁ', 'Ꚁ'), - ('Ꚃ', 'Ꚃ'), - ('Ꚅ', 'Ꚅ'), - ('Ꚇ', 'Ꚇ'), - ('Ꚉ', 'Ꚉ'), - ('Ꚋ', 'Ꚋ'), - ('Ꚍ', 'Ꚍ'), - ('Ꚏ', 'Ꚏ'), - ('Ꚑ', 'Ꚑ'), - ('Ꚓ', 'Ꚓ'), - ('Ꚕ', 'Ꚕ'), - ('Ꚗ', 'Ꚗ'), - ('Ꚙ', 'Ꚙ'), - ('Ꚛ', 'Ꚛ'), - ('Ꜣ', 'Ꜣ'), - ('Ꜥ', 'Ꜥ'), - ('Ꜧ', 'Ꜧ'), - ('Ꜩ', 'Ꜩ'), - ('Ꜫ', 'Ꜫ'), - ('Ꜭ', 'Ꜭ'), - ('Ꜯ', 'Ꜯ'), - ('Ꜳ', 'Ꜳ'), - ('Ꜵ', 'Ꜵ'), - ('Ꜷ', 'Ꜷ'), - ('Ꜹ', 'Ꜹ'), - ('Ꜻ', 'Ꜻ'), - ('Ꜽ', 'Ꜽ'), - ('Ꜿ', 'Ꜿ'), - ('Ꝁ', 'Ꝁ'), - ('Ꝃ', 'Ꝃ'), - ('Ꝅ', 'Ꝅ'), - ('Ꝇ', 'Ꝇ'), - ('Ꝉ', 'Ꝉ'), - ('Ꝋ', 'Ꝋ'), - ('Ꝍ', 'Ꝍ'), - ('Ꝏ', 'Ꝏ'), - ('Ꝑ', 'Ꝑ'), - ('Ꝓ', 'Ꝓ'), - ('Ꝕ', 'Ꝕ'), - ('Ꝗ', 'Ꝗ'), - ('Ꝙ', 'Ꝙ'), - ('Ꝛ', 'Ꝛ'), - ('Ꝝ', 'Ꝝ'), - ('Ꝟ', 'Ꝟ'), - ('Ꝡ', 'Ꝡ'), - ('Ꝣ', 'Ꝣ'), - ('Ꝥ', 'Ꝥ'), - ('Ꝧ', 'Ꝧ'), - ('Ꝩ', 'Ꝩ'), - ('Ꝫ', 'Ꝫ'), - ('Ꝭ', 'Ꝭ'), - ('Ꝯ', 'Ꝯ'), - ('Ꝺ', 'Ꝺ'), - ('Ꝼ', 'Ꝼ'), - ('Ᵹ', 'Ꝿ'), - ('Ꞁ', 'Ꞁ'), - ('Ꞃ', 'Ꞃ'), - ('Ꞅ', 'Ꞅ'), - ('Ꞇ', 'Ꞇ'), - ('Ꞌ', 'Ꞌ'), - ('Ɥ', 'Ɥ'), - ('Ꞑ', 'Ꞑ'), - ('Ꞓ', 'Ꞓ'), - ('Ꞗ', 'Ꞗ'), - ('Ꞙ', 'Ꞙ'), - ('Ꞛ', 'Ꞛ'), - ('Ꞝ', 'Ꞝ'), - ('Ꞟ', 'Ꞟ'), - ('Ꞡ', 'Ꞡ'), - ('Ꞣ', 'Ꞣ'), - ('Ꞥ', 'Ꞥ'), - ('Ꞧ', 'Ꞧ'), - ('Ꞩ', 'Ꞩ'), - ('Ɦ', 'Ɪ'), - ('Ʞ', 'Ꞵ'), - ('Ꞷ', 'Ꞷ'), - ('Ꞹ', 'Ꞹ'), - ('Ꞻ', 'Ꞻ'), - ('Ꞽ', 'Ꞽ'), - ('Ꞿ', 'Ꞿ'), - ('Ꟁ', 'Ꟁ'), - ('Ꟃ', 'Ꟃ'), - ('Ꞔ', 'Ꟈ'), - ('Ꟊ', 'Ꟊ'), - ('Ɤ', 'Ꟍ'), - ('Ꟑ', 'Ꟑ'), - ('Ꟗ', 'Ꟗ'), - ('Ꟙ', 'Ꟙ'), - ('Ꟛ', 'Ꟛ'), - ('Ƛ', 'Ƛ'), - ('Ꟶ', 'Ꟶ'), - ('A', 'Z'), - ('𐐀', '𐐧'), - ('𐒰', '𐓓'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐲀', '𐲲'), - ('𐵐', '𐵥'), - ('𑢠', '𑢿'), - ('𖹀', '𖹟'), - ('𝐀', '𝐙'), - ('𝐴', '𝑍'), - ('𝑨', '𝒁'), - ('𝒜', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒵'), - ('𝓐', '𝓩'), - ('𝔄', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - 
('𝔖', '𝔜'), - ('𝔸', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕬', '𝖅'), - ('𝖠', '𝖹'), - ('𝗔', '𝗭'), - ('𝘈', '𝘡'), - ('𝘼', '𝙕'), - ('𝙰', '𝚉'), - ('𝚨', '𝛀'), - ('𝛢', '𝛺'), - ('𝜜', '𝜴'), - ('𝝖', '𝝮'), - ('𝞐', '𝞨'), - ('𝟊', '𝟊'), - ('𞤀', '𞤡'), - ('🄰', '🅉'), - ('🅐', '🅩'), - ('🅰', '🆉'), -]; - -pub const VARIATION_SELECTOR: &'static [(char, char)] = &[ - ('\u{180b}', '\u{180d}'), - ('\u{180f}', '\u{180f}'), - ('\u{fe00}', '\u{fe0f}'), - ('\u{e0100}', '\u{e01ef}'), -]; - -pub const WHITE_SPACE: &'static [(char, char)] = &[ - ('\t', '\r'), - (' ', ' '), - ('\u{85}', '\u{85}'), - ('\u{a0}', '\u{a0}'), - ('\u{1680}', '\u{1680}'), - ('\u{2000}', '\u{200a}'), - ('\u{2028}', '\u{2029}'), - ('\u{202f}', '\u{202f}'), - ('\u{205f}', '\u{205f}'), - ('\u{3000}', '\u{3000}'), -]; - -pub const XID_CONTINUE: &'static [(char, char)] = &[ - ('0', '9'), - ('A', 'Z'), - ('_', '_'), - ('a', 'z'), - ('ª', 'ª'), - ('µ', 'µ'), - ('·', '·'), - ('º', 'º'), - ('À', 'Ö'), - ('Ø', 'ö'), - ('ø', 'ˁ'), - ('ˆ', 'ˑ'), - ('ˠ', 'ˤ'), - ('ˬ', 'ˬ'), - ('ˮ', 'ˮ'), - ('\u{300}', 'ʹ'), - ('Ͷ', 'ͷ'), - ('ͻ', 'ͽ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', 'ϵ'), - ('Ϸ', 'ҁ'), - ('\u{483}', '\u{487}'), - ('Ҋ', 'ԯ'), - ('Ա', 'Ֆ'), - ('ՙ', 'ՙ'), - ('ՠ', 'ֈ'), - ('\u{591}', '\u{5bd}'), - ('\u{5bf}', '\u{5bf}'), - ('\u{5c1}', '\u{5c2}'), - ('\u{5c4}', '\u{5c5}'), - ('\u{5c7}', '\u{5c7}'), - ('א', 'ת'), - ('ׯ', 'ײ'), - ('\u{610}', '\u{61a}'), - ('ؠ', '٩'), - ('ٮ', 'ۓ'), - ('ە', '\u{6dc}'), - ('\u{6df}', '\u{6e8}'), - ('\u{6ea}', 'ۼ'), - ('ۿ', 'ۿ'), - ('ܐ', '\u{74a}'), - ('ݍ', 'ޱ'), - ('߀', 'ߵ'), - ('ߺ', 'ߺ'), - ('\u{7fd}', '\u{7fd}'), - ('ࠀ', '\u{82d}'), - ('ࡀ', '\u{85b}'), - ('ࡠ', 'ࡪ'), - ('ࡰ', 'ࢇ'), - ('ࢉ', 'ࢎ'), - ('\u{897}', '\u{8e1}'), - ('\u{8e3}', '\u{963}'), - ('०', '९'), - ('ॱ', 'ঃ'), - ('অ', 'ঌ'), - ('এ', 'ঐ'), - ('ও', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('\u{9bc}', '\u{9c4}'), - ('ে', 'ৈ'), - ('ো', 'ৎ'), - ('\u{9d7}', '\u{9d7}'), - ('ড়', 'ঢ়'), - 
('য়', '\u{9e3}'), - ('০', 'ৱ'), - ('ৼ', 'ৼ'), - ('\u{9fe}', '\u{9fe}'), - ('\u{a01}', 'ਃ'), - ('ਅ', 'ਊ'), - ('ਏ', 'ਐ'), - ('ਓ', 'ਨ'), - ('ਪ', 'ਰ'), - ('ਲ', 'ਲ਼'), - ('ਵ', 'ਸ਼'), - ('ਸ', 'ਹ'), - ('\u{a3c}', '\u{a3c}'), - ('ਾ', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4d}'), - ('\u{a51}', '\u{a51}'), - ('ਖ਼', 'ੜ'), - ('ਫ਼', 'ਫ਼'), - ('੦', '\u{a75}'), - ('\u{a81}', 'ઃ'), - ('અ', 'ઍ'), - ('એ', 'ઑ'), - ('ઓ', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('\u{abc}', '\u{ac5}'), - ('\u{ac7}', 'ૉ'), - ('ો', '\u{acd}'), - ('ૐ', 'ૐ'), - ('ૠ', '\u{ae3}'), - ('૦', '૯'), - ('ૹ', '\u{aff}'), - ('\u{b01}', 'ଃ'), - ('ଅ', 'ଌ'), - ('ଏ', 'ଐ'), - ('ଓ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 'ଳ'), - ('ଵ', 'ହ'), - ('\u{b3c}', '\u{b44}'), - ('େ', 'ୈ'), - ('ୋ', '\u{b4d}'), - ('\u{b55}', '\u{b57}'), - ('ଡ଼', 'ଢ଼'), - ('ୟ', '\u{b63}'), - ('୦', '୯'), - ('ୱ', 'ୱ'), - ('\u{b82}', 'ஃ'), - ('அ', 'ஊ'), - ('எ', 'ஐ'), - ('ஒ', 'க'), - ('ங', 'ச'), - ('ஜ', 'ஜ'), - ('ஞ', 'ட'), - ('ண', 'த'), - ('ந', 'ப'), - ('ம', 'ஹ'), - ('\u{bbe}', 'ூ'), - ('ெ', 'ை'), - ('ொ', '\u{bcd}'), - ('ௐ', 'ௐ'), - ('\u{bd7}', '\u{bd7}'), - ('௦', '௯'), - ('\u{c00}', 'ఌ'), - ('ఎ', 'ఐ'), - ('ఒ', 'న'), - ('ప', 'హ'), - ('\u{c3c}', 'ౄ'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4d}'), - ('\u{c55}', '\u{c56}'), - ('ౘ', 'ౚ'), - ('ౝ', 'ౝ'), - ('ౠ', '\u{c63}'), - ('౦', '౯'), - ('ಀ', 'ಃ'), - ('ಅ', 'ಌ'), - ('ಎ', 'ಐ'), - ('ಒ', 'ನ'), - ('ಪ', 'ಳ'), - ('ವ', 'ಹ'), - ('\u{cbc}', 'ೄ'), - ('\u{cc6}', '\u{cc8}'), - ('\u{cca}', '\u{ccd}'), - ('\u{cd5}', '\u{cd6}'), - ('ೝ', 'ೞ'), - ('ೠ', '\u{ce3}'), - ('೦', '೯'), - ('ೱ', 'ೳ'), - ('\u{d00}', 'ഌ'), - ('എ', 'ഐ'), - ('ഒ', '\u{d44}'), - ('െ', 'ൈ'), - ('ൊ', 'ൎ'), - ('ൔ', '\u{d57}'), - ('ൟ', '\u{d63}'), - ('൦', '൯'), - ('ൺ', 'ൿ'), - ('\u{d81}', 'ඃ'), - ('අ', 'ඖ'), - ('ක', 'න'), - ('ඳ', 'ර'), - ('ල', 'ල'), - ('ව', 'ෆ'), - ('\u{dca}', '\u{dca}'), - ('\u{dcf}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('ෘ', '\u{ddf}'), - ('෦', '෯'), - ('ෲ', 'ෳ'), - ('ก', '\u{e3a}'), - ('เ', '\u{e4e}'), - ('๐', '๙'), 
- ('ກ', 'ຂ'), - ('ຄ', 'ຄ'), - ('ຆ', 'ຊ'), - ('ຌ', 'ຣ'), - ('ລ', 'ລ'), - ('ວ', 'ຽ'), - ('ເ', 'ໄ'), - ('ໆ', 'ໆ'), - ('\u{ec8}', '\u{ece}'), - ('໐', '໙'), - ('ໜ', 'ໟ'), - ('ༀ', 'ༀ'), - ('\u{f18}', '\u{f19}'), - ('༠', '༩'), - ('\u{f35}', '\u{f35}'), - ('\u{f37}', '\u{f37}'), - ('\u{f39}', '\u{f39}'), - ('༾', 'ཇ'), - ('ཉ', 'ཬ'), - ('\u{f71}', '\u{f84}'), - ('\u{f86}', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('\u{fc6}', '\u{fc6}'), - ('က', '၉'), - ('ၐ', '\u{109d}'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ა', 'ჺ'), - ('ჼ', 'ቈ'), - ('ቊ', 'ቍ'), - ('ቐ', 'ቖ'), - ('ቘ', 'ቘ'), - ('ቚ', 'ቝ'), - ('በ', 'ኈ'), - ('ኊ', 'ኍ'), - ('ነ', 'ኰ'), - ('ኲ', 'ኵ'), - ('ኸ', 'ኾ'), - ('ዀ', 'ዀ'), - ('ዂ', 'ዅ'), - ('ወ', 'ዖ'), - ('ዘ', 'ጐ'), - ('ጒ', 'ጕ'), - ('ጘ', 'ፚ'), - ('\u{135d}', '\u{135f}'), - ('፩', '፱'), - ('ᎀ', 'ᎏ'), - ('Ꭰ', 'Ᏽ'), - ('ᏸ', 'ᏽ'), - ('ᐁ', 'ᙬ'), - ('ᙯ', 'ᙿ'), - ('ᚁ', 'ᚚ'), - ('ᚠ', 'ᛪ'), - ('ᛮ', 'ᛸ'), - ('ᜀ', '\u{1715}'), - ('ᜟ', '\u{1734}'), - ('ᝀ', '\u{1753}'), - ('ᝠ', 'ᝬ'), - ('ᝮ', 'ᝰ'), - ('\u{1772}', '\u{1773}'), - ('ក', '\u{17d3}'), - ('ៗ', 'ៗ'), - ('ៜ', '\u{17dd}'), - ('០', '៩'), - ('\u{180b}', '\u{180d}'), - ('\u{180f}', '᠙'), - ('ᠠ', 'ᡸ'), - ('ᢀ', 'ᢪ'), - ('ᢰ', 'ᣵ'), - ('ᤀ', 'ᤞ'), - ('\u{1920}', 'ᤫ'), - ('ᤰ', '\u{193b}'), - ('᥆', 'ᥭ'), - ('ᥰ', 'ᥴ'), - ('ᦀ', 'ᦫ'), - ('ᦰ', 'ᧉ'), - ('᧐', '᧚'), - ('ᨀ', '\u{1a1b}'), - ('ᨠ', '\u{1a5e}'), - ('\u{1a60}', '\u{1a7c}'), - ('\u{1a7f}', '᪉'), - ('᪐', '᪙'), - ('ᪧ', 'ᪧ'), - ('\u{1ab0}', '\u{1abd}'), - ('\u{1abf}', '\u{1ace}'), - ('\u{1b00}', 'ᭌ'), - ('᭐', '᭙'), - ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '\u{1bf3}'), - ('ᰀ', '\u{1c37}'), - ('᱀', '᱉'), - ('ᱍ', 'ᱽ'), - ('ᲀ', 'ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('\u{1cd0}', '\u{1cd2}'), - ('\u{1cd4}', 'ᳺ'), - ('ᴀ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), - ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ᾼ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῌ'), - ('ῐ', 'ΐ'), - ('ῖ', 'Ί'), - ('ῠ', 'Ῥ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῼ'), - ('\u{200c}', 
'\u{200d}'), - ('‿', '⁀'), - ('⁔', '⁔'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('\u{20d0}', '\u{20dc}'), - ('\u{20e1}', '\u{20e1}'), - ('\u{20e5}', '\u{20f0}'), - ('ℂ', 'ℂ'), - ('ℇ', 'ℇ'), - ('ℊ', 'ℓ'), - ('ℕ', 'ℕ'), - ('℘', 'ℝ'), - ('ℤ', 'ℤ'), - ('Ω', 'Ω'), - ('ℨ', 'ℨ'), - ('K', 'ℹ'), - ('ℼ', 'ℿ'), - ('ⅅ', 'ⅉ'), - ('ⅎ', 'ⅎ'), - ('Ⅰ', 'ↈ'), - ('Ⰰ', 'ⳤ'), - ('Ⳬ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ⴰ', 'ⵧ'), - ('ⵯ', 'ⵯ'), - ('\u{2d7f}', 'ⶖ'), - ('ⶠ', 'ⶦ'), - ('ⶨ', 'ⶮ'), - ('ⶰ', 'ⶶ'), - ('ⶸ', 'ⶾ'), - ('ⷀ', 'ⷆ'), - ('ⷈ', 'ⷎ'), - ('ⷐ', 'ⷖ'), - ('ⷘ', 'ⷞ'), - ('\u{2de0}', '\u{2dff}'), - ('々', '〇'), - ('〡', '\u{302f}'), - ('〱', '〵'), - ('〸', '〼'), - ('ぁ', 'ゖ'), - ('\u{3099}', '\u{309a}'), - ('ゝ', 'ゟ'), - ('ァ', 'ヿ'), - ('ㄅ', 'ㄯ'), - ('ㄱ', 'ㆎ'), - ('ㆠ', 'ㆿ'), - ('ㇰ', 'ㇿ'), - ('㐀', '䶿'), - ('一', 'ꒌ'), - ('ꓐ', 'ꓽ'), - ('ꔀ', 'ꘌ'), - ('ꘐ', 'ꘫ'), - ('Ꙁ', '\u{a66f}'), - ('\u{a674}', '\u{a67d}'), - ('ꙿ', '\u{a6f1}'), - ('ꜗ', 'ꜟ'), - ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟍ'), - ('Ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'Ƛ'), - ('ꟲ', 'ꠧ'), - ('\u{a82c}', '\u{a82c}'), - ('ꡀ', 'ꡳ'), - ('ꢀ', '\u{a8c5}'), - ('꣐', '꣙'), - ('\u{a8e0}', 'ꣷ'), - ('ꣻ', 'ꣻ'), - ('ꣽ', '\u{a92d}'), - ('ꤰ', '\u{a953}'), - ('ꥠ', 'ꥼ'), - ('\u{a980}', '\u{a9c0}'), - ('ꧏ', '꧙'), - ('ꧠ', 'ꧾ'), - ('ꨀ', '\u{aa36}'), - ('ꩀ', 'ꩍ'), - ('꩐', '꩙'), - ('ꩠ', 'ꩶ'), - ('ꩺ', 'ꫂ'), - ('ꫛ', 'ꫝ'), - ('ꫠ', 'ꫯ'), - ('ꫲ', '\u{aaf6}'), - ('ꬁ', 'ꬆ'), - ('ꬉ', 'ꬎ'), - ('ꬑ', 'ꬖ'), - ('ꬠ', 'ꬦ'), - ('ꬨ', 'ꬮ'), - ('ꬰ', 'ꭚ'), - ('ꭜ', 'ꭩ'), - ('ꭰ', 'ꯪ'), - ('꯬', '\u{abed}'), - ('꯰', '꯹'), - ('가', '힣'), - ('ힰ', 'ퟆ'), - ('ퟋ', 'ퟻ'), - ('豈', '舘'), - ('並', '龎'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('יִ', 'ﬨ'), - ('שׁ', 'זּ'), - ('טּ', 'לּ'), - ('מּ', 'מּ'), - ('נּ', 'סּ'), - ('ףּ', 'פּ'), - ('צּ', 'ﮱ'), - ('ﯓ', 'ﱝ'), - ('ﱤ', 'ﴽ'), - ('ﵐ', 'ﶏ'), - ('ﶒ', 'ﷇ'), - ('ﷰ', 'ﷹ'), - ('\u{fe00}', '\u{fe0f}'), - ('\u{fe20}', '\u{fe2f}'), - ('︳', '︴'), - ('﹍', '﹏'), - ('ﹱ', 'ﹱ'), - ('ﹳ', 'ﹳ'), - ('ﹷ', 'ﹷ'), - ('ﹹ', 'ﹹ'), - ('ﹻ', 'ﹻ'), - ('ﹽ', 'ﹽ'), - ('ﹿ', 'ﻼ'), - 
('0', '9'), - ('A', 'Z'), - ('_', '_'), - ('a', 'z'), - ('・', 'ᄒ'), - ('ᅡ', 'ᅦ'), - ('ᅧ', 'ᅬ'), - ('ᅭ', 'ᅲ'), - ('ᅳ', 'ᅵ'), - ('𐀀', '𐀋'), - ('𐀍', '𐀦'), - ('𐀨', '𐀺'), - ('𐀼', '𐀽'), - ('𐀿', '𐁍'), - ('𐁐', '𐁝'), - ('𐂀', '𐃺'), - ('𐅀', '𐅴'), - ('\u{101fd}', '\u{101fd}'), - ('𐊀', '𐊜'), - ('𐊠', '𐋐'), - ('\u{102e0}', '\u{102e0}'), - ('𐌀', '𐌟'), - ('𐌭', '𐍊'), - ('𐍐', '\u{1037a}'), - ('𐎀', '𐎝'), - ('𐎠', '𐏃'), - ('𐏈', '𐏏'), - ('𐏑', '𐏕'), - ('𐐀', '𐒝'), - ('𐒠', '𐒩'), - ('𐒰', '𐓓'), - ('𐓘', '𐓻'), - ('𐔀', '𐔧'), - ('𐔰', '𐕣'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐗀', '𐗳'), - ('𐘀', '𐜶'), - ('𐝀', '𐝕'), - ('𐝠', '𐝧'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𐠀', '𐠅'), - ('𐠈', '𐠈'), - ('𐠊', '𐠵'), - ('𐠷', '𐠸'), - ('𐠼', '𐠼'), - ('𐠿', '𐡕'), - ('𐡠', '𐡶'), - ('𐢀', '𐢞'), - ('𐣠', '𐣲'), - ('𐣴', '𐣵'), - ('𐤀', '𐤕'), - ('𐤠', '𐤹'), - ('𐦀', '𐦷'), - ('𐦾', '𐦿'), - ('𐨀', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '𐨓'), - ('𐨕', '𐨗'), - ('𐨙', '𐨵'), - ('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '\u{10a3f}'), - ('𐩠', '𐩼'), - ('𐪀', '𐪜'), - ('𐫀', '𐫇'), - ('𐫉', '\u{10ae6}'), - ('𐬀', '𐬵'), - ('𐭀', '𐭕'), - ('𐭠', '𐭲'), - ('𐮀', '𐮑'), - ('𐰀', '𐱈'), - ('𐲀', '𐲲'), - ('𐳀', '𐳲'), - ('𐴀', '\u{10d27}'), - ('𐴰', '𐴹'), - ('𐵀', '𐵥'), - ('\u{10d69}', '\u{10d6d}'), - ('𐵯', '𐶅'), - ('𐺀', '𐺩'), - ('\u{10eab}', '\u{10eac}'), - ('𐺰', '𐺱'), - ('𐻂', '𐻄'), - ('\u{10efc}', '𐼜'), - ('𐼧', '𐼧'), - ('𐼰', '\u{10f50}'), - ('𐽰', '\u{10f85}'), - ('𐾰', '𐿄'), - ('𐿠', '𐿶'), - ('𑀀', '\u{11046}'), - ('𑁦', '𑁵'), - ('\u{1107f}', '\u{110ba}'), - ('\u{110c2}', '\u{110c2}'), - ('𑃐', '𑃨'), - ('𑃰', '𑃹'), - ('\u{11100}', '\u{11134}'), - ('𑄶', '𑄿'), - ('𑅄', '𑅇'), - ('𑅐', '\u{11173}'), - ('𑅶', '𑅶'), - ('\u{11180}', '𑇄'), - ('\u{111c9}', '\u{111cc}'), - ('𑇎', '𑇚'), - ('𑇜', '𑇜'), - ('𑈀', '𑈑'), - ('𑈓', '\u{11237}'), - ('\u{1123e}', '\u{11241}'), - ('𑊀', '𑊆'), - ('𑊈', '𑊈'), - ('𑊊', '𑊍'), - ('𑊏', '𑊝'), - ('𑊟', '𑊨'), - ('𑊰', '\u{112ea}'), - ('𑋰', '𑋹'), - 
('\u{11300}', '𑌃'), - ('𑌅', '𑌌'), - ('𑌏', '𑌐'), - ('𑌓', '𑌨'), - ('𑌪', '𑌰'), - ('𑌲', '𑌳'), - ('𑌵', '𑌹'), - ('\u{1133b}', '𑍄'), - ('𑍇', '𑍈'), - ('𑍋', '\u{1134d}'), - ('𑍐', '𑍐'), - ('\u{11357}', '\u{11357}'), - ('𑍝', '𑍣'), - ('\u{11366}', '\u{1136c}'), - ('\u{11370}', '\u{11374}'), - ('𑎀', '𑎉'), - ('𑎋', '𑎋'), - ('𑎎', '𑎎'), - ('𑎐', '𑎵'), - ('𑎷', '\u{113c0}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '𑏊'), - ('𑏌', '𑏓'), - ('\u{113e1}', '\u{113e2}'), - ('𑐀', '𑑊'), - ('𑑐', '𑑙'), - ('\u{1145e}', '𑑡'), - ('𑒀', '𑓅'), - ('𑓇', '𑓇'), - ('𑓐', '𑓙'), - ('𑖀', '\u{115b5}'), - ('𑖸', '\u{115c0}'), - ('𑗘', '\u{115dd}'), - ('𑘀', '\u{11640}'), - ('𑙄', '𑙄'), - ('𑙐', '𑙙'), - ('𑚀', '𑚸'), - ('𑛀', '𑛉'), - ('𑛐', '𑛣'), - ('𑜀', '𑜚'), - ('\u{1171d}', '\u{1172b}'), - ('𑜰', '𑜹'), - ('𑝀', '𑝆'), - ('𑠀', '\u{1183a}'), - ('𑢠', '𑣩'), - ('𑣿', '𑤆'), - ('𑤉', '𑤉'), - ('𑤌', '𑤓'), - ('𑤕', '𑤖'), - ('𑤘', '𑤵'), - ('𑤷', '𑤸'), - ('\u{1193b}', '\u{11943}'), - ('𑥐', '𑥙'), - ('𑦠', '𑦧'), - ('𑦪', '\u{119d7}'), - ('\u{119da}', '𑧡'), - ('𑧣', '𑧤'), - ('𑨀', '\u{11a3e}'), - ('\u{11a47}', '\u{11a47}'), - ('𑩐', '\u{11a99}'), - ('𑪝', '𑪝'), - ('𑪰', '𑫸'), - ('𑯀', '𑯠'), - ('𑯰', '𑯹'), - ('𑰀', '𑰈'), - ('𑰊', '\u{11c36}'), - ('\u{11c38}', '𑱀'), - ('𑱐', '𑱙'), - ('𑱲', '𑲏'), - ('\u{11c92}', '\u{11ca7}'), - ('𑲩', '\u{11cb6}'), - ('𑴀', '𑴆'), - ('𑴈', '𑴉'), - ('𑴋', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d47}'), - ('𑵐', '𑵙'), - ('𑵠', '𑵥'), - ('𑵧', '𑵨'), - ('𑵪', '𑶎'), - ('\u{11d90}', '\u{11d91}'), - ('𑶓', '𑶘'), - ('𑶠', '𑶩'), - ('𑻠', '𑻶'), - ('\u{11f00}', '𑼐'), - ('𑼒', '\u{11f3a}'), - ('𑼾', '\u{11f42}'), - ('𑽐', '\u{11f5a}'), - ('𑾰', '𑾰'), - ('𒀀', '𒎙'), - ('𒐀', '𒑮'), - ('𒒀', '𒕃'), - ('𒾐', '𒿰'), - ('𓀀', '𓐯'), - ('\u{13440}', '\u{13455}'), - ('𓑠', '𔏺'), - ('𔐀', '𔙆'), - ('𖄀', '𖄹'), - ('𖠀', '𖨸'), - ('𖩀', '𖩞'), - ('𖩠', '𖩩'), - ('𖩰', '𖪾'), - ('𖫀', '𖫉'), - ('𖫐', '𖫭'), - ('\u{16af0}', '\u{16af4}'), - ('𖬀', '\u{16b36}'), - ('𖭀', '𖭃'), - ('𖭐', '𖭙'), - ('𖭣', '𖭷'), 
- ('𖭽', '𖮏'), - ('𖵀', '𖵬'), - ('𖵰', '𖵹'), - ('𖹀', '𖹿'), - ('𖼀', '𖽊'), - ('\u{16f4f}', '𖾇'), - ('\u{16f8f}', '𖾟'), - ('𖿠', '𖿡'), - ('𖿣', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), - ('𗀀', '𘟷'), - ('𘠀', '𘳕'), - ('𘳿', '𘴈'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('𛀀', '𛄢'), - ('𛄲', '𛄲'), - ('𛅐', '𛅒'), - ('𛅕', '𛅕'), - ('𛅤', '𛅧'), - ('𛅰', '𛋻'), - ('𛰀', '𛱪'), - ('𛱰', '𛱼'), - ('𛲀', '𛲈'), - ('𛲐', '𛲙'), - ('\u{1bc9d}', '\u{1bc9e}'), - ('𜳰', '𜳹'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d165}', '\u{1d169}'), - ('\u{1d16d}', '\u{1d172}'), - ('\u{1d17b}', '\u{1d182}'), - ('\u{1d185}', '\u{1d18b}'), - ('\u{1d1aa}', '\u{1d1ad}'), - ('\u{1d242}', '\u{1d244}'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝛀'), - ('𝛂', '𝛚'), - ('𝛜', '𝛺'), - ('𝛼', '𝜔'), - ('𝜖', '𝜴'), - ('𝜶', '𝝎'), - ('𝝐', '𝝮'), - ('𝝰', '𝞈'), - ('𝞊', '𝞨'), - ('𝞪', '𝟂'), - ('𝟄', '𝟋'), - ('𝟎', '𝟿'), - ('\u{1da00}', '\u{1da36}'), - ('\u{1da3b}', '\u{1da6c}'), - ('\u{1da75}', '\u{1da75}'), - ('\u{1da84}', '\u{1da84}'), - ('\u{1da9b}', '\u{1da9f}'), - ('\u{1daa1}', '\u{1daaf}'), - ('𝼀', '𝼞'), - ('𝼥', '𝼪'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), - ('𞀰', '𞁭'), - ('\u{1e08f}', '\u{1e08f}'), - ('𞄀', '𞄬'), - ('\u{1e130}', '𞄽'), - ('𞅀', '𞅉'), - ('𞅎', '𞅎'), - ('𞊐', '\u{1e2ae}'), - ('𞋀', '𞋹'), - ('𞓐', '𞓹'), - ('𞗐', '𞗺'), - ('𞟠', '𞟦'), - ('𞟨', '𞟫'), - ('𞟭', '𞟮'), - ('𞟰', '𞟾'), - ('𞠀', '𞣄'), - ('\u{1e8d0}', '\u{1e8d6}'), - ('𞤀', '𞥋'), - ('𞥐', '𞥙'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', 
'𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('🯰', '🯹'), - ('𠀀', '𪛟'), - ('𪜀', '𫜹'), - ('𫝀', '𫠝'), - ('𫠠', '𬺡'), - ('𬺰', '𮯠'), - ('𮯰', '𮹝'), - ('丽', '𪘀'), - ('𰀀', '𱍊'), - ('𱍐', '𲎯'), - ('\u{e0100}', '\u{e01ef}'), -]; - -pub const XID_START: &'static [(char, char)] = &[ - ('A', 'Z'), - ('a', 'z'), - ('ª', 'ª'), - ('µ', 'µ'), - ('º', 'º'), - ('À', 'Ö'), - ('Ø', 'ö'), - ('ø', 'ˁ'), - ('ˆ', 'ˑ'), - ('ˠ', 'ˤ'), - ('ˬ', 'ˬ'), - ('ˮ', 'ˮ'), - ('Ͱ', 'ʹ'), - ('Ͷ', 'ͷ'), - ('ͻ', 'ͽ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', 'ϵ'), - ('Ϸ', 'ҁ'), - ('Ҋ', 'ԯ'), - ('Ա', 'Ֆ'), - ('ՙ', 'ՙ'), - ('ՠ', 'ֈ'), - ('א', 'ת'), - ('ׯ', 'ײ'), - ('ؠ', 'ي'), - ('ٮ', 'ٯ'), - ('ٱ', 'ۓ'), - ('ە', 'ە'), - ('ۥ', 'ۦ'), - ('ۮ', 'ۯ'), - ('ۺ', 'ۼ'), - ('ۿ', 'ۿ'), - ('ܐ', 'ܐ'), - ('ܒ', 'ܯ'), - ('ݍ', 'ޥ'), - ('ޱ', 'ޱ'), - ('ߊ', 'ߪ'), - ('ߴ', 'ߵ'), - ('ߺ', 'ߺ'), - ('ࠀ', 'ࠕ'), - ('ࠚ', 'ࠚ'), - ('ࠤ', 'ࠤ'), - ('ࠨ', 'ࠨ'), - ('ࡀ', 'ࡘ'), - ('ࡠ', 'ࡪ'), - ('ࡰ', 'ࢇ'), - ('ࢉ', 'ࢎ'), - ('ࢠ', 'ࣉ'), - ('ऄ', 'ह'), - ('ऽ', 'ऽ'), - ('ॐ', 'ॐ'), - ('क़', 'ॡ'), - ('ॱ', 'ঀ'), - ('অ', 'ঌ'), - ('এ', 'ঐ'), - ('ও', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('ঽ', 'ঽ'), - ('ৎ', 'ৎ'), - ('ড়', 'ঢ়'), - ('য়', 'ৡ'), - ('ৰ', 'ৱ'), - ('ৼ', 'ৼ'), - ('ਅ', 'ਊ'), - ('ਏ', 'ਐ'), - ('ਓ', 'ਨ'), - ('ਪ', 'ਰ'), - ('ਲ', 'ਲ਼'), - ('ਵ', 'ਸ਼'), - ('ਸ', 'ਹ'), - ('ਖ਼', 'ੜ'), - ('ਫ਼', 'ਫ਼'), - ('ੲ', 'ੴ'), - ('અ', 'ઍ'), - ('એ', 'ઑ'), - ('ઓ', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('ઽ', 'ઽ'), - ('ૐ', 'ૐ'), - ('ૠ', 'ૡ'), - ('ૹ', 'ૹ'), - ('ଅ', 'ଌ'), - ('ଏ', 'ଐ'), - ('ଓ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 'ଳ'), - ('ଵ', 'ହ'), - ('ଽ', 'ଽ'), - ('ଡ଼', 'ଢ଼'), - ('ୟ', 'ୡ'), - ('ୱ', 'ୱ'), - ('ஃ', 'ஃ'), - ('அ', 'ஊ'), - ('எ', 'ஐ'), - ('ஒ', 'க'), - ('ங', 'ச'), - ('ஜ', 'ஜ'), - ('ஞ', 'ட'), - ('ண', 'த'), - ('ந', 'ப'), - 
('ம', 'ஹ'), - ('ௐ', 'ௐ'), - ('అ', 'ఌ'), - ('ఎ', 'ఐ'), - ('ఒ', 'న'), - ('ప', 'హ'), - ('ఽ', 'ఽ'), - ('ౘ', 'ౚ'), - ('ౝ', 'ౝ'), - ('ౠ', 'ౡ'), - ('ಀ', 'ಀ'), - ('ಅ', 'ಌ'), - ('ಎ', 'ಐ'), - ('ಒ', 'ನ'), - ('ಪ', 'ಳ'), - ('ವ', 'ಹ'), - ('ಽ', 'ಽ'), - ('ೝ', 'ೞ'), - ('ೠ', 'ೡ'), - ('ೱ', 'ೲ'), - ('ഄ', 'ഌ'), - ('എ', 'ഐ'), - ('ഒ', 'ഺ'), - ('ഽ', 'ഽ'), - ('ൎ', 'ൎ'), - ('ൔ', 'ൖ'), - ('ൟ', 'ൡ'), - ('ൺ', 'ൿ'), - ('අ', 'ඖ'), - ('ක', 'න'), - ('ඳ', 'ර'), - ('ල', 'ල'), - ('ව', 'ෆ'), - ('ก', 'ะ'), - ('า', 'า'), - ('เ', 'ๆ'), - ('ກ', 'ຂ'), - ('ຄ', 'ຄ'), - ('ຆ', 'ຊ'), - ('ຌ', 'ຣ'), - ('ລ', 'ລ'), - ('ວ', 'ະ'), - ('າ', 'າ'), - ('ຽ', 'ຽ'), - ('ເ', 'ໄ'), - ('ໆ', 'ໆ'), - ('ໜ', 'ໟ'), - ('ༀ', 'ༀ'), - ('ཀ', 'ཇ'), - ('ཉ', 'ཬ'), - ('ྈ', 'ྌ'), - ('က', 'ဪ'), - ('ဿ', 'ဿ'), - ('ၐ', 'ၕ'), - ('ၚ', 'ၝ'), - ('ၡ', 'ၡ'), - ('ၥ', 'ၦ'), - ('ၮ', 'ၰ'), - ('ၵ', 'ႁ'), - ('ႎ', 'ႎ'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ა', 'ჺ'), - ('ჼ', 'ቈ'), - ('ቊ', 'ቍ'), - ('ቐ', 'ቖ'), - ('ቘ', 'ቘ'), - ('ቚ', 'ቝ'), - ('በ', 'ኈ'), - ('ኊ', 'ኍ'), - ('ነ', 'ኰ'), - ('ኲ', 'ኵ'), - ('ኸ', 'ኾ'), - ('ዀ', 'ዀ'), - ('ዂ', 'ዅ'), - ('ወ', 'ዖ'), - ('ዘ', 'ጐ'), - ('ጒ', 'ጕ'), - ('ጘ', 'ፚ'), - ('ᎀ', 'ᎏ'), - ('Ꭰ', 'Ᏽ'), - ('ᏸ', 'ᏽ'), - ('ᐁ', 'ᙬ'), - ('ᙯ', 'ᙿ'), - ('ᚁ', 'ᚚ'), - ('ᚠ', 'ᛪ'), - ('ᛮ', 'ᛸ'), - ('ᜀ', 'ᜑ'), - ('ᜟ', 'ᜱ'), - ('ᝀ', 'ᝑ'), - ('ᝠ', 'ᝬ'), - ('ᝮ', 'ᝰ'), - ('ក', 'ឳ'), - ('ៗ', 'ៗ'), - ('ៜ', 'ៜ'), - ('ᠠ', 'ᡸ'), - ('ᢀ', 'ᢨ'), - ('ᢪ', 'ᢪ'), - ('ᢰ', 'ᣵ'), - ('ᤀ', 'ᤞ'), - ('ᥐ', 'ᥭ'), - ('ᥰ', 'ᥴ'), - ('ᦀ', 'ᦫ'), - ('ᦰ', 'ᧉ'), - ('ᨀ', 'ᨖ'), - ('ᨠ', 'ᩔ'), - ('ᪧ', 'ᪧ'), - ('ᬅ', 'ᬳ'), - ('ᭅ', 'ᭌ'), - ('ᮃ', 'ᮠ'), - ('ᮮ', 'ᮯ'), - ('ᮺ', 'ᯥ'), - ('ᰀ', 'ᰣ'), - ('ᱍ', 'ᱏ'), - ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('ᳩ', 'ᳬ'), - ('ᳮ', 'ᳳ'), - ('ᳵ', 'ᳶ'), - ('ᳺ', 'ᳺ'), - ('ᴀ', 'ᶿ'), - ('Ḁ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), - ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ᾼ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῌ'), - ('ῐ', 'ΐ'), - ('ῖ', 'Ί'), - ('ῠ', 'Ῥ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῼ'), 
- ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('ℂ', 'ℂ'), - ('ℇ', 'ℇ'), - ('ℊ', 'ℓ'), - ('ℕ', 'ℕ'), - ('℘', 'ℝ'), - ('ℤ', 'ℤ'), - ('Ω', 'Ω'), - ('ℨ', 'ℨ'), - ('K', 'ℹ'), - ('ℼ', 'ℿ'), - ('ⅅ', 'ⅉ'), - ('ⅎ', 'ⅎ'), - ('Ⅰ', 'ↈ'), - ('Ⰰ', 'ⳤ'), - ('Ⳬ', 'ⳮ'), - ('Ⳳ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ⴰ', 'ⵧ'), - ('ⵯ', 'ⵯ'), - ('ⶀ', 'ⶖ'), - ('ⶠ', 'ⶦ'), - ('ⶨ', 'ⶮ'), - ('ⶰ', 'ⶶ'), - ('ⶸ', 'ⶾ'), - ('ⷀ', 'ⷆ'), - ('ⷈ', 'ⷎ'), - ('ⷐ', 'ⷖ'), - ('ⷘ', 'ⷞ'), - ('々', '〇'), - ('〡', '〩'), - ('〱', '〵'), - ('〸', '〼'), - ('ぁ', 'ゖ'), - ('ゝ', 'ゟ'), - ('ァ', 'ヺ'), - ('ー', 'ヿ'), - ('ㄅ', 'ㄯ'), - ('ㄱ', 'ㆎ'), - ('ㆠ', 'ㆿ'), - ('ㇰ', 'ㇿ'), - ('㐀', '䶿'), - ('一', 'ꒌ'), - ('ꓐ', 'ꓽ'), - ('ꔀ', 'ꘌ'), - ('ꘐ', 'ꘟ'), - ('ꘪ', 'ꘫ'), - ('Ꙁ', 'ꙮ'), - ('ꙿ', 'ꚝ'), - ('ꚠ', 'ꛯ'), - ('ꜗ', 'ꜟ'), - ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟍ'), - ('Ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'Ƛ'), - ('ꟲ', 'ꠁ'), - ('ꠃ', 'ꠅ'), - ('ꠇ', 'ꠊ'), - ('ꠌ', 'ꠢ'), - ('ꡀ', 'ꡳ'), - ('ꢂ', 'ꢳ'), - ('ꣲ', 'ꣷ'), - ('ꣻ', 'ꣻ'), - ('ꣽ', 'ꣾ'), - ('ꤊ', 'ꤥ'), - ('ꤰ', 'ꥆ'), - ('ꥠ', 'ꥼ'), - ('ꦄ', 'ꦲ'), - ('ꧏ', 'ꧏ'), - ('ꧠ', 'ꧤ'), - ('ꧦ', 'ꧯ'), - ('ꧺ', 'ꧾ'), - ('ꨀ', 'ꨨ'), - ('ꩀ', 'ꩂ'), - ('ꩄ', 'ꩋ'), - ('ꩠ', 'ꩶ'), - ('ꩺ', 'ꩺ'), - ('ꩾ', 'ꪯ'), - ('ꪱ', 'ꪱ'), - ('ꪵ', 'ꪶ'), - ('ꪹ', 'ꪽ'), - ('ꫀ', 'ꫀ'), - ('ꫂ', 'ꫂ'), - ('ꫛ', 'ꫝ'), - ('ꫠ', 'ꫪ'), - ('ꫲ', 'ꫴ'), - ('ꬁ', 'ꬆ'), - ('ꬉ', 'ꬎ'), - ('ꬑ', 'ꬖ'), - ('ꬠ', 'ꬦ'), - ('ꬨ', 'ꬮ'), - ('ꬰ', 'ꭚ'), - ('ꭜ', 'ꭩ'), - ('ꭰ', 'ꯢ'), - ('가', '힣'), - ('ힰ', 'ퟆ'), - ('ퟋ', 'ퟻ'), - ('豈', '舘'), - ('並', '龎'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('יִ', 'יִ'), - ('ײַ', 'ﬨ'), - ('שׁ', 'זּ'), - ('טּ', 'לּ'), - ('מּ', 'מּ'), - ('נּ', 'סּ'), - ('ףּ', 'פּ'), - ('צּ', 'ﮱ'), - ('ﯓ', 'ﱝ'), - ('ﱤ', 'ﴽ'), - ('ﵐ', 'ﶏ'), - ('ﶒ', 'ﷇ'), - ('ﷰ', 'ﷹ'), - ('ﹱ', 'ﹱ'), - ('ﹳ', 'ﹳ'), - ('ﹷ', 'ﹷ'), - ('ﹹ', 'ﹹ'), - ('ﹻ', 'ﹻ'), - ('ﹽ', 'ﹽ'), - ('ﹿ', 'ﻼ'), - ('A', 'Z'), - ('a', 'z'), - ('ヲ', 'ン'), - ('ᅠ', 'ᄒ'), - ('ᅡ', 'ᅦ'), - ('ᅧ', 'ᅬ'), - ('ᅭ', 'ᅲ'), - ('ᅳ', 'ᅵ'), - ('𐀀', '𐀋'), - ('𐀍', '𐀦'), - ('𐀨', '𐀺'), - ('𐀼', '𐀽'), - ('𐀿', '𐁍'), - ('𐁐', '𐁝'), - ('𐂀', '𐃺'), - ('𐅀', 
'𐅴'), - ('𐊀', '𐊜'), - ('𐊠', '𐋐'), - ('𐌀', '𐌟'), - ('𐌭', '𐍊'), - ('𐍐', '𐍵'), - ('𐎀', '𐎝'), - ('𐎠', '𐏃'), - ('𐏈', '𐏏'), - ('𐏑', '𐏕'), - ('𐐀', '𐒝'), - ('𐒰', '𐓓'), - ('𐓘', '𐓻'), - ('𐔀', '𐔧'), - ('𐔰', '𐕣'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐗀', '𐗳'), - ('𐘀', '𐜶'), - ('𐝀', '𐝕'), - ('𐝠', '𐝧'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𐠀', '𐠅'), - ('𐠈', '𐠈'), - ('𐠊', '𐠵'), - ('𐠷', '𐠸'), - ('𐠼', '𐠼'), - ('𐠿', '𐡕'), - ('𐡠', '𐡶'), - ('𐢀', '𐢞'), - ('𐣠', '𐣲'), - ('𐣴', '𐣵'), - ('𐤀', '𐤕'), - ('𐤠', '𐤹'), - ('𐦀', '𐦷'), - ('𐦾', '𐦿'), - ('𐨀', '𐨀'), - ('𐨐', '𐨓'), - ('𐨕', '𐨗'), - ('𐨙', '𐨵'), - ('𐩠', '𐩼'), - ('𐪀', '𐪜'), - ('𐫀', '𐫇'), - ('𐫉', '𐫤'), - ('𐬀', '𐬵'), - ('𐭀', '𐭕'), - ('𐭠', '𐭲'), - ('𐮀', '𐮑'), - ('𐰀', '𐱈'), - ('𐲀', '𐲲'), - ('𐳀', '𐳲'), - ('𐴀', '𐴣'), - ('𐵊', '𐵥'), - ('𐵯', '𐶅'), - ('𐺀', '𐺩'), - ('𐺰', '𐺱'), - ('𐻂', '𐻄'), - ('𐼀', '𐼜'), - ('𐼧', '𐼧'), - ('𐼰', '𐽅'), - ('𐽰', '𐾁'), - ('𐾰', '𐿄'), - ('𐿠', '𐿶'), - ('𑀃', '𑀷'), - ('𑁱', '𑁲'), - ('𑁵', '𑁵'), - ('𑂃', '𑂯'), - ('𑃐', '𑃨'), - ('𑄃', '𑄦'), - ('𑅄', '𑅄'), - ('𑅇', '𑅇'), - ('𑅐', '𑅲'), - ('𑅶', '𑅶'), - ('𑆃', '𑆲'), - ('𑇁', '𑇄'), - ('𑇚', '𑇚'), - ('𑇜', '𑇜'), - ('𑈀', '𑈑'), - ('𑈓', '𑈫'), - ('𑈿', '𑉀'), - ('𑊀', '𑊆'), - ('𑊈', '𑊈'), - ('𑊊', '𑊍'), - ('𑊏', '𑊝'), - ('𑊟', '𑊨'), - ('𑊰', '𑋞'), - ('𑌅', '𑌌'), - ('𑌏', '𑌐'), - ('𑌓', '𑌨'), - ('𑌪', '𑌰'), - ('𑌲', '𑌳'), - ('𑌵', '𑌹'), - ('𑌽', '𑌽'), - ('𑍐', '𑍐'), - ('𑍝', '𑍡'), - ('𑎀', '𑎉'), - ('𑎋', '𑎋'), - ('𑎎', '𑎎'), - ('𑎐', '𑎵'), - ('𑎷', '𑎷'), - ('𑏑', '𑏑'), - ('𑏓', '𑏓'), - ('𑐀', '𑐴'), - ('𑑇', '𑑊'), - ('𑑟', '𑑡'), - ('𑒀', '𑒯'), - ('𑓄', '𑓅'), - ('𑓇', '𑓇'), - ('𑖀', '𑖮'), - ('𑗘', '𑗛'), - ('𑘀', '𑘯'), - ('𑙄', '𑙄'), - ('𑚀', '𑚪'), - ('𑚸', '𑚸'), - ('𑜀', '𑜚'), - ('𑝀', '𑝆'), - ('𑠀', '𑠫'), - ('𑢠', '𑣟'), - ('𑣿', '𑤆'), - ('𑤉', '𑤉'), - ('𑤌', '𑤓'), - ('𑤕', '𑤖'), - ('𑤘', '𑤯'), - ('𑤿', '𑤿'), - ('𑥁', '𑥁'), - ('𑦠', '𑦧'), - ('𑦪', '𑧐'), - ('𑧡', '𑧡'), - ('𑧣', '𑧣'), - ('𑨀', '𑨀'), - ('𑨋', '𑨲'), - ('𑨺', '𑨺'), - ('𑩐', '𑩐'), - ('𑩜', '𑪉'), - ('𑪝', '𑪝'), - 
('𑪰', '𑫸'), - ('𑯀', '𑯠'), - ('𑰀', '𑰈'), - ('𑰊', '𑰮'), - ('𑱀', '𑱀'), - ('𑱲', '𑲏'), - ('𑴀', '𑴆'), - ('𑴈', '𑴉'), - ('𑴋', '𑴰'), - ('𑵆', '𑵆'), - ('𑵠', '𑵥'), - ('𑵧', '𑵨'), - ('𑵪', '𑶉'), - ('𑶘', '𑶘'), - ('𑻠', '𑻲'), - ('𑼂', '𑼂'), - ('𑼄', '𑼐'), - ('𑼒', '𑼳'), - ('𑾰', '𑾰'), - ('𒀀', '𒎙'), - ('𒐀', '𒑮'), - ('𒒀', '𒕃'), - ('𒾐', '𒿰'), - ('𓀀', '𓐯'), - ('𓑁', '𓑆'), - ('𓑠', '𔏺'), - ('𔐀', '𔙆'), - ('𖄀', '𖄝'), - ('𖠀', '𖨸'), - ('𖩀', '𖩞'), - ('𖩰', '𖪾'), - ('𖫐', '𖫭'), - ('𖬀', '𖬯'), - ('𖭀', '𖭃'), - ('𖭣', '𖭷'), - ('𖭽', '𖮏'), - ('𖵀', '𖵬'), - ('𖹀', '𖹿'), - ('𖼀', '𖽊'), - ('𖽐', '𖽐'), - ('𖾓', '𖾟'), - ('𖿠', '𖿡'), - ('𖿣', '𖿣'), - ('𗀀', '𘟷'), - ('𘠀', '𘳕'), - ('𘳿', '𘴈'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('𛀀', '𛄢'), - ('𛄲', '𛄲'), - ('𛅐', '𛅒'), - ('𛅕', '𛅕'), - ('𛅤', '𛅧'), - ('𛅰', '𛋻'), - ('𛰀', '𛱪'), - ('𛱰', '𛱼'), - ('𛲀', '𛲈'), - ('𛲐', '𛲙'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝛀'), - ('𝛂', '𝛚'), - ('𝛜', '𝛺'), - ('𝛼', '𝜔'), - ('𝜖', '𝜴'), - ('𝜶', '𝝎'), - ('𝝐', '𝝮'), - ('𝝰', '𝞈'), - ('𝞊', '𝞨'), - ('𝞪', '𝟂'), - ('𝟄', '𝟋'), - ('𝼀', '𝼞'), - ('𝼥', '𝼪'), - ('𞀰', '𞁭'), - ('𞄀', '𞄬'), - ('𞄷', '𞄽'), - ('𞅎', '𞅎'), - ('𞊐', '𞊭'), - ('𞋀', '𞋫'), - ('𞓐', '𞓫'), - ('𞗐', '𞗭'), - ('𞗰', '𞗰'), - ('𞟠', '𞟦'), - ('𞟨', '𞟫'), - ('𞟭', '𞟮'), - ('𞟰', '𞟾'), - ('𞠀', '𞣄'), - ('𞤀', '𞥃'), - ('𞥋', '𞥋'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('𠀀', '𪛟'), - ('𪜀', '𫜹'), - ('𫝀', '𫠝'), 
- ('𫠠', '𬺡'), - ('𬺰', '𮯠'), - ('𮯰', '𮹝'), - ('丽', '𪘀'), - ('𰀀', '𱍊'), - ('𱍐', '𲎯'), -]; diff --git a/vendor/regex-syntax/src/unicode_tables/property_names.rs b/vendor/regex-syntax/src/unicode_tables/property_names.rs deleted file mode 100644 index a27b4913..00000000 --- a/vendor/regex-syntax/src/unicode_tables/property_names.rs +++ /dev/null @@ -1,281 +0,0 @@ -// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: -// -// ucd-generate property-names ucd-16.0.0 -// -// Unicode version: 16.0.0. -// -// ucd-generate 0.3.1 is available on crates.io. - -pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ - ("age", "Age"), - ("ahex", "ASCII_Hex_Digit"), - ("alpha", "Alphabetic"), - ("alphabetic", "Alphabetic"), - ("asciihexdigit", "ASCII_Hex_Digit"), - ("bc", "Bidi_Class"), - ("bidic", "Bidi_Control"), - ("bidiclass", "Bidi_Class"), - ("bidicontrol", "Bidi_Control"), - ("bidim", "Bidi_Mirrored"), - ("bidimirrored", "Bidi_Mirrored"), - ("bidimirroringglyph", "Bidi_Mirroring_Glyph"), - ("bidipairedbracket", "Bidi_Paired_Bracket"), - ("bidipairedbrackettype", "Bidi_Paired_Bracket_Type"), - ("blk", "Block"), - ("block", "Block"), - ("bmg", "Bidi_Mirroring_Glyph"), - ("bpb", "Bidi_Paired_Bracket"), - ("bpt", "Bidi_Paired_Bracket_Type"), - ("canonicalcombiningclass", "Canonical_Combining_Class"), - ("cased", "Cased"), - ("casefolding", "Case_Folding"), - ("caseignorable", "Case_Ignorable"), - ("ccc", "Canonical_Combining_Class"), - ("ce", "Composition_Exclusion"), - ("cf", "Case_Folding"), - ("changeswhencasefolded", "Changes_When_Casefolded"), - ("changeswhencasemapped", "Changes_When_Casemapped"), - ("changeswhenlowercased", "Changes_When_Lowercased"), - ("changeswhennfkccasefolded", "Changes_When_NFKC_Casefolded"), - ("changeswhentitlecased", "Changes_When_Titlecased"), - ("changeswhenuppercased", "Changes_When_Uppercased"), - ("ci", "Case_Ignorable"), - ("cjkaccountingnumeric", "kAccountingNumeric"), - ("cjkcompatibilityvariant", 
"kCompatibilityVariant"), - ("cjkiicore", "kIICore"), - ("cjkirggsource", "kIRG_GSource"), - ("cjkirghsource", "kIRG_HSource"), - ("cjkirgjsource", "kIRG_JSource"), - ("cjkirgkpsource", "kIRG_KPSource"), - ("cjkirgksource", "kIRG_KSource"), - ("cjkirgmsource", "kIRG_MSource"), - ("cjkirgssource", "kIRG_SSource"), - ("cjkirgtsource", "kIRG_TSource"), - ("cjkirguksource", "kIRG_UKSource"), - ("cjkirgusource", "kIRG_USource"), - ("cjkirgvsource", "kIRG_VSource"), - ("cjkothernumeric", "kOtherNumeric"), - ("cjkprimarynumeric", "kPrimaryNumeric"), - ("cjkrsunicode", "kRSUnicode"), - ("compex", "Full_Composition_Exclusion"), - ("compositionexclusion", "Composition_Exclusion"), - ("cwcf", "Changes_When_Casefolded"), - ("cwcm", "Changes_When_Casemapped"), - ("cwkcf", "Changes_When_NFKC_Casefolded"), - ("cwl", "Changes_When_Lowercased"), - ("cwt", "Changes_When_Titlecased"), - ("cwu", "Changes_When_Uppercased"), - ("dash", "Dash"), - ("decompositionmapping", "Decomposition_Mapping"), - ("decompositiontype", "Decomposition_Type"), - ("defaultignorablecodepoint", "Default_Ignorable_Code_Point"), - ("dep", "Deprecated"), - ("deprecated", "Deprecated"), - ("di", "Default_Ignorable_Code_Point"), - ("dia", "Diacritic"), - ("diacritic", "Diacritic"), - ("dm", "Decomposition_Mapping"), - ("dt", "Decomposition_Type"), - ("ea", "East_Asian_Width"), - ("eastasianwidth", "East_Asian_Width"), - ("ebase", "Emoji_Modifier_Base"), - ("ecomp", "Emoji_Component"), - ("emod", "Emoji_Modifier"), - ("emoji", "Emoji"), - ("emojicomponent", "Emoji_Component"), - ("emojimodifier", "Emoji_Modifier"), - ("emojimodifierbase", "Emoji_Modifier_Base"), - ("emojipresentation", "Emoji_Presentation"), - ("epres", "Emoji_Presentation"), - ("equideo", "Equivalent_Unified_Ideograph"), - ("equivalentunifiedideograph", "Equivalent_Unified_Ideograph"), - ("expandsonnfc", "Expands_On_NFC"), - ("expandsonnfd", "Expands_On_NFD"), - ("expandsonnfkc", "Expands_On_NFKC"), - ("expandsonnfkd", "Expands_On_NFKD"), - 
("ext", "Extender"), - ("extendedpictographic", "Extended_Pictographic"), - ("extender", "Extender"), - ("extpict", "Extended_Pictographic"), - ("fcnfkc", "FC_NFKC_Closure"), - ("fcnfkcclosure", "FC_NFKC_Closure"), - ("fullcompositionexclusion", "Full_Composition_Exclusion"), - ("gc", "General_Category"), - ("gcb", "Grapheme_Cluster_Break"), - ("generalcategory", "General_Category"), - ("graphemebase", "Grapheme_Base"), - ("graphemeclusterbreak", "Grapheme_Cluster_Break"), - ("graphemeextend", "Grapheme_Extend"), - ("graphemelink", "Grapheme_Link"), - ("grbase", "Grapheme_Base"), - ("grext", "Grapheme_Extend"), - ("grlink", "Grapheme_Link"), - ("hangulsyllabletype", "Hangul_Syllable_Type"), - ("hex", "Hex_Digit"), - ("hexdigit", "Hex_Digit"), - ("hst", "Hangul_Syllable_Type"), - ("hyphen", "Hyphen"), - ("idc", "ID_Continue"), - ("idcompatmathcontinue", "ID_Compat_Math_Continue"), - ("idcompatmathstart", "ID_Compat_Math_Start"), - ("idcontinue", "ID_Continue"), - ("ideo", "Ideographic"), - ("ideographic", "Ideographic"), - ("ids", "ID_Start"), - ("idsb", "IDS_Binary_Operator"), - ("idsbinaryoperator", "IDS_Binary_Operator"), - ("idst", "IDS_Trinary_Operator"), - ("idstart", "ID_Start"), - ("idstrinaryoperator", "IDS_Trinary_Operator"), - ("idsu", "IDS_Unary_Operator"), - ("idsunaryoperator", "IDS_Unary_Operator"), - ("incb", "Indic_Conjunct_Break"), - ("indicconjunctbreak", "Indic_Conjunct_Break"), - ("indicpositionalcategory", "Indic_Positional_Category"), - ("indicsyllabiccategory", "Indic_Syllabic_Category"), - ("inpc", "Indic_Positional_Category"), - ("insc", "Indic_Syllabic_Category"), - ("isc", "ISO_Comment"), - ("jamoshortname", "Jamo_Short_Name"), - ("jg", "Joining_Group"), - ("joinc", "Join_Control"), - ("joincontrol", "Join_Control"), - ("joininggroup", "Joining_Group"), - ("joiningtype", "Joining_Type"), - ("jsn", "Jamo_Short_Name"), - ("jt", "Joining_Type"), - ("kaccountingnumeric", "kAccountingNumeric"), - ("kcompatibilityvariant", 
"kCompatibilityVariant"), - ("kehcat", "kEH_Cat"), - ("kehdesc", "kEH_Desc"), - ("kehhg", "kEH_HG"), - ("kehifao", "kEH_IFAO"), - ("kehjsesh", "kEH_JSesh"), - ("kehnomirror", "kEH_NoMirror"), - ("kehnorotate", "kEH_NoRotate"), - ("kiicore", "kIICore"), - ("kirggsource", "kIRG_GSource"), - ("kirghsource", "kIRG_HSource"), - ("kirgjsource", "kIRG_JSource"), - ("kirgkpsource", "kIRG_KPSource"), - ("kirgksource", "kIRG_KSource"), - ("kirgmsource", "kIRG_MSource"), - ("kirgssource", "kIRG_SSource"), - ("kirgtsource", "kIRG_TSource"), - ("kirguksource", "kIRG_UKSource"), - ("kirgusource", "kIRG_USource"), - ("kirgvsource", "kIRG_VSource"), - ("kothernumeric", "kOtherNumeric"), - ("kprimarynumeric", "kPrimaryNumeric"), - ("krsunicode", "kRSUnicode"), - ("lb", "Line_Break"), - ("lc", "Lowercase_Mapping"), - ("linebreak", "Line_Break"), - ("loe", "Logical_Order_Exception"), - ("logicalorderexception", "Logical_Order_Exception"), - ("lower", "Lowercase"), - ("lowercase", "Lowercase"), - ("lowercasemapping", "Lowercase_Mapping"), - ("math", "Math"), - ("mcm", "Modifier_Combining_Mark"), - ("modifiercombiningmark", "Modifier_Combining_Mark"), - ("na", "Name"), - ("na1", "Unicode_1_Name"), - ("name", "Name"), - ("namealias", "Name_Alias"), - ("nchar", "Noncharacter_Code_Point"), - ("nfcqc", "NFC_Quick_Check"), - ("nfcquickcheck", "NFC_Quick_Check"), - ("nfdqc", "NFD_Quick_Check"), - ("nfdquickcheck", "NFD_Quick_Check"), - ("nfkccasefold", "NFKC_Casefold"), - ("nfkccf", "NFKC_Casefold"), - ("nfkcqc", "NFKC_Quick_Check"), - ("nfkcquickcheck", "NFKC_Quick_Check"), - ("nfkcscf", "NFKC_Simple_Casefold"), - ("nfkcsimplecasefold", "NFKC_Simple_Casefold"), - ("nfkdqc", "NFKD_Quick_Check"), - ("nfkdquickcheck", "NFKD_Quick_Check"), - ("noncharactercodepoint", "Noncharacter_Code_Point"), - ("nt", "Numeric_Type"), - ("numerictype", "Numeric_Type"), - ("numericvalue", "Numeric_Value"), - ("nv", "Numeric_Value"), - ("oalpha", "Other_Alphabetic"), - ("ocomment", "ISO_Comment"), - ("odi", 
"Other_Default_Ignorable_Code_Point"), - ("ogrext", "Other_Grapheme_Extend"), - ("oidc", "Other_ID_Continue"), - ("oids", "Other_ID_Start"), - ("olower", "Other_Lowercase"), - ("omath", "Other_Math"), - ("otheralphabetic", "Other_Alphabetic"), - ("otherdefaultignorablecodepoint", "Other_Default_Ignorable_Code_Point"), - ("othergraphemeextend", "Other_Grapheme_Extend"), - ("otheridcontinue", "Other_ID_Continue"), - ("otheridstart", "Other_ID_Start"), - ("otherlowercase", "Other_Lowercase"), - ("othermath", "Other_Math"), - ("otheruppercase", "Other_Uppercase"), - ("oupper", "Other_Uppercase"), - ("patsyn", "Pattern_Syntax"), - ("patternsyntax", "Pattern_Syntax"), - ("patternwhitespace", "Pattern_White_Space"), - ("patws", "Pattern_White_Space"), - ("pcm", "Prepended_Concatenation_Mark"), - ("prependedconcatenationmark", "Prepended_Concatenation_Mark"), - ("qmark", "Quotation_Mark"), - ("quotationmark", "Quotation_Mark"), - ("radical", "Radical"), - ("regionalindicator", "Regional_Indicator"), - ("ri", "Regional_Indicator"), - ("sb", "Sentence_Break"), - ("sc", "Script"), - ("scf", "Simple_Case_Folding"), - ("script", "Script"), - ("scriptextensions", "Script_Extensions"), - ("scx", "Script_Extensions"), - ("sd", "Soft_Dotted"), - ("sentencebreak", "Sentence_Break"), - ("sentenceterminal", "Sentence_Terminal"), - ("sfc", "Simple_Case_Folding"), - ("simplecasefolding", "Simple_Case_Folding"), - ("simplelowercasemapping", "Simple_Lowercase_Mapping"), - ("simpletitlecasemapping", "Simple_Titlecase_Mapping"), - ("simpleuppercasemapping", "Simple_Uppercase_Mapping"), - ("slc", "Simple_Lowercase_Mapping"), - ("softdotted", "Soft_Dotted"), - ("space", "White_Space"), - ("stc", "Simple_Titlecase_Mapping"), - ("sterm", "Sentence_Terminal"), - ("suc", "Simple_Uppercase_Mapping"), - ("tc", "Titlecase_Mapping"), - ("term", "Terminal_Punctuation"), - ("terminalpunctuation", "Terminal_Punctuation"), - ("titlecasemapping", "Titlecase_Mapping"), - ("uc", "Uppercase_Mapping"), - 
("uideo", "Unified_Ideograph"), - ("unicode1name", "Unicode_1_Name"), - ("unicoderadicalstroke", "kRSUnicode"), - ("unifiedideograph", "Unified_Ideograph"), - ("upper", "Uppercase"), - ("uppercase", "Uppercase"), - ("uppercasemapping", "Uppercase_Mapping"), - ("urs", "kRSUnicode"), - ("variationselector", "Variation_Selector"), - ("verticalorientation", "Vertical_Orientation"), - ("vo", "Vertical_Orientation"), - ("vs", "Variation_Selector"), - ("wb", "Word_Break"), - ("whitespace", "White_Space"), - ("wordbreak", "Word_Break"), - ("wspace", "White_Space"), - ("xidc", "XID_Continue"), - ("xidcontinue", "XID_Continue"), - ("xids", "XID_Start"), - ("xidstart", "XID_Start"), - ("xonfc", "Expands_On_NFC"), - ("xonfd", "Expands_On_NFD"), - ("xonfkc", "Expands_On_NFKC"), - ("xonfkd", "Expands_On_NFKD"), -]; diff --git a/vendor/regex-syntax/src/unicode_tables/property_values.rs b/vendor/regex-syntax/src/unicode_tables/property_values.rs deleted file mode 100644 index 2270d663..00000000 --- a/vendor/regex-syntax/src/unicode_tables/property_values.rs +++ /dev/null @@ -1,956 +0,0 @@ -// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: -// -// ucd-generate property-values ucd-16.0.0 --include gc,script,scx,age,gcb,wb,sb -// -// Unicode version: 16.0.0. -// -// ucd-generate 0.3.1 is available on crates.io. 
- -pub const PROPERTY_VALUES: &'static [( - &'static str, - &'static [(&'static str, &'static str)], -)] = &[ - ( - "Age", - &[ - ("1.1", "V1_1"), - ("10.0", "V10_0"), - ("11.0", "V11_0"), - ("12.0", "V12_0"), - ("12.1", "V12_1"), - ("13.0", "V13_0"), - ("14.0", "V14_0"), - ("15.0", "V15_0"), - ("15.1", "V15_1"), - ("16.0", "V16_0"), - ("2.0", "V2_0"), - ("2.1", "V2_1"), - ("3.0", "V3_0"), - ("3.1", "V3_1"), - ("3.2", "V3_2"), - ("4.0", "V4_0"), - ("4.1", "V4_1"), - ("5.0", "V5_0"), - ("5.1", "V5_1"), - ("5.2", "V5_2"), - ("6.0", "V6_0"), - ("6.1", "V6_1"), - ("6.2", "V6_2"), - ("6.3", "V6_3"), - ("7.0", "V7_0"), - ("8.0", "V8_0"), - ("9.0", "V9_0"), - ("na", "Unassigned"), - ("unassigned", "Unassigned"), - ("v100", "V10_0"), - ("v11", "V1_1"), - ("v110", "V11_0"), - ("v120", "V12_0"), - ("v121", "V12_1"), - ("v130", "V13_0"), - ("v140", "V14_0"), - ("v150", "V15_0"), - ("v151", "V15_1"), - ("v160", "V16_0"), - ("v20", "V2_0"), - ("v21", "V2_1"), - ("v30", "V3_0"), - ("v31", "V3_1"), - ("v32", "V3_2"), - ("v40", "V4_0"), - ("v41", "V4_1"), - ("v50", "V5_0"), - ("v51", "V5_1"), - ("v52", "V5_2"), - ("v60", "V6_0"), - ("v61", "V6_1"), - ("v62", "V6_2"), - ("v63", "V6_3"), - ("v70", "V7_0"), - ("v80", "V8_0"), - ("v90", "V9_0"), - ], - ), - ( - "General_Category", - &[ - ("c", "Other"), - ("casedletter", "Cased_Letter"), - ("cc", "Control"), - ("cf", "Format"), - ("closepunctuation", "Close_Punctuation"), - ("cn", "Unassigned"), - ("cntrl", "Control"), - ("co", "Private_Use"), - ("combiningmark", "Mark"), - ("connectorpunctuation", "Connector_Punctuation"), - ("control", "Control"), - ("cs", "Surrogate"), - ("currencysymbol", "Currency_Symbol"), - ("dashpunctuation", "Dash_Punctuation"), - ("decimalnumber", "Decimal_Number"), - ("digit", "Decimal_Number"), - ("enclosingmark", "Enclosing_Mark"), - ("finalpunctuation", "Final_Punctuation"), - ("format", "Format"), - ("initialpunctuation", "Initial_Punctuation"), - ("l", "Letter"), - ("lc", "Cased_Letter"), - ("letter", 
"Letter"), - ("letternumber", "Letter_Number"), - ("lineseparator", "Line_Separator"), - ("ll", "Lowercase_Letter"), - ("lm", "Modifier_Letter"), - ("lo", "Other_Letter"), - ("lowercaseletter", "Lowercase_Letter"), - ("lt", "Titlecase_Letter"), - ("lu", "Uppercase_Letter"), - ("m", "Mark"), - ("mark", "Mark"), - ("mathsymbol", "Math_Symbol"), - ("mc", "Spacing_Mark"), - ("me", "Enclosing_Mark"), - ("mn", "Nonspacing_Mark"), - ("modifierletter", "Modifier_Letter"), - ("modifiersymbol", "Modifier_Symbol"), - ("n", "Number"), - ("nd", "Decimal_Number"), - ("nl", "Letter_Number"), - ("no", "Other_Number"), - ("nonspacingmark", "Nonspacing_Mark"), - ("number", "Number"), - ("openpunctuation", "Open_Punctuation"), - ("other", "Other"), - ("otherletter", "Other_Letter"), - ("othernumber", "Other_Number"), - ("otherpunctuation", "Other_Punctuation"), - ("othersymbol", "Other_Symbol"), - ("p", "Punctuation"), - ("paragraphseparator", "Paragraph_Separator"), - ("pc", "Connector_Punctuation"), - ("pd", "Dash_Punctuation"), - ("pe", "Close_Punctuation"), - ("pf", "Final_Punctuation"), - ("pi", "Initial_Punctuation"), - ("po", "Other_Punctuation"), - ("privateuse", "Private_Use"), - ("ps", "Open_Punctuation"), - ("punct", "Punctuation"), - ("punctuation", "Punctuation"), - ("s", "Symbol"), - ("sc", "Currency_Symbol"), - ("separator", "Separator"), - ("sk", "Modifier_Symbol"), - ("sm", "Math_Symbol"), - ("so", "Other_Symbol"), - ("spaceseparator", "Space_Separator"), - ("spacingmark", "Spacing_Mark"), - ("surrogate", "Surrogate"), - ("symbol", "Symbol"), - ("titlecaseletter", "Titlecase_Letter"), - ("unassigned", "Unassigned"), - ("uppercaseletter", "Uppercase_Letter"), - ("z", "Separator"), - ("zl", "Line_Separator"), - ("zp", "Paragraph_Separator"), - ("zs", "Space_Separator"), - ], - ), - ( - "Grapheme_Cluster_Break", - &[ - ("cn", "Control"), - ("control", "Control"), - ("cr", "CR"), - ("eb", "E_Base"), - ("ebase", "E_Base"), - ("ebasegaz", "E_Base_GAZ"), - ("ebg", 
"E_Base_GAZ"), - ("em", "E_Modifier"), - ("emodifier", "E_Modifier"), - ("ex", "Extend"), - ("extend", "Extend"), - ("gaz", "Glue_After_Zwj"), - ("glueafterzwj", "Glue_After_Zwj"), - ("l", "L"), - ("lf", "LF"), - ("lv", "LV"), - ("lvt", "LVT"), - ("other", "Other"), - ("pp", "Prepend"), - ("prepend", "Prepend"), - ("regionalindicator", "Regional_Indicator"), - ("ri", "Regional_Indicator"), - ("sm", "SpacingMark"), - ("spacingmark", "SpacingMark"), - ("t", "T"), - ("v", "V"), - ("xx", "Other"), - ("zwj", "ZWJ"), - ], - ), - ( - "Script", - &[ - ("adlam", "Adlam"), - ("adlm", "Adlam"), - ("aghb", "Caucasian_Albanian"), - ("ahom", "Ahom"), - ("anatolianhieroglyphs", "Anatolian_Hieroglyphs"), - ("arab", "Arabic"), - ("arabic", "Arabic"), - ("armenian", "Armenian"), - ("armi", "Imperial_Aramaic"), - ("armn", "Armenian"), - ("avestan", "Avestan"), - ("avst", "Avestan"), - ("bali", "Balinese"), - ("balinese", "Balinese"), - ("bamu", "Bamum"), - ("bamum", "Bamum"), - ("bass", "Bassa_Vah"), - ("bassavah", "Bassa_Vah"), - ("batak", "Batak"), - ("batk", "Batak"), - ("beng", "Bengali"), - ("bengali", "Bengali"), - ("bhaiksuki", "Bhaiksuki"), - ("bhks", "Bhaiksuki"), - ("bopo", "Bopomofo"), - ("bopomofo", "Bopomofo"), - ("brah", "Brahmi"), - ("brahmi", "Brahmi"), - ("brai", "Braille"), - ("braille", "Braille"), - ("bugi", "Buginese"), - ("buginese", "Buginese"), - ("buhd", "Buhid"), - ("buhid", "Buhid"), - ("cakm", "Chakma"), - ("canadianaboriginal", "Canadian_Aboriginal"), - ("cans", "Canadian_Aboriginal"), - ("cari", "Carian"), - ("carian", "Carian"), - ("caucasianalbanian", "Caucasian_Albanian"), - ("chakma", "Chakma"), - ("cham", "Cham"), - ("cher", "Cherokee"), - ("cherokee", "Cherokee"), - ("chorasmian", "Chorasmian"), - ("chrs", "Chorasmian"), - ("common", "Common"), - ("copt", "Coptic"), - ("coptic", "Coptic"), - ("cpmn", "Cypro_Minoan"), - ("cprt", "Cypriot"), - ("cuneiform", "Cuneiform"), - ("cypriot", "Cypriot"), - ("cyprominoan", "Cypro_Minoan"), - ("cyrillic", 
"Cyrillic"), - ("cyrl", "Cyrillic"), - ("deseret", "Deseret"), - ("deva", "Devanagari"), - ("devanagari", "Devanagari"), - ("diak", "Dives_Akuru"), - ("divesakuru", "Dives_Akuru"), - ("dogr", "Dogra"), - ("dogra", "Dogra"), - ("dsrt", "Deseret"), - ("dupl", "Duployan"), - ("duployan", "Duployan"), - ("egyp", "Egyptian_Hieroglyphs"), - ("egyptianhieroglyphs", "Egyptian_Hieroglyphs"), - ("elba", "Elbasan"), - ("elbasan", "Elbasan"), - ("elym", "Elymaic"), - ("elymaic", "Elymaic"), - ("ethi", "Ethiopic"), - ("ethiopic", "Ethiopic"), - ("gara", "Garay"), - ("garay", "Garay"), - ("geor", "Georgian"), - ("georgian", "Georgian"), - ("glag", "Glagolitic"), - ("glagolitic", "Glagolitic"), - ("gong", "Gunjala_Gondi"), - ("gonm", "Masaram_Gondi"), - ("goth", "Gothic"), - ("gothic", "Gothic"), - ("gran", "Grantha"), - ("grantha", "Grantha"), - ("greek", "Greek"), - ("grek", "Greek"), - ("gujarati", "Gujarati"), - ("gujr", "Gujarati"), - ("gukh", "Gurung_Khema"), - ("gunjalagondi", "Gunjala_Gondi"), - ("gurmukhi", "Gurmukhi"), - ("guru", "Gurmukhi"), - ("gurungkhema", "Gurung_Khema"), - ("han", "Han"), - ("hang", "Hangul"), - ("hangul", "Hangul"), - ("hani", "Han"), - ("hanifirohingya", "Hanifi_Rohingya"), - ("hano", "Hanunoo"), - ("hanunoo", "Hanunoo"), - ("hatr", "Hatran"), - ("hatran", "Hatran"), - ("hebr", "Hebrew"), - ("hebrew", "Hebrew"), - ("hira", "Hiragana"), - ("hiragana", "Hiragana"), - ("hluw", "Anatolian_Hieroglyphs"), - ("hmng", "Pahawh_Hmong"), - ("hmnp", "Nyiakeng_Puachue_Hmong"), - ("hrkt", "Katakana_Or_Hiragana"), - ("hung", "Old_Hungarian"), - ("imperialaramaic", "Imperial_Aramaic"), - ("inherited", "Inherited"), - ("inscriptionalpahlavi", "Inscriptional_Pahlavi"), - ("inscriptionalparthian", "Inscriptional_Parthian"), - ("ital", "Old_Italic"), - ("java", "Javanese"), - ("javanese", "Javanese"), - ("kaithi", "Kaithi"), - ("kali", "Kayah_Li"), - ("kana", "Katakana"), - ("kannada", "Kannada"), - ("katakana", "Katakana"), - ("katakanaorhiragana", 
"Katakana_Or_Hiragana"), - ("kawi", "Kawi"), - ("kayahli", "Kayah_Li"), - ("khar", "Kharoshthi"), - ("kharoshthi", "Kharoshthi"), - ("khitansmallscript", "Khitan_Small_Script"), - ("khmer", "Khmer"), - ("khmr", "Khmer"), - ("khoj", "Khojki"), - ("khojki", "Khojki"), - ("khudawadi", "Khudawadi"), - ("kiratrai", "Kirat_Rai"), - ("kits", "Khitan_Small_Script"), - ("knda", "Kannada"), - ("krai", "Kirat_Rai"), - ("kthi", "Kaithi"), - ("lana", "Tai_Tham"), - ("lao", "Lao"), - ("laoo", "Lao"), - ("latin", "Latin"), - ("latn", "Latin"), - ("lepc", "Lepcha"), - ("lepcha", "Lepcha"), - ("limb", "Limbu"), - ("limbu", "Limbu"), - ("lina", "Linear_A"), - ("linb", "Linear_B"), - ("lineara", "Linear_A"), - ("linearb", "Linear_B"), - ("lisu", "Lisu"), - ("lyci", "Lycian"), - ("lycian", "Lycian"), - ("lydi", "Lydian"), - ("lydian", "Lydian"), - ("mahajani", "Mahajani"), - ("mahj", "Mahajani"), - ("maka", "Makasar"), - ("makasar", "Makasar"), - ("malayalam", "Malayalam"), - ("mand", "Mandaic"), - ("mandaic", "Mandaic"), - ("mani", "Manichaean"), - ("manichaean", "Manichaean"), - ("marc", "Marchen"), - ("marchen", "Marchen"), - ("masaramgondi", "Masaram_Gondi"), - ("medefaidrin", "Medefaidrin"), - ("medf", "Medefaidrin"), - ("meeteimayek", "Meetei_Mayek"), - ("mend", "Mende_Kikakui"), - ("mendekikakui", "Mende_Kikakui"), - ("merc", "Meroitic_Cursive"), - ("mero", "Meroitic_Hieroglyphs"), - ("meroiticcursive", "Meroitic_Cursive"), - ("meroitichieroglyphs", "Meroitic_Hieroglyphs"), - ("miao", "Miao"), - ("mlym", "Malayalam"), - ("modi", "Modi"), - ("mong", "Mongolian"), - ("mongolian", "Mongolian"), - ("mro", "Mro"), - ("mroo", "Mro"), - ("mtei", "Meetei_Mayek"), - ("mult", "Multani"), - ("multani", "Multani"), - ("myanmar", "Myanmar"), - ("mymr", "Myanmar"), - ("nabataean", "Nabataean"), - ("nagm", "Nag_Mundari"), - ("nagmundari", "Nag_Mundari"), - ("nand", "Nandinagari"), - ("nandinagari", "Nandinagari"), - ("narb", "Old_North_Arabian"), - ("nbat", "Nabataean"), - ("newa", "Newa"), - 
("newtailue", "New_Tai_Lue"), - ("nko", "Nko"), - ("nkoo", "Nko"), - ("nshu", "Nushu"), - ("nushu", "Nushu"), - ("nyiakengpuachuehmong", "Nyiakeng_Puachue_Hmong"), - ("ogam", "Ogham"), - ("ogham", "Ogham"), - ("olchiki", "Ol_Chiki"), - ("olck", "Ol_Chiki"), - ("oldhungarian", "Old_Hungarian"), - ("olditalic", "Old_Italic"), - ("oldnortharabian", "Old_North_Arabian"), - ("oldpermic", "Old_Permic"), - ("oldpersian", "Old_Persian"), - ("oldsogdian", "Old_Sogdian"), - ("oldsoutharabian", "Old_South_Arabian"), - ("oldturkic", "Old_Turkic"), - ("olduyghur", "Old_Uyghur"), - ("olonal", "Ol_Onal"), - ("onao", "Ol_Onal"), - ("oriya", "Oriya"), - ("orkh", "Old_Turkic"), - ("orya", "Oriya"), - ("osage", "Osage"), - ("osge", "Osage"), - ("osma", "Osmanya"), - ("osmanya", "Osmanya"), - ("ougr", "Old_Uyghur"), - ("pahawhhmong", "Pahawh_Hmong"), - ("palm", "Palmyrene"), - ("palmyrene", "Palmyrene"), - ("pauc", "Pau_Cin_Hau"), - ("paucinhau", "Pau_Cin_Hau"), - ("perm", "Old_Permic"), - ("phag", "Phags_Pa"), - ("phagspa", "Phags_Pa"), - ("phli", "Inscriptional_Pahlavi"), - ("phlp", "Psalter_Pahlavi"), - ("phnx", "Phoenician"), - ("phoenician", "Phoenician"), - ("plrd", "Miao"), - ("prti", "Inscriptional_Parthian"), - ("psalterpahlavi", "Psalter_Pahlavi"), - ("qaac", "Coptic"), - ("qaai", "Inherited"), - ("rejang", "Rejang"), - ("rjng", "Rejang"), - ("rohg", "Hanifi_Rohingya"), - ("runic", "Runic"), - ("runr", "Runic"), - ("samaritan", "Samaritan"), - ("samr", "Samaritan"), - ("sarb", "Old_South_Arabian"), - ("saur", "Saurashtra"), - ("saurashtra", "Saurashtra"), - ("sgnw", "SignWriting"), - ("sharada", "Sharada"), - ("shavian", "Shavian"), - ("shaw", "Shavian"), - ("shrd", "Sharada"), - ("sidd", "Siddham"), - ("siddham", "Siddham"), - ("signwriting", "SignWriting"), - ("sind", "Khudawadi"), - ("sinh", "Sinhala"), - ("sinhala", "Sinhala"), - ("sogd", "Sogdian"), - ("sogdian", "Sogdian"), - ("sogo", "Old_Sogdian"), - ("sora", "Sora_Sompeng"), - ("sorasompeng", "Sora_Sompeng"), - 
("soyo", "Soyombo"), - ("soyombo", "Soyombo"), - ("sund", "Sundanese"), - ("sundanese", "Sundanese"), - ("sunu", "Sunuwar"), - ("sunuwar", "Sunuwar"), - ("sylo", "Syloti_Nagri"), - ("sylotinagri", "Syloti_Nagri"), - ("syrc", "Syriac"), - ("syriac", "Syriac"), - ("tagalog", "Tagalog"), - ("tagb", "Tagbanwa"), - ("tagbanwa", "Tagbanwa"), - ("taile", "Tai_Le"), - ("taitham", "Tai_Tham"), - ("taiviet", "Tai_Viet"), - ("takr", "Takri"), - ("takri", "Takri"), - ("tale", "Tai_Le"), - ("talu", "New_Tai_Lue"), - ("tamil", "Tamil"), - ("taml", "Tamil"), - ("tang", "Tangut"), - ("tangsa", "Tangsa"), - ("tangut", "Tangut"), - ("tavt", "Tai_Viet"), - ("telu", "Telugu"), - ("telugu", "Telugu"), - ("tfng", "Tifinagh"), - ("tglg", "Tagalog"), - ("thaa", "Thaana"), - ("thaana", "Thaana"), - ("thai", "Thai"), - ("tibetan", "Tibetan"), - ("tibt", "Tibetan"), - ("tifinagh", "Tifinagh"), - ("tirh", "Tirhuta"), - ("tirhuta", "Tirhuta"), - ("tnsa", "Tangsa"), - ("todhri", "Todhri"), - ("todr", "Todhri"), - ("toto", "Toto"), - ("tulutigalari", "Tulu_Tigalari"), - ("tutg", "Tulu_Tigalari"), - ("ugar", "Ugaritic"), - ("ugaritic", "Ugaritic"), - ("unknown", "Unknown"), - ("vai", "Vai"), - ("vaii", "Vai"), - ("vith", "Vithkuqi"), - ("vithkuqi", "Vithkuqi"), - ("wancho", "Wancho"), - ("wara", "Warang_Citi"), - ("warangciti", "Warang_Citi"), - ("wcho", "Wancho"), - ("xpeo", "Old_Persian"), - ("xsux", "Cuneiform"), - ("yezi", "Yezidi"), - ("yezidi", "Yezidi"), - ("yi", "Yi"), - ("yiii", "Yi"), - ("zanabazarsquare", "Zanabazar_Square"), - ("zanb", "Zanabazar_Square"), - ("zinh", "Inherited"), - ("zyyy", "Common"), - ("zzzz", "Unknown"), - ], - ), - ( - "Script_Extensions", - &[ - ("adlam", "Adlam"), - ("adlm", "Adlam"), - ("aghb", "Caucasian_Albanian"), - ("ahom", "Ahom"), - ("anatolianhieroglyphs", "Anatolian_Hieroglyphs"), - ("arab", "Arabic"), - ("arabic", "Arabic"), - ("armenian", "Armenian"), - ("armi", "Imperial_Aramaic"), - ("armn", "Armenian"), - ("avestan", "Avestan"), - ("avst", 
"Avestan"), - ("bali", "Balinese"), - ("balinese", "Balinese"), - ("bamu", "Bamum"), - ("bamum", "Bamum"), - ("bass", "Bassa_Vah"), - ("bassavah", "Bassa_Vah"), - ("batak", "Batak"), - ("batk", "Batak"), - ("beng", "Bengali"), - ("bengali", "Bengali"), - ("bhaiksuki", "Bhaiksuki"), - ("bhks", "Bhaiksuki"), - ("bopo", "Bopomofo"), - ("bopomofo", "Bopomofo"), - ("brah", "Brahmi"), - ("brahmi", "Brahmi"), - ("brai", "Braille"), - ("braille", "Braille"), - ("bugi", "Buginese"), - ("buginese", "Buginese"), - ("buhd", "Buhid"), - ("buhid", "Buhid"), - ("cakm", "Chakma"), - ("canadianaboriginal", "Canadian_Aboriginal"), - ("cans", "Canadian_Aboriginal"), - ("cari", "Carian"), - ("carian", "Carian"), - ("caucasianalbanian", "Caucasian_Albanian"), - ("chakma", "Chakma"), - ("cham", "Cham"), - ("cher", "Cherokee"), - ("cherokee", "Cherokee"), - ("chorasmian", "Chorasmian"), - ("chrs", "Chorasmian"), - ("common", "Common"), - ("copt", "Coptic"), - ("coptic", "Coptic"), - ("cpmn", "Cypro_Minoan"), - ("cprt", "Cypriot"), - ("cuneiform", "Cuneiform"), - ("cypriot", "Cypriot"), - ("cyprominoan", "Cypro_Minoan"), - ("cyrillic", "Cyrillic"), - ("cyrl", "Cyrillic"), - ("deseret", "Deseret"), - ("deva", "Devanagari"), - ("devanagari", "Devanagari"), - ("diak", "Dives_Akuru"), - ("divesakuru", "Dives_Akuru"), - ("dogr", "Dogra"), - ("dogra", "Dogra"), - ("dsrt", "Deseret"), - ("dupl", "Duployan"), - ("duployan", "Duployan"), - ("egyp", "Egyptian_Hieroglyphs"), - ("egyptianhieroglyphs", "Egyptian_Hieroglyphs"), - ("elba", "Elbasan"), - ("elbasan", "Elbasan"), - ("elym", "Elymaic"), - ("elymaic", "Elymaic"), - ("ethi", "Ethiopic"), - ("ethiopic", "Ethiopic"), - ("gara", "Garay"), - ("garay", "Garay"), - ("geor", "Georgian"), - ("georgian", "Georgian"), - ("glag", "Glagolitic"), - ("glagolitic", "Glagolitic"), - ("gong", "Gunjala_Gondi"), - ("gonm", "Masaram_Gondi"), - ("goth", "Gothic"), - ("gothic", "Gothic"), - ("gran", "Grantha"), - ("grantha", "Grantha"), - ("greek", "Greek"), - 
("grek", "Greek"), - ("gujarati", "Gujarati"), - ("gujr", "Gujarati"), - ("gukh", "Gurung_Khema"), - ("gunjalagondi", "Gunjala_Gondi"), - ("gurmukhi", "Gurmukhi"), - ("guru", "Gurmukhi"), - ("gurungkhema", "Gurung_Khema"), - ("han", "Han"), - ("hang", "Hangul"), - ("hangul", "Hangul"), - ("hani", "Han"), - ("hanifirohingya", "Hanifi_Rohingya"), - ("hano", "Hanunoo"), - ("hanunoo", "Hanunoo"), - ("hatr", "Hatran"), - ("hatran", "Hatran"), - ("hebr", "Hebrew"), - ("hebrew", "Hebrew"), - ("hira", "Hiragana"), - ("hiragana", "Hiragana"), - ("hluw", "Anatolian_Hieroglyphs"), - ("hmng", "Pahawh_Hmong"), - ("hmnp", "Nyiakeng_Puachue_Hmong"), - ("hrkt", "Katakana_Or_Hiragana"), - ("hung", "Old_Hungarian"), - ("imperialaramaic", "Imperial_Aramaic"), - ("inherited", "Inherited"), - ("inscriptionalpahlavi", "Inscriptional_Pahlavi"), - ("inscriptionalparthian", "Inscriptional_Parthian"), - ("ital", "Old_Italic"), - ("java", "Javanese"), - ("javanese", "Javanese"), - ("kaithi", "Kaithi"), - ("kali", "Kayah_Li"), - ("kana", "Katakana"), - ("kannada", "Kannada"), - ("katakana", "Katakana"), - ("katakanaorhiragana", "Katakana_Or_Hiragana"), - ("kawi", "Kawi"), - ("kayahli", "Kayah_Li"), - ("khar", "Kharoshthi"), - ("kharoshthi", "Kharoshthi"), - ("khitansmallscript", "Khitan_Small_Script"), - ("khmer", "Khmer"), - ("khmr", "Khmer"), - ("khoj", "Khojki"), - ("khojki", "Khojki"), - ("khudawadi", "Khudawadi"), - ("kiratrai", "Kirat_Rai"), - ("kits", "Khitan_Small_Script"), - ("knda", "Kannada"), - ("krai", "Kirat_Rai"), - ("kthi", "Kaithi"), - ("lana", "Tai_Tham"), - ("lao", "Lao"), - ("laoo", "Lao"), - ("latin", "Latin"), - ("latn", "Latin"), - ("lepc", "Lepcha"), - ("lepcha", "Lepcha"), - ("limb", "Limbu"), - ("limbu", "Limbu"), - ("lina", "Linear_A"), - ("linb", "Linear_B"), - ("lineara", "Linear_A"), - ("linearb", "Linear_B"), - ("lisu", "Lisu"), - ("lyci", "Lycian"), - ("lycian", "Lycian"), - ("lydi", "Lydian"), - ("lydian", "Lydian"), - ("mahajani", "Mahajani"), - ("mahj", 
"Mahajani"), - ("maka", "Makasar"), - ("makasar", "Makasar"), - ("malayalam", "Malayalam"), - ("mand", "Mandaic"), - ("mandaic", "Mandaic"), - ("mani", "Manichaean"), - ("manichaean", "Manichaean"), - ("marc", "Marchen"), - ("marchen", "Marchen"), - ("masaramgondi", "Masaram_Gondi"), - ("medefaidrin", "Medefaidrin"), - ("medf", "Medefaidrin"), - ("meeteimayek", "Meetei_Mayek"), - ("mend", "Mende_Kikakui"), - ("mendekikakui", "Mende_Kikakui"), - ("merc", "Meroitic_Cursive"), - ("mero", "Meroitic_Hieroglyphs"), - ("meroiticcursive", "Meroitic_Cursive"), - ("meroitichieroglyphs", "Meroitic_Hieroglyphs"), - ("miao", "Miao"), - ("mlym", "Malayalam"), - ("modi", "Modi"), - ("mong", "Mongolian"), - ("mongolian", "Mongolian"), - ("mro", "Mro"), - ("mroo", "Mro"), - ("mtei", "Meetei_Mayek"), - ("mult", "Multani"), - ("multani", "Multani"), - ("myanmar", "Myanmar"), - ("mymr", "Myanmar"), - ("nabataean", "Nabataean"), - ("nagm", "Nag_Mundari"), - ("nagmundari", "Nag_Mundari"), - ("nand", "Nandinagari"), - ("nandinagari", "Nandinagari"), - ("narb", "Old_North_Arabian"), - ("nbat", "Nabataean"), - ("newa", "Newa"), - ("newtailue", "New_Tai_Lue"), - ("nko", "Nko"), - ("nkoo", "Nko"), - ("nshu", "Nushu"), - ("nushu", "Nushu"), - ("nyiakengpuachuehmong", "Nyiakeng_Puachue_Hmong"), - ("ogam", "Ogham"), - ("ogham", "Ogham"), - ("olchiki", "Ol_Chiki"), - ("olck", "Ol_Chiki"), - ("oldhungarian", "Old_Hungarian"), - ("olditalic", "Old_Italic"), - ("oldnortharabian", "Old_North_Arabian"), - ("oldpermic", "Old_Permic"), - ("oldpersian", "Old_Persian"), - ("oldsogdian", "Old_Sogdian"), - ("oldsoutharabian", "Old_South_Arabian"), - ("oldturkic", "Old_Turkic"), - ("olduyghur", "Old_Uyghur"), - ("olonal", "Ol_Onal"), - ("onao", "Ol_Onal"), - ("oriya", "Oriya"), - ("orkh", "Old_Turkic"), - ("orya", "Oriya"), - ("osage", "Osage"), - ("osge", "Osage"), - ("osma", "Osmanya"), - ("osmanya", "Osmanya"), - ("ougr", "Old_Uyghur"), - ("pahawhhmong", "Pahawh_Hmong"), - ("palm", "Palmyrene"), - 
("palmyrene", "Palmyrene"), - ("pauc", "Pau_Cin_Hau"), - ("paucinhau", "Pau_Cin_Hau"), - ("perm", "Old_Permic"), - ("phag", "Phags_Pa"), - ("phagspa", "Phags_Pa"), - ("phli", "Inscriptional_Pahlavi"), - ("phlp", "Psalter_Pahlavi"), - ("phnx", "Phoenician"), - ("phoenician", "Phoenician"), - ("plrd", "Miao"), - ("prti", "Inscriptional_Parthian"), - ("psalterpahlavi", "Psalter_Pahlavi"), - ("qaac", "Coptic"), - ("qaai", "Inherited"), - ("rejang", "Rejang"), - ("rjng", "Rejang"), - ("rohg", "Hanifi_Rohingya"), - ("runic", "Runic"), - ("runr", "Runic"), - ("samaritan", "Samaritan"), - ("samr", "Samaritan"), - ("sarb", "Old_South_Arabian"), - ("saur", "Saurashtra"), - ("saurashtra", "Saurashtra"), - ("sgnw", "SignWriting"), - ("sharada", "Sharada"), - ("shavian", "Shavian"), - ("shaw", "Shavian"), - ("shrd", "Sharada"), - ("sidd", "Siddham"), - ("siddham", "Siddham"), - ("signwriting", "SignWriting"), - ("sind", "Khudawadi"), - ("sinh", "Sinhala"), - ("sinhala", "Sinhala"), - ("sogd", "Sogdian"), - ("sogdian", "Sogdian"), - ("sogo", "Old_Sogdian"), - ("sora", "Sora_Sompeng"), - ("sorasompeng", "Sora_Sompeng"), - ("soyo", "Soyombo"), - ("soyombo", "Soyombo"), - ("sund", "Sundanese"), - ("sundanese", "Sundanese"), - ("sunu", "Sunuwar"), - ("sunuwar", "Sunuwar"), - ("sylo", "Syloti_Nagri"), - ("sylotinagri", "Syloti_Nagri"), - ("syrc", "Syriac"), - ("syriac", "Syriac"), - ("tagalog", "Tagalog"), - ("tagb", "Tagbanwa"), - ("tagbanwa", "Tagbanwa"), - ("taile", "Tai_Le"), - ("taitham", "Tai_Tham"), - ("taiviet", "Tai_Viet"), - ("takr", "Takri"), - ("takri", "Takri"), - ("tale", "Tai_Le"), - ("talu", "New_Tai_Lue"), - ("tamil", "Tamil"), - ("taml", "Tamil"), - ("tang", "Tangut"), - ("tangsa", "Tangsa"), - ("tangut", "Tangut"), - ("tavt", "Tai_Viet"), - ("telu", "Telugu"), - ("telugu", "Telugu"), - ("tfng", "Tifinagh"), - ("tglg", "Tagalog"), - ("thaa", "Thaana"), - ("thaana", "Thaana"), - ("thai", "Thai"), - ("tibetan", "Tibetan"), - ("tibt", "Tibetan"), - ("tifinagh", 
"Tifinagh"), - ("tirh", "Tirhuta"), - ("tirhuta", "Tirhuta"), - ("tnsa", "Tangsa"), - ("todhri", "Todhri"), - ("todr", "Todhri"), - ("toto", "Toto"), - ("tulutigalari", "Tulu_Tigalari"), - ("tutg", "Tulu_Tigalari"), - ("ugar", "Ugaritic"), - ("ugaritic", "Ugaritic"), - ("unknown", "Unknown"), - ("vai", "Vai"), - ("vaii", "Vai"), - ("vith", "Vithkuqi"), - ("vithkuqi", "Vithkuqi"), - ("wancho", "Wancho"), - ("wara", "Warang_Citi"), - ("warangciti", "Warang_Citi"), - ("wcho", "Wancho"), - ("xpeo", "Old_Persian"), - ("xsux", "Cuneiform"), - ("yezi", "Yezidi"), - ("yezidi", "Yezidi"), - ("yi", "Yi"), - ("yiii", "Yi"), - ("zanabazarsquare", "Zanabazar_Square"), - ("zanb", "Zanabazar_Square"), - ("zinh", "Inherited"), - ("zyyy", "Common"), - ("zzzz", "Unknown"), - ], - ), - ( - "Sentence_Break", - &[ - ("at", "ATerm"), - ("aterm", "ATerm"), - ("cl", "Close"), - ("close", "Close"), - ("cr", "CR"), - ("ex", "Extend"), - ("extend", "Extend"), - ("fo", "Format"), - ("format", "Format"), - ("le", "OLetter"), - ("lf", "LF"), - ("lo", "Lower"), - ("lower", "Lower"), - ("nu", "Numeric"), - ("numeric", "Numeric"), - ("oletter", "OLetter"), - ("other", "Other"), - ("sc", "SContinue"), - ("scontinue", "SContinue"), - ("se", "Sep"), - ("sep", "Sep"), - ("sp", "Sp"), - ("st", "STerm"), - ("sterm", "STerm"), - ("up", "Upper"), - ("upper", "Upper"), - ("xx", "Other"), - ], - ), - ( - "Word_Break", - &[ - ("aletter", "ALetter"), - ("cr", "CR"), - ("doublequote", "Double_Quote"), - ("dq", "Double_Quote"), - ("eb", "E_Base"), - ("ebase", "E_Base"), - ("ebasegaz", "E_Base_GAZ"), - ("ebg", "E_Base_GAZ"), - ("em", "E_Modifier"), - ("emodifier", "E_Modifier"), - ("ex", "ExtendNumLet"), - ("extend", "Extend"), - ("extendnumlet", "ExtendNumLet"), - ("fo", "Format"), - ("format", "Format"), - ("gaz", "Glue_After_Zwj"), - ("glueafterzwj", "Glue_After_Zwj"), - ("hebrewletter", "Hebrew_Letter"), - ("hl", "Hebrew_Letter"), - ("ka", "Katakana"), - ("katakana", "Katakana"), - ("le", "ALetter"), - 
("lf", "LF"), - ("mb", "MidNumLet"), - ("midletter", "MidLetter"), - ("midnum", "MidNum"), - ("midnumlet", "MidNumLet"), - ("ml", "MidLetter"), - ("mn", "MidNum"), - ("newline", "Newline"), - ("nl", "Newline"), - ("nu", "Numeric"), - ("numeric", "Numeric"), - ("other", "Other"), - ("regionalindicator", "Regional_Indicator"), - ("ri", "Regional_Indicator"), - ("singlequote", "Single_Quote"), - ("sq", "Single_Quote"), - ("wsegspace", "WSegSpace"), - ("xx", "Other"), - ("zwj", "ZWJ"), - ], - ), -]; diff --git a/vendor/regex-syntax/src/unicode_tables/script.rs b/vendor/regex-syntax/src/unicode_tables/script.rs deleted file mode 100644 index 3e437ca9..00000000 --- a/vendor/regex-syntax/src/unicode_tables/script.rs +++ /dev/null @@ -1,1300 +0,0 @@ -// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: -// -// ucd-generate script ucd-16.0.0 --chars -// -// Unicode version: 16.0.0. -// -// ucd-generate 0.3.1 is available on crates.io. - -pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ - ("Adlam", ADLAM), - ("Ahom", AHOM), - ("Anatolian_Hieroglyphs", ANATOLIAN_HIEROGLYPHS), - ("Arabic", ARABIC), - ("Armenian", ARMENIAN), - ("Avestan", AVESTAN), - ("Balinese", BALINESE), - ("Bamum", BAMUM), - ("Bassa_Vah", BASSA_VAH), - ("Batak", BATAK), - ("Bengali", BENGALI), - ("Bhaiksuki", BHAIKSUKI), - ("Bopomofo", BOPOMOFO), - ("Brahmi", BRAHMI), - ("Braille", BRAILLE), - ("Buginese", BUGINESE), - ("Buhid", BUHID), - ("Canadian_Aboriginal", CANADIAN_ABORIGINAL), - ("Carian", CARIAN), - ("Caucasian_Albanian", CAUCASIAN_ALBANIAN), - ("Chakma", CHAKMA), - ("Cham", CHAM), - ("Cherokee", CHEROKEE), - ("Chorasmian", CHORASMIAN), - ("Common", COMMON), - ("Coptic", COPTIC), - ("Cuneiform", CUNEIFORM), - ("Cypriot", CYPRIOT), - ("Cypro_Minoan", CYPRO_MINOAN), - ("Cyrillic", CYRILLIC), - ("Deseret", DESERET), - ("Devanagari", DEVANAGARI), - ("Dives_Akuru", DIVES_AKURU), - ("Dogra", DOGRA), - ("Duployan", DUPLOYAN), - ("Egyptian_Hieroglyphs", 
EGYPTIAN_HIEROGLYPHS), - ("Elbasan", ELBASAN), - ("Elymaic", ELYMAIC), - ("Ethiopic", ETHIOPIC), - ("Garay", GARAY), - ("Georgian", GEORGIAN), - ("Glagolitic", GLAGOLITIC), - ("Gothic", GOTHIC), - ("Grantha", GRANTHA), - ("Greek", GREEK), - ("Gujarati", GUJARATI), - ("Gunjala_Gondi", GUNJALA_GONDI), - ("Gurmukhi", GURMUKHI), - ("Gurung_Khema", GURUNG_KHEMA), - ("Han", HAN), - ("Hangul", HANGUL), - ("Hanifi_Rohingya", HANIFI_ROHINGYA), - ("Hanunoo", HANUNOO), - ("Hatran", HATRAN), - ("Hebrew", HEBREW), - ("Hiragana", HIRAGANA), - ("Imperial_Aramaic", IMPERIAL_ARAMAIC), - ("Inherited", INHERITED), - ("Inscriptional_Pahlavi", INSCRIPTIONAL_PAHLAVI), - ("Inscriptional_Parthian", INSCRIPTIONAL_PARTHIAN), - ("Javanese", JAVANESE), - ("Kaithi", KAITHI), - ("Kannada", KANNADA), - ("Katakana", KATAKANA), - ("Kawi", KAWI), - ("Kayah_Li", KAYAH_LI), - ("Kharoshthi", KHAROSHTHI), - ("Khitan_Small_Script", KHITAN_SMALL_SCRIPT), - ("Khmer", KHMER), - ("Khojki", KHOJKI), - ("Khudawadi", KHUDAWADI), - ("Kirat_Rai", KIRAT_RAI), - ("Lao", LAO), - ("Latin", LATIN), - ("Lepcha", LEPCHA), - ("Limbu", LIMBU), - ("Linear_A", LINEAR_A), - ("Linear_B", LINEAR_B), - ("Lisu", LISU), - ("Lycian", LYCIAN), - ("Lydian", LYDIAN), - ("Mahajani", MAHAJANI), - ("Makasar", MAKASAR), - ("Malayalam", MALAYALAM), - ("Mandaic", MANDAIC), - ("Manichaean", MANICHAEAN), - ("Marchen", MARCHEN), - ("Masaram_Gondi", MASARAM_GONDI), - ("Medefaidrin", MEDEFAIDRIN), - ("Meetei_Mayek", MEETEI_MAYEK), - ("Mende_Kikakui", MENDE_KIKAKUI), - ("Meroitic_Cursive", MEROITIC_CURSIVE), - ("Meroitic_Hieroglyphs", MEROITIC_HIEROGLYPHS), - ("Miao", MIAO), - ("Modi", MODI), - ("Mongolian", MONGOLIAN), - ("Mro", MRO), - ("Multani", MULTANI), - ("Myanmar", MYANMAR), - ("Nabataean", NABATAEAN), - ("Nag_Mundari", NAG_MUNDARI), - ("Nandinagari", NANDINAGARI), - ("New_Tai_Lue", NEW_TAI_LUE), - ("Newa", NEWA), - ("Nko", NKO), - ("Nushu", NUSHU), - ("Nyiakeng_Puachue_Hmong", NYIAKENG_PUACHUE_HMONG), - ("Ogham", OGHAM), - ("Ol_Chiki", 
OL_CHIKI), - ("Ol_Onal", OL_ONAL), - ("Old_Hungarian", OLD_HUNGARIAN), - ("Old_Italic", OLD_ITALIC), - ("Old_North_Arabian", OLD_NORTH_ARABIAN), - ("Old_Permic", OLD_PERMIC), - ("Old_Persian", OLD_PERSIAN), - ("Old_Sogdian", OLD_SOGDIAN), - ("Old_South_Arabian", OLD_SOUTH_ARABIAN), - ("Old_Turkic", OLD_TURKIC), - ("Old_Uyghur", OLD_UYGHUR), - ("Oriya", ORIYA), - ("Osage", OSAGE), - ("Osmanya", OSMANYA), - ("Pahawh_Hmong", PAHAWH_HMONG), - ("Palmyrene", PALMYRENE), - ("Pau_Cin_Hau", PAU_CIN_HAU), - ("Phags_Pa", PHAGS_PA), - ("Phoenician", PHOENICIAN), - ("Psalter_Pahlavi", PSALTER_PAHLAVI), - ("Rejang", REJANG), - ("Runic", RUNIC), - ("Samaritan", SAMARITAN), - ("Saurashtra", SAURASHTRA), - ("Sharada", SHARADA), - ("Shavian", SHAVIAN), - ("Siddham", SIDDHAM), - ("SignWriting", SIGNWRITING), - ("Sinhala", SINHALA), - ("Sogdian", SOGDIAN), - ("Sora_Sompeng", SORA_SOMPENG), - ("Soyombo", SOYOMBO), - ("Sundanese", SUNDANESE), - ("Sunuwar", SUNUWAR), - ("Syloti_Nagri", SYLOTI_NAGRI), - ("Syriac", SYRIAC), - ("Tagalog", TAGALOG), - ("Tagbanwa", TAGBANWA), - ("Tai_Le", TAI_LE), - ("Tai_Tham", TAI_THAM), - ("Tai_Viet", TAI_VIET), - ("Takri", TAKRI), - ("Tamil", TAMIL), - ("Tangsa", TANGSA), - ("Tangut", TANGUT), - ("Telugu", TELUGU), - ("Thaana", THAANA), - ("Thai", THAI), - ("Tibetan", TIBETAN), - ("Tifinagh", TIFINAGH), - ("Tirhuta", TIRHUTA), - ("Todhri", TODHRI), - ("Toto", TOTO), - ("Tulu_Tigalari", TULU_TIGALARI), - ("Ugaritic", UGARITIC), - ("Vai", VAI), - ("Vithkuqi", VITHKUQI), - ("Wancho", WANCHO), - ("Warang_Citi", WARANG_CITI), - ("Yezidi", YEZIDI), - ("Yi", YI), - ("Zanabazar_Square", ZANABAZAR_SQUARE), -]; - -pub const ADLAM: &'static [(char, char)] = - &[('𞤀', '𞥋'), ('𞥐', '𞥙'), ('𞥞', '𞥟')]; - -pub const AHOM: &'static [(char, char)] = - &[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑝆')]; - -pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[('𔐀', '𔙆')]; - -pub const ARABIC: &'static [(char, char)] = &[ - ('\u{600}', '\u{604}'), - ('؆', '؋'), - 
('؍', '\u{61a}'), - ('\u{61c}', '؞'), - ('ؠ', 'ؿ'), - ('ف', 'ي'), - ('\u{656}', 'ٯ'), - ('ٱ', '\u{6dc}'), - ('۞', 'ۿ'), - ('ݐ', 'ݿ'), - ('ࡰ', 'ࢎ'), - ('\u{890}', '\u{891}'), - ('\u{897}', '\u{8e1}'), - ('\u{8e3}', '\u{8ff}'), - ('ﭐ', '﯂'), - ('ﯓ', 'ﴽ'), - ('﵀', 'ﶏ'), - ('ﶒ', 'ﷇ'), - ('﷏', '﷏'), - ('ﷰ', '﷿'), - ('ﹰ', 'ﹴ'), - ('ﹶ', 'ﻼ'), - ('𐹠', '𐹾'), - ('𐻂', '𐻄'), - ('\u{10efc}', '\u{10eff}'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('𞻰', '𞻱'), -]; - -pub const ARMENIAN: &'static [(char, char)] = - &[('Ա', 'Ֆ'), ('ՙ', '֊'), ('֍', '֏'), ('ﬓ', 'ﬗ')]; - -pub const AVESTAN: &'static [(char, char)] = &[('𐬀', '𐬵'), ('𐬹', '𐬿')]; - -pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭌ'), ('᭎', '᭿')]; - -pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')]; - -pub const BASSA_VAH: &'static [(char, char)] = - &[('𖫐', '𖫭'), ('\u{16af0}', '𖫵')]; - -pub const BATAK: &'static [(char, char)] = &[('ᯀ', '\u{1bf3}'), ('᯼', '᯿')]; - -pub const BENGALI: &'static [(char, char)] = &[ - ('ঀ', 'ঃ'), - ('অ', 'ঌ'), - ('এ', 'ঐ'), - ('ও', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('\u{9bc}', '\u{9c4}'), - ('ে', 'ৈ'), - ('ো', 'ৎ'), - ('\u{9d7}', '\u{9d7}'), - ('ড়', 'ঢ়'), - ('য়', '\u{9e3}'), - ('০', '\u{9fe}'), -]; - -pub const BHAIKSUKI: &'static [(char, char)] = - &[('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱅'), ('𑱐', '𑱬')]; - -pub const BOPOMOFO: &'static [(char, char)] = - &[('˪', '˫'), ('ㄅ', 'ㄯ'), ('ㆠ', 'ㆿ')]; - -pub const BRAHMI: &'static [(char, char)] = - &[('𑀀', '𑁍'), ('𑁒', '𑁵'), ('\u{1107f}', '\u{1107f}')]; - -pub const 
BRAILLE: &'static [(char, char)] = &[('⠀', '⣿')]; - -pub const BUGINESE: &'static [(char, char)] = &[('ᨀ', '\u{1a1b}'), ('᨞', '᨟')]; - -pub const BUHID: &'static [(char, char)] = &[('ᝀ', '\u{1753}')]; - -pub const CANADIAN_ABORIGINAL: &'static [(char, char)] = - &[('᐀', 'ᙿ'), ('ᢰ', 'ᣵ'), ('𑪰', '𑪿')]; - -pub const CARIAN: &'static [(char, char)] = &[('𐊠', '𐋐')]; - -pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = - &[('𐔰', '𐕣'), ('𐕯', '𐕯')]; - -pub const CHAKMA: &'static [(char, char)] = - &[('\u{11100}', '\u{11134}'), ('𑄶', '𑅇')]; - -pub const CHAM: &'static [(char, char)] = - &[('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟')]; - -pub const CHEROKEE: &'static [(char, char)] = - &[('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ꭰ', 'ꮿ')]; - -pub const CHORASMIAN: &'static [(char, char)] = &[('𐾰', '𐿋')]; - -pub const COMMON: &'static [(char, char)] = &[ - ('\0', '@'), - ('[', '`'), - ('{', '©'), - ('«', '¹'), - ('»', '¿'), - ('×', '×'), - ('÷', '÷'), - ('ʹ', '˟'), - ('˥', '˩'), - ('ˬ', '˿'), - ('ʹ', 'ʹ'), - (';', ';'), - ('΅', '΅'), - ('·', '·'), - ('\u{605}', '\u{605}'), - ('،', '،'), - ('؛', '؛'), - ('؟', '؟'), - ('ـ', 'ـ'), - ('\u{6dd}', '\u{6dd}'), - ('\u{8e2}', '\u{8e2}'), - ('।', '॥'), - ('฿', '฿'), - ('࿕', '࿘'), - ('჻', '჻'), - ('᛫', '᛭'), - ('᜵', '᜶'), - ('᠂', '᠃'), - ('᠅', '᠅'), - ('᳓', '᳓'), - ('᳡', '᳡'), - ('ᳩ', 'ᳬ'), - ('ᳮ', 'ᳳ'), - ('ᳵ', '᳷'), - ('ᳺ', 'ᳺ'), - ('\u{2000}', '\u{200b}'), - ('\u{200e}', '\u{2064}'), - ('\u{2066}', '⁰'), - ('⁴', '⁾'), - ('₀', '₎'), - ('₠', '⃀'), - ('℀', '℥'), - ('℧', '℩'), - ('ℬ', 'ℱ'), - ('ℳ', '⅍'), - ('⅏', '⅟'), - ('↉', '↋'), - ('←', '␩'), - ('⑀', '⑊'), - ('①', '⟿'), - ('⤀', '⭳'), - ('⭶', '⮕'), - ('⮗', '⯿'), - ('⸀', '⹝'), - ('⿰', '〄'), - ('〆', '〆'), - ('〈', '〠'), - ('〰', '〷'), - ('〼', '〿'), - ('゛', '゜'), - ('゠', '゠'), - ('・', 'ー'), - ('㆐', '㆟'), - ('㇀', '㇥'), - ('㇯', '㇯'), - ('㈠', '㉟'), - ('㉿', '㋏'), - ('㋿', '㋿'), - ('㍘', '㏿'), - ('䷀', '䷿'), - ('꜀', '꜡'), - ('ꞈ', '꞊'), - ('꠰', '꠹'), - ('꤮', '꤮'), - ('ꧏ', 'ꧏ'), - ('꭛', '꭛'), - ('꭪', 
'꭫'), - ('﴾', '﴿'), - ('︐', '︙'), - ('︰', '﹒'), - ('﹔', '﹦'), - ('﹨', '﹫'), - ('\u{feff}', '\u{feff}'), - ('!', '@'), - ('[', '`'), - ('{', '・'), - ('ー', 'ー'), - ('\u{ff9e}', '\u{ff9f}'), - ('¢', '₩'), - ('│', '○'), - ('\u{fff9}', '�'), - ('𐄀', '𐄂'), - ('𐄇', '𐄳'), - ('𐄷', '𐄿'), - ('𐆐', '𐆜'), - ('𐇐', '𐇼'), - ('𐋡', '𐋻'), - ('\u{1bca0}', '\u{1bca3}'), - ('𜰀', '𜳹'), - ('𜴀', '𜺳'), - ('𜽐', '𜿃'), - ('𝀀', '𝃵'), - ('𝄀', '𝄦'), - ('𝄩', '\u{1d166}'), - ('𝅪', '\u{1d17a}'), - ('𝆃', '𝆄'), - ('𝆌', '𝆩'), - ('𝆮', '𝇪'), - ('𝋀', '𝋓'), - ('𝋠', '𝋳'), - ('𝌀', '𝍖'), - ('𝍠', '𝍸'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝟋'), - ('𝟎', '𝟿'), - ('𞱱', '𞲴'), - ('𞴁', '𞴽'), - ('🀀', '🀫'), - ('🀰', '🂓'), - ('🂠', '🂮'), - ('🂱', '🂿'), - ('🃁', '🃏'), - ('🃑', '🃵'), - ('🄀', '🆭'), - ('🇦', '🇿'), - ('🈁', '🈂'), - ('🈐', '🈻'), - ('🉀', '🉈'), - ('🉐', '🉑'), - ('🉠', '🉥'), - ('🌀', '🛗'), - ('🛜', '🛬'), - ('🛰', '🛼'), - ('🜀', '🝶'), - ('🝻', '🟙'), - ('🟠', '🟫'), - ('🟰', '🟰'), - ('🠀', '🠋'), - ('🠐', '🡇'), - ('🡐', '🡙'), - ('🡠', '🢇'), - ('🢐', '🢭'), - ('🢰', '🢻'), - ('🣀', '🣁'), - ('🤀', '🩓'), - ('🩠', '🩭'), - ('🩰', '🩼'), - ('🪀', '🪉'), - ('🪏', '🫆'), - ('🫎', '🫜'), - ('🫟', '🫩'), - ('🫰', '🫸'), - ('🬀', '🮒'), - ('🮔', '🯹'), - ('\u{e0001}', '\u{e0001}'), - ('\u{e0020}', '\u{e007f}'), -]; - -pub const COPTIC: &'static [(char, char)] = - &[('Ϣ', 'ϯ'), ('Ⲁ', 'ⳳ'), ('⳹', '⳿')]; - -pub const CUNEIFORM: &'static [(char, char)] = - &[('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃')]; - -pub const CYPRIOT: &'static [(char, char)] = - &[('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐠿')]; - -pub const CYPRO_MINOAN: &'static [(char, char)] = &[('𒾐', '𒿲')]; - -pub const CYRILLIC: &'static [(char, char)] = &[ - ('Ѐ', '\u{484}'), - ('\u{487}', 'ԯ'), - ('ᲀ', 'ᲊ'), - ('ᴫ', 'ᴫ'), - ('ᵸ', 'ᵸ'), - ('\u{2de0}', 
'\u{2dff}'), - ('Ꙁ', '\u{a69f}'), - ('\u{fe2e}', '\u{fe2f}'), - ('𞀰', '𞁭'), - ('\u{1e08f}', '\u{1e08f}'), -]; - -pub const DESERET: &'static [(char, char)] = &[('𐐀', '𐑏')]; - -pub const DEVANAGARI: &'static [(char, char)] = &[ - ('\u{900}', 'ॐ'), - ('\u{955}', '\u{963}'), - ('०', 'ॿ'), - ('\u{a8e0}', '\u{a8ff}'), - ('𑬀', '𑬉'), -]; - -pub const DIVES_AKURU: &'static [(char, char)] = &[ - ('𑤀', '𑤆'), - ('𑤉', '𑤉'), - ('𑤌', '𑤓'), - ('𑤕', '𑤖'), - ('𑤘', '𑤵'), - ('𑤷', '𑤸'), - ('\u{1193b}', '𑥆'), - ('𑥐', '𑥙'), -]; - -pub const DOGRA: &'static [(char, char)] = &[('𑠀', '𑠻')]; - -pub const DUPLOYAN: &'static [(char, char)] = - &[('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '𛲟')]; - -pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] = - &[('𓀀', '\u{13455}'), ('𓑠', '𔏺')]; - -pub const ELBASAN: &'static [(char, char)] = &[('𐔀', '𐔧')]; - -pub const ELYMAIC: &'static [(char, char)] = &[('𐿠', '𐿶')]; - -pub const ETHIOPIC: &'static [(char, char)] = &[ - ('ሀ', 'ቈ'), - ('ቊ', 'ቍ'), - ('ቐ', 'ቖ'), - ('ቘ', 'ቘ'), - ('ቚ', 'ቝ'), - ('በ', 'ኈ'), - ('ኊ', 'ኍ'), - ('ነ', 'ኰ'), - ('ኲ', 'ኵ'), - ('ኸ', 'ኾ'), - ('ዀ', 'ዀ'), - ('ዂ', 'ዅ'), - ('ወ', 'ዖ'), - ('ዘ', 'ጐ'), - ('ጒ', 'ጕ'), - ('ጘ', 'ፚ'), - ('\u{135d}', '፼'), - ('ᎀ', '᎙'), - ('ⶀ', 'ⶖ'), - ('ⶠ', 'ⶦ'), - ('ⶨ', 'ⶮ'), - ('ⶰ', 'ⶶ'), - ('ⶸ', 'ⶾ'), - ('ⷀ', 'ⷆ'), - ('ⷈ', 'ⷎ'), - ('ⷐ', 'ⷖ'), - ('ⷘ', 'ⷞ'), - ('ꬁ', 'ꬆ'), - ('ꬉ', 'ꬎ'), - ('ꬑ', 'ꬖ'), - ('ꬠ', 'ꬦ'), - ('ꬨ', 'ꬮ'), - ('𞟠', '𞟦'), - ('𞟨', '𞟫'), - ('𞟭', '𞟮'), - ('𞟰', '𞟾'), -]; - -pub const GARAY: &'static [(char, char)] = - &[('𐵀', '𐵥'), ('\u{10d69}', '𐶅'), ('𐶎', '𐶏')]; - -pub const GEORGIAN: &'static [(char, char)] = &[ - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ა', 'ჺ'), - ('ჼ', 'ჿ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), -]; - -pub const GLAGOLITIC: &'static [(char, char)] = &[ - ('Ⰰ', 'ⱟ'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), -]; - -pub 
const GOTHIC: &'static [(char, char)] = &[('𐌰', '𐍊')]; - -pub const GRANTHA: &'static [(char, char)] = &[ - ('\u{11300}', '𑌃'), - ('𑌅', '𑌌'), - ('𑌏', '𑌐'), - ('𑌓', '𑌨'), - ('𑌪', '𑌰'), - ('𑌲', '𑌳'), - ('𑌵', '𑌹'), - ('\u{1133c}', '𑍄'), - ('𑍇', '𑍈'), - ('𑍋', '\u{1134d}'), - ('𑍐', '𑍐'), - ('\u{11357}', '\u{11357}'), - ('𑍝', '𑍣'), - ('\u{11366}', '\u{1136c}'), - ('\u{11370}', '\u{11374}'), -]; - -pub const GREEK: &'static [(char, char)] = &[ - ('Ͱ', 'ͳ'), - ('͵', 'ͷ'), - ('ͺ', 'ͽ'), - ('Ϳ', 'Ϳ'), - ('΄', '΄'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', 'ϡ'), - ('ϰ', 'Ͽ'), - ('ᴦ', 'ᴪ'), - ('ᵝ', 'ᵡ'), - ('ᵦ', 'ᵪ'), - ('ᶿ', 'ᶿ'), - ('ἀ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), - ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ῄ'), - ('ῆ', 'ΐ'), - ('ῖ', 'Ί'), - ('῝', '`'), - ('ῲ', 'ῴ'), - ('ῶ', '῾'), - ('Ω', 'Ω'), - ('ꭥ', 'ꭥ'), - ('𐅀', '𐆎'), - ('𐆠', '𐆠'), - ('𝈀', '𝉅'), -]; - -pub const GUJARATI: &'static [(char, char)] = &[ - ('\u{a81}', 'ઃ'), - ('અ', 'ઍ'), - ('એ', 'ઑ'), - ('ઓ', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('\u{abc}', '\u{ac5}'), - ('\u{ac7}', 'ૉ'), - ('ો', '\u{acd}'), - ('ૐ', 'ૐ'), - ('ૠ', '\u{ae3}'), - ('૦', '૱'), - ('ૹ', '\u{aff}'), -]; - -pub const GUNJALA_GONDI: &'static [(char, char)] = &[ - ('𑵠', '𑵥'), - ('𑵧', '𑵨'), - ('𑵪', '𑶎'), - ('\u{11d90}', '\u{11d91}'), - ('𑶓', '𑶘'), - ('𑶠', '𑶩'), -]; - -pub const GURMUKHI: &'static [(char, char)] = &[ - ('\u{a01}', 'ਃ'), - ('ਅ', 'ਊ'), - ('ਏ', 'ਐ'), - ('ਓ', 'ਨ'), - ('ਪ', 'ਰ'), - ('ਲ', 'ਲ਼'), - ('ਵ', 'ਸ਼'), - ('ਸ', 'ਹ'), - ('\u{a3c}', '\u{a3c}'), - ('ਾ', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4d}'), - ('\u{a51}', '\u{a51}'), - ('ਖ਼', 'ੜ'), - ('ਫ਼', 'ਫ਼'), - ('੦', '੶'), -]; - -pub const GURUNG_KHEMA: &'static [(char, char)] = &[('𖄀', '𖄹')]; - -pub const HAN: &'static [(char, char)] = &[ - ('⺀', '⺙'), - ('⺛', '⻳'), - ('⼀', '⿕'), - ('々', '々'), - ('〇', '〇'), - ('〡', '〩'), - ('〸', '〻'), - ('㐀', '䶿'), - ('一', '鿿'), - ('豈', 
'舘'), - ('並', '龎'), - ('𖿢', '𖿣'), - ('\u{16ff0}', '\u{16ff1}'), - ('𠀀', '𪛟'), - ('𪜀', '𫜹'), - ('𫝀', '𫠝'), - ('𫠠', '𬺡'), - ('𬺰', '𮯠'), - ('𮯰', '𮹝'), - ('丽', '𪘀'), - ('𰀀', '𱍊'), - ('𱍐', '𲎯'), -]; - -pub const HANGUL: &'static [(char, char)] = &[ - ('ᄀ', 'ᇿ'), - ('\u{302e}', '\u{302f}'), - ('ㄱ', 'ㆎ'), - ('㈀', '㈞'), - ('㉠', '㉾'), - ('ꥠ', 'ꥼ'), - ('가', '힣'), - ('ힰ', 'ퟆ'), - ('ퟋ', 'ퟻ'), - ('ᅠ', 'ᄒ'), - ('ᅡ', 'ᅦ'), - ('ᅧ', 'ᅬ'), - ('ᅭ', 'ᅲ'), - ('ᅳ', 'ᅵ'), -]; - -pub const HANIFI_ROHINGYA: &'static [(char, char)] = - &[('𐴀', '\u{10d27}'), ('𐴰', '𐴹')]; - -pub const HANUNOO: &'static [(char, char)] = &[('ᜠ', '\u{1734}')]; - -pub const HATRAN: &'static [(char, char)] = - &[('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿')]; - -pub const HEBREW: &'static [(char, char)] = &[ - ('\u{591}', '\u{5c7}'), - ('א', 'ת'), - ('ׯ', '״'), - ('יִ', 'זּ'), - ('טּ', 'לּ'), - ('מּ', 'מּ'), - ('נּ', 'סּ'), - ('ףּ', 'פּ'), - ('צּ', 'ﭏ'), -]; - -pub const HIRAGANA: &'static [(char, char)] = &[ - ('ぁ', 'ゖ'), - ('ゝ', 'ゟ'), - ('𛀁', '𛄟'), - ('𛄲', '𛄲'), - ('𛅐', '𛅒'), - ('🈀', '🈀'), -]; - -pub const IMPERIAL_ARAMAIC: &'static [(char, char)] = - &[('𐡀', '𐡕'), ('𐡗', '𐡟')]; - -pub const INHERITED: &'static [(char, char)] = &[ - ('\u{300}', '\u{36f}'), - ('\u{485}', '\u{486}'), - ('\u{64b}', '\u{655}'), - ('\u{670}', '\u{670}'), - ('\u{951}', '\u{954}'), - ('\u{1ab0}', '\u{1ace}'), - ('\u{1cd0}', '\u{1cd2}'), - ('\u{1cd4}', '\u{1ce0}'), - ('\u{1ce2}', '\u{1ce8}'), - ('\u{1ced}', '\u{1ced}'), - ('\u{1cf4}', '\u{1cf4}'), - ('\u{1cf8}', '\u{1cf9}'), - ('\u{1dc0}', '\u{1dff}'), - ('\u{200c}', '\u{200d}'), - ('\u{20d0}', '\u{20f0}'), - ('\u{302a}', '\u{302d}'), - ('\u{3099}', '\u{309a}'), - ('\u{fe00}', '\u{fe0f}'), - ('\u{fe20}', '\u{fe2d}'), - ('\u{101fd}', '\u{101fd}'), - ('\u{102e0}', '\u{102e0}'), - ('\u{1133b}', '\u{1133b}'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d167}', '\u{1d169}'), - ('\u{1d17b}', '\u{1d182}'), - ('\u{1d185}', '\u{1d18b}'), - ('\u{1d1aa}', '\u{1d1ad}'), - ('\u{e0100}', 
'\u{e01ef}'), -]; - -pub const INSCRIPTIONAL_PAHLAVI: &'static [(char, char)] = - &[('𐭠', '𐭲'), ('𐭸', '𐭿')]; - -pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] = - &[('𐭀', '𐭕'), ('𐭘', '𐭟')]; - -pub const JAVANESE: &'static [(char, char)] = - &[('\u{a980}', '꧍'), ('꧐', '꧙'), ('꧞', '꧟')]; - -pub const KAITHI: &'static [(char, char)] = - &[('\u{11080}', '\u{110c2}'), ('\u{110cd}', '\u{110cd}')]; - -pub const KANNADA: &'static [(char, char)] = &[ - ('ಀ', 'ಌ'), - ('ಎ', 'ಐ'), - ('ಒ', 'ನ'), - ('ಪ', 'ಳ'), - ('ವ', 'ಹ'), - ('\u{cbc}', 'ೄ'), - ('\u{cc6}', '\u{cc8}'), - ('\u{cca}', '\u{ccd}'), - ('\u{cd5}', '\u{cd6}'), - ('ೝ', 'ೞ'), - ('ೠ', '\u{ce3}'), - ('೦', '೯'), - ('ೱ', 'ೳ'), -]; - -pub const KATAKANA: &'static [(char, char)] = &[ - ('ァ', 'ヺ'), - ('ヽ', 'ヿ'), - ('ㇰ', 'ㇿ'), - ('㋐', '㋾'), - ('㌀', '㍗'), - ('ヲ', 'ッ'), - ('ア', 'ン'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('𛀀', '𛀀'), - ('𛄠', '𛄢'), - ('𛅕', '𛅕'), - ('𛅤', '𛅧'), -]; - -pub const KAWI: &'static [(char, char)] = - &[('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f5a}')]; - -pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '\u{a92d}'), ('꤯', '꤯')]; - -pub const KHAROSHTHI: &'static [(char, char)] = &[ - ('𐨀', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '𐨓'), - ('𐨕', '𐨗'), - ('𐨙', '𐨵'), - ('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '𐩈'), - ('𐩐', '𐩘'), -]; - -pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] = - &[('\u{16fe4}', '\u{16fe4}'), ('𘬀', '𘳕'), ('𘳿', '𘳿')]; - -pub const KHMER: &'static [(char, char)] = - &[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿')]; - -pub const KHOJKI: &'static [(char, char)] = &[('𑈀', '𑈑'), ('𑈓', '\u{11241}')]; - -pub const KHUDAWADI: &'static [(char, char)] = - &[('𑊰', '\u{112ea}'), ('𑋰', '𑋹')]; - -pub const KIRAT_RAI: &'static [(char, char)] = &[('𖵀', '𖵹')]; - -pub const LAO: &'static [(char, char)] = &[ - ('ກ', 'ຂ'), - ('ຄ', 'ຄ'), - ('ຆ', 'ຊ'), - ('ຌ', 'ຣ'), - ('ລ', 'ລ'), - ('ວ', 'ຽ'), - ('ເ', 'ໄ'), - ('ໆ', 'ໆ'), - ('\u{ec8}', 
'\u{ece}'), - ('໐', '໙'), - ('ໜ', 'ໟ'), -]; - -pub const LATIN: &'static [(char, char)] = &[ - ('A', 'Z'), - ('a', 'z'), - ('ª', 'ª'), - ('º', 'º'), - ('À', 'Ö'), - ('Ø', 'ö'), - ('ø', 'ʸ'), - ('ˠ', 'ˤ'), - ('ᴀ', 'ᴥ'), - ('ᴬ', 'ᵜ'), - ('ᵢ', 'ᵥ'), - ('ᵫ', 'ᵷ'), - ('ᵹ', 'ᶾ'), - ('Ḁ', 'ỿ'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('K', 'Å'), - ('Ⅎ', 'Ⅎ'), - ('ⅎ', 'ⅎ'), - ('Ⅰ', 'ↈ'), - ('Ⱡ', 'Ɀ'), - ('Ꜣ', 'ꞇ'), - ('Ꞌ', 'ꟍ'), - ('Ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'Ƛ'), - ('ꟲ', 'ꟿ'), - ('ꬰ', 'ꭚ'), - ('ꭜ', 'ꭤ'), - ('ꭦ', 'ꭩ'), - ('ff', 'st'), - ('A', 'Z'), - ('a', 'z'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𝼀', '𝼞'), - ('𝼥', '𝼪'), -]; - -pub const LEPCHA: &'static [(char, char)] = - &[('ᰀ', '\u{1c37}'), ('᰻', '᱉'), ('ᱍ', 'ᱏ')]; - -pub const LIMBU: &'static [(char, char)] = &[ - ('ᤀ', 'ᤞ'), - ('\u{1920}', 'ᤫ'), - ('ᤰ', '\u{193b}'), - ('᥀', '᥀'), - ('᥄', '᥏'), -]; - -pub const LINEAR_A: &'static [(char, char)] = - &[('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧')]; - -pub const LINEAR_B: &'static [(char, char)] = &[ - ('𐀀', '𐀋'), - ('𐀍', '𐀦'), - ('𐀨', '𐀺'), - ('𐀼', '𐀽'), - ('𐀿', '𐁍'), - ('𐁐', '𐁝'), - ('𐂀', '𐃺'), -]; - -pub const LISU: &'static [(char, char)] = &[('ꓐ', '꓿'), ('𑾰', '𑾰')]; - -pub const LYCIAN: &'static [(char, char)] = &[('𐊀', '𐊜')]; - -pub const LYDIAN: &'static [(char, char)] = &[('𐤠', '𐤹'), ('𐤿', '𐤿')]; - -pub const MAHAJANI: &'static [(char, char)] = &[('𑅐', '𑅶')]; - -pub const MAKASAR: &'static [(char, char)] = &[('𑻠', '𑻸')]; - -pub const MALAYALAM: &'static [(char, char)] = &[ - ('\u{d00}', 'ഌ'), - ('എ', 'ഐ'), - ('ഒ', '\u{d44}'), - ('െ', 'ൈ'), - ('ൊ', '൏'), - ('ൔ', '\u{d63}'), - ('൦', 'ൿ'), -]; - -pub const MANDAIC: &'static [(char, char)] = &[('ࡀ', '\u{85b}'), ('࡞', '࡞')]; - -pub const MANICHAEAN: &'static [(char, char)] = - &[('𐫀', '\u{10ae6}'), ('𐫫', '𐫶')]; - -pub const MARCHEN: &'static [(char, char)] = - &[('𑱰', '𑲏'), ('\u{11c92}', '\u{11ca7}'), ('𑲩', '\u{11cb6}')]; - -pub const MASARAM_GONDI: &'static [(char, char)] = &[ - ('𑴀', '𑴆'), - ('𑴈', '𑴉'), - 
('𑴋', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d47}'), - ('𑵐', '𑵙'), -]; - -pub const MEDEFAIDRIN: &'static [(char, char)] = &[('𖹀', '𖺚')]; - -pub const MEETEI_MAYEK: &'static [(char, char)] = - &[('ꫠ', '\u{aaf6}'), ('ꯀ', '\u{abed}'), ('꯰', '꯹')]; - -pub const MENDE_KIKAKUI: &'static [(char, char)] = - &[('𞠀', '𞣄'), ('𞣇', '\u{1e8d6}')]; - -pub const MEROITIC_CURSIVE: &'static [(char, char)] = - &[('𐦠', '𐦷'), ('𐦼', '𐧏'), ('𐧒', '𐧿')]; - -pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = &[('𐦀', '𐦟')]; - -pub const MIAO: &'static [(char, char)] = - &[('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟')]; - -pub const MODI: &'static [(char, char)] = &[('𑘀', '𑙄'), ('𑙐', '𑙙')]; - -pub const MONGOLIAN: &'static [(char, char)] = - &[('᠀', '᠁'), ('᠄', '᠄'), ('᠆', '᠙'), ('ᠠ', 'ᡸ'), ('ᢀ', 'ᢪ'), ('𑙠', '𑙬')]; - -pub const MRO: &'static [(char, char)] = &[('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯')]; - -pub const MULTANI: &'static [(char, char)] = - &[('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩')]; - -pub const MYANMAR: &'static [(char, char)] = - &[('က', '႟'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ'), ('𑛐', '𑛣')]; - -pub const NABATAEAN: &'static [(char, char)] = &[('𐢀', '𐢞'), ('𐢧', '𐢯')]; - -pub const NAG_MUNDARI: &'static [(char, char)] = &[('𞓐', '𞓹')]; - -pub const NANDINAGARI: &'static [(char, char)] = - &[('𑦠', '𑦧'), ('𑦪', '\u{119d7}'), ('\u{119da}', '𑧤')]; - -pub const NEW_TAI_LUE: &'static [(char, char)] = - &[('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟')]; - -pub const NEWA: &'static [(char, char)] = &[('𑐀', '𑑛'), ('𑑝', '𑑡')]; - -pub const NKO: &'static [(char, char)] = &[('߀', 'ߺ'), ('\u{7fd}', '߿')]; - -pub const NUSHU: &'static [(char, char)] = &[('𖿡', '𖿡'), ('𛅰', '𛋻')]; - -pub const NYIAKENG_PUACHUE_HMONG: &'static [(char, char)] = - &[('𞄀', '𞄬'), ('\u{1e130}', '𞄽'), ('𞅀', '𞅉'), ('𞅎', '𞅏')]; - -pub const OGHAM: &'static [(char, char)] = &[('\u{1680}', '᚜')]; - -pub const OL_CHIKI: &'static [(char, char)] = &[('᱐', 
'᱿')]; - -pub const OL_ONAL: &'static [(char, char)] = &[('𞗐', '𞗺'), ('𞗿', '𞗿')]; - -pub const OLD_HUNGARIAN: &'static [(char, char)] = - &[('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿')]; - -pub const OLD_ITALIC: &'static [(char, char)] = &[('𐌀', '𐌣'), ('𐌭', '𐌯')]; - -pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[('𐪀', '𐪟')]; - -pub const OLD_PERMIC: &'static [(char, char)] = &[('𐍐', '\u{1037a}')]; - -pub const OLD_PERSIAN: &'static [(char, char)] = &[('𐎠', '𐏃'), ('𐏈', '𐏕')]; - -pub const OLD_SOGDIAN: &'static [(char, char)] = &[('𐼀', '𐼧')]; - -pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[('𐩠', '𐩿')]; - -pub const OLD_TURKIC: &'static [(char, char)] = &[('𐰀', '𐱈')]; - -pub const OLD_UYGHUR: &'static [(char, char)] = &[('𐽰', '𐾉')]; - -pub const ORIYA: &'static [(char, char)] = &[ - ('\u{b01}', 'ଃ'), - ('ଅ', 'ଌ'), - ('ଏ', 'ଐ'), - ('ଓ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 'ଳ'), - ('ଵ', 'ହ'), - ('\u{b3c}', '\u{b44}'), - ('େ', 'ୈ'), - ('ୋ', '\u{b4d}'), - ('\u{b55}', '\u{b57}'), - ('ଡ଼', 'ଢ଼'), - ('ୟ', '\u{b63}'), - ('୦', '୷'), -]; - -pub const OSAGE: &'static [(char, char)] = &[('𐒰', '𐓓'), ('𐓘', '𐓻')]; - -pub const OSMANYA: &'static [(char, char)] = &[('𐒀', '𐒝'), ('𐒠', '𐒩')]; - -pub const PAHAWH_HMONG: &'static [(char, char)] = - &[('𖬀', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖭣', '𖭷'), ('𖭽', '𖮏')]; - -pub const PALMYRENE: &'static [(char, char)] = &[('𐡠', '𐡿')]; - -pub const PAU_CIN_HAU: &'static [(char, char)] = &[('𑫀', '𑫸')]; - -pub const PHAGS_PA: &'static [(char, char)] = &[('ꡀ', '꡷')]; - -pub const PHOENICIAN: &'static [(char, char)] = &[('𐤀', '𐤛'), ('𐤟', '𐤟')]; - -pub const PSALTER_PAHLAVI: &'static [(char, char)] = - &[('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯')]; - -pub const REJANG: &'static [(char, char)] = &[('ꤰ', '\u{a953}'), ('꥟', '꥟')]; - -pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ')]; - -pub const SAMARITAN: &'static [(char, char)] = &[('ࠀ', '\u{82d}'), ('࠰', '࠾')]; - -pub const SAURASHTRA: &'static [(char, char)] = - &[('ꢀ', '\u{a8c5}'), ('꣎', 
'꣙')]; - -pub const SHARADA: &'static [(char, char)] = &[('\u{11180}', '𑇟')]; - -pub const SHAVIAN: &'static [(char, char)] = &[('𐑐', '𐑿')]; - -pub const SIDDHAM: &'static [(char, char)] = - &[('𑖀', '\u{115b5}'), ('𑖸', '\u{115dd}')]; - -pub const SIGNWRITING: &'static [(char, char)] = - &[('𝠀', '𝪋'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}')]; - -pub const SINHALA: &'static [(char, char)] = &[ - ('\u{d81}', 'ඃ'), - ('අ', 'ඖ'), - ('ක', 'න'), - ('ඳ', 'ර'), - ('ල', 'ල'), - ('ව', 'ෆ'), - ('\u{dca}', '\u{dca}'), - ('\u{dcf}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('ෘ', '\u{ddf}'), - ('෦', '෯'), - ('ෲ', '෴'), - ('𑇡', '𑇴'), -]; - -pub const SOGDIAN: &'static [(char, char)] = &[('𐼰', '𐽙')]; - -pub const SORA_SOMPENG: &'static [(char, char)] = &[('𑃐', '𑃨'), ('𑃰', '𑃹')]; - -pub const SOYOMBO: &'static [(char, char)] = &[('𑩐', '𑪢')]; - -pub const SUNDANESE: &'static [(char, char)] = - &[('\u{1b80}', 'ᮿ'), ('᳀', '᳇')]; - -pub const SUNUWAR: &'static [(char, char)] = &[('𑯀', '𑯡'), ('𑯰', '𑯹')]; - -pub const SYLOTI_NAGRI: &'static [(char, char)] = &[('ꠀ', '\u{a82c}')]; - -pub const SYRIAC: &'static [(char, char)] = - &[('܀', '܍'), ('\u{70f}', '\u{74a}'), ('ݍ', 'ݏ'), ('ࡠ', 'ࡪ')]; - -pub const TAGALOG: &'static [(char, char)] = &[('ᜀ', '\u{1715}'), ('ᜟ', 'ᜟ')]; - -pub const TAGBANWA: &'static [(char, char)] = - &[('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}')]; - -pub const TAI_LE: &'static [(char, char)] = &[('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ')]; - -pub const TAI_THAM: &'static [(char, char)] = &[ - ('ᨠ', '\u{1a5e}'), - ('\u{1a60}', '\u{1a7c}'), - ('\u{1a7f}', '᪉'), - ('᪐', '᪙'), - ('᪠', '᪭'), -]; - -pub const TAI_VIET: &'static [(char, char)] = &[('ꪀ', 'ꫂ'), ('ꫛ', '꫟')]; - -pub const TAKRI: &'static [(char, char)] = &[('𑚀', '𑚹'), ('𑛀', '𑛉')]; - -pub const TAMIL: &'static [(char, char)] = &[ - ('\u{b82}', 'ஃ'), - ('அ', 'ஊ'), - ('எ', 'ஐ'), - ('ஒ', 'க'), - ('ங', 'ச'), - ('ஜ', 'ஜ'), - ('ஞ', 'ட'), - ('ண', 'த'), - ('ந', 'ப'), - ('ம', 'ஹ'), - ('\u{bbe}', 'ூ'), - ('ெ', 'ை'), - ('ொ', 
'\u{bcd}'), - ('ௐ', 'ௐ'), - ('\u{bd7}', '\u{bd7}'), - ('௦', '௺'), - ('𑿀', '𑿱'), - ('𑿿', '𑿿'), -]; - -pub const TANGSA: &'static [(char, char)] = &[('𖩰', '𖪾'), ('𖫀', '𖫉')]; - -pub const TANGUT: &'static [(char, char)] = - &[('𖿠', '𖿠'), ('𗀀', '𘟷'), ('𘠀', '𘫿'), ('𘴀', '𘴈')]; - -pub const TELUGU: &'static [(char, char)] = &[ - ('\u{c00}', 'ఌ'), - ('ఎ', 'ఐ'), - ('ఒ', 'న'), - ('ప', 'హ'), - ('\u{c3c}', 'ౄ'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4d}'), - ('\u{c55}', '\u{c56}'), - ('ౘ', 'ౚ'), - ('ౝ', 'ౝ'), - ('ౠ', '\u{c63}'), - ('౦', '౯'), - ('౷', '౿'), -]; - -pub const THAANA: &'static [(char, char)] = &[('ހ', 'ޱ')]; - -pub const THAI: &'static [(char, char)] = &[('ก', '\u{e3a}'), ('เ', '๛')]; - -pub const TIBETAN: &'static [(char, char)] = &[ - ('ༀ', 'ཇ'), - ('ཉ', 'ཬ'), - ('\u{f71}', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('྾', '࿌'), - ('࿎', '࿔'), - ('࿙', '࿚'), -]; - -pub const TIFINAGH: &'static [(char, char)] = - &[('ⴰ', 'ⵧ'), ('ⵯ', '⵰'), ('\u{2d7f}', '\u{2d7f}')]; - -pub const TIRHUTA: &'static [(char, char)] = &[('𑒀', '𑓇'), ('𑓐', '𑓙')]; - -pub const TODHRI: &'static [(char, char)] = &[('𐗀', '𐗳')]; - -pub const TOTO: &'static [(char, char)] = &[('𞊐', '\u{1e2ae}')]; - -pub const TULU_TIGALARI: &'static [(char, char)] = &[ - ('𑎀', '𑎉'), - ('𑎋', '𑎋'), - ('𑎎', '𑎎'), - ('𑎐', '𑎵'), - ('𑎷', '\u{113c0}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '𑏊'), - ('𑏌', '𑏕'), - ('𑏗', '𑏘'), - ('\u{113e1}', '\u{113e2}'), -]; - -pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')]; - -pub const VAI: &'static [(char, char)] = &[('ꔀ', 'ꘫ')]; - -pub const VITHKUQI: &'static [(char, char)] = &[ - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), -]; - -pub const WANCHO: &'static [(char, char)] = &[('𞋀', '𞋹'), ('𞋿', '𞋿')]; - -pub const WARANG_CITI: &'static [(char, char)] = &[('𑢠', '𑣲'), ('𑣿', '𑣿')]; - -pub const YEZIDI: &'static [(char, char)] = - &[('𐺀', '𐺩'), ('\u{10eab}', 
'𐺭'), ('𐺰', '𐺱')]; - -pub const YI: &'static [(char, char)] = &[('ꀀ', 'ꒌ'), ('꒐', '꓆')]; - -pub const ZANABAZAR_SQUARE: &'static [(char, char)] = &[('𑨀', '\u{11a47}')]; diff --git a/vendor/regex-syntax/src/unicode_tables/script_extension.rs b/vendor/regex-syntax/src/unicode_tables/script_extension.rs deleted file mode 100644 index e3f492e2..00000000 --- a/vendor/regex-syntax/src/unicode_tables/script_extension.rs +++ /dev/null @@ -1,1718 +0,0 @@ -// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: -// -// ucd-generate script-extension ucd-16.0.0 --chars -// -// Unicode version: 16.0.0. -// -// ucd-generate 0.3.1 is available on crates.io. - -pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ - ("Adlam", ADLAM), - ("Ahom", AHOM), - ("Anatolian_Hieroglyphs", ANATOLIAN_HIEROGLYPHS), - ("Arabic", ARABIC), - ("Armenian", ARMENIAN), - ("Avestan", AVESTAN), - ("Balinese", BALINESE), - ("Bamum", BAMUM), - ("Bassa_Vah", BASSA_VAH), - ("Batak", BATAK), - ("Bengali", BENGALI), - ("Bhaiksuki", BHAIKSUKI), - ("Bopomofo", BOPOMOFO), - ("Brahmi", BRAHMI), - ("Braille", BRAILLE), - ("Buginese", BUGINESE), - ("Buhid", BUHID), - ("Canadian_Aboriginal", CANADIAN_ABORIGINAL), - ("Carian", CARIAN), - ("Caucasian_Albanian", CAUCASIAN_ALBANIAN), - ("Chakma", CHAKMA), - ("Cham", CHAM), - ("Cherokee", CHEROKEE), - ("Chorasmian", CHORASMIAN), - ("Common", COMMON), - ("Coptic", COPTIC), - ("Cuneiform", CUNEIFORM), - ("Cypriot", CYPRIOT), - ("Cypro_Minoan", CYPRO_MINOAN), - ("Cyrillic", CYRILLIC), - ("Deseret", DESERET), - ("Devanagari", DEVANAGARI), - ("Dives_Akuru", DIVES_AKURU), - ("Dogra", DOGRA), - ("Duployan", DUPLOYAN), - ("Egyptian_Hieroglyphs", EGYPTIAN_HIEROGLYPHS), - ("Elbasan", ELBASAN), - ("Elymaic", ELYMAIC), - ("Ethiopic", ETHIOPIC), - ("Garay", GARAY), - ("Georgian", GEORGIAN), - ("Glagolitic", GLAGOLITIC), - ("Gothic", GOTHIC), - ("Grantha", GRANTHA), - ("Greek", GREEK), - ("Gujarati", GUJARATI), - ("Gunjala_Gondi", GUNJALA_GONDI), - 
("Gurmukhi", GURMUKHI), - ("Gurung_Khema", GURUNG_KHEMA), - ("Han", HAN), - ("Hangul", HANGUL), - ("Hanifi_Rohingya", HANIFI_ROHINGYA), - ("Hanunoo", HANUNOO), - ("Hatran", HATRAN), - ("Hebrew", HEBREW), - ("Hiragana", HIRAGANA), - ("Imperial_Aramaic", IMPERIAL_ARAMAIC), - ("Inherited", INHERITED), - ("Inscriptional_Pahlavi", INSCRIPTIONAL_PAHLAVI), - ("Inscriptional_Parthian", INSCRIPTIONAL_PARTHIAN), - ("Javanese", JAVANESE), - ("Kaithi", KAITHI), - ("Kannada", KANNADA), - ("Katakana", KATAKANA), - ("Kawi", KAWI), - ("Kayah_Li", KAYAH_LI), - ("Kharoshthi", KHAROSHTHI), - ("Khitan_Small_Script", KHITAN_SMALL_SCRIPT), - ("Khmer", KHMER), - ("Khojki", KHOJKI), - ("Khudawadi", KHUDAWADI), - ("Kirat_Rai", KIRAT_RAI), - ("Lao", LAO), - ("Latin", LATIN), - ("Lepcha", LEPCHA), - ("Limbu", LIMBU), - ("Linear_A", LINEAR_A), - ("Linear_B", LINEAR_B), - ("Lisu", LISU), - ("Lycian", LYCIAN), - ("Lydian", LYDIAN), - ("Mahajani", MAHAJANI), - ("Makasar", MAKASAR), - ("Malayalam", MALAYALAM), - ("Mandaic", MANDAIC), - ("Manichaean", MANICHAEAN), - ("Marchen", MARCHEN), - ("Masaram_Gondi", MASARAM_GONDI), - ("Medefaidrin", MEDEFAIDRIN), - ("Meetei_Mayek", MEETEI_MAYEK), - ("Mende_Kikakui", MENDE_KIKAKUI), - ("Meroitic_Cursive", MEROITIC_CURSIVE), - ("Meroitic_Hieroglyphs", MEROITIC_HIEROGLYPHS), - ("Miao", MIAO), - ("Modi", MODI), - ("Mongolian", MONGOLIAN), - ("Mro", MRO), - ("Multani", MULTANI), - ("Myanmar", MYANMAR), - ("Nabataean", NABATAEAN), - ("Nag_Mundari", NAG_MUNDARI), - ("Nandinagari", NANDINAGARI), - ("New_Tai_Lue", NEW_TAI_LUE), - ("Newa", NEWA), - ("Nko", NKO), - ("Nushu", NUSHU), - ("Nyiakeng_Puachue_Hmong", NYIAKENG_PUACHUE_HMONG), - ("Ogham", OGHAM), - ("Ol_Chiki", OL_CHIKI), - ("Ol_Onal", OL_ONAL), - ("Old_Hungarian", OLD_HUNGARIAN), - ("Old_Italic", OLD_ITALIC), - ("Old_North_Arabian", OLD_NORTH_ARABIAN), - ("Old_Permic", OLD_PERMIC), - ("Old_Persian", OLD_PERSIAN), - ("Old_Sogdian", OLD_SOGDIAN), - ("Old_South_Arabian", OLD_SOUTH_ARABIAN), - ("Old_Turkic", 
OLD_TURKIC), - ("Old_Uyghur", OLD_UYGHUR), - ("Oriya", ORIYA), - ("Osage", OSAGE), - ("Osmanya", OSMANYA), - ("Pahawh_Hmong", PAHAWH_HMONG), - ("Palmyrene", PALMYRENE), - ("Pau_Cin_Hau", PAU_CIN_HAU), - ("Phags_Pa", PHAGS_PA), - ("Phoenician", PHOENICIAN), - ("Psalter_Pahlavi", PSALTER_PAHLAVI), - ("Rejang", REJANG), - ("Runic", RUNIC), - ("Samaritan", SAMARITAN), - ("Saurashtra", SAURASHTRA), - ("Sharada", SHARADA), - ("Shavian", SHAVIAN), - ("Siddham", SIDDHAM), - ("SignWriting", SIGNWRITING), - ("Sinhala", SINHALA), - ("Sogdian", SOGDIAN), - ("Sora_Sompeng", SORA_SOMPENG), - ("Soyombo", SOYOMBO), - ("Sundanese", SUNDANESE), - ("Sunuwar", SUNUWAR), - ("Syloti_Nagri", SYLOTI_NAGRI), - ("Syriac", SYRIAC), - ("Tagalog", TAGALOG), - ("Tagbanwa", TAGBANWA), - ("Tai_Le", TAI_LE), - ("Tai_Tham", TAI_THAM), - ("Tai_Viet", TAI_VIET), - ("Takri", TAKRI), - ("Tamil", TAMIL), - ("Tangsa", TANGSA), - ("Tangut", TANGUT), - ("Telugu", TELUGU), - ("Thaana", THAANA), - ("Thai", THAI), - ("Tibetan", TIBETAN), - ("Tifinagh", TIFINAGH), - ("Tirhuta", TIRHUTA), - ("Todhri", TODHRI), - ("Toto", TOTO), - ("Tulu_Tigalari", TULU_TIGALARI), - ("Ugaritic", UGARITIC), - ("Vai", VAI), - ("Vithkuqi", VITHKUQI), - ("Wancho", WANCHO), - ("Warang_Citi", WARANG_CITI), - ("Yezidi", YEZIDI), - ("Yi", YI), - ("Zanabazar_Square", ZANABAZAR_SQUARE), -]; - -pub const ADLAM: &'static [(char, char)] = &[ - ('؟', '؟'), - ('ـ', 'ـ'), - ('⁏', '⁏'), - ('⹁', '⹁'), - ('𞤀', '𞥋'), - ('𞥐', '𞥙'), - ('𞥞', '𞥟'), -]; - -pub const AHOM: &'static [(char, char)] = - &[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑝆')]; - -pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[('𔐀', '𔙆')]; - -pub const ARABIC: &'static [(char, char)] = &[ - ('\u{600}', '\u{604}'), - ('؆', '\u{6dc}'), - ('۞', 'ۿ'), - ('ݐ', 'ݿ'), - ('ࡰ', 'ࢎ'), - ('\u{890}', '\u{891}'), - ('\u{897}', '\u{8e1}'), - ('\u{8e3}', '\u{8ff}'), - ('⁏', '⁏'), - ('⹁', '⹁'), - ('ﭐ', '﯂'), - ('ﯓ', 'ﶏ'), - ('ﶒ', 'ﷇ'), - ('﷏', '﷏'), - ('ﷰ', '﷿'), - ('ﹰ', 'ﹴ'), - 
('ﹶ', 'ﻼ'), - ('\u{102e0}', '𐋻'), - ('𐹠', '𐹾'), - ('𐻂', '𐻄'), - ('\u{10efc}', '\u{10eff}'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('𞻰', '𞻱'), -]; - -pub const ARMENIAN: &'static [(char, char)] = - &[('\u{308}', '\u{308}'), ('Ա', 'Ֆ'), ('ՙ', '֊'), ('֍', '֏'), ('ﬓ', 'ﬗ')]; - -pub const AVESTAN: &'static [(char, char)] = - &[('·', '·'), ('⸰', '⸱'), ('𐬀', '𐬵'), ('𐬹', '𐬿')]; - -pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭌ'), ('᭎', '᭿')]; - -pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')]; - -pub const BASSA_VAH: &'static [(char, char)] = - &[('𖫐', '𖫭'), ('\u{16af0}', '𖫵')]; - -pub const BATAK: &'static [(char, char)] = &[('ᯀ', '\u{1bf3}'), ('᯼', '᯿')]; - -pub const BENGALI: &'static [(char, char)] = &[ - ('ʼ', 'ʼ'), - ('\u{951}', '\u{952}'), - ('।', '॥'), - ('ঀ', 'ঃ'), - ('অ', 'ঌ'), - ('এ', 'ঐ'), - ('ও', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('\u{9bc}', '\u{9c4}'), - ('ে', 'ৈ'), - ('ো', 'ৎ'), - ('\u{9d7}', '\u{9d7}'), - ('ড়', 'ঢ়'), - ('য়', '\u{9e3}'), - ('০', '\u{9fe}'), - ('\u{1cd0}', '\u{1cd0}'), - ('\u{1cd2}', '\u{1cd2}'), - ('\u{1cd5}', '\u{1cd6}'), - ('\u{1cd8}', '\u{1cd8}'), - ('᳡', '᳡'), - ('ᳪ', 'ᳪ'), - ('\u{1ced}', '\u{1ced}'), - ('ᳲ', 'ᳲ'), - ('ᳵ', '᳷'), - ('\u{a8f1}', '\u{a8f1}'), -]; - -pub const BHAIKSUKI: &'static [(char, char)] = - &[('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱅'), ('𑱐', '𑱬')]; - -pub const BOPOMOFO: &'static [(char, char)] = &[ - ('ˇ', 'ˇ'), - ('ˉ', 'ˋ'), - ('˙', '˙'), - ('˪', '˫'), - ('、', '〃'), - ('〈', '】'), - ('〓', '〟'), - ('\u{302a}', '\u{302d}'), - ('〰', 
'〰'), - ('〷', '〷'), - ('・', '・'), - ('ㄅ', 'ㄯ'), - ('ㆠ', 'ㆿ'), - ('﹅', '﹆'), - ('。', '・'), -]; - -pub const BRAHMI: &'static [(char, char)] = - &[('𑀀', '𑁍'), ('𑁒', '𑁵'), ('\u{1107f}', '\u{1107f}')]; - -pub const BRAILLE: &'static [(char, char)] = &[('⠀', '⣿')]; - -pub const BUGINESE: &'static [(char, char)] = - &[('ᨀ', '\u{1a1b}'), ('᨞', '᨟'), ('ꧏ', 'ꧏ')]; - -pub const BUHID: &'static [(char, char)] = &[('᜵', '᜶'), ('ᝀ', '\u{1753}')]; - -pub const CANADIAN_ABORIGINAL: &'static [(char, char)] = - &[('᐀', 'ᙿ'), ('ᢰ', 'ᣵ'), ('𑪰', '𑪿')]; - -pub const CARIAN: &'static [(char, char)] = - &[('·', '·'), ('⁚', '⁚'), ('⁝', '⁝'), ('⸱', '⸱'), ('𐊠', '𐋐')]; - -pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = &[ - ('\u{304}', '\u{304}'), - ('\u{331}', '\u{331}'), - ('\u{35e}', '\u{35e}'), - ('𐔰', '𐕣'), - ('𐕯', '𐕯'), -]; - -pub const CHAKMA: &'static [(char, char)] = - &[('০', '৯'), ('၀', '၉'), ('\u{11100}', '\u{11134}'), ('𑄶', '𑅇')]; - -pub const CHAM: &'static [(char, char)] = - &[('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟')]; - -pub const CHEROKEE: &'static [(char, char)] = &[ - ('\u{300}', '\u{302}'), - ('\u{304}', '\u{304}'), - ('\u{30b}', '\u{30c}'), - ('\u{323}', '\u{324}'), - ('\u{330}', '\u{331}'), - ('Ꭰ', 'Ᏽ'), - ('ᏸ', 'ᏽ'), - ('ꭰ', 'ꮿ'), -]; - -pub const CHORASMIAN: &'static [(char, char)] = &[('𐾰', '𐿋')]; - -pub const COMMON: &'static [(char, char)] = &[ - ('\0', '@'), - ('[', '`'), - ('{', '©'), - ('«', '¶'), - ('¸', '¹'), - ('»', '¿'), - ('×', '×'), - ('÷', '÷'), - ('ʹ', 'ʻ'), - ('ʽ', 'ˆ'), - ('ˈ', 'ˈ'), - ('ˌ', 'ˌ'), - ('ˎ', '˖'), - ('˘', '˘'), - ('˚', '˟'), - ('˥', '˩'), - ('ˬ', '˿'), - (';', ';'), - ('΅', '΅'), - ('·', '·'), - ('\u{605}', '\u{605}'), - ('\u{6dd}', '\u{6dd}'), - ('\u{8e2}', '\u{8e2}'), - ('฿', '฿'), - ('࿕', '࿘'), - ('\u{2000}', '\u{200b}'), - ('\u{200e}', '\u{202e}'), - ('‰', '⁎'), - ('⁐', '⁙'), - ('⁛', '⁜'), - ('⁞', '\u{2064}'), - ('\u{2066}', '⁰'), - ('⁴', '⁾'), - ('₀', '₎'), - ('₠', '⃀'), - ('℀', '℥'), - ('℧', '℩'), - ('ℬ', 'ℱ'), 
- ('ℳ', '⅍'), - ('⅏', '⅟'), - ('↉', '↋'), - ('←', '␩'), - ('⑀', '⑊'), - ('①', '⟿'), - ('⤀', '⭳'), - ('⭶', '⮕'), - ('⮗', '⯿'), - ('⸀', '⸖'), - ('⸘', 'ⸯ'), - ('⸲', '⸻'), - ('⸽', '⹀'), - ('⹂', '⹂'), - ('⹄', '⹝'), - ('\u{3000}', '\u{3000}'), - ('〄', '〄'), - ('〒', '〒'), - ('〠', '〠'), - ('〶', '〶'), - ('㉈', '㉟'), - ('㉿', '㉿'), - ('㊱', '㊿'), - ('㋌', '㋏'), - ('㍱', '㍺'), - ('㎀', '㏟'), - ('㏿', '㏿'), - ('䷀', '䷿'), - ('꜈', '꜡'), - ('ꞈ', '꞊'), - ('꭛', '꭛'), - ('꭪', '꭫'), - ('︐', '︙'), - ('︰', '﹄'), - ('﹇', '﹒'), - ('﹔', '﹦'), - ('﹨', '﹫'), - ('\u{feff}', '\u{feff}'), - ('!', '@'), - ('[', '`'), - ('{', '⦆'), - ('¢', '₩'), - ('│', '○'), - ('\u{fff9}', '�'), - ('𐆐', '𐆜'), - ('𐇐', '𐇼'), - ('𜰀', '𜳹'), - ('𜴀', '𜺳'), - ('𜽐', '𜿃'), - ('𝀀', '𝃵'), - ('𝄀', '𝄦'), - ('𝄩', '\u{1d166}'), - ('𝅪', '\u{1d17a}'), - ('𝆃', '𝆄'), - ('𝆌', '𝆩'), - ('𝆮', '𝇪'), - ('𝋀', '𝋓'), - ('𝋠', '𝋳'), - ('𝌀', '𝍖'), - ('𝍲', '𝍸'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝟋'), - ('𝟎', '𝟿'), - ('𞱱', '𞲴'), - ('𞴁', '𞴽'), - ('🀀', '🀫'), - ('🀰', '🂓'), - ('🂠', '🂮'), - ('🂱', '🂿'), - ('🃁', '🃏'), - ('🃑', '🃵'), - ('🄀', '🆭'), - ('🇦', '🇿'), - ('🈁', '🈂'), - ('🈐', '🈻'), - ('🉀', '🉈'), - ('🉠', '🉥'), - ('🌀', '🛗'), - ('🛜', '🛬'), - ('🛰', '🛼'), - ('🜀', '🝶'), - ('🝻', '🟙'), - ('🟠', '🟫'), - ('🟰', '🟰'), - ('🠀', '🠋'), - ('🠐', '🡇'), - ('🡐', '🡙'), - ('🡠', '🢇'), - ('🢐', '🢭'), - ('🢰', '🢻'), - ('🣀', '🣁'), - ('🤀', '🩓'), - ('🩠', '🩭'), - ('🩰', '🩼'), - ('🪀', '🪉'), - ('🪏', '🫆'), - ('🫎', '🫜'), - ('🫟', '🫩'), - ('🫰', '🫸'), - ('🬀', '🮒'), - ('🮔', '🯹'), - ('\u{e0001}', '\u{e0001}'), - ('\u{e0020}', '\u{e007f}'), -]; - -pub const COPTIC: &'static [(char, char)] = &[ - ('·', '·'), - ('\u{300}', '\u{300}'), - ('\u{304}', '\u{305}'), - ('\u{307}', '\u{307}'), - ('ʹ', '͵'), - ('Ϣ', 'ϯ'), - ('Ⲁ', 'ⳳ'), - ('⳹', '⳿'), - ('⸗', '⸗'), - 
('\u{102e0}', '𐋻'), -]; - -pub const CUNEIFORM: &'static [(char, char)] = - &[('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃')]; - -pub const CYPRIOT: &'static [(char, char)] = &[ - ('𐄀', '𐄂'), - ('𐄇', '𐄳'), - ('𐄷', '𐄿'), - ('𐠀', '𐠅'), - ('𐠈', '𐠈'), - ('𐠊', '𐠵'), - ('𐠷', '𐠸'), - ('𐠼', '𐠼'), - ('𐠿', '𐠿'), -]; - -pub const CYPRO_MINOAN: &'static [(char, char)] = &[('𐄀', '𐄁'), ('𒾐', '𒿲')]; - -pub const CYRILLIC: &'static [(char, char)] = &[ - ('ʼ', 'ʼ'), - ('\u{300}', '\u{302}'), - ('\u{304}', '\u{304}'), - ('\u{306}', '\u{306}'), - ('\u{308}', '\u{308}'), - ('\u{30b}', '\u{30b}'), - ('\u{311}', '\u{311}'), - ('Ѐ', 'ԯ'), - ('ᲀ', 'ᲊ'), - ('ᴫ', 'ᴫ'), - ('ᵸ', 'ᵸ'), - ('\u{1df8}', '\u{1df8}'), - ('\u{2de0}', '\u{2dff}'), - ('⹃', '⹃'), - ('Ꙁ', '\u{a69f}'), - ('\u{fe2e}', '\u{fe2f}'), - ('𞀰', '𞁭'), - ('\u{1e08f}', '\u{1e08f}'), -]; - -pub const DESERET: &'static [(char, char)] = &[('𐐀', '𐑏')]; - -pub const DEVANAGARI: &'static [(char, char)] = &[ - ('ʼ', 'ʼ'), - ('\u{900}', '\u{952}'), - ('\u{955}', 'ॿ'), - ('\u{1cd0}', 'ᳶ'), - ('\u{1cf8}', '\u{1cf9}'), - ('\u{20f0}', '\u{20f0}'), - ('꠰', '꠹'), - ('\u{a8e0}', '\u{a8ff}'), - ('𑬀', '𑬉'), -]; - -pub const DIVES_AKURU: &'static [(char, char)] = &[ - ('𑤀', '𑤆'), - ('𑤉', '𑤉'), - ('𑤌', '𑤓'), - ('𑤕', '𑤖'), - ('𑤘', '𑤵'), - ('𑤷', '𑤸'), - ('\u{1193b}', '𑥆'), - ('𑥐', '𑥙'), -]; - -pub const DOGRA: &'static [(char, char)] = - &[('।', '९'), ('꠰', '꠹'), ('𑠀', '𑠻')]; - -pub const DUPLOYAN: &'static [(char, char)] = &[ - ('·', '·'), - ('\u{307}', '\u{308}'), - ('\u{30a}', '\u{30a}'), - ('\u{323}', '\u{324}'), - ('⸼', '⸼'), - ('𛰀', '𛱪'), - ('𛱰', '𛱼'), - ('𛲀', '𛲈'), - ('𛲐', '𛲙'), - ('𛲜', '\u{1bca3}'), -]; - -pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] = - &[('𓀀', '\u{13455}'), ('𓑠', '𔏺')]; - -pub const ELBASAN: &'static [(char, char)] = - &[('·', '·'), ('\u{305}', '\u{305}'), ('𐔀', '𐔧')]; - -pub const ELYMAIC: &'static [(char, char)] = &[('𐿠', '𐿶')]; - -pub const ETHIOPIC: &'static [(char, char)] = &[ - ('\u{30e}', '\u{30e}'), - 
('ሀ', 'ቈ'), - ('ቊ', 'ቍ'), - ('ቐ', 'ቖ'), - ('ቘ', 'ቘ'), - ('ቚ', 'ቝ'), - ('በ', 'ኈ'), - ('ኊ', 'ኍ'), - ('ነ', 'ኰ'), - ('ኲ', 'ኵ'), - ('ኸ', 'ኾ'), - ('ዀ', 'ዀ'), - ('ዂ', 'ዅ'), - ('ወ', 'ዖ'), - ('ዘ', 'ጐ'), - ('ጒ', 'ጕ'), - ('ጘ', 'ፚ'), - ('\u{135d}', '፼'), - ('ᎀ', '᎙'), - ('ⶀ', 'ⶖ'), - ('ⶠ', 'ⶦ'), - ('ⶨ', 'ⶮ'), - ('ⶰ', 'ⶶ'), - ('ⶸ', 'ⶾ'), - ('ⷀ', 'ⷆ'), - ('ⷈ', 'ⷎ'), - ('ⷐ', 'ⷖ'), - ('ⷘ', 'ⷞ'), - ('ꬁ', 'ꬆ'), - ('ꬉ', 'ꬎ'), - ('ꬑ', 'ꬖ'), - ('ꬠ', 'ꬦ'), - ('ꬨ', 'ꬮ'), - ('𞟠', '𞟦'), - ('𞟨', '𞟫'), - ('𞟭', '𞟮'), - ('𞟰', '𞟾'), -]; - -pub const GARAY: &'static [(char, char)] = &[ - ('،', '،'), - ('؛', '؛'), - ('؟', '؟'), - ('𐵀', '𐵥'), - ('\u{10d69}', '𐶅'), - ('𐶎', '𐶏'), -]; - -pub const GEORGIAN: &'static [(char, char)] = &[ - ('·', '·'), - ('։', '։'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ა', 'ჿ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('⁚', '⁚'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('⸱', '⸱'), -]; - -pub const GLAGOLITIC: &'static [(char, char)] = &[ - ('·', '·'), - ('\u{303}', '\u{303}'), - ('\u{305}', '\u{305}'), - ('\u{484}', '\u{484}'), - ('\u{487}', '\u{487}'), - ('։', '։'), - ('჻', '჻'), - ('⁚', '⁚'), - ('Ⰰ', 'ⱟ'), - ('⹃', '⹃'), - ('\u{a66f}', '\u{a66f}'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), -]; - -pub const GOTHIC: &'static [(char, char)] = &[ - ('·', '·'), - ('\u{304}', '\u{305}'), - ('\u{308}', '\u{308}'), - ('\u{331}', '\u{331}'), - ('𐌰', '𐍊'), -]; - -pub const GRANTHA: &'static [(char, char)] = &[ - ('\u{951}', '\u{952}'), - ('।', '॥'), - ('௦', '௳'), - ('\u{1cd0}', '\u{1cd0}'), - ('\u{1cd2}', '᳓'), - ('ᳲ', '\u{1cf4}'), - ('\u{1cf8}', '\u{1cf9}'), - ('\u{20f0}', '\u{20f0}'), - ('\u{11300}', '𑌃'), - ('𑌅', '𑌌'), - ('𑌏', '𑌐'), - ('𑌓', '𑌨'), - ('𑌪', '𑌰'), - ('𑌲', '𑌳'), - ('𑌵', '𑌹'), - ('\u{1133b}', '𑍄'), - ('𑍇', '𑍈'), - ('𑍋', '\u{1134d}'), - ('𑍐', '𑍐'), - ('\u{11357}', '\u{11357}'), - ('𑍝', '𑍣'), - ('\u{11366}', '\u{1136c}'), - ('\u{11370}', '\u{11374}'), - 
('𑿐', '𑿑'), - ('𑿓', '𑿓'), -]; - -pub const GREEK: &'static [(char, char)] = &[ - ('·', '·'), - ('\u{300}', '\u{301}'), - ('\u{304}', '\u{304}'), - ('\u{306}', '\u{306}'), - ('\u{308}', '\u{308}'), - ('\u{313}', '\u{313}'), - ('\u{342}', '\u{342}'), - ('\u{345}', '\u{345}'), - ('Ͱ', 'ͷ'), - ('ͺ', 'ͽ'), - ('Ϳ', 'Ϳ'), - ('΄', '΄'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', 'ϡ'), - ('ϰ', 'Ͽ'), - ('ᴦ', 'ᴪ'), - ('ᵝ', 'ᵡ'), - ('ᵦ', 'ᵪ'), - ('ᶿ', '\u{1dc1}'), - ('ἀ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), - ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ῄ'), - ('ῆ', 'ΐ'), - ('ῖ', 'Ί'), - ('῝', '`'), - ('ῲ', 'ῴ'), - ('ῶ', '῾'), - ('⁝', '⁝'), - ('Ω', 'Ω'), - ('ꭥ', 'ꭥ'), - ('𐅀', '𐆎'), - ('𐆠', '𐆠'), - ('𝈀', '𝉅'), -]; - -pub const GUJARATI: &'static [(char, char)] = &[ - ('\u{951}', '\u{952}'), - ('।', '॥'), - ('\u{a81}', 'ઃ'), - ('અ', 'ઍ'), - ('એ', 'ઑ'), - ('ઓ', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('\u{abc}', '\u{ac5}'), - ('\u{ac7}', 'ૉ'), - ('ો', '\u{acd}'), - ('ૐ', 'ૐ'), - ('ૠ', '\u{ae3}'), - ('૦', '૱'), - ('ૹ', '\u{aff}'), - ('꠰', '꠹'), -]; - -pub const GUNJALA_GONDI: &'static [(char, char)] = &[ - ('·', '·'), - ('।', '॥'), - ('𑵠', '𑵥'), - ('𑵧', '𑵨'), - ('𑵪', '𑶎'), - ('\u{11d90}', '\u{11d91}'), - ('𑶓', '𑶘'), - ('𑶠', '𑶩'), -]; - -pub const GURMUKHI: &'static [(char, char)] = &[ - ('\u{951}', '\u{952}'), - ('।', '॥'), - ('\u{a01}', 'ਃ'), - ('ਅ', 'ਊ'), - ('ਏ', 'ਐ'), - ('ਓ', 'ਨ'), - ('ਪ', 'ਰ'), - ('ਲ', 'ਲ਼'), - ('ਵ', 'ਸ਼'), - ('ਸ', 'ਹ'), - ('\u{a3c}', '\u{a3c}'), - ('ਾ', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4d}'), - ('\u{a51}', '\u{a51}'), - ('ਖ਼', 'ੜ'), - ('ਫ਼', 'ਫ਼'), - ('੦', '੶'), - ('꠰', '꠹'), -]; - -pub const GURUNG_KHEMA: &'static [(char, char)] = &[('॥', '॥'), ('𖄀', '𖄹')]; - -pub const HAN: &'static [(char, char)] = &[ - ('·', '·'), - ('⺀', '⺙'), - ('⺛', '⻳'), - ('⼀', '⿕'), - ('⿰', '⿿'), - ('、', '〃'), - ('々', '】'), - ('〓', '〟'), - ('〡', '\u{302d}'), - ('〰', '〰'), 
- ('〷', '〿'), - ('・', '・'), - ('㆐', '㆟'), - ('㇀', '㇥'), - ('㇯', '㇯'), - ('㈠', '㉇'), - ('㊀', '㊰'), - ('㋀', '㋋'), - ('㋿', '㋿'), - ('㍘', '㍰'), - ('㍻', '㍿'), - ('㏠', '㏾'), - ('㐀', '䶿'), - ('一', '鿿'), - ('꜀', '꜇'), - ('豈', '舘'), - ('並', '龎'), - ('﹅', '﹆'), - ('。', '・'), - ('𖿢', '𖿣'), - ('\u{16ff0}', '\u{16ff1}'), - ('𝍠', '𝍱'), - ('🉐', '🉑'), - ('𠀀', '𪛟'), - ('𪜀', '𫜹'), - ('𫝀', '𫠝'), - ('𫠠', '𬺡'), - ('𬺰', '𮯠'), - ('𮯰', '𮹝'), - ('丽', '𪘀'), - ('𰀀', '𱍊'), - ('𱍐', '𲎯'), -]; - -pub const HANGUL: &'static [(char, char)] = &[ - ('ᄀ', 'ᇿ'), - ('、', '〃'), - ('〈', '】'), - ('〓', '〟'), - ('\u{302e}', '〰'), - ('〷', '〷'), - ('・', '・'), - ('ㄱ', 'ㆎ'), - ('㈀', '㈞'), - ('㉠', '㉾'), - ('ꥠ', 'ꥼ'), - ('가', '힣'), - ('ힰ', 'ퟆ'), - ('ퟋ', 'ퟻ'), - ('﹅', '﹆'), - ('。', '・'), - ('ᅠ', 'ᄒ'), - ('ᅡ', 'ᅦ'), - ('ᅧ', 'ᅬ'), - ('ᅭ', 'ᅲ'), - ('ᅳ', 'ᅵ'), -]; - -pub const HANIFI_ROHINGYA: &'static [(char, char)] = &[ - ('،', '،'), - ('؛', '؛'), - ('؟', '؟'), - ('ـ', 'ـ'), - ('۔', '۔'), - ('𐴀', '\u{10d27}'), - ('𐴰', '𐴹'), -]; - -pub const HANUNOO: &'static [(char, char)] = &[('ᜠ', '᜶')]; - -pub const HATRAN: &'static [(char, char)] = - &[('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿')]; - -pub const HEBREW: &'static [(char, char)] = &[ - ('\u{307}', '\u{308}'), - ('\u{591}', '\u{5c7}'), - ('א', 'ת'), - ('ׯ', '״'), - ('יִ', 'זּ'), - ('טּ', 'לּ'), - ('מּ', 'מּ'), - ('נּ', 'סּ'), - ('ףּ', 'פּ'), - ('צּ', 'ﭏ'), -]; - -pub const HIRAGANA: &'static [(char, char)] = &[ - ('、', '〃'), - ('〈', '】'), - ('〓', '〟'), - ('〰', '〵'), - ('〷', '〷'), - ('〼', '〽'), - ('ぁ', 'ゖ'), - ('\u{3099}', '゠'), - ('・', 'ー'), - ('﹅', '﹆'), - ('。', '・'), - ('ー', 'ー'), - ('\u{ff9e}', '\u{ff9f}'), - ('𛀁', '𛄟'), - ('𛄲', '𛄲'), - ('𛅐', '𛅒'), - ('🈀', '🈀'), -]; - -pub const IMPERIAL_ARAMAIC: &'static [(char, char)] = - &[('𐡀', '𐡕'), ('𐡗', '𐡟')]; - -pub const INHERITED: &'static [(char, char)] = &[ - ('\u{30f}', '\u{30f}'), - ('\u{312}', '\u{312}'), - ('\u{314}', '\u{31f}'), - ('\u{321}', '\u{322}'), - ('\u{326}', '\u{32c}'), - ('\u{32f}', '\u{32f}'), - ('\u{332}', 
'\u{341}'), - ('\u{343}', '\u{344}'), - ('\u{346}', '\u{357}'), - ('\u{359}', '\u{35d}'), - ('\u{35f}', '\u{362}'), - ('\u{953}', '\u{954}'), - ('\u{1ab0}', '\u{1ace}'), - ('\u{1dc2}', '\u{1df7}'), - ('\u{1df9}', '\u{1df9}'), - ('\u{1dfb}', '\u{1dff}'), - ('\u{200c}', '\u{200d}'), - ('\u{20d0}', '\u{20ef}'), - ('\u{fe00}', '\u{fe0f}'), - ('\u{fe20}', '\u{fe2d}'), - ('\u{101fd}', '\u{101fd}'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d167}', '\u{1d169}'), - ('\u{1d17b}', '\u{1d182}'), - ('\u{1d185}', '\u{1d18b}'), - ('\u{1d1aa}', '\u{1d1ad}'), - ('\u{e0100}', '\u{e01ef}'), -]; - -pub const INSCRIPTIONAL_PAHLAVI: &'static [(char, char)] = - &[('𐭠', '𐭲'), ('𐭸', '𐭿')]; - -pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] = - &[('𐭀', '𐭕'), ('𐭘', '𐭟')]; - -pub const JAVANESE: &'static [(char, char)] = - &[('\u{a980}', '꧍'), ('ꧏ', '꧙'), ('꧞', '꧟')]; - -pub const KAITHI: &'static [(char, char)] = &[ - ('०', '९'), - ('⸱', '⸱'), - ('꠰', '꠹'), - ('\u{11080}', '\u{110c2}'), - ('\u{110cd}', '\u{110cd}'), -]; - -pub const KANNADA: &'static [(char, char)] = &[ - ('\u{951}', '\u{952}'), - ('।', '॥'), - ('ಀ', 'ಌ'), - ('ಎ', 'ಐ'), - ('ಒ', 'ನ'), - ('ಪ', 'ಳ'), - ('ವ', 'ಹ'), - ('\u{cbc}', 'ೄ'), - ('\u{cc6}', '\u{cc8}'), - ('\u{cca}', '\u{ccd}'), - ('\u{cd5}', '\u{cd6}'), - ('ೝ', 'ೞ'), - ('ೠ', '\u{ce3}'), - ('೦', '೯'), - ('ೱ', 'ೳ'), - ('\u{1cd0}', '\u{1cd0}'), - ('\u{1cd2}', '᳓'), - ('\u{1cda}', '\u{1cda}'), - ('ᳲ', 'ᳲ'), - ('\u{1cf4}', '\u{1cf4}'), - ('꠰', '꠵'), -]; - -pub const KATAKANA: &'static [(char, char)] = &[ - ('\u{305}', '\u{305}'), - ('\u{323}', '\u{323}'), - ('、', '〃'), - ('〈', '】'), - ('〓', '〟'), - ('〰', '〵'), - ('〷', '〷'), - ('〼', '〽'), - ('\u{3099}', '゜'), - ('゠', 'ヿ'), - ('ㇰ', 'ㇿ'), - ('㋐', '㋾'), - ('㌀', '㍗'), - ('﹅', '﹆'), - ('。', '\u{ff9f}'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('𛀀', '𛀀'), - ('𛄠', '𛄢'), - ('𛅕', '𛅕'), - ('𛅤', '𛅧'), -]; - -pub const KAWI: &'static [(char, char)] = - &[('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), 
('𑼾', '\u{11f5a}')]; - -pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '꤯')]; - -pub const KHAROSHTHI: &'static [(char, char)] = &[ - ('𐨀', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '𐨓'), - ('𐨕', '𐨗'), - ('𐨙', '𐨵'), - ('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '𐩈'), - ('𐩐', '𐩘'), -]; - -pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] = - &[('\u{16fe4}', '\u{16fe4}'), ('𘬀', '𘳕'), ('𘳿', '𘳿')]; - -pub const KHMER: &'static [(char, char)] = - &[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿')]; - -pub const KHOJKI: &'static [(char, char)] = - &[('૦', '૯'), ('꠰', '꠹'), ('𑈀', '𑈑'), ('𑈓', '\u{11241}')]; - -pub const KHUDAWADI: &'static [(char, char)] = - &[('।', '॥'), ('꠰', '꠹'), ('𑊰', '\u{112ea}'), ('𑋰', '𑋹')]; - -pub const KIRAT_RAI: &'static [(char, char)] = &[('𖵀', '𖵹')]; - -pub const LAO: &'static [(char, char)] = &[ - ('ກ', 'ຂ'), - ('ຄ', 'ຄ'), - ('ຆ', 'ຊ'), - ('ຌ', 'ຣ'), - ('ລ', 'ລ'), - ('ວ', 'ຽ'), - ('ເ', 'ໄ'), - ('ໆ', 'ໆ'), - ('\u{ec8}', '\u{ece}'), - ('໐', '໙'), - ('ໜ', 'ໟ'), -]; - -pub const LATIN: &'static [(char, char)] = &[ - ('A', 'Z'), - ('a', 'z'), - ('ª', 'ª'), - ('·', '·'), - ('º', 'º'), - ('À', 'Ö'), - ('Ø', 'ö'), - ('ø', 'ʸ'), - ('ʼ', 'ʼ'), - ('ˇ', 'ˇ'), - ('ˉ', 'ˋ'), - ('ˍ', 'ˍ'), - ('˗', '˗'), - ('˙', '˙'), - ('ˠ', 'ˤ'), - ('\u{300}', '\u{30e}'), - ('\u{310}', '\u{311}'), - ('\u{313}', '\u{313}'), - ('\u{320}', '\u{320}'), - ('\u{323}', '\u{325}'), - ('\u{32d}', '\u{32e}'), - ('\u{330}', '\u{331}'), - ('\u{358}', '\u{358}'), - ('\u{35e}', '\u{35e}'), - ('\u{363}', '\u{36f}'), - ('\u{485}', '\u{486}'), - ('\u{951}', '\u{952}'), - ('჻', '჻'), - ('ᴀ', 'ᴥ'), - ('ᴬ', 'ᵜ'), - ('ᵢ', 'ᵥ'), - ('ᵫ', 'ᵷ'), - ('ᵹ', 'ᶾ'), - ('\u{1df8}', '\u{1df8}'), - ('Ḁ', 'ỿ'), - ('\u{202f}', '\u{202f}'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('\u{20f0}', '\u{20f0}'), - ('K', 'Å'), - ('Ⅎ', 'Ⅎ'), - ('ⅎ', 'ⅎ'), - ('Ⅰ', 'ↈ'), - ('Ⱡ', 'Ɀ'), - ('⸗', '⸗'), - ('꜀', '꜇'), - ('Ꜣ', 'ꞇ'), - ('Ꞌ', 'ꟍ'), - ('Ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'Ƛ'), - 
('ꟲ', 'ꟿ'), - ('꤮', '꤮'), - ('ꬰ', 'ꭚ'), - ('ꭜ', 'ꭤ'), - ('ꭦ', 'ꭩ'), - ('ff', 'st'), - ('A', 'Z'), - ('a', 'z'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𝼀', '𝼞'), - ('𝼥', '𝼪'), -]; - -pub const LEPCHA: &'static [(char, char)] = - &[('ᰀ', '\u{1c37}'), ('᰻', '᱉'), ('ᱍ', 'ᱏ')]; - -pub const LIMBU: &'static [(char, char)] = &[ - ('॥', '॥'), - ('ᤀ', 'ᤞ'), - ('\u{1920}', 'ᤫ'), - ('ᤰ', '\u{193b}'), - ('᥀', '᥀'), - ('᥄', '᥏'), -]; - -pub const LINEAR_A: &'static [(char, char)] = - &[('𐄇', '𐄳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧')]; - -pub const LINEAR_B: &'static [(char, char)] = &[ - ('𐀀', '𐀋'), - ('𐀍', '𐀦'), - ('𐀨', '𐀺'), - ('𐀼', '𐀽'), - ('𐀿', '𐁍'), - ('𐁐', '𐁝'), - ('𐂀', '𐃺'), - ('𐄀', '𐄂'), - ('𐄇', '𐄳'), - ('𐄷', '𐄿'), -]; - -pub const LISU: &'static [(char, char)] = - &[('ʼ', 'ʼ'), ('ˍ', 'ˍ'), ('《', '》'), ('ꓐ', '꓿'), ('𑾰', '𑾰')]; - -pub const LYCIAN: &'static [(char, char)] = &[('⁚', '⁚'), ('𐊀', '𐊜')]; - -pub const LYDIAN: &'static [(char, char)] = - &[('·', '·'), ('⸱', '⸱'), ('𐤠', '𐤹'), ('𐤿', '𐤿')]; - -pub const MAHAJANI: &'static [(char, char)] = - &[('·', '·'), ('।', '९'), ('꠰', '꠹'), ('𑅐', '𑅶')]; - -pub const MAKASAR: &'static [(char, char)] = &[('𑻠', '𑻸')]; - -pub const MALAYALAM: &'static [(char, char)] = &[ - ('\u{951}', '\u{952}'), - ('।', '॥'), - ('\u{d00}', 'ഌ'), - ('എ', 'ഐ'), - ('ഒ', '\u{d44}'), - ('െ', 'ൈ'), - ('ൊ', '൏'), - ('ൔ', '\u{d63}'), - ('൦', 'ൿ'), - ('\u{1cda}', '\u{1cda}'), - ('ᳲ', 'ᳲ'), - ('꠰', '꠲'), -]; - -pub const MANDAIC: &'static [(char, char)] = - &[('ـ', 'ـ'), ('ࡀ', '\u{85b}'), ('࡞', '࡞')]; - -pub const MANICHAEAN: &'static [(char, char)] = - &[('ـ', 'ـ'), ('𐫀', '\u{10ae6}'), ('𐫫', '𐫶')]; - -pub const MARCHEN: &'static [(char, char)] = - &[('𑱰', '𑲏'), ('\u{11c92}', '\u{11ca7}'), ('𑲩', '\u{11cb6}')]; - -pub const MASARAM_GONDI: &'static [(char, char)] = &[ - ('।', '॥'), - ('𑴀', '𑴆'), - ('𑴈', '𑴉'), - ('𑴋', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d47}'), - ('𑵐', '𑵙'), -]; - -pub const 
MEDEFAIDRIN: &'static [(char, char)] = &[('𖹀', '𖺚')]; - -pub const MEETEI_MAYEK: &'static [(char, char)] = - &[('ꫠ', '\u{aaf6}'), ('ꯀ', '\u{abed}'), ('꯰', '꯹')]; - -pub const MENDE_KIKAKUI: &'static [(char, char)] = - &[('𞠀', '𞣄'), ('𞣇', '\u{1e8d6}')]; - -pub const MEROITIC_CURSIVE: &'static [(char, char)] = - &[('𐦠', '𐦷'), ('𐦼', '𐧏'), ('𐧒', '𐧿')]; - -pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = - &[('⁝', '⁝'), ('𐦀', '𐦟')]; - -pub const MIAO: &'static [(char, char)] = - &[('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟')]; - -pub const MODI: &'static [(char, char)] = - &[('꠰', '꠹'), ('𑘀', '𑙄'), ('𑙐', '𑙙')]; - -pub const MONGOLIAN: &'static [(char, char)] = &[ - ('᠀', '᠙'), - ('ᠠ', 'ᡸ'), - ('ᢀ', 'ᢪ'), - ('\u{202f}', '\u{202f}'), - ('、', '。'), - ('〈', '》'), - ('𑙠', '𑙬'), -]; - -pub const MRO: &'static [(char, char)] = &[('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯')]; - -pub const MULTANI: &'static [(char, char)] = - &[('੦', '੯'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩')]; - -pub const MYANMAR: &'static [(char, char)] = - &[('က', '႟'), ('꤮', '꤮'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ'), ('𑛐', '𑛣')]; - -pub const NABATAEAN: &'static [(char, char)] = &[('𐢀', '𐢞'), ('𐢧', '𐢯')]; - -pub const NAG_MUNDARI: &'static [(char, char)] = &[('𞓐', '𞓹')]; - -pub const NANDINAGARI: &'static [(char, char)] = &[ - ('।', '॥'), - ('೦', '೯'), - ('ᳩ', 'ᳩ'), - ('ᳲ', 'ᳲ'), - ('ᳺ', 'ᳺ'), - ('꠰', '꠵'), - ('𑦠', '𑦧'), - ('𑦪', '\u{119d7}'), - ('\u{119da}', '𑧤'), -]; - -pub const NEW_TAI_LUE: &'static [(char, char)] = - &[('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟')]; - -pub const NEWA: &'static [(char, char)] = &[('𑐀', '𑑛'), ('𑑝', '𑑡')]; - -pub const NKO: &'static [(char, char)] = &[ - ('،', '،'), - ('؛', '؛'), - ('؟', '؟'), - ('߀', 'ߺ'), - ('\u{7fd}', '߿'), - ('﴾', '﴿'), -]; - -pub const NUSHU: &'static [(char, char)] = &[('𖿡', '𖿡'), ('𛅰', '𛋻')]; - -pub const NYIAKENG_PUACHUE_HMONG: &'static [(char, char)] = - &[('𞄀', '𞄬'), ('\u{1e130}', '𞄽'), ('𞅀', '𞅉'), ('𞅎', '𞅏')]; - -pub const OGHAM: 
&'static [(char, char)] = &[('\u{1680}', '᚜')]; - -pub const OL_CHIKI: &'static [(char, char)] = &[('᱐', '᱿')]; - -pub const OL_ONAL: &'static [(char, char)] = - &[('।', '॥'), ('𞗐', '𞗺'), ('𞗿', '𞗿')]; - -pub const OLD_HUNGARIAN: &'static [(char, char)] = &[ - ('⁚', '⁚'), - ('⁝', '⁝'), - ('⸱', '⸱'), - ('⹁', '⹁'), - ('𐲀', '𐲲'), - ('𐳀', '𐳲'), - ('𐳺', '𐳿'), -]; - -pub const OLD_ITALIC: &'static [(char, char)] = &[('𐌀', '𐌣'), ('𐌭', '𐌯')]; - -pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[('𐪀', '𐪟')]; - -pub const OLD_PERMIC: &'static [(char, char)] = &[ - ('·', '·'), - ('\u{300}', '\u{300}'), - ('\u{306}', '\u{308}'), - ('\u{313}', '\u{313}'), - ('\u{483}', '\u{483}'), - ('𐍐', '\u{1037a}'), -]; - -pub const OLD_PERSIAN: &'static [(char, char)] = &[('𐎠', '𐏃'), ('𐏈', '𐏕')]; - -pub const OLD_SOGDIAN: &'static [(char, char)] = &[('𐼀', '𐼧')]; - -pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[('𐩠', '𐩿')]; - -pub const OLD_TURKIC: &'static [(char, char)] = - &[('⁚', '⁚'), ('⸰', '⸰'), ('𐰀', '𐱈')]; - -pub const OLD_UYGHUR: &'static [(char, char)] = - &[('ـ', 'ـ'), ('𐫲', '𐫲'), ('𐽰', '𐾉')]; - -pub const ORIYA: &'static [(char, char)] = &[ - ('\u{951}', '\u{952}'), - ('।', '॥'), - ('\u{b01}', 'ଃ'), - ('ଅ', 'ଌ'), - ('ଏ', 'ଐ'), - ('ଓ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 'ଳ'), - ('ଵ', 'ହ'), - ('\u{b3c}', '\u{b44}'), - ('େ', 'ୈ'), - ('ୋ', '\u{b4d}'), - ('\u{b55}', '\u{b57}'), - ('ଡ଼', 'ଢ଼'), - ('ୟ', '\u{b63}'), - ('୦', '୷'), - ('\u{1cda}', '\u{1cda}'), - ('ᳲ', 'ᳲ'), -]; - -pub const OSAGE: &'static [(char, char)] = &[ - ('\u{301}', '\u{301}'), - ('\u{304}', '\u{304}'), - ('\u{30b}', '\u{30b}'), - ('\u{358}', '\u{358}'), - ('𐒰', '𐓓'), - ('𐓘', '𐓻'), -]; - -pub const OSMANYA: &'static [(char, char)] = &[('𐒀', '𐒝'), ('𐒠', '𐒩')]; - -pub const PAHAWH_HMONG: &'static [(char, char)] = - &[('𖬀', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖭣', '𖭷'), ('𖭽', '𖮏')]; - -pub const PALMYRENE: &'static [(char, char)] = &[('𐡠', '𐡿')]; - -pub const PAU_CIN_HAU: &'static [(char, char)] = &[('𑫀', '𑫸')]; - 
-pub const PHAGS_PA: &'static [(char, char)] = &[ - ('᠂', '᠃'), - ('᠅', '᠅'), - ('\u{202f}', '\u{202f}'), - ('。', '。'), - ('ꡀ', '꡷'), -]; - -pub const PHOENICIAN: &'static [(char, char)] = &[('𐤀', '𐤛'), ('𐤟', '𐤟')]; - -pub const PSALTER_PAHLAVI: &'static [(char, char)] = - &[('ـ', 'ـ'), ('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯')]; - -pub const REJANG: &'static [(char, char)] = &[('ꤰ', '\u{a953}'), ('꥟', '꥟')]; - -pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'ᛸ')]; - -pub const SAMARITAN: &'static [(char, char)] = - &[('ࠀ', '\u{82d}'), ('࠰', '࠾'), ('⸱', '⸱')]; - -pub const SAURASHTRA: &'static [(char, char)] = - &[('ꢀ', '\u{a8c5}'), ('꣎', '꣙')]; - -pub const SHARADA: &'static [(char, char)] = &[ - ('\u{951}', '\u{951}'), - ('\u{1cd7}', '\u{1cd7}'), - ('\u{1cd9}', '\u{1cd9}'), - ('\u{1cdc}', '\u{1cdd}'), - ('\u{1ce0}', '\u{1ce0}'), - ('꠰', '꠵'), - ('꠸', '꠸'), - ('\u{11180}', '𑇟'), -]; - -pub const SHAVIAN: &'static [(char, char)] = &[('·', '·'), ('𐑐', '𐑿')]; - -pub const SIDDHAM: &'static [(char, char)] = - &[('𑖀', '\u{115b5}'), ('𑖸', '\u{115dd}')]; - -pub const SIGNWRITING: &'static [(char, char)] = - &[('𝠀', '𝪋'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}')]; - -pub const SINHALA: &'static [(char, char)] = &[ - ('।', '॥'), - ('\u{d81}', 'ඃ'), - ('අ', 'ඖ'), - ('ක', 'න'), - ('ඳ', 'ර'), - ('ල', 'ල'), - ('ව', 'ෆ'), - ('\u{dca}', '\u{dca}'), - ('\u{dcf}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('ෘ', '\u{ddf}'), - ('෦', '෯'), - ('ෲ', '෴'), - ('ᳲ', 'ᳲ'), - ('𑇡', '𑇴'), -]; - -pub const SOGDIAN: &'static [(char, char)] = &[('ـ', 'ـ'), ('𐼰', '𐽙')]; - -pub const SORA_SOMPENG: &'static [(char, char)] = &[('𑃐', '𑃨'), ('𑃰', '𑃹')]; - -pub const SOYOMBO: &'static [(char, char)] = &[('𑩐', '𑪢')]; - -pub const SUNDANESE: &'static [(char, char)] = - &[('\u{1b80}', 'ᮿ'), ('᳀', '᳇')]; - -pub const SUNUWAR: &'static [(char, char)] = &[ - ('\u{300}', '\u{301}'), - ('\u{303}', '\u{303}'), - ('\u{30d}', '\u{30d}'), - ('\u{310}', '\u{310}'), - ('\u{32d}', '\u{32d}'), - ('\u{331}', 
'\u{331}'), - ('𑯀', '𑯡'), - ('𑯰', '𑯹'), -]; - -pub const SYLOTI_NAGRI: &'static [(char, char)] = - &[('।', '॥'), ('০', '৯'), ('ꠀ', '\u{a82c}')]; - -pub const SYRIAC: &'static [(char, char)] = &[ - ('\u{303}', '\u{304}'), - ('\u{307}', '\u{308}'), - ('\u{30a}', '\u{30a}'), - ('\u{320}', '\u{320}'), - ('\u{323}', '\u{325}'), - ('\u{32d}', '\u{32e}'), - ('\u{330}', '\u{330}'), - ('،', '،'), - ('؛', '\u{61c}'), - ('؟', '؟'), - ('ـ', 'ـ'), - ('\u{64b}', '\u{655}'), - ('\u{670}', '\u{670}'), - ('܀', '܍'), - ('\u{70f}', '\u{74a}'), - ('ݍ', 'ݏ'), - ('ࡠ', 'ࡪ'), - ('\u{1df8}', '\u{1df8}'), - ('\u{1dfa}', '\u{1dfa}'), -]; - -pub const TAGALOG: &'static [(char, char)] = - &[('ᜀ', '\u{1715}'), ('ᜟ', 'ᜟ'), ('᜵', '᜶')]; - -pub const TAGBANWA: &'static [(char, char)] = - &[('᜵', '᜶'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}')]; - -pub const TAI_LE: &'static [(char, char)] = &[ - ('\u{300}', '\u{301}'), - ('\u{307}', '\u{308}'), - ('\u{30c}', '\u{30c}'), - ('၀', '၉'), - ('ᥐ', 'ᥭ'), - ('ᥰ', 'ᥴ'), -]; - -pub const TAI_THAM: &'static [(char, char)] = &[ - ('ᨠ', '\u{1a5e}'), - ('\u{1a60}', '\u{1a7c}'), - ('\u{1a7f}', '᪉'), - ('᪐', '᪙'), - ('᪠', '᪭'), -]; - -pub const TAI_VIET: &'static [(char, char)] = &[('ꪀ', 'ꫂ'), ('ꫛ', '꫟')]; - -pub const TAKRI: &'static [(char, char)] = - &[('।', '॥'), ('꠰', '꠹'), ('𑚀', '𑚹'), ('𑛀', '𑛉')]; - -pub const TAMIL: &'static [(char, char)] = &[ - ('\u{951}', '\u{952}'), - ('।', '॥'), - ('\u{b82}', 'ஃ'), - ('அ', 'ஊ'), - ('எ', 'ஐ'), - ('ஒ', 'க'), - ('ங', 'ச'), - ('ஜ', 'ஜ'), - ('ஞ', 'ட'), - ('ண', 'த'), - ('ந', 'ப'), - ('ம', 'ஹ'), - ('\u{bbe}', 'ூ'), - ('ெ', 'ை'), - ('ொ', '\u{bcd}'), - ('ௐ', 'ௐ'), - ('\u{bd7}', '\u{bd7}'), - ('௦', '௺'), - ('\u{1cda}', '\u{1cda}'), - ('ꣳ', 'ꣳ'), - ('\u{11301}', '\u{11301}'), - ('𑌃', '𑌃'), - ('\u{1133b}', '\u{1133c}'), - ('𑿀', '𑿱'), - ('𑿿', '𑿿'), -]; - -pub const TANGSA: &'static [(char, char)] = &[('𖩰', '𖪾'), ('𖫀', '𖫉')]; - -pub const TANGUT: &'static [(char, char)] = &[ - ('⿰', '⿿'), - ('㇯', '㇯'), - ('𖿠', '𖿠'), - ('𗀀', 
'𘟷'), - ('𘠀', '𘫿'), - ('𘴀', '𘴈'), -]; - -pub const TELUGU: &'static [(char, char)] = &[ - ('\u{951}', '\u{952}'), - ('।', '॥'), - ('\u{c00}', 'ఌ'), - ('ఎ', 'ఐ'), - ('ఒ', 'న'), - ('ప', 'హ'), - ('\u{c3c}', 'ౄ'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4d}'), - ('\u{c55}', '\u{c56}'), - ('ౘ', 'ౚ'), - ('ౝ', 'ౝ'), - ('ౠ', '\u{c63}'), - ('౦', '౯'), - ('౷', '౿'), - ('\u{1cda}', '\u{1cda}'), - ('ᳲ', 'ᳲ'), -]; - -pub const THAANA: &'static [(char, char)] = &[ - ('،', '،'), - ('؛', '\u{61c}'), - ('؟', '؟'), - ('٠', '٩'), - ('ހ', 'ޱ'), - ('ﷲ', 'ﷲ'), - ('﷽', '﷽'), -]; - -pub const THAI: &'static [(char, char)] = &[ - ('ʼ', 'ʼ'), - ('˗', '˗'), - ('\u{303}', '\u{303}'), - ('\u{331}', '\u{331}'), - ('ก', '\u{e3a}'), - ('เ', '๛'), -]; - -pub const TIBETAN: &'static [(char, char)] = &[ - ('ༀ', 'ཇ'), - ('ཉ', 'ཬ'), - ('\u{f71}', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('྾', '࿌'), - ('࿎', '࿔'), - ('࿙', '࿚'), - ('〈', '》'), -]; - -pub const TIFINAGH: &'static [(char, char)] = &[ - ('\u{302}', '\u{302}'), - ('\u{304}', '\u{304}'), - ('\u{307}', '\u{307}'), - ('\u{309}', '\u{309}'), - ('ⴰ', 'ⵧ'), - ('ⵯ', '⵰'), - ('\u{2d7f}', '\u{2d7f}'), -]; - -pub const TIRHUTA: &'static [(char, char)] = &[ - ('\u{951}', '\u{952}'), - ('।', '॥'), - ('ᳲ', 'ᳲ'), - ('꠰', '꠹'), - ('𑒀', '𑓇'), - ('𑓐', '𑓙'), -]; - -pub const TODHRI: &'static [(char, char)] = &[ - ('\u{301}', '\u{301}'), - ('\u{304}', '\u{304}'), - ('\u{307}', '\u{307}'), - ('\u{311}', '\u{311}'), - ('\u{313}', '\u{313}'), - ('\u{35e}', '\u{35e}'), - ('𐗀', '𐗳'), -]; - -pub const TOTO: &'static [(char, char)] = &[('ʼ', 'ʼ'), ('𞊐', '\u{1e2ae}')]; - -pub const TULU_TIGALARI: &'static [(char, char)] = &[ - ('೦', '೯'), - ('ᳲ', 'ᳲ'), - ('\u{1cf4}', '\u{1cf4}'), - ('꠰', '꠵'), - ('\u{a8f1}', '\u{a8f1}'), - ('𑎀', '𑎉'), - ('𑎋', '𑎋'), - ('𑎎', '𑎎'), - ('𑎐', '𑎵'), - ('𑎷', '\u{113c0}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '𑏊'), - ('𑏌', '𑏕'), - ('𑏗', '𑏘'), - ('\u{113e1}', '\u{113e2}'), -]; - -pub const UGARITIC: 
&'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')]; - -pub const VAI: &'static [(char, char)] = &[('ꔀ', 'ꘫ')]; - -pub const VITHKUQI: &'static [(char, char)] = &[ - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), -]; - -pub const WANCHO: &'static [(char, char)] = &[('𞋀', '𞋹'), ('𞋿', '𞋿')]; - -pub const WARANG_CITI: &'static [(char, char)] = &[('𑢠', '𑣲'), ('𑣿', '𑣿')]; - -pub const YEZIDI: &'static [(char, char)] = &[ - ('،', '،'), - ('؛', '؛'), - ('؟', '؟'), - ('٠', '٩'), - ('𐺀', '𐺩'), - ('\u{10eab}', '𐺭'), - ('𐺰', '𐺱'), -]; - -pub const YI: &'static [(char, char)] = &[ - ('、', '。'), - ('〈', '】'), - ('〔', '〛'), - ('・', '・'), - ('ꀀ', 'ꒌ'), - ('꒐', '꓆'), - ('。', '・'), -]; - -pub const ZANABAZAR_SQUARE: &'static [(char, char)] = &[('𑨀', '\u{11a47}')]; diff --git a/vendor/regex-syntax/src/unicode_tables/sentence_break.rs b/vendor/regex-syntax/src/unicode_tables/sentence_break.rs deleted file mode 100644 index af1c5bea..00000000 --- a/vendor/regex-syntax/src/unicode_tables/sentence_break.rs +++ /dev/null @@ -1,2530 +0,0 @@ -// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: -// -// ucd-generate sentence-break ucd-16.0.0 --chars -// -// Unicode version: 16.0.0. -// -// ucd-generate 0.3.1 is available on crates.io. 
- -pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ - ("ATerm", ATERM), - ("CR", CR), - ("Close", CLOSE), - ("Extend", EXTEND), - ("Format", FORMAT), - ("LF", LF), - ("Lower", LOWER), - ("Numeric", NUMERIC), - ("OLetter", OLETTER), - ("SContinue", SCONTINUE), - ("STerm", STERM), - ("Sep", SEP), - ("Sp", SP), - ("Upper", UPPER), -]; - -pub const ATERM: &'static [(char, char)] = - &[('.', '.'), ('․', '․'), ('﹒', '﹒'), ('.', '.')]; - -pub const CR: &'static [(char, char)] = &[('\r', '\r')]; - -pub const CLOSE: &'static [(char, char)] = &[ - ('"', '"'), - ('\'', ')'), - ('[', '['), - (']', ']'), - ('{', '{'), - ('}', '}'), - ('«', '«'), - ('»', '»'), - ('༺', '༽'), - ('᚛', '᚜'), - ('‘', '‟'), - ('‹', '›'), - ('⁅', '⁆'), - ('⁽', '⁾'), - ('₍', '₎'), - ('⌈', '⌋'), - ('〈', '〉'), - ('❛', '❠'), - ('❨', '❵'), - ('⟅', '⟆'), - ('⟦', '⟯'), - ('⦃', '⦘'), - ('⧘', '⧛'), - ('⧼', '⧽'), - ('⸀', '⸍'), - ('⸜', '⸝'), - ('⸠', '⸩'), - ('⹂', '⹂'), - ('⹕', '⹜'), - ('〈', '】'), - ('〔', '〛'), - ('〝', '〟'), - ('﴾', '﴿'), - ('︗', '︘'), - ('︵', '﹄'), - ('﹇', '﹈'), - ('﹙', '﹞'), - ('(', ')'), - ('[', '['), - (']', ']'), - ('{', '{'), - ('}', '}'), - ('⦅', '⦆'), - ('「', '」'), - ('🙶', '🙸'), -]; - -pub const EXTEND: &'static [(char, char)] = &[ - ('\u{300}', '\u{36f}'), - ('\u{483}', '\u{489}'), - ('\u{591}', '\u{5bd}'), - ('\u{5bf}', '\u{5bf}'), - ('\u{5c1}', '\u{5c2}'), - ('\u{5c4}', '\u{5c5}'), - ('\u{5c7}', '\u{5c7}'), - ('\u{610}', '\u{61a}'), - ('\u{64b}', '\u{65f}'), - ('\u{670}', '\u{670}'), - ('\u{6d6}', '\u{6dc}'), - ('\u{6df}', '\u{6e4}'), - ('\u{6e7}', '\u{6e8}'), - ('\u{6ea}', '\u{6ed}'), - ('\u{711}', '\u{711}'), - ('\u{730}', '\u{74a}'), - ('\u{7a6}', '\u{7b0}'), - ('\u{7eb}', '\u{7f3}'), - ('\u{7fd}', '\u{7fd}'), - ('\u{816}', '\u{819}'), - ('\u{81b}', '\u{823}'), - ('\u{825}', '\u{827}'), - ('\u{829}', '\u{82d}'), - ('\u{859}', '\u{85b}'), - ('\u{897}', '\u{89f}'), - ('\u{8ca}', '\u{8e1}'), - ('\u{8e3}', 'ः'), - ('\u{93a}', '\u{93c}'), - ('ा', 'ॏ'), - 
('\u{951}', '\u{957}'), - ('\u{962}', '\u{963}'), - ('\u{981}', 'ঃ'), - ('\u{9bc}', '\u{9bc}'), - ('\u{9be}', '\u{9c4}'), - ('ে', 'ৈ'), - ('ো', '\u{9cd}'), - ('\u{9d7}', '\u{9d7}'), - ('\u{9e2}', '\u{9e3}'), - ('\u{9fe}', '\u{9fe}'), - ('\u{a01}', 'ਃ'), - ('\u{a3c}', '\u{a3c}'), - ('ਾ', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4d}'), - ('\u{a51}', '\u{a51}'), - ('\u{a70}', '\u{a71}'), - ('\u{a75}', '\u{a75}'), - ('\u{a81}', 'ઃ'), - ('\u{abc}', '\u{abc}'), - ('ા', '\u{ac5}'), - ('\u{ac7}', 'ૉ'), - ('ો', '\u{acd}'), - ('\u{ae2}', '\u{ae3}'), - ('\u{afa}', '\u{aff}'), - ('\u{b01}', 'ଃ'), - ('\u{b3c}', '\u{b3c}'), - ('\u{b3e}', '\u{b44}'), - ('େ', 'ୈ'), - ('ୋ', '\u{b4d}'), - ('\u{b55}', '\u{b57}'), - ('\u{b62}', '\u{b63}'), - ('\u{b82}', '\u{b82}'), - ('\u{bbe}', 'ூ'), - ('ெ', 'ை'), - ('ொ', '\u{bcd}'), - ('\u{bd7}', '\u{bd7}'), - ('\u{c00}', '\u{c04}'), - ('\u{c3c}', '\u{c3c}'), - ('\u{c3e}', 'ౄ'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4d}'), - ('\u{c55}', '\u{c56}'), - ('\u{c62}', '\u{c63}'), - ('\u{c81}', 'ಃ'), - ('\u{cbc}', '\u{cbc}'), - ('ಾ', 'ೄ'), - ('\u{cc6}', '\u{cc8}'), - ('\u{cca}', '\u{ccd}'), - ('\u{cd5}', '\u{cd6}'), - ('\u{ce2}', '\u{ce3}'), - ('ೳ', 'ೳ'), - ('\u{d00}', 'ഃ'), - ('\u{d3b}', '\u{d3c}'), - ('\u{d3e}', '\u{d44}'), - ('െ', 'ൈ'), - ('ൊ', '\u{d4d}'), - ('\u{d57}', '\u{d57}'), - ('\u{d62}', '\u{d63}'), - ('\u{d81}', 'ඃ'), - ('\u{dca}', '\u{dca}'), - ('\u{dcf}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('ෘ', '\u{ddf}'), - ('ෲ', 'ෳ'), - ('\u{e31}', '\u{e31}'), - ('\u{e34}', '\u{e3a}'), - ('\u{e47}', '\u{e4e}'), - ('\u{eb1}', '\u{eb1}'), - ('\u{eb4}', '\u{ebc}'), - ('\u{ec8}', '\u{ece}'), - ('\u{f18}', '\u{f19}'), - ('\u{f35}', '\u{f35}'), - ('\u{f37}', '\u{f37}'), - ('\u{f39}', '\u{f39}'), - ('༾', '༿'), - ('\u{f71}', '\u{f84}'), - ('\u{f86}', '\u{f87}'), - ('\u{f8d}', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('\u{fc6}', '\u{fc6}'), - ('ါ', '\u{103e}'), - ('ၖ', '\u{1059}'), - ('\u{105e}', '\u{1060}'), - ('ၢ', 'ၤ'), - ('ၧ', 'ၭ'), - 
('\u{1071}', '\u{1074}'), - ('\u{1082}', '\u{108d}'), - ('ႏ', 'ႏ'), - ('ႚ', '\u{109d}'), - ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1715}'), - ('\u{1732}', '\u{1734}'), - ('\u{1752}', '\u{1753}'), - ('\u{1772}', '\u{1773}'), - ('\u{17b4}', '\u{17d3}'), - ('\u{17dd}', '\u{17dd}'), - ('\u{180b}', '\u{180d}'), - ('\u{180f}', '\u{180f}'), - ('\u{1885}', '\u{1886}'), - ('\u{18a9}', '\u{18a9}'), - ('\u{1920}', 'ᤫ'), - ('ᤰ', '\u{193b}'), - ('\u{1a17}', '\u{1a1b}'), - ('ᩕ', '\u{1a5e}'), - ('\u{1a60}', '\u{1a7c}'), - ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ace}'), - ('\u{1b00}', 'ᬄ'), - ('\u{1b34}', '\u{1b44}'), - ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', 'ᮂ'), - ('ᮡ', '\u{1bad}'), - ('\u{1be6}', '\u{1bf3}'), - ('ᰤ', '\u{1c37}'), - ('\u{1cd0}', '\u{1cd2}'), - ('\u{1cd4}', '\u{1ce8}'), - ('\u{1ced}', '\u{1ced}'), - ('\u{1cf4}', '\u{1cf4}'), - ('᳷', '\u{1cf9}'), - ('\u{1dc0}', '\u{1dff}'), - ('\u{200c}', '\u{200d}'), - ('\u{20d0}', '\u{20f0}'), - ('\u{2cef}', '\u{2cf1}'), - ('\u{2d7f}', '\u{2d7f}'), - ('\u{2de0}', '\u{2dff}'), - ('\u{302a}', '\u{302f}'), - ('\u{3099}', '\u{309a}'), - ('\u{a66f}', '\u{a672}'), - ('\u{a674}', '\u{a67d}'), - ('\u{a69e}', '\u{a69f}'), - ('\u{a6f0}', '\u{a6f1}'), - ('\u{a802}', '\u{a802}'), - ('\u{a806}', '\u{a806}'), - ('\u{a80b}', '\u{a80b}'), - ('ꠣ', 'ꠧ'), - ('\u{a82c}', '\u{a82c}'), - ('ꢀ', 'ꢁ'), - ('ꢴ', '\u{a8c5}'), - ('\u{a8e0}', '\u{a8f1}'), - ('\u{a8ff}', '\u{a8ff}'), - ('\u{a926}', '\u{a92d}'), - ('\u{a947}', '\u{a953}'), - ('\u{a980}', 'ꦃ'), - ('\u{a9b3}', '\u{a9c0}'), - ('\u{a9e5}', '\u{a9e5}'), - ('\u{aa29}', '\u{aa36}'), - ('\u{aa43}', '\u{aa43}'), - ('\u{aa4c}', 'ꩍ'), - ('ꩻ', 'ꩽ'), - ('\u{aab0}', '\u{aab0}'), - ('\u{aab2}', '\u{aab4}'), - ('\u{aab7}', '\u{aab8}'), - ('\u{aabe}', '\u{aabf}'), - ('\u{aac1}', '\u{aac1}'), - ('ꫫ', 'ꫯ'), - ('ꫵ', '\u{aaf6}'), - ('ꯣ', 'ꯪ'), - ('꯬', '\u{abed}'), - ('\u{fb1e}', '\u{fb1e}'), - ('\u{fe00}', '\u{fe0f}'), - ('\u{fe20}', '\u{fe2f}'), - ('\u{ff9e}', '\u{ff9f}'), - ('\u{101fd}', 
'\u{101fd}'), - ('\u{102e0}', '\u{102e0}'), - ('\u{10376}', '\u{1037a}'), - ('\u{10a01}', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '\u{10a0f}'), - ('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '\u{10a3f}'), - ('\u{10ae5}', '\u{10ae6}'), - ('\u{10d24}', '\u{10d27}'), - ('\u{10d69}', '\u{10d6d}'), - ('\u{10eab}', '\u{10eac}'), - ('\u{10efc}', '\u{10eff}'), - ('\u{10f46}', '\u{10f50}'), - ('\u{10f82}', '\u{10f85}'), - ('𑀀', '𑀂'), - ('\u{11038}', '\u{11046}'), - ('\u{11070}', '\u{11070}'), - ('\u{11073}', '\u{11074}'), - ('\u{1107f}', '𑂂'), - ('𑂰', '\u{110ba}'), - ('\u{110c2}', '\u{110c2}'), - ('\u{11100}', '\u{11102}'), - ('\u{11127}', '\u{11134}'), - ('𑅅', '𑅆'), - ('\u{11173}', '\u{11173}'), - ('\u{11180}', '𑆂'), - ('𑆳', '\u{111c0}'), - ('\u{111c9}', '\u{111cc}'), - ('𑇎', '\u{111cf}'), - ('𑈬', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), - ('\u{11241}', '\u{11241}'), - ('\u{112df}', '\u{112ea}'), - ('\u{11300}', '𑌃'), - ('\u{1133b}', '\u{1133c}'), - ('\u{1133e}', '𑍄'), - ('𑍇', '𑍈'), - ('𑍋', '\u{1134d}'), - ('\u{11357}', '\u{11357}'), - ('𑍢', '𑍣'), - ('\u{11366}', '\u{1136c}'), - ('\u{11370}', '\u{11374}'), - ('\u{113b8}', '\u{113c0}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '𑏊'), - ('𑏌', '\u{113d0}'), - ('\u{113d2}', '\u{113d2}'), - ('\u{113e1}', '\u{113e2}'), - ('𑐵', '\u{11446}'), - ('\u{1145e}', '\u{1145e}'), - ('\u{114b0}', '\u{114c3}'), - ('\u{115af}', '\u{115b5}'), - ('𑖸', '\u{115c0}'), - ('\u{115dc}', '\u{115dd}'), - ('𑘰', '\u{11640}'), - ('\u{116ab}', '\u{116b7}'), - ('\u{1171d}', '\u{1172b}'), - ('𑠬', '\u{1183a}'), - ('\u{11930}', '𑤵'), - ('𑤷', '𑤸'), - ('\u{1193b}', '\u{1193e}'), - ('𑥀', '𑥀'), - ('𑥂', '\u{11943}'), - ('𑧑', '\u{119d7}'), - ('\u{119da}', '\u{119e0}'), - ('𑧤', '𑧤'), - ('\u{11a01}', '\u{11a0a}'), - ('\u{11a33}', '𑨹'), - ('\u{11a3b}', '\u{11a3e}'), - ('\u{11a47}', '\u{11a47}'), - ('\u{11a51}', '\u{11a5b}'), - ('\u{11a8a}', '\u{11a99}'), - ('𑰯', '\u{11c36}'), - ('\u{11c38}', '\u{11c3f}'), - 
('\u{11c92}', '\u{11ca7}'), - ('𑲩', '\u{11cb6}'), - ('\u{11d31}', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d45}'), - ('\u{11d47}', '\u{11d47}'), - ('𑶊', '𑶎'), - ('\u{11d90}', '\u{11d91}'), - ('𑶓', '\u{11d97}'), - ('\u{11ef3}', '𑻶'), - ('\u{11f00}', '\u{11f01}'), - ('𑼃', '𑼃'), - ('𑼴', '\u{11f3a}'), - ('𑼾', '\u{11f42}'), - ('\u{11f5a}', '\u{11f5a}'), - ('\u{13440}', '\u{13440}'), - ('\u{13447}', '\u{13455}'), - ('\u{1611e}', '\u{1612f}'), - ('\u{16af0}', '\u{16af4}'), - ('\u{16b30}', '\u{16b36}'), - ('\u{16f4f}', '\u{16f4f}'), - ('𖽑', '𖾇'), - ('\u{16f8f}', '\u{16f92}'), - ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), - ('\u{1bc9d}', '\u{1bc9e}'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d165}', '\u{1d169}'), - ('\u{1d16d}', '\u{1d172}'), - ('\u{1d17b}', '\u{1d182}'), - ('\u{1d185}', '\u{1d18b}'), - ('\u{1d1aa}', '\u{1d1ad}'), - ('\u{1d242}', '\u{1d244}'), - ('\u{1da00}', '\u{1da36}'), - ('\u{1da3b}', '\u{1da6c}'), - ('\u{1da75}', '\u{1da75}'), - ('\u{1da84}', '\u{1da84}'), - ('\u{1da9b}', '\u{1da9f}'), - ('\u{1daa1}', '\u{1daaf}'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), - ('\u{1e08f}', '\u{1e08f}'), - ('\u{1e130}', '\u{1e136}'), - ('\u{1e2ae}', '\u{1e2ae}'), - ('\u{1e2ec}', '\u{1e2ef}'), - ('\u{1e4ec}', '\u{1e4ef}'), - ('\u{1e5ee}', '\u{1e5ef}'), - ('\u{1e8d0}', '\u{1e8d6}'), - ('\u{1e944}', '\u{1e94a}'), - ('\u{e0020}', '\u{e007f}'), - ('\u{e0100}', '\u{e01ef}'), -]; - -pub const FORMAT: &'static [(char, char)] = &[ - ('\u{ad}', '\u{ad}'), - ('\u{61c}', '\u{61c}'), - ('\u{70f}', '\u{70f}'), - ('\u{180e}', '\u{180e}'), - ('\u{200b}', '\u{200b}'), - ('\u{200e}', '\u{200f}'), - ('\u{202a}', '\u{202e}'), - ('\u{2060}', '\u{2064}'), - ('\u{2066}', '\u{206f}'), - ('\u{feff}', '\u{feff}'), - ('\u{fff9}', '\u{fffb}'), - ('\u{13430}', '\u{1343f}'), - ('\u{1bca0}', 
'\u{1bca3}'), - ('\u{1d173}', '\u{1d17a}'), - ('\u{e0001}', '\u{e0001}'), -]; - -pub const LF: &'static [(char, char)] = &[('\n', '\n')]; - -pub const LOWER: &'static [(char, char)] = &[ - ('a', 'z'), - ('ª', 'ª'), - ('µ', 'µ'), - ('º', 'º'), - ('ß', 'ö'), - ('ø', 'ÿ'), - ('ā', 'ā'), - ('ă', 'ă'), - ('ą', 'ą'), - ('ć', 'ć'), - ('ĉ', 'ĉ'), - ('ċ', 'ċ'), - ('č', 'č'), - ('ď', 'ď'), - ('đ', 'đ'), - ('ē', 'ē'), - ('ĕ', 'ĕ'), - ('ė', 'ė'), - ('ę', 'ę'), - ('ě', 'ě'), - ('ĝ', 'ĝ'), - ('ğ', 'ğ'), - ('ġ', 'ġ'), - ('ģ', 'ģ'), - ('ĥ', 'ĥ'), - ('ħ', 'ħ'), - ('ĩ', 'ĩ'), - ('ī', 'ī'), - ('ĭ', 'ĭ'), - ('į', 'į'), - ('ı', 'ı'), - ('ij', 'ij'), - ('ĵ', 'ĵ'), - ('ķ', 'ĸ'), - ('ĺ', 'ĺ'), - ('ļ', 'ļ'), - ('ľ', 'ľ'), - ('ŀ', 'ŀ'), - ('ł', 'ł'), - ('ń', 'ń'), - ('ņ', 'ņ'), - ('ň', 'ʼn'), - ('ŋ', 'ŋ'), - ('ō', 'ō'), - ('ŏ', 'ŏ'), - ('ő', 'ő'), - ('œ', 'œ'), - ('ŕ', 'ŕ'), - ('ŗ', 'ŗ'), - ('ř', 'ř'), - ('ś', 'ś'), - ('ŝ', 'ŝ'), - ('ş', 'ş'), - ('š', 'š'), - ('ţ', 'ţ'), - ('ť', 'ť'), - ('ŧ', 'ŧ'), - ('ũ', 'ũ'), - ('ū', 'ū'), - ('ŭ', 'ŭ'), - ('ů', 'ů'), - ('ű', 'ű'), - ('ų', 'ų'), - ('ŵ', 'ŵ'), - ('ŷ', 'ŷ'), - ('ź', 'ź'), - ('ż', 'ż'), - ('ž', 'ƀ'), - ('ƃ', 'ƃ'), - ('ƅ', 'ƅ'), - ('ƈ', 'ƈ'), - ('ƌ', 'ƍ'), - ('ƒ', 'ƒ'), - ('ƕ', 'ƕ'), - ('ƙ', 'ƛ'), - ('ƞ', 'ƞ'), - ('ơ', 'ơ'), - ('ƣ', 'ƣ'), - ('ƥ', 'ƥ'), - ('ƨ', 'ƨ'), - ('ƪ', 'ƫ'), - ('ƭ', 'ƭ'), - ('ư', 'ư'), - ('ƴ', 'ƴ'), - ('ƶ', 'ƶ'), - ('ƹ', 'ƺ'), - ('ƽ', 'ƿ'), - ('dž', 'dž'), - ('lj', 'lj'), - ('nj', 'nj'), - ('ǎ', 'ǎ'), - ('ǐ', 'ǐ'), - ('ǒ', 'ǒ'), - ('ǔ', 'ǔ'), - ('ǖ', 'ǖ'), - ('ǘ', 'ǘ'), - ('ǚ', 'ǚ'), - ('ǜ', 'ǝ'), - ('ǟ', 'ǟ'), - ('ǡ', 'ǡ'), - ('ǣ', 'ǣ'), - ('ǥ', 'ǥ'), - ('ǧ', 'ǧ'), - ('ǩ', 'ǩ'), - ('ǫ', 'ǫ'), - ('ǭ', 'ǭ'), - ('ǯ', 'ǰ'), - ('dz', 'dz'), - ('ǵ', 'ǵ'), - ('ǹ', 'ǹ'), - ('ǻ', 'ǻ'), - ('ǽ', 'ǽ'), - ('ǿ', 'ǿ'), - ('ȁ', 'ȁ'), - ('ȃ', 'ȃ'), - ('ȅ', 'ȅ'), - ('ȇ', 'ȇ'), - ('ȉ', 'ȉ'), - ('ȋ', 'ȋ'), - ('ȍ', 'ȍ'), - ('ȏ', 'ȏ'), - ('ȑ', 'ȑ'), - ('ȓ', 'ȓ'), - ('ȕ', 'ȕ'), - ('ȗ', 'ȗ'), - ('ș', 'ș'), - ('ț', 'ț'), - ('ȝ', 'ȝ'), - ('ȟ', 
'ȟ'), - ('ȡ', 'ȡ'), - ('ȣ', 'ȣ'), - ('ȥ', 'ȥ'), - ('ȧ', 'ȧ'), - ('ȩ', 'ȩ'), - ('ȫ', 'ȫ'), - ('ȭ', 'ȭ'), - ('ȯ', 'ȯ'), - ('ȱ', 'ȱ'), - ('ȳ', 'ȹ'), - ('ȼ', 'ȼ'), - ('ȿ', 'ɀ'), - ('ɂ', 'ɂ'), - ('ɇ', 'ɇ'), - ('ɉ', 'ɉ'), - ('ɋ', 'ɋ'), - ('ɍ', 'ɍ'), - ('ɏ', 'ʓ'), - ('ʕ', 'ʸ'), - ('ˀ', 'ˁ'), - ('ˠ', 'ˤ'), - ('ͱ', 'ͱ'), - ('ͳ', 'ͳ'), - ('ͷ', 'ͷ'), - ('ͺ', 'ͽ'), - ('ΐ', 'ΐ'), - ('ά', 'ώ'), - ('ϐ', 'ϑ'), - ('ϕ', 'ϗ'), - ('ϙ', 'ϙ'), - ('ϛ', 'ϛ'), - ('ϝ', 'ϝ'), - ('ϟ', 'ϟ'), - ('ϡ', 'ϡ'), - ('ϣ', 'ϣ'), - ('ϥ', 'ϥ'), - ('ϧ', 'ϧ'), - ('ϩ', 'ϩ'), - ('ϫ', 'ϫ'), - ('ϭ', 'ϭ'), - ('ϯ', 'ϳ'), - ('ϵ', 'ϵ'), - ('ϸ', 'ϸ'), - ('ϻ', 'ϼ'), - ('а', 'џ'), - ('ѡ', 'ѡ'), - ('ѣ', 'ѣ'), - ('ѥ', 'ѥ'), - ('ѧ', 'ѧ'), - ('ѩ', 'ѩ'), - ('ѫ', 'ѫ'), - ('ѭ', 'ѭ'), - ('ѯ', 'ѯ'), - ('ѱ', 'ѱ'), - ('ѳ', 'ѳ'), - ('ѵ', 'ѵ'), - ('ѷ', 'ѷ'), - ('ѹ', 'ѹ'), - ('ѻ', 'ѻ'), - ('ѽ', 'ѽ'), - ('ѿ', 'ѿ'), - ('ҁ', 'ҁ'), - ('ҋ', 'ҋ'), - ('ҍ', 'ҍ'), - ('ҏ', 'ҏ'), - ('ґ', 'ґ'), - ('ғ', 'ғ'), - ('ҕ', 'ҕ'), - ('җ', 'җ'), - ('ҙ', 'ҙ'), - ('қ', 'қ'), - ('ҝ', 'ҝ'), - ('ҟ', 'ҟ'), - ('ҡ', 'ҡ'), - ('ң', 'ң'), - ('ҥ', 'ҥ'), - ('ҧ', 'ҧ'), - ('ҩ', 'ҩ'), - ('ҫ', 'ҫ'), - ('ҭ', 'ҭ'), - ('ү', 'ү'), - ('ұ', 'ұ'), - ('ҳ', 'ҳ'), - ('ҵ', 'ҵ'), - ('ҷ', 'ҷ'), - ('ҹ', 'ҹ'), - ('һ', 'һ'), - ('ҽ', 'ҽ'), - ('ҿ', 'ҿ'), - ('ӂ', 'ӂ'), - ('ӄ', 'ӄ'), - ('ӆ', 'ӆ'), - ('ӈ', 'ӈ'), - ('ӊ', 'ӊ'), - ('ӌ', 'ӌ'), - ('ӎ', 'ӏ'), - ('ӑ', 'ӑ'), - ('ӓ', 'ӓ'), - ('ӕ', 'ӕ'), - ('ӗ', 'ӗ'), - ('ә', 'ә'), - ('ӛ', 'ӛ'), - ('ӝ', 'ӝ'), - ('ӟ', 'ӟ'), - ('ӡ', 'ӡ'), - ('ӣ', 'ӣ'), - ('ӥ', 'ӥ'), - ('ӧ', 'ӧ'), - ('ө', 'ө'), - ('ӫ', 'ӫ'), - ('ӭ', 'ӭ'), - ('ӯ', 'ӯ'), - ('ӱ', 'ӱ'), - ('ӳ', 'ӳ'), - ('ӵ', 'ӵ'), - ('ӷ', 'ӷ'), - ('ӹ', 'ӹ'), - ('ӻ', 'ӻ'), - ('ӽ', 'ӽ'), - ('ӿ', 'ӿ'), - ('ԁ', 'ԁ'), - ('ԃ', 'ԃ'), - ('ԅ', 'ԅ'), - ('ԇ', 'ԇ'), - ('ԉ', 'ԉ'), - ('ԋ', 'ԋ'), - ('ԍ', 'ԍ'), - ('ԏ', 'ԏ'), - ('ԑ', 'ԑ'), - ('ԓ', 'ԓ'), - ('ԕ', 'ԕ'), - ('ԗ', 'ԗ'), - ('ԙ', 'ԙ'), - ('ԛ', 'ԛ'), - ('ԝ', 'ԝ'), - ('ԟ', 'ԟ'), - ('ԡ', 'ԡ'), - ('ԣ', 'ԣ'), - ('ԥ', 'ԥ'), - ('ԧ', 'ԧ'), - ('ԩ', 'ԩ'), - ('ԫ', 'ԫ'), - 
('ԭ', 'ԭ'), - ('ԯ', 'ԯ'), - ('ՠ', 'ֈ'), - ('ჼ', 'ჼ'), - ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲈ'), - ('ᲊ', 'ᲊ'), - ('ᴀ', 'ᶿ'), - ('ḁ', 'ḁ'), - ('ḃ', 'ḃ'), - ('ḅ', 'ḅ'), - ('ḇ', 'ḇ'), - ('ḉ', 'ḉ'), - ('ḋ', 'ḋ'), - ('ḍ', 'ḍ'), - ('ḏ', 'ḏ'), - ('ḑ', 'ḑ'), - ('ḓ', 'ḓ'), - ('ḕ', 'ḕ'), - ('ḗ', 'ḗ'), - ('ḙ', 'ḙ'), - ('ḛ', 'ḛ'), - ('ḝ', 'ḝ'), - ('ḟ', 'ḟ'), - ('ḡ', 'ḡ'), - ('ḣ', 'ḣ'), - ('ḥ', 'ḥ'), - ('ḧ', 'ḧ'), - ('ḩ', 'ḩ'), - ('ḫ', 'ḫ'), - ('ḭ', 'ḭ'), - ('ḯ', 'ḯ'), - ('ḱ', 'ḱ'), - ('ḳ', 'ḳ'), - ('ḵ', 'ḵ'), - ('ḷ', 'ḷ'), - ('ḹ', 'ḹ'), - ('ḻ', 'ḻ'), - ('ḽ', 'ḽ'), - ('ḿ', 'ḿ'), - ('ṁ', 'ṁ'), - ('ṃ', 'ṃ'), - ('ṅ', 'ṅ'), - ('ṇ', 'ṇ'), - ('ṉ', 'ṉ'), - ('ṋ', 'ṋ'), - ('ṍ', 'ṍ'), - ('ṏ', 'ṏ'), - ('ṑ', 'ṑ'), - ('ṓ', 'ṓ'), - ('ṕ', 'ṕ'), - ('ṗ', 'ṗ'), - ('ṙ', 'ṙ'), - ('ṛ', 'ṛ'), - ('ṝ', 'ṝ'), - ('ṟ', 'ṟ'), - ('ṡ', 'ṡ'), - ('ṣ', 'ṣ'), - ('ṥ', 'ṥ'), - ('ṧ', 'ṧ'), - ('ṩ', 'ṩ'), - ('ṫ', 'ṫ'), - ('ṭ', 'ṭ'), - ('ṯ', 'ṯ'), - ('ṱ', 'ṱ'), - ('ṳ', 'ṳ'), - ('ṵ', 'ṵ'), - ('ṷ', 'ṷ'), - ('ṹ', 'ṹ'), - ('ṻ', 'ṻ'), - ('ṽ', 'ṽ'), - ('ṿ', 'ṿ'), - ('ẁ', 'ẁ'), - ('ẃ', 'ẃ'), - ('ẅ', 'ẅ'), - ('ẇ', 'ẇ'), - ('ẉ', 'ẉ'), - ('ẋ', 'ẋ'), - ('ẍ', 'ẍ'), - ('ẏ', 'ẏ'), - ('ẑ', 'ẑ'), - ('ẓ', 'ẓ'), - ('ẕ', 'ẝ'), - ('ẟ', 'ẟ'), - ('ạ', 'ạ'), - ('ả', 'ả'), - ('ấ', 'ấ'), - ('ầ', 'ầ'), - ('ẩ', 'ẩ'), - ('ẫ', 'ẫ'), - ('ậ', 'ậ'), - ('ắ', 'ắ'), - ('ằ', 'ằ'), - ('ẳ', 'ẳ'), - ('ẵ', 'ẵ'), - ('ặ', 'ặ'), - ('ẹ', 'ẹ'), - ('ẻ', 'ẻ'), - ('ẽ', 'ẽ'), - ('ế', 'ế'), - ('ề', 'ề'), - ('ể', 'ể'), - ('ễ', 'ễ'), - ('ệ', 'ệ'), - ('ỉ', 'ỉ'), - ('ị', 'ị'), - ('ọ', 'ọ'), - ('ỏ', 'ỏ'), - ('ố', 'ố'), - ('ồ', 'ồ'), - ('ổ', 'ổ'), - ('ỗ', 'ỗ'), - ('ộ', 'ộ'), - ('ớ', 'ớ'), - ('ờ', 'ờ'), - ('ở', 'ở'), - ('ỡ', 'ỡ'), - ('ợ', 'ợ'), - ('ụ', 'ụ'), - ('ủ', 'ủ'), - ('ứ', 'ứ'), - ('ừ', 'ừ'), - ('ử', 'ử'), - ('ữ', 'ữ'), - ('ự', 'ự'), - ('ỳ', 'ỳ'), - ('ỵ', 'ỵ'), - ('ỷ', 'ỷ'), - ('ỹ', 'ỹ'), - ('ỻ', 'ỻ'), - ('ỽ', 'ỽ'), - ('ỿ', 'ἇ'), - ('ἐ', 'ἕ'), - ('ἠ', 'ἧ'), - ('ἰ', 'ἷ'), - ('ὀ', 'ὅ'), - ('ὐ', 'ὗ'), - ('ὠ', 'ὧ'), - ('ὰ', 'ώ'), - ('ᾀ', 'ᾇ'), - ('ᾐ', 'ᾗ'), - ('ᾠ', 'ᾧ'), - ('ᾰ', 'ᾴ'), 
- ('ᾶ', 'ᾷ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῇ'), - ('ῐ', 'ΐ'), - ('ῖ', 'ῗ'), - ('ῠ', 'ῧ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῷ'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('ℊ', 'ℊ'), - ('ℎ', 'ℏ'), - ('ℓ', 'ℓ'), - ('ℯ', 'ℯ'), - ('ℴ', 'ℴ'), - ('ℹ', 'ℹ'), - ('ℼ', 'ℽ'), - ('ⅆ', 'ⅉ'), - ('ⅎ', 'ⅎ'), - ('ⅰ', 'ⅿ'), - ('ↄ', 'ↄ'), - ('ⓐ', 'ⓩ'), - ('ⰰ', 'ⱟ'), - ('ⱡ', 'ⱡ'), - ('ⱥ', 'ⱦ'), - ('ⱨ', 'ⱨ'), - ('ⱪ', 'ⱪ'), - ('ⱬ', 'ⱬ'), - ('ⱱ', 'ⱱ'), - ('ⱳ', 'ⱴ'), - ('ⱶ', 'ⱽ'), - ('ⲁ', 'ⲁ'), - ('ⲃ', 'ⲃ'), - ('ⲅ', 'ⲅ'), - ('ⲇ', 'ⲇ'), - ('ⲉ', 'ⲉ'), - ('ⲋ', 'ⲋ'), - ('ⲍ', 'ⲍ'), - ('ⲏ', 'ⲏ'), - ('ⲑ', 'ⲑ'), - ('ⲓ', 'ⲓ'), - ('ⲕ', 'ⲕ'), - ('ⲗ', 'ⲗ'), - ('ⲙ', 'ⲙ'), - ('ⲛ', 'ⲛ'), - ('ⲝ', 'ⲝ'), - ('ⲟ', 'ⲟ'), - ('ⲡ', 'ⲡ'), - ('ⲣ', 'ⲣ'), - ('ⲥ', 'ⲥ'), - ('ⲧ', 'ⲧ'), - ('ⲩ', 'ⲩ'), - ('ⲫ', 'ⲫ'), - ('ⲭ', 'ⲭ'), - ('ⲯ', 'ⲯ'), - ('ⲱ', 'ⲱ'), - ('ⲳ', 'ⲳ'), - ('ⲵ', 'ⲵ'), - ('ⲷ', 'ⲷ'), - ('ⲹ', 'ⲹ'), - ('ⲻ', 'ⲻ'), - ('ⲽ', 'ⲽ'), - ('ⲿ', 'ⲿ'), - ('ⳁ', 'ⳁ'), - ('ⳃ', 'ⳃ'), - ('ⳅ', 'ⳅ'), - ('ⳇ', 'ⳇ'), - ('ⳉ', 'ⳉ'), - ('ⳋ', 'ⳋ'), - ('ⳍ', 'ⳍ'), - ('ⳏ', 'ⳏ'), - ('ⳑ', 'ⳑ'), - ('ⳓ', 'ⳓ'), - ('ⳕ', 'ⳕ'), - ('ⳗ', 'ⳗ'), - ('ⳙ', 'ⳙ'), - ('ⳛ', 'ⳛ'), - ('ⳝ', 'ⳝ'), - ('ⳟ', 'ⳟ'), - ('ⳡ', 'ⳡ'), - ('ⳣ', 'ⳤ'), - ('ⳬ', 'ⳬ'), - ('ⳮ', 'ⳮ'), - ('ⳳ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ꙁ', 'ꙁ'), - ('ꙃ', 'ꙃ'), - ('ꙅ', 'ꙅ'), - ('ꙇ', 'ꙇ'), - ('ꙉ', 'ꙉ'), - ('ꙋ', 'ꙋ'), - ('ꙍ', 'ꙍ'), - ('ꙏ', 'ꙏ'), - ('ꙑ', 'ꙑ'), - ('ꙓ', 'ꙓ'), - ('ꙕ', 'ꙕ'), - ('ꙗ', 'ꙗ'), - ('ꙙ', 'ꙙ'), - ('ꙛ', 'ꙛ'), - ('ꙝ', 'ꙝ'), - ('ꙟ', 'ꙟ'), - ('ꙡ', 'ꙡ'), - ('ꙣ', 'ꙣ'), - ('ꙥ', 'ꙥ'), - ('ꙧ', 'ꙧ'), - ('ꙩ', 'ꙩ'), - ('ꙫ', 'ꙫ'), - ('ꙭ', 'ꙭ'), - ('ꚁ', 'ꚁ'), - ('ꚃ', 'ꚃ'), - ('ꚅ', 'ꚅ'), - ('ꚇ', 'ꚇ'), - ('ꚉ', 'ꚉ'), - ('ꚋ', 'ꚋ'), - ('ꚍ', 'ꚍ'), - ('ꚏ', 'ꚏ'), - ('ꚑ', 'ꚑ'), - ('ꚓ', 'ꚓ'), - ('ꚕ', 'ꚕ'), - ('ꚗ', 'ꚗ'), - ('ꚙ', 'ꚙ'), - ('ꚛ', 'ꚝ'), - ('ꜣ', 'ꜣ'), - ('ꜥ', 'ꜥ'), - ('ꜧ', 'ꜧ'), - ('ꜩ', 'ꜩ'), - ('ꜫ', 'ꜫ'), - ('ꜭ', 'ꜭ'), - ('ꜯ', 'ꜱ'), - ('ꜳ', 'ꜳ'), - ('ꜵ', 'ꜵ'), - ('ꜷ', 'ꜷ'), - ('ꜹ', 'ꜹ'), - ('ꜻ', 'ꜻ'), - ('ꜽ', 'ꜽ'), - ('ꜿ', 'ꜿ'), - ('ꝁ', 'ꝁ'), - ('ꝃ', 'ꝃ'), - ('ꝅ', 
'ꝅ'), - ('ꝇ', 'ꝇ'), - ('ꝉ', 'ꝉ'), - ('ꝋ', 'ꝋ'), - ('ꝍ', 'ꝍ'), - ('ꝏ', 'ꝏ'), - ('ꝑ', 'ꝑ'), - ('ꝓ', 'ꝓ'), - ('ꝕ', 'ꝕ'), - ('ꝗ', 'ꝗ'), - ('ꝙ', 'ꝙ'), - ('ꝛ', 'ꝛ'), - ('ꝝ', 'ꝝ'), - ('ꝟ', 'ꝟ'), - ('ꝡ', 'ꝡ'), - ('ꝣ', 'ꝣ'), - ('ꝥ', 'ꝥ'), - ('ꝧ', 'ꝧ'), - ('ꝩ', 'ꝩ'), - ('ꝫ', 'ꝫ'), - ('ꝭ', 'ꝭ'), - ('ꝯ', 'ꝸ'), - ('ꝺ', 'ꝺ'), - ('ꝼ', 'ꝼ'), - ('ꝿ', 'ꝿ'), - ('ꞁ', 'ꞁ'), - ('ꞃ', 'ꞃ'), - ('ꞅ', 'ꞅ'), - ('ꞇ', 'ꞇ'), - ('ꞌ', 'ꞌ'), - ('ꞎ', 'ꞎ'), - ('ꞑ', 'ꞑ'), - ('ꞓ', 'ꞕ'), - ('ꞗ', 'ꞗ'), - ('ꞙ', 'ꞙ'), - ('ꞛ', 'ꞛ'), - ('ꞝ', 'ꞝ'), - ('ꞟ', 'ꞟ'), - ('ꞡ', 'ꞡ'), - ('ꞣ', 'ꞣ'), - ('ꞥ', 'ꞥ'), - ('ꞧ', 'ꞧ'), - ('ꞩ', 'ꞩ'), - ('ꞯ', 'ꞯ'), - ('ꞵ', 'ꞵ'), - ('ꞷ', 'ꞷ'), - ('ꞹ', 'ꞹ'), - ('ꞻ', 'ꞻ'), - ('ꞽ', 'ꞽ'), - ('ꞿ', 'ꞿ'), - ('ꟁ', 'ꟁ'), - ('ꟃ', 'ꟃ'), - ('ꟈ', 'ꟈ'), - ('ꟊ', 'ꟊ'), - ('ꟍ', 'ꟍ'), - ('ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟕ'), - ('ꟗ', 'ꟗ'), - ('ꟙ', 'ꟙ'), - ('ꟛ', 'ꟛ'), - ('ꟲ', 'ꟴ'), - ('ꟶ', 'ꟶ'), - ('ꟸ', 'ꟺ'), - ('ꬰ', 'ꭚ'), - ('ꭜ', 'ꭩ'), - ('ꭰ', 'ꮿ'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('a', 'z'), - ('𐐨', '𐑏'), - ('𐓘', '𐓻'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐞀', '𐞀'), - ('𐞃', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𐳀', '𐳲'), - ('𐵰', '𐶅'), - ('𑣀', '𑣟'), - ('𖹠', '𖹿'), - ('𝐚', '𝐳'), - ('𝑎', '𝑔'), - ('𝑖', '𝑧'), - ('𝒂', '𝒛'), - ('𝒶', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝓏'), - ('𝓪', '𝔃'), - ('𝔞', '𝔷'), - ('𝕒', '𝕫'), - ('𝖆', '𝖟'), - ('𝖺', '𝗓'), - ('𝗮', '𝘇'), - ('𝘢', '𝘻'), - ('𝙖', '𝙯'), - ('𝚊', '𝚥'), - ('𝛂', '𝛚'), - ('𝛜', '𝛡'), - ('𝛼', '𝜔'), - ('𝜖', '𝜛'), - ('𝜶', '𝝎'), - ('𝝐', '𝝕'), - ('𝝰', '𝞈'), - ('𝞊', '𝞏'), - ('𝞪', '𝟂'), - ('𝟄', '𝟉'), - ('𝟋', '𝟋'), - ('𝼀', '𝼉'), - ('𝼋', '𝼞'), - ('𝼥', '𝼪'), - ('𞀰', '𞁭'), - ('𞤢', '𞥃'), -]; - -pub const NUMERIC: &'static [(char, char)] = &[ - ('0', '9'), - ('\u{600}', '\u{605}'), - ('٠', '٩'), - ('٫', '٬'), - ('\u{6dd}', '\u{6dd}'), - ('۰', '۹'), - ('߀', '߉'), - ('\u{890}', '\u{891}'), - ('\u{8e2}', '\u{8e2}'), - ('०', '९'), - ('০', '৯'), - ('੦', '੯'), - ('૦', '૯'), - ('୦', '୯'), - ('௦', '௯'), - ('౦', '౯'), - ('೦', '೯'), - ('൦', '൯'), - ('෦', 
'෯'), - ('๐', '๙'), - ('໐', '໙'), - ('༠', '༩'), - ('၀', '၉'), - ('႐', '႙'), - ('០', '៩'), - ('᠐', '᠙'), - ('᥆', '᥏'), - ('᧐', '᧚'), - ('᪀', '᪉'), - ('᪐', '᪙'), - ('᭐', '᭙'), - ('᮰', '᮹'), - ('᱀', '᱉'), - ('᱐', '᱙'), - ('꘠', '꘩'), - ('꣐', '꣙'), - ('꤀', '꤉'), - ('꧐', '꧙'), - ('꧰', '꧹'), - ('꩐', '꩙'), - ('꯰', '꯹'), - ('0', '9'), - ('𐒠', '𐒩'), - ('𐴰', '𐴹'), - ('𐵀', '𐵉'), - ('𑁦', '𑁯'), - ('\u{110bd}', '\u{110bd}'), - ('\u{110cd}', '\u{110cd}'), - ('𑃰', '𑃹'), - ('𑄶', '𑄿'), - ('𑇐', '𑇙'), - ('𑋰', '𑋹'), - ('𑑐', '𑑙'), - ('𑓐', '𑓙'), - ('𑙐', '𑙙'), - ('𑛀', '𑛉'), - ('𑛐', '𑛣'), - ('𑜰', '𑜹'), - ('𑣠', '𑣩'), - ('𑥐', '𑥙'), - ('𑯰', '𑯹'), - ('𑱐', '𑱙'), - ('𑵐', '𑵙'), - ('𑶠', '𑶩'), - ('𑽐', '𑽙'), - ('𖄰', '𖄹'), - ('𖩠', '𖩩'), - ('𖫀', '𖫉'), - ('𖭐', '𖭙'), - ('𖵰', '𖵹'), - ('𜳰', '𜳹'), - ('𝟎', '𝟿'), - ('𞅀', '𞅉'), - ('𞋰', '𞋹'), - ('𞓰', '𞓹'), - ('𞗱', '𞗺'), - ('𞥐', '𞥙'), - ('🯰', '🯹'), -]; - -pub const OLETTER: &'static [(char, char)] = &[ - ('ƻ', 'ƻ'), - ('ǀ', 'ǃ'), - ('ʔ', 'ʔ'), - ('ʹ', 'ʿ'), - ('ˆ', 'ˑ'), - ('ˬ', 'ˬ'), - ('ˮ', 'ˮ'), - ('ʹ', 'ʹ'), - ('ՙ', 'ՙ'), - ('א', 'ת'), - ('ׯ', '׳'), - ('ؠ', 'ي'), - ('ٮ', 'ٯ'), - ('ٱ', 'ۓ'), - ('ە', 'ە'), - ('ۥ', 'ۦ'), - ('ۮ', 'ۯ'), - ('ۺ', 'ۼ'), - ('ۿ', 'ۿ'), - ('ܐ', 'ܐ'), - ('ܒ', 'ܯ'), - ('ݍ', 'ޥ'), - ('ޱ', 'ޱ'), - ('ߊ', 'ߪ'), - ('ߴ', 'ߵ'), - ('ߺ', 'ߺ'), - ('ࠀ', 'ࠕ'), - ('ࠚ', 'ࠚ'), - ('ࠤ', 'ࠤ'), - ('ࠨ', 'ࠨ'), - ('ࡀ', 'ࡘ'), - ('ࡠ', 'ࡪ'), - ('ࡰ', 'ࢇ'), - ('ࢉ', 'ࢎ'), - ('ࢠ', 'ࣉ'), - ('ऄ', 'ह'), - ('ऽ', 'ऽ'), - ('ॐ', 'ॐ'), - ('क़', 'ॡ'), - ('ॱ', 'ঀ'), - ('অ', 'ঌ'), - ('এ', 'ঐ'), - ('ও', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('ঽ', 'ঽ'), - ('ৎ', 'ৎ'), - ('ড়', 'ঢ়'), - ('য়', 'ৡ'), - ('ৰ', 'ৱ'), - ('ৼ', 'ৼ'), - ('ਅ', 'ਊ'), - ('ਏ', 'ਐ'), - ('ਓ', 'ਨ'), - ('ਪ', 'ਰ'), - ('ਲ', 'ਲ਼'), - ('ਵ', 'ਸ਼'), - ('ਸ', 'ਹ'), - ('ਖ਼', 'ੜ'), - ('ਫ਼', 'ਫ਼'), - ('ੲ', 'ੴ'), - ('અ', 'ઍ'), - ('એ', 'ઑ'), - ('ઓ', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('ઽ', 'ઽ'), - ('ૐ', 'ૐ'), - ('ૠ', 'ૡ'), - ('ૹ', 'ૹ'), - ('ଅ', 'ଌ'), - ('ଏ', 'ଐ'), - ('ଓ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 
'ଳ'), - ('ଵ', 'ହ'), - ('ଽ', 'ଽ'), - ('ଡ଼', 'ଢ଼'), - ('ୟ', 'ୡ'), - ('ୱ', 'ୱ'), - ('ஃ', 'ஃ'), - ('அ', 'ஊ'), - ('எ', 'ஐ'), - ('ஒ', 'க'), - ('ங', 'ச'), - ('ஜ', 'ஜ'), - ('ஞ', 'ட'), - ('ண', 'த'), - ('ந', 'ப'), - ('ம', 'ஹ'), - ('ௐ', 'ௐ'), - ('అ', 'ఌ'), - ('ఎ', 'ఐ'), - ('ఒ', 'న'), - ('ప', 'హ'), - ('ఽ', 'ఽ'), - ('ౘ', 'ౚ'), - ('ౝ', 'ౝ'), - ('ౠ', 'ౡ'), - ('ಀ', 'ಀ'), - ('ಅ', 'ಌ'), - ('ಎ', 'ಐ'), - ('ಒ', 'ನ'), - ('ಪ', 'ಳ'), - ('ವ', 'ಹ'), - ('ಽ', 'ಽ'), - ('ೝ', 'ೞ'), - ('ೠ', 'ೡ'), - ('ೱ', 'ೲ'), - ('ഄ', 'ഌ'), - ('എ', 'ഐ'), - ('ഒ', 'ഺ'), - ('ഽ', 'ഽ'), - ('ൎ', 'ൎ'), - ('ൔ', 'ൖ'), - ('ൟ', 'ൡ'), - ('ൺ', 'ൿ'), - ('අ', 'ඖ'), - ('ක', 'න'), - ('ඳ', 'ර'), - ('ල', 'ල'), - ('ව', 'ෆ'), - ('ก', 'ะ'), - ('า', 'ำ'), - ('เ', 'ๆ'), - ('ກ', 'ຂ'), - ('ຄ', 'ຄ'), - ('ຆ', 'ຊ'), - ('ຌ', 'ຣ'), - ('ລ', 'ລ'), - ('ວ', 'ະ'), - ('າ', 'ຳ'), - ('ຽ', 'ຽ'), - ('ເ', 'ໄ'), - ('ໆ', 'ໆ'), - ('ໜ', 'ໟ'), - ('ༀ', 'ༀ'), - ('ཀ', 'ཇ'), - ('ཉ', 'ཬ'), - ('ྈ', 'ྌ'), - ('က', 'ဪ'), - ('ဿ', 'ဿ'), - ('ၐ', 'ၕ'), - ('ၚ', 'ၝ'), - ('ၡ', 'ၡ'), - ('ၥ', 'ၦ'), - ('ၮ', 'ၰ'), - ('ၵ', 'ႁ'), - ('ႎ', 'ႎ'), - ('ა', 'ჺ'), - ('ჽ', 'ቈ'), - ('ቊ', 'ቍ'), - ('ቐ', 'ቖ'), - ('ቘ', 'ቘ'), - ('ቚ', 'ቝ'), - ('በ', 'ኈ'), - ('ኊ', 'ኍ'), - ('ነ', 'ኰ'), - ('ኲ', 'ኵ'), - ('ኸ', 'ኾ'), - ('ዀ', 'ዀ'), - ('ዂ', 'ዅ'), - ('ወ', 'ዖ'), - ('ዘ', 'ጐ'), - ('ጒ', 'ጕ'), - ('ጘ', 'ፚ'), - ('ᎀ', 'ᎏ'), - ('ᐁ', 'ᙬ'), - ('ᙯ', 'ᙿ'), - ('ᚁ', 'ᚚ'), - ('ᚠ', 'ᛪ'), - ('ᛮ', 'ᛸ'), - ('ᜀ', 'ᜑ'), - ('ᜟ', 'ᜱ'), - ('ᝀ', 'ᝑ'), - ('ᝠ', 'ᝬ'), - ('ᝮ', 'ᝰ'), - ('ក', 'ឳ'), - ('ៗ', 'ៗ'), - ('ៜ', 'ៜ'), - ('ᠠ', 'ᡸ'), - ('ᢀ', 'ᢄ'), - ('ᢇ', 'ᢨ'), - ('ᢪ', 'ᢪ'), - ('ᢰ', 'ᣵ'), - ('ᤀ', 'ᤞ'), - ('ᥐ', 'ᥭ'), - ('ᥰ', 'ᥴ'), - ('ᦀ', 'ᦫ'), - ('ᦰ', 'ᧉ'), - ('ᨀ', 'ᨖ'), - ('ᨠ', 'ᩔ'), - ('ᪧ', 'ᪧ'), - ('ᬅ', 'ᬳ'), - ('ᭅ', 'ᭌ'), - ('ᮃ', 'ᮠ'), - ('ᮮ', 'ᮯ'), - ('ᮺ', 'ᯥ'), - ('ᰀ', 'ᰣ'), - ('ᱍ', 'ᱏ'), - ('ᱚ', 'ᱽ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('ᳩ', 'ᳬ'), - ('ᳮ', 'ᳳ'), - ('ᳵ', 'ᳶ'), - ('ᳺ', 'ᳺ'), - ('ℵ', 'ℸ'), - ('ↀ', 'ↂ'), - ('ↅ', 'ↈ'), - ('ⴰ', 'ⵧ'), - ('ⵯ', 'ⵯ'), - ('ⶀ', 'ⶖ'), - ('ⶠ', 'ⶦ'), - ('ⶨ', 'ⶮ'), - ('ⶰ', 'ⶶ'), - ('ⶸ', 'ⶾ'), - 
('ⷀ', 'ⷆ'), - ('ⷈ', 'ⷎ'), - ('ⷐ', 'ⷖ'), - ('ⷘ', 'ⷞ'), - ('ⸯ', 'ⸯ'), - ('々', '〇'), - ('〡', '〩'), - ('〱', '〵'), - ('〸', '〼'), - ('ぁ', 'ゖ'), - ('ゝ', 'ゟ'), - ('ァ', 'ヺ'), - ('ー', 'ヿ'), - ('ㄅ', 'ㄯ'), - ('ㄱ', 'ㆎ'), - ('ㆠ', 'ㆿ'), - ('ㇰ', 'ㇿ'), - ('㐀', '䶿'), - ('一', 'ꒌ'), - ('ꓐ', 'ꓽ'), - ('ꔀ', 'ꘌ'), - ('ꘐ', 'ꘟ'), - ('ꘪ', 'ꘫ'), - ('ꙮ', 'ꙮ'), - ('ꙿ', 'ꙿ'), - ('ꚠ', 'ꛯ'), - ('ꜗ', 'ꜟ'), - ('ꞈ', 'ꞈ'), - ('ꞏ', 'ꞏ'), - ('ꟷ', 'ꟷ'), - ('ꟻ', 'ꠁ'), - ('ꠃ', 'ꠅ'), - ('ꠇ', 'ꠊ'), - ('ꠌ', 'ꠢ'), - ('ꡀ', 'ꡳ'), - ('ꢂ', 'ꢳ'), - ('ꣲ', 'ꣷ'), - ('ꣻ', 'ꣻ'), - ('ꣽ', 'ꣾ'), - ('ꤊ', 'ꤥ'), - ('ꤰ', 'ꥆ'), - ('ꥠ', 'ꥼ'), - ('ꦄ', 'ꦲ'), - ('ꧏ', 'ꧏ'), - ('ꧠ', 'ꧤ'), - ('ꧦ', 'ꧯ'), - ('ꧺ', 'ꧾ'), - ('ꨀ', 'ꨨ'), - ('ꩀ', 'ꩂ'), - ('ꩄ', 'ꩋ'), - ('ꩠ', 'ꩶ'), - ('ꩺ', 'ꩺ'), - ('ꩾ', 'ꪯ'), - ('ꪱ', 'ꪱ'), - ('ꪵ', 'ꪶ'), - ('ꪹ', 'ꪽ'), - ('ꫀ', 'ꫀ'), - ('ꫂ', 'ꫂ'), - ('ꫛ', 'ꫝ'), - ('ꫠ', 'ꫪ'), - ('ꫲ', 'ꫴ'), - ('ꬁ', 'ꬆ'), - ('ꬉ', 'ꬎ'), - ('ꬑ', 'ꬖ'), - ('ꬠ', 'ꬦ'), - ('ꬨ', 'ꬮ'), - ('ꯀ', 'ꯢ'), - ('가', '힣'), - ('ힰ', 'ퟆ'), - ('ퟋ', 'ퟻ'), - ('豈', '舘'), - ('並', '龎'), - ('יִ', 'יִ'), - ('ײַ', 'ﬨ'), - ('שׁ', 'זּ'), - ('טּ', 'לּ'), - ('מּ', 'מּ'), - ('נּ', 'סּ'), - ('ףּ', 'פּ'), - ('צּ', 'ﮱ'), - ('ﯓ', 'ﴽ'), - ('ﵐ', 'ﶏ'), - ('ﶒ', 'ﷇ'), - ('ﷰ', 'ﷻ'), - ('ﹰ', 'ﹴ'), - ('ﹶ', 'ﻼ'), - ('ヲ', 'ン'), - ('ᅠ', 'ᄒ'), - ('ᅡ', 'ᅦ'), - ('ᅧ', 'ᅬ'), - ('ᅭ', 'ᅲ'), - ('ᅳ', 'ᅵ'), - ('𐀀', '𐀋'), - ('𐀍', '𐀦'), - ('𐀨', '𐀺'), - ('𐀼', '𐀽'), - ('𐀿', '𐁍'), - ('𐁐', '𐁝'), - ('𐂀', '𐃺'), - ('𐅀', '𐅴'), - ('𐊀', '𐊜'), - ('𐊠', '𐋐'), - ('𐌀', '𐌟'), - ('𐌭', '𐍊'), - ('𐍐', '𐍵'), - ('𐎀', '𐎝'), - ('𐎠', '𐏃'), - ('𐏈', '𐏏'), - ('𐏑', '𐏕'), - ('𐑐', '𐒝'), - ('𐔀', '𐔧'), - ('𐔰', '𐕣'), - ('𐗀', '𐗳'), - ('𐘀', '𐜶'), - ('𐝀', '𐝕'), - ('𐝠', '𐝧'), - ('𐞁', '𐞂'), - ('𐠀', '𐠅'), - ('𐠈', '𐠈'), - ('𐠊', '𐠵'), - ('𐠷', '𐠸'), - ('𐠼', '𐠼'), - ('𐠿', '𐡕'), - ('𐡠', '𐡶'), - ('𐢀', '𐢞'), - ('𐣠', '𐣲'), - ('𐣴', '𐣵'), - ('𐤀', '𐤕'), - ('𐤠', '𐤹'), - ('𐦀', '𐦷'), - ('𐦾', '𐦿'), - ('𐨀', '𐨀'), - ('𐨐', '𐨓'), - ('𐨕', '𐨗'), - ('𐨙', '𐨵'), - ('𐩠', '𐩼'), - ('𐪀', '𐪜'), - ('𐫀', '𐫇'), - ('𐫉', '𐫤'), - ('𐬀', '𐬵'), - ('𐭀', '𐭕'), - ('𐭠', '𐭲'), 
- ('𐮀', '𐮑'), - ('𐰀', '𐱈'), - ('𐴀', '𐴣'), - ('𐵊', '𐵏'), - ('𐵯', '𐵯'), - ('𐺀', '𐺩'), - ('𐺰', '𐺱'), - ('𐻂', '𐻄'), - ('𐼀', '𐼜'), - ('𐼧', '𐼧'), - ('𐼰', '𐽅'), - ('𐽰', '𐾁'), - ('𐾰', '𐿄'), - ('𐿠', '𐿶'), - ('𑀃', '𑀷'), - ('𑁱', '𑁲'), - ('𑁵', '𑁵'), - ('𑂃', '𑂯'), - ('𑃐', '𑃨'), - ('𑄃', '𑄦'), - ('𑅄', '𑅄'), - ('𑅇', '𑅇'), - ('𑅐', '𑅲'), - ('𑅶', '𑅶'), - ('𑆃', '𑆲'), - ('𑇁', '𑇄'), - ('𑇚', '𑇚'), - ('𑇜', '𑇜'), - ('𑈀', '𑈑'), - ('𑈓', '𑈫'), - ('𑈿', '𑉀'), - ('𑊀', '𑊆'), - ('𑊈', '𑊈'), - ('𑊊', '𑊍'), - ('𑊏', '𑊝'), - ('𑊟', '𑊨'), - ('𑊰', '𑋞'), - ('𑌅', '𑌌'), - ('𑌏', '𑌐'), - ('𑌓', '𑌨'), - ('𑌪', '𑌰'), - ('𑌲', '𑌳'), - ('𑌵', '𑌹'), - ('𑌽', '𑌽'), - ('𑍐', '𑍐'), - ('𑍝', '𑍡'), - ('𑎀', '𑎉'), - ('𑎋', '𑎋'), - ('𑎎', '𑎎'), - ('𑎐', '𑎵'), - ('𑎷', '𑎷'), - ('𑏑', '𑏑'), - ('𑏓', '𑏓'), - ('𑐀', '𑐴'), - ('𑑇', '𑑊'), - ('𑑟', '𑑡'), - ('𑒀', '𑒯'), - ('𑓄', '𑓅'), - ('𑓇', '𑓇'), - ('𑖀', '𑖮'), - ('𑗘', '𑗛'), - ('𑘀', '𑘯'), - ('𑙄', '𑙄'), - ('𑚀', '𑚪'), - ('𑚸', '𑚸'), - ('𑜀', '𑜚'), - ('𑝀', '𑝆'), - ('𑠀', '𑠫'), - ('𑣿', '𑤆'), - ('𑤉', '𑤉'), - ('𑤌', '𑤓'), - ('𑤕', '𑤖'), - ('𑤘', '𑤯'), - ('𑤿', '𑤿'), - ('𑥁', '𑥁'), - ('𑦠', '𑦧'), - ('𑦪', '𑧐'), - ('𑧡', '𑧡'), - ('𑧣', '𑧣'), - ('𑨀', '𑨀'), - ('𑨋', '𑨲'), - ('𑨺', '𑨺'), - ('𑩐', '𑩐'), - ('𑩜', '𑪉'), - ('𑪝', '𑪝'), - ('𑪰', '𑫸'), - ('𑯀', '𑯠'), - ('𑰀', '𑰈'), - ('𑰊', '𑰮'), - ('𑱀', '𑱀'), - ('𑱲', '𑲏'), - ('𑴀', '𑴆'), - ('𑴈', '𑴉'), - ('𑴋', '𑴰'), - ('𑵆', '𑵆'), - ('𑵠', '𑵥'), - ('𑵧', '𑵨'), - ('𑵪', '𑶉'), - ('𑶘', '𑶘'), - ('𑻠', '𑻲'), - ('𑼂', '𑼂'), - ('𑼄', '𑼐'), - ('𑼒', '𑼳'), - ('𑾰', '𑾰'), - ('𒀀', '𒎙'), - ('𒐀', '𒑮'), - ('𒒀', '𒕃'), - ('𒾐', '𒿰'), - ('𓀀', '𓐯'), - ('𓑁', '𓑆'), - ('𓑠', '𔏺'), - ('𔐀', '𔙆'), - ('𖄀', '𖄝'), - ('𖠀', '𖨸'), - ('𖩀', '𖩞'), - ('𖩰', '𖪾'), - ('𖫐', '𖫭'), - ('𖬀', '𖬯'), - ('𖭀', '𖭃'), - ('𖭣', '𖭷'), - ('𖭽', '𖮏'), - ('𖵀', '𖵬'), - ('𖼀', '𖽊'), - ('𖽐', '𖽐'), - ('𖾓', '𖾟'), - ('𖿠', '𖿡'), - ('𖿣', '𖿣'), - ('𗀀', '𘟷'), - ('𘠀', '𘳕'), - ('𘳿', '𘴈'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('𛀀', '𛄢'), - ('𛄲', '𛄲'), - ('𛅐', '𛅒'), - ('𛅕', '𛅕'), - ('𛅤', '𛅧'), - ('𛅰', '𛋻'), - ('𛰀', '𛱪'), - ('𛱰', '𛱼'), - ('𛲀', '𛲈'), - ('𛲐', 
'𛲙'), - ('𝼊', '𝼊'), - ('𞄀', '𞄬'), - ('𞄷', '𞄽'), - ('𞅎', '𞅎'), - ('𞊐', '𞊭'), - ('𞋀', '𞋫'), - ('𞓐', '𞓫'), - ('𞗐', '𞗭'), - ('𞗰', '𞗰'), - ('𞟠', '𞟦'), - ('𞟨', '𞟫'), - ('𞟭', '𞟮'), - ('𞟰', '𞟾'), - ('𞠀', '𞣄'), - ('𞥋', '𞥋'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('𠀀', '𪛟'), - ('𪜀', '𫜹'), - ('𫝀', '𫠝'), - ('𫠠', '𬺡'), - ('𬺰', '𮯠'), - ('𮯰', '𮹝'), - ('丽', '𪘀'), - ('𰀀', '𱍊'), - ('𱍐', '𲎯'), -]; - -pub const SCONTINUE: &'static [(char, char)] = &[ - (',', '-'), - (':', ';'), - (';', ';'), - ('՝', '՝'), - ('،', '؍'), - ('߸', '߸'), - ('᠂', '᠂'), - ('᠈', '᠈'), - ('–', '—'), - ('、', '、'), - ('︐', '︑'), - ('︓', '︔'), - ('︱', '︲'), - ('﹐', '﹑'), - ('﹔', '﹕'), - ('﹘', '﹘'), - ('﹣', '﹣'), - (',', '-'), - (':', ';'), - ('、', '、'), -]; - -pub const STERM: &'static [(char, char)] = &[ - ('!', '!'), - ('?', '?'), - ('։', '։'), - ('؝', '؟'), - ('۔', '۔'), - ('܀', '܂'), - ('߹', '߹'), - ('࠷', '࠷'), - ('࠹', '࠹'), - ('࠽', '࠾'), - ('।', '॥'), - ('၊', '။'), - ('።', '።'), - ('፧', '፨'), - ('᙮', '᙮'), - ('᜵', '᜶'), - ('។', '៕'), - ('᠃', '᠃'), - ('᠉', '᠉'), - ('᥄', '᥅'), - ('᪨', '᪫'), - ('᭎', '᭏'), - ('᭚', '᭛'), - ('᭞', '᭟'), - ('᭽', '᭿'), - ('᰻', '᰼'), - ('᱾', '᱿'), - ('‼', '‽'), - ('⁇', '⁉'), - ('⳹', '⳻'), - ('⸮', '⸮'), - ('⸼', '⸼'), - ('⹓', '⹔'), - ('。', '。'), - ('꓿', '꓿'), - ('꘎', '꘏'), - ('꛳', '꛳'), - ('꛷', '꛷'), - ('꡶', '꡷'), - ('꣎', '꣏'), - ('꤯', '꤯'), - ('꧈', '꧉'), - ('꩝', '꩟'), - ('꫰', '꫱'), - ('꯫', '꯫'), - ('︒', '︒'), - ('︕', '︖'), - ('﹖', '﹗'), - ('!', '!'), - ('?', '?'), - ('。', '。'), - ('𐩖', '𐩗'), - ('𐽕', '𐽙'), - ('𐾆', '𐾉'), - ('𑁇', '𑁈'), - ('𑂾', '𑃁'), - ('𑅁', '𑅃'), - ('𑇅', 
'𑇆'), - ('𑇍', '𑇍'), - ('𑇞', '𑇟'), - ('𑈸', '𑈹'), - ('𑈻', '𑈼'), - ('𑊩', '𑊩'), - ('𑏔', '𑏕'), - ('𑑋', '𑑌'), - ('𑗂', '𑗃'), - ('𑗉', '𑗗'), - ('𑙁', '𑙂'), - ('𑜼', '𑜾'), - ('𑥄', '𑥄'), - ('𑥆', '𑥆'), - ('𑩂', '𑩃'), - ('𑪛', '𑪜'), - ('𑱁', '𑱂'), - ('𑻷', '𑻸'), - ('𑽃', '𑽄'), - ('𖩮', '𖩯'), - ('𖫵', '𖫵'), - ('𖬷', '𖬸'), - ('𖭄', '𖭄'), - ('𖵮', '𖵯'), - ('𖺘', '𖺘'), - ('𛲟', '𛲟'), - ('𝪈', '𝪈'), -]; - -pub const SEP: &'static [(char, char)] = - &[('\u{85}', '\u{85}'), ('\u{2028}', '\u{2029}')]; - -pub const SP: &'static [(char, char)] = &[ - ('\t', '\t'), - ('\u{b}', '\u{c}'), - (' ', ' '), - ('\u{a0}', '\u{a0}'), - ('\u{1680}', '\u{1680}'), - ('\u{2000}', '\u{200a}'), - ('\u{202f}', '\u{202f}'), - ('\u{205f}', '\u{205f}'), - ('\u{3000}', '\u{3000}'), -]; - -pub const UPPER: &'static [(char, char)] = &[ - ('A', 'Z'), - ('À', 'Ö'), - ('Ø', 'Þ'), - ('Ā', 'Ā'), - ('Ă', 'Ă'), - ('Ą', 'Ą'), - ('Ć', 'Ć'), - ('Ĉ', 'Ĉ'), - ('Ċ', 'Ċ'), - ('Č', 'Č'), - ('Ď', 'Ď'), - ('Đ', 'Đ'), - ('Ē', 'Ē'), - ('Ĕ', 'Ĕ'), - ('Ė', 'Ė'), - ('Ę', 'Ę'), - ('Ě', 'Ě'), - ('Ĝ', 'Ĝ'), - ('Ğ', 'Ğ'), - ('Ġ', 'Ġ'), - ('Ģ', 'Ģ'), - ('Ĥ', 'Ĥ'), - ('Ħ', 'Ħ'), - ('Ĩ', 'Ĩ'), - ('Ī', 'Ī'), - ('Ĭ', 'Ĭ'), - ('Į', 'Į'), - ('İ', 'İ'), - ('IJ', 'IJ'), - ('Ĵ', 'Ĵ'), - ('Ķ', 'Ķ'), - ('Ĺ', 'Ĺ'), - ('Ļ', 'Ļ'), - ('Ľ', 'Ľ'), - ('Ŀ', 'Ŀ'), - ('Ł', 'Ł'), - ('Ń', 'Ń'), - ('Ņ', 'Ņ'), - ('Ň', 'Ň'), - ('Ŋ', 'Ŋ'), - ('Ō', 'Ō'), - ('Ŏ', 'Ŏ'), - ('Ő', 'Ő'), - ('Œ', 'Œ'), - ('Ŕ', 'Ŕ'), - ('Ŗ', 'Ŗ'), - ('Ř', 'Ř'), - ('Ś', 'Ś'), - ('Ŝ', 'Ŝ'), - ('Ş', 'Ş'), - ('Š', 'Š'), - ('Ţ', 'Ţ'), - ('Ť', 'Ť'), - ('Ŧ', 'Ŧ'), - ('Ũ', 'Ũ'), - ('Ū', 'Ū'), - ('Ŭ', 'Ŭ'), - ('Ů', 'Ů'), - ('Ű', 'Ű'), - ('Ų', 'Ų'), - ('Ŵ', 'Ŵ'), - ('Ŷ', 'Ŷ'), - ('Ÿ', 'Ź'), - ('Ż', 'Ż'), - ('Ž', 'Ž'), - ('Ɓ', 'Ƃ'), - ('Ƅ', 'Ƅ'), - ('Ɔ', 'Ƈ'), - ('Ɖ', 'Ƌ'), - ('Ǝ', 'Ƒ'), - ('Ɠ', 'Ɣ'), - ('Ɩ', 'Ƙ'), - ('Ɯ', 'Ɲ'), - ('Ɵ', 'Ơ'), - ('Ƣ', 'Ƣ'), - ('Ƥ', 'Ƥ'), - ('Ʀ', 'Ƨ'), - ('Ʃ', 'Ʃ'), - ('Ƭ', 'Ƭ'), - ('Ʈ', 'Ư'), - ('Ʊ', 'Ƴ'), - ('Ƶ', 'Ƶ'), - ('Ʒ', 'Ƹ'), - ('Ƽ', 'Ƽ'), - ('DŽ', 'Dž'), - ('LJ', 'Lj'), - 
('NJ', 'Nj'), - ('Ǎ', 'Ǎ'), - ('Ǐ', 'Ǐ'), - ('Ǒ', 'Ǒ'), - ('Ǔ', 'Ǔ'), - ('Ǖ', 'Ǖ'), - ('Ǘ', 'Ǘ'), - ('Ǚ', 'Ǚ'), - ('Ǜ', 'Ǜ'), - ('Ǟ', 'Ǟ'), - ('Ǡ', 'Ǡ'), - ('Ǣ', 'Ǣ'), - ('Ǥ', 'Ǥ'), - ('Ǧ', 'Ǧ'), - ('Ǩ', 'Ǩ'), - ('Ǫ', 'Ǫ'), - ('Ǭ', 'Ǭ'), - ('Ǯ', 'Ǯ'), - ('DZ', 'Dz'), - ('Ǵ', 'Ǵ'), - ('Ƕ', 'Ǹ'), - ('Ǻ', 'Ǻ'), - ('Ǽ', 'Ǽ'), - ('Ǿ', 'Ǿ'), - ('Ȁ', 'Ȁ'), - ('Ȃ', 'Ȃ'), - ('Ȅ', 'Ȅ'), - ('Ȇ', 'Ȇ'), - ('Ȉ', 'Ȉ'), - ('Ȋ', 'Ȋ'), - ('Ȍ', 'Ȍ'), - ('Ȏ', 'Ȏ'), - ('Ȑ', 'Ȑ'), - ('Ȓ', 'Ȓ'), - ('Ȕ', 'Ȕ'), - ('Ȗ', 'Ȗ'), - ('Ș', 'Ș'), - ('Ț', 'Ț'), - ('Ȝ', 'Ȝ'), - ('Ȟ', 'Ȟ'), - ('Ƞ', 'Ƞ'), - ('Ȣ', 'Ȣ'), - ('Ȥ', 'Ȥ'), - ('Ȧ', 'Ȧ'), - ('Ȩ', 'Ȩ'), - ('Ȫ', 'Ȫ'), - ('Ȭ', 'Ȭ'), - ('Ȯ', 'Ȯ'), - ('Ȱ', 'Ȱ'), - ('Ȳ', 'Ȳ'), - ('Ⱥ', 'Ȼ'), - ('Ƚ', 'Ⱦ'), - ('Ɂ', 'Ɂ'), - ('Ƀ', 'Ɇ'), - ('Ɉ', 'Ɉ'), - ('Ɋ', 'Ɋ'), - ('Ɍ', 'Ɍ'), - ('Ɏ', 'Ɏ'), - ('Ͱ', 'Ͱ'), - ('Ͳ', 'Ͳ'), - ('Ͷ', 'Ͷ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ώ'), - ('Α', 'Ρ'), - ('Σ', 'Ϋ'), - ('Ϗ', 'Ϗ'), - ('ϒ', 'ϔ'), - ('Ϙ', 'Ϙ'), - ('Ϛ', 'Ϛ'), - ('Ϝ', 'Ϝ'), - ('Ϟ', 'Ϟ'), - ('Ϡ', 'Ϡ'), - ('Ϣ', 'Ϣ'), - ('Ϥ', 'Ϥ'), - ('Ϧ', 'Ϧ'), - ('Ϩ', 'Ϩ'), - ('Ϫ', 'Ϫ'), - ('Ϭ', 'Ϭ'), - ('Ϯ', 'Ϯ'), - ('ϴ', 'ϴ'), - ('Ϸ', 'Ϸ'), - ('Ϲ', 'Ϻ'), - ('Ͻ', 'Я'), - ('Ѡ', 'Ѡ'), - ('Ѣ', 'Ѣ'), - ('Ѥ', 'Ѥ'), - ('Ѧ', 'Ѧ'), - ('Ѩ', 'Ѩ'), - ('Ѫ', 'Ѫ'), - ('Ѭ', 'Ѭ'), - ('Ѯ', 'Ѯ'), - ('Ѱ', 'Ѱ'), - ('Ѳ', 'Ѳ'), - ('Ѵ', 'Ѵ'), - ('Ѷ', 'Ѷ'), - ('Ѹ', 'Ѹ'), - ('Ѻ', 'Ѻ'), - ('Ѽ', 'Ѽ'), - ('Ѿ', 'Ѿ'), - ('Ҁ', 'Ҁ'), - ('Ҋ', 'Ҋ'), - ('Ҍ', 'Ҍ'), - ('Ҏ', 'Ҏ'), - ('Ґ', 'Ґ'), - ('Ғ', 'Ғ'), - ('Ҕ', 'Ҕ'), - ('Җ', 'Җ'), - ('Ҙ', 'Ҙ'), - ('Қ', 'Қ'), - ('Ҝ', 'Ҝ'), - ('Ҟ', 'Ҟ'), - ('Ҡ', 'Ҡ'), - ('Ң', 'Ң'), - ('Ҥ', 'Ҥ'), - ('Ҧ', 'Ҧ'), - ('Ҩ', 'Ҩ'), - ('Ҫ', 'Ҫ'), - ('Ҭ', 'Ҭ'), - ('Ү', 'Ү'), - ('Ұ', 'Ұ'), - ('Ҳ', 'Ҳ'), - ('Ҵ', 'Ҵ'), - ('Ҷ', 'Ҷ'), - ('Ҹ', 'Ҹ'), - ('Һ', 'Һ'), - ('Ҽ', 'Ҽ'), - ('Ҿ', 'Ҿ'), - ('Ӏ', 'Ӂ'), - ('Ӄ', 'Ӄ'), - ('Ӆ', 'Ӆ'), - ('Ӈ', 'Ӈ'), - ('Ӊ', 'Ӊ'), - ('Ӌ', 'Ӌ'), - ('Ӎ', 'Ӎ'), - ('Ӑ', 'Ӑ'), - ('Ӓ', 'Ӓ'), - ('Ӕ', 'Ӕ'), - ('Ӗ', 'Ӗ'), - ('Ә', 'Ә'), - ('Ӛ', 
'Ӛ'), - ('Ӝ', 'Ӝ'), - ('Ӟ', 'Ӟ'), - ('Ӡ', 'Ӡ'), - ('Ӣ', 'Ӣ'), - ('Ӥ', 'Ӥ'), - ('Ӧ', 'Ӧ'), - ('Ө', 'Ө'), - ('Ӫ', 'Ӫ'), - ('Ӭ', 'Ӭ'), - ('Ӯ', 'Ӯ'), - ('Ӱ', 'Ӱ'), - ('Ӳ', 'Ӳ'), - ('Ӵ', 'Ӵ'), - ('Ӷ', 'Ӷ'), - ('Ӹ', 'Ӹ'), - ('Ӻ', 'Ӻ'), - ('Ӽ', 'Ӽ'), - ('Ӿ', 'Ӿ'), - ('Ԁ', 'Ԁ'), - ('Ԃ', 'Ԃ'), - ('Ԅ', 'Ԅ'), - ('Ԇ', 'Ԇ'), - ('Ԉ', 'Ԉ'), - ('Ԋ', 'Ԋ'), - ('Ԍ', 'Ԍ'), - ('Ԏ', 'Ԏ'), - ('Ԑ', 'Ԑ'), - ('Ԓ', 'Ԓ'), - ('Ԕ', 'Ԕ'), - ('Ԗ', 'Ԗ'), - ('Ԙ', 'Ԙ'), - ('Ԛ', 'Ԛ'), - ('Ԝ', 'Ԝ'), - ('Ԟ', 'Ԟ'), - ('Ԡ', 'Ԡ'), - ('Ԣ', 'Ԣ'), - ('Ԥ', 'Ԥ'), - ('Ԧ', 'Ԧ'), - ('Ԩ', 'Ԩ'), - ('Ԫ', 'Ԫ'), - ('Ԭ', 'Ԭ'), - ('Ԯ', 'Ԯ'), - ('Ա', 'Ֆ'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('Ꭰ', 'Ᏽ'), - ('Ᲊ', 'Ᲊ'), - ('Ḁ', 'Ḁ'), - ('Ḃ', 'Ḃ'), - ('Ḅ', 'Ḅ'), - ('Ḇ', 'Ḇ'), - ('Ḉ', 'Ḉ'), - ('Ḋ', 'Ḋ'), - ('Ḍ', 'Ḍ'), - ('Ḏ', 'Ḏ'), - ('Ḑ', 'Ḑ'), - ('Ḓ', 'Ḓ'), - ('Ḕ', 'Ḕ'), - ('Ḗ', 'Ḗ'), - ('Ḙ', 'Ḙ'), - ('Ḛ', 'Ḛ'), - ('Ḝ', 'Ḝ'), - ('Ḟ', 'Ḟ'), - ('Ḡ', 'Ḡ'), - ('Ḣ', 'Ḣ'), - ('Ḥ', 'Ḥ'), - ('Ḧ', 'Ḧ'), - ('Ḩ', 'Ḩ'), - ('Ḫ', 'Ḫ'), - ('Ḭ', 'Ḭ'), - ('Ḯ', 'Ḯ'), - ('Ḱ', 'Ḱ'), - ('Ḳ', 'Ḳ'), - ('Ḵ', 'Ḵ'), - ('Ḷ', 'Ḷ'), - ('Ḹ', 'Ḹ'), - ('Ḻ', 'Ḻ'), - ('Ḽ', 'Ḽ'), - ('Ḿ', 'Ḿ'), - ('Ṁ', 'Ṁ'), - ('Ṃ', 'Ṃ'), - ('Ṅ', 'Ṅ'), - ('Ṇ', 'Ṇ'), - ('Ṉ', 'Ṉ'), - ('Ṋ', 'Ṋ'), - ('Ṍ', 'Ṍ'), - ('Ṏ', 'Ṏ'), - ('Ṑ', 'Ṑ'), - ('Ṓ', 'Ṓ'), - ('Ṕ', 'Ṕ'), - ('Ṗ', 'Ṗ'), - ('Ṙ', 'Ṙ'), - ('Ṛ', 'Ṛ'), - ('Ṝ', 'Ṝ'), - ('Ṟ', 'Ṟ'), - ('Ṡ', 'Ṡ'), - ('Ṣ', 'Ṣ'), - ('Ṥ', 'Ṥ'), - ('Ṧ', 'Ṧ'), - ('Ṩ', 'Ṩ'), - ('Ṫ', 'Ṫ'), - ('Ṭ', 'Ṭ'), - ('Ṯ', 'Ṯ'), - ('Ṱ', 'Ṱ'), - ('Ṳ', 'Ṳ'), - ('Ṵ', 'Ṵ'), - ('Ṷ', 'Ṷ'), - ('Ṹ', 'Ṹ'), - ('Ṻ', 'Ṻ'), - ('Ṽ', 'Ṽ'), - ('Ṿ', 'Ṿ'), - ('Ẁ', 'Ẁ'), - ('Ẃ', 'Ẃ'), - ('Ẅ', 'Ẅ'), - ('Ẇ', 'Ẇ'), - ('Ẉ', 'Ẉ'), - ('Ẋ', 'Ẋ'), - ('Ẍ', 'Ẍ'), - ('Ẏ', 'Ẏ'), - ('Ẑ', 'Ẑ'), - ('Ẓ', 'Ẓ'), - ('Ẕ', 'Ẕ'), - ('ẞ', 'ẞ'), - ('Ạ', 'Ạ'), - ('Ả', 'Ả'), - ('Ấ', 'Ấ'), - ('Ầ', 'Ầ'), - ('Ẩ', 'Ẩ'), - ('Ẫ', 'Ẫ'), - ('Ậ', 'Ậ'), - ('Ắ', 'Ắ'), - ('Ằ', 'Ằ'), - ('Ẳ', 'Ẳ'), - ('Ẵ', 'Ẵ'), - ('Ặ', 'Ặ'), - ('Ẹ', 'Ẹ'), - ('Ẻ', 'Ẻ'), - ('Ẽ', 'Ẽ'), - ('Ế', 'Ế'), - ('Ề', 'Ề'), - ('Ể', 'Ể'), - 
('Ễ', 'Ễ'), - ('Ệ', 'Ệ'), - ('Ỉ', 'Ỉ'), - ('Ị', 'Ị'), - ('Ọ', 'Ọ'), - ('Ỏ', 'Ỏ'), - ('Ố', 'Ố'), - ('Ồ', 'Ồ'), - ('Ổ', 'Ổ'), - ('Ỗ', 'Ỗ'), - ('Ộ', 'Ộ'), - ('Ớ', 'Ớ'), - ('Ờ', 'Ờ'), - ('Ở', 'Ở'), - ('Ỡ', 'Ỡ'), - ('Ợ', 'Ợ'), - ('Ụ', 'Ụ'), - ('Ủ', 'Ủ'), - ('Ứ', 'Ứ'), - ('Ừ', 'Ừ'), - ('Ử', 'Ử'), - ('Ữ', 'Ữ'), - ('Ự', 'Ự'), - ('Ỳ', 'Ỳ'), - ('Ỵ', 'Ỵ'), - ('Ỷ', 'Ỷ'), - ('Ỹ', 'Ỹ'), - ('Ỻ', 'Ỻ'), - ('Ỽ', 'Ỽ'), - ('Ỿ', 'Ỿ'), - ('Ἀ', 'Ἇ'), - ('Ἐ', 'Ἕ'), - ('Ἠ', 'Ἧ'), - ('Ἰ', 'Ἷ'), - ('Ὀ', 'Ὅ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 'Ὕ'), - ('Ὗ', 'Ὗ'), - ('Ὠ', 'Ὧ'), - ('ᾈ', 'ᾏ'), - ('ᾘ', 'ᾟ'), - ('ᾨ', 'ᾯ'), - ('Ᾰ', 'ᾼ'), - ('Ὲ', 'ῌ'), - ('Ῐ', 'Ί'), - ('Ῠ', 'Ῥ'), - ('Ὸ', 'ῼ'), - ('ℂ', 'ℂ'), - ('ℇ', 'ℇ'), - ('ℋ', 'ℍ'), - ('ℐ', 'ℒ'), - ('ℕ', 'ℕ'), - ('ℙ', 'ℝ'), - ('ℤ', 'ℤ'), - ('Ω', 'Ω'), - ('ℨ', 'ℨ'), - ('K', 'ℭ'), - ('ℰ', 'ℳ'), - ('ℾ', 'ℿ'), - ('ⅅ', 'ⅅ'), - ('Ⅰ', 'Ⅿ'), - ('Ↄ', 'Ↄ'), - ('Ⓐ', 'Ⓩ'), - ('Ⰰ', 'Ⱟ'), - ('Ⱡ', 'Ⱡ'), - ('Ɫ', 'Ɽ'), - ('Ⱨ', 'Ⱨ'), - ('Ⱪ', 'Ⱪ'), - ('Ⱬ', 'Ⱬ'), - ('Ɑ', 'Ɒ'), - ('Ⱳ', 'Ⱳ'), - ('Ⱶ', 'Ⱶ'), - ('Ȿ', 'Ⲁ'), - ('Ⲃ', 'Ⲃ'), - ('Ⲅ', 'Ⲅ'), - ('Ⲇ', 'Ⲇ'), - ('Ⲉ', 'Ⲉ'), - ('Ⲋ', 'Ⲋ'), - ('Ⲍ', 'Ⲍ'), - ('Ⲏ', 'Ⲏ'), - ('Ⲑ', 'Ⲑ'), - ('Ⲓ', 'Ⲓ'), - ('Ⲕ', 'Ⲕ'), - ('Ⲗ', 'Ⲗ'), - ('Ⲙ', 'Ⲙ'), - ('Ⲛ', 'Ⲛ'), - ('Ⲝ', 'Ⲝ'), - ('Ⲟ', 'Ⲟ'), - ('Ⲡ', 'Ⲡ'), - ('Ⲣ', 'Ⲣ'), - ('Ⲥ', 'Ⲥ'), - ('Ⲧ', 'Ⲧ'), - ('Ⲩ', 'Ⲩ'), - ('Ⲫ', 'Ⲫ'), - ('Ⲭ', 'Ⲭ'), - ('Ⲯ', 'Ⲯ'), - ('Ⲱ', 'Ⲱ'), - ('Ⲳ', 'Ⲳ'), - ('Ⲵ', 'Ⲵ'), - ('Ⲷ', 'Ⲷ'), - ('Ⲹ', 'Ⲹ'), - ('Ⲻ', 'Ⲻ'), - ('Ⲽ', 'Ⲽ'), - ('Ⲿ', 'Ⲿ'), - ('Ⳁ', 'Ⳁ'), - ('Ⳃ', 'Ⳃ'), - ('Ⳅ', 'Ⳅ'), - ('Ⳇ', 'Ⳇ'), - ('Ⳉ', 'Ⳉ'), - ('Ⳋ', 'Ⳋ'), - ('Ⳍ', 'Ⳍ'), - ('Ⳏ', 'Ⳏ'), - ('Ⳑ', 'Ⳑ'), - ('Ⳓ', 'Ⳓ'), - ('Ⳕ', 'Ⳕ'), - ('Ⳗ', 'Ⳗ'), - ('Ⳙ', 'Ⳙ'), - ('Ⳛ', 'Ⳛ'), - ('Ⳝ', 'Ⳝ'), - ('Ⳟ', 'Ⳟ'), - ('Ⳡ', 'Ⳡ'), - ('Ⳣ', 'Ⳣ'), - ('Ⳬ', 'Ⳬ'), - ('Ⳮ', 'Ⳮ'), - ('Ⳳ', 'Ⳳ'), - ('Ꙁ', 'Ꙁ'), - ('Ꙃ', 'Ꙃ'), - ('Ꙅ', 'Ꙅ'), - ('Ꙇ', 'Ꙇ'), - ('Ꙉ', 'Ꙉ'), - ('Ꙋ', 'Ꙋ'), - ('Ꙍ', 'Ꙍ'), - ('Ꙏ', 'Ꙏ'), - ('Ꙑ', 'Ꙑ'), - ('Ꙓ', 'Ꙓ'), - ('Ꙕ', 'Ꙕ'), - ('Ꙗ', 'Ꙗ'), - ('Ꙙ', 'Ꙙ'), - ('Ꙛ', 'Ꙛ'), - ('Ꙝ', 'Ꙝ'), - ('Ꙟ', 'Ꙟ'), - ('Ꙡ', 'Ꙡ'), 
- ('Ꙣ', 'Ꙣ'), - ('Ꙥ', 'Ꙥ'), - ('Ꙧ', 'Ꙧ'), - ('Ꙩ', 'Ꙩ'), - ('Ꙫ', 'Ꙫ'), - ('Ꙭ', 'Ꙭ'), - ('Ꚁ', 'Ꚁ'), - ('Ꚃ', 'Ꚃ'), - ('Ꚅ', 'Ꚅ'), - ('Ꚇ', 'Ꚇ'), - ('Ꚉ', 'Ꚉ'), - ('Ꚋ', 'Ꚋ'), - ('Ꚍ', 'Ꚍ'), - ('Ꚏ', 'Ꚏ'), - ('Ꚑ', 'Ꚑ'), - ('Ꚓ', 'Ꚓ'), - ('Ꚕ', 'Ꚕ'), - ('Ꚗ', 'Ꚗ'), - ('Ꚙ', 'Ꚙ'), - ('Ꚛ', 'Ꚛ'), - ('Ꜣ', 'Ꜣ'), - ('Ꜥ', 'Ꜥ'), - ('Ꜧ', 'Ꜧ'), - ('Ꜩ', 'Ꜩ'), - ('Ꜫ', 'Ꜫ'), - ('Ꜭ', 'Ꜭ'), - ('Ꜯ', 'Ꜯ'), - ('Ꜳ', 'Ꜳ'), - ('Ꜵ', 'Ꜵ'), - ('Ꜷ', 'Ꜷ'), - ('Ꜹ', 'Ꜹ'), - ('Ꜻ', 'Ꜻ'), - ('Ꜽ', 'Ꜽ'), - ('Ꜿ', 'Ꜿ'), - ('Ꝁ', 'Ꝁ'), - ('Ꝃ', 'Ꝃ'), - ('Ꝅ', 'Ꝅ'), - ('Ꝇ', 'Ꝇ'), - ('Ꝉ', 'Ꝉ'), - ('Ꝋ', 'Ꝋ'), - ('Ꝍ', 'Ꝍ'), - ('Ꝏ', 'Ꝏ'), - ('Ꝑ', 'Ꝑ'), - ('Ꝓ', 'Ꝓ'), - ('Ꝕ', 'Ꝕ'), - ('Ꝗ', 'Ꝗ'), - ('Ꝙ', 'Ꝙ'), - ('Ꝛ', 'Ꝛ'), - ('Ꝝ', 'Ꝝ'), - ('Ꝟ', 'Ꝟ'), - ('Ꝡ', 'Ꝡ'), - ('Ꝣ', 'Ꝣ'), - ('Ꝥ', 'Ꝥ'), - ('Ꝧ', 'Ꝧ'), - ('Ꝩ', 'Ꝩ'), - ('Ꝫ', 'Ꝫ'), - ('Ꝭ', 'Ꝭ'), - ('Ꝯ', 'Ꝯ'), - ('Ꝺ', 'Ꝺ'), - ('Ꝼ', 'Ꝼ'), - ('Ᵹ', 'Ꝿ'), - ('Ꞁ', 'Ꞁ'), - ('Ꞃ', 'Ꞃ'), - ('Ꞅ', 'Ꞅ'), - ('Ꞇ', 'Ꞇ'), - ('Ꞌ', 'Ꞌ'), - ('Ɥ', 'Ɥ'), - ('Ꞑ', 'Ꞑ'), - ('Ꞓ', 'Ꞓ'), - ('Ꞗ', 'Ꞗ'), - ('Ꞙ', 'Ꞙ'), - ('Ꞛ', 'Ꞛ'), - ('Ꞝ', 'Ꞝ'), - ('Ꞟ', 'Ꞟ'), - ('Ꞡ', 'Ꞡ'), - ('Ꞣ', 'Ꞣ'), - ('Ꞥ', 'Ꞥ'), - ('Ꞧ', 'Ꞧ'), - ('Ꞩ', 'Ꞩ'), - ('Ɦ', 'Ɪ'), - ('Ʞ', 'Ꞵ'), - ('Ꞷ', 'Ꞷ'), - ('Ꞹ', 'Ꞹ'), - ('Ꞻ', 'Ꞻ'), - ('Ꞽ', 'Ꞽ'), - ('Ꞿ', 'Ꞿ'), - ('Ꟁ', 'Ꟁ'), - ('Ꟃ', 'Ꟃ'), - ('Ꞔ', 'Ꟈ'), - ('Ꟊ', 'Ꟊ'), - ('Ɤ', 'Ꟍ'), - ('Ꟑ', 'Ꟑ'), - ('Ꟗ', 'Ꟗ'), - ('Ꟙ', 'Ꟙ'), - ('Ꟛ', 'Ꟛ'), - ('Ƛ', 'Ƛ'), - ('Ꟶ', 'Ꟶ'), - ('A', 'Z'), - ('𐐀', '𐐧'), - ('𐒰', '𐓓'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐲀', '𐲲'), - ('𐵐', '𐵥'), - ('𑢠', '𑢿'), - ('𖹀', '𖹟'), - ('𝐀', '𝐙'), - ('𝐴', '𝑍'), - ('𝑨', '𝒁'), - ('𝒜', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒵'), - ('𝓐', '𝓩'), - ('𝔄', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔸', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕬', '𝖅'), - ('𝖠', '𝖹'), - ('𝗔', '𝗭'), - ('𝘈', '𝘡'), - ('𝘼', '𝙕'), - ('𝙰', '𝚉'), - ('𝚨', '𝛀'), - ('𝛢', '𝛺'), - ('𝜜', '𝜴'), - ('𝝖', '𝝮'), - ('𝞐', '𝞨'), - ('𝟊', '𝟊'), - ('𞤀', '𞤡'), - ('🄰', '🅉'), - ('🅐', '🅩'), - ('🅰', 
'🆉'), -]; diff --git a/vendor/regex-syntax/src/unicode_tables/word_break.rs b/vendor/regex-syntax/src/unicode_tables/word_break.rs deleted file mode 100644 index b764d34a..00000000 --- a/vendor/regex-syntax/src/unicode_tables/word_break.rs +++ /dev/null @@ -1,1152 +0,0 @@ -// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: -// -// ucd-generate word-break ucd-16.0.0 --chars -// -// Unicode version: 16.0.0. -// -// ucd-generate 0.3.1 is available on crates.io. - -pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ - ("ALetter", ALETTER), - ("CR", CR), - ("Double_Quote", DOUBLE_QUOTE), - ("Extend", EXTEND), - ("ExtendNumLet", EXTENDNUMLET), - ("Format", FORMAT), - ("Hebrew_Letter", HEBREW_LETTER), - ("Katakana", KATAKANA), - ("LF", LF), - ("MidLetter", MIDLETTER), - ("MidNum", MIDNUM), - ("MidNumLet", MIDNUMLET), - ("Newline", NEWLINE), - ("Numeric", NUMERIC), - ("Regional_Indicator", REGIONAL_INDICATOR), - ("Single_Quote", SINGLE_QUOTE), - ("WSegSpace", WSEGSPACE), - ("ZWJ", ZWJ), -]; - -pub const ALETTER: &'static [(char, char)] = &[ - ('A', 'Z'), - ('a', 'z'), - ('ª', 'ª'), - ('µ', 'µ'), - ('º', 'º'), - ('À', 'Ö'), - ('Ø', 'ö'), - ('ø', '˗'), - ('˞', '˿'), - ('Ͱ', 'ʹ'), - ('Ͷ', 'ͷ'), - ('ͺ', 'ͽ'), - ('Ϳ', 'Ϳ'), - ('Ά', 'Ά'), - ('Έ', 'Ί'), - ('Ό', 'Ό'), - ('Ύ', 'Ρ'), - ('Σ', 'ϵ'), - ('Ϸ', 'ҁ'), - ('Ҋ', 'ԯ'), - ('Ա', 'Ֆ'), - ('ՙ', '՜'), - ('՞', '՞'), - ('ՠ', 'ֈ'), - ('֊', '֊'), - ('׳', '׳'), - ('ؠ', 'ي'), - ('ٮ', 'ٯ'), - ('ٱ', 'ۓ'), - ('ە', 'ە'), - ('ۥ', 'ۦ'), - ('ۮ', 'ۯ'), - ('ۺ', 'ۼ'), - ('ۿ', 'ۿ'), - ('\u{70f}', 'ܐ'), - ('ܒ', 'ܯ'), - ('ݍ', 'ޥ'), - ('ޱ', 'ޱ'), - ('ߊ', 'ߪ'), - ('ߴ', 'ߵ'), - ('ߺ', 'ߺ'), - ('ࠀ', 'ࠕ'), - ('ࠚ', 'ࠚ'), - ('ࠤ', 'ࠤ'), - ('ࠨ', 'ࠨ'), - ('ࡀ', 'ࡘ'), - ('ࡠ', 'ࡪ'), - ('ࡰ', 'ࢇ'), - ('ࢉ', 'ࢎ'), - ('ࢠ', 'ࣉ'), - ('ऄ', 'ह'), - ('ऽ', 'ऽ'), - ('ॐ', 'ॐ'), - ('क़', 'ॡ'), - ('ॱ', 'ঀ'), - ('অ', 'ঌ'), - ('এ', 'ঐ'), - ('ও', 'ন'), - ('প', 'র'), - ('ল', 'ল'), - ('শ', 'হ'), - ('ঽ', 'ঽ'), - ('ৎ', 'ৎ'), - ('ড়', 'ঢ়'), - ('য়', 
'ৡ'), - ('ৰ', 'ৱ'), - ('ৼ', 'ৼ'), - ('ਅ', 'ਊ'), - ('ਏ', 'ਐ'), - ('ਓ', 'ਨ'), - ('ਪ', 'ਰ'), - ('ਲ', 'ਲ਼'), - ('ਵ', 'ਸ਼'), - ('ਸ', 'ਹ'), - ('ਖ਼', 'ੜ'), - ('ਫ਼', 'ਫ਼'), - ('ੲ', 'ੴ'), - ('અ', 'ઍ'), - ('એ', 'ઑ'), - ('ઓ', 'ન'), - ('પ', 'ર'), - ('લ', 'ળ'), - ('વ', 'હ'), - ('ઽ', 'ઽ'), - ('ૐ', 'ૐ'), - ('ૠ', 'ૡ'), - ('ૹ', 'ૹ'), - ('ଅ', 'ଌ'), - ('ଏ', 'ଐ'), - ('ଓ', 'ନ'), - ('ପ', 'ର'), - ('ଲ', 'ଳ'), - ('ଵ', 'ହ'), - ('ଽ', 'ଽ'), - ('ଡ଼', 'ଢ଼'), - ('ୟ', 'ୡ'), - ('ୱ', 'ୱ'), - ('ஃ', 'ஃ'), - ('அ', 'ஊ'), - ('எ', 'ஐ'), - ('ஒ', 'க'), - ('ங', 'ச'), - ('ஜ', 'ஜ'), - ('ஞ', 'ட'), - ('ண', 'த'), - ('ந', 'ப'), - ('ம', 'ஹ'), - ('ௐ', 'ௐ'), - ('అ', 'ఌ'), - ('ఎ', 'ఐ'), - ('ఒ', 'న'), - ('ప', 'హ'), - ('ఽ', 'ఽ'), - ('ౘ', 'ౚ'), - ('ౝ', 'ౝ'), - ('ౠ', 'ౡ'), - ('ಀ', 'ಀ'), - ('ಅ', 'ಌ'), - ('ಎ', 'ಐ'), - ('ಒ', 'ನ'), - ('ಪ', 'ಳ'), - ('ವ', 'ಹ'), - ('ಽ', 'ಽ'), - ('ೝ', 'ೞ'), - ('ೠ', 'ೡ'), - ('ೱ', 'ೲ'), - ('ഄ', 'ഌ'), - ('എ', 'ഐ'), - ('ഒ', 'ഺ'), - ('ഽ', 'ഽ'), - ('ൎ', 'ൎ'), - ('ൔ', 'ൖ'), - ('ൟ', 'ൡ'), - ('ൺ', 'ൿ'), - ('අ', 'ඖ'), - ('ක', 'න'), - ('ඳ', 'ර'), - ('ල', 'ල'), - ('ව', 'ෆ'), - ('ༀ', 'ༀ'), - ('ཀ', 'ཇ'), - ('ཉ', 'ཬ'), - ('ྈ', 'ྌ'), - ('Ⴀ', 'Ⴥ'), - ('Ⴧ', 'Ⴧ'), - ('Ⴭ', 'Ⴭ'), - ('ა', 'ჺ'), - ('ჼ', 'ቈ'), - ('ቊ', 'ቍ'), - ('ቐ', 'ቖ'), - ('ቘ', 'ቘ'), - ('ቚ', 'ቝ'), - ('በ', 'ኈ'), - ('ኊ', 'ኍ'), - ('ነ', 'ኰ'), - ('ኲ', 'ኵ'), - ('ኸ', 'ኾ'), - ('ዀ', 'ዀ'), - ('ዂ', 'ዅ'), - ('ወ', 'ዖ'), - ('ዘ', 'ጐ'), - ('ጒ', 'ጕ'), - ('ጘ', 'ፚ'), - ('ᎀ', 'ᎏ'), - ('Ꭰ', 'Ᏽ'), - ('ᏸ', 'ᏽ'), - ('ᐁ', 'ᙬ'), - ('ᙯ', 'ᙿ'), - ('ᚁ', 'ᚚ'), - ('ᚠ', 'ᛪ'), - ('ᛮ', 'ᛸ'), - ('ᜀ', 'ᜑ'), - ('ᜟ', 'ᜱ'), - ('ᝀ', 'ᝑ'), - ('ᝠ', 'ᝬ'), - ('ᝮ', 'ᝰ'), - ('ᠠ', 'ᡸ'), - ('ᢀ', 'ᢄ'), - ('ᢇ', 'ᢨ'), - ('ᢪ', 'ᢪ'), - ('ᢰ', 'ᣵ'), - ('ᤀ', 'ᤞ'), - ('ᨀ', 'ᨖ'), - ('ᬅ', 'ᬳ'), - ('ᭅ', 'ᭌ'), - ('ᮃ', 'ᮠ'), - ('ᮮ', 'ᮯ'), - ('ᮺ', 'ᯥ'), - ('ᰀ', 'ᰣ'), - ('ᱍ', 'ᱏ'), - ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲊ'), - ('Ა', 'Ჺ'), - ('Ჽ', 'Ჿ'), - ('ᳩ', 'ᳬ'), - ('ᳮ', 'ᳳ'), - ('ᳵ', 'ᳶ'), - ('ᳺ', 'ᳺ'), - ('ᴀ', 'ᶿ'), - ('Ḁ', 'ἕ'), - ('Ἐ', 'Ἕ'), - ('ἠ', 'ὅ'), - ('Ὀ', 'Ὅ'), - ('ὐ', 'ὗ'), - ('Ὑ', 'Ὑ'), - ('Ὓ', 'Ὓ'), - ('Ὕ', 
'Ὕ'), - ('Ὗ', 'ώ'), - ('ᾀ', 'ᾴ'), - ('ᾶ', 'ᾼ'), - ('ι', 'ι'), - ('ῂ', 'ῄ'), - ('ῆ', 'ῌ'), - ('ῐ', 'ΐ'), - ('ῖ', 'Ί'), - ('ῠ', 'Ῥ'), - ('ῲ', 'ῴ'), - ('ῶ', 'ῼ'), - ('ⁱ', 'ⁱ'), - ('ⁿ', 'ⁿ'), - ('ₐ', 'ₜ'), - ('ℂ', 'ℂ'), - ('ℇ', 'ℇ'), - ('ℊ', 'ℓ'), - ('ℕ', 'ℕ'), - ('ℙ', 'ℝ'), - ('ℤ', 'ℤ'), - ('Ω', 'Ω'), - ('ℨ', 'ℨ'), - ('K', 'ℭ'), - ('ℯ', 'ℹ'), - ('ℼ', 'ℿ'), - ('ⅅ', 'ⅉ'), - ('ⅎ', 'ⅎ'), - ('Ⅰ', 'ↈ'), - ('Ⓐ', 'ⓩ'), - ('Ⰰ', 'ⳤ'), - ('Ⳬ', 'ⳮ'), - ('Ⳳ', 'ⳳ'), - ('ⴀ', 'ⴥ'), - ('ⴧ', 'ⴧ'), - ('ⴭ', 'ⴭ'), - ('ⴰ', 'ⵧ'), - ('ⵯ', 'ⵯ'), - ('ⶀ', 'ⶖ'), - ('ⶠ', 'ⶦ'), - ('ⶨ', 'ⶮ'), - ('ⶰ', 'ⶶ'), - ('ⶸ', 'ⶾ'), - ('ⷀ', 'ⷆ'), - ('ⷈ', 'ⷎ'), - ('ⷐ', 'ⷖ'), - ('ⷘ', 'ⷞ'), - ('ⸯ', 'ⸯ'), - ('々', '々'), - ('〻', '〼'), - ('ㄅ', 'ㄯ'), - ('ㄱ', 'ㆎ'), - ('ㆠ', 'ㆿ'), - ('ꀀ', 'ꒌ'), - ('ꓐ', 'ꓽ'), - ('ꔀ', 'ꘌ'), - ('ꘐ', 'ꘟ'), - ('ꘪ', 'ꘫ'), - ('Ꙁ', 'ꙮ'), - ('ꙿ', 'ꚝ'), - ('ꚠ', 'ꛯ'), - ('꜈', 'ꟍ'), - ('Ꟑ', 'ꟑ'), - ('ꟓ', 'ꟓ'), - ('ꟕ', 'Ƛ'), - ('ꟲ', 'ꠁ'), - ('ꠃ', 'ꠅ'), - ('ꠇ', 'ꠊ'), - ('ꠌ', 'ꠢ'), - ('ꡀ', 'ꡳ'), - ('ꢂ', 'ꢳ'), - ('ꣲ', 'ꣷ'), - ('ꣻ', 'ꣻ'), - ('ꣽ', 'ꣾ'), - ('ꤊ', 'ꤥ'), - ('ꤰ', 'ꥆ'), - ('ꥠ', 'ꥼ'), - ('ꦄ', 'ꦲ'), - ('ꧏ', 'ꧏ'), - ('ꨀ', 'ꨨ'), - ('ꩀ', 'ꩂ'), - ('ꩄ', 'ꩋ'), - ('ꫠ', 'ꫪ'), - ('ꫲ', 'ꫴ'), - ('ꬁ', 'ꬆ'), - ('ꬉ', 'ꬎ'), - ('ꬑ', 'ꬖ'), - ('ꬠ', 'ꬦ'), - ('ꬨ', 'ꬮ'), - ('ꬰ', 'ꭩ'), - ('ꭰ', 'ꯢ'), - ('가', '힣'), - ('ힰ', 'ퟆ'), - ('ퟋ', 'ퟻ'), - ('ff', 'st'), - ('ﬓ', 'ﬗ'), - ('ﭐ', 'ﮱ'), - ('ﯓ', 'ﴽ'), - ('ﵐ', 'ﶏ'), - ('ﶒ', 'ﷇ'), - ('ﷰ', 'ﷻ'), - ('ﹰ', 'ﹴ'), - ('ﹶ', 'ﻼ'), - ('A', 'Z'), - ('a', 'z'), - ('ᅠ', 'ᄒ'), - ('ᅡ', 'ᅦ'), - ('ᅧ', 'ᅬ'), - ('ᅭ', 'ᅲ'), - ('ᅳ', 'ᅵ'), - ('𐀀', '𐀋'), - ('𐀍', '𐀦'), - ('𐀨', '𐀺'), - ('𐀼', '𐀽'), - ('𐀿', '𐁍'), - ('𐁐', '𐁝'), - ('𐂀', '𐃺'), - ('𐅀', '𐅴'), - ('𐊀', '𐊜'), - ('𐊠', '𐋐'), - ('𐌀', '𐌟'), - ('𐌭', '𐍊'), - ('𐍐', '𐍵'), - ('𐎀', '𐎝'), - ('𐎠', '𐏃'), - ('𐏈', '𐏏'), - ('𐏑', '𐏕'), - ('𐐀', '𐒝'), - ('𐒰', '𐓓'), - ('𐓘', '𐓻'), - ('𐔀', '𐔧'), - ('𐔰', '𐕣'), - ('𐕰', '𐕺'), - ('𐕼', '𐖊'), - ('𐖌', '𐖒'), - ('𐖔', '𐖕'), - ('𐖗', '𐖡'), - ('𐖣', '𐖱'), - ('𐖳', '𐖹'), - ('𐖻', '𐖼'), - ('𐗀', '𐗳'), - ('𐘀', '𐜶'), - ('𐝀', '𐝕'), - 
('𐝠', '𐝧'), - ('𐞀', '𐞅'), - ('𐞇', '𐞰'), - ('𐞲', '𐞺'), - ('𐠀', '𐠅'), - ('𐠈', '𐠈'), - ('𐠊', '𐠵'), - ('𐠷', '𐠸'), - ('𐠼', '𐠼'), - ('𐠿', '𐡕'), - ('𐡠', '𐡶'), - ('𐢀', '𐢞'), - ('𐣠', '𐣲'), - ('𐣴', '𐣵'), - ('𐤀', '𐤕'), - ('𐤠', '𐤹'), - ('𐦀', '𐦷'), - ('𐦾', '𐦿'), - ('𐨀', '𐨀'), - ('𐨐', '𐨓'), - ('𐨕', '𐨗'), - ('𐨙', '𐨵'), - ('𐩠', '𐩼'), - ('𐪀', '𐪜'), - ('𐫀', '𐫇'), - ('𐫉', '𐫤'), - ('𐬀', '𐬵'), - ('𐭀', '𐭕'), - ('𐭠', '𐭲'), - ('𐮀', '𐮑'), - ('𐰀', '𐱈'), - ('𐲀', '𐲲'), - ('𐳀', '𐳲'), - ('𐴀', '𐴣'), - ('𐵊', '𐵥'), - ('𐵯', '𐶅'), - ('𐺀', '𐺩'), - ('𐺰', '𐺱'), - ('𐻂', '𐻄'), - ('𐼀', '𐼜'), - ('𐼧', '𐼧'), - ('𐼰', '𐽅'), - ('𐽰', '𐾁'), - ('𐾰', '𐿄'), - ('𐿠', '𐿶'), - ('𑀃', '𑀷'), - ('𑁱', '𑁲'), - ('𑁵', '𑁵'), - ('𑂃', '𑂯'), - ('𑃐', '𑃨'), - ('𑄃', '𑄦'), - ('𑅄', '𑅄'), - ('𑅇', '𑅇'), - ('𑅐', '𑅲'), - ('𑅶', '𑅶'), - ('𑆃', '𑆲'), - ('𑇁', '𑇄'), - ('𑇚', '𑇚'), - ('𑇜', '𑇜'), - ('𑈀', '𑈑'), - ('𑈓', '𑈫'), - ('𑈿', '𑉀'), - ('𑊀', '𑊆'), - ('𑊈', '𑊈'), - ('𑊊', '𑊍'), - ('𑊏', '𑊝'), - ('𑊟', '𑊨'), - ('𑊰', '𑋞'), - ('𑌅', '𑌌'), - ('𑌏', '𑌐'), - ('𑌓', '𑌨'), - ('𑌪', '𑌰'), - ('𑌲', '𑌳'), - ('𑌵', '𑌹'), - ('𑌽', '𑌽'), - ('𑍐', '𑍐'), - ('𑍝', '𑍡'), - ('𑎀', '𑎉'), - ('𑎋', '𑎋'), - ('𑎎', '𑎎'), - ('𑎐', '𑎵'), - ('𑎷', '𑎷'), - ('𑏑', '𑏑'), - ('𑏓', '𑏓'), - ('𑐀', '𑐴'), - ('𑑇', '𑑊'), - ('𑑟', '𑑡'), - ('𑒀', '𑒯'), - ('𑓄', '𑓅'), - ('𑓇', '𑓇'), - ('𑖀', '𑖮'), - ('𑗘', '𑗛'), - ('𑘀', '𑘯'), - ('𑙄', '𑙄'), - ('𑚀', '𑚪'), - ('𑚸', '𑚸'), - ('𑠀', '𑠫'), - ('𑢠', '𑣟'), - ('𑣿', '𑤆'), - ('𑤉', '𑤉'), - ('𑤌', '𑤓'), - ('𑤕', '𑤖'), - ('𑤘', '𑤯'), - ('𑤿', '𑤿'), - ('𑥁', '𑥁'), - ('𑦠', '𑦧'), - ('𑦪', '𑧐'), - ('𑧡', '𑧡'), - ('𑧣', '𑧣'), - ('𑨀', '𑨀'), - ('𑨋', '𑨲'), - ('𑨺', '𑨺'), - ('𑩐', '𑩐'), - ('𑩜', '𑪉'), - ('𑪝', '𑪝'), - ('𑪰', '𑫸'), - ('𑯀', '𑯠'), - ('𑰀', '𑰈'), - ('𑰊', '𑰮'), - ('𑱀', '𑱀'), - ('𑱲', '𑲏'), - ('𑴀', '𑴆'), - ('𑴈', '𑴉'), - ('𑴋', '𑴰'), - ('𑵆', '𑵆'), - ('𑵠', '𑵥'), - ('𑵧', '𑵨'), - ('𑵪', '𑶉'), - ('𑶘', '𑶘'), - ('𑻠', '𑻲'), - ('𑼂', '𑼂'), - ('𑼄', '𑼐'), - ('𑼒', '𑼳'), - ('𑾰', '𑾰'), - ('𒀀', '𒎙'), - ('𒐀', '𒑮'), - ('𒒀', '𒕃'), - ('𒾐', '𒿰'), - ('𓀀', '𓐯'), - ('𓑁', '𓑆'), - ('𓑠', '𔏺'), - ('𔐀', '𔙆'), - ('𖄀', '𖄝'), 
- ('𖠀', '𖨸'), - ('𖩀', '𖩞'), - ('𖩰', '𖪾'), - ('𖫐', '𖫭'), - ('𖬀', '𖬯'), - ('𖭀', '𖭃'), - ('𖭣', '𖭷'), - ('𖭽', '𖮏'), - ('𖵀', '𖵬'), - ('𖹀', '𖹿'), - ('𖼀', '𖽊'), - ('𖽐', '𖽐'), - ('𖾓', '𖾟'), - ('𖿠', '𖿡'), - ('𖿣', '𖿣'), - ('𛰀', '𛱪'), - ('𛱰', '𛱼'), - ('𛲀', '𛲈'), - ('𛲐', '𛲙'), - ('𝐀', '𝑔'), - ('𝑖', '𝒜'), - ('𝒞', '𝒟'), - ('𝒢', '𝒢'), - ('𝒥', '𝒦'), - ('𝒩', '𝒬'), - ('𝒮', '𝒹'), - ('𝒻', '𝒻'), - ('𝒽', '𝓃'), - ('𝓅', '𝔅'), - ('𝔇', '𝔊'), - ('𝔍', '𝔔'), - ('𝔖', '𝔜'), - ('𝔞', '𝔹'), - ('𝔻', '𝔾'), - ('𝕀', '𝕄'), - ('𝕆', '𝕆'), - ('𝕊', '𝕐'), - ('𝕒', '𝚥'), - ('𝚨', '𝛀'), - ('𝛂', '𝛚'), - ('𝛜', '𝛺'), - ('𝛼', '𝜔'), - ('𝜖', '𝜴'), - ('𝜶', '𝝎'), - ('𝝐', '𝝮'), - ('𝝰', '𝞈'), - ('𝞊', '𝞨'), - ('𝞪', '𝟂'), - ('𝟄', '𝟋'), - ('𝼀', '𝼞'), - ('𝼥', '𝼪'), - ('𞀰', '𞁭'), - ('𞄀', '𞄬'), - ('𞄷', '𞄽'), - ('𞅎', '𞅎'), - ('𞊐', '𞊭'), - ('𞋀', '𞋫'), - ('𞓐', '𞓫'), - ('𞗐', '𞗭'), - ('𞗰', '𞗰'), - ('𞟠', '𞟦'), - ('𞟨', '𞟫'), - ('𞟭', '𞟮'), - ('𞟰', '𞟾'), - ('𞠀', '𞣄'), - ('𞤀', '𞥃'), - ('𞥋', '𞥋'), - ('𞸀', '𞸃'), - ('𞸅', '𞸟'), - ('𞸡', '𞸢'), - ('𞸤', '𞸤'), - ('𞸧', '𞸧'), - ('𞸩', '𞸲'), - ('𞸴', '𞸷'), - ('𞸹', '𞸹'), - ('𞸻', '𞸻'), - ('𞹂', '𞹂'), - ('𞹇', '𞹇'), - ('𞹉', '𞹉'), - ('𞹋', '𞹋'), - ('𞹍', '𞹏'), - ('𞹑', '𞹒'), - ('𞹔', '𞹔'), - ('𞹗', '𞹗'), - ('𞹙', '𞹙'), - ('𞹛', '𞹛'), - ('𞹝', '𞹝'), - ('𞹟', '𞹟'), - ('𞹡', '𞹢'), - ('𞹤', '𞹤'), - ('𞹧', '𞹪'), - ('𞹬', '𞹲'), - ('𞹴', '𞹷'), - ('𞹹', '𞹼'), - ('𞹾', '𞹾'), - ('𞺀', '𞺉'), - ('𞺋', '𞺛'), - ('𞺡', '𞺣'), - ('𞺥', '𞺩'), - ('𞺫', '𞺻'), - ('🄰', '🅉'), - ('🅐', '🅩'), - ('🅰', '🆉'), -]; - -pub const CR: &'static [(char, char)] = &[('\r', '\r')]; - -pub const DOUBLE_QUOTE: &'static [(char, char)] = &[('"', '"')]; - -pub const EXTEND: &'static [(char, char)] = &[ - ('\u{300}', '\u{36f}'), - ('\u{483}', '\u{489}'), - ('\u{591}', '\u{5bd}'), - ('\u{5bf}', '\u{5bf}'), - ('\u{5c1}', '\u{5c2}'), - ('\u{5c4}', '\u{5c5}'), - ('\u{5c7}', '\u{5c7}'), - ('\u{610}', '\u{61a}'), - ('\u{64b}', '\u{65f}'), - ('\u{670}', '\u{670}'), - ('\u{6d6}', '\u{6dc}'), - ('\u{6df}', '\u{6e4}'), - ('\u{6e7}', '\u{6e8}'), - ('\u{6ea}', '\u{6ed}'), - 
('\u{711}', '\u{711}'), - ('\u{730}', '\u{74a}'), - ('\u{7a6}', '\u{7b0}'), - ('\u{7eb}', '\u{7f3}'), - ('\u{7fd}', '\u{7fd}'), - ('\u{816}', '\u{819}'), - ('\u{81b}', '\u{823}'), - ('\u{825}', '\u{827}'), - ('\u{829}', '\u{82d}'), - ('\u{859}', '\u{85b}'), - ('\u{897}', '\u{89f}'), - ('\u{8ca}', '\u{8e1}'), - ('\u{8e3}', 'ः'), - ('\u{93a}', '\u{93c}'), - ('ा', 'ॏ'), - ('\u{951}', '\u{957}'), - ('\u{962}', '\u{963}'), - ('\u{981}', 'ঃ'), - ('\u{9bc}', '\u{9bc}'), - ('\u{9be}', '\u{9c4}'), - ('ে', 'ৈ'), - ('ো', '\u{9cd}'), - ('\u{9d7}', '\u{9d7}'), - ('\u{9e2}', '\u{9e3}'), - ('\u{9fe}', '\u{9fe}'), - ('\u{a01}', 'ਃ'), - ('\u{a3c}', '\u{a3c}'), - ('ਾ', '\u{a42}'), - ('\u{a47}', '\u{a48}'), - ('\u{a4b}', '\u{a4d}'), - ('\u{a51}', '\u{a51}'), - ('\u{a70}', '\u{a71}'), - ('\u{a75}', '\u{a75}'), - ('\u{a81}', 'ઃ'), - ('\u{abc}', '\u{abc}'), - ('ા', '\u{ac5}'), - ('\u{ac7}', 'ૉ'), - ('ો', '\u{acd}'), - ('\u{ae2}', '\u{ae3}'), - ('\u{afa}', '\u{aff}'), - ('\u{b01}', 'ଃ'), - ('\u{b3c}', '\u{b3c}'), - ('\u{b3e}', '\u{b44}'), - ('େ', 'ୈ'), - ('ୋ', '\u{b4d}'), - ('\u{b55}', '\u{b57}'), - ('\u{b62}', '\u{b63}'), - ('\u{b82}', '\u{b82}'), - ('\u{bbe}', 'ூ'), - ('ெ', 'ை'), - ('ொ', '\u{bcd}'), - ('\u{bd7}', '\u{bd7}'), - ('\u{c00}', '\u{c04}'), - ('\u{c3c}', '\u{c3c}'), - ('\u{c3e}', 'ౄ'), - ('\u{c46}', '\u{c48}'), - ('\u{c4a}', '\u{c4d}'), - ('\u{c55}', '\u{c56}'), - ('\u{c62}', '\u{c63}'), - ('\u{c81}', 'ಃ'), - ('\u{cbc}', '\u{cbc}'), - ('ಾ', 'ೄ'), - ('\u{cc6}', '\u{cc8}'), - ('\u{cca}', '\u{ccd}'), - ('\u{cd5}', '\u{cd6}'), - ('\u{ce2}', '\u{ce3}'), - ('ೳ', 'ೳ'), - ('\u{d00}', 'ഃ'), - ('\u{d3b}', '\u{d3c}'), - ('\u{d3e}', '\u{d44}'), - ('െ', 'ൈ'), - ('ൊ', '\u{d4d}'), - ('\u{d57}', '\u{d57}'), - ('\u{d62}', '\u{d63}'), - ('\u{d81}', 'ඃ'), - ('\u{dca}', '\u{dca}'), - ('\u{dcf}', '\u{dd4}'), - ('\u{dd6}', '\u{dd6}'), - ('ෘ', '\u{ddf}'), - ('ෲ', 'ෳ'), - ('\u{e31}', '\u{e31}'), - ('\u{e34}', '\u{e3a}'), - ('\u{e47}', '\u{e4e}'), - ('\u{eb1}', '\u{eb1}'), - ('\u{eb4}', '\u{ebc}'), - 
('\u{ec8}', '\u{ece}'), - ('\u{f18}', '\u{f19}'), - ('\u{f35}', '\u{f35}'), - ('\u{f37}', '\u{f37}'), - ('\u{f39}', '\u{f39}'), - ('༾', '༿'), - ('\u{f71}', '\u{f84}'), - ('\u{f86}', '\u{f87}'), - ('\u{f8d}', '\u{f97}'), - ('\u{f99}', '\u{fbc}'), - ('\u{fc6}', '\u{fc6}'), - ('ါ', '\u{103e}'), - ('ၖ', '\u{1059}'), - ('\u{105e}', '\u{1060}'), - ('ၢ', 'ၤ'), - ('ၧ', 'ၭ'), - ('\u{1071}', '\u{1074}'), - ('\u{1082}', '\u{108d}'), - ('ႏ', 'ႏ'), - ('ႚ', '\u{109d}'), - ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1715}'), - ('\u{1732}', '\u{1734}'), - ('\u{1752}', '\u{1753}'), - ('\u{1772}', '\u{1773}'), - ('\u{17b4}', '\u{17d3}'), - ('\u{17dd}', '\u{17dd}'), - ('\u{180b}', '\u{180d}'), - ('\u{180f}', '\u{180f}'), - ('\u{1885}', '\u{1886}'), - ('\u{18a9}', '\u{18a9}'), - ('\u{1920}', 'ᤫ'), - ('ᤰ', '\u{193b}'), - ('\u{1a17}', '\u{1a1b}'), - ('ᩕ', '\u{1a5e}'), - ('\u{1a60}', '\u{1a7c}'), - ('\u{1a7f}', '\u{1a7f}'), - ('\u{1ab0}', '\u{1ace}'), - ('\u{1b00}', 'ᬄ'), - ('\u{1b34}', '\u{1b44}'), - ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', 'ᮂ'), - ('ᮡ', '\u{1bad}'), - ('\u{1be6}', '\u{1bf3}'), - ('ᰤ', '\u{1c37}'), - ('\u{1cd0}', '\u{1cd2}'), - ('\u{1cd4}', '\u{1ce8}'), - ('\u{1ced}', '\u{1ced}'), - ('\u{1cf4}', '\u{1cf4}'), - ('᳷', '\u{1cf9}'), - ('\u{1dc0}', '\u{1dff}'), - ('\u{200c}', '\u{200c}'), - ('\u{20d0}', '\u{20f0}'), - ('\u{2cef}', '\u{2cf1}'), - ('\u{2d7f}', '\u{2d7f}'), - ('\u{2de0}', '\u{2dff}'), - ('\u{302a}', '\u{302f}'), - ('\u{3099}', '\u{309a}'), - ('\u{a66f}', '\u{a672}'), - ('\u{a674}', '\u{a67d}'), - ('\u{a69e}', '\u{a69f}'), - ('\u{a6f0}', '\u{a6f1}'), - ('\u{a802}', '\u{a802}'), - ('\u{a806}', '\u{a806}'), - ('\u{a80b}', '\u{a80b}'), - ('ꠣ', 'ꠧ'), - ('\u{a82c}', '\u{a82c}'), - ('ꢀ', 'ꢁ'), - ('ꢴ', '\u{a8c5}'), - ('\u{a8e0}', '\u{a8f1}'), - ('\u{a8ff}', '\u{a8ff}'), - ('\u{a926}', '\u{a92d}'), - ('\u{a947}', '\u{a953}'), - ('\u{a980}', 'ꦃ'), - ('\u{a9b3}', '\u{a9c0}'), - ('\u{a9e5}', '\u{a9e5}'), - ('\u{aa29}', '\u{aa36}'), - ('\u{aa43}', '\u{aa43}'), - 
('\u{aa4c}', 'ꩍ'), - ('ꩻ', 'ꩽ'), - ('\u{aab0}', '\u{aab0}'), - ('\u{aab2}', '\u{aab4}'), - ('\u{aab7}', '\u{aab8}'), - ('\u{aabe}', '\u{aabf}'), - ('\u{aac1}', '\u{aac1}'), - ('ꫫ', 'ꫯ'), - ('ꫵ', '\u{aaf6}'), - ('ꯣ', 'ꯪ'), - ('꯬', '\u{abed}'), - ('\u{fb1e}', '\u{fb1e}'), - ('\u{fe00}', '\u{fe0f}'), - ('\u{fe20}', '\u{fe2f}'), - ('\u{ff9e}', '\u{ff9f}'), - ('\u{101fd}', '\u{101fd}'), - ('\u{102e0}', '\u{102e0}'), - ('\u{10376}', '\u{1037a}'), - ('\u{10a01}', '\u{10a03}'), - ('\u{10a05}', '\u{10a06}'), - ('\u{10a0c}', '\u{10a0f}'), - ('\u{10a38}', '\u{10a3a}'), - ('\u{10a3f}', '\u{10a3f}'), - ('\u{10ae5}', '\u{10ae6}'), - ('\u{10d24}', '\u{10d27}'), - ('\u{10d69}', '\u{10d6d}'), - ('\u{10eab}', '\u{10eac}'), - ('\u{10efc}', '\u{10eff}'), - ('\u{10f46}', '\u{10f50}'), - ('\u{10f82}', '\u{10f85}'), - ('𑀀', '𑀂'), - ('\u{11038}', '\u{11046}'), - ('\u{11070}', '\u{11070}'), - ('\u{11073}', '\u{11074}'), - ('\u{1107f}', '𑂂'), - ('𑂰', '\u{110ba}'), - ('\u{110c2}', '\u{110c2}'), - ('\u{11100}', '\u{11102}'), - ('\u{11127}', '\u{11134}'), - ('𑅅', '𑅆'), - ('\u{11173}', '\u{11173}'), - ('\u{11180}', '𑆂'), - ('𑆳', '\u{111c0}'), - ('\u{111c9}', '\u{111cc}'), - ('𑇎', '\u{111cf}'), - ('𑈬', '\u{11237}'), - ('\u{1123e}', '\u{1123e}'), - ('\u{11241}', '\u{11241}'), - ('\u{112df}', '\u{112ea}'), - ('\u{11300}', '𑌃'), - ('\u{1133b}', '\u{1133c}'), - ('\u{1133e}', '𑍄'), - ('𑍇', '𑍈'), - ('𑍋', '\u{1134d}'), - ('\u{11357}', '\u{11357}'), - ('𑍢', '𑍣'), - ('\u{11366}', '\u{1136c}'), - ('\u{11370}', '\u{11374}'), - ('\u{113b8}', '\u{113c0}'), - ('\u{113c2}', '\u{113c2}'), - ('\u{113c5}', '\u{113c5}'), - ('\u{113c7}', '𑏊'), - ('𑏌', '\u{113d0}'), - ('\u{113d2}', '\u{113d2}'), - ('\u{113e1}', '\u{113e2}'), - ('𑐵', '\u{11446}'), - ('\u{1145e}', '\u{1145e}'), - ('\u{114b0}', '\u{114c3}'), - ('\u{115af}', '\u{115b5}'), - ('𑖸', '\u{115c0}'), - ('\u{115dc}', '\u{115dd}'), - ('𑘰', '\u{11640}'), - ('\u{116ab}', '\u{116b7}'), - ('\u{1171d}', '\u{1172b}'), - ('𑠬', '\u{1183a}'), - ('\u{11930}', '𑤵'), - 
('𑤷', '𑤸'), - ('\u{1193b}', '\u{1193e}'), - ('𑥀', '𑥀'), - ('𑥂', '\u{11943}'), - ('𑧑', '\u{119d7}'), - ('\u{119da}', '\u{119e0}'), - ('𑧤', '𑧤'), - ('\u{11a01}', '\u{11a0a}'), - ('\u{11a33}', '𑨹'), - ('\u{11a3b}', '\u{11a3e}'), - ('\u{11a47}', '\u{11a47}'), - ('\u{11a51}', '\u{11a5b}'), - ('\u{11a8a}', '\u{11a99}'), - ('𑰯', '\u{11c36}'), - ('\u{11c38}', '\u{11c3f}'), - ('\u{11c92}', '\u{11ca7}'), - ('𑲩', '\u{11cb6}'), - ('\u{11d31}', '\u{11d36}'), - ('\u{11d3a}', '\u{11d3a}'), - ('\u{11d3c}', '\u{11d3d}'), - ('\u{11d3f}', '\u{11d45}'), - ('\u{11d47}', '\u{11d47}'), - ('𑶊', '𑶎'), - ('\u{11d90}', '\u{11d91}'), - ('𑶓', '\u{11d97}'), - ('\u{11ef3}', '𑻶'), - ('\u{11f00}', '\u{11f01}'), - ('𑼃', '𑼃'), - ('𑼴', '\u{11f3a}'), - ('𑼾', '\u{11f42}'), - ('\u{11f5a}', '\u{11f5a}'), - ('\u{13440}', '\u{13440}'), - ('\u{13447}', '\u{13455}'), - ('\u{1611e}', '\u{1612f}'), - ('\u{16af0}', '\u{16af4}'), - ('\u{16b30}', '\u{16b36}'), - ('\u{16f4f}', '\u{16f4f}'), - ('𖽑', '𖾇'), - ('\u{16f8f}', '\u{16f92}'), - ('\u{16fe4}', '\u{16fe4}'), - ('\u{16ff0}', '\u{16ff1}'), - ('\u{1bc9d}', '\u{1bc9e}'), - ('\u{1cf00}', '\u{1cf2d}'), - ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d165}', '\u{1d169}'), - ('\u{1d16d}', '\u{1d172}'), - ('\u{1d17b}', '\u{1d182}'), - ('\u{1d185}', '\u{1d18b}'), - ('\u{1d1aa}', '\u{1d1ad}'), - ('\u{1d242}', '\u{1d244}'), - ('\u{1da00}', '\u{1da36}'), - ('\u{1da3b}', '\u{1da6c}'), - ('\u{1da75}', '\u{1da75}'), - ('\u{1da84}', '\u{1da84}'), - ('\u{1da9b}', '\u{1da9f}'), - ('\u{1daa1}', '\u{1daaf}'), - ('\u{1e000}', '\u{1e006}'), - ('\u{1e008}', '\u{1e018}'), - ('\u{1e01b}', '\u{1e021}'), - ('\u{1e023}', '\u{1e024}'), - ('\u{1e026}', '\u{1e02a}'), - ('\u{1e08f}', '\u{1e08f}'), - ('\u{1e130}', '\u{1e136}'), - ('\u{1e2ae}', '\u{1e2ae}'), - ('\u{1e2ec}', '\u{1e2ef}'), - ('\u{1e4ec}', '\u{1e4ef}'), - ('\u{1e5ee}', '\u{1e5ef}'), - ('\u{1e8d0}', '\u{1e8d6}'), - ('\u{1e944}', '\u{1e94a}'), - ('🏻', '🏿'), - ('\u{e0020}', '\u{e007f}'), - ('\u{e0100}', '\u{e01ef}'), -]; - -pub const 
EXTENDNUMLET: &'static [(char, char)] = &[ - ('_', '_'), - ('\u{202f}', '\u{202f}'), - ('‿', '⁀'), - ('⁔', '⁔'), - ('︳', '︴'), - ('﹍', '﹏'), - ('_', '_'), -]; - -pub const FORMAT: &'static [(char, char)] = &[ - ('\u{ad}', '\u{ad}'), - ('\u{61c}', '\u{61c}'), - ('\u{180e}', '\u{180e}'), - ('\u{200e}', '\u{200f}'), - ('\u{202a}', '\u{202e}'), - ('\u{2060}', '\u{2064}'), - ('\u{2066}', '\u{206f}'), - ('\u{feff}', '\u{feff}'), - ('\u{fff9}', '\u{fffb}'), - ('\u{13430}', '\u{1343f}'), - ('\u{1bca0}', '\u{1bca3}'), - ('\u{1d173}', '\u{1d17a}'), - ('\u{e0001}', '\u{e0001}'), -]; - -pub const HEBREW_LETTER: &'static [(char, char)] = &[ - ('א', 'ת'), - ('ׯ', 'ײ'), - ('יִ', 'יִ'), - ('ײַ', 'ﬨ'), - ('שׁ', 'זּ'), - ('טּ', 'לּ'), - ('מּ', 'מּ'), - ('נּ', 'סּ'), - ('ףּ', 'פּ'), - ('צּ', 'ﭏ'), -]; - -pub const KATAKANA: &'static [(char, char)] = &[ - ('〱', '〵'), - ('゛', '゜'), - ('゠', 'ヺ'), - ('ー', 'ヿ'), - ('ㇰ', 'ㇿ'), - ('㋐', '㋾'), - ('㌀', '㍗'), - ('ヲ', 'ン'), - ('𚿰', '𚿳'), - ('𚿵', '𚿻'), - ('𚿽', '𚿾'), - ('𛀀', '𛀀'), - ('𛄠', '𛄢'), - ('𛅕', '𛅕'), - ('𛅤', '𛅧'), -]; - -pub const LF: &'static [(char, char)] = &[('\n', '\n')]; - -pub const MIDLETTER: &'static [(char, char)] = &[ - (':', ':'), - ('·', '·'), - ('·', '·'), - ('՟', '՟'), - ('״', '״'), - ('‧', '‧'), - ('︓', '︓'), - ('﹕', '﹕'), - (':', ':'), -]; - -pub const MIDNUM: &'static [(char, char)] = &[ - (',', ','), - (';', ';'), - (';', ';'), - ('։', '։'), - ('،', '؍'), - ('٬', '٬'), - ('߸', '߸'), - ('⁄', '⁄'), - ('﹐', '﹐'), - ('﹔', '﹔'), - (',', ','), - (';', ';'), -]; - -pub const MIDNUMLET: &'static [(char, char)] = &[ - ('.', '.'), - ('‘', '’'), - ('․', '․'), - ('﹒', '﹒'), - (''', '''), - ('.', '.'), -]; - -pub const NEWLINE: &'static [(char, char)] = - &[('\u{b}', '\u{c}'), ('\u{85}', '\u{85}'), ('\u{2028}', '\u{2029}')]; - -pub const NUMERIC: &'static [(char, char)] = &[ - ('0', '9'), - ('\u{600}', '\u{605}'), - ('٠', '٩'), - ('٫', '٫'), - ('\u{6dd}', '\u{6dd}'), - ('۰', '۹'), - ('߀', '߉'), - ('\u{890}', '\u{891}'), - ('\u{8e2}', 
'\u{8e2}'), - ('०', '९'), - ('০', '৯'), - ('੦', '੯'), - ('૦', '૯'), - ('୦', '୯'), - ('௦', '௯'), - ('౦', '౯'), - ('೦', '೯'), - ('൦', '൯'), - ('෦', '෯'), - ('๐', '๙'), - ('໐', '໙'), - ('༠', '༩'), - ('၀', '၉'), - ('႐', '႙'), - ('០', '៩'), - ('᠐', '᠙'), - ('᥆', '᥏'), - ('᧐', '᧚'), - ('᪀', '᪉'), - ('᪐', '᪙'), - ('᭐', '᭙'), - ('᮰', '᮹'), - ('᱀', '᱉'), - ('᱐', '᱙'), - ('꘠', '꘩'), - ('꣐', '꣙'), - ('꤀', '꤉'), - ('꧐', '꧙'), - ('꧰', '꧹'), - ('꩐', '꩙'), - ('꯰', '꯹'), - ('0', '9'), - ('𐒠', '𐒩'), - ('𐴰', '𐴹'), - ('𐵀', '𐵉'), - ('𑁦', '𑁯'), - ('\u{110bd}', '\u{110bd}'), - ('\u{110cd}', '\u{110cd}'), - ('𑃰', '𑃹'), - ('𑄶', '𑄿'), - ('𑇐', '𑇙'), - ('𑋰', '𑋹'), - ('𑑐', '𑑙'), - ('𑓐', '𑓙'), - ('𑙐', '𑙙'), - ('𑛀', '𑛉'), - ('𑛐', '𑛣'), - ('𑜰', '𑜹'), - ('𑣠', '𑣩'), - ('𑥐', '𑥙'), - ('𑯰', '𑯹'), - ('𑱐', '𑱙'), - ('𑵐', '𑵙'), - ('𑶠', '𑶩'), - ('𑽐', '𑽙'), - ('𖄰', '𖄹'), - ('𖩠', '𖩩'), - ('𖫀', '𖫉'), - ('𖭐', '𖭙'), - ('𖵰', '𖵹'), - ('𜳰', '𜳹'), - ('𝟎', '𝟿'), - ('𞅀', '𞅉'), - ('𞋰', '𞋹'), - ('𞓰', '𞓹'), - ('𞗱', '𞗺'), - ('𞥐', '𞥙'), - ('🯰', '🯹'), -]; - -pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('🇦', '🇿')]; - -pub const SINGLE_QUOTE: &'static [(char, char)] = &[('\'', '\'')]; - -pub const WSEGSPACE: &'static [(char, char)] = &[ - (' ', ' '), - ('\u{1680}', '\u{1680}'), - ('\u{2000}', '\u{2006}'), - ('\u{2008}', '\u{200a}'), - ('\u{205f}', '\u{205f}'), - ('\u{3000}', '\u{3000}'), -]; - -pub const ZWJ: &'static [(char, char)] = &[('\u{200d}', '\u{200d}')]; diff --git a/vendor/regex-syntax/src/utf8.rs b/vendor/regex-syntax/src/utf8.rs deleted file mode 100644 index 69d74945..00000000 --- a/vendor/regex-syntax/src/utf8.rs +++ /dev/null @@ -1,592 +0,0 @@ -/*! -Converts ranges of Unicode scalar values to equivalent ranges of UTF-8 bytes. - -This is sub-module is useful for constructing byte based automatons that need -to embed UTF-8 decoding. The most common use of this module is in conjunction -with the [`hir::ClassUnicodeRange`](crate::hir::ClassUnicodeRange) type. 
- -See the documentation on the `Utf8Sequences` iterator for more details and -an example. - -# Wait, what is this? - -This is simplest to explain with an example. Let's say you wanted to test -whether a particular byte sequence was a Cyrillic character. One possible -scalar value range is `[0400-04FF]`. The set of allowed bytes for this -range can be expressed as a sequence of byte ranges: - -```text -[D0-D3][80-BF] -``` - -This is simple enough: simply encode the boundaries, `0400` encodes to -`D0 80` and `04FF` encodes to `D3 BF`, and create ranges from each -corresponding pair of bytes: `D0` to `D3` and `80` to `BF`. - -However, what if you wanted to add the Cyrillic Supplementary characters to -your range? Your range might then become `[0400-052F]`. The same procedure -as above doesn't quite work because `052F` encodes to `D4 AF`. The byte ranges -you'd get from the previous transformation would be `[D0-D4][80-AF]`. However, -this isn't quite correct because this range doesn't capture many characters, -for example, `04FF` (because its last byte, `BF` isn't in the range `80-AF`). - -Instead, you need multiple sequences of byte ranges: - -```text -[D0-D3][80-BF] # matches codepoints 0400-04FF -[D4][80-AF] # matches codepoints 0500-052F -``` - -This gets even more complicated if you want bigger ranges, particularly if -they naively contain surrogate codepoints. For example, the sequence of byte -ranges for the basic multilingual plane (`[0000-FFFF]`) look like this: - -```text -[0-7F] -[C2-DF][80-BF] -[E0][A0-BF][80-BF] -[E1-EC][80-BF][80-BF] -[ED][80-9F][80-BF] -[EE-EF][80-BF][80-BF] -``` - -Note that the byte ranges above will *not* match any erroneous encoding of -UTF-8, including encodings of surrogate codepoints. 
- -And, of course, for all of Unicode (`[000000-10FFFF]`): - -```text -[0-7F] -[C2-DF][80-BF] -[E0][A0-BF][80-BF] -[E1-EC][80-BF][80-BF] -[ED][80-9F][80-BF] -[EE-EF][80-BF][80-BF] -[F0][90-BF][80-BF][80-BF] -[F1-F3][80-BF][80-BF][80-BF] -[F4][80-8F][80-BF][80-BF] -``` - -This module automates the process of creating these byte ranges from ranges of -Unicode scalar values. - -# Lineage - -I got the idea and general implementation strategy from Russ Cox in his -[article on regexps](https://web.archive.org/web/20160404141123/https://swtch.com/~rsc/regexp/regexp3.html) and RE2. -Russ Cox got it from Ken Thompson's `grep` (no source, folk lore?). -I also got the idea from -[Lucene](https://github.com/apache/lucene-solr/blob/ae93f4e7ac6a3908046391de35d4f50a0d3c59ca/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java), -which uses it for executing automata on their term index. -*/ - -use core::{char, fmt, iter::FusedIterator, slice}; - -use alloc::{vec, vec::Vec}; - -const MAX_UTF8_BYTES: usize = 4; - -/// Utf8Sequence represents a sequence of byte ranges. -/// -/// To match a Utf8Sequence, a candidate byte sequence must match each -/// successive range. -/// -/// For example, if there are two ranges, `[C2-DF][80-BF]`, then the byte -/// sequence `\xDD\x61` would not match because `0x61 < 0x80`. -#[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord)] -pub enum Utf8Sequence { - /// One byte range. - One(Utf8Range), - /// Two successive byte ranges. - Two([Utf8Range; 2]), - /// Three successive byte ranges. - Three([Utf8Range; 3]), - /// Four successive byte ranges. - Four([Utf8Range; 4]), -} - -impl Utf8Sequence { - /// Creates a new UTF-8 sequence from the encoded bytes of a scalar value - /// range. - /// - /// This assumes that `start` and `end` have the same length. 
- fn from_encoded_range(start: &[u8], end: &[u8]) -> Self { - assert_eq!(start.len(), end.len()); - match start.len() { - 2 => Utf8Sequence::Two([ - Utf8Range::new(start[0], end[0]), - Utf8Range::new(start[1], end[1]), - ]), - 3 => Utf8Sequence::Three([ - Utf8Range::new(start[0], end[0]), - Utf8Range::new(start[1], end[1]), - Utf8Range::new(start[2], end[2]), - ]), - 4 => Utf8Sequence::Four([ - Utf8Range::new(start[0], end[0]), - Utf8Range::new(start[1], end[1]), - Utf8Range::new(start[2], end[2]), - Utf8Range::new(start[3], end[3]), - ]), - n => unreachable!("invalid encoded length: {}", n), - } - } - - /// Returns the underlying sequence of byte ranges as a slice. - pub fn as_slice(&self) -> &[Utf8Range] { - use self::Utf8Sequence::*; - match *self { - One(ref r) => slice::from_ref(r), - Two(ref r) => &r[..], - Three(ref r) => &r[..], - Four(ref r) => &r[..], - } - } - - /// Returns the number of byte ranges in this sequence. - /// - /// The length is guaranteed to be in the closed interval `[1, 4]`. - pub fn len(&self) -> usize { - self.as_slice().len() - } - - /// Reverses the ranges in this sequence. - /// - /// For example, if this corresponds to the following sequence: - /// - /// ```text - /// [D0-D3][80-BF] - /// ``` - /// - /// Then after reversal, it will be - /// - /// ```text - /// [80-BF][D0-D3] - /// ``` - /// - /// This is useful when one is constructing a UTF-8 automaton to match - /// character classes in reverse. - pub fn reverse(&mut self) { - match *self { - Utf8Sequence::One(_) => {} - Utf8Sequence::Two(ref mut x) => x.reverse(), - Utf8Sequence::Three(ref mut x) => x.reverse(), - Utf8Sequence::Four(ref mut x) => x.reverse(), - } - } - - /// Returns true if and only if a prefix of `bytes` matches this sequence - /// of byte ranges. 
- pub fn matches(&self, bytes: &[u8]) -> bool { - if bytes.len() < self.len() { - return false; - } - for (&b, r) in bytes.iter().zip(self) { - if !r.matches(b) { - return false; - } - } - true - } -} - -impl<'a> IntoIterator for &'a Utf8Sequence { - type IntoIter = slice::Iter<'a, Utf8Range>; - type Item = &'a Utf8Range; - - fn into_iter(self) -> Self::IntoIter { - self.as_slice().iter() - } -} - -impl fmt::Debug for Utf8Sequence { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use self::Utf8Sequence::*; - match *self { - One(ref r) => write!(f, "{:?}", r), - Two(ref r) => write!(f, "{:?}{:?}", r[0], r[1]), - Three(ref r) => write!(f, "{:?}{:?}{:?}", r[0], r[1], r[2]), - Four(ref r) => { - write!(f, "{:?}{:?}{:?}{:?}", r[0], r[1], r[2], r[3]) - } - } - } -} - -/// A single inclusive range of UTF-8 bytes. -#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)] -pub struct Utf8Range { - /// Start of byte range (inclusive). - pub start: u8, - /// End of byte range (inclusive). - pub end: u8, -} - -impl Utf8Range { - fn new(start: u8, end: u8) -> Self { - Utf8Range { start, end } - } - - /// Returns true if and only if the given byte is in this range. - pub fn matches(&self, b: u8) -> bool { - self.start <= b && b <= self.end - } -} - -impl fmt::Debug for Utf8Range { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.start == self.end { - write!(f, "[{:X}]", self.start) - } else { - write!(f, "[{:X}-{:X}]", self.start, self.end) - } - } -} - -/// An iterator over ranges of matching UTF-8 byte sequences. -/// -/// The iteration represents an alternation of comprehensive byte sequences -/// that match precisely the set of UTF-8 encoded scalar values. -/// -/// A byte sequence corresponds to one of the scalar values in the range given -/// if and only if it completely matches exactly one of the sequences of byte -/// ranges produced by this iterator. -/// -/// Each sequence of byte ranges matches a unique set of bytes. 
That is, no two -/// sequences will match the same bytes. -/// -/// # Example -/// -/// This shows how to match an arbitrary byte sequence against a range of -/// scalar values. -/// -/// ```rust -/// use regex_syntax::utf8::{Utf8Sequences, Utf8Sequence}; -/// -/// fn matches(seqs: &[Utf8Sequence], bytes: &[u8]) -> bool { -/// for range in seqs { -/// if range.matches(bytes) { -/// return true; -/// } -/// } -/// false -/// } -/// -/// // Test the basic multilingual plane. -/// let seqs: Vec<_> = Utf8Sequences::new('\u{0}', '\u{FFFF}').collect(); -/// -/// // UTF-8 encoding of 'a'. -/// assert!(matches(&seqs, &[0x61])); -/// // UTF-8 encoding of '☃' (`\u{2603}`). -/// assert!(matches(&seqs, &[0xE2, 0x98, 0x83])); -/// // UTF-8 encoding of `\u{10348}` (outside the BMP). -/// assert!(!matches(&seqs, &[0xF0, 0x90, 0x8D, 0x88])); -/// // Tries to match against a UTF-8 encoding of a surrogate codepoint, -/// // which is invalid UTF-8, and therefore fails, despite the fact that -/// // the corresponding codepoint (0xD800) falls in the range given. -/// assert!(!matches(&seqs, &[0xED, 0xA0, 0x80])); -/// // And fails against plain old invalid UTF-8. -/// assert!(!matches(&seqs, &[0xFF, 0xFF])); -/// ``` -/// -/// If this example seems circuitous, that's because it is! It's meant to be -/// illustrative. In practice, you could just try to decode your byte sequence -/// and compare it with the scalar value range directly. However, this is not -/// always possible (for example, in a byte based automaton). -#[derive(Debug)] -pub struct Utf8Sequences { - range_stack: Vec, -} - -impl Utf8Sequences { - /// Create a new iterator over UTF-8 byte ranges for the scalar value range - /// given. - pub fn new(start: char, end: char) -> Self { - let range = - ScalarRange { start: u32::from(start), end: u32::from(end) }; - Utf8Sequences { range_stack: vec![range] } - } - - /// reset resets the scalar value range. - /// Any existing state is cleared, but resources may be reused. 
- /// - /// N.B. Benchmarks say that this method is dubious. - #[doc(hidden)] - pub fn reset(&mut self, start: char, end: char) { - self.range_stack.clear(); - self.push(u32::from(start), u32::from(end)); - } - - fn push(&mut self, start: u32, end: u32) { - self.range_stack.push(ScalarRange { start, end }); - } -} - -struct ScalarRange { - start: u32, - end: u32, -} - -impl fmt::Debug for ScalarRange { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "ScalarRange({:X}, {:X})", self.start, self.end) - } -} - -impl Iterator for Utf8Sequences { - type Item = Utf8Sequence; - - fn next(&mut self) -> Option { - 'TOP: while let Some(mut r) = self.range_stack.pop() { - 'INNER: loop { - if let Some((r1, r2)) = r.split() { - self.push(r2.start, r2.end); - r.start = r1.start; - r.end = r1.end; - continue 'INNER; - } - if !r.is_valid() { - continue 'TOP; - } - for i in 1..MAX_UTF8_BYTES { - let max = max_scalar_value(i); - if r.start <= max && max < r.end { - self.push(max + 1, r.end); - r.end = max; - continue 'INNER; - } - } - if let Some(ascii_range) = r.as_ascii() { - return Some(Utf8Sequence::One(ascii_range)); - } - for i in 1..MAX_UTF8_BYTES { - let m = (1 << (6 * i)) - 1; - if (r.start & !m) != (r.end & !m) { - if (r.start & m) != 0 { - self.push((r.start | m) + 1, r.end); - r.end = r.start | m; - continue 'INNER; - } - if (r.end & m) != m { - self.push(r.end & !m, r.end); - r.end = (r.end & !m) - 1; - continue 'INNER; - } - } - } - let mut start = [0; MAX_UTF8_BYTES]; - let mut end = [0; MAX_UTF8_BYTES]; - let n = r.encode(&mut start, &mut end); - return Some(Utf8Sequence::from_encoded_range( - &start[0..n], - &end[0..n], - )); - } - } - None - } -} - -impl FusedIterator for Utf8Sequences {} - -impl ScalarRange { - /// split splits this range if it overlaps with a surrogate codepoint. - /// - /// Either or both ranges may be invalid. 
- fn split(&self) -> Option<(ScalarRange, ScalarRange)> { - if self.start < 0xE000 && self.end > 0xD7FF { - Some(( - ScalarRange { start: self.start, end: 0xD7FF }, - ScalarRange { start: 0xE000, end: self.end }, - )) - } else { - None - } - } - - /// is_valid returns true if and only if start <= end. - fn is_valid(&self) -> bool { - self.start <= self.end - } - - /// as_ascii returns this range as a Utf8Range if and only if all scalar - /// values in this range can be encoded as a single byte. - fn as_ascii(&self) -> Option { - if self.is_ascii() { - let start = u8::try_from(self.start).unwrap(); - let end = u8::try_from(self.end).unwrap(); - Some(Utf8Range::new(start, end)) - } else { - None - } - } - - /// is_ascii returns true if the range is ASCII only (i.e., takes a single - /// byte to encode any scalar value). - fn is_ascii(&self) -> bool { - self.is_valid() && self.end <= 0x7f - } - - /// encode writes the UTF-8 encoding of the start and end of this range - /// to the corresponding destination slices, and returns the number of - /// bytes written. - /// - /// The slices should have room for at least `MAX_UTF8_BYTES`. 
- fn encode(&self, start: &mut [u8], end: &mut [u8]) -> usize { - let cs = char::from_u32(self.start).unwrap(); - let ce = char::from_u32(self.end).unwrap(); - let ss = cs.encode_utf8(start); - let se = ce.encode_utf8(end); - assert_eq!(ss.len(), se.len()); - ss.len() - } -} - -fn max_scalar_value(nbytes: usize) -> u32 { - match nbytes { - 1 => 0x007F, - 2 => 0x07FF, - 3 => 0xFFFF, - 4 => 0x0010_FFFF, - _ => unreachable!("invalid UTF-8 byte sequence size"), - } -} - -#[cfg(test)] -mod tests { - use core::char; - - use alloc::{vec, vec::Vec}; - - use crate::utf8::{Utf8Range, Utf8Sequences}; - - fn rutf8(s: u8, e: u8) -> Utf8Range { - Utf8Range::new(s, e) - } - - fn never_accepts_surrogate_codepoints(start: char, end: char) { - for cp in 0xD800..0xE000 { - let buf = encode_surrogate(cp); - for r in Utf8Sequences::new(start, end) { - if r.matches(&buf) { - panic!( - "Sequence ({:X}, {:X}) contains range {:?}, \ - which matches surrogate code point {:X} \ - with encoded bytes {:?}", - u32::from(start), - u32::from(end), - r, - cp, - buf, - ); - } - } - } - } - - #[test] - fn codepoints_no_surrogates() { - never_accepts_surrogate_codepoints('\u{0}', '\u{FFFF}'); - never_accepts_surrogate_codepoints('\u{0}', '\u{10FFFF}'); - never_accepts_surrogate_codepoints('\u{0}', '\u{10FFFE}'); - never_accepts_surrogate_codepoints('\u{80}', '\u{10FFFF}'); - never_accepts_surrogate_codepoints('\u{D7FF}', '\u{E000}'); - } - - #[test] - fn single_codepoint_one_sequence() { - // Tests that every range of scalar values that contains a single - // scalar value is recognized by one sequence of byte ranges. 
- for i in 0x0..=0x0010_FFFF { - let c = match char::from_u32(i) { - None => continue, - Some(c) => c, - }; - let seqs: Vec<_> = Utf8Sequences::new(c, c).collect(); - assert_eq!(seqs.len(), 1); - } - } - - #[test] - fn bmp() { - use crate::utf8::Utf8Sequence::*; - - let seqs = Utf8Sequences::new('\u{0}', '\u{FFFF}').collect::>(); - assert_eq!( - seqs, - vec![ - One(rutf8(0x0, 0x7F)), - Two([rutf8(0xC2, 0xDF), rutf8(0x80, 0xBF)]), - Three([ - rutf8(0xE0, 0xE0), - rutf8(0xA0, 0xBF), - rutf8(0x80, 0xBF) - ]), - Three([ - rutf8(0xE1, 0xEC), - rutf8(0x80, 0xBF), - rutf8(0x80, 0xBF) - ]), - Three([ - rutf8(0xED, 0xED), - rutf8(0x80, 0x9F), - rutf8(0x80, 0xBF) - ]), - Three([ - rutf8(0xEE, 0xEF), - rutf8(0x80, 0xBF), - rutf8(0x80, 0xBF) - ]), - ] - ); - } - - #[test] - fn reverse() { - use crate::utf8::Utf8Sequence::*; - - let mut s = One(rutf8(0xA, 0xB)); - s.reverse(); - assert_eq!(s.as_slice(), &[rutf8(0xA, 0xB)]); - - let mut s = Two([rutf8(0xA, 0xB), rutf8(0xB, 0xC)]); - s.reverse(); - assert_eq!(s.as_slice(), &[rutf8(0xB, 0xC), rutf8(0xA, 0xB)]); - - let mut s = Three([rutf8(0xA, 0xB), rutf8(0xB, 0xC), rutf8(0xC, 0xD)]); - s.reverse(); - assert_eq!( - s.as_slice(), - &[rutf8(0xC, 0xD), rutf8(0xB, 0xC), rutf8(0xA, 0xB)] - ); - - let mut s = Four([ - rutf8(0xA, 0xB), - rutf8(0xB, 0xC), - rutf8(0xC, 0xD), - rutf8(0xD, 0xE), - ]); - s.reverse(); - assert_eq!( - s.as_slice(), - &[ - rutf8(0xD, 0xE), - rutf8(0xC, 0xD), - rutf8(0xB, 0xC), - rutf8(0xA, 0xB) - ] - ); - } - - fn encode_surrogate(cp: u32) -> [u8; 3] { - const TAG_CONT: u8 = 0b1000_0000; - const TAG_THREE_B: u8 = 0b1110_0000; - - assert!(0xD800 <= cp && cp < 0xE000); - let mut dst = [0; 3]; - dst[0] = u8::try_from(cp >> 12 & 0x0F).unwrap() | TAG_THREE_B; - dst[1] = u8::try_from(cp >> 6 & 0x3F).unwrap() | TAG_CONT; - dst[2] = u8::try_from(cp & 0x3F).unwrap() | TAG_CONT; - dst - } -} diff --git a/vendor/regex-syntax/test b/vendor/regex-syntax/test deleted file mode 100755 index 8626c3bf..00000000 --- 
a/vendor/regex-syntax/test +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -set -e - -# cd to the directory containing this crate's Cargo.toml so that we don't need -# to pass --manifest-path to every `cargo` command. -cd "$(dirname "$0")" - -# This is a convenience script for running a broad swath of the syntax tests. -echo "===== DEFAULT FEATURES ===" -cargo test - -features=( - std - unicode - unicode-age - unicode-bool - unicode-case - unicode-gencat - unicode-perl - unicode-script - unicode-segment -) -for f in "${features[@]}"; do - echo "=== FEATURE: $f ===" - # We only run library tests because I couldn't figure out how to easily - # make doc tests run in 'no_std' mode. In particular, without the Error - # trait, using '?' in doc tests seems tricky. - cargo test --no-default-features --lib --features "$f" -done -- cgit v1.2.3