summaryrefslogtreecommitdiff
path: root/vendor/regex-syntax
diff options
context:
space:
mode:
authormo khan <mo@mokhan.ca>2025-07-15 16:37:08 -0600
committermo khan <mo@mokhan.ca>2025-07-17 16:30:22 -0600
commit45df4d0d9b577fecee798d672695fe24ff57fb1b (patch)
tree1b99bf645035b58e0d6db08c7a83521f41f7a75b /vendor/regex-syntax
parentf94f79608393d4ab127db63cc41668445ef6b243 (diff)
feat: migrate from Cedar to SpiceDB authorization system
This is a major architectural change that replaces the Cedar policy-based authorization system with SpiceDB's relation-based authorization.

Key changes:
- Migrate from Rust to Go implementation
- Replace Cedar policies with SpiceDB schema and relationships
- Switch from Envoy `ext_authz` with Cedar to SpiceDB permission checks
- Update build system and dependencies for Go ecosystem
- Maintain Envoy integration for external authorization

This change enables more flexible permission modeling through SpiceDB's Google Zanzibar-inspired relation-based system, supporting complex hierarchical permissions that were difficult to express in Cedar.

Breaking change: Existing Cedar policies and Rust-based configuration will no longer work and need to be migrated to SpiceDB schema.
Diffstat (limited to 'vendor/regex-syntax')
-rw-r--r--vendor/regex-syntax/.cargo-checksum.json1
-rw-r--r--vendor/regex-syntax/Cargo.toml74
-rw-r--r--vendor/regex-syntax/LICENSE-APACHE201
-rw-r--r--vendor/regex-syntax/LICENSE-MIT25
-rw-r--r--vendor/regex-syntax/README.md96
-rw-r--r--vendor/regex-syntax/benches/bench.rs63
-rw-r--r--vendor/regex-syntax/src/ast/mod.rs1809
-rw-r--r--vendor/regex-syntax/src/ast/parse.rs6377
-rw-r--r--vendor/regex-syntax/src/ast/print.rs577
-rw-r--r--vendor/regex-syntax/src/ast/visitor.rs522
-rw-r--r--vendor/regex-syntax/src/debug.rs107
-rw-r--r--vendor/regex-syntax/src/either.rs8
-rw-r--r--vendor/regex-syntax/src/error.rs311
-rw-r--r--vendor/regex-syntax/src/hir/interval.rs564
-rw-r--r--vendor/regex-syntax/src/hir/literal.rs3214
-rw-r--r--vendor/regex-syntax/src/hir/mod.rs3873
-rw-r--r--vendor/regex-syntax/src/hir/print.rs608
-rw-r--r--vendor/regex-syntax/src/hir/translate.rs3744
-rw-r--r--vendor/regex-syntax/src/hir/visitor.rs215
-rw-r--r--vendor/regex-syntax/src/lib.rs431
-rw-r--r--vendor/regex-syntax/src/parser.rs254
-rw-r--r--vendor/regex-syntax/src/rank.rs258
-rw-r--r--vendor/regex-syntax/src/unicode.rs1041
-rw-r--r--vendor/regex-syntax/src/unicode_tables/LICENSE-UNICODE57
-rw-r--r--vendor/regex-syntax/src/unicode_tables/age.rs1846
-rw-r--r--vendor/regex-syntax/src/unicode_tables/case_folding_simple.rs2948
-rw-r--r--vendor/regex-syntax/src/unicode_tables/general_category.rs6717
-rw-r--r--vendor/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs1420
-rw-r--r--vendor/regex-syntax/src/unicode_tables/mod.rs57
-rw-r--r--vendor/regex-syntax/src/unicode_tables/perl_decimal.rs84
-rw-r--r--vendor/regex-syntax/src/unicode_tables/perl_space.rs23
-rw-r--r--vendor/regex-syntax/src/unicode_tables/perl_word.rs806
-rw-r--r--vendor/regex-syntax/src/unicode_tables/property_bool.rs12095
-rw-r--r--vendor/regex-syntax/src/unicode_tables/property_names.rs281
-rw-r--r--vendor/regex-syntax/src/unicode_tables/property_values.rs956
-rw-r--r--vendor/regex-syntax/src/unicode_tables/script.rs1300
-rw-r--r--vendor/regex-syntax/src/unicode_tables/script_extension.rs1718
-rw-r--r--vendor/regex-syntax/src/unicode_tables/sentence_break.rs2530
-rw-r--r--vendor/regex-syntax/src/unicode_tables/word_break.rs1152
-rw-r--r--vendor/regex-syntax/src/utf8.rs592
-rwxr-xr-xvendor/regex-syntax/test30
41 files changed, 0 insertions, 58985 deletions
diff --git a/vendor/regex-syntax/.cargo-checksum.json b/vendor/regex-syntax/.cargo-checksum.json
deleted file mode 100644
index 3b7b712f..00000000
--- a/vendor/regex-syntax/.cargo-checksum.json
+++ /dev/null
@@ -1 +0,0 @@
-{"files":{"Cargo.toml":"362cfcf492ed93948ca374491d8dc3b688da0f90d00c2bb9a3892ec2ac27e276","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"b2484aa7e66fb92d1378e9a7ce7605af18f77cb12c179866eaf92ba28cfec1d9","benches/bench.rs":"d2b6ae5b939abd6093064f144b981b7739d7f474ec0698a1268052fc92406635","src/ast/mod.rs":"21cda9fe9e1810b285cb8f8a2aa5eeaff6c38e256ceed036b68c66fb6b0124d6","src/ast/parse.rs":"89a3701a9a95fea692be925e97b7dcfc5af1ac41f20e8f054eafaeb391e8dec2","src/ast/print.rs":"99cb69ece252ef31e0be177fb3364797eb30b785f936532b8dcd8106e7be0738","src/ast/visitor.rs":"f0fdf758801fe70e6b299b73ab63196e814af95ef6eccad7ef4f72075743fcf6","src/debug.rs":"7a16cca02be9715fdc8c26a32279465774623cd12fab1ec59ac25a6e3047817f","src/either.rs":"1758e3edd056884eccadd995708d1e374ba9aa65846bd0e13b1aae852607c560","src/error.rs":"01a67e3407b0d0d869119363e47a94d92158834bfe5936366c2e3f6f4ed13f36","src/hir/interval.rs":"74d75837d24ab9a3cff33b375b70694cdd3b9a4610c799137533f365755ba604","src/hir/literal.rs":"6a8108b8919fbfd9ab93072846124c51d2998489810fcd6e7a89fdccc45833e0","src/hir/mod.rs":"599ceb4921f2345a7d01d2390188fad13f236b98efe7a38c9beb9a0ce5c4ebad","src/hir/print.rs":"ad51c515c933bfd67d307ba3d7e6ac59c9c5903b4f393a9f9a4785c92b88348d","src/hir/translate.rs":"6129a12a686a6ec8965cdeb9f889640891da6aae75995606080fa88d4dd1a602","src/hir/visitor.rs":"71ca9c93aa48a5ed445399659fa6455093a1bbd9ef44b66bc7095c1b08b2ec1f","src/lib.rs":"5ae457d402e49443bdb23b71353693dd3b0d263b57a6eeb9eb5b5dae5c901bdd","src/parser.rs":"6b2f4f27e3331a01a25b87c89368dd2e54396bd425dac57941f9c1ebfd238ac8","src/rank.rs":"ff3d58b0cc5ffa69e2e8c56fc7d9ef41dd399d59a639a253a51551b858cb5bbd","src/unicode.rs":"b2084dcbd4331501b9a895fd7e7575d93ff96eb661c6e6adbc8c66bb72685cde","src/unicode_tables/LICENSE-UNICODE":"74db5baf44a41b1000312c673544b3374e4198af5605c7f9080a402cec42cfa3","src/unicode_tables/age.rs":"71
b7cf52acdb4aa98b44145303b8efbfa94913235493521941ef1e0092a0ffe2","src/unicode_tables/case_folding_simple.rs":"7622c7f7f03ac0dc2f2bcd51c81a217d64de0cc912f62f1add5f676603a02456","src/unicode_tables/general_category.rs":"9488e3721f7c2ae20e1b77fcff9a59b4ed8f22954b8645ea6d8592eac1856423","src/unicode_tables/grapheme_cluster_break.rs":"0dd9d66bad598f4ec3451b6699f05c17c52079e37d463baf6385bbe51aa218f1","src/unicode_tables/mod.rs":"26c837099cd934c8062e24bc9a0aaecf15fe1de03f9c6da3f3e1e5ac3ca24bee","src/unicode_tables/perl_decimal.rs":"6a59143db81a0bcaf0e8d0af265e711d1a6472e1f091ee9ee4377da5d5d0cd1f","src/unicode_tables/perl_space.rs":"ec9bb22ed7e99feef292249c7e6f4673ee0af9635d4d158f93923494c14cd5ed","src/unicode_tables/perl_word.rs":"30f073baae28ea34c373c7778c00f20c1621c3e644404eff031f7d1cc8e9c9e2","src/unicode_tables/property_bool.rs":"66cf5bd2a1438bf9694152f077a285cf014fbd50b9dd63a97233b2ea61d64962","src/unicode_tables/property_names.rs":"8c93985d1bcb01735667a3c4cb92f7e260d267326bde9d7f048bc77cd7e07855","src/unicode_tables/property_values.rs":"ef9131ce0a575c7327ec6d466aafd8b7c25600d80c232b5a4110bbf0a5a59136","src/unicode_tables/script.rs":"41bd424f1e3a03290cf4995ced678dcf24c94b38c905c62f6819bf67e098a2ec","src/unicode_tables/script_extension.rs":"a314099ddbf50a07fe350bb0835bf2fe494ed5ad278b30e171e21506eb557906","src/unicode_tables/sentence_break.rs":"be84fbe8c5c67e761b16fe6c27f16664dbb145357835cd6b92bc2a4a4c52ee79","src/unicode_tables/word_break.rs":"c551681ad49ec28c7ae32bab1371945821c736ca8f0de410cb89f28066ec2ecf","src/utf8.rs":"33657f668361b6648d74c92d3d59eab97e3747d785760f47e4d71c13af07bfba","test":"c7de5fbc0010d9b5b758cd49956375a64b88601c068167fd366808950257f108"},"package":"2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"} \ No newline at end of file
diff --git a/vendor/regex-syntax/Cargo.toml b/vendor/regex-syntax/Cargo.toml
deleted file mode 100644
index 6b93357c..00000000
--- a/vendor/regex-syntax/Cargo.toml
+++ /dev/null
@@ -1,74 +0,0 @@
-# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
-#
-# When uploading crates to the registry Cargo will automatically
-# "normalize" Cargo.toml files for maximal compatibility
-# with all versions of Cargo and also rewrite `path` dependencies
-# to registry (e.g., crates.io) dependencies.
-#
-# If you are reading this file be aware that the original Cargo.toml
-# will likely look very different (and much more reasonable).
-# See Cargo.toml.orig for the original contents.
-
-[package]
-edition = "2021"
-rust-version = "1.65"
-name = "regex-syntax"
-version = "0.8.5"
-authors = [
- "The Rust Project Developers",
- "Andrew Gallant <jamslam@gmail.com>",
-]
-build = false
-autobins = false
-autoexamples = false
-autotests = false
-autobenches = false
-description = "A regular expression parser."
-documentation = "https://docs.rs/regex-syntax"
-readme = "README.md"
-license = "MIT OR Apache-2.0"
-repository = "https://github.com/rust-lang/regex/tree/master/regex-syntax"
-
-[package.metadata.docs.rs]
-all-features = true
-rustdoc-args = [
- "--cfg",
- "docsrs",
-]
-
-[lib]
-name = "regex_syntax"
-path = "src/lib.rs"
-
-[[bench]]
-name = "bench"
-path = "benches/bench.rs"
-
-[dependencies.arbitrary]
-version = "1.3.0"
-features = ["derive"]
-optional = true
-
-[features]
-arbitrary = ["dep:arbitrary"]
-default = [
- "std",
- "unicode",
-]
-std = []
-unicode = [
- "unicode-age",
- "unicode-bool",
- "unicode-case",
- "unicode-gencat",
- "unicode-perl",
- "unicode-script",
- "unicode-segment",
-]
-unicode-age = []
-unicode-bool = []
-unicode-case = []
-unicode-gencat = []
-unicode-perl = []
-unicode-script = []
-unicode-segment = []
diff --git a/vendor/regex-syntax/LICENSE-APACHE b/vendor/regex-syntax/LICENSE-APACHE
deleted file mode 100644
index 16fe87b0..00000000
--- a/vendor/regex-syntax/LICENSE-APACHE
+++ /dev/null
@@ -1,201 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
-TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
-2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
-3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
-4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
-5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
-6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
-7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
-8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
-9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
-END OF TERMS AND CONDITIONS
-
-APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
-Copyright [yyyy] [name of copyright owner]
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
diff --git a/vendor/regex-syntax/LICENSE-MIT b/vendor/regex-syntax/LICENSE-MIT
deleted file mode 100644
index 39d4bdb5..00000000
--- a/vendor/regex-syntax/LICENSE-MIT
+++ /dev/null
@@ -1,25 +0,0 @@
-Copyright (c) 2014 The Rust Project Developers
-
-Permission is hereby granted, free of charge, to any
-person obtaining a copy of this software and associated
-documentation files (the "Software"), to deal in the
-Software without restriction, including without
-limitation the rights to use, copy, modify, merge,
-publish, distribute, sublicense, and/or sell copies of
-the Software, and to permit persons to whom the Software
-is furnished to do so, subject to the following
-conditions:
-
-The above copyright notice and this permission notice
-shall be included in all copies or substantial portions
-of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
-ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
-PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
-SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
diff --git a/vendor/regex-syntax/README.md b/vendor/regex-syntax/README.md
deleted file mode 100644
index 529513b0..00000000
--- a/vendor/regex-syntax/README.md
+++ /dev/null
@@ -1,96 +0,0 @@
-regex-syntax
-============
-This crate provides a robust regular expression parser.
-
-[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions)
-[![Crates.io](https://img.shields.io/crates/v/regex-syntax.svg)](https://crates.io/crates/regex-syntax)
-
-
-### Documentation
-
-https://docs.rs/regex-syntax
-
-
-### Overview
-
-There are two primary types exported by this crate: `Ast` and `Hir`. The former
-is a faithful abstract syntax of a regular expression, and can convert regular
-expressions back to their concrete syntax while mostly preserving their original
-form. The latter type is a high level intermediate representation of a regular
-expression that is amenable to analysis and compilation into byte codes or
-automata. An `Hir` achieves this by drastically simplifying the syntactic
-structure of the regular expression. While an `Hir` can be converted back to
-its equivalent concrete syntax, the result is unlikely to resemble the original
-concrete syntax that produced the `Hir`.
-
-
-### Example
-
-This example shows how to parse a pattern string into its HIR:
-
-```rust
-use regex_syntax::{hir::Hir, parse};
-
-let hir = parse("a|b").unwrap();
-assert_eq!(hir, Hir::alternation(vec![
- Hir::literal("a".as_bytes()),
- Hir::literal("b".as_bytes()),
-]));
-```
-
-
-### Safety
-
-This crate has no `unsafe` code and sets `forbid(unsafe_code)`. While it's
-possible this crate could use `unsafe` code in the future, the standard
-for doing so is extremely high. In general, most code in this crate is not
-performance critical, since it tends to be dwarfed by the time it takes to
-compile a regular expression into an automaton. Therefore, there is little need
-for extreme optimization and, by extension, for use of `unsafe`.
-
-The standard for using `unsafe` in this crate is extremely high because this
-crate is intended to be reasonably safe to use with user supplied regular
-expressions. Therefore, while there may be bugs in the regex parser itself,
-they should _never_ result in memory unsafety unless there is either a bug
-in the compiler or the standard library. (Since `regex-syntax` has zero
-dependencies.)
-
-
-### Crate features
-
-By default, this crate bundles a fairly large amount of Unicode data tables
-(a source size of ~750KB). Because of their large size, one can disable some
-or all of these data tables. If a regular expression attempts to use Unicode
-data that is not available, then an error will occur when translating the `Ast`
-to the `Hir`.
-
-The full set of features one can disable is
-[in the "Crate features" section of the documentation](https://docs.rs/regex-syntax/*/#crate-features).
-
-
-### Testing
-
-Simply running `cargo test` will give you very good coverage. However, because
-of the large number of features exposed by this crate, a `test` script is
-included in this directory which will test several feature combinations. This
-is the same script that is run in CI.
-
-
-### Motivation
-
-The primary purpose of this crate is to provide the parser used by `regex`.
-Specifically, this crate is treated as an implementation detail of `regex`,
-and is primarily developed for the needs of `regex`.
-
-Since this crate is an implementation detail of `regex`, it may experience
-breaking change releases at a different cadence from `regex`. This is only
-possible because this crate is _not_ a public dependency of `regex`.
-
-Another consequence of this de-coupling is that there is no direct way to
-compile a `regex::Regex` from a `regex_syntax::hir::Hir`. Instead, one must
-first convert the `Hir` to a string (via its `std::fmt::Display`) and then
-compile that via `Regex::new`. While this does repeat some work, compilation
-typically takes much longer than parsing.
-
-Stated differently, the coupling between `regex` and `regex-syntax` exists only
-at the level of the concrete syntax.
diff --git a/vendor/regex-syntax/benches/bench.rs b/vendor/regex-syntax/benches/bench.rs
deleted file mode 100644
index d4703d4f..00000000
--- a/vendor/regex-syntax/benches/bench.rs
+++ /dev/null
@@ -1,63 +0,0 @@
-#![feature(test)]
-
-extern crate test;
-
-use regex_syntax::Parser;
-use test::Bencher;
-
-#[bench]
-fn parse_simple1(b: &mut Bencher) {
- b.iter(|| {
- let re = r"^bc(d|e)*$";
- Parser::new().parse(re).unwrap()
- });
-}
-
-#[bench]
-fn parse_simple2(b: &mut Bencher) {
- b.iter(|| {
- let re = r"'[a-zA-Z_][a-zA-Z0-9_]*(')\b";
- Parser::new().parse(re).unwrap()
- });
-}
-
-#[bench]
-fn parse_small1(b: &mut Bencher) {
- b.iter(|| {
- let re = r"\p{L}|\p{N}|\s|.|\d";
- Parser::new().parse(re).unwrap()
- });
-}
-
-#[bench]
-fn parse_medium1(b: &mut Bencher) {
- b.iter(|| {
- let re = r"\pL\p{Greek}\p{Hiragana}\p{Alphabetic}\p{Hebrew}\p{Arabic}";
- Parser::new().parse(re).unwrap()
- });
-}
-
-#[bench]
-fn parse_medium2(b: &mut Bencher) {
- b.iter(|| {
- let re = r"\s\S\w\W\d\D";
- Parser::new().parse(re).unwrap()
- });
-}
-
-#[bench]
-fn parse_medium3(b: &mut Bencher) {
- b.iter(|| {
- let re =
- r"\p{age:3.2}\p{hira}\p{scx:hira}\p{alphabetic}\p{sc:Greek}\pL";
- Parser::new().parse(re).unwrap()
- });
-}
-
-#[bench]
-fn parse_huge(b: &mut Bencher) {
- b.iter(|| {
- let re = r"\p{L}{100}";
- Parser::new().parse(re).unwrap()
- });
-}
diff --git a/vendor/regex-syntax/src/ast/mod.rs b/vendor/regex-syntax/src/ast/mod.rs
deleted file mode 100644
index ce79a89a..00000000
--- a/vendor/regex-syntax/src/ast/mod.rs
+++ /dev/null
@@ -1,1809 +0,0 @@
-/*!
-Defines an abstract syntax for regular expressions.
-*/
-
-use core::cmp::Ordering;
-
-use alloc::{boxed::Box, string::String, vec, vec::Vec};
-
-pub use crate::ast::visitor::{visit, Visitor};
-
-pub mod parse;
-pub mod print;
-mod visitor;
-
-/// An error that occurred while parsing a regular expression into an abstract
-/// syntax tree.
-///
-/// Note that not all ASTs represents a valid regular expression. For example,
-/// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a
-/// valid Unicode property name. That particular error is reported when
-/// translating an AST to the high-level intermediate representation (`HIR`).
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct Error {
- /// The kind of error.
- kind: ErrorKind,
- /// The original pattern that the parser generated the error from. Every
- /// span in an error is a valid range into this string.
- pattern: String,
- /// The span of this error.
- span: Span,
-}
-
-impl Error {
- /// Return the type of this error.
- pub fn kind(&self) -> &ErrorKind {
- &self.kind
- }
-
- /// The original pattern string in which this error occurred.
- ///
- /// Every span reported by this error is reported in terms of this string.
- pub fn pattern(&self) -> &str {
- &self.pattern
- }
-
- /// Return the span at which this error occurred.
- pub fn span(&self) -> &Span {
- &self.span
- }
-
- /// Return an auxiliary span. This span exists only for some errors that
- /// benefit from being able to point to two locations in the original
- /// regular expression. For example, "duplicate" errors will have the
- /// main error position set to the duplicate occurrence while its
- /// auxiliary span will be set to the initial occurrence.
- pub fn auxiliary_span(&self) -> Option<&Span> {
- use self::ErrorKind::*;
- match self.kind {
- FlagDuplicate { ref original } => Some(original),
- FlagRepeatedNegation { ref original, .. } => Some(original),
- GroupNameDuplicate { ref original, .. } => Some(original),
- _ => None,
- }
- }
-}
-
-/// The type of an error that occurred while building an AST.
-///
-/// This error type is marked as `non_exhaustive`. This means that adding a
-/// new variant is not considered a breaking change.
-#[non_exhaustive]
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum ErrorKind {
- /// The capturing group limit was exceeded.
- ///
- /// Note that this represents a limit on the total number of capturing
- /// groups in a regex and not necessarily the number of nested capturing
- /// groups. That is, the nest limit can be low and it is still possible for
- /// this error to occur.
- CaptureLimitExceeded,
- /// An invalid escape sequence was found in a character class set.
- ClassEscapeInvalid,
- /// An invalid character class range was found. An invalid range is any
- /// range where the start is greater than the end.
- ClassRangeInvalid,
- /// An invalid range boundary was found in a character class. Range
- /// boundaries must be a single literal codepoint, but this error indicates
- /// that something else was found, such as a nested class.
- ClassRangeLiteral,
- /// An opening `[` was found with no corresponding closing `]`.
- ClassUnclosed,
- /// Note that this error variant is no longer used. Namely, a decimal
- /// number can only appear as a repetition quantifier. When the number
- /// in a repetition quantifier is empty, then it gets its own specialized
- /// error, `RepetitionCountDecimalEmpty`.
- DecimalEmpty,
- /// An invalid decimal number was given where one was expected.
- DecimalInvalid,
- /// A bracketed hex literal was empty.
- EscapeHexEmpty,
- /// A bracketed hex literal did not correspond to a Unicode scalar value.
- EscapeHexInvalid,
- /// An invalid hexadecimal digit was found.
- EscapeHexInvalidDigit,
- /// EOF was found before an escape sequence was completed.
- EscapeUnexpectedEof,
- /// An unrecognized escape sequence.
- EscapeUnrecognized,
- /// A dangling negation was used when setting flags, e.g., `i-`.
- FlagDanglingNegation,
- /// A flag was used twice, e.g., `i-i`.
- FlagDuplicate {
- /// The position of the original flag. The error position
- /// points to the duplicate flag.
- original: Span,
- },
- /// The negation operator was used twice, e.g., `-i-s`.
- FlagRepeatedNegation {
- /// The position of the original negation operator. The error position
- /// points to the duplicate negation operator.
- original: Span,
- },
- /// Expected a flag but got EOF, e.g., `(?`.
- FlagUnexpectedEof,
- /// Unrecognized flag, e.g., `a`.
- FlagUnrecognized,
- /// A duplicate capture name was found.
- GroupNameDuplicate {
- /// The position of the initial occurrence of the capture name. The
- /// error position itself points to the duplicate occurrence.
- original: Span,
- },
- /// A capture group name is empty, e.g., `(?P<>abc)`.
- GroupNameEmpty,
- /// An invalid character was seen for a capture group name. This includes
- /// errors where the first character is a digit (even though subsequent
- /// characters are allowed to be digits).
- GroupNameInvalid,
- /// A closing `>` could not be found for a capture group name.
- GroupNameUnexpectedEof,
- /// An unclosed group, e.g., `(ab`.
- ///
- /// The span of this error corresponds to the unclosed parenthesis.
- GroupUnclosed,
- /// An unopened group, e.g., `ab)`.
- GroupUnopened,
- /// The nest limit was exceeded. The limit stored here is the limit
- /// configured in the parser.
- NestLimitExceeded(u32),
- /// The range provided in a counted repetition operator is invalid. The
- /// range is invalid if the start is greater than the end.
- RepetitionCountInvalid,
- /// An opening `{` was not followed by a valid decimal value.
- /// For example, `x{}` or `x{]}` would fail.
- RepetitionCountDecimalEmpty,
- /// An opening `{` was found with no corresponding closing `}`.
- RepetitionCountUnclosed,
- /// A repetition operator was applied to a missing sub-expression. This
- /// occurs, for example, in the regex consisting of just a `*` or even
- /// `(?i)*`. It is, however, possible to create a repetition operating on
- /// an empty sub-expression. For example, `()*` is still considered valid.
- RepetitionMissing,
- /// The special word boundary syntax, `\b{something}`, was used, but
- /// either EOF without `}` was seen, or an invalid character in the
- /// braces was seen.
- SpecialWordBoundaryUnclosed,
- /// The special word boundary syntax, `\b{something}`, was used, but
- /// `something` was not recognized as a valid word boundary kind.
- SpecialWordBoundaryUnrecognized,
- /// The syntax `\b{` was observed, but afterwards the end of the pattern
- /// was observed without being able to tell whether it was meant to be a
- /// bounded repetition on the `\b` or the beginning of a special word
- /// boundary assertion.
- SpecialWordOrRepetitionUnexpectedEof,
- /// The Unicode class is not valid. This typically occurs when a `\p` is
- /// followed by something other than a `{`.
- UnicodeClassInvalid,
- /// When octal support is disabled, this error is produced when an octal
- /// escape is used. The octal escape is assumed to be an invocation of
- /// a backreference, which is the common case.
- UnsupportedBackreference,
- /// When syntax similar to PCRE's look-around is used, this error is
- /// returned. Some example syntaxes that are rejected include, but are
- /// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
- /// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
- /// error is used to improve the user experience.
- UnsupportedLookAround,
-}
-
-#[cfg(feature = "std")]
-impl std::error::Error for Error {}
-
-impl core::fmt::Display for Error {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- crate::error::Formatter::from(self).fmt(f)
- }
-}
-
-impl core::fmt::Display for ErrorKind {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- use self::ErrorKind::*;
- match *self {
- CaptureLimitExceeded => write!(
- f,
- "exceeded the maximum number of \
- capturing groups ({})",
- u32::MAX
- ),
- ClassEscapeInvalid => {
- write!(f, "invalid escape sequence found in character class")
- }
- ClassRangeInvalid => write!(
- f,
- "invalid character class range, \
- the start must be <= the end"
- ),
- ClassRangeLiteral => {
- write!(f, "invalid range boundary, must be a literal")
- }
- ClassUnclosed => write!(f, "unclosed character class"),
- DecimalEmpty => write!(f, "decimal literal empty"),
- DecimalInvalid => write!(f, "decimal literal invalid"),
- EscapeHexEmpty => write!(f, "hexadecimal literal empty"),
- EscapeHexInvalid => {
- write!(f, "hexadecimal literal is not a Unicode scalar value")
- }
- EscapeHexInvalidDigit => write!(f, "invalid hexadecimal digit"),
- EscapeUnexpectedEof => write!(
- f,
- "incomplete escape sequence, \
- reached end of pattern prematurely"
- ),
- EscapeUnrecognized => write!(f, "unrecognized escape sequence"),
- FlagDanglingNegation => {
- write!(f, "dangling flag negation operator")
- }
- FlagDuplicate { .. } => write!(f, "duplicate flag"),
- FlagRepeatedNegation { .. } => {
- write!(f, "flag negation operator repeated")
- }
- FlagUnexpectedEof => {
- write!(f, "expected flag but got end of regex")
- }
- FlagUnrecognized => write!(f, "unrecognized flag"),
- GroupNameDuplicate { .. } => {
- write!(f, "duplicate capture group name")
- }
- GroupNameEmpty => write!(f, "empty capture group name"),
- GroupNameInvalid => write!(f, "invalid capture group character"),
- GroupNameUnexpectedEof => write!(f, "unclosed capture group name"),
- GroupUnclosed => write!(f, "unclosed group"),
- GroupUnopened => write!(f, "unopened group"),
- NestLimitExceeded(limit) => write!(
- f,
- "exceed the maximum number of \
- nested parentheses/brackets ({})",
- limit
- ),
- RepetitionCountInvalid => write!(
- f,
- "invalid repetition count range, \
- the start must be <= the end"
- ),
- RepetitionCountDecimalEmpty => {
- write!(f, "repetition quantifier expects a valid decimal")
- }
- RepetitionCountUnclosed => {
- write!(f, "unclosed counted repetition")
- }
- RepetitionMissing => {
- write!(f, "repetition operator missing expression")
- }
- SpecialWordBoundaryUnclosed => {
- write!(
- f,
- "special word boundary assertion is either \
- unclosed or contains an invalid character",
- )
- }
- SpecialWordBoundaryUnrecognized => {
- write!(
- f,
- "unrecognized special word boundary assertion, \
- valid choices are: start, end, start-half \
- or end-half",
- )
- }
- SpecialWordOrRepetitionUnexpectedEof => {
- write!(
- f,
- "found either the beginning of a special word \
- boundary or a bounded repetition on a \\b with \
- an opening brace, but no closing brace",
- )
- }
- UnicodeClassInvalid => {
- write!(f, "invalid Unicode character class")
- }
- UnsupportedBackreference => {
- write!(f, "backreferences are not supported")
- }
- UnsupportedLookAround => write!(
- f,
- "look-around, including look-ahead and look-behind, \
- is not supported"
- ),
- }
- }
-}
-
-/// Span represents the position information of a single AST item.
-///
-/// All span positions are absolute byte offsets that can be used on the
-/// original regular expression that was parsed.
-#[derive(Clone, Copy, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct Span {
- /// The start byte offset.
- pub start: Position,
- /// The end byte offset.
- pub end: Position,
-}
-
-impl core::fmt::Debug for Span {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- write!(f, "Span({:?}, {:?})", self.start, self.end)
- }
-}
-
-impl Ord for Span {
- fn cmp(&self, other: &Span) -> Ordering {
- (&self.start, &self.end).cmp(&(&other.start, &other.end))
- }
-}
-
-impl PartialOrd for Span {
- fn partial_cmp(&self, other: &Span) -> Option<Ordering> {
- Some(self.cmp(other))
- }
-}
-
-/// A single position in a regular expression.
-///
-/// A position encodes one half of a span, and include the byte offset, line
-/// number and column number.
-#[derive(Clone, Copy, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct Position {
- /// The absolute offset of this position, starting at `0` from the
- /// beginning of the regular expression pattern string.
- pub offset: usize,
- /// The line number, starting at `1`.
- pub line: usize,
- /// The approximate column number, starting at `1`.
- pub column: usize,
-}
-
-impl core::fmt::Debug for Position {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- write!(
- f,
- "Position(o: {:?}, l: {:?}, c: {:?})",
- self.offset, self.line, self.column
- )
- }
-}
-
-impl Ord for Position {
- fn cmp(&self, other: &Position) -> Ordering {
- self.offset.cmp(&other.offset)
- }
-}
-
-impl PartialOrd for Position {
- fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
- Some(self.cmp(other))
- }
-}
-
-impl Span {
- /// Create a new span with the given positions.
- pub fn new(start: Position, end: Position) -> Span {
- Span { start, end }
- }
-
- /// Create a new span using the given position as the start and end.
- pub fn splat(pos: Position) -> Span {
- Span::new(pos, pos)
- }
-
- /// Create a new span by replacing the starting the position with the one
- /// given.
- pub fn with_start(self, pos: Position) -> Span {
- Span { start: pos, ..self }
- }
-
- /// Create a new span by replacing the ending the position with the one
- /// given.
- pub fn with_end(self, pos: Position) -> Span {
- Span { end: pos, ..self }
- }
-
- /// Returns true if and only if this span occurs on a single line.
- pub fn is_one_line(&self) -> bool {
- self.start.line == self.end.line
- }
-
- /// Returns true if and only if this span is empty. That is, it points to
- /// a single position in the concrete syntax of a regular expression.
- pub fn is_empty(&self) -> bool {
- self.start.offset == self.end.offset
- }
-}
-
-impl Position {
- /// Create a new position with the given information.
- ///
- /// `offset` is the absolute offset of the position, starting at `0` from
- /// the beginning of the regular expression pattern string.
- ///
- /// `line` is the line number, starting at `1`.
- ///
- /// `column` is the approximate column number, starting at `1`.
- pub fn new(offset: usize, line: usize, column: usize) -> Position {
- Position { offset, line, column }
- }
-}
-
-/// An abstract syntax tree for a singular expression along with comments
-/// found.
-///
-/// Comments are not stored in the tree itself to avoid complexity. Each
-/// comment contains a span of precisely where it occurred in the original
-/// regular expression.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct WithComments {
- /// The actual ast.
- pub ast: Ast,
- /// All comments found in the original regular expression.
- pub comments: Vec<Comment>,
-}
-
-/// A comment from a regular expression with an associated span.
-///
-/// A regular expression can only contain comments when the `x` flag is
-/// enabled.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct Comment {
- /// The span of this comment, including the beginning `#` and ending `\n`.
- pub span: Span,
- /// The comment text, starting with the first character following the `#`
- /// and ending with the last character preceding the `\n`.
- pub comment: String,
-}
-
-/// An abstract syntax tree for a single regular expression.
-///
-/// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
-/// space proportional to the size of the `Ast`.
-///
-/// This type defines its own destructor that uses constant stack space and
-/// heap space proportional to the size of the `Ast`.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum Ast {
- /// An empty regex that matches everything.
- Empty(Box<Span>),
- /// A set of flags, e.g., `(?is)`.
- Flags(Box<SetFlags>),
- /// A single character literal, which includes escape sequences.
- Literal(Box<Literal>),
- /// The "any character" class.
- Dot(Box<Span>),
- /// A single zero-width assertion.
- Assertion(Box<Assertion>),
- /// A single Unicode character class, e.g., `\pL` or `\p{Greek}`.
- ClassUnicode(Box<ClassUnicode>),
- /// A single perl character class, e.g., `\d` or `\W`.
- ClassPerl(Box<ClassPerl>),
- /// A single bracketed character class set, which may contain zero or more
- /// character ranges and/or zero or more nested classes. e.g.,
- /// `[a-zA-Z\pL]`.
- ClassBracketed(Box<ClassBracketed>),
- /// A repetition operator applied to an arbitrary regular expression.
- Repetition(Box<Repetition>),
- /// A grouped regular expression.
- Group(Box<Group>),
- /// An alternation of regular expressions.
- Alternation(Box<Alternation>),
- /// A concatenation of regular expressions.
- Concat(Box<Concat>),
-}
-
-impl Ast {
- /// Create an "empty" AST item.
- pub fn empty(span: Span) -> Ast {
- Ast::Empty(Box::new(span))
- }
-
- /// Create a "flags" AST item.
- pub fn flags(e: SetFlags) -> Ast {
- Ast::Flags(Box::new(e))
- }
-
- /// Create a "literal" AST item.
- pub fn literal(e: Literal) -> Ast {
- Ast::Literal(Box::new(e))
- }
-
- /// Create a "dot" AST item.
- pub fn dot(span: Span) -> Ast {
- Ast::Dot(Box::new(span))
- }
-
- /// Create a "assertion" AST item.
- pub fn assertion(e: Assertion) -> Ast {
- Ast::Assertion(Box::new(e))
- }
-
- /// Create a "Unicode class" AST item.
- pub fn class_unicode(e: ClassUnicode) -> Ast {
- Ast::ClassUnicode(Box::new(e))
- }
-
- /// Create a "Perl class" AST item.
- pub fn class_perl(e: ClassPerl) -> Ast {
- Ast::ClassPerl(Box::new(e))
- }
-
- /// Create a "bracketed class" AST item.
- pub fn class_bracketed(e: ClassBracketed) -> Ast {
- Ast::ClassBracketed(Box::new(e))
- }
-
- /// Create a "repetition" AST item.
- pub fn repetition(e: Repetition) -> Ast {
- Ast::Repetition(Box::new(e))
- }
-
- /// Create a "group" AST item.
- pub fn group(e: Group) -> Ast {
- Ast::Group(Box::new(e))
- }
-
- /// Create a "alternation" AST item.
- pub fn alternation(e: Alternation) -> Ast {
- Ast::Alternation(Box::new(e))
- }
-
- /// Create a "concat" AST item.
- pub fn concat(e: Concat) -> Ast {
- Ast::Concat(Box::new(e))
- }
-
- /// Return the span of this abstract syntax tree.
- pub fn span(&self) -> &Span {
- match *self {
- Ast::Empty(ref span) => span,
- Ast::Flags(ref x) => &x.span,
- Ast::Literal(ref x) => &x.span,
- Ast::Dot(ref span) => span,
- Ast::Assertion(ref x) => &x.span,
- Ast::ClassUnicode(ref x) => &x.span,
- Ast::ClassPerl(ref x) => &x.span,
- Ast::ClassBracketed(ref x) => &x.span,
- Ast::Repetition(ref x) => &x.span,
- Ast::Group(ref x) => &x.span,
- Ast::Alternation(ref x) => &x.span,
- Ast::Concat(ref x) => &x.span,
- }
- }
-
- /// Return true if and only if this Ast is empty.
- pub fn is_empty(&self) -> bool {
- match *self {
- Ast::Empty(_) => true,
- _ => false,
- }
- }
-
- /// Returns true if and only if this AST has any (including possibly empty)
- /// subexpressions.
- fn has_subexprs(&self) -> bool {
- match *self {
- Ast::Empty(_)
- | Ast::Flags(_)
- | Ast::Literal(_)
- | Ast::Dot(_)
- | Ast::Assertion(_)
- | Ast::ClassUnicode(_)
- | Ast::ClassPerl(_) => false,
- Ast::ClassBracketed(_)
- | Ast::Repetition(_)
- | Ast::Group(_)
- | Ast::Alternation(_)
- | Ast::Concat(_) => true,
- }
- }
-}
-
-/// Print a display representation of this Ast.
-///
-/// This does not preserve any of the original whitespace formatting that may
-/// have originally been present in the concrete syntax from which this Ast
-/// was generated.
-///
-/// This implementation uses constant stack space and heap space proportional
-/// to the size of the `Ast`.
-impl core::fmt::Display for Ast {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- use crate::ast::print::Printer;
- Printer::new().print(self, f)
- }
-}
-
-/// An alternation of regular expressions.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct Alternation {
- /// The span of this alternation.
- pub span: Span,
- /// The alternate regular expressions.
- pub asts: Vec<Ast>,
-}
-
-impl Alternation {
- /// Return this alternation as an AST.
- ///
- /// If this alternation contains zero ASTs, then `Ast::empty` is returned.
- /// If this alternation contains exactly 1 AST, then the corresponding AST
- /// is returned. Otherwise, `Ast::alternation` is returned.
- pub fn into_ast(mut self) -> Ast {
- match self.asts.len() {
- 0 => Ast::empty(self.span),
- 1 => self.asts.pop().unwrap(),
- _ => Ast::alternation(self),
- }
- }
-}
-
-/// A concatenation of regular expressions.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct Concat {
- /// The span of this concatenation.
- pub span: Span,
- /// The concatenation regular expressions.
- pub asts: Vec<Ast>,
-}
-
-impl Concat {
- /// Return this concatenation as an AST.
- ///
- /// If this alternation contains zero ASTs, then `Ast::empty` is returned.
- /// If this alternation contains exactly 1 AST, then the corresponding AST
- /// is returned. Otherwise, `Ast::concat` is returned.
- pub fn into_ast(mut self) -> Ast {
- match self.asts.len() {
- 0 => Ast::empty(self.span),
- 1 => self.asts.pop().unwrap(),
- _ => Ast::concat(self),
- }
- }
-}
-
-/// A single literal expression.
-///
-/// A literal corresponds to a single Unicode scalar value. Literals may be
-/// represented in their literal form, e.g., `a` or in their escaped form,
-/// e.g., `\x61`.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct Literal {
- /// The span of this literal.
- pub span: Span,
- /// The kind of this literal.
- pub kind: LiteralKind,
- /// The Unicode scalar value corresponding to this literal.
- pub c: char,
-}
-
-impl Literal {
- /// If this literal was written as a `\x` hex escape, then this returns
- /// the corresponding byte value. Otherwise, this returns `None`.
- pub fn byte(&self) -> Option<u8> {
- match self.kind {
- LiteralKind::HexFixed(HexLiteralKind::X) => {
- u8::try_from(self.c).ok()
- }
- _ => None,
- }
- }
-}
-
-/// The kind of a single literal expression.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum LiteralKind {
- /// The literal is written verbatim, e.g., `a` or `☃`.
- Verbatim,
- /// The literal is written as an escape because it is otherwise a special
- /// regex meta character, e.g., `\*` or `\[`.
- Meta,
- /// The literal is written as an escape despite the fact that the escape is
- /// unnecessary, e.g., `\%` or `\/`.
- Superfluous,
- /// The literal is written as an octal escape, e.g., `\141`.
- Octal,
- /// The literal is written as a hex code with a fixed number of digits
- /// depending on the type of the escape, e.g., `\x61` or `\u0061` or
- /// `\U00000061`.
- HexFixed(HexLiteralKind),
- /// The literal is written as a hex code with a bracketed number of
- /// digits. The only restriction is that the bracketed hex code must refer
- /// to a valid Unicode scalar value.
- HexBrace(HexLiteralKind),
- /// The literal is written as a specially recognized escape, e.g., `\f`
- /// or `\n`.
- Special(SpecialLiteralKind),
-}
-
-/// The type of a special literal.
-///
-/// A special literal is a special escape sequence recognized by the regex
-/// parser, e.g., `\f` or `\n`.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum SpecialLiteralKind {
- /// Bell, spelled `\a` (`\x07`).
- Bell,
- /// Form feed, spelled `\f` (`\x0C`).
- FormFeed,
- /// Tab, spelled `\t` (`\x09`).
- Tab,
- /// Line feed, spelled `\n` (`\x0A`).
- LineFeed,
- /// Carriage return, spelled `\r` (`\x0D`).
- CarriageReturn,
- /// Vertical tab, spelled `\v` (`\x0B`).
- VerticalTab,
- /// Space, spelled `\ ` (`\x20`). Note that this can only appear when
- /// parsing in verbose mode.
- Space,
-}
-
-/// The type of a Unicode hex literal.
-///
-/// Note that all variants behave the same when used with brackets. They only
-/// differ when used without brackets in the number of hex digits that must
-/// follow.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum HexLiteralKind {
- /// A `\x` prefix. When used without brackets, this form is limited to
- /// two digits.
- X,
- /// A `\u` prefix. When used without brackets, this form is limited to
- /// four digits.
- UnicodeShort,
- /// A `\U` prefix. When used without brackets, this form is limited to
- /// eight digits.
- UnicodeLong,
-}
-
-impl HexLiteralKind {
- /// The number of digits that must be used with this literal form when
- /// used without brackets. When used with brackets, there is no
- /// restriction on the number of digits.
- pub fn digits(&self) -> u32 {
- match *self {
- HexLiteralKind::X => 2,
- HexLiteralKind::UnicodeShort => 4,
- HexLiteralKind::UnicodeLong => 8,
- }
- }
-}
-
-/// A Perl character class.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct ClassPerl {
- /// The span of this class.
- pub span: Span,
- /// The kind of Perl class.
- pub kind: ClassPerlKind,
- /// Whether the class is negated or not. e.g., `\d` is not negated but
- /// `\D` is.
- pub negated: bool,
-}
-
-/// The available Perl character classes.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum ClassPerlKind {
- /// Decimal numbers.
- Digit,
- /// Whitespace.
- Space,
- /// Word characters.
- Word,
-}
-
-/// An ASCII character class.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct ClassAscii {
- /// The span of this class.
- pub span: Span,
- /// The kind of ASCII class.
- pub kind: ClassAsciiKind,
- /// Whether the class is negated or not. e.g., `[[:alpha:]]` is not negated
- /// but `[[:^alpha:]]` is.
- pub negated: bool,
-}
-
-/// The available ASCII character classes.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum ClassAsciiKind {
- /// `[0-9A-Za-z]`
- Alnum,
- /// `[A-Za-z]`
- Alpha,
- /// `[\x00-\x7F]`
- Ascii,
- /// `[ \t]`
- Blank,
- /// `[\x00-\x1F\x7F]`
- Cntrl,
- /// `[0-9]`
- Digit,
- /// `[!-~]`
- Graph,
- /// `[a-z]`
- Lower,
- /// `[ -~]`
- Print,
- /// `[!-/:-@\[-`{-~]`
- Punct,
- /// `[\t\n\v\f\r ]`
- Space,
- /// `[A-Z]`
- Upper,
- /// `[0-9A-Za-z_]`
- Word,
- /// `[0-9A-Fa-f]`
- Xdigit,
-}
-
-impl ClassAsciiKind {
- /// Return the corresponding ClassAsciiKind variant for the given name.
- ///
- /// The name given should correspond to the lowercase version of the
- /// variant name. e.g., `cntrl` is the name for `ClassAsciiKind::Cntrl`.
- ///
- /// If no variant with the corresponding name exists, then `None` is
- /// returned.
- pub fn from_name(name: &str) -> Option<ClassAsciiKind> {
- use self::ClassAsciiKind::*;
- match name {
- "alnum" => Some(Alnum),
- "alpha" => Some(Alpha),
- "ascii" => Some(Ascii),
- "blank" => Some(Blank),
- "cntrl" => Some(Cntrl),
- "digit" => Some(Digit),
- "graph" => Some(Graph),
- "lower" => Some(Lower),
- "print" => Some(Print),
- "punct" => Some(Punct),
- "space" => Some(Space),
- "upper" => Some(Upper),
- "word" => Some(Word),
- "xdigit" => Some(Xdigit),
- _ => None,
- }
- }
-}
-
-/// A Unicode character class.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct ClassUnicode {
- /// The span of this class.
- pub span: Span,
- /// Whether this class is negated or not.
- ///
- /// Note: be careful when using this attribute. This specifically refers
- /// to whether the class is written as `\p` or `\P`, where the latter
- /// is `negated = true`. However, it also possible to write something like
- /// `\P{scx!=Katakana}` which is actually equivalent to
- /// `\p{scx=Katakana}` and is therefore not actually negated even though
- /// `negated = true` here. To test whether this class is truly negated
- /// or not, use the `is_negated` method.
- pub negated: bool,
- /// The kind of Unicode class.
- pub kind: ClassUnicodeKind,
-}
-
-impl ClassUnicode {
- /// Returns true if this class has been negated.
- ///
- /// Note that this takes the Unicode op into account, if it's present.
- /// e.g., `is_negated` for `\P{scx!=Katakana}` will return `false`.
- pub fn is_negated(&self) -> bool {
- match self.kind {
- ClassUnicodeKind::NamedValue {
- op: ClassUnicodeOpKind::NotEqual,
- ..
- } => !self.negated,
- _ => self.negated,
- }
- }
-}
-
-/// The available forms of Unicode character classes.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum ClassUnicodeKind {
- /// A one letter abbreviated class, e.g., `\pN`.
- OneLetter(char),
- /// A binary property, general category or script. The string may be
- /// empty.
- Named(String),
- /// A property name and an associated value.
- NamedValue {
- /// The type of Unicode op used to associate `name` with `value`.
- op: ClassUnicodeOpKind,
- /// The property name (which may be empty).
- name: String,
- /// The property value (which may be empty).
- value: String,
- },
-}
-
-#[cfg(feature = "arbitrary")]
-impl arbitrary::Arbitrary<'_> for ClassUnicodeKind {
- fn arbitrary(
- u: &mut arbitrary::Unstructured,
- ) -> arbitrary::Result<ClassUnicodeKind> {
- #[cfg(any(
- feature = "unicode-age",
- feature = "unicode-bool",
- feature = "unicode-gencat",
- feature = "unicode-perl",
- feature = "unicode-script",
- feature = "unicode-segment",
- ))]
- {
- use alloc::string::ToString;
-
- use super::unicode_tables::{
- property_names::PROPERTY_NAMES,
- property_values::PROPERTY_VALUES,
- };
-
- match u.choose_index(3)? {
- 0 => {
- let all = PROPERTY_VALUES
- .iter()
- .flat_map(|e| e.1.iter())
- .filter(|(name, _)| name.len() == 1)
- .count();
- let idx = u.choose_index(all)?;
- let value = PROPERTY_VALUES
- .iter()
- .flat_map(|e| e.1.iter())
- .take(idx + 1)
- .last()
- .unwrap()
- .0
- .chars()
- .next()
- .unwrap();
- Ok(ClassUnicodeKind::OneLetter(value))
- }
- 1 => {
- let all = PROPERTY_VALUES
- .iter()
- .map(|e| e.1.len())
- .sum::<usize>()
- + PROPERTY_NAMES.len();
- let idx = u.choose_index(all)?;
- let name = PROPERTY_VALUES
- .iter()
- .flat_map(|e| e.1.iter())
- .chain(PROPERTY_NAMES)
- .map(|(_, e)| e)
- .take(idx + 1)
- .last()
- .unwrap();
- Ok(ClassUnicodeKind::Named(name.to_string()))
- }
- 2 => {
- let all = PROPERTY_VALUES
- .iter()
- .map(|e| e.1.len())
- .sum::<usize>();
- let idx = u.choose_index(all)?;
- let (prop, value) = PROPERTY_VALUES
- .iter()
- .flat_map(|e| {
- e.1.iter().map(|(_, value)| (e.0, value))
- })
- .take(idx + 1)
- .last()
- .unwrap();
- Ok(ClassUnicodeKind::NamedValue {
- op: u.arbitrary()?,
- name: prop.to_string(),
- value: value.to_string(),
- })
- }
- _ => unreachable!("index chosen is impossible"),
- }
- }
- #[cfg(not(any(
- feature = "unicode-age",
- feature = "unicode-bool",
- feature = "unicode-gencat",
- feature = "unicode-perl",
- feature = "unicode-script",
- feature = "unicode-segment",
- )))]
- {
- match u.choose_index(3)? {
- 0 => Ok(ClassUnicodeKind::OneLetter(u.arbitrary()?)),
- 1 => Ok(ClassUnicodeKind::Named(u.arbitrary()?)),
- 2 => Ok(ClassUnicodeKind::NamedValue {
- op: u.arbitrary()?,
- name: u.arbitrary()?,
- value: u.arbitrary()?,
- }),
- _ => unreachable!("index chosen is impossible"),
- }
- }
- }
-
- fn size_hint(depth: usize) -> (usize, Option<usize>) {
- #[cfg(any(
- feature = "unicode-age",
- feature = "unicode-bool",
- feature = "unicode-gencat",
- feature = "unicode-perl",
- feature = "unicode-script",
- feature = "unicode-segment",
- ))]
- {
- arbitrary::size_hint::and_all(&[
- usize::size_hint(depth),
- usize::size_hint(depth),
- arbitrary::size_hint::or(
- (0, Some(0)),
- ClassUnicodeOpKind::size_hint(depth),
- ),
- ])
- }
- #[cfg(not(any(
- feature = "unicode-age",
- feature = "unicode-bool",
- feature = "unicode-gencat",
- feature = "unicode-perl",
- feature = "unicode-script",
- feature = "unicode-segment",
- )))]
- {
- arbitrary::size_hint::and(
- usize::size_hint(depth),
- arbitrary::size_hint::or_all(&[
- char::size_hint(depth),
- String::size_hint(depth),
- arbitrary::size_hint::and_all(&[
- String::size_hint(depth),
- String::size_hint(depth),
- ClassUnicodeOpKind::size_hint(depth),
- ]),
- ]),
- )
- }
- }
-}
-
-/// The type of op used in a Unicode character class.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum ClassUnicodeOpKind {
- /// A property set to a specific value, e.g., `\p{scx=Katakana}`.
- Equal,
- /// A property set to a specific value using a colon, e.g.,
- /// `\p{scx:Katakana}`.
- Colon,
- /// A property that isn't a particular value, e.g., `\p{scx!=Katakana}`.
- NotEqual,
-}
-
-impl ClassUnicodeOpKind {
- /// Whether the op is an equality op or not.
- pub fn is_equal(&self) -> bool {
- match *self {
- ClassUnicodeOpKind::Equal | ClassUnicodeOpKind::Colon => true,
- _ => false,
- }
- }
-}
-
-/// A bracketed character class, e.g., `[a-z0-9]`.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct ClassBracketed {
- /// The span of this class.
- pub span: Span,
- /// Whether this class is negated or not. e.g., `[a]` is not negated but
- /// `[^a]` is.
- pub negated: bool,
- /// The type of this set. A set is either a normal union of things, e.g.,
- /// `[abc]` or a result of applying set operations, e.g., `[\pL--c]`.
- pub kind: ClassSet,
-}
-
-/// A character class set.
-///
-/// This type corresponds to the internal structure of a bracketed character
-/// class. That is, every bracketed character is one of two types: a union of
-/// items (literals, ranges, other bracketed classes) or a tree of binary set
-/// operations.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum ClassSet {
- /// An item, which can be a single literal, range, nested character class
- /// or a union of items.
- Item(ClassSetItem),
- /// A single binary operation (i.e., &&, -- or ~~).
- BinaryOp(ClassSetBinaryOp),
-}
-
-impl ClassSet {
- /// Build a set from a union.
- pub fn union(ast: ClassSetUnion) -> ClassSet {
- ClassSet::Item(ClassSetItem::Union(ast))
- }
-
- /// Return the span of this character class set.
- pub fn span(&self) -> &Span {
- match *self {
- ClassSet::Item(ref x) => x.span(),
- ClassSet::BinaryOp(ref x) => &x.span,
- }
- }
-
- /// Return true if and only if this class set is empty.
- fn is_empty(&self) -> bool {
- match *self {
- ClassSet::Item(ClassSetItem::Empty(_)) => true,
- _ => false,
- }
- }
-}
-
-/// A single component of a character class set.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum ClassSetItem {
- /// An empty item.
- ///
- /// Note that a bracketed character class cannot contain a single empty
- /// item. Empty items can appear when using one of the binary operators.
- /// For example, `[&&]` is the intersection of two empty classes.
- Empty(Span),
- /// A single literal.
- Literal(Literal),
- /// A range between two literals.
- Range(ClassSetRange),
- /// An ASCII character class, e.g., `[:alnum:]` or `[:punct:]`.
- Ascii(ClassAscii),
- /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
- Unicode(ClassUnicode),
- /// A perl character class, e.g., `\d` or `\W`.
- Perl(ClassPerl),
- /// A bracketed character class set, which may contain zero or more
- /// character ranges and/or zero or more nested classes. e.g.,
- /// `[a-zA-Z\pL]`.
- Bracketed(Box<ClassBracketed>),
- /// A union of items.
- Union(ClassSetUnion),
-}
-
-impl ClassSetItem {
- /// Return the span of this character class set item.
- pub fn span(&self) -> &Span {
- match *self {
- ClassSetItem::Empty(ref span) => span,
- ClassSetItem::Literal(ref x) => &x.span,
- ClassSetItem::Range(ref x) => &x.span,
- ClassSetItem::Ascii(ref x) => &x.span,
- ClassSetItem::Perl(ref x) => &x.span,
- ClassSetItem::Unicode(ref x) => &x.span,
- ClassSetItem::Bracketed(ref x) => &x.span,
- ClassSetItem::Union(ref x) => &x.span,
- }
- }
-}
-
-/// A single character class range in a set.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct ClassSetRange {
- /// The span of this range.
- pub span: Span,
- /// The start of this range.
- pub start: Literal,
- /// The end of this range.
- pub end: Literal,
-}
-
-impl ClassSetRange {
- /// Returns true if and only if this character class range is valid.
- ///
- /// The only case where a range is invalid is if its start is greater than
- /// its end.
- pub fn is_valid(&self) -> bool {
- self.start.c <= self.end.c
- }
-}
-
-/// A union of items inside a character class set.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct ClassSetUnion {
- /// The span of the items in this operation. e.g., the `a-z0-9` in
- /// `[^a-z0-9]`
- pub span: Span,
- /// The sequence of items that make up this union.
- pub items: Vec<ClassSetItem>,
-}
-
-impl ClassSetUnion {
- /// Push a new item in this union.
- ///
- /// The ending position of this union's span is updated to the ending
- /// position of the span of the item given. If the union is empty, then
- /// the starting position of this union is set to the starting position
- /// of this item.
- ///
- /// In other words, if you only use this method to add items to a union
- /// and you set the spans on each item correctly, then you should never
- /// need to adjust the span of the union directly.
- pub fn push(&mut self, item: ClassSetItem) {
- if self.items.is_empty() {
- self.span.start = item.span().start;
- }
- self.span.end = item.span().end;
- self.items.push(item);
- }
-
- /// Return this union as a character class set item.
- ///
- /// If this union contains zero items, then an empty union is
- /// returned. If this concatenation contains exactly 1 item, then the
- /// corresponding item is returned. Otherwise, ClassSetItem::Union is
- /// returned.
- pub fn into_item(mut self) -> ClassSetItem {
- match self.items.len() {
- 0 => ClassSetItem::Empty(self.span),
- 1 => self.items.pop().unwrap(),
- _ => ClassSetItem::Union(self),
- }
- }
-}
-
-/// A Unicode character class set operation.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct ClassSetBinaryOp {
- /// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--h-p]`.
- pub span: Span,
- /// The type of this set operation.
- pub kind: ClassSetBinaryOpKind,
- /// The left hand side of the operation.
- pub lhs: Box<ClassSet>,
- /// The right hand side of the operation.
- pub rhs: Box<ClassSet>,
-}
-
-/// The type of a Unicode character class set operation.
-///
-/// Note that this doesn't explicitly represent union since there is no
-/// explicit union operator. Concatenation inside a character class corresponds
-/// to the union operation.
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum ClassSetBinaryOpKind {
- /// The intersection of two sets, e.g., `\pN&&[a-z]`.
- Intersection,
- /// The difference of two sets, e.g., `\pN--[0-9]`.
- Difference,
- /// The symmetric difference of two sets. The symmetric difference is the
- /// set of elements belonging to one but not both sets.
- /// e.g., `[\pL~~[:ascii:]]`.
- SymmetricDifference,
-}
-
-/// A single zero-width assertion.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct Assertion {
- /// The span of this assertion.
- pub span: Span,
- /// The assertion kind, e.g., `\b` or `^`.
- pub kind: AssertionKind,
-}
-
-/// An assertion kind.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum AssertionKind {
- /// `^`
- StartLine,
- /// `$`
- EndLine,
- /// `\A`
- StartText,
- /// `\z`
- EndText,
- /// `\b`
- WordBoundary,
- /// `\B`
- NotWordBoundary,
- /// `\b{start}`
- WordBoundaryStart,
- /// `\b{end}`
- WordBoundaryEnd,
- /// `\<` (alias for `\b{start}`)
- WordBoundaryStartAngle,
- /// `\>` (alias for `\b{end}`)
- WordBoundaryEndAngle,
- /// `\b{start-half}`
- WordBoundaryStartHalf,
- /// `\b{end-half}`
- WordBoundaryEndHalf,
-}
-
-/// A repetition operation applied to a regular expression.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct Repetition {
- /// The span of this operation.
- pub span: Span,
- /// The actual operation.
- pub op: RepetitionOp,
- /// Whether this operation was applied greedily or not.
- pub greedy: bool,
- /// The regular expression under repetition.
- pub ast: Box<Ast>,
-}
-
-/// The repetition operator itself.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct RepetitionOp {
- /// The span of this operator. This includes things like `+`, `*?` and
- /// `{m,n}`.
- pub span: Span,
- /// The type of operation.
- pub kind: RepetitionKind,
-}
-
-/// The kind of a repetition operator.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum RepetitionKind {
- /// `?`
- ZeroOrOne,
- /// `*`
- ZeroOrMore,
- /// `+`
- OneOrMore,
- /// `{m,n}`
- Range(RepetitionRange),
-}
-
-/// A range repetition operator.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum RepetitionRange {
- /// `{m}`
- Exactly(u32),
- /// `{m,}`
- AtLeast(u32),
- /// `{m,n}`
- Bounded(u32, u32),
-}
-
-impl RepetitionRange {
- /// Returns true if and only if this repetition range is valid.
- ///
- /// The only case where a repetition range is invalid is if it is bounded
- /// and its start is greater than its end.
- pub fn is_valid(&self) -> bool {
- match *self {
- RepetitionRange::Bounded(s, e) if s > e => false,
- _ => true,
- }
- }
-}
-
-/// A grouped regular expression.
-///
-/// This includes both capturing and non-capturing groups. This does **not**
-/// include flag-only groups like `(?is)`, but does contain any group that
-/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
-/// `(?is:a)`.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct Group {
- /// The span of this group.
- pub span: Span,
- /// The kind of this group.
- pub kind: GroupKind,
- /// The regular expression in this group.
- pub ast: Box<Ast>,
-}
-
-impl Group {
- /// If this group is non-capturing, then this returns the (possibly empty)
- /// set of flags. Otherwise, `None` is returned.
- pub fn flags(&self) -> Option<&Flags> {
- match self.kind {
- GroupKind::NonCapturing(ref flags) => Some(flags),
- _ => None,
- }
- }
-
- /// Returns true if and only if this group is capturing.
- pub fn is_capturing(&self) -> bool {
- match self.kind {
- GroupKind::CaptureIndex(_) | GroupKind::CaptureName { .. } => true,
- GroupKind::NonCapturing(_) => false,
- }
- }
-
- /// Returns the capture index of this group, if this is a capturing group.
- ///
- /// This returns a capture index precisely when `is_capturing` is `true`.
- pub fn capture_index(&self) -> Option<u32> {
- match self.kind {
- GroupKind::CaptureIndex(i) => Some(i),
- GroupKind::CaptureName { ref name, .. } => Some(name.index),
- GroupKind::NonCapturing(_) => None,
- }
- }
-}
-
-/// The kind of a group.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum GroupKind {
- /// `(a)`
- CaptureIndex(u32),
- /// `(?<name>a)` or `(?P<name>a)`
- CaptureName {
- /// True if the `?P<` syntax is used and false if the `?<` syntax is used.
- starts_with_p: bool,
- /// The capture name.
- name: CaptureName,
- },
- /// `(?:a)` and `(?i:a)`
- NonCapturing(Flags),
-}
-
-/// A capture name.
-///
-/// This corresponds to the name itself between the angle brackets in, e.g.,
-/// `(?P<foo>expr)`.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct CaptureName {
- /// The span of this capture name.
- pub span: Span,
- /// The capture name.
- pub name: String,
- /// The capture index.
- pub index: u32,
-}
-
-#[cfg(feature = "arbitrary")]
-impl arbitrary::Arbitrary<'_> for CaptureName {
- fn arbitrary(
- u: &mut arbitrary::Unstructured,
- ) -> arbitrary::Result<CaptureName> {
- let len = u.arbitrary_len::<char>()?;
- if len == 0 {
- return Err(arbitrary::Error::NotEnoughData);
- }
- let mut name: String = String::new();
- for _ in 0..len {
- let ch: char = u.arbitrary()?;
- let cp = u32::from(ch);
- let ascii_letter_offset = u8::try_from(cp % 26).unwrap();
- let ascii_letter = b'a' + ascii_letter_offset;
- name.push(char::from(ascii_letter));
- }
- Ok(CaptureName { span: u.arbitrary()?, name, index: u.arbitrary()? })
- }
-
- fn size_hint(depth: usize) -> (usize, Option<usize>) {
- arbitrary::size_hint::and_all(&[
- Span::size_hint(depth),
- usize::size_hint(depth),
- u32::size_hint(depth),
- ])
- }
-}
-
-/// A group of flags that is not applied to a particular regular expression.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct SetFlags {
- /// The span of these flags, including the grouping parentheses.
- pub span: Span,
- /// The actual sequence of flags.
- pub flags: Flags,
-}
-
-/// A group of flags.
-///
-/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct Flags {
- /// The span of this group of flags.
- pub span: Span,
- /// A sequence of flag items. Each item is either a flag or a negation
- /// operator.
- pub items: Vec<FlagsItem>,
-}
-
-impl Flags {
- /// Add the given item to this sequence of flags.
- ///
- /// If the item was added successfully, then `None` is returned. If the
- /// given item is a duplicate, then `Some(i)` is returned, where
- /// `items[i].kind == item.kind`.
- pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
- for (i, x) in self.items.iter().enumerate() {
- if x.kind == item.kind {
- return Some(i);
- }
- }
- self.items.push(item);
- None
- }
-
- /// Returns the state of the given flag in this set.
- ///
- /// If the given flag is in the set but is negated, then `Some(false)` is
- /// returned.
- ///
- /// If the given flag is in the set and is not negated, then `Some(true)`
- /// is returned.
- ///
- /// Otherwise, `None` is returned.
- pub fn flag_state(&self, flag: Flag) -> Option<bool> {
- let mut negated = false;
- for x in &self.items {
- match x.kind {
- FlagsItemKind::Negation => {
- negated = true;
- }
- FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
- return Some(!negated);
- }
- _ => {}
- }
- }
- None
- }
-}
-
-/// A single item in a group of flags.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub struct FlagsItem {
- /// The span of this item.
- pub span: Span,
- /// The kind of this item.
- pub kind: FlagsItemKind,
-}
-
-/// The kind of an item in a group of flags.
-#[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum FlagsItemKind {
- /// A negation operator applied to all subsequent flags in the enclosing
- /// group.
- Negation,
- /// A single flag in a group.
- Flag(Flag),
-}
-
-impl FlagsItemKind {
- /// Returns true if and only if this item is a negation operator.
- pub fn is_negation(&self) -> bool {
- match *self {
- FlagsItemKind::Negation => true,
- _ => false,
- }
- }
-}
-
-/// A single flag.
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
-pub enum Flag {
- /// `i`
- CaseInsensitive,
- /// `m`
- MultiLine,
- /// `s`
- DotMatchesNewLine,
- /// `U`
- SwapGreed,
- /// `u`
- Unicode,
- /// `R`
- CRLF,
- /// `x`
- IgnoreWhitespace,
-}
-
-/// A custom `Drop` impl is used for `Ast` such that it uses constant stack
-/// space but heap space proportional to the depth of the `Ast`.
-impl Drop for Ast {
- fn drop(&mut self) {
- use core::mem;
-
- match *self {
- Ast::Empty(_)
- | Ast::Flags(_)
- | Ast::Literal(_)
- | Ast::Dot(_)
- | Ast::Assertion(_)
- | Ast::ClassUnicode(_)
- | Ast::ClassPerl(_)
- // Bracketed classes are recursive, they get their own Drop impl.
- | Ast::ClassBracketed(_) => return,
- Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
- Ast::Group(ref x) if !x.ast.has_subexprs() => return,
- Ast::Alternation(ref x) if x.asts.is_empty() => return,
- Ast::Concat(ref x) if x.asts.is_empty() => return,
- _ => {}
- }
-
- let empty_span = || Span::splat(Position::new(0, 0, 0));
- let empty_ast = || Ast::empty(empty_span());
- let mut stack = vec![mem::replace(self, empty_ast())];
- while let Some(mut ast) = stack.pop() {
- match ast {
- Ast::Empty(_)
- | Ast::Flags(_)
- | Ast::Literal(_)
- | Ast::Dot(_)
- | Ast::Assertion(_)
- | Ast::ClassUnicode(_)
- | Ast::ClassPerl(_)
- // Bracketed classes are recursive, so they get their own Drop
- // impl.
- | Ast::ClassBracketed(_) => {}
- Ast::Repetition(ref mut x) => {
- stack.push(mem::replace(&mut x.ast, empty_ast()));
- }
- Ast::Group(ref mut x) => {
- stack.push(mem::replace(&mut x.ast, empty_ast()));
- }
- Ast::Alternation(ref mut x) => {
- stack.extend(x.asts.drain(..));
- }
- Ast::Concat(ref mut x) => {
- stack.extend(x.asts.drain(..));
- }
- }
- }
- }
-}
-
-/// A custom `Drop` impl is used for `ClassSet` such that it uses constant
-/// stack space but heap space proportional to the depth of the `ClassSet`.
-impl Drop for ClassSet {
- fn drop(&mut self) {
- use core::mem;
-
- match *self {
- ClassSet::Item(ref item) => match *item {
- ClassSetItem::Empty(_)
- | ClassSetItem::Literal(_)
- | ClassSetItem::Range(_)
- | ClassSetItem::Ascii(_)
- | ClassSetItem::Unicode(_)
- | ClassSetItem::Perl(_) => return,
- ClassSetItem::Bracketed(ref x) => {
- if x.kind.is_empty() {
- return;
- }
- }
- ClassSetItem::Union(ref x) => {
- if x.items.is_empty() {
- return;
- }
- }
- },
- ClassSet::BinaryOp(ref op) => {
- if op.lhs.is_empty() && op.rhs.is_empty() {
- return;
- }
- }
- }
-
- let empty_span = || Span::splat(Position::new(0, 0, 0));
- let empty_set = || ClassSet::Item(ClassSetItem::Empty(empty_span()));
- let mut stack = vec![mem::replace(self, empty_set())];
- while let Some(mut set) = stack.pop() {
- match set {
- ClassSet::Item(ref mut item) => match *item {
- ClassSetItem::Empty(_)
- | ClassSetItem::Literal(_)
- | ClassSetItem::Range(_)
- | ClassSetItem::Ascii(_)
- | ClassSetItem::Unicode(_)
- | ClassSetItem::Perl(_) => {}
- ClassSetItem::Bracketed(ref mut x) => {
- stack.push(mem::replace(&mut x.kind, empty_set()));
- }
- ClassSetItem::Union(ref mut x) => {
- stack.extend(x.items.drain(..).map(ClassSet::Item));
- }
- },
- ClassSet::BinaryOp(ref mut op) => {
- stack.push(mem::replace(&mut op.lhs, empty_set()));
- stack.push(mem::replace(&mut op.rhs, empty_set()));
- }
- }
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- // We use a thread with an explicit stack size to test that our destructor
- // for Ast can handle arbitrarily sized expressions in constant stack
- // space. In case we run on a platform without threads (WASM?), we limit
- // this test to Windows/Unix.
- #[test]
- #[cfg(any(unix, windows))]
- fn no_stack_overflow_on_drop() {
- use std::thread;
-
- let run = || {
- let span = || Span::splat(Position::new(0, 0, 0));
- let mut ast = Ast::empty(span());
- for i in 0..200 {
- ast = Ast::group(Group {
- span: span(),
- kind: GroupKind::CaptureIndex(i),
- ast: Box::new(ast),
- });
- }
- assert!(!ast.is_empty());
- };
-
- // We run our test on a thread with a small stack size so we can
- // force the issue more easily.
- //
- // NOTE(2023-03-21): It turns out that some platforms (like FreeBSD)
- // will just barf with very small stack sizes. So we bump this up a bit
- // to give more room to breath. When I did this, I confirmed that if
- // I remove the custom `Drop` impl for `Ast`, then this test does
- // indeed still fail with a stack overflow. (At the time of writing, I
- // had to bump it all the way up to 32K before the test would pass even
- // without the custom `Drop` impl. So 16K seems like a safe number
- // here.)
- //
- // See: https://github.com/rust-lang/regex/issues/967
- thread::Builder::new()
- .stack_size(16 << 10)
- .spawn(run)
- .unwrap()
- .join()
- .unwrap();
- }
-
- // This tests that our `Ast` has a reasonable size. This isn't a hard rule
- // and it can be increased if given a good enough reason. But this test
- // exists because the size of `Ast` was at one point over 200 bytes on a
- // 64-bit target. Wow.
- #[test]
- fn ast_size() {
- let max = 2 * core::mem::size_of::<usize>();
- let size = core::mem::size_of::<Ast>();
- assert!(
- size <= max,
- "Ast size of {} bytes is bigger than suggested max {}",
- size,
- max
- );
- }
-}
diff --git a/vendor/regex-syntax/src/ast/parse.rs b/vendor/regex-syntax/src/ast/parse.rs
deleted file mode 100644
index 0c2a3526..00000000
--- a/vendor/regex-syntax/src/ast/parse.rs
+++ /dev/null
@@ -1,6377 +0,0 @@
-/*!
-This module provides a regular expression parser.
-*/
-
-use core::{
- borrow::Borrow,
- cell::{Cell, RefCell},
- mem,
-};
-
-use alloc::{
- boxed::Box,
- string::{String, ToString},
- vec,
- vec::Vec,
-};
-
-use crate::{
- ast::{self, Ast, Position, Span},
- either::Either,
- is_escapeable_character, is_meta_character,
-};
-
-type Result<T> = core::result::Result<T, ast::Error>;
-
-/// A primitive is an expression with no sub-expressions. This includes
-/// literals, assertions and non-set character classes. This representation
-/// is used as intermediate state in the parser.
-///
-/// This does not include ASCII character classes, since they can only appear
-/// within a set character class.
-#[derive(Clone, Debug, Eq, PartialEq)]
-enum Primitive {
- Literal(ast::Literal),
- Assertion(ast::Assertion),
- Dot(Span),
- Perl(ast::ClassPerl),
- Unicode(ast::ClassUnicode),
-}
-
-impl Primitive {
- /// Return the span of this primitive.
- fn span(&self) -> &Span {
- match *self {
- Primitive::Literal(ref x) => &x.span,
- Primitive::Assertion(ref x) => &x.span,
- Primitive::Dot(ref span) => span,
- Primitive::Perl(ref x) => &x.span,
- Primitive::Unicode(ref x) => &x.span,
- }
- }
-
- /// Convert this primitive into a proper AST.
- fn into_ast(self) -> Ast {
- match self {
- Primitive::Literal(lit) => Ast::literal(lit),
- Primitive::Assertion(assert) => Ast::assertion(assert),
- Primitive::Dot(span) => Ast::dot(span),
- Primitive::Perl(cls) => Ast::class_perl(cls),
- Primitive::Unicode(cls) => Ast::class_unicode(cls),
- }
- }
-
- /// Convert this primitive into an item in a character class.
- ///
- /// If this primitive is not a legal item (i.e., an assertion or a dot),
- /// then return an error.
- fn into_class_set_item<P: Borrow<Parser>>(
- self,
- p: &ParserI<'_, P>,
- ) -> Result<ast::ClassSetItem> {
- use self::Primitive::*;
- use crate::ast::ClassSetItem;
-
- match self {
- Literal(lit) => Ok(ClassSetItem::Literal(lit)),
- Perl(cls) => Ok(ClassSetItem::Perl(cls)),
- Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
- x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
- }
- }
-
- /// Convert this primitive into a literal in a character class. In
- /// particular, literals are the only valid items that can appear in
- /// ranges.
- ///
- /// If this primitive is not a legal item (i.e., a class, assertion or a
- /// dot), then return an error.
- fn into_class_literal<P: Borrow<Parser>>(
- self,
- p: &ParserI<'_, P>,
- ) -> Result<ast::Literal> {
- use self::Primitive::*;
-
- match self {
- Literal(lit) => Ok(lit),
- x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
- }
- }
-}
-
-/// Returns true if the given character is a hexadecimal digit.
-fn is_hex(c: char) -> bool {
- ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
-}
-
-/// Returns true if the given character is a valid in a capture group name.
-///
-/// If `first` is true, then `c` is treated as the first character in the
-/// group name (which must be alphabetic or underscore).
-fn is_capture_char(c: char, first: bool) -> bool {
- if first {
- c == '_' || c.is_alphabetic()
- } else {
- c == '_' || c == '.' || c == '[' || c == ']' || c.is_alphanumeric()
- }
-}
-
-/// A builder for a regular expression parser.
-///
-/// This builder permits modifying configuration options for the parser.
-#[derive(Clone, Debug)]
-pub struct ParserBuilder {
- ignore_whitespace: bool,
- nest_limit: u32,
- octal: bool,
- empty_min_range: bool,
-}
-
-impl Default for ParserBuilder {
- fn default() -> ParserBuilder {
- ParserBuilder::new()
- }
-}
-
-impl ParserBuilder {
- /// Create a new parser builder with a default configuration.
- pub fn new() -> ParserBuilder {
- ParserBuilder {
- ignore_whitespace: false,
- nest_limit: 250,
- octal: false,
- empty_min_range: false,
- }
- }
-
- /// Build a parser from this configuration with the given pattern.
- pub fn build(&self) -> Parser {
- Parser {
- pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
- capture_index: Cell::new(0),
- nest_limit: self.nest_limit,
- octal: self.octal,
- empty_min_range: self.empty_min_range,
- initial_ignore_whitespace: self.ignore_whitespace,
- ignore_whitespace: Cell::new(self.ignore_whitespace),
- comments: RefCell::new(vec![]),
- stack_group: RefCell::new(vec![]),
- stack_class: RefCell::new(vec![]),
- capture_names: RefCell::new(vec![]),
- scratch: RefCell::new(String::new()),
- }
- }
-
- /// Set the nesting limit for this parser.
- ///
- /// The nesting limit controls how deep the abstract syntax tree is allowed
- /// to be. If the AST exceeds the given limit (e.g., with too many nested
- /// groups), then an error is returned by the parser.
- ///
- /// The purpose of this limit is to act as a heuristic to prevent stack
- /// overflow for consumers that do structural induction on an `Ast` using
- /// explicit recursion. While this crate never does this (instead using
- /// constant stack space and moving the call stack to the heap), other
- /// crates may.
- ///
- /// This limit is not checked until the entire AST is parsed. Therefore,
- /// if callers want to put a limit on the amount of heap space used, then
- /// they should impose a limit on the length, in bytes, of the concrete
- /// pattern string. In particular, this is viable since this parser
- /// implementation will limit itself to heap space proportional to the
- /// length of the pattern string.
- ///
- /// Note that a nest limit of `0` will return a nest limit error for most
- /// patterns but not all. For example, a nest limit of `0` permits `a` but
- /// not `ab`, since `ab` requires a concatenation, which results in a nest
- /// depth of `1`. In general, a nest limit is not something that manifests
- /// in an obvious way in the concrete syntax, therefore, it should not be
- /// used in a granular way.
- pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
- self.nest_limit = limit;
- self
- }
-
- /// Whether to support octal syntax or not.
- ///
- /// Octal syntax is a little-known way of uttering Unicode codepoints in
- /// a regular expression. For example, `a`, `\x61`, `\u0061` and
- /// `\141` are all equivalent regular expressions, where the last example
- /// shows octal syntax.
- ///
- /// While supporting octal syntax isn't in and of itself a problem, it does
- /// make good error messages harder. That is, in PCRE based regex engines,
- /// syntax like `\0` invokes a backreference, which is explicitly
- /// unsupported in Rust's regex engine. However, many users expect it to
- /// be supported. Therefore, when octal support is disabled, the error
- /// message will explicitly mention that backreferences aren't supported.
- ///
- /// Octal syntax is disabled by default.
- pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
- self.octal = yes;
- self
- }
-
- /// Enable verbose mode in the regular expression.
- ///
- /// When enabled, verbose mode permits insignificant whitespace in many
- /// places in the regular expression, as well as comments. Comments are
- /// started using `#` and continue until the end of the line.
- ///
- /// By default, this is disabled. It may be selectively enabled in the
- /// regular expression by using the `x` flag regardless of this setting.
- pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
- self.ignore_whitespace = yes;
- self
- }
-
- /// Allow using `{,n}` as an equivalent to `{0,n}`.
- ///
- /// When enabled, the parser accepts `{,n}` as valid syntax for `{0,n}`.
- /// Most regular expression engines don't support the `{,n}` syntax, but
- /// some others do it, namely Python's `re` library.
- ///
- /// This is disabled by default.
- pub fn empty_min_range(&mut self, yes: bool) -> &mut ParserBuilder {
- self.empty_min_range = yes;
- self
- }
-}
-
-/// A regular expression parser.
-///
-/// This parses a string representation of a regular expression into an
-/// abstract syntax tree. The size of the tree is proportional to the length
-/// of the regular expression pattern.
-///
-/// A `Parser` can be configured in more detail via a [`ParserBuilder`].
-#[derive(Clone, Debug)]
-pub struct Parser {
- /// The current position of the parser.
- pos: Cell<Position>,
- /// The current capture index.
- capture_index: Cell<u32>,
- /// The maximum number of open parens/brackets allowed. If the parser
- /// exceeds this number, then an error is returned.
- nest_limit: u32,
- /// Whether to support octal syntax or not. When `false`, the parser will
- /// return an error helpfully pointing out that backreferences are not
- /// supported.
- octal: bool,
- /// The initial setting for `ignore_whitespace` as provided by
- /// `ParserBuilder`. It is used when resetting the parser's state.
- initial_ignore_whitespace: bool,
- /// Whether the parser supports `{,n}` repetitions as an equivalent to
- /// `{0,n}.`
- empty_min_range: bool,
- /// Whether whitespace should be ignored. When enabled, comments are
- /// also permitted.
- ignore_whitespace: Cell<bool>,
- /// A list of comments, in order of appearance.
- comments: RefCell<Vec<ast::Comment>>,
- /// A stack of grouped sub-expressions, including alternations.
- stack_group: RefCell<Vec<GroupState>>,
- /// A stack of nested character classes. This is only non-empty when
- /// parsing a class.
- stack_class: RefCell<Vec<ClassState>>,
- /// A sorted sequence of capture names. This is used to detect duplicate
- /// capture names and report an error if one is detected.
- capture_names: RefCell<Vec<ast::CaptureName>>,
- /// A scratch buffer used in various places. Mostly this is used to
- /// accumulate relevant characters from parts of a pattern.
- scratch: RefCell<String>,
-}
-
-/// ParserI is the internal parser implementation.
-///
-/// We use this separate type so that we can carry the provided pattern string
-/// along with us. In particular, a `Parser`'s internal state is not tied to
-/// any one pattern, but a `ParserI` is.
-///
-/// This type also lets us use `ParserI<&Parser>` in production code while
-/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
-/// work against the internal interface of the parser.
-#[derive(Clone, Debug)]
-struct ParserI<'s, P> {
- /// The parser state/configuration (owned or borrowed, depending on `P`).
- parser: P,
- /// The full regular expression provided by the user.
- pattern: &'s str,
-}
-
-/// GroupState represents a single stack frame while parsing nested groups
-/// and alternations. Each frame records the state up to an opening parenthesis
-/// or an alternation bar `|`.
-#[derive(Clone, Debug)]
-enum GroupState {
- /// This state is pushed whenever an opening group is found.
- Group {
- /// The concatenation immediately preceding the opening group.
- concat: ast::Concat,
- /// The group that has been opened. Its sub-AST is always empty.
- group: ast::Group,
- /// The `x` flag setting in effect before this group was opened.
- ignore_whitespace: bool,
- },
- /// This state is pushed whenever a new alternation branch is found. If
- /// an alternation branch is found and this state is at the top of the
- /// stack, then this state should be modified to include the new
- /// alternation.
- Alternation(ast::Alternation),
-}
-
-/// ClassState represents a single stack frame while parsing character classes.
-/// Each frame records the state up to an intersection, difference, symmetric
-/// difference or nested class.
-///
-/// Note that a parser's character class stack is only non-empty when parsing
-/// a character class. In all other cases, it is empty.
-#[derive(Clone, Debug)]
-enum ClassState {
- /// This state is pushed whenever an opening bracket is found.
- Open {
- /// The union of class items immediately preceding this class.
- union: ast::ClassSetUnion,
- /// The class that has been opened. Typically this just corresponds
- /// to the `[`, but it can also include `[^` since `^` indicates
- /// negation of the class.
- set: ast::ClassBracketed,
- },
- /// This state is pushed when an operator is seen. When popped, the stored
- /// set becomes the left-hand side of the operator.
- Op {
- /// The type of the operation, i.e., &&, -- or ~~.
- kind: ast::ClassSetBinaryOpKind,
- /// The left-hand side of the operator.
- lhs: ast::ClassSet,
- },
-}
-
-impl Parser {
- /// Create a new parser with a default configuration.
- ///
- /// The parser can be run with either the `parse` or `parse_with_comments`
- /// methods. The parse methods return an abstract syntax tree.
- ///
- /// To set configuration options on the parser, use [`ParserBuilder`].
- pub fn new() -> Parser {
- ParserBuilder::new().build()
- }
-
- /// Parse the regular expression into an abstract syntax tree.
- ///
- /// # Errors
- ///
- /// Returns an error if the pattern is not syntactically valid.
- pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
- ParserI::new(self, pattern).parse()
- }
-
- /// Parse the regular expression and return an abstract syntax tree with
- /// all of the comments found in the pattern.
- ///
- /// # Errors
- ///
- /// Returns an error if the pattern is not syntactically valid.
- pub fn parse_with_comments(
- &mut self,
- pattern: &str,
- ) -> Result<ast::WithComments> {
- ParserI::new(self, pattern).parse_with_comments()
- }
-
- /// Reset the internal state of a parser.
- ///
- /// This is called at the beginning of every parse. This prevents the
- /// parser from running with inconsistent state (say, if a previous
- /// invocation returned an error and the parser is reused).
- fn reset(&self) {
- // These settings should be in line with the construction
- // in `ParserBuilder::build`.
- self.pos.set(Position { offset: 0, line: 1, column: 1 });
- self.ignore_whitespace.set(self.initial_ignore_whitespace);
- self.comments.borrow_mut().clear();
- self.stack_group.borrow_mut().clear();
- self.stack_class.borrow_mut().clear();
- // Capture bookkeeping is per-pattern state too. Without clearing it, a
- // later parse would continue numbering groups where the previous one
- // stopped and would report names from the previous pattern as
- // duplicates.
- self.capture_index.set(0);
- self.capture_names.borrow_mut().clear();
- }
-}
-
-impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
- /// Pair a parser configuration with the pattern it is about to parse.
- fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
- Self { parser, pattern }
- }
-
- /// Borrow the underlying parser state/configuration.
- fn parser(&self) -> &Parser {
- let state: &Parser = self.parser.borrow();
- state
- }
-
- /// Borrow the full text of the pattern being parsed.
- fn pattern(&self) -> &str {
- let text: &str = self.pattern;
- text
- }
-
- /// Create a new error with the given span and error type.
- fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
- ast::Error { kind, pattern: self.pattern().to_string(), span }
- }
-
- /// The parser's current offset into the pattern string.
- ///
- /// Offsets count from `0` at the beginning of the regular expression
- /// pattern string.
- fn offset(&self) -> usize {
- let here = self.pos();
- here.offset
- }
-
- /// The parser's current line number.
- ///
- /// Line numbers count from `1`.
- fn line(&self) -> usize {
- let here = self.pos();
- here.line
- }
-
- /// The parser's current column.
- ///
- /// Columns count from `1` and start over whenever a `\n` is seen.
- fn column(&self) -> usize {
- let here = self.pos();
- here.column
- }
-
- /// Allocate and return the next capture index. Every successful call
- /// advances the parser's internal counter by one.
- ///
- /// The span given should correspond to the location of the opening
- /// parenthesis; it is used for error reporting only.
- ///
- /// If the counter cannot be incremented without overflowing, then a
- /// `CaptureLimitExceeded` error is returned and the counter is unchanged.
- fn next_capture_index(&self, span: Span) -> Result<u32> {
- let counter = &self.parser().capture_index;
- match counter.get().checked_add(1) {
- Some(next) => {
- counter.set(next);
- Ok(next)
- }
- None => {
- Err(self.error(span, ast::ErrorKind::CaptureLimitExceeded))
- }
- }
- }
-
- /// Record the given capture name with this parser, keeping the list of
- /// names sorted. If a capture with the same name was already recorded,
- /// then a `GroupNameDuplicate` error pointing at the original is returned.
- fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
- let mut known = self.parser().capture_names.borrow_mut();
- let lookup = known
- .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str());
- let slot = match lookup {
- Ok(existing) => {
- let original = known[existing].span;
- return Err(self.error(
- cap.span,
- ast::ErrorKind::GroupNameDuplicate { original },
- ));
- }
- // Not present yet: `slot` is where it belongs in sorted order.
- Err(slot) => slot,
- };
- known.insert(slot, cap.clone());
- Ok(())
- }
-
- /// Whether whitespace is currently being ignored by the parser.
- fn ignore_whitespace(&self) -> bool {
- let state = self.parser();
- state.ignore_whitespace.get()
- }
-
- /// The character the parser is currently positioned at.
- ///
- /// This panics if the current position does not point to a valid char.
- fn char(&self) -> char {
- let at = self.offset();
- self.char_at(at)
- }
-
- /// The character starting at byte offset `i` of the pattern.
- ///
- /// This panics if the given position does not point to a valid char.
- fn char_at(&self, i: usize) -> char {
- match self.pattern()[i..].chars().next() {
- Some(c) => c,
- None => panic!("expected char at offset {}", i),
- }
- }
-
- /// Advance the parser by one Unicode scalar value, updating the offset,
- /// line and column.
- ///
- /// Returns `false` if the parser was already at the end of the input, or
- /// if this advance reached the end of the input.
- fn bump(&self) -> bool {
- if self.is_eof() {
- return false;
- }
- let current = self.char();
- let before = self.pos();
- let (line, column) = if current == '\n' {
- (before.line.checked_add(1).unwrap(), 1)
- } else {
- (before.line, before.column.checked_add(1).unwrap())
- };
- let offset = before.offset + current.len_utf8();
- self.parser().pos.set(Position { offset, line, column });
- !self.is_eof()
- }
-
- /// Consume `prefix` if the unparsed remainder of the pattern starts with
- /// it. On a match the parser is advanced to the character immediately
- /// following the prefix and `true` is returned. Otherwise the parser is
- /// left where it is and `false` is returned.
- fn bump_if(&self, prefix: &str) -> bool {
- let rest = &self.pattern()[self.offset()..];
- if !rest.starts_with(prefix) {
- return false;
- }
- // One bump per character of the prefix.
- prefix.chars().for_each(|_| {
- self.bump();
- });
- true
- }
-
- /// Returns true if and only if the parser is positioned at a look-around
- /// prefix, i.e. one of `?=`, `?!`, `?<=` or `?<!`. The conditions under
- /// which this returns true must always correspond to a regular expression
- /// that would otherwise be considered invalid.
- ///
- /// Note that a matching prefix is consumed (the parser is bumped past it).
- ///
- /// This should only be called immediately after parsing the opening of
- /// a group or a set of flags.
- fn is_lookaround_prefix(&self) -> bool {
- // Checked in this order; `any` stops at the first prefix that matches.
- ["?=", "?!", "?<=", "?<!"].iter().any(|prefix| self.bump_if(prefix))
- }
-
- /// Advance by one character and then, if the `x` flag is enabled, skip any
- /// whitespace and comments that follow. Returns true if and only if the
- /// parser is not at EOF afterwards.
- fn bump_and_bump_space(&self) -> bool {
- if self.bump() {
- self.bump_space();
- !self.is_eof()
- } else {
- false
- }
- }
-
- /// When the `x` flag is enabled (i.e., whitespace insensitivity with
- /// comments), advance the parser past all whitespace and comments up to
- /// the next character that is neither. Each comment skipped is recorded
- /// in the parser's list of comments.
- ///
- /// When the `x` flag is disabled, this is a no-op.
- ///
- /// This should be used selectively throughout the parser where
- /// arbitrary whitespace is permitted when the `x` flag is enabled. For
- /// example, `{ 5 , 6}` is equivalent to `{5,6}`.
- fn bump_space(&self) {
- if !self.ignore_whitespace() {
- return;
- }
- loop {
- if self.is_eof() {
- return;
- }
- let c = self.char();
- if c.is_whitespace() {
- self.bump();
- continue;
- }
- if c != '#' {
- return;
- }
- // A comment runs from `#` through the next `\n` (or to EOF). The
- // stored text excludes both the `#` and the terminating `\n`.
- let start = self.pos();
- self.bump();
- let mut text = String::new();
- while !self.is_eof() {
- let next = self.char();
- self.bump();
- if next == '\n' {
- break;
- }
- text.push(next);
- }
- let span = Span::new(start, self.pos());
- self.parser()
- .comments
- .borrow_mut()
- .push(ast::Comment { span, comment: text });
- }
- }
-
- /// Look at the character after the current one without moving the parser.
- ///
- /// Returns `None` if the parser is at the end of the input or if the
- /// current character is the last one.
- fn peek(&self) -> Option<char> {
- if self.is_eof() {
- return None;
- }
- let next = self.offset() + self.char().len_utf8();
- self.pattern()[next..].chars().next()
- }
-
- /// Like peek, but when the parser is in whitespace insensitive mode, skip
- /// over whitespace and comments to find the next significant character.
- ///
- /// A comment runs from a `#` up to the next `\n` (or the end of the
- /// pattern). If only whitespace and comments remain after the current
- /// character, or the parser is at the end of the input, then this returns
- /// `None`. The parser itself is never advanced.
- fn peek_space(&self) -> Option<char> {
- if !self.ignore_whitespace() {
- return self.peek();
- }
- if self.is_eof() {
- return None;
- }
- let start = self.offset() + self.char().len_utf8();
- let mut in_comment = false;
- for c in self.pattern()[start..].chars() {
- if in_comment {
- // Everything inside a comment is skipped. This test must come
- // before the whitespace test because `\n` is itself whitespace;
- // otherwise the end of a comment would never be noticed.
- if c == '\n' {
- in_comment = false;
- }
- } else if c == '#' {
- in_comment = true;
- } else if !c.is_whitespace() {
- return Some(c);
- }
- }
- None
- }
-
- /// Returns true when the parser's offset is at the end of the pattern, in
- /// which case the next call to `bump` would return false.
- fn is_eof(&self) -> bool {
- let end = self.pattern().len();
- end == self.offset()
- }
-
- /// The parser's current position: offset, line and column together.
- fn pos(&self) -> Position {
- let state = self.parser();
- state.pos.get()
- }
-
- /// Build a span located at the parser's current position, with both the
- /// start and the end set to it.
- fn span(&self) -> Span {
- let here = self.pos();
- Span::splat(here)
- }
-
- /// Build a span covering exactly the current character.
- ///
- /// If the current character is `\n`, the end of the span is on the next
- /// line at column `1`.
- fn span_char(&self) -> Span {
- let start = self.pos();
- let c = self.char();
- let offset = start.offset.checked_add(c.len_utf8()).unwrap();
- let column = start.column.checked_add(1).unwrap();
- let end = if c == '\n' {
- Position { offset, line: start.line + 1, column: 1 }
- } else {
- Position { offset, line: start.line, column }
- };
- Span::new(start, end)
- }
-
- /// Finish the current alternation branch and record it on the parser's
- /// internal stack. If an alternation is already on top of the stack, the
- /// branch is appended to it instead of starting a new one.
- ///
- /// The concatenation given is the branch that just ended. The
- /// concatenation returned is empty and begins the next branch.
- ///
- /// This assumes the parser is currently positioned at `|` and will advance
- /// the parser to the character following `|`.
- #[inline(never)]
- fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
- assert_eq!(self.char(), '|');
- // The branch ends just before the `|`.
- concat.span.end = self.pos();
- self.push_or_add_alternation(concat);
- self.bump();
- let next_branch = ast::Concat { span: self.span(), asts: vec![] };
- Ok(next_branch)
- }
-
- /// Record one alternation branch in the parser's internal stack of state:
- /// append it to the alternation on top of the stack if there is one,
- /// otherwise push a new alternation holding just this branch.
- fn push_or_add_alternation(&self, concat: ast::Concat) {
- let mut stack = self.parser().stack_group.borrow_mut();
- if let Some(GroupState::Alternation(alts)) = stack.last_mut() {
- alts.asts.push(concat.into_ast());
- return;
- }
- let span = Span::new(concat.span.start, self.pos());
- let branches = vec![concat.into_ast()];
- stack.push(GroupState::Alternation(ast::Alternation {
- span,
- asts: branches,
- }));
- }
-
- /// Parse the opening of a group and push it (together with its parent
- /// concatenation) on to the parser's internal stack. Return a fresh, empty
- /// concatenation for the group's sub-AST.
- ///
- /// If a set of flags was found instead (with no group), then nothing is
- /// pushed and the given concatenation is returned with the flags appended.
- /// In either case, an explicit `x` flag setting takes effect immediately.
- ///
- /// This assumes that the parser is currently positioned on the opening
- /// parenthesis. It advances the parser to the character at the start
- /// of the sub-expression (or adjoining expression).
- ///
- /// If there was a problem parsing the start of the group, then an error
- /// is returned.
- #[inline(never)]
- fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
- assert_eq!(self.char(), '(');
- let group = match self.parse_group()? {
- Either::Left(set) => {
- let state = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
- if let Some(enabled) = state {
- self.parser().ignore_whitespace.set(enabled);
- }
- concat.asts.push(Ast::flags(set));
- return Ok(concat);
- }
- Either::Right(group) => group,
- };
- // Remember the outer `x` setting so it can be restored when the group
- // is closed; the group's own flags (if any) may override it inside.
- let outer = self.ignore_whitespace();
- let inner = match group.flags() {
- Some(flags) => flags
- .flag_state(ast::Flag::IgnoreWhitespace)
- .unwrap_or(outer),
- None => outer,
- };
- self.parser().stack_group.borrow_mut().push(GroupState::Group {
- concat,
- group,
- ignore_whitespace: outer,
- });
- self.parser().ignore_whitespace.set(inner);
- Ok(ast::Concat { span: self.span(), asts: vec![] })
- }
-
- /// Pop a group frame from the parser's internal stack and install the given
- /// concatenation (or the pending alternation ending with it) as the group's
- /// sub-AST. Return the prior concatenation with the finished group appended.
- ///
- /// This assumes that the parser is currently positioned on the closing
- /// parenthesis and advances the parser to the character following the `)`.
- ///
- /// If no such group could be popped, then an unopened group error is
- /// returned. The `x` flag setting saved in the group frame is restored.
- #[inline(never)]
- fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
- use self::GroupState::*;
-
- assert_eq!(self.char(), ')');
- let mut stack = self.parser().stack_group.borrow_mut();
- let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
- .pop()
- {
- Some(Group { concat, group, ignore_whitespace }) => {
- (concat, group, ignore_whitespace, None)
- }
- Some(Alternation(alt)) => match stack.pop() {
- Some(Group { concat, group, ignore_whitespace }) => {
- (concat, group, ignore_whitespace, Some(alt))
- }
- None | Some(Alternation(_)) => {
- return Err(self.error(
- self.span_char(),
- ast::ErrorKind::GroupUnopened,
- ));
- }
- },
- None => {
- return Err(self
- .error(self.span_char(), ast::ErrorKind::GroupUnopened));
- }
- };
- self.parser().ignore_whitespace.set(ignore_whitespace);
- group_concat.span.end = self.pos();
- self.bump();
- group.span.end = self.pos();
- match alt {
- Some(mut alt) => {
- alt.span.end = group_concat.span.end;
- alt.asts.push(group_concat.into_ast());
- group.ast = Box::new(alt.into_ast());
- }
- None => {
- group.ast = Box::new(group_concat.into_ast());
- }
- }
- prior_concat.asts.push(Ast::group(group));
- Ok(prior_concat)
- }
-
- /// Pop the last state from the parser's internal stack, if it exists, and
- /// add the given concatenation to it. There either must be no state or a
- /// single alternation item on the stack. A group frame still on the stack
- /// produces a `GroupUnclosed` error reported at that group's span.
- ///
- /// This assumes that the parser has advanced to the end.
- #[inline(never)]
- fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
- concat.span.end = self.pos();
- let mut stack = self.parser().stack_group.borrow_mut();
- let ast = match stack.pop() {
- None => Ok(concat.into_ast()),
- Some(GroupState::Alternation(mut alt)) => {
- alt.span.end = self.pos();
- alt.asts.push(concat.into_ast());
- Ok(Ast::alternation(alt))
- }
- Some(GroupState::Group { group, .. }) => {
- return Err(
- self.error(group.span, ast::ErrorKind::GroupUnclosed)
- );
- }
- };
- // If we try to pop again, there should be nothing left on the stack.
- match stack.pop() {
- None => ast,
- Some(GroupState::Alternation(_)) => {
- // This unreachable is unfortunate. This case can't happen
- // because the only way we can be here is if there were two
- // `GroupState::Alternation`s adjacent in the parser's stack,
- // which we guarantee to never happen because we never push a
- // `GroupState::Alternation` if one is already at the top of
- // the stack.
- unreachable!()
- }
- Some(GroupState::Group { group, .. }) => {
- Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
- }
- }
- }
-
- /// Parse the opening of a character class and save the current class
- /// parsing context on the parser's class stack. This assumes that the
- /// parser is positioned at an opening `[`. The given union should be the
- /// union of set items built up before seeing the `[`.
- ///
- /// If there was a problem parsing the opening of the class, then an error
- /// is returned. Otherwise, a new union of set items for the class is
- /// returned (which may be populated with either a `]` or a `-`).
- #[inline(never)]
- fn push_class_open(
- &self,
- parent_union: ast::ClassSetUnion,
- ) -> Result<ast::ClassSetUnion> {
- assert_eq!(self.char(), '[');
-
- let (set, fresh_union) = self.parse_set_class_open()?;
- let frame = ClassState::Open { union: parent_union, set };
- self.parser().stack_class.borrow_mut().push(frame);
- Ok(fresh_union)
- }
-
- /// Parse the end of a character class set and pop the character class
- /// parser stack. The union given corresponds to the last union built
- /// before seeing the closing `]`. The union returned corresponds to the
- /// parent character class set with the nested class added to it.
- ///
- /// This assumes that the parser is positioned at a `]` and will advance
- /// the parser to the character immediately following the `]`.
- ///
- /// If the stack is empty after popping, then this returns the final
- /// "top-level" character class AST (where a "top-level" character class
- /// is one that is not nested inside any other character class).
- ///
- /// If there is no corresponding opening bracket on the parser's stack,
- /// then this panics, since that state is considered impossible (see below).
- #[inline(never)]
- fn pop_class(
- &self,
- nested_union: ast::ClassSetUnion,
- ) -> Result<Either<ast::ClassSetUnion, ast::ClassBracketed>> {
- assert_eq!(self.char(), ']');
-
- let item = ast::ClassSet::Item(nested_union.into_item());
- let prevset = self.pop_class_op(item);
- let mut stack = self.parser().stack_class.borrow_mut();
- match stack.pop() {
- None => {
- // We can never observe an empty stack:
- //
- // 1) We are guaranteed to start with a non-empty stack since
- // the character class parser is only initiated when it sees
- // a `[`.
- // 2) If we ever observe an empty stack while popping after
- // seeing a `]`, then we signal the character class parser
- // to terminate.
- panic!("unexpected empty character class stack")
- }
- Some(ClassState::Op { .. }) => {
- // This panic is unfortunate, but this case is impossible
- // since we already popped the Op state if one exists above.
- // Namely, every push to the class parser stack is guarded by
- // whether an existing Op is already on the top of the stack.
- // If it is, the existing Op is modified. That is, the stack
- // can never have consecutive Op states.
- panic!("unexpected ClassState::Op")
- }
- Some(ClassState::Open { mut union, mut set }) => {
- self.bump();
- set.span.end = self.pos();
- set.kind = prevset;
- if stack.is_empty() {
- Ok(Either::Right(set))
- } else {
- union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
- Ok(Either::Left(union))
- }
- }
- }
- }
-
- /// Return an "unclosed class" error whose span points to the most
- /// recently opened class.
- ///
- /// This should only be called while parsing a character class.
- #[inline(never)]
- fn unclosed_class_error(&self) -> ast::Error {
- for state in self.parser().stack_class.borrow().iter().rev() {
- if let ClassState::Open { ref set, .. } = *state {
- return self.error(set.span, ast::ErrorKind::ClassUnclosed);
- }
- }
- // We are guaranteed to have a non-empty stack with at least
- // one open bracket, so we should never get here.
- panic!("no open character class found")
- }
-
- /// Save the class items built so far on the class parser's stack as the
- /// left-hand side of the given operator. If an operator is already
- /// pending on top of the stack, the items first become its right-hand
- /// side and the combined set becomes the new left-hand side.
- ///
- /// A fresh set union is returned, which should be used to build the right
- /// hand side of this operator.
- #[inline(never)]
- fn push_class_op(
- &self,
- next_kind: ast::ClassSetBinaryOpKind,
- next_union: ast::ClassSetUnion,
- ) -> ast::ClassSetUnion {
- let items = ast::ClassSet::Item(next_union.into_item());
- let lhs = self.pop_class_op(items);
- let frame = ClassState::Op { kind: next_kind, lhs };
- self.parser().stack_class.borrow_mut().push(frame);
- ast::ClassSetUnion { span: self.span(), items: vec![] }
- }
-
- /// Resolve a pending binary operator on the character class parser stack.
- /// If the top of the stack is an open class (not an operation), the stack
- /// is left as it was and the given set is returned unchanged. If the top
- /// of the stack is an operation, it is popped and the given set is used as
- /// its right-hand side; the resulting binary operation is returned as a
- /// set.
- #[inline(never)]
- fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
- let mut stack = self.parser().stack_class.borrow_mut();
- match stack.pop() {
- None => unreachable!(),
- Some(open @ ClassState::Open { .. }) => {
- // Not an operator: put the frame back untouched.
- stack.push(open);
- rhs
- }
- Some(ClassState::Op { kind, lhs }) => {
- let span = Span::new(lhs.span().start, rhs.span().end);
- ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
- span,
- kind,
- lhs: Box::new(lhs),
- rhs: Box::new(rhs),
- })
- }
- }
- }
-}
-
-impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
- /// Parse the pattern into an abstract syntax tree, discarding any comments
- /// collected along the way.
- fn parse(&self) -> Result<Ast> {
- let with_comments = self.parse_with_comments()?;
- Ok(with_comments.ast)
- }
-
- /// Parse the whole pattern and return its abstract syntax tree together
- /// with all of the comments found in the pattern.
- ///
- /// # Panics
- ///
- /// Panics if the parser's offset is not `0` on entry.
- ///
- /// NOTE(review): the offset check runs before `reset()`, so a `Parser`
- /// whose position was advanced by an earlier parse appears to trip it --
- /// confirm whether that is intended.
- fn parse_with_comments(&self) -> Result<ast::WithComments> {
- assert_eq!(self.offset(), 0, "parser can only be used once");
- self.parser().reset();
- let mut concat = ast::Concat { span: self.span(), asts: vec![] };
- loop {
- self.bump_space();
- if self.is_eof() {
- break;
- }
- let c = self.char();
- match c {
- '(' => concat = self.push_group(concat)?,
- ')' => concat = self.pop_group(concat)?,
- '|' => concat = self.push_alternate(concat)?,
- '[' => {
- let class = self.parse_set_class()?;
- concat.asts.push(Ast::class_bracketed(class));
- }
- '?' | '*' | '+' => {
- let kind = match c {
- '?' => ast::RepetitionKind::ZeroOrOne,
- '*' => ast::RepetitionKind::ZeroOrMore,
- _ => ast::RepetitionKind::OneOrMore,
- };
- concat = self.parse_uncounted_repetition(concat, kind)?;
- }
- '{' => concat = self.parse_counted_repetition(concat)?,
- _ => {
- let primitive = self.parse_primitive()?;
- concat.asts.push(primitive.into_ast());
- }
- }
- }
- let ast = self.pop_group_end(concat)?;
- NestLimiter::new(self).check(&ast)?;
- // Hand the collected comments to the caller, leaving an empty list.
- let comments = mem::take(&mut *self.parser().comments.borrow_mut());
- Ok(ast::WithComments { ast, comments })
- }
-
- /// Parses an uncounted repetition operation, i.e. one of ?, * and + (but
- /// not the {m,n} syntax). The given `kind` should correspond to the
- /// operator observed by the caller.
- ///
- /// This assumes that the parser is currently positioned at the repetition
- /// operator and advances the parser to the first character after the
- /// operator. (Note that the operator may include a single additional `?`,
- /// which makes the operator ungreedy.)
- ///
- /// The caller should include the concatenation that is being built. The
- /// concatenation returned includes the repetition operator applied to the
- /// last expression in the given concatenation. If there is no such
- /// expression, or it is empty or a set of flags, then a
- /// `RepetitionMissing` error is returned.
- #[inline(never)]
- fn parse_uncounted_repetition(
- &self,
- mut concat: ast::Concat,
- kind: ast::RepetitionKind,
- ) -> Result<ast::Concat> {
- assert!(
- self.char() == '?' || self.char() == '*' || self.char() == '+'
- );
- let op_start = self.pos();
- let ast = match concat.asts.pop() {
- None | Some(Ast::Empty(_)) | Some(Ast::Flags(_)) => {
- return Err(
- self.error(self.span(), ast::ErrorKind::RepetitionMissing)
- );
- }
- Some(ast) => ast,
- };
- // A `?` directly after the operator makes it ungreedy.
- let greedy = !(self.bump() && self.char() == '?');
- if !greedy {
- self.bump();
- }
- let end = self.pos();
- let op = ast::RepetitionOp { span: Span::new(op_start, end), kind };
- concat.asts.push(Ast::repetition(ast::Repetition {
- span: ast.span().with_end(end),
- op,
- greedy,
- ast: Box::new(ast),
- }));
- Ok(concat)
- }
-
- /// Parses a counted repetition operation. A counted repetition operator
- /// corresponds to the {m}, {m,} and {m,n} syntax, and does not include the
- /// ?, * or + operators. `{,n}` means `{0,n}` if `empty_min_range` is set.
- ///
- /// This assumes that the parser is currently positioned at the opening `{`
- /// and advances the parser to the first character after the operator.
- /// (Note that the operator may include a single additional `?`, which
- /// makes the operator ungreedy.)
- ///
- /// The caller should include the concatenation that is being built. The
- /// concatenation returned includes the repetition operator applied to the
- /// last expression in the given concatenation.
- #[inline(never)]
- fn parse_counted_repetition(
- &self,
- mut concat: ast::Concat,
- ) -> Result<ast::Concat> {
- assert!(self.char() == '{');
- let start = self.pos();
- let ast = match concat.asts.pop() {
- Some(ast) => ast,
- None => {
- return Err(
- self.error(self.span(), ast::ErrorKind::RepetitionMissing)
- )
- }
- };
- match ast {
- Ast::Empty(_) | Ast::Flags(_) => {
- return Err(
- self.error(self.span(), ast::ErrorKind::RepetitionMissing)
- )
- }
- _ => {}
- }
- if !self.bump_and_bump_space() {
- return Err(self.error(
- Span::new(start, self.pos()),
- ast::ErrorKind::RepetitionCountUnclosed,
- ));
- }
- let count_start = specialize_err(
- self.parse_decimal(),
- ast::ErrorKind::DecimalEmpty,
- ast::ErrorKind::RepetitionCountDecimalEmpty,
- );
- if self.is_eof() {
- return Err(self.error(
- Span::new(start, self.pos()),
- ast::ErrorKind::RepetitionCountUnclosed,
- ));
- }
- let range = if self.char() == ',' {
- if !self.bump_and_bump_space() {
- return Err(self.error(
- Span::new(start, self.pos()),
- ast::ErrorKind::RepetitionCountUnclosed,
- ));
- }
- if self.char() != '}' {
- let count_start = match count_start {
- Ok(c) => c,
- Err(err)
- if err.kind
- == ast::ErrorKind::RepetitionCountDecimalEmpty =>
- {
- if self.parser().empty_min_range {
- 0
- } else {
- return Err(err);
- }
- }
- err => err?,
- };
- let count_end = specialize_err(
- self.parse_decimal(),
- ast::ErrorKind::DecimalEmpty,
- ast::ErrorKind::RepetitionCountDecimalEmpty,
- )?;
- ast::RepetitionRange::Bounded(count_start, count_end)
- } else {
- ast::RepetitionRange::AtLeast(count_start?)
- }
- } else {
- ast::RepetitionRange::Exactly(count_start?)
- };
-
- if self.is_eof() || self.char() != '}' {
- return Err(self.error(
- Span::new(start, self.pos()),
- ast::ErrorKind::RepetitionCountUnclosed,
- ));
- }
-
- let mut greedy = true;
- if self.bump_and_bump_space() && self.char() == '?' {
- greedy = false;
- self.bump();
- }
-
- let op_span = Span::new(start, self.pos());
- if !range.is_valid() {
- return Err(
- self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
- );
- }
- concat.asts.push(Ast::repetition(ast::Repetition {
- span: ast.span().with_end(self.pos()),
- op: ast::RepetitionOp {
- span: op_span,
- kind: ast::RepetitionKind::Range(range),
- },
- greedy,
- ast: Box::new(ast),
- }));
- Ok(concat)
- }
-
- /// Parse a group (which contains a sub-expression) or a set of flags.
- ///
- /// If a group was found, then it is returned with an empty AST. If a set
- /// of flags is found, then that set is returned.
- ///
- /// The parser should be positioned at the opening parenthesis.
- ///
- /// This advances the parser to the first character of the sub-expression
- /// (in the case of a group) or to the character immediately following the
- /// closing parenthesis of the set of flags.
- ///
- /// # Errors
- ///
- /// If flags are given and incorrectly specified, then a corresponding
- /// error is returned.
- ///
- /// If a capture name is given and it is incorrectly specified, or if a
- /// look-around prefix is found, then a corresponding error is returned.
- #[inline(never)]
- fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
- assert_eq!(self.char(), '(');
- let open_span = self.span_char();
- self.bump();
- self.bump_space();
- if self.is_lookaround_prefix() {
- return Err(self.error(
- Span::new(open_span.start, self.span().end),
- ast::ErrorKind::UnsupportedLookAround,
- ));
- }
- let inner_span = self.span();
- let mut starts_with_p = true;
- if self.bump_if("?P<") || {
- starts_with_p = false;
- self.bump_if("?<")
- } {
- let capture_index = self.next_capture_index(open_span)?;
- let name = self.parse_capture_name(capture_index)?;
- Ok(Either::Right(ast::Group {
- span: open_span,
- kind: ast::GroupKind::CaptureName { starts_with_p, name },
- ast: Box::new(Ast::empty(self.span())),
- }))
- } else if self.bump_if("?") {
- if self.is_eof() {
- return Err(
- self.error(open_span, ast::ErrorKind::GroupUnclosed)
- );
- }
- let flags = self.parse_flags()?;
- let char_end = self.char();
- self.bump();
- if char_end == ')' {
- // We don't allow empty flags, e.g., `(?)`. We instead
- // interpret it as a repetition operator missing its argument.
- if flags.items.is_empty() {
- return Err(self.error(
- inner_span,
- ast::ErrorKind::RepetitionMissing,
- ));
- }
- Ok(Either::Left(ast::SetFlags {
- span: Span { end: self.pos(), ..open_span },
- flags,
- }))
- } else {
- assert_eq!(char_end, ':');
- Ok(Either::Right(ast::Group {
- span: open_span,
- kind: ast::GroupKind::NonCapturing(flags),
- ast: Box::new(Ast::empty(self.span())),
- }))
- }
- } else {
- let capture_index = self.next_capture_index(open_span)?;
- Ok(Either::Right(ast::Group {
- span: open_span,
- kind: ast::GroupKind::CaptureIndex(capture_index),
- ast: Box::new(Ast::empty(self.span())),
- }))
- }
- }
-
- /// Parses a capture group name and records it with the parser. Assumes
- /// that the parser is positioned at the first character in the name
- /// following the opening `<` (and may possibly be EOF). This advances the
- /// parser to the first character following the closing `>`.
- ///
- /// The caller must provide the capture index of the group for this name.
- ///
- /// An error is returned if the pattern ends before the closing `>`, if the
- /// name is empty or contains an invalid character, or if the name has
- /// already been used.
- #[inline(never)]
- fn parse_capture_name(
- &self,
- capture_index: u32,
- ) -> Result<ast::CaptureName> {
- if self.is_eof() {
- return Err(self
- .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
- }
- let start = self.pos();
- // Scan up to (but not past) the closing `>` or the end of the input.
- while self.char() != '>' {
- let is_first = self.pos() == start;
- if !is_capture_char(self.char(), is_first) {
- return Err(self.error(
- self.span_char(),
- ast::ErrorKind::GroupNameInvalid,
- ));
- }
- if !self.bump() {
- break;
- }
- }
- let end = self.pos();
- if self.is_eof() {
- return Err(self
- .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
- }
- assert_eq!(self.char(), '>');
- self.bump();
- let text = &self.pattern()[start.offset..end.offset];
- if text.is_empty() {
- return Err(self.error(
- Span::new(start, start),
- ast::ErrorKind::GroupNameEmpty,
- ));
- }
- let capname = ast::CaptureName {
- span: Span::new(start, end),
- name: text.to_string(),
- index: capture_index,
- };
- self.add_capture_name(&capname)?;
- Ok(capname)
- }
-
- /// Parse a sequence of flags starting at the current character.
- ///
- /// This advances the parser to the character immediately following the
- /// flags, which is guaranteed to be either `:` or `)`.
- ///
- /// # Errors
- ///
- /// If any flags are duplicated, then an error is returned.
- ///
- /// If the negation operator is used more than once, then an error is
- /// returned.
- ///
- /// If the negation operator is not followed by any flags, or if the pattern
- /// ends before a `:` or `)` is seen, then an error is returned.
- #[inline(never)]
- fn parse_flags(&self) -> Result<ast::Flags> {
- let mut flags = ast::Flags { span: self.span(), items: vec![] };
- let mut last_was_negation = None;
- while self.char() != ':' && self.char() != ')' {
- if self.char() == '-' {
- last_was_negation = Some(self.span_char());
- let item = ast::FlagsItem {
- span: self.span_char(),
- kind: ast::FlagsItemKind::Negation,
- };
- if let Some(i) = flags.add_item(item) {
- return Err(self.error(
- self.span_char(),
- ast::ErrorKind::FlagRepeatedNegation {
- original: flags.items[i].span,
- },
- ));
- }
- } else {
- last_was_negation = None;
- let item = ast::FlagsItem {
- span: self.span_char(),
- kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
- };
- if let Some(i) = flags.add_item(item) {
- return Err(self.error(
- self.span_char(),
- ast::ErrorKind::FlagDuplicate {
- original: flags.items[i].span,
- },
- ));
- }
- }
- if !self.bump() {
- return Err(
- self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
- );
- }
- }
- if let Some(span) = last_was_negation {
- return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
- }
- flags.span.end = self.pos();
- Ok(flags)
- }
-
-    /// Interprets the character under the parser as a single flag letter.
-    ///
-    /// The parser position is left untouched.
-    ///
-    /// # Errors
-    ///
-    /// Returns `FlagUnrecognized`, spanning the current character, when the
-    /// letter is not one of `i`, `m`, `s`, `U`, `u`, `R` or `x`.
-    #[inline(never)]
-    fn parse_flag(&self) -> Result<ast::Flag> {
-        let flag = match self.char() {
-            'i' => ast::Flag::CaseInsensitive,
-            'm' => ast::Flag::MultiLine,
-            's' => ast::Flag::DotMatchesNewLine,
-            'U' => ast::Flag::SwapGreed,
-            'u' => ast::Flag::Unicode,
-            'R' => ast::Flag::CRLF,
-            'x' => ast::Flag::IgnoreWhitespace,
-            _ => {
-                let kind = ast::ErrorKind::FlagUnrecognized;
-                return Err(self.error(self.span_char(), kind));
-            }
-        };
-        Ok(flag)
-    }
-
-    /// Parses one primitive: a literal, `.`, `^`, `$`, or an escape sequence.
-    ///
-    /// The caller is expected to have ruled out every non-primitive
-    /// construct already. Any character that is not `\`, `.`, `^` or `$` is
-    /// therefore taken as a verbatim literal (so a `|` seen here becomes a
-    /// literal `|`).
-    ///
-    /// Afterwards the parser sits on the first character following the
-    /// primitive.
-    fn parse_primitive(&self) -> Result<Primitive> {
-        let c = self.char();
-        if c == '\\' {
-            // Escapes take care of their own span and of advancing.
-            return self.parse_escape();
-        }
-        // Everything else is exactly one character wide.
-        let span = self.span_char();
-        let primitive = match c {
-            '.' => Primitive::Dot(span),
-            '^' => Primitive::Assertion(ast::Assertion {
-                span,
-                kind: ast::AssertionKind::StartLine,
-            }),
-            '$' => Primitive::Assertion(ast::Assertion {
-                span,
-                kind: ast::AssertionKind::EndLine,
-            }),
-            _ => Primitive::Literal(ast::Literal {
-                span,
-                kind: ast::LiteralKind::Verbatim,
-                c,
-            }),
-        };
-        self.bump();
-        Ok(primitive)
-    }
-
- /// Parse an escape sequence as a primitive AST.
- ///
- /// This assumes the parser is positioned at the start of the escape
- /// sequence, i.e., `\`. It advances the parser to the first position
- /// immediately following the escape sequence.
- ///
- /// # Errors
- ///
- /// An error is returned if the pattern ends right after the `\`, if a
- /// digit escape is used while octal support is disabled (reported as an
- /// unsupported backreference), if a helper for hex or Unicode class
- /// escapes fails, or if the escaped character is not recognized.
- #[inline(never)]
- fn parse_escape(&self) -> Result<Primitive> {
- assert_eq!(self.char(), '\\');
- let start = self.pos();
- if !self.bump() {
- return Err(self.error(
- Span::new(start, self.pos()),
- ast::ErrorKind::EscapeUnexpectedEof,
- ));
- }
- let c = self.char();
- // Put some of the more complicated routines into helpers.
- // Each helper starts at `c`, so its span is widened afterwards to also
- // cover the leading `\`.
- match c {
- '0'..='7' => {
- if !self.parser().octal {
- return Err(self.error(
- Span::new(start, self.span_char().end),
- ast::ErrorKind::UnsupportedBackreference,
- ));
- }
- let mut lit = self.parse_octal();
- lit.span.start = start;
- return Ok(Primitive::Literal(lit));
- }
- // With octal enabled, `\8` and `\9` fall through to the generic
- // handling below.
- '8'..='9' if !self.parser().octal => {
- return Err(self.error(
- Span::new(start, self.span_char().end),
- ast::ErrorKind::UnsupportedBackreference,
- ));
- }
- 'x' | 'u' | 'U' => {
- let mut lit = self.parse_hex()?;
- lit.span.start = start;
- return Ok(Primitive::Literal(lit));
- }
- 'p' | 'P' => {
- let mut cls = self.parse_unicode_class()?;
- cls.span.start = start;
- return Ok(Primitive::Unicode(cls));
- }
- 'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
- let mut cls = self.parse_perl_class();
- cls.span.start = start;
- return Ok(Primitive::Perl(cls));
- }
- _ => {}
- }
-
- // Handle all of the one letter sequences inline.
- // From here on the parser is already past `c`; `span` covers `\` and `c`.
- self.bump();
- let span = Span::new(start, self.pos());
- if is_meta_character(c) {
- return Ok(Primitive::Literal(ast::Literal {
- span,
- kind: ast::LiteralKind::Meta,
- c,
- }));
- }
- if is_escapeable_character(c) {
- return Ok(Primitive::Literal(ast::Literal {
- span,
- kind: ast::LiteralKind::Superfluous,
- c,
- }));
- }
- // Builds a `Special` literal whose character is the escape's value.
- let special = |kind, c| {
- Ok(Primitive::Literal(ast::Literal {
- span,
- kind: ast::LiteralKind::Special(kind),
- c,
- }))
- };
- match c {
- 'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
- 'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
- 't' => special(ast::SpecialLiteralKind::Tab, '\t'),
- 'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
- 'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
- 'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
- 'A' => Ok(Primitive::Assertion(ast::Assertion {
- span,
- kind: ast::AssertionKind::StartText,
- })),
- 'z' => Ok(Primitive::Assertion(ast::Assertion {
- span,
- kind: ast::AssertionKind::EndText,
- })),
- 'b' => {
- let mut wb = ast::Assertion {
- span,
- kind: ast::AssertionKind::WordBoundary,
- };
- // After a \b, we "try" to parse things like \b{start} for
- // special word boundary assertions.
- if !self.is_eof() && self.char() == '{' {
- if let Some(kind) =
- self.maybe_parse_special_word_boundary(start)?
- {
- wb.kind = kind;
- // Extend the span over the `{...}` that was consumed.
- wb.span.end = self.pos();
- }
- }
- Ok(Primitive::Assertion(wb))
- }
- 'B' => Ok(Primitive::Assertion(ast::Assertion {
- span,
- kind: ast::AssertionKind::NotWordBoundary,
- })),
- '<' => Ok(Primitive::Assertion(ast::Assertion {
- span,
- kind: ast::AssertionKind::WordBoundaryStartAngle,
- })),
- '>' => Ok(Primitive::Assertion(ast::Assertion {
- span,
- kind: ast::AssertionKind::WordBoundaryEndAngle,
- })),
- _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
- }
- }
-
- /// Attempt to parse a specialty word boundary. That is, `\b{start}`,
- /// `\b{end}`, `\b{start-half}` or `\b{end-half}`.
- ///
- /// This is similar to `maybe_parse_ascii_class` in that, in most cases,
- /// if it fails it will just return `None` with no error. This is done
- /// because `\b{5}` is a valid expression and we want to let that be parsed
- /// by the existing counted repetition parsing code. (I thought about just
- /// invoking the counted repetition code from here, but it seemed a little
- /// ham-fisted.)
- ///
- /// Unlike `maybe_parse_ascii_class` though, this can return an error.
- /// Namely, if we definitely know it isn't a counted repetition, then we
- /// return an error specific to the specialty word boundaries.
- ///
- /// This assumes the parser is positioned at a `{` immediately following
- /// a `\b`. When `None` is returned, the parser is returned to the position
- /// at which it started: pointing at a `{`.
- ///
- /// The position given should correspond to the start of the `\b`.
- fn maybe_parse_special_word_boundary(
- &self,
- wb_start: Position,
- ) -> Result<Option<ast::AssertionKind>> {
- assert_eq!(self.char(), '{');
-
- // Characters permitted inside the braces of a special word boundary.
- let is_valid_char = |c| match c {
- 'A'..='Z' | 'a'..='z' | '-' => true,
- _ => false,
- };
- // Position of the `{`; this is where we rewind to when bailing out.
- let start = self.pos();
- if !self.bump_and_bump_space() {
- return Err(self.error(
- Span::new(wb_start, self.pos()),
- ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
- ));
- }
- let start_contents = self.pos();
- // This is one of the critical bits: if the first non-whitespace
- // character isn't in [-A-Za-z] (i.e., this can't be a special word
- // boundary), then we bail and let the counted repetition parser deal
- // with this.
- if !is_valid_char(self.char()) {
- self.parser().pos.set(start);
- return Ok(None);
- }
-
- // Now collect up our chars until we see a '}'.
- let mut scratch = self.parser().scratch.borrow_mut();
- scratch.clear();
- while !self.is_eof() && is_valid_char(self.char()) {
- scratch.push(self.char());
- self.bump_and_bump_space();
- }
- // Past this point we are committed: anything but `}` is an error rather
- // than a fallback to counted repetition.
- if self.is_eof() || self.char() != '}' {
- return Err(self.error(
- Span::new(start, self.pos()),
- ast::ErrorKind::SpecialWordBoundaryUnclosed,
- ));
- }
- let end = self.pos();
- self.bump();
- let kind = match scratch.as_str() {
- "start" => ast::AssertionKind::WordBoundaryStart,
- "end" => ast::AssertionKind::WordBoundaryEnd,
- "start-half" => ast::AssertionKind::WordBoundaryStartHalf,
- "end-half" => ast::AssertionKind::WordBoundaryEndHalf,
- _ => {
- // The error span covers only the text between the braces.
- return Err(self.error(
- Span::new(start_contents, end),
- ast::ErrorKind::SpecialWordBoundaryUnrecognized,
- ))
- }
- };
- Ok(Some(kind))
- }
-
-    /// Parses an octal escape of one to three digits into a literal.
-    ///
-    /// Preconditions, both checked with assertions: octal escapes are
-    /// enabled on the parser, and the parser sits on the first octal digit.
-    /// On return the parser sits on the first character after the number.
-    ///
-    /// As long as the preconditions hold, this routine cannot fail.
-    #[inline(never)]
-    fn parse_octal(&self) -> ast::Literal {
-        assert!(self.parser().octal);
-        assert!('0' <= self.char() && self.char() <= '7');
-        let first_digit = self.pos();
-        // Step over the first digit, then over at most two more.
-        loop {
-            if !self.bump() {
-                break;
-            }
-            if !('0'..='7').contains(&self.char()) {
-                break;
-            }
-            if self.pos().offset - first_digit.offset > 2 {
-                break;
-            }
-        }
-        let past_end = self.pos();
-        let digits = &self.pattern()[first_digit.offset..past_end.offset];
-        // Only octal digits were consumed, so the conversion cannot fail.
-        let codepoint =
-            u32::from_str_radix(digits, 8).expect("valid octal number");
-        // Three octal digits top out at 0o777 = 511, and every value in
-        // [0, 511] is a Unicode scalar value.
-        let c = char::from_u32(codepoint).expect("Unicode scalar value");
-        ast::Literal {
-            span: Span::new(first_digit, past_end),
-            kind: ast::LiteralKind::Octal,
-            c,
-        }
-    }
-
-    /// Parses a hexadecimal escape in either notation: fixed width (such as
-    /// `\xFF`) or braced (such as `\x{FFFF}`).
-    ///
-    /// The parser must sit on the `x`, `u` or `U` prefix; this is asserted.
-    /// On success it is left on the first character after the literal.
-    ///
-    /// # Errors
-    ///
-    /// Returns `EscapeUnexpectedEof` if nothing follows the prefix, and
-    /// otherwise whatever the braced or fixed-width helper reports.
-    #[inline(never)]
-    fn parse_hex(&self) -> Result<ast::Literal> {
-        assert!(
-            self.char() == 'x' || self.char() == 'u' || self.char() == 'U'
-        );
-
-        let prefix = self.char();
-        let hex_kind = if prefix == 'x' {
-            ast::HexLiteralKind::X
-        } else if prefix == 'u' {
-            ast::HexLiteralKind::UnicodeShort
-        } else {
-            ast::HexLiteralKind::UnicodeLong
-        };
-        if !self.bump_and_bump_space() {
-            let kind = ast::ErrorKind::EscapeUnexpectedEof;
-            return Err(self.error(self.span(), kind));
-        }
-        match self.char() {
-            '{' => self.parse_hex_brace(hex_kind),
-            _ => self.parse_hex_digits(hex_kind),
-        }
-    }
-
- /// Parse an N-digit hex representation of a Unicode codepoint. This
- /// expects the parser to be positioned at the first digit and will advance
- /// the parser to the first character immediately following the escape
- /// sequence.
- ///
- /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
- /// or 8 (for `\UNNNNNNNN`).
- ///
- /// # Errors
- ///
- /// An error is returned if the pattern ends before all digits are read, if
- /// a non-hex character is found where a digit is expected, or if the
- /// digits do not form a valid `char`.
- #[inline(never)]
- fn parse_hex_digits(
- &self,
- kind: ast::HexLiteralKind,
- ) -> Result<ast::Literal> {
- let mut scratch = self.parser().scratch.borrow_mut();
- scratch.clear();
-
- let start = self.pos();
- for i in 0..kind.digits() {
- // The parser already sits on the first digit, so only advance
- // before the second and later digits.
- if i > 0 && !self.bump_and_bump_space() {
- return Err(self
- .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
- }
- if !is_hex(self.char()) {
- return Err(self.error(
- self.span_char(),
- ast::ErrorKind::EscapeHexInvalidDigit,
- ));
- }
- scratch.push(self.char());
- }
- // The final bump just moves the parser past the literal, which may
- // be EOF.
- self.bump_and_bump_space();
- let end = self.pos();
- let hex = scratch.as_str();
- // `char::from_u32` rejects values that are not Unicode scalar values.
- match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
- None => Err(self.error(
- Span::new(start, end),
- ast::ErrorKind::EscapeHexInvalid,
- )),
- Some(c) => Ok(ast::Literal {
- span: Span::new(start, end),
- kind: ast::LiteralKind::HexFixed(kind),
- c,
- }),
- }
- }
-
- /// Parse a hex representation of any Unicode scalar value. This expects
- /// the parser to be positioned at the opening brace `{` and will advance
- /// the parser to the first character following the closing brace `}`.
- ///
- /// # Errors
- ///
- /// An error is returned if a non-hex character appears before the `}`, if
- /// the pattern ends before the `}`, if the braces are empty, or if the
- /// digits do not form a valid `char`.
- #[inline(never)]
- fn parse_hex_brace(
- &self,
- kind: ast::HexLiteralKind,
- ) -> Result<ast::Literal> {
- let mut scratch = self.parser().scratch.borrow_mut();
- scratch.clear();
-
- let brace_pos = self.pos();
- // `start` is the position just after the `{`.
- let start = self.span_char().end;
- // Each iteration first steps past the previous character, so the `{`
- // itself is never pushed into `scratch`.
- while self.bump_and_bump_space() && self.char() != '}' {
- if !is_hex(self.char()) {
- return Err(self.error(
- self.span_char(),
- ast::ErrorKind::EscapeHexInvalidDigit,
- ));
- }
- scratch.push(self.char());
- }
- if self.is_eof() {
- return Err(self.error(
- Span::new(brace_pos, self.pos()),
- ast::ErrorKind::EscapeUnexpectedEof,
- ));
- }
- // `end` is the position of the closing `}`.
- let end = self.pos();
- let hex = scratch.as_str();
- assert_eq!(self.char(), '}');
- self.bump_and_bump_space();
-
- if hex.is_empty() {
- return Err(self.error(
- Span::new(brace_pos, self.pos()),
- ast::ErrorKind::EscapeHexEmpty,
- ));
- }
- // `char::from_u32` rejects values that are not Unicode scalar values.
- match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
- None => Err(self.error(
- Span::new(start, end),
- ast::ErrorKind::EscapeHexInvalid,
- )),
- Some(c) => Ok(ast::Literal {
- span: Span::new(start, self.pos()),
- kind: ast::LiteralKind::HexBrace(kind),
- c,
- }),
- }
- }
-
-    /// Reads a run of ASCII decimal digits as a `u32`, skipping whitespace
-    /// on both sides of the digits.
-    ///
-    /// The parser should sit where a decimal digit may begin. Afterwards it
-    /// sits past the digits and past any whitespace that followed them.
-    ///
-    /// # Errors
-    ///
-    /// Returns `DecimalEmpty` when no digit is present and `DecimalInvalid`
-    /// when the digits cannot be converted to a `u32`. In both cases the
-    /// error span covers just the digits.
-    fn parse_decimal(&self) -> Result<u32> {
-        let mut digits = self.parser().scratch.borrow_mut();
-        digits.clear();
-
-        // Leading whitespace.
-        while !self.is_eof() && self.char().is_whitespace() {
-            self.bump();
-        }
-        // The digits themselves.
-        let first_digit = self.pos();
-        while !self.is_eof() && self.char().is_ascii_digit() {
-            digits.push(self.char());
-            self.bump_and_bump_space();
-        }
-        let span = Span::new(first_digit, self.pos());
-        // Trailing whitespace.
-        while !self.is_eof() && self.char().is_whitespace() {
-            self.bump_and_bump_space();
-        }
-
-        if digits.is_empty() {
-            return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
-        }
-        u32::from_str_radix(digits.as_str(), 10)
-            .map_err(|_| self.error(span, ast::ErrorKind::DecimalInvalid))
-    }
-
- /// Parse a standard character class consisting primarily of characters or
- /// character ranges, but can also contain nested character classes of
- /// any type (sans `.`).
- ///
- /// This assumes the parser is positioned at the opening `[`. If parsing
- /// is successful, then the parser is advanced to the position immediately
- /// following the closing `]`.
- ///
- /// # Errors
- ///
- /// An error is returned if the pattern ends before the class is closed, or
- /// if opening, closing or parsing an item of the class fails.
- #[inline(never)]
- fn parse_set_class(&self) -> Result<ast::ClassBracketed> {
- assert_eq!(self.char(), '[');
-
- // The union of items gathered so far for the class currently open.
- let mut union =
- ast::ClassSetUnion { span: self.span(), items: vec![] };
- loop {
- self.bump_space();
- if self.is_eof() {
- return Err(self.unclosed_class_error());
- }
- match self.char() {
- '[' => {
- // If we've already parsed the opening bracket, then
- // attempt to treat this as the beginning of an ASCII
- // class. If ASCII class parsing fails, then the parser
- // backs up to `[`.
- if !self.parser().stack_class.borrow().is_empty() {
- if let Some(cls) = self.maybe_parse_ascii_class() {
- union.push(ast::ClassSetItem::Ascii(cls));
- continue;
- }
- }
- union = self.push_class_open(union)?;
- }
- // `Left` hands back the enclosing class's union to continue with;
- // `Right` is the finished outermost class.
- ']' => match self.pop_class(union)? {
- Either::Left(nested_union) => {
- union = nested_union;
- }
- Either::Right(class) => return Ok(class),
- },
- // A single `&`, `-` or `~` fails its guard and is handled by the
- // catch-all arm below.
- '&' if self.peek() == Some('&') => {
- assert!(self.bump_if("&&"));
- union = self.push_class_op(
- ast::ClassSetBinaryOpKind::Intersection,
- union,
- );
- }
- '-' if self.peek() == Some('-') => {
- assert!(self.bump_if("--"));
- union = self.push_class_op(
- ast::ClassSetBinaryOpKind::Difference,
- union,
- );
- }
- '~' if self.peek() == Some('~') => {
- assert!(self.bump_if("~~"));
- union = self.push_class_op(
- ast::ClassSetBinaryOpKind::SymmetricDifference,
- union,
- );
- }
- _ => {
- union.push(self.parse_set_class_range()?);
- }
- }
- }
- }
-
- /// Parse a single primitive item in a character class set. The item to
- /// be parsed can either be one of a simple literal character, a range
- /// between two simple literal characters or a "primitive" character
- /// class like \w or \p{Greek}.
- ///
- /// If an invalid escape is found, or if a character class is found where
- /// a simple literal is expected (e.g., in a range), then an error is
- /// returned.
- ///
- /// An error is also returned if the pattern ends before the class is
- /// closed, or if the parsed range is not valid.
- #[inline(never)]
- fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
- let prim1 = self.parse_set_class_item()?;
- self.bump_space();
- if self.is_eof() {
- return Err(self.unclosed_class_error());
- }
- // If the next char isn't a `-`, then we don't have a range.
- // There are two exceptions. If the char after a `-` is a `]`, then
- // `-` is interpreted as a literal `-`. Alternatively, if the char
- // after a `-` is a `-`, then `--` corresponds to a "difference"
- // operation.
- if self.char() != '-'
- || self.peek_space() == Some(']')
- || self.peek_space() == Some('-')
- {
- // Not a range: the `-`, if any, is left for the caller to handle.
- return prim1.into_class_set_item(self);
- }
- // OK, now we're parsing a range, so bump past the `-` and parse the
- // second half of the range.
- if !self.bump_and_bump_space() {
- return Err(self.unclosed_class_error());
- }
- let prim2 = self.parse_set_class_item()?;
- let range = ast::ClassSetRange {
- span: Span::new(prim1.span().start, prim2.span().end),
- start: prim1.into_class_literal(self)?,
- end: prim2.into_class_literal(self)?,
- };
- if !range.is_valid() {
- return Err(
- self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
- );
- }
- Ok(ast::ClassSetItem::Range(range))
- }
-
-    /// Parses one primitive inside a character class: either a single
-    /// escape sequence or one verbatim character.
-    ///
-    /// The parser must sit at the start of the primitive. On success it is
-    /// left on the first position after it.
-    ///
-    /// No check is made here that the primitive is legal in its context;
-    /// reporting an illegal primitive is left to the caller.
-    #[inline(never)]
-    fn parse_set_class_item(&self) -> Result<Primitive> {
-        let c = self.char();
-        if c == '\\' {
-            return self.parse_escape();
-        }
-        let literal = ast::Literal {
-            span: self.span_char(),
-            kind: ast::LiteralKind::Verbatim,
-            c,
-        };
-        self.bump();
-        Ok(Primitive::Literal(literal))
-    }
-
- /// Parses the opening of a character class set. This includes the opening
- /// bracket along with `^` if present to indicate negation. This also
- /// starts parsing the opening set of unioned items if applicable, since
- /// there are special rules applied to certain characters in the opening
- /// of a character class. For example, `[^]]` is the class of all
- /// characters not equal to `]`. (`]` would need to be escaped in any other
- /// position.) Similarly for `-`.
- ///
- /// In all cases, the op inside the returned `ast::ClassBracketed` is an
- /// empty union. This empty union should be replaced with the actual item
- /// when it is popped from the parser's stack.
- ///
- /// This assumes the parser is positioned at the opening `[` and advances
- /// the parser to the first non-special byte of the character class.
- ///
- /// An error is returned if EOF is found.
- #[inline(never)]
- fn parse_set_class_open(
- &self,
- ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
- assert_eq!(self.char(), '[');
- let start = self.pos();
- if !self.bump_and_bump_space() {
- return Err(self.error(
- Span::new(start, self.pos()),
- ast::ErrorKind::ClassUnclosed,
- ));
- }
-
- let negated = if self.char() != '^' {
- false
- } else {
- if !self.bump_and_bump_space() {
- return Err(self.error(
- Span::new(start, self.pos()),
- ast::ErrorKind::ClassUnclosed,
- ));
- }
- true
- };
- // Accept any number of `-` as literal `-`.
- let mut union =
- ast::ClassSetUnion { span: self.span(), items: vec![] };
- while self.char() == '-' {
- union.push(ast::ClassSetItem::Literal(ast::Literal {
- span: self.span_char(),
- kind: ast::LiteralKind::Verbatim,
- c: '-',
- }));
- if !self.bump_and_bump_space() {
- // NOTE(review): unlike the other EOF errors in this function,
- // this one uses the empty span at `start` rather than
- // `start..self.pos()` -- confirm this is intentional.
- return Err(self.error(
- Span::new(start, start),
- ast::ErrorKind::ClassUnclosed,
- ));
- }
- }
- // If `]` is the *first* char in a set, then interpret it as a literal
- // `]`. That is, an empty class is impossible to write.
- if union.items.is_empty() && self.char() == ']' {
- union.push(ast::ClassSetItem::Literal(ast::Literal {
- span: self.span_char(),
- kind: ast::LiteralKind::Verbatim,
- c: ']',
- }));
- if !self.bump_and_bump_space() {
- return Err(self.error(
- Span::new(start, self.pos()),
- ast::ErrorKind::ClassUnclosed,
- ));
- }
- }
- // The returned class carries an empty placeholder union; the items
- // gathered above are returned separately in `union`.
- let set = ast::ClassBracketed {
- span: Span::new(start, self.pos()),
- negated,
- kind: ast::ClassSet::union(ast::ClassSetUnion {
- span: Span::new(union.span.start, union.span.start),
- items: vec![],
- }),
- };
- Ok((set, union))
- }
-
-    /// Tries to read an ASCII character class such as `[:alnum:]` or its
-    /// negated form `[:^alnum:]`.
-    ///
-    /// The parser must sit on the opening `[`; this is asserted.
-    ///
-    /// When the text at the parser is not a well-formed, known ASCII class,
-    /// the parser is put back on that `[` and `None` is returned. Otherwise
-    /// the class is returned and the parser sits just after the closing `]`.
-    #[inline(never)]
-    fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
-        // This routine never reports an error. An ASCII class is only
-        // meaningful inside an enclosing bracket (e.g. `[[:alnum:]]` or
-        // `[[:lower:]A]`), and anything that fails to parse as one, such as
-        // `[[:loower:]]` or `[[:lower]]`, can still be read as an ordinary
-        // nested class. There is no reliable way to tell a typo from an
-        // intentional nested class, so instead of guessing we simply rewind
-        // and let the caller parse it as a nested class. That trades better
-        // diagnostics for semantics that never fail.
-        assert_eq!(self.char(), '[');
-        let start = self.pos();
-        // Rewinds to the `[` and yields the "not an ASCII class" result.
-        let give_up = || -> Option<ast::ClassAscii> {
-            self.parser().pos.set(start);
-            None
-        };
-
-        if !self.bump() || self.char() != ':' {
-            return give_up();
-        }
-        if !self.bump() {
-            return give_up();
-        }
-        let negated = self.char() == '^';
-        if negated && !self.bump() {
-            return give_up();
-        }
-        let name_start = self.offset();
-        while self.char() != ':' && self.bump() {}
-        if self.is_eof() {
-            return give_up();
-        }
-        let name = &self.pattern()[name_start..self.offset()];
-        if !self.bump_if(":]") {
-            return give_up();
-        }
-        match ast::ClassAsciiKind::from_name(name) {
-            None => give_up(),
-            Some(kind) => Some(ast::ClassAscii {
-                span: Span::new(start, self.pos()),
-                kind,
-                negated,
-            }),
-        }
-    }
-
- /// Parse a Unicode class in either the single character notation, `\pN`
- /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
- /// the parser is positioned at the `p` (or `P` for negation) and will
- /// advance the parser to the character immediately following the class.
- ///
- /// Note that this does not check whether the class name is valid or not.
- ///
- /// # Errors
- ///
- /// An error is returned if the pattern ends right after the `p`/`P` or
- /// before a closing `}`, or if the single character form is followed by a
- /// `\`.
- #[inline(never)]
- fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
- assert!(self.char() == 'p' || self.char() == 'P');
-
- let mut scratch = self.parser().scratch.borrow_mut();
- scratch.clear();
-
- let negated = self.char() == 'P';
- if !self.bump_and_bump_space() {
- return Err(
- self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
- );
- }
- let (start, kind) = if self.char() == '{' {
- // Bracketed form: collect everything up to the closing `}`.
- let start = self.span_char().end;
- while self.bump_and_bump_space() && self.char() != '}' {
- scratch.push(self.char());
- }
- if self.is_eof() {
- return Err(self
- .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
- }
- assert_eq!(self.char(), '}');
- self.bump();
-
- let name = scratch.as_str();
- // `!=` is looked for before `:` and `=`; otherwise the `=` inside a
- // `!=` would be matched by the plain `=` case.
- if let Some(i) = name.find("!=") {
- (
- start,
- ast::ClassUnicodeKind::NamedValue {
- op: ast::ClassUnicodeOpKind::NotEqual,
- name: name[..i].to_string(),
- value: name[i + 2..].to_string(),
- },
- )
- } else if let Some(i) = name.find(':') {
- (
- start,
- ast::ClassUnicodeKind::NamedValue {
- op: ast::ClassUnicodeOpKind::Colon,
- name: name[..i].to_string(),
- value: name[i + 1..].to_string(),
- },
- )
- } else if let Some(i) = name.find('=') {
- (
- start,
- ast::ClassUnicodeKind::NamedValue {
- op: ast::ClassUnicodeOpKind::Equal,
- name: name[..i].to_string(),
- value: name[i + 1..].to_string(),
- },
- )
- } else {
- (start, ast::ClassUnicodeKind::Named(name.to_string()))
- }
- } else {
- // Single character form: the class name is the next character.
- let start = self.pos();
- let c = self.char();
- if c == '\\' {
- return Err(self.error(
- self.span_char(),
- ast::ErrorKind::UnicodeClassInvalid,
- ));
- }
- self.bump_and_bump_space();
- let kind = ast::ClassUnicodeKind::OneLetter(c);
- (start, kind)
- };
- Ok(ast::ClassUnicode {
- span: Span::new(start, self.pos()),
- negated,
- kind,
- })
- }
-
-    /// Parses a Perl character class letter such as the `d` of `\d` or the
-    /// `W` of `\W`.
-    ///
-    /// The parser must sit on one of `d`, `s`, `w`, `D`, `S` or `W`; it is
-    /// moved to the character right after it. The upper-case letters yield
-    /// the negated class.
-    ///
-    /// # Panics
-    ///
-    /// Panics if the current character is not one of the six letters above.
-    #[inline(never)]
-    fn parse_perl_class(&self) -> ast::ClassPerl {
-        let letter = self.char();
-        let span = self.span_char();
-        self.bump();
-        let kind = match letter {
-            'd' | 'D' => ast::ClassPerlKind::Digit,
-            's' | 'S' => ast::ClassPerlKind::Space,
-            'w' | 'W' => ast::ClassPerlKind::Word,
-            c => panic!("expected valid Perl class but got '{}'", c),
-        };
-        // Only the six letters above reach this point, so upper case means
-        // negated.
-        let negated = letter.is_ascii_uppercase();
-        ast::ClassPerl { span, kind, negated }
-    }
-}
-
-/// Walks a fully parsed `Ast` and fails as soon as its nesting depth goes
-/// beyond the nest limit configured on the parser.
-#[derive(Debug)]
-struct NestLimiter<'p, 's, P> {
-    /// The parser on whose behalf the limit is checked. It supplies the
-    /// limit and builds the errors.
-    p: &'p ParserI<'s, P>,
-    /// How deep the walk currently is.
-    depth: u32,
-}
-
-impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
-    /// Creates a limiter for `p`, starting at depth zero.
-    fn new(p: &'p ParserI<'s, P>) -> Self {
-        Self { p, depth: 0 }
-    }
-
-    /// Visits `ast`, returning an error if it nests too deeply.
-    #[inline(never)]
-    fn check(self, ast: &Ast) -> Result<()> {
-        ast::visit(ast, self)
-    }
-
-    /// Goes one level deeper, or fails with `NestLimitExceeded` at `span`
-    /// when that would exceed the parser's limit (or overflow the counter).
-    /// The depth is left unchanged on failure.
-    fn increment_depth(&mut self, span: &Span) -> Result<()> {
-        let deeper = match self.depth.checked_add(1) {
-            Some(deeper) => deeper,
-            None => {
-                let kind = ast::ErrorKind::NestLimitExceeded(u32::MAX);
-                return Err(self.p.error(*span, kind));
-            }
-        };
-        let limit = self.p.parser().nest_limit;
-        if deeper > limit {
-            let kind = ast::ErrorKind::NestLimitExceeded(limit);
-            return Err(self.p.error(*span, kind));
-        }
-        self.depth = deeper;
-        Ok(())
-    }
-
-    /// Comes back up one level.
-    fn decrement_depth(&mut self) {
-        // Provided the visitor pairs every "pre" call with a "post" call,
-        // the depth never goes below zero here.
-        self.depth = self.depth.checked_sub(1).unwrap();
-    }
-}
-
-impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
-    type Output = ();
-    type Err = ast::Error;
-
-    /// The walk itself is the check, so there is nothing left to produce.
-    fn finish(self) -> Result<()> {
-        Ok(())
-    }
-
-    /// Entering a compound `Ast` node costs one level of depth; leaf nodes
-    /// cost nothing.
-    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
-        let span = match ast {
-            Ast::ClassBracketed(x) => &x.span,
-            Ast::Repetition(x) => &x.span,
-            Ast::Group(x) => &x.span,
-            Ast::Alternation(x) => &x.span,
-            Ast::Concat(x) => &x.span,
-            // Leaves never contain further nodes.
-            Ast::Empty(_)
-            | Ast::Flags(_)
-            | Ast::Literal(_)
-            | Ast::Dot(_)
-            | Ast::Assertion(_)
-            | Ast::ClassUnicode(_)
-            | Ast::ClassPerl(_) => return Ok(()),
-        };
-        self.increment_depth(span)
-    }
-
-    /// Leaving a compound `Ast` node gives back the level taken on entry.
-    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
-        match ast {
-            Ast::ClassBracketed(_)
-            | Ast::Repetition(_)
-            | Ast::Group(_)
-            | Ast::Alternation(_)
-            | Ast::Concat(_) => self.decrement_depth(),
-            // Leaves took no depth on entry, so none is returned.
-            Ast::Empty(_)
-            | Ast::Flags(_)
-            | Ast::Literal(_)
-            | Ast::Dot(_)
-            | Ast::Assertion(_)
-            | Ast::ClassUnicode(_)
-            | Ast::ClassPerl(_) => {}
-        }
-        Ok(())
-    }
-
-    /// Inside a class, only bracketed classes and unions add depth.
-    fn visit_class_set_item_pre(
-        &mut self,
-        ast: &ast::ClassSetItem,
-    ) -> Result<()> {
-        let span = match ast {
-            ast::ClassSetItem::Bracketed(x) => &x.span,
-            ast::ClassSetItem::Union(x) => &x.span,
-            // Leaves never contain further items.
-            ast::ClassSetItem::Empty(_)
-            | ast::ClassSetItem::Literal(_)
-            | ast::ClassSetItem::Range(_)
-            | ast::ClassSetItem::Ascii(_)
-            | ast::ClassSetItem::Unicode(_)
-            | ast::ClassSetItem::Perl(_) => return Ok(()),
-        };
-        self.increment_depth(span)
-    }
-
-    /// Mirror image of `visit_class_set_item_pre`.
-    fn visit_class_set_item_post(
-        &mut self,
-        ast: &ast::ClassSetItem,
-    ) -> Result<()> {
-        match ast {
-            ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
-                self.decrement_depth()
-            }
-            // Leaves took no depth on entry, so none is returned.
-            ast::ClassSetItem::Empty(_)
-            | ast::ClassSetItem::Literal(_)
-            | ast::ClassSetItem::Range(_)
-            | ast::ClassSetItem::Ascii(_)
-            | ast::ClassSetItem::Unicode(_)
-            | ast::ClassSetItem::Perl(_) => {}
-        }
-        Ok(())
-    }
-
-    /// Every binary set operation adds one level of depth.
-    fn visit_class_set_binary_op_pre(
-        &mut self,
-        ast: &ast::ClassSetBinaryOp,
-    ) -> Result<()> {
-        self.increment_depth(&ast.span)
-    }
-
-    /// Gives back the level taken by `visit_class_set_binary_op_pre`.
-    fn visit_class_set_binary_op_post(
-        &mut self,
-        _ast: &ast::ClassSetBinaryOp,
-    ) -> Result<()> {
-        self.decrement_depth();
-        Ok(())
-    }
-}
-
-/// When the result is an error, transforms the ast::ErrorKind from the source
-/// Result into another one. This function is used to return clearer error
-/// messages when possible.
-fn specialize_err<T>(
- result: Result<T>,
- from: ast::ErrorKind,
- to: ast::ErrorKind,
-) -> Result<T> {
- if let Err(e) = result {
- if e.kind == from {
- Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
- } else {
- Err(e)
- }
- } else {
- result
- }
-}
-
-#[cfg(test)]
-mod tests {
- use core::ops::Range;
-
- use alloc::format;
-
- use super::*;
-
-    // A module-local `assert_eq!` that shadows the standard one. On a
-    // mismatch it panics with the debug form of each operand on its own
-    // line (better than the default, though still not great).
-    macro_rules! assert_eq {
-        ($left:expr, $right:expr) => {{
-            // Matching on a tuple of references means each operand is
-            // evaluated exactly once and only borrowed.
-            match (&$left, &$right) {
-                (lhs, rhs) if *lhs == *rhs => {}
-                (lhs, rhs) => panic!(
-                    "assertion failed: `(left == right)`\n\n\
-                     left: `{:?}`\nright: `{:?}`\n\n",
-                    lhs, rhs
-                ),
-            }
-        }};
-    }
-
- // Expected-error value used by the tests in place of a full ast::Error.
- // It holds only a span and an error kind; the PartialEq impls below compare
- // just those two fields, so tests never need to supply the pattern string.
- #[derive(Clone, Debug)]
- struct TestError {
- span: Span, // Span the parser error is expected to carry.
- kind: ast::ErrorKind, // Kind the parser error is expected to carry.
- }
-
- impl PartialEq<ast::Error> for TestError {
- fn eq(&self, other: &ast::Error) -> bool {
- self.span == other.span && self.kind == other.kind
- }
- }
-
- impl PartialEq<TestError> for ast::Error {
- fn eq(&self, other: &TestError) -> bool {
- self.span == other.span && self.kind == other.kind
- }
- }
-
-    /// Shorthand for turning a string slice into an owned `String`.
-    fn s(str: &str) -> String {
-        String::from(str)
-    }
-
-    /// Wraps a `Parser::new()` parser and `pattern` in a `ParserI`.
-    fn parser(pattern: &str) -> ParserI<'_, Parser> {
-        let default_parser = Parser::new();
-        ParserI::new(default_parser, pattern)
-    }
-
-    /// Like `parser`, but the underlying parser is built with the builder's
-    /// `octal` option set to `true`.
-    fn parser_octal(pattern: &str) -> ParserI<'_, Parser> {
-        ParserI::new(ParserBuilder::new().octal(true).build(), pattern)
-    }
-
-    /// Like `parser`, but the underlying parser is built with the builder's
-    /// `empty_min_range` option set to `true`.
-    fn parser_empty_min_range(pattern: &str) -> ParserI<'_, Parser> {
-        ParserI::new(
-            ParserBuilder::new().empty_min_range(true).build(),
-            pattern,
-        )
-    }
-
-    /// Like `parser`, but the underlying parser is built with the given
-    /// `nest_limit`.
-    fn parser_nest_limit(
-        pattern: &str,
-        nest_limit: u32,
-    ) -> ParserI<'_, Parser> {
-        let limited = ParserBuilder::new().nest_limit(nest_limit).build();
-        ParserI::new(limited, pattern)
-    }
-
-    /// Like `parser`, but the underlying parser is built with the builder's
-    /// `ignore_whitespace` option set to `true`.
-    fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> {
-        ParserI::new(
-            ParserBuilder::new().ignore_whitespace(true).build(),
-            pattern,
-        )
-    }
-
-    /// Terse constructor for a span running from `start` to `end`.
-    fn nspan(start: Position, end: Position) -> Span {
-        Span { start, end }
-    }
-
-    /// Terse constructor for a position from its byte offset, line number
-    /// and column number.
-    fn npos(offset: usize, line: usize, column: usize) -> Position {
-        Position { offset, line, column }
-    }
-
-    /// Build a span from a byte-offset range, assuming the whole pattern
-    /// sits on line 1. Each column is set to its offset plus one, which is
-    /// only correct for ASCII input; that is good enough for most tests.
-    fn span(range: Range<usize>) -> Span {
-        let on_first_line =
-            |offset: usize| Position::new(offset, 1, offset + 1);
-        Span::new(on_first_line(range.start), on_first_line(range.end))
-    }
-
-    /// Build the span covering the byte range `range` of `subject`, with
-    /// line and column numbers derived from the text preceding each
-    /// endpoint.
-    fn span_range(subject: &str, range: Range<usize>) -> Span {
-        let locate = |offset: usize| {
-            let before = &subject[..offset];
-            // The column counts the characters since the most recent
-            // newline, or since the start of the subject when there is none.
-            let chars_since_newline = before
-                .chars()
-                .rev()
-                .position(|c| c == '\n')
-                .unwrap_or_else(|| before.chars().count());
-            Position {
-                offset,
-                line: 1 + before.matches('\n').count(),
-                column: 1 + chars_since_newline,
-            }
-        };
-        Span::new(locate(range.start), locate(range.end))
-    }
-
-    /// Build a verbatim literal for `c` that begins at byte offset `start`
-    /// and is as wide as the UTF-8 encoding of `c`.
-    fn lit(c: char, start: usize) -> Ast {
-        let end = start + c.len_utf8();
-        lit_with(c, span(start..end))
-    }
-
-    /// Build a meta literal for `c` with the given span.
-    fn meta_lit(c: char, span: Span) -> Ast {
-        let literal = ast::Literal { span, kind: ast::LiteralKind::Meta, c };
-        Ast::literal(literal)
-    }
-
-    /// Build a verbatim literal for `c` with the given span.
-    fn lit_with(c: char, span: Span) -> Ast {
-        let kind = ast::LiteralKind::Verbatim;
-        Ast::literal(ast::Literal { span, kind, c })
-    }
-
-    /// Build a concatenation of `asts` whose span is the single-line span
-    /// for the byte range `range`.
-    fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
-        Ast::concat(ast::Concat { span: span(range), asts })
-    }
-
-    /// Build a concatenation of `asts` with an explicitly supplied span.
-    fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
-        let node = ast::Concat { span, asts };
-        Ast::concat(node)
-    }
-
-    /// Build an alternation of `asts` whose span is the single-line span
-    /// for the byte range `range`.
-    fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
-        let whole = span(range);
-        Ast::alternation(ast::Alternation { span: whole, asts })
-    }
-
-    /// Build an index-based capturing group around `ast`. The group's span
-    /// is the single-line span for `range` and `index` is its capture index.
-    fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
-        let kind = ast::GroupKind::CaptureIndex(index);
-        let node = ast::Group { span: span(range), kind, ast: Box::new(ast) };
-        Ast::group(node)
-    }
-
-    /// Build the AST for a flag-setting directive containing a single flag,
-    /// such as `(?x)` or `(?-x)`.
-    ///
-    /// `pat` is the full pattern string and `range` gives the byte offsets
-    /// of the whole directive within it. The flag item is placed on the
-    /// byte just before the closing parenthesis.
-    ///
-    /// When `negated` is true, a negation item is put in front of the flag;
-    /// its span runs from two bytes past the start of `range` up to the
-    /// flag.
-    fn flag_set(
-        pat: &str,
-        range: Range<usize>,
-        flag: ast::Flag,
-        negated: bool,
-    ) -> Ast {
-        let Range { start, end } = range;
-        let mut items = Vec::new();
-        if negated {
-            items.push(ast::FlagsItem {
-                span: span_range(pat, (start + 2)..(end - 2)),
-                kind: ast::FlagsItemKind::Negation,
-            });
-        }
-        items.push(ast::FlagsItem {
-            span: span_range(pat, (end - 2)..(end - 1)),
-            kind: ast::FlagsItemKind::Flag(flag),
-        });
-        Ast::flags(ast::SetFlags {
-            span: span_range(pat, start..end),
-            flags: ast::Flags {
-                span: span_range(pat, (start + 2)..(end - 1)),
-                items,
-            },
-        })
-    }
-
-    #[test]
-    fn parse_nest_limit() {
-        // Asserts that parsing `pat` under the given nest limit fails, that
-        // the reported span covers `at`, and that the error kind carries
-        // the configured limit.
-        let exceeds = |pat: &str, limit: u32, at: Range<usize>| {
-            assert_eq!(
-                parser_nest_limit(pat, limit).parse().unwrap_err(),
-                TestError {
-                    span: span(at),
-                    kind: ast::ErrorKind::NestLimitExceeded(limit),
-                }
-            );
-        };
-        // Builds the expected AST for a greedy repetition. `whole` covers
-        // operand plus operator, `op` covers the operator alone.
-        let greedy_rep = |whole: Range<usize>,
-                          op: Range<usize>,
-                          kind: ast::RepetitionKind,
-                          inner: Ast| {
-            Ast::repetition(ast::Repetition {
-                span: span(whole),
-                op: ast::RepetitionOp { span: span(op), kind },
-                greedy: true,
-                ast: Box::new(inner),
-            })
-        };
-
-        // A nest limit of 0 still allows some types of regexes.
-        assert_eq!(
-            parser_nest_limit("", 0).parse(),
-            Ok(Ast::empty(span(0..0)))
-        );
-        assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));
-
-        // Test repetition operations, which require one level of nesting.
-        exceeds("a+", 0, 0..2);
-        assert_eq!(
-            parser_nest_limit("a+", 1).parse(),
-            Ok(greedy_rep(
-                0..2,
-                1..2,
-                ast::RepetitionKind::OneOrMore,
-                lit('a', 0)
-            ))
-        );
-        exceeds("(a)+", 1, 0..3);
-        exceeds("a+*", 1, 0..2);
-        assert_eq!(
-            parser_nest_limit("a+*", 2).parse(),
-            Ok(greedy_rep(
-                0..3,
-                2..3,
-                ast::RepetitionKind::ZeroOrMore,
-                greedy_rep(
-                    0..2,
-                    1..2,
-                    ast::RepetitionKind::OneOrMore,
-                    lit('a', 0)
-                )
-            ))
-        );
-
-        // Test concatenations. A concatenation requires one level of nesting.
-        exceeds("ab", 0, 0..2);
-        assert_eq!(
-            parser_nest_limit("ab", 1).parse(),
-            Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
-        );
-        assert_eq!(
-            parser_nest_limit("abc", 1).parse(),
-            Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
-        );
-
-        // Test alternations. An alternation requires one level of nesting.
-        exceeds("a|b", 0, 0..3);
-        assert_eq!(
-            parser_nest_limit("a|b", 1).parse(),
-            Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
-        );
-        assert_eq!(
-            parser_nest_limit("a|b|c", 1).parse(),
-            Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
-        );
-
-        // Test character classes. Classes form their own mini-recursive
-        // syntax!
-        exceeds("[a]", 0, 0..3);
-        let only_item = ast::ClassSetItem::Literal(ast::Literal {
-            span: span(1..2),
-            kind: ast::LiteralKind::Verbatim,
-            c: 'a',
-        });
-        assert_eq!(
-            parser_nest_limit("[a]", 1).parse(),
-            Ok(Ast::class_bracketed(ast::ClassBracketed {
-                span: span(0..3),
-                negated: false,
-                kind: ast::ClassSet::Item(only_item),
-            }))
-        );
-        exceeds("[ab]", 1, 1..3);
-        exceeds("[ab[cd]]", 2, 3..7);
-        exceeds("[ab[cd]]", 3, 4..6);
-        exceeds("[a--b]", 1, 1..5);
-        exceeds("[a--bc]", 2, 4..6);
-    }
-
-    /// Checks that `parse_with_comments` in `(?x)` mode returns both the
-    /// AST (with comments and insignificant whitespace removed) and the
-    /// list of comments with their spans.
-    #[test]
-    fn parse_comments() {
-        // The pattern is assembled line by line because every expected
-        // offset below depends on its exact whitespace. In particular,
-        // comment 3 must be indented by two spaces: that is what puts its
-        // `#` at offset 53 and `bar` at offset 74.
-        let pat = concat!(
-            "(?x)\n",
-            "# This is comment 1.\n",
-            "foo # This is comment 2.\n",
-            "  # This is comment 3.\n",
-            "bar\n",
-            "# This is comment 4.",
-        );
-        let astc = parser(pat).parse_with_comments().unwrap();
-        assert_eq!(
-            astc.ast,
-            concat_with(
-                span_range(pat, 0..pat.len()),
-                vec![
-                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
-                    lit_with('f', span_range(pat, 26..27)),
-                    lit_with('o', span_range(pat, 27..28)),
-                    lit_with('o', span_range(pat, 28..29)),
-                    lit_with('b', span_range(pat, 74..75)),
-                    lit_with('a', span_range(pat, 75..76)),
-                    lit_with('r', span_range(pat, 76..77)),
-                ]
-            )
-        );
-        // Each comment span starts at its `#` and, except for the last
-        // comment (which ends the pattern), includes the trailing newline.
-        assert_eq!(
-            astc.comments,
-            vec![
-                ast::Comment {
-                    span: span_range(pat, 5..26),
-                    comment: s(" This is comment 1."),
-                },
-                ast::Comment {
-                    span: span_range(pat, 30..51),
-                    comment: s(" This is comment 2."),
-                },
-                ast::Comment {
-                    span: span_range(pat, 53..74),
-                    comment: s(" This is comment 3."),
-                },
-                ast::Comment {
-                    span: span_range(pat, 78..98),
-                    comment: s(" This is comment 4."),
-                },
-            ]
-        );
-    }
-
-    #[test]
-    fn parse_holistic() {
-        // A lone closing bracket is an ordinary literal.
-        assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
-
-        // Each escaped meta character takes two bytes (the backslash plus
-        // the character itself), so the i-th literal spans 2i..2i + 2.
-        let metas = [
-            '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}', '^',
-            '$', '#', '&', '-', '~',
-        ];
-        let expected: Vec<Ast> = metas
-            .iter()
-            .enumerate()
-            .map(|(i, &c)| meta_lit(c, span(2 * i..2 * i + 2)))
-            .collect();
-        assert_eq!(
-            parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
-            Ok(concat(0..36, expected))
-        );
-    }
-
-    /// Exercises the `x` (ignore whitespace) flag: basic use, toggling it
-    /// off again, scoping it to a group, whitespace right after an opening
-    /// parenthesis, whitespace inside a braced hex escape, and an escaped
-    /// space.
-    #[test]
-    fn parse_ignore_whitespace() {
-        // Test that basic whitespace insensitivity works.
-        let pat = "(?x)a b";
-        assert_eq!(
-            parser(pat).parse(),
-            Ok(concat_with(
-                nspan(npos(0, 1, 1), npos(7, 1, 8)),
-                vec![
-                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
-                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
-                    lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
-                ]
-            ))
-        );
-
-        // Test that we can toggle whitespace insensitivity.
-        let pat = "(?x)a b(?-x)a b";
-        assert_eq!(
-            parser(pat).parse(),
-            Ok(concat_with(
-                nspan(npos(0, 1, 1), npos(15, 1, 16)),
-                vec![
-                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
-                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
-                    lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
-                    flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
-                    lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
-                    lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
-                    lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
-                ]
-            ))
-        );
-
-        // Test that nesting whitespace insensitive flags works.
-        let pat = "a (?x:a )a ";
-        assert_eq!(
-            parser(pat).parse(),
-            Ok(concat_with(
-                span_range(pat, 0..11),
-                vec![
-                    lit_with('a', span_range(pat, 0..1)),
-                    lit_with(' ', span_range(pat, 1..2)),
-                    Ast::group(ast::Group {
-                        span: span_range(pat, 2..9),
-                        kind: ast::GroupKind::NonCapturing(ast::Flags {
-                            span: span_range(pat, 4..5),
-                            items: vec![ast::FlagsItem {
-                                span: span_range(pat, 4..5),
-                                kind: ast::FlagsItemKind::Flag(
-                                    ast::Flag::IgnoreWhitespace
-                                ),
-                            },],
-                        }),
-                        ast: Box::new(lit_with('a', span_range(pat, 6..7))),
-                    }),
-                    lit_with('a', span_range(pat, 9..10)),
-                    lit_with(' ', span_range(pat, 10..11)),
-                ]
-            ))
-        );
-
-        // Test that whitespace after an opening paren is insignificant.
-        let pat = "(?x)( ?P<foo> a )";
-        assert_eq!(
-            parser(pat).parse(),
-            Ok(concat_with(
-                span_range(pat, 0..pat.len()),
-                vec![
-                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
-                    Ast::group(ast::Group {
-                        span: span_range(pat, 4..pat.len()),
-                        kind: ast::GroupKind::CaptureName {
-                            starts_with_p: true,
-                            name: ast::CaptureName {
-                                span: span_range(pat, 9..12),
-                                name: s("foo"),
-                                index: 1,
-                            }
-                        },
-                        ast: Box::new(lit_with('a', span_range(pat, 14..15))),
-                    }),
-                ]
-            ))
-        );
-        // Two spaces follow the opening paren here, which is what places
-        // the literal `a` at byte offset 7.
-        let pat = "(?x)(  a )";
-        assert_eq!(
-            parser(pat).parse(),
-            Ok(concat_with(
-                span_range(pat, 0..pat.len()),
-                vec![
-                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
-                    Ast::group(ast::Group {
-                        span: span_range(pat, 4..pat.len()),
-                        kind: ast::GroupKind::CaptureIndex(1),
-                        ast: Box::new(lit_with('a', span_range(pat, 7..8))),
-                    }),
-                ]
-            ))
-        );
-        // Two spaces before `?:` and two after it: the empty flags span
-        // sits on the `:` at offset 8 and the literal `a` at offset 11.
-        let pat = "(?x)(  ?:  a )";
-        assert_eq!(
-            parser(pat).parse(),
-            Ok(concat_with(
-                span_range(pat, 0..pat.len()),
-                vec![
-                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
-                    Ast::group(ast::Group {
-                        span: span_range(pat, 4..pat.len()),
-                        kind: ast::GroupKind::NonCapturing(ast::Flags {
-                            span: span_range(pat, 8..8),
-                            items: vec![],
-                        }),
-                        ast: Box::new(lit_with('a', span_range(pat, 11..12))),
-                    }),
-                ]
-            ))
-        );
-        let pat = r"(?x)\x { 53 }";
-        assert_eq!(
-            parser(pat).parse(),
-            Ok(concat_with(
-                span_range(pat, 0..pat.len()),
-                vec![
-                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
-                    Ast::literal(ast::Literal {
-                        span: span(4..13),
-                        kind: ast::LiteralKind::HexBrace(
-                            ast::HexLiteralKind::X
-                        ),
-                        c: 'S',
-                    }),
-                ]
-            ))
-        );
-
-        // Test that whitespace after an escape is OK.
-        let pat = r"(?x)\ ";
-        assert_eq!(
-            parser(pat).parse(),
-            Ok(concat_with(
-                span_range(pat, 0..pat.len()),
-                vec![
-                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
-                    Ast::literal(ast::Literal {
-                        span: span_range(pat, 4..6),
-                        kind: ast::LiteralKind::Superfluous,
-                        c: ' ',
-                    }),
-                ]
-            ))
-        );
-    }
-
-    #[test]
-    fn parse_newlines() {
-        // A newline between two dots is parsed as an ordinary literal.
-        let pat = ".\n.";
-        let expected = vec![
-            Ast::dot(span_range(pat, 0..1)),
-            lit_with('\n', span_range(pat, 1..2)),
-            Ast::dot(span_range(pat, 2..3)),
-        ];
-        assert_eq!(
-            parser(pat).parse(),
-            Ok(concat_with(span_range(pat, 0..3), expected))
-        );
-
-        // Every character, newlines included, becomes a one-byte verbatim
-        // literal. `span_range` supplies the line and column numbers, which
-        // restart after each newline.
-        let pat = "foobar\nbaz\nquux\n";
-        let expected: Vec<Ast> = pat
-            .char_indices()
-            .map(|(i, c)| lit_with(c, span_range(pat, i..i + 1)))
-            .collect();
-        assert_eq!(
-            parser(pat).parse(),
-            Ok(concat_with(span_range(pat, 0..pat.len()), expected))
-        );
-    }
-
-    #[test]
-    fn parse_uncounted_repetition() {
-        // Builds the expected AST for a repetition. `whole` covers operand
-        // plus operator, `op` covers the operator (including a trailing
-        // `?` when the repetition is non-greedy).
-        let rep = |whole: Range<usize>,
-                   op: Range<usize>,
-                   kind: ast::RepetitionKind,
-                   greedy: bool,
-                   inner: Ast| {
-            Ast::repetition(ast::Repetition {
-                span: span(whole),
-                op: ast::RepetitionOp { span: span(op), kind },
-                greedy,
-                ast: Box::new(inner),
-            })
-        };
-
-        assert_eq!(
-            parser(r"a*").parse(),
-            Ok(rep(
-                0..2,
-                1..2,
-                ast::RepetitionKind::ZeroOrMore,
-                true,
-                lit('a', 0)
-            ))
-        );
-        assert_eq!(
-            parser(r"a+").parse(),
-            Ok(rep(
-                0..2,
-                1..2,
-                ast::RepetitionKind::OneOrMore,
-                true,
-                lit('a', 0)
-            ))
-        );
-
-        assert_eq!(
-            parser(r"a?").parse(),
-            Ok(rep(
-                0..2,
-                1..2,
-                ast::RepetitionKind::ZeroOrOne,
-                true,
-                lit('a', 0)
-            ))
-        );
-        assert_eq!(
-            parser(r"a??").parse(),
-            Ok(rep(
-                0..3,
-                1..3,
-                ast::RepetitionKind::ZeroOrOne,
-                false,
-                lit('a', 0)
-            ))
-        );
-        assert_eq!(
-            parser(r"a?").parse(),
-            Ok(rep(
-                0..2,
-                1..2,
-                ast::RepetitionKind::ZeroOrOne,
-                true,
-                lit('a', 0)
-            ))
-        );
-        assert_eq!(
-            parser(r"a?b").parse(),
-            Ok(concat(
-                0..3,
-                vec![
-                    rep(
-                        0..2,
-                        1..2,
-                        ast::RepetitionKind::ZeroOrOne,
-                        true,
-                        lit('a', 0)
-                    ),
-                    lit('b', 2),
-                ]
-            ))
-        );
-        assert_eq!(
-            parser(r"a??b").parse(),
-            Ok(concat(
-                0..4,
-                vec![
-                    rep(
-                        0..3,
-                        1..3,
-                        ast::RepetitionKind::ZeroOrOne,
-                        false,
-                        lit('a', 0)
-                    ),
-                    lit('b', 3),
-                ]
-            ))
-        );
-        assert_eq!(
-            parser(r"ab?").parse(),
-            Ok(concat(
-                0..3,
-                vec![
-                    lit('a', 0),
-                    rep(
-                        1..3,
-                        2..3,
-                        ast::RepetitionKind::ZeroOrOne,
-                        true,
-                        lit('b', 1)
-                    ),
-                ]
-            ))
-        );
-        assert_eq!(
-            parser(r"(ab)?").parse(),
-            Ok(rep(
-                0..5,
-                4..5,
-                ast::RepetitionKind::ZeroOrOne,
-                true,
-                group(0..4, 1, concat(1..3, vec![lit('a', 1), lit('b', 2)]))
-            ))
-        );
-        assert_eq!(
-            parser(r"|a?").parse(),
-            Ok(alt(
-                0..3,
-                vec![
-                    Ast::empty(span(0..0)),
-                    rep(
-                        1..3,
-                        2..3,
-                        ast::RepetitionKind::ZeroOrOne,
-                        true,
-                        lit('a', 1)
-                    ),
-                ]
-            ))
-        );
-
-        // A repetition operator with nothing to repeat is an error. The
-        // error's span is empty and sits at the listed offset.
-        let missing_operand = [
-            (r"*", 0),
-            (r"(?i)*", 4),
-            (r"(*)", 1),
-            (r"(?:?)", 3),
-            (r"+", 0),
-            (r"?", 0),
-            (r"(?)", 1),
-            (r"|*", 1),
-            (r"|+", 1),
-            (r"|?", 1),
-        ];
-        for &(pat, at) in missing_operand.iter() {
-            assert_eq!(
-                parser(pat).parse().unwrap_err(),
-                TestError {
-                    span: span(at..at),
-                    kind: ast::ErrorKind::RepetitionMissing,
-                }
-            );
-        }
-    }
-
-    #[test]
-    fn parse_counted_repetition() {
-        // Builds the expected AST for a counted repetition. `whole` covers
-        // operand plus operator, `op` covers the `{...}` operator
-        // (including a trailing `?` when the repetition is non-greedy).
-        let counted = |whole: Range<usize>,
-                       op: Range<usize>,
-                       range: ast::RepetitionRange,
-                       greedy: bool,
-                       inner: Ast| {
-            Ast::repetition(ast::Repetition {
-                span: span(whole),
-                op: ast::RepetitionOp {
-                    span: span(op),
-                    kind: ast::RepetitionKind::Range(range),
-                },
-                greedy,
-                ast: Box::new(inner),
-            })
-        };
-
-        assert_eq!(
-            parser(r"a{5}").parse(),
-            Ok(counted(
-                0..4,
-                1..4,
-                ast::RepetitionRange::Exactly(5),
-                true,
-                lit('a', 0)
-            ))
-        );
-        assert_eq!(
-            parser(r"a{5,}").parse(),
-            Ok(counted(
-                0..5,
-                1..5,
-                ast::RepetitionRange::AtLeast(5),
-                true,
-                lit('a', 0)
-            ))
-        );
-        assert_eq!(
-            parser(r"a{5,9}").parse(),
-            Ok(counted(
-                0..6,
-                1..6,
-                ast::RepetitionRange::Bounded(5, 9),
-                true,
-                lit('a', 0)
-            ))
-        );
-        assert_eq!(
-            parser(r"a{5}?").parse(),
-            Ok(counted(
-                0..5,
-                1..5,
-                ast::RepetitionRange::Exactly(5),
-                false,
-                lit('a', 0)
-            ))
-        );
-        assert_eq!(
-            parser(r"ab{5}").parse(),
-            Ok(concat(
-                0..5,
-                vec![
-                    lit('a', 0),
-                    counted(
-                        1..5,
-                        2..5,
-                        ast::RepetitionRange::Exactly(5),
-                        true,
-                        lit('b', 1)
-                    ),
-                ]
-            ))
-        );
-        assert_eq!(
-            parser(r"ab{5}c").parse(),
-            Ok(concat(
-                0..6,
-                vec![
-                    lit('a', 0),
-                    counted(
-                        1..5,
-                        2..5,
-                        ast::RepetitionRange::Exactly(5),
-                        true,
-                        lit('b', 1)
-                    ),
-                    lit('c', 5),
-                ]
-            ))
-        );
-
-        // Whitespace inside the braces is accepted.
-        assert_eq!(
-            parser(r"a{ 5 }").parse(),
-            Ok(counted(
-                0..6,
-                1..6,
-                ast::RepetitionRange::Exactly(5),
-                true,
-                lit('a', 0)
-            ))
-        );
-        assert_eq!(
-            parser(r"a{ 5 , 9 }").parse(),
-            Ok(counted(
-                0..10,
-                1..10,
-                ast::RepetitionRange::Bounded(5, 9),
-                true,
-                lit('a', 0)
-            ))
-        );
-        // With `empty_min_range` enabled, a missing minimum means zero.
-        assert_eq!(
-            parser_empty_min_range(r"a{,9}").parse(),
-            Ok(counted(
-                0..5,
-                1..5,
-                ast::RepetitionRange::Bounded(0, 9),
-                true,
-                lit('a', 0)
-            ))
-        );
-        // With `ignore_whitespace` enabled, the space before `?` is
-        // skipped and the `?` still makes the repetition non-greedy.
-        assert_eq!(
-            parser_ignore_whitespace(r"a{5,9} ?").parse(),
-            Ok(counted(
-                0..8,
-                1..8,
-                ast::RepetitionRange::Bounded(5, 9),
-                false,
-                lit('a', 0)
-            ))
-        );
-        assert_eq!(
-            parser(r"\b{5,9}").parse(),
-            Ok(counted(
-                0..7,
-                2..7,
-                ast::RepetitionRange::Bounded(5, 9),
-                true,
-                Ast::assertion(ast::Assertion {
-                    span: span(0..2),
-                    kind: ast::AssertionKind::WordBoundary,
-                })
-            ))
-        );
-
-        // Malformed or misplaced counted repetitions, each with the span
-        // and error kind the parser is expected to report.
-        let failures = vec![
-            (r"(?i){0}", 4..4, ast::ErrorKind::RepetitionMissing),
-            (r"(?m){1,1}", 4..4, ast::ErrorKind::RepetitionMissing),
-            (r"a{]}", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
-            (r"a{1,]}", 4..4, ast::ErrorKind::RepetitionCountDecimalEmpty),
-            (r"a{", 1..2, ast::ErrorKind::RepetitionCountUnclosed),
-            (r"a{}", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
-            (r"a{a", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
-            (r"a{9999999999}", 2..12, ast::ErrorKind::DecimalInvalid),
-            (r"a{9", 1..3, ast::ErrorKind::RepetitionCountUnclosed),
-            (r"a{9,a", 4..4, ast::ErrorKind::RepetitionCountDecimalEmpty),
-            (r"a{9,9999999999}", 4..14, ast::ErrorKind::DecimalInvalid),
-            (r"a{9,", 1..4, ast::ErrorKind::RepetitionCountUnclosed),
-            (r"a{9,11", 1..6, ast::ErrorKind::RepetitionCountUnclosed),
-            (r"a{2,1}", 1..6, ast::ErrorKind::RepetitionCountInvalid),
-            (r"{5}", 0..0, ast::ErrorKind::RepetitionMissing),
-            (r"|{5}", 1..1, ast::ErrorKind::RepetitionMissing),
-        ];
-        for (pat, at, kind) in failures {
-            assert_eq!(
-                parser(pat).parse().unwrap_err(),
-                TestError { span: span(at), kind }
-            );
-        }
-    }
-
-    #[test]
-    fn parse_alternate() {
-        // An empty AST node located at the given offset.
-        let empty = |at: usize| Ast::empty(span(at..at));
-        // A concatenation of two one-byte literals beginning at `start`.
-        let pair = |start: usize, first: char, second: char| {
-            concat(
-                start..start + 2,
-                vec![lit(first, start), lit(second, start + 1)],
-            )
-        };
-
-        assert_eq!(
-            parser(r"a|b").parse(),
-            Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
-        );
-        assert_eq!(
-            parser(r"(a|b)").parse(),
-            Ok(group(0..5, 1, alt(1..4, vec![lit('a', 1), lit('b', 3)])))
-        );
-
-        assert_eq!(
-            parser(r"a|b|c").parse(),
-            Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
-        );
-        assert_eq!(
-            parser(r"ax|by|cz").parse(),
-            Ok(alt(
-                0..8,
-                vec![pair(0, 'a', 'x'), pair(3, 'b', 'y'), pair(6, 'c', 'z')]
-            ))
-        );
-        assert_eq!(
-            parser(r"(ax|by|cz)").parse(),
-            Ok(group(
-                0..10,
-                1,
-                alt(
-                    1..9,
-                    vec![
-                        pair(1, 'a', 'x'),
-                        pair(4, 'b', 'y'),
-                        pair(7, 'c', 'z'),
-                    ]
-                )
-            ))
-        );
-        // Nested groups, built from the innermost outwards.
-        let innermost = group(8..12, 3, pair(9, 'c', 'z'));
-        let middle =
-            group(4..13, 2, alt(5..12, vec![pair(5, 'b', 'y'), innermost]));
-        assert_eq!(
-            parser(r"(ax|(by|(cz)))").parse(),
-            Ok(group(
-                0..14,
-                1,
-                alt(1..13, vec![pair(1, 'a', 'x'), middle])
-            ))
-        );
-
-        // Empty alternatives are allowed on either side of `|`.
-        assert_eq!(
-            parser(r"|").parse(),
-            Ok(alt(0..1, vec![empty(0), empty(1)]))
-        );
-        assert_eq!(
-            parser(r"||").parse(),
-            Ok(alt(0..2, vec![empty(0), empty(1), empty(2)]))
-        );
-        assert_eq!(
-            parser(r"a|").parse(),
-            Ok(alt(0..2, vec![lit('a', 0), empty(2)]))
-        );
-        assert_eq!(
-            parser(r"|a").parse(),
-            Ok(alt(0..2, vec![empty(0), lit('a', 1)]))
-        );
-
-        assert_eq!(
-            parser(r"(|)").parse(),
-            Ok(group(0..3, 1, alt(1..2, vec![empty(1), empty(2)])))
-        );
-        assert_eq!(
-            parser(r"(a|)").parse(),
-            Ok(group(0..4, 1, alt(1..3, vec![lit('a', 1), empty(3)])))
-        );
-        assert_eq!(
-            parser(r"(|a)").parse(),
-            Ok(group(0..4, 1, alt(1..3, vec![empty(1), lit('a', 2)])))
-        );
-
-        // Unbalanced parentheses around an alternation are reported at
-        // the offending parenthesis.
-        assert_eq!(
-            parser(r"a|b)").parse().unwrap_err(),
-            TestError {
-                span: span(3..4),
-                kind: ast::ErrorKind::GroupUnopened,
-            }
-        );
-        assert_eq!(
-            parser(r"(a|b").parse().unwrap_err(),
-            TestError {
-                span: span(0..1),
-                kind: ast::ErrorKind::GroupUnclosed,
-            }
-        );
-    }
-
-    #[test]
-    fn parse_unsupported_lookaround() {
-        // Each look-around opener is rejected, and the error span covers
-        // the opener itself: bytes 0..end of the pattern.
-        let openers = [
-            (r"(?=a)", 3),
-            (r"(?!a)", 3),
-            (r"(?<=a)", 4),
-            (r"(?<!a)", 4),
-        ];
-        for &(pat, end) in openers.iter() {
-            assert_eq!(
-                parser(pat).parse().unwrap_err(),
-                TestError {
-                    span: span(0..end),
-                    kind: ast::ErrorKind::UnsupportedLookAround,
-                }
-            );
-        }
-    }
-
-    #[test]
-    fn parse_group() {
-        // Small builders for the pieces of the expected ASTs.
-        let flag = |at: Range<usize>, flag: ast::Flag| ast::FlagsItem {
-            span: span(at),
-            kind: ast::FlagsItemKind::Flag(flag),
-        };
-        let negation = |at: Range<usize>| ast::FlagsItem {
-            span: span(at),
-            kind: ast::FlagsItemKind::Negation,
-        };
-        let flags = |at: Range<usize>, items: Vec<ast::FlagsItem>| {
-            ast::Flags { span: span(at), items }
-        };
-        let set_flags = |at: Range<usize>, flags: ast::Flags| {
-            Ast::flags(ast::SetFlags { span: span(at), flags })
-        };
-        let non_capturing =
-            |at: Range<usize>, flags: ast::Flags, inner: Ast| {
-                Ast::group(ast::Group {
-                    span: span(at),
-                    kind: ast::GroupKind::NonCapturing(flags),
-                    ast: Box::new(inner),
-                })
-            };
-
-        // Flag directives without a sub-expression.
-        assert_eq!(
-            parser("(?i)").parse(),
-            Ok(set_flags(
-                0..4,
-                flags(2..3, vec![flag(2..3, ast::Flag::CaseInsensitive)])
-            ))
-        );
-        assert_eq!(
-            parser("(?iU)").parse(),
-            Ok(set_flags(
-                0..5,
-                flags(
-                    2..4,
-                    vec![
-                        flag(2..3, ast::Flag::CaseInsensitive),
-                        flag(3..4, ast::Flag::SwapGreed),
-                    ]
-                )
-            ))
-        );
-        assert_eq!(
-            parser("(?i-U)").parse(),
-            Ok(set_flags(
-                0..6,
-                flags(
-                    2..5,
-                    vec![
-                        flag(2..3, ast::Flag::CaseInsensitive),
-                        negation(3..4),
-                        flag(4..5, ast::Flag::SwapGreed),
-                    ]
-                )
-            ))
-        );
-
-        // Capturing groups are numbered in order of their opening paren.
-        assert_eq!(
-            parser("()").parse(),
-            Ok(group(0..2, 1, Ast::empty(span(1..1))))
-        );
-        assert_eq!(parser("(a)").parse(), Ok(group(0..3, 1, lit('a', 1))));
-        assert_eq!(
-            parser("(())").parse(),
-            Ok(group(0..4, 1, group(1..3, 2, Ast::empty(span(2..2)))))
-        );
-
-        // Non-capturing groups, with and without flags.
-        assert_eq!(
-            parser("(?:a)").parse(),
-            Ok(non_capturing(0..5, flags(2..2, vec![]), lit('a', 3)))
-        );
-
-        assert_eq!(
-            parser("(?i:a)").parse(),
-            Ok(non_capturing(
-                0..6,
-                flags(2..3, vec![flag(2..3, ast::Flag::CaseInsensitive)]),
-                lit('a', 4)
-            ))
-        );
-        assert_eq!(
-            parser("(?i-U:a)").parse(),
-            Ok(non_capturing(
-                0..8,
-                flags(
-                    2..5,
-                    vec![
-                        flag(2..3, ast::Flag::CaseInsensitive),
-                        negation(3..4),
-                        flag(4..5, ast::Flag::SwapGreed),
-                    ]
-                ),
-                lit('a', 6)
-            ))
-        );
-
-        // Malformed groups, each with the span and error kind the parser
-        // is expected to report.
-        let failures = vec![
-            ("(", 0..1, ast::ErrorKind::GroupUnclosed),
-            ("(?", 0..1, ast::ErrorKind::GroupUnclosed),
-            ("(?P", 2..3, ast::ErrorKind::FlagUnrecognized),
-            ("(?P<", 4..4, ast::ErrorKind::GroupNameUnexpectedEof),
-            ("(a", 0..1, ast::ErrorKind::GroupUnclosed),
-            ("(()", 0..1, ast::ErrorKind::GroupUnclosed),
-            (")", 0..1, ast::ErrorKind::GroupUnopened),
-            ("a)", 1..2, ast::ErrorKind::GroupUnopened),
-        ];
-        for (pat, at, kind) in failures {
-            assert_eq!(
-                parser(pat).parse().unwrap_err(),
-                TestError { span: span(at), kind }
-            );
-        }
-    }
-
- #[test]
- fn parse_capture_name() {
- assert_eq!(
- parser("(?<a>z)").parse(),
- Ok(Ast::group(ast::Group {
- span: span(0..7),
- kind: ast::GroupKind::CaptureName {
- starts_with_p: false,
- name: ast::CaptureName {
- span: span(3..4),
- name: s("a"),
- index: 1,
- }
- },
- ast: Box::new(lit('z', 5)),
- }))
- );
- assert_eq!(
- parser("(?P<a>z)").parse(),
- Ok(Ast::group(ast::Group {
- span: span(0..8),
- kind: ast::GroupKind::CaptureName {
- starts_with_p: true,
- name: ast::CaptureName {
- span: span(4..5),
- name: s("a"),
- index: 1,
- }
- },
- ast: Box::new(lit('z', 6)),
- }))
- );
- assert_eq!(
- parser("(?P<abc>z)").parse(),
- Ok(Ast::group(ast::Group {
- span: span(0..10),
- kind: ast::GroupKind::CaptureName {
- starts_with_p: true,
- name: ast::CaptureName {
- span: span(4..7),
- name: s("abc"),
- index: 1,
- }
- },
- ast: Box::new(lit('z', 8)),
- }))
- );
-
- assert_eq!(
- parser("(?P<a_1>z)").parse(),
- Ok(Ast::group(ast::Group {
- span: span(0..10),
- kind: ast::GroupKind::CaptureName {
- starts_with_p: true,
- name: ast::CaptureName {
- span: span(4..7),
- name: s("a_1"),
- index: 1,
- }
- },
- ast: Box::new(lit('z', 8)),
- }))
- );
-
- assert_eq!(
- parser("(?P<a.1>z)").parse(),
- Ok(Ast::group(ast::Group {
- span: span(0..10),
- kind: ast::GroupKind::CaptureName {
- starts_with_p: true,
- name: ast::CaptureName {
- span: span(4..7),
- name: s("a.1"),
- index: 1,
- }
- },
- ast: Box::new(lit('z', 8)),
- }))
- );
-
- assert_eq!(
- parser("(?P<a[1]>z)").parse(),
- Ok(Ast::group(ast::Group {
- span: span(0..11),
- kind: ast::GroupKind::CaptureName {
- starts_with_p: true,
- name: ast::CaptureName {
- span: span(4..8),
- name: s("a[1]"),
- index: 1,
- }
- },
- ast: Box::new(lit('z', 9)),
- }))
- );
-
- assert_eq!(
- parser("(?P<a¾>)").parse(),
- Ok(Ast::group(ast::Group {
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(9, 1, 9),
- ),
- kind: ast::GroupKind::CaptureName {
- starts_with_p: true,
- name: ast::CaptureName {
- span: Span::new(
- Position::new(4, 1, 5),
- Position::new(7, 1, 7),
- ),
- name: s("a¾"),
- index: 1,
- }
- },
- ast: Box::new(Ast::empty(Span::new(
- Position::new(8, 1, 8),
- Position::new(8, 1, 8),
- ))),
- }))
- );
- assert_eq!(
- parser("(?P<名字>)").parse(),
- Ok(Ast::group(ast::Group {
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(12, 1, 9),
- ),
- kind: ast::GroupKind::CaptureName {
- starts_with_p: true,
- name: ast::CaptureName {
- span: Span::new(
- Position::new(4, 1, 5),
- Position::new(10, 1, 7),
- ),
- name: s("名字"),
- index: 1,
- }
- },
- ast: Box::new(Ast::empty(Span::new(
- Position::new(11, 1, 8),
- Position::new(11, 1, 8),
- ))),
- }))
- );
-
- assert_eq!(
- parser("(?P<").parse().unwrap_err(),
- TestError {
- span: span(4..4),
- kind: ast::ErrorKind::GroupNameUnexpectedEof,
- }
- );
- assert_eq!(
- parser("(?P<>z)").parse().unwrap_err(),
- TestError {
- span: span(4..4),
- kind: ast::ErrorKind::GroupNameEmpty,
- }
- );
- assert_eq!(
- parser("(?P<a").parse().unwrap_err(),
- TestError {
- span: span(5..5),
- kind: ast::ErrorKind::GroupNameUnexpectedEof,
- }
- );
- assert_eq!(
- parser("(?P<ab").parse().unwrap_err(),
- TestError {
- span: span(6..6),
- kind: ast::ErrorKind::GroupNameUnexpectedEof,
- }
- );
- assert_eq!(
- parser("(?P<0a").parse().unwrap_err(),
- TestError {
- span: span(4..5),
- kind: ast::ErrorKind::GroupNameInvalid,
- }
- );
- assert_eq!(
- parser("(?P<~").parse().unwrap_err(),
- TestError {
- span: span(4..5),
- kind: ast::ErrorKind::GroupNameInvalid,
- }
- );
- assert_eq!(
- parser("(?P<abc~").parse().unwrap_err(),
- TestError {
- span: span(7..8),
- kind: ast::ErrorKind::GroupNameInvalid,
- }
- );
- assert_eq!(
- parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
- TestError {
- span: span(12..13),
- kind: ast::ErrorKind::GroupNameDuplicate {
- original: span(4..5),
- },
- }
- );
- assert_eq!(
- parser("(?P<5>)").parse().unwrap_err(),
- TestError {
- span: span(4..5),
- kind: ast::ErrorKind::GroupNameInvalid,
- }
- );
- assert_eq!(
- parser("(?P<5a>)").parse().unwrap_err(),
- TestError {
- span: span(4..5),
- kind: ast::ErrorKind::GroupNameInvalid,
- }
- );
- assert_eq!(
- parser("(?P<¾>)").parse().unwrap_err(),
- TestError {
- span: Span::new(
- Position::new(4, 1, 5),
- Position::new(6, 1, 6),
- ),
- kind: ast::ErrorKind::GroupNameInvalid,
- }
- );
- assert_eq!(
- parser("(?P<¾a>)").parse().unwrap_err(),
- TestError {
- span: Span::new(
- Position::new(4, 1, 5),
- Position::new(6, 1, 6),
- ),
- kind: ast::ErrorKind::GroupNameInvalid,
- }
- );
- assert_eq!(
- parser("(?P<☃>)").parse().unwrap_err(),
- TestError {
- span: Span::new(
- Position::new(4, 1, 5),
- Position::new(7, 1, 6),
- ),
- kind: ast::ErrorKind::GroupNameInvalid,
- }
- );
- assert_eq!(
- parser("(?P<a☃>)").parse().unwrap_err(),
- TestError {
- span: Span::new(
- Position::new(5, 1, 6),
- Position::new(8, 1, 7),
- ),
- kind: ast::ErrorKind::GroupNameInvalid,
- }
- );
- }
-
- #[test]
- fn parse_flags() {
- assert_eq!(
- parser("i:").parse_flags(),
- Ok(ast::Flags {
- span: span(0..1),
- items: vec![ast::FlagsItem {
- span: span(0..1),
- kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
- }],
- })
- );
- assert_eq!(
- parser("i)").parse_flags(),
- Ok(ast::Flags {
- span: span(0..1),
- items: vec![ast::FlagsItem {
- span: span(0..1),
- kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
- }],
- })
- );
-
- assert_eq!(
- parser("isU:").parse_flags(),
- Ok(ast::Flags {
- span: span(0..3),
- items: vec![
- ast::FlagsItem {
- span: span(0..1),
- kind: ast::FlagsItemKind::Flag(
- ast::Flag::CaseInsensitive
- ),
- },
- ast::FlagsItem {
- span: span(1..2),
- kind: ast::FlagsItemKind::Flag(
- ast::Flag::DotMatchesNewLine
- ),
- },
- ast::FlagsItem {
- span: span(2..3),
- kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
- },
- ],
- })
- );
-
- assert_eq!(
- parser("-isU:").parse_flags(),
- Ok(ast::Flags {
- span: span(0..4),
- items: vec![
- ast::FlagsItem {
- span: span(0..1),
- kind: ast::FlagsItemKind::Negation,
- },
- ast::FlagsItem {
- span: span(1..2),
- kind: ast::FlagsItemKind::Flag(
- ast::Flag::CaseInsensitive
- ),
- },
- ast::FlagsItem {
- span: span(2..3),
- kind: ast::FlagsItemKind::Flag(
- ast::Flag::DotMatchesNewLine
- ),
- },
- ast::FlagsItem {
- span: span(3..4),
- kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
- },
- ],
- })
- );
- assert_eq!(
- parser("i-sU:").parse_flags(),
- Ok(ast::Flags {
- span: span(0..4),
- items: vec![
- ast::FlagsItem {
- span: span(0..1),
- kind: ast::FlagsItemKind::Flag(
- ast::Flag::CaseInsensitive
- ),
- },
- ast::FlagsItem {
- span: span(1..2),
- kind: ast::FlagsItemKind::Negation,
- },
- ast::FlagsItem {
- span: span(2..3),
- kind: ast::FlagsItemKind::Flag(
- ast::Flag::DotMatchesNewLine
- ),
- },
- ast::FlagsItem {
- span: span(3..4),
- kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
- },
- ],
- })
- );
- assert_eq!(
- parser("i-sR:").parse_flags(),
- Ok(ast::Flags {
- span: span(0..4),
- items: vec![
- ast::FlagsItem {
- span: span(0..1),
- kind: ast::FlagsItemKind::Flag(
- ast::Flag::CaseInsensitive
- ),
- },
- ast::FlagsItem {
- span: span(1..2),
- kind: ast::FlagsItemKind::Negation,
- },
- ast::FlagsItem {
- span: span(2..3),
- kind: ast::FlagsItemKind::Flag(
- ast::Flag::DotMatchesNewLine
- ),
- },
- ast::FlagsItem {
- span: span(3..4),
- kind: ast::FlagsItemKind::Flag(ast::Flag::CRLF),
- },
- ],
- })
- );
-
- assert_eq!(
- parser("isU").parse_flags().unwrap_err(),
- TestError {
- span: span(3..3),
- kind: ast::ErrorKind::FlagUnexpectedEof,
- }
- );
- assert_eq!(
- parser("isUa:").parse_flags().unwrap_err(),
- TestError {
- span: span(3..4),
- kind: ast::ErrorKind::FlagUnrecognized,
- }
- );
- assert_eq!(
- parser("isUi:").parse_flags().unwrap_err(),
- TestError {
- span: span(3..4),
- kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) },
- }
- );
- assert_eq!(
- parser("i-sU-i:").parse_flags().unwrap_err(),
- TestError {
- span: span(4..5),
- kind: ast::ErrorKind::FlagRepeatedNegation {
- original: span(1..2),
- },
- }
- );
- assert_eq!(
- parser("-)").parse_flags().unwrap_err(),
- TestError {
- span: span(0..1),
- kind: ast::ErrorKind::FlagDanglingNegation,
- }
- );
- assert_eq!(
- parser("i-)").parse_flags().unwrap_err(),
- TestError {
- span: span(1..2),
- kind: ast::ErrorKind::FlagDanglingNegation,
- }
- );
- assert_eq!(
- parser("iU-)").parse_flags().unwrap_err(),
- TestError {
- span: span(2..3),
- kind: ast::ErrorKind::FlagDanglingNegation,
- }
- );
- }
-
- #[test]
- fn parse_flag() {
- assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
- assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
- assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
- assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
- assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
- assert_eq!(parser("R").parse_flag(), Ok(ast::Flag::CRLF));
- assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
-
- assert_eq!(
- parser("a").parse_flag().unwrap_err(),
- TestError {
- span: span(0..1),
- kind: ast::ErrorKind::FlagUnrecognized,
- }
- );
- assert_eq!(
- parser("☃").parse_flag().unwrap_err(),
- TestError {
- span: span_range("☃", 0..3),
- kind: ast::ErrorKind::FlagUnrecognized,
- }
- );
- }
-
- #[test]
- fn parse_primitive_non_escape() {
- assert_eq!(
- parser(r".").parse_primitive(),
- Ok(Primitive::Dot(span(0..1)))
- );
- assert_eq!(
- parser(r"^").parse_primitive(),
- Ok(Primitive::Assertion(ast::Assertion {
- span: span(0..1),
- kind: ast::AssertionKind::StartLine,
- }))
- );
- assert_eq!(
- parser(r"$").parse_primitive(),
- Ok(Primitive::Assertion(ast::Assertion {
- span: span(0..1),
- kind: ast::AssertionKind::EndLine,
- }))
- );
-
- assert_eq!(
- parser(r"a").parse_primitive(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..1),
- kind: ast::LiteralKind::Verbatim,
- c: 'a',
- }))
- );
- assert_eq!(
- parser(r"|").parse_primitive(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..1),
- kind: ast::LiteralKind::Verbatim,
- c: '|',
- }))
- );
- assert_eq!(
- parser(r"☃").parse_primitive(),
- Ok(Primitive::Literal(ast::Literal {
- span: span_range("☃", 0..3),
- kind: ast::LiteralKind::Verbatim,
- c: '☃',
- }))
- );
- }
-
- #[test]
- fn parse_escape() {
- assert_eq!(
- parser(r"\|").parse_primitive(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..2),
- kind: ast::LiteralKind::Meta,
- c: '|',
- }))
- );
- let specials = &[
- (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
- (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
- (r"\t", '\t', ast::SpecialLiteralKind::Tab),
- (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
- (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
- (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
- ];
- for &(pat, c, ref kind) in specials {
- assert_eq!(
- parser(pat).parse_primitive(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..2),
- kind: ast::LiteralKind::Special(kind.clone()),
- c,
- }))
- );
- }
- assert_eq!(
- parser(r"\A").parse_primitive(),
- Ok(Primitive::Assertion(ast::Assertion {
- span: span(0..2),
- kind: ast::AssertionKind::StartText,
- }))
- );
- assert_eq!(
- parser(r"\z").parse_primitive(),
- Ok(Primitive::Assertion(ast::Assertion {
- span: span(0..2),
- kind: ast::AssertionKind::EndText,
- }))
- );
- assert_eq!(
- parser(r"\b").parse_primitive(),
- Ok(Primitive::Assertion(ast::Assertion {
- span: span(0..2),
- kind: ast::AssertionKind::WordBoundary,
- }))
- );
- assert_eq!(
- parser(r"\b{start}").parse_primitive(),
- Ok(Primitive::Assertion(ast::Assertion {
- span: span(0..9),
- kind: ast::AssertionKind::WordBoundaryStart,
- }))
- );
- assert_eq!(
- parser(r"\b{end}").parse_primitive(),
- Ok(Primitive::Assertion(ast::Assertion {
- span: span(0..7),
- kind: ast::AssertionKind::WordBoundaryEnd,
- }))
- );
- assert_eq!(
- parser(r"\b{start-half}").parse_primitive(),
- Ok(Primitive::Assertion(ast::Assertion {
- span: span(0..14),
- kind: ast::AssertionKind::WordBoundaryStartHalf,
- }))
- );
- assert_eq!(
- parser(r"\b{end-half}").parse_primitive(),
- Ok(Primitive::Assertion(ast::Assertion {
- span: span(0..12),
- kind: ast::AssertionKind::WordBoundaryEndHalf,
- }))
- );
- assert_eq!(
- parser(r"\<").parse_primitive(),
- Ok(Primitive::Assertion(ast::Assertion {
- span: span(0..2),
- kind: ast::AssertionKind::WordBoundaryStartAngle,
- }))
- );
- assert_eq!(
- parser(r"\>").parse_primitive(),
- Ok(Primitive::Assertion(ast::Assertion {
- span: span(0..2),
- kind: ast::AssertionKind::WordBoundaryEndAngle,
- }))
- );
- assert_eq!(
- parser(r"\B").parse_primitive(),
- Ok(Primitive::Assertion(ast::Assertion {
- span: span(0..2),
- kind: ast::AssertionKind::NotWordBoundary,
- }))
- );
-
- // We also support superfluous escapes in most cases now too.
- for c in ['!', '@', '%', '"', '\'', '/', ' '] {
- let pat = format!(r"\{}", c);
- assert_eq!(
- parser(&pat).parse_primitive(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..2),
- kind: ast::LiteralKind::Superfluous,
- c,
- }))
- );
- }
-
- // Some superfluous escapes, namely [0-9A-Za-z], are still banned. This
- // gives flexibility for future evolution.
- assert_eq!(
- parser(r"\e").parse_escape().unwrap_err(),
- TestError {
- span: span(0..2),
- kind: ast::ErrorKind::EscapeUnrecognized,
- }
- );
- assert_eq!(
- parser(r"\y").parse_escape().unwrap_err(),
- TestError {
- span: span(0..2),
- kind: ast::ErrorKind::EscapeUnrecognized,
- }
- );
-
- // Starting a special word boundary without any non-whitespace chars
- // after the brace makes it ambiguous whether the user meant to write
- // a counted repetition (probably not?) or an actual special word
- // boundary assertion.
- assert_eq!(
- parser(r"\b{").parse_escape().unwrap_err(),
- TestError {
- span: span(0..3),
- kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
- }
- );
- assert_eq!(
- parser_ignore_whitespace(r"\b{ ").parse_escape().unwrap_err(),
- TestError {
- span: span(0..4),
- kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
- }
- );
- // When 'x' is not enabled, the space is seen as a non-[-A-Za-z] char,
- // and thus causes the parser to treat it as a counted repetition.
- assert_eq!(
- parser(r"\b{ ").parse().unwrap_err(),
- TestError {
- span: span(2..4),
- kind: ast::ErrorKind::RepetitionCountUnclosed,
- }
- );
- // In this case, we got some valid chars that makes it look like the
- // user is writing one of the special word boundary assertions, but
- // we forget to close the brace.
- assert_eq!(
- parser(r"\b{foo").parse_escape().unwrap_err(),
- TestError {
- span: span(2..6),
- kind: ast::ErrorKind::SpecialWordBoundaryUnclosed,
- }
- );
- // We get the same error as above, except it is provoked by seeing a
- // char that we know is invalid before seeing a closing brace.
- assert_eq!(
- parser(r"\b{foo!}").parse_escape().unwrap_err(),
- TestError {
- span: span(2..6),
- kind: ast::ErrorKind::SpecialWordBoundaryUnclosed,
- }
- );
- // And this one occurs when, syntactically, everything looks okay, but
- // we don't use a valid spelling of a word boundary assertion.
- assert_eq!(
- parser(r"\b{foo}").parse_escape().unwrap_err(),
- TestError {
- span: span(3..6),
- kind: ast::ErrorKind::SpecialWordBoundaryUnrecognized,
- }
- );
-
- // An unfinished escape is illegal.
- assert_eq!(
- parser(r"\").parse_escape().unwrap_err(),
- TestError {
- span: span(0..1),
- kind: ast::ErrorKind::EscapeUnexpectedEof,
- }
- );
- }
-
- #[test]
- fn parse_unsupported_backreference() {
- assert_eq!(
- parser(r"\0").parse_escape().unwrap_err(),
- TestError {
- span: span(0..2),
- kind: ast::ErrorKind::UnsupportedBackreference,
- }
- );
- assert_eq!(
- parser(r"\9").parse_escape().unwrap_err(),
- TestError {
- span: span(0..2),
- kind: ast::ErrorKind::UnsupportedBackreference,
- }
- );
- }
-
- #[test]
- fn parse_octal() {
- for i in 0..511 {
- let pat = format!(r"\{:o}", i);
- assert_eq!(
- parser_octal(&pat).parse_escape(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..pat.len()),
- kind: ast::LiteralKind::Octal,
- c: char::from_u32(i).unwrap(),
- }))
- );
- }
- assert_eq!(
- parser_octal(r"\778").parse_escape(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..3),
- kind: ast::LiteralKind::Octal,
- c: '?',
- }))
- );
- assert_eq!(
- parser_octal(r"\7777").parse_escape(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..4),
- kind: ast::LiteralKind::Octal,
- c: '\u{01FF}',
- }))
- );
- assert_eq!(
- parser_octal(r"\778").parse(),
- Ok(Ast::concat(ast::Concat {
- span: span(0..4),
- asts: vec![
- Ast::literal(ast::Literal {
- span: span(0..3),
- kind: ast::LiteralKind::Octal,
- c: '?',
- }),
- Ast::literal(ast::Literal {
- span: span(3..4),
- kind: ast::LiteralKind::Verbatim,
- c: '8',
- }),
- ],
- }))
- );
- assert_eq!(
- parser_octal(r"\7777").parse(),
- Ok(Ast::concat(ast::Concat {
- span: span(0..5),
- asts: vec![
- Ast::literal(ast::Literal {
- span: span(0..4),
- kind: ast::LiteralKind::Octal,
- c: '\u{01FF}',
- }),
- Ast::literal(ast::Literal {
- span: span(4..5),
- kind: ast::LiteralKind::Verbatim,
- c: '7',
- }),
- ],
- }))
- );
-
- assert_eq!(
- parser_octal(r"\8").parse_escape().unwrap_err(),
- TestError {
- span: span(0..2),
- kind: ast::ErrorKind::EscapeUnrecognized,
- }
- );
- }
-
- #[test]
- fn parse_hex_two() {
- for i in 0..256 {
- let pat = format!(r"\x{:02x}", i);
- assert_eq!(
- parser(&pat).parse_escape(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..pat.len()),
- kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
- c: char::from_u32(i).unwrap(),
- }))
- );
- }
-
- assert_eq!(
- parser(r"\xF").parse_escape().unwrap_err(),
- TestError {
- span: span(3..3),
- kind: ast::ErrorKind::EscapeUnexpectedEof,
- }
- );
- assert_eq!(
- parser(r"\xG").parse_escape().unwrap_err(),
- TestError {
- span: span(2..3),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- assert_eq!(
- parser(r"\xFG").parse_escape().unwrap_err(),
- TestError {
- span: span(3..4),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- }
-
- #[test]
- fn parse_hex_four() {
- for i in 0..65536 {
- let c = match char::from_u32(i) {
- None => continue,
- Some(c) => c,
- };
- let pat = format!(r"\u{:04x}", i);
- assert_eq!(
- parser(&pat).parse_escape(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..pat.len()),
- kind: ast::LiteralKind::HexFixed(
- ast::HexLiteralKind::UnicodeShort
- ),
- c,
- }))
- );
- }
-
- assert_eq!(
- parser(r"\uF").parse_escape().unwrap_err(),
- TestError {
- span: span(3..3),
- kind: ast::ErrorKind::EscapeUnexpectedEof,
- }
- );
- assert_eq!(
- parser(r"\uG").parse_escape().unwrap_err(),
- TestError {
- span: span(2..3),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- assert_eq!(
- parser(r"\uFG").parse_escape().unwrap_err(),
- TestError {
- span: span(3..4),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- assert_eq!(
- parser(r"\uFFG").parse_escape().unwrap_err(),
- TestError {
- span: span(4..5),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- assert_eq!(
- parser(r"\uFFFG").parse_escape().unwrap_err(),
- TestError {
- span: span(5..6),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- assert_eq!(
- parser(r"\uD800").parse_escape().unwrap_err(),
- TestError {
- span: span(2..6),
- kind: ast::ErrorKind::EscapeHexInvalid,
- }
- );
- }
-
- #[test]
- fn parse_hex_eight() {
- for i in 0..65536 {
- let c = match char::from_u32(i) {
- None => continue,
- Some(c) => c,
- };
- let pat = format!(r"\U{:08x}", i);
- assert_eq!(
- parser(&pat).parse_escape(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..pat.len()),
- kind: ast::LiteralKind::HexFixed(
- ast::HexLiteralKind::UnicodeLong
- ),
- c,
- }))
- );
- }
-
- assert_eq!(
- parser(r"\UF").parse_escape().unwrap_err(),
- TestError {
- span: span(3..3),
- kind: ast::ErrorKind::EscapeUnexpectedEof,
- }
- );
- assert_eq!(
- parser(r"\UG").parse_escape().unwrap_err(),
- TestError {
- span: span(2..3),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- assert_eq!(
- parser(r"\UFG").parse_escape().unwrap_err(),
- TestError {
- span: span(3..4),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- assert_eq!(
- parser(r"\UFFG").parse_escape().unwrap_err(),
- TestError {
- span: span(4..5),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- assert_eq!(
- parser(r"\UFFFG").parse_escape().unwrap_err(),
- TestError {
- span: span(5..6),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- assert_eq!(
- parser(r"\UFFFFG").parse_escape().unwrap_err(),
- TestError {
- span: span(6..7),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- assert_eq!(
- parser(r"\UFFFFFG").parse_escape().unwrap_err(),
- TestError {
- span: span(7..8),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- assert_eq!(
- parser(r"\UFFFFFFG").parse_escape().unwrap_err(),
- TestError {
- span: span(8..9),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- assert_eq!(
- parser(r"\UFFFFFFFG").parse_escape().unwrap_err(),
- TestError {
- span: span(9..10),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- }
-
- #[test]
- fn parse_hex_brace() {
- assert_eq!(
- parser(r"\u{26c4}").parse_escape(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..8),
- kind: ast::LiteralKind::HexBrace(
- ast::HexLiteralKind::UnicodeShort
- ),
- c: '⛄',
- }))
- );
- assert_eq!(
- parser(r"\U{26c4}").parse_escape(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..8),
- kind: ast::LiteralKind::HexBrace(
- ast::HexLiteralKind::UnicodeLong
- ),
- c: '⛄',
- }))
- );
- assert_eq!(
- parser(r"\x{26c4}").parse_escape(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..8),
- kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
- c: '⛄',
- }))
- );
- assert_eq!(
- parser(r"\x{26C4}").parse_escape(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..8),
- kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
- c: '⛄',
- }))
- );
- assert_eq!(
- parser(r"\x{10fFfF}").parse_escape(),
- Ok(Primitive::Literal(ast::Literal {
- span: span(0..10),
- kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
- c: '\u{10FFFF}',
- }))
- );
-
- assert_eq!(
- parser(r"\x").parse_escape().unwrap_err(),
- TestError {
- span: span(2..2),
- kind: ast::ErrorKind::EscapeUnexpectedEof,
- }
- );
- assert_eq!(
- parser(r"\x{").parse_escape().unwrap_err(),
- TestError {
- span: span(2..3),
- kind: ast::ErrorKind::EscapeUnexpectedEof,
- }
- );
- assert_eq!(
- parser(r"\x{FF").parse_escape().unwrap_err(),
- TestError {
- span: span(2..5),
- kind: ast::ErrorKind::EscapeUnexpectedEof,
- }
- );
- assert_eq!(
- parser(r"\x{}").parse_escape().unwrap_err(),
- TestError {
- span: span(2..4),
- kind: ast::ErrorKind::EscapeHexEmpty,
- }
- );
- assert_eq!(
- parser(r"\x{FGF}").parse_escape().unwrap_err(),
- TestError {
- span: span(4..5),
- kind: ast::ErrorKind::EscapeHexInvalidDigit,
- }
- );
- assert_eq!(
- parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
- TestError {
- span: span(3..9),
- kind: ast::ErrorKind::EscapeHexInvalid,
- }
- );
- assert_eq!(
- parser(r"\x{D800}").parse_escape().unwrap_err(),
- TestError {
- span: span(3..7),
- kind: ast::ErrorKind::EscapeHexInvalid,
- }
- );
- assert_eq!(
- parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
- TestError {
- span: span(3..12),
- kind: ast::ErrorKind::EscapeHexInvalid,
- }
- );
- }
-
- #[test]
- fn parse_decimal() {
- assert_eq!(parser("123").parse_decimal(), Ok(123));
- assert_eq!(parser("0").parse_decimal(), Ok(0));
- assert_eq!(parser("01").parse_decimal(), Ok(1));
-
- assert_eq!(
- parser("-1").parse_decimal().unwrap_err(),
- TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
- );
- assert_eq!(
- parser("").parse_decimal().unwrap_err(),
- TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
- );
- assert_eq!(
- parser("9999999999").parse_decimal().unwrap_err(),
- TestError {
- span: span(0..10),
- kind: ast::ErrorKind::DecimalInvalid,
- }
- );
- }
-
- #[test]
- fn parse_set_class() {
- fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
- ast::ClassSet::union(ast::ClassSetUnion { span, items })
- }
-
- fn intersection(
- span: Span,
- lhs: ast::ClassSet,
- rhs: ast::ClassSet,
- ) -> ast::ClassSet {
- ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
- span,
- kind: ast::ClassSetBinaryOpKind::Intersection,
- lhs: Box::new(lhs),
- rhs: Box::new(rhs),
- })
- }
-
- fn difference(
- span: Span,
- lhs: ast::ClassSet,
- rhs: ast::ClassSet,
- ) -> ast::ClassSet {
- ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
- span,
- kind: ast::ClassSetBinaryOpKind::Difference,
- lhs: Box::new(lhs),
- rhs: Box::new(rhs),
- })
- }
-
- fn symdifference(
- span: Span,
- lhs: ast::ClassSet,
- rhs: ast::ClassSet,
- ) -> ast::ClassSet {
- ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
- span,
- kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
- lhs: Box::new(lhs),
- rhs: Box::new(rhs),
- })
- }
-
- fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
- ast::ClassSet::Item(item)
- }
-
- fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
- ast::ClassSetItem::Ascii(cls)
- }
-
- fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
- ast::ClassSetItem::Unicode(cls)
- }
-
- fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
- ast::ClassSetItem::Perl(cls)
- }
-
- fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
- ast::ClassSetItem::Bracketed(Box::new(cls))
- }
-
- fn lit(span: Span, c: char) -> ast::ClassSetItem {
- ast::ClassSetItem::Literal(ast::Literal {
- span,
- kind: ast::LiteralKind::Verbatim,
- c,
- })
- }
-
- fn empty(span: Span) -> ast::ClassSetItem {
- ast::ClassSetItem::Empty(span)
- }
-
- fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
- let pos1 = Position {
- offset: span.start.offset + start.len_utf8(),
- column: span.start.column + 1,
- ..span.start
- };
- let pos2 = Position {
- offset: span.end.offset - end.len_utf8(),
- column: span.end.column - 1,
- ..span.end
- };
- ast::ClassSetItem::Range(ast::ClassSetRange {
- span,
- start: ast::Literal {
- span: Span { end: pos1, ..span },
- kind: ast::LiteralKind::Verbatim,
- c: start,
- },
- end: ast::Literal {
- span: Span { start: pos2, ..span },
- kind: ast::LiteralKind::Verbatim,
- c: end,
- },
- })
- }
-
- fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
- ast::ClassAscii { span, kind: ast::ClassAsciiKind::Alnum, negated }
- }
-
- fn lower(span: Span, negated: bool) -> ast::ClassAscii {
- ast::ClassAscii { span, kind: ast::ClassAsciiKind::Lower, negated }
- }
-
- assert_eq!(
- parser("[[:alnum:]]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..11),
- negated: false,
- kind: itemset(item_ascii(alnum(span(1..10), false))),
- }))
- );
- assert_eq!(
- parser("[[[:alnum:]]]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..13),
- negated: false,
- kind: itemset(item_bracket(ast::ClassBracketed {
- span: span(1..12),
- negated: false,
- kind: itemset(item_ascii(alnum(span(2..11), false))),
- })),
- }))
- );
- assert_eq!(
- parser("[[:alnum:]&&[:lower:]]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..22),
- negated: false,
- kind: intersection(
- span(1..21),
- itemset(item_ascii(alnum(span(1..10), false))),
- itemset(item_ascii(lower(span(12..21), false))),
- ),
- }))
- );
- assert_eq!(
- parser("[[:alnum:]--[:lower:]]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..22),
- negated: false,
- kind: difference(
- span(1..21),
- itemset(item_ascii(alnum(span(1..10), false))),
- itemset(item_ascii(lower(span(12..21), false))),
- ),
- }))
- );
- assert_eq!(
- parser("[[:alnum:]~~[:lower:]]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..22),
- negated: false,
- kind: symdifference(
- span(1..21),
- itemset(item_ascii(alnum(span(1..10), false))),
- itemset(item_ascii(lower(span(12..21), false))),
- ),
- }))
- );
-
- assert_eq!(
- parser("[a]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..3),
- negated: false,
- kind: itemset(lit(span(1..2), 'a')),
- }))
- );
- assert_eq!(
- parser(r"[a\]]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..5),
- negated: false,
- kind: union(
- span(1..4),
- vec![
- lit(span(1..2), 'a'),
- ast::ClassSetItem::Literal(ast::Literal {
- span: span(2..4),
- kind: ast::LiteralKind::Meta,
- c: ']',
- }),
- ]
- ),
- }))
- );
- assert_eq!(
- parser(r"[a\-z]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..6),
- negated: false,
- kind: union(
- span(1..5),
- vec![
- lit(span(1..2), 'a'),
- ast::ClassSetItem::Literal(ast::Literal {
- span: span(2..4),
- kind: ast::LiteralKind::Meta,
- c: '-',
- }),
- lit(span(4..5), 'z'),
- ]
- ),
- }))
- );
- assert_eq!(
- parser("[ab]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..4),
- negated: false,
- kind: union(
- span(1..3),
- vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
- ),
- }))
- );
- assert_eq!(
- parser("[a-]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..4),
- negated: false,
- kind: union(
- span(1..3),
- vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
- ),
- }))
- );
- assert_eq!(
- parser("[-a]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..4),
- negated: false,
- kind: union(
- span(1..3),
- vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
- ),
- }))
- );
- assert_eq!(
- parser(r"[\pL]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..5),
- negated: false,
- kind: itemset(item_unicode(ast::ClassUnicode {
- span: span(1..4),
- negated: false,
- kind: ast::ClassUnicodeKind::OneLetter('L'),
- })),
- }))
- );
- assert_eq!(
- parser(r"[\w]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..4),
- negated: false,
- kind: itemset(item_perl(ast::ClassPerl {
- span: span(1..3),
- kind: ast::ClassPerlKind::Word,
- negated: false,
- })),
- }))
- );
- assert_eq!(
- parser(r"[a\wz]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..6),
- negated: false,
- kind: union(
- span(1..5),
- vec![
- lit(span(1..2), 'a'),
- item_perl(ast::ClassPerl {
- span: span(2..4),
- kind: ast::ClassPerlKind::Word,
- negated: false,
- }),
- lit(span(4..5), 'z'),
- ]
- ),
- }))
- );
-
- assert_eq!(
- parser("[a-z]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..5),
- negated: false,
- kind: itemset(range(span(1..4), 'a', 'z')),
- }))
- );
- assert_eq!(
- parser("[a-cx-z]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..8),
- negated: false,
- kind: union(
- span(1..7),
- vec![
- range(span(1..4), 'a', 'c'),
- range(span(4..7), 'x', 'z'),
- ]
- ),
- }))
- );
- assert_eq!(
- parser(r"[\w&&a-cx-z]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..12),
- negated: false,
- kind: intersection(
- span(1..11),
- itemset(item_perl(ast::ClassPerl {
- span: span(1..3),
- kind: ast::ClassPerlKind::Word,
- negated: false,
- })),
- union(
- span(5..11),
- vec![
- range(span(5..8), 'a', 'c'),
- range(span(8..11), 'x', 'z'),
- ]
- ),
- ),
- }))
- );
- assert_eq!(
- parser(r"[a-cx-z&&\w]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..12),
- negated: false,
- kind: intersection(
- span(1..11),
- union(
- span(1..7),
- vec![
- range(span(1..4), 'a', 'c'),
- range(span(4..7), 'x', 'z'),
- ]
- ),
- itemset(item_perl(ast::ClassPerl {
- span: span(9..11),
- kind: ast::ClassPerlKind::Word,
- negated: false,
- })),
- ),
- }))
- );
- assert_eq!(
- parser(r"[a--b--c]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..9),
- negated: false,
- kind: difference(
- span(1..8),
- difference(
- span(1..5),
- itemset(lit(span(1..2), 'a')),
- itemset(lit(span(4..5), 'b')),
- ),
- itemset(lit(span(7..8), 'c')),
- ),
- }))
- );
- assert_eq!(
- parser(r"[a~~b~~c]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..9),
- negated: false,
- kind: symdifference(
- span(1..8),
- symdifference(
- span(1..5),
- itemset(lit(span(1..2), 'a')),
- itemset(lit(span(4..5), 'b')),
- ),
- itemset(lit(span(7..8), 'c')),
- ),
- }))
- );
- assert_eq!(
- parser(r"[\^&&^]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..7),
- negated: false,
- kind: intersection(
- span(1..6),
- itemset(ast::ClassSetItem::Literal(ast::Literal {
- span: span(1..3),
- kind: ast::LiteralKind::Meta,
- c: '^',
- })),
- itemset(lit(span(5..6), '^')),
- ),
- }))
- );
- assert_eq!(
- parser(r"[\&&&&]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..7),
- negated: false,
- kind: intersection(
- span(1..6),
- itemset(ast::ClassSetItem::Literal(ast::Literal {
- span: span(1..3),
- kind: ast::LiteralKind::Meta,
- c: '&',
- })),
- itemset(lit(span(5..6), '&')),
- ),
- }))
- );
- assert_eq!(
- parser(r"[&&&&]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..6),
- negated: false,
- kind: intersection(
- span(1..5),
- intersection(
- span(1..3),
- itemset(empty(span(1..1))),
- itemset(empty(span(3..3))),
- ),
- itemset(empty(span(5..5))),
- ),
- }))
- );
-
- let pat = "[☃-⛄]";
- assert_eq!(
- parser(pat).parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span_range(pat, 0..9),
- negated: false,
- kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
- span: span_range(pat, 1..8),
- start: ast::Literal {
- span: span_range(pat, 1..4),
- kind: ast::LiteralKind::Verbatim,
- c: '☃',
- },
- end: ast::Literal {
- span: span_range(pat, 5..8),
- kind: ast::LiteralKind::Verbatim,
- c: '⛄',
- },
- })),
- }))
- );
-
- assert_eq!(
- parser(r"[]]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..3),
- negated: false,
- kind: itemset(lit(span(1..2), ']')),
- }))
- );
- assert_eq!(
- parser(r"[]\[]").parse(),
- Ok(Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..5),
- negated: false,
- kind: union(
- span(1..4),
- vec![
- lit(span(1..2), ']'),
- ast::ClassSetItem::Literal(ast::Literal {
- span: span(2..4),
- kind: ast::LiteralKind::Meta,
- c: '[',
- }),
- ]
- ),
- }))
- );
- assert_eq!(
- parser(r"[\[]]").parse(),
- Ok(concat(
- 0..5,
- vec![
- Ast::class_bracketed(ast::ClassBracketed {
- span: span(0..4),
- negated: false,
- kind: itemset(ast::ClassSetItem::Literal(
- ast::Literal {
- span: span(1..3),
- kind: ast::LiteralKind::Meta,
- c: '[',
- }
- )),
- }),
- Ast::literal(ast::Literal {
- span: span(4..5),
- kind: ast::LiteralKind::Verbatim,
- c: ']',
- }),
- ]
- ))
- );
-
- assert_eq!(
- parser("[").parse().unwrap_err(),
- TestError {
- span: span(0..1),
- kind: ast::ErrorKind::ClassUnclosed,
- }
- );
- assert_eq!(
- parser("[[").parse().unwrap_err(),
- TestError {
- span: span(1..2),
- kind: ast::ErrorKind::ClassUnclosed,
- }
- );
- assert_eq!(
- parser("[[-]").parse().unwrap_err(),
- TestError {
- span: span(0..1),
- kind: ast::ErrorKind::ClassUnclosed,
- }
- );
- assert_eq!(
- parser("[[[:alnum:]").parse().unwrap_err(),
- TestError {
- span: span(1..2),
- kind: ast::ErrorKind::ClassUnclosed,
- }
- );
- assert_eq!(
- parser(r"[\b]").parse().unwrap_err(),
- TestError {
- span: span(1..3),
- kind: ast::ErrorKind::ClassEscapeInvalid,
- }
- );
- assert_eq!(
- parser(r"[\w-a]").parse().unwrap_err(),
- TestError {
- span: span(1..3),
- kind: ast::ErrorKind::ClassRangeLiteral,
- }
- );
- assert_eq!(
- parser(r"[a-\w]").parse().unwrap_err(),
- TestError {
- span: span(3..5),
- kind: ast::ErrorKind::ClassRangeLiteral,
- }
- );
- assert_eq!(
- parser(r"[z-a]").parse().unwrap_err(),
- TestError {
- span: span(1..4),
- kind: ast::ErrorKind::ClassRangeInvalid,
- }
- );
-
- assert_eq!(
- parser_ignore_whitespace("[a ").parse().unwrap_err(),
- TestError {
- span: span(0..1),
- kind: ast::ErrorKind::ClassUnclosed,
- }
- );
- assert_eq!(
- parser_ignore_whitespace("[a- ").parse().unwrap_err(),
- TestError {
- span: span(0..1),
- kind: ast::ErrorKind::ClassUnclosed,
- }
- );
- }
-
- #[test]
- fn parse_set_class_open() {
- assert_eq!(parser("[a]").parse_set_class_open(), {
- let set = ast::ClassBracketed {
- span: span(0..1),
- negated: false,
- kind: ast::ClassSet::union(ast::ClassSetUnion {
- span: span(1..1),
- items: vec![],
- }),
- };
- let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
- Ok((set, union))
- });
- assert_eq!(
- parser_ignore_whitespace("[ a]").parse_set_class_open(),
- {
- let set = ast::ClassBracketed {
- span: span(0..4),
- negated: false,
- kind: ast::ClassSet::union(ast::ClassSetUnion {
- span: span(4..4),
- items: vec![],
- }),
- };
- let union =
- ast::ClassSetUnion { span: span(4..4), items: vec![] };
- Ok((set, union))
- }
- );
- assert_eq!(parser("[^a]").parse_set_class_open(), {
- let set = ast::ClassBracketed {
- span: span(0..2),
- negated: true,
- kind: ast::ClassSet::union(ast::ClassSetUnion {
- span: span(2..2),
- items: vec![],
- }),
- };
- let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
- Ok((set, union))
- });
- assert_eq!(
- parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
- {
- let set = ast::ClassBracketed {
- span: span(0..4),
- negated: true,
- kind: ast::ClassSet::union(ast::ClassSetUnion {
- span: span(4..4),
- items: vec![],
- }),
- };
- let union =
- ast::ClassSetUnion { span: span(4..4), items: vec![] };
- Ok((set, union))
- }
- );
- assert_eq!(parser("[-a]").parse_set_class_open(), {
- let set = ast::ClassBracketed {
- span: span(0..2),
- negated: false,
- kind: ast::ClassSet::union(ast::ClassSetUnion {
- span: span(1..1),
- items: vec![],
- }),
- };
- let union = ast::ClassSetUnion {
- span: span(1..2),
- items: vec![ast::ClassSetItem::Literal(ast::Literal {
- span: span(1..2),
- kind: ast::LiteralKind::Verbatim,
- c: '-',
- })],
- };
- Ok((set, union))
- });
- assert_eq!(
- parser_ignore_whitespace("[ - a]").parse_set_class_open(),
- {
- let set = ast::ClassBracketed {
- span: span(0..4),
- negated: false,
- kind: ast::ClassSet::union(ast::ClassSetUnion {
- span: span(2..2),
- items: vec![],
- }),
- };
- let union = ast::ClassSetUnion {
- span: span(2..3),
- items: vec![ast::ClassSetItem::Literal(ast::Literal {
- span: span(2..3),
- kind: ast::LiteralKind::Verbatim,
- c: '-',
- })],
- };
- Ok((set, union))
- }
- );
- assert_eq!(parser("[^-a]").parse_set_class_open(), {
- let set = ast::ClassBracketed {
- span: span(0..3),
- negated: true,
- kind: ast::ClassSet::union(ast::ClassSetUnion {
- span: span(2..2),
- items: vec![],
- }),
- };
- let union = ast::ClassSetUnion {
- span: span(2..3),
- items: vec![ast::ClassSetItem::Literal(ast::Literal {
- span: span(2..3),
- kind: ast::LiteralKind::Verbatim,
- c: '-',
- })],
- };
- Ok((set, union))
- });
- assert_eq!(parser("[--a]").parse_set_class_open(), {
- let set = ast::ClassBracketed {
- span: span(0..3),
- negated: false,
- kind: ast::ClassSet::union(ast::ClassSetUnion {
- span: span(1..1),
- items: vec![],
- }),
- };
- let union = ast::ClassSetUnion {
- span: span(1..3),
- items: vec![
- ast::ClassSetItem::Literal(ast::Literal {
- span: span(1..2),
- kind: ast::LiteralKind::Verbatim,
- c: '-',
- }),
- ast::ClassSetItem::Literal(ast::Literal {
- span: span(2..3),
- kind: ast::LiteralKind::Verbatim,
- c: '-',
- }),
- ],
- };
- Ok((set, union))
- });
- assert_eq!(parser("[]a]").parse_set_class_open(), {
- let set = ast::ClassBracketed {
- span: span(0..2),
- negated: false,
- kind: ast::ClassSet::union(ast::ClassSetUnion {
- span: span(1..1),
- items: vec![],
- }),
- };
- let union = ast::ClassSetUnion {
- span: span(1..2),
- items: vec![ast::ClassSetItem::Literal(ast::Literal {
- span: span(1..2),
- kind: ast::LiteralKind::Verbatim,
- c: ']',
- })],
- };
- Ok((set, union))
- });
- assert_eq!(
- parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
- {
- let set = ast::ClassBracketed {
- span: span(0..4),
- negated: false,
- kind: ast::ClassSet::union(ast::ClassSetUnion {
- span: span(2..2),
- items: vec![],
- }),
- };
- let union = ast::ClassSetUnion {
- span: span(2..3),
- items: vec![ast::ClassSetItem::Literal(ast::Literal {
- span: span(2..3),
- kind: ast::LiteralKind::Verbatim,
- c: ']',
- })],
- };
- Ok((set, union))
- }
- );
- assert_eq!(parser("[^]a]").parse_set_class_open(), {
- let set = ast::ClassBracketed {
- span: span(0..3),
- negated: true,
- kind: ast::ClassSet::union(ast::ClassSetUnion {
- span: span(2..2),
- items: vec![],
- }),
- };
- let union = ast::ClassSetUnion {
- span: span(2..3),
- items: vec![ast::ClassSetItem::Literal(ast::Literal {
- span: span(2..3),
- kind: ast::LiteralKind::Verbatim,
- c: ']',
- })],
- };
- Ok((set, union))
- });
- assert_eq!(parser("[-]a]").parse_set_class_open(), {
- let set = ast::ClassBracketed {
- span: span(0..2),
- negated: false,
- kind: ast::ClassSet::union(ast::ClassSetUnion {
- span: span(1..1),
- items: vec![],
- }),
- };
- let union = ast::ClassSetUnion {
- span: span(1..2),
- items: vec![ast::ClassSetItem::Literal(ast::Literal {
- span: span(1..2),
- kind: ast::LiteralKind::Verbatim,
- c: '-',
- })],
- };
- Ok((set, union))
- });
-
- assert_eq!(
- parser("[").parse_set_class_open().unwrap_err(),
- TestError {
- span: span(0..1),
- kind: ast::ErrorKind::ClassUnclosed,
- }
- );
- assert_eq!(
- parser_ignore_whitespace("[ ")
- .parse_set_class_open()
- .unwrap_err(),
- TestError {
- span: span(0..5),
- kind: ast::ErrorKind::ClassUnclosed,
- }
- );
- assert_eq!(
- parser("[^").parse_set_class_open().unwrap_err(),
- TestError {
- span: span(0..2),
- kind: ast::ErrorKind::ClassUnclosed,
- }
- );
- assert_eq!(
- parser("[]").parse_set_class_open().unwrap_err(),
- TestError {
- span: span(0..2),
- kind: ast::ErrorKind::ClassUnclosed,
- }
- );
- assert_eq!(
- parser("[-").parse_set_class_open().unwrap_err(),
- TestError {
- span: span(0..0),
- kind: ast::ErrorKind::ClassUnclosed,
- }
- );
- assert_eq!(
- parser("[--").parse_set_class_open().unwrap_err(),
- TestError {
- span: span(0..0),
- kind: ast::ErrorKind::ClassUnclosed,
- }
- );
-
- // See: https://github.com/rust-lang/regex/issues/792
- assert_eq!(
- parser("(?x)[-#]").parse_with_comments().unwrap_err(),
- TestError {
- span: span(4..4),
- kind: ast::ErrorKind::ClassUnclosed,
- }
- );
- }
-
- #[test]
- fn maybe_parse_ascii_class() {
- assert_eq!(
- parser(r"[:alnum:]").maybe_parse_ascii_class(),
- Some(ast::ClassAscii {
- span: span(0..9),
- kind: ast::ClassAsciiKind::Alnum,
- negated: false,
- })
- );
- assert_eq!(
- parser(r"[:alnum:]A").maybe_parse_ascii_class(),
- Some(ast::ClassAscii {
- span: span(0..9),
- kind: ast::ClassAsciiKind::Alnum,
- negated: false,
- })
- );
- assert_eq!(
- parser(r"[:^alnum:]").maybe_parse_ascii_class(),
- Some(ast::ClassAscii {
- span: span(0..10),
- kind: ast::ClassAsciiKind::Alnum,
- negated: true,
- })
- );
-
- let p = parser(r"[:");
- assert_eq!(p.maybe_parse_ascii_class(), None);
- assert_eq!(p.offset(), 0);
-
- let p = parser(r"[:^");
- assert_eq!(p.maybe_parse_ascii_class(), None);
- assert_eq!(p.offset(), 0);
-
- let p = parser(r"[^:alnum:]");
- assert_eq!(p.maybe_parse_ascii_class(), None);
- assert_eq!(p.offset(), 0);
-
- let p = parser(r"[:alnnum:]");
- assert_eq!(p.maybe_parse_ascii_class(), None);
- assert_eq!(p.offset(), 0);
-
- let p = parser(r"[:alnum]");
- assert_eq!(p.maybe_parse_ascii_class(), None);
- assert_eq!(p.offset(), 0);
-
- let p = parser(r"[:alnum:");
- assert_eq!(p.maybe_parse_ascii_class(), None);
- assert_eq!(p.offset(), 0);
- }
-
- #[test]
- fn parse_unicode_class() {
- assert_eq!(
- parser(r"\pN").parse_escape(),
- Ok(Primitive::Unicode(ast::ClassUnicode {
- span: span(0..3),
- negated: false,
- kind: ast::ClassUnicodeKind::OneLetter('N'),
- }))
- );
- assert_eq!(
- parser(r"\PN").parse_escape(),
- Ok(Primitive::Unicode(ast::ClassUnicode {
- span: span(0..3),
- negated: true,
- kind: ast::ClassUnicodeKind::OneLetter('N'),
- }))
- );
- assert_eq!(
- parser(r"\p{N}").parse_escape(),
- Ok(Primitive::Unicode(ast::ClassUnicode {
- span: span(0..5),
- negated: false,
- kind: ast::ClassUnicodeKind::Named(s("N")),
- }))
- );
- assert_eq!(
- parser(r"\P{N}").parse_escape(),
- Ok(Primitive::Unicode(ast::ClassUnicode {
- span: span(0..5),
- negated: true,
- kind: ast::ClassUnicodeKind::Named(s("N")),
- }))
- );
- assert_eq!(
- parser(r"\p{Greek}").parse_escape(),
- Ok(Primitive::Unicode(ast::ClassUnicode {
- span: span(0..9),
- negated: false,
- kind: ast::ClassUnicodeKind::Named(s("Greek")),
- }))
- );
-
- assert_eq!(
- parser(r"\p{scx:Katakana}").parse_escape(),
- Ok(Primitive::Unicode(ast::ClassUnicode {
- span: span(0..16),
- negated: false,
- kind: ast::ClassUnicodeKind::NamedValue {
- op: ast::ClassUnicodeOpKind::Colon,
- name: s("scx"),
- value: s("Katakana"),
- },
- }))
- );
- assert_eq!(
- parser(r"\p{scx=Katakana}").parse_escape(),
- Ok(Primitive::Unicode(ast::ClassUnicode {
- span: span(0..16),
- negated: false,
- kind: ast::ClassUnicodeKind::NamedValue {
- op: ast::ClassUnicodeOpKind::Equal,
- name: s("scx"),
- value: s("Katakana"),
- },
- }))
- );
- assert_eq!(
- parser(r"\p{scx!=Katakana}").parse_escape(),
- Ok(Primitive::Unicode(ast::ClassUnicode {
- span: span(0..17),
- negated: false,
- kind: ast::ClassUnicodeKind::NamedValue {
- op: ast::ClassUnicodeOpKind::NotEqual,
- name: s("scx"),
- value: s("Katakana"),
- },
- }))
- );
-
- assert_eq!(
- parser(r"\p{:}").parse_escape(),
- Ok(Primitive::Unicode(ast::ClassUnicode {
- span: span(0..5),
- negated: false,
- kind: ast::ClassUnicodeKind::NamedValue {
- op: ast::ClassUnicodeOpKind::Colon,
- name: s(""),
- value: s(""),
- },
- }))
- );
- assert_eq!(
- parser(r"\p{=}").parse_escape(),
- Ok(Primitive::Unicode(ast::ClassUnicode {
- span: span(0..5),
- negated: false,
- kind: ast::ClassUnicodeKind::NamedValue {
- op: ast::ClassUnicodeOpKind::Equal,
- name: s(""),
- value: s(""),
- },
- }))
- );
- assert_eq!(
- parser(r"\p{!=}").parse_escape(),
- Ok(Primitive::Unicode(ast::ClassUnicode {
- span: span(0..6),
- negated: false,
- kind: ast::ClassUnicodeKind::NamedValue {
- op: ast::ClassUnicodeOpKind::NotEqual,
- name: s(""),
- value: s(""),
- },
- }))
- );
-
- assert_eq!(
- parser(r"\p").parse_escape().unwrap_err(),
- TestError {
- span: span(2..2),
- kind: ast::ErrorKind::EscapeUnexpectedEof,
- }
- );
- assert_eq!(
- parser(r"\p{").parse_escape().unwrap_err(),
- TestError {
- span: span(3..3),
- kind: ast::ErrorKind::EscapeUnexpectedEof,
- }
- );
- assert_eq!(
- parser(r"\p{N").parse_escape().unwrap_err(),
- TestError {
- span: span(4..4),
- kind: ast::ErrorKind::EscapeUnexpectedEof,
- }
- );
- assert_eq!(
- parser(r"\p{Greek").parse_escape().unwrap_err(),
- TestError {
- span: span(8..8),
- kind: ast::ErrorKind::EscapeUnexpectedEof,
- }
- );
-
- assert_eq!(
- parser(r"\pNz").parse(),
- Ok(Ast::concat(ast::Concat {
- span: span(0..4),
- asts: vec![
- Ast::class_unicode(ast::ClassUnicode {
- span: span(0..3),
- negated: false,
- kind: ast::ClassUnicodeKind::OneLetter('N'),
- }),
- Ast::literal(ast::Literal {
- span: span(3..4),
- kind: ast::LiteralKind::Verbatim,
- c: 'z',
- }),
- ],
- }))
- );
- assert_eq!(
- parser(r"\p{Greek}z").parse(),
- Ok(Ast::concat(ast::Concat {
- span: span(0..10),
- asts: vec![
- Ast::class_unicode(ast::ClassUnicode {
- span: span(0..9),
- negated: false,
- kind: ast::ClassUnicodeKind::Named(s("Greek")),
- }),
- Ast::literal(ast::Literal {
- span: span(9..10),
- kind: ast::LiteralKind::Verbatim,
- c: 'z',
- }),
- ],
- }))
- );
- assert_eq!(
- parser(r"\p\{").parse().unwrap_err(),
- TestError {
- span: span(2..3),
- kind: ast::ErrorKind::UnicodeClassInvalid,
- }
- );
- assert_eq!(
- parser(r"\P\{").parse().unwrap_err(),
- TestError {
- span: span(2..3),
- kind: ast::ErrorKind::UnicodeClassInvalid,
- }
- );
- }
-
- #[test]
- fn parse_perl_class() {
- assert_eq!(
- parser(r"\d").parse_escape(),
- Ok(Primitive::Perl(ast::ClassPerl {
- span: span(0..2),
- kind: ast::ClassPerlKind::Digit,
- negated: false,
- }))
- );
- assert_eq!(
- parser(r"\D").parse_escape(),
- Ok(Primitive::Perl(ast::ClassPerl {
- span: span(0..2),
- kind: ast::ClassPerlKind::Digit,
- negated: true,
- }))
- );
- assert_eq!(
- parser(r"\s").parse_escape(),
- Ok(Primitive::Perl(ast::ClassPerl {
- span: span(0..2),
- kind: ast::ClassPerlKind::Space,
- negated: false,
- }))
- );
- assert_eq!(
- parser(r"\S").parse_escape(),
- Ok(Primitive::Perl(ast::ClassPerl {
- span: span(0..2),
- kind: ast::ClassPerlKind::Space,
- negated: true,
- }))
- );
- assert_eq!(
- parser(r"\w").parse_escape(),
- Ok(Primitive::Perl(ast::ClassPerl {
- span: span(0..2),
- kind: ast::ClassPerlKind::Word,
- negated: false,
- }))
- );
- assert_eq!(
- parser(r"\W").parse_escape(),
- Ok(Primitive::Perl(ast::ClassPerl {
- span: span(0..2),
- kind: ast::ClassPerlKind::Word,
- negated: true,
- }))
- );
-
- assert_eq!(
- parser(r"\d").parse(),
- Ok(Ast::class_perl(ast::ClassPerl {
- span: span(0..2),
- kind: ast::ClassPerlKind::Digit,
- negated: false,
- }))
- );
- assert_eq!(
- parser(r"\dz").parse(),
- Ok(Ast::concat(ast::Concat {
- span: span(0..3),
- asts: vec![
- Ast::class_perl(ast::ClassPerl {
- span: span(0..2),
- kind: ast::ClassPerlKind::Digit,
- negated: false,
- }),
- Ast::literal(ast::Literal {
- span: span(2..3),
- kind: ast::LiteralKind::Verbatim,
- c: 'z',
- }),
- ],
- }))
- );
- }
-
- // This tests a bug fix where the nest limit checker wasn't decrementing
- // its depth during post-traversal, which causes long regexes to trip
- // the default limit too aggressively.
- #[test]
- fn regression_454_nest_too_big() {
- let pattern = r#"
- 2(?:
- [45]\d{3}|
- 7(?:
- 1[0-267]|
- 2[0-289]|
- 3[0-29]|
- 4[01]|
- 5[1-3]|
- 6[013]|
- 7[0178]|
- 91
- )|
- 8(?:
- 0[125]|
- [139][1-6]|
- 2[0157-9]|
- 41|
- 6[1-35]|
- 7[1-5]|
- 8[1-8]|
- 90
- )|
- 9(?:
- 0[0-2]|
- 1[0-4]|
- 2[568]|
- 3[3-6]|
- 5[5-7]|
- 6[0167]|
- 7[15]|
- 8[0146-9]
- )
- )\d{4}
- "#;
- assert!(parser_nest_limit(pattern, 50).parse().is_ok());
- }
-
- // This tests that we treat a trailing `-` in a character class as a
- // literal `-` even when whitespace mode is enabled and there is whitespace
- // after the trailing `-`.
- #[test]
- fn regression_455_trailing_dash_ignore_whitespace() {
- assert!(parser("(?x)[ / - ]").parse().is_ok());
- assert!(parser("(?x)[ a - ]").parse().is_ok());
- assert!(parser(
- "(?x)[
- a
- - ]
- "
- )
- .parse()
- .is_ok());
- assert!(parser(
- "(?x)[
- a # wat
- - ]
- "
- )
- .parse()
- .is_ok());
-
- assert!(parser("(?x)[ / -").parse().is_err());
- assert!(parser("(?x)[ / - ").parse().is_err());
- assert!(parser(
- "(?x)[
- / -
- "
- )
- .parse()
- .is_err());
- assert!(parser(
- "(?x)[
- / - # wat
- "
- )
- .parse()
- .is_err());
- }
-}
diff --git a/vendor/regex-syntax/src/ast/print.rs b/vendor/regex-syntax/src/ast/print.rs
deleted file mode 100644
index 1ceb3c7f..00000000
--- a/vendor/regex-syntax/src/ast/print.rs
+++ /dev/null
@@ -1,577 +0,0 @@
-/*!
-This module provides a regular expression printer for `Ast`.
-*/
-
-use core::fmt;
-
-use crate::ast::{
- self,
- visitor::{self, Visitor},
- Ast,
-};
-
-/// A builder for constructing a printer.
-///
-/// Note that since a printer doesn't have any configuration knobs, this type
-/// remains unexported.
-#[derive(Clone, Debug)]
-struct PrinterBuilder {
- _priv: (),
-}
-
-impl Default for PrinterBuilder {
- fn default() -> PrinterBuilder {
- PrinterBuilder::new()
- }
-}
-
-impl PrinterBuilder {
- fn new() -> PrinterBuilder {
- PrinterBuilder { _priv: () }
- }
-
- fn build(&self) -> Printer {
- Printer { _priv: () }
- }
-}
-
-/// A printer for a regular expression abstract syntax tree.
-///
-/// A printer converts an abstract syntax tree (AST) to a regular expression
-/// pattern string. This particular printer uses constant stack space and heap
-/// space proportional to the size of the AST.
-///
-/// This printer will not necessarily preserve the original formatting of the
-/// regular expression pattern string. For example, all whitespace and comments
-/// are ignored.
-#[derive(Debug)]
-pub struct Printer {
- _priv: (),
-}
-
-impl Printer {
- /// Create a new printer.
- pub fn new() -> Printer {
- PrinterBuilder::new().build()
- }
-
- /// Print the given `Ast` to the given writer. The writer must implement
- /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
- /// here are a `fmt::Formatter` (which is available in `fmt::Display`
- /// implementations) or a `&mut String`.
- pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result {
- visitor::visit(ast, Writer { wtr })
- }
-}
-
-#[derive(Debug)]
-struct Writer<W> {
- wtr: W,
-}
-
-impl<W: fmt::Write> Visitor for Writer<W> {
- type Output = ();
- type Err = fmt::Error;
-
- fn finish(self) -> fmt::Result {
- Ok(())
- }
-
- fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
- match *ast {
- Ast::Group(ref x) => self.fmt_group_pre(x),
- Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_pre(x),
- _ => Ok(()),
- }
- }
-
- fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
- match *ast {
- Ast::Empty(_) => Ok(()),
- Ast::Flags(ref x) => self.fmt_set_flags(x),
- Ast::Literal(ref x) => self.fmt_literal(x),
- Ast::Dot(_) => self.wtr.write_str("."),
- Ast::Assertion(ref x) => self.fmt_assertion(x),
- Ast::ClassPerl(ref x) => self.fmt_class_perl(x),
- Ast::ClassUnicode(ref x) => self.fmt_class_unicode(x),
- Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_post(x),
- Ast::Repetition(ref x) => self.fmt_repetition(x),
- Ast::Group(ref x) => self.fmt_group_post(x),
- Ast::Alternation(_) => Ok(()),
- Ast::Concat(_) => Ok(()),
- }
- }
-
- fn visit_alternation_in(&mut self) -> fmt::Result {
- self.wtr.write_str("|")
- }
-
- fn visit_class_set_item_pre(
- &mut self,
- ast: &ast::ClassSetItem,
- ) -> Result<(), Self::Err> {
- match *ast {
- ast::ClassSetItem::Bracketed(ref x) => {
- self.fmt_class_bracketed_pre(x)
- }
- _ => Ok(()),
- }
- }
-
- fn visit_class_set_item_post(
- &mut self,
- ast: &ast::ClassSetItem,
- ) -> Result<(), Self::Err> {
- use crate::ast::ClassSetItem::*;
-
- match *ast {
- Empty(_) => Ok(()),
- Literal(ref x) => self.fmt_literal(x),
- Range(ref x) => {
- self.fmt_literal(&x.start)?;
- self.wtr.write_str("-")?;
- self.fmt_literal(&x.end)?;
- Ok(())
- }
- Ascii(ref x) => self.fmt_class_ascii(x),
- Unicode(ref x) => self.fmt_class_unicode(x),
- Perl(ref x) => self.fmt_class_perl(x),
- Bracketed(ref x) => self.fmt_class_bracketed_post(x),
- Union(_) => Ok(()),
- }
- }
-
- fn visit_class_set_binary_op_in(
- &mut self,
- ast: &ast::ClassSetBinaryOp,
- ) -> Result<(), Self::Err> {
- self.fmt_class_set_binary_op_kind(&ast.kind)
- }
-}
-
-impl<W: fmt::Write> Writer<W> {
- fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
- use crate::ast::GroupKind::*;
- match ast.kind {
- CaptureIndex(_) => self.wtr.write_str("("),
- CaptureName { ref name, starts_with_p } => {
- let start = if starts_with_p { "(?P<" } else { "(?<" };
- self.wtr.write_str(start)?;
- self.wtr.write_str(&name.name)?;
- self.wtr.write_str(">")?;
- Ok(())
- }
- NonCapturing(ref flags) => {
- self.wtr.write_str("(?")?;
- self.fmt_flags(flags)?;
- self.wtr.write_str(":")?;
- Ok(())
- }
- }
- }
-
- fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result {
- self.wtr.write_str(")")
- }
-
- fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
- use crate::ast::RepetitionKind::*;
- match ast.op.kind {
- ZeroOrOne if ast.greedy => self.wtr.write_str("?"),
- ZeroOrOne => self.wtr.write_str("??"),
- ZeroOrMore if ast.greedy => self.wtr.write_str("*"),
- ZeroOrMore => self.wtr.write_str("*?"),
- OneOrMore if ast.greedy => self.wtr.write_str("+"),
- OneOrMore => self.wtr.write_str("+?"),
- Range(ref x) => {
- self.fmt_repetition_range(x)?;
- if !ast.greedy {
- self.wtr.write_str("?")?;
- }
- Ok(())
- }
- }
- }
-
- fn fmt_repetition_range(
- &mut self,
- ast: &ast::RepetitionRange,
- ) -> fmt::Result {
- use crate::ast::RepetitionRange::*;
- match *ast {
- Exactly(x) => write!(self.wtr, "{{{}}}", x),
- AtLeast(x) => write!(self.wtr, "{{{},}}", x),
- Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y),
- }
- }
-
- fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
- use crate::ast::LiteralKind::*;
-
- match ast.kind {
- Verbatim => self.wtr.write_char(ast.c),
- Meta | Superfluous => write!(self.wtr, r"\{}", ast.c),
- Octal => write!(self.wtr, r"\{:o}", u32::from(ast.c)),
- HexFixed(ast::HexLiteralKind::X) => {
- write!(self.wtr, r"\x{:02X}", u32::from(ast.c))
- }
- HexFixed(ast::HexLiteralKind::UnicodeShort) => {
- write!(self.wtr, r"\u{:04X}", u32::from(ast.c))
- }
- HexFixed(ast::HexLiteralKind::UnicodeLong) => {
- write!(self.wtr, r"\U{:08X}", u32::from(ast.c))
- }
- HexBrace(ast::HexLiteralKind::X) => {
- write!(self.wtr, r"\x{{{:X}}}", u32::from(ast.c))
- }
- HexBrace(ast::HexLiteralKind::UnicodeShort) => {
- write!(self.wtr, r"\u{{{:X}}}", u32::from(ast.c))
- }
- HexBrace(ast::HexLiteralKind::UnicodeLong) => {
- write!(self.wtr, r"\U{{{:X}}}", u32::from(ast.c))
- }
- Special(ast::SpecialLiteralKind::Bell) => {
- self.wtr.write_str(r"\a")
- }
- Special(ast::SpecialLiteralKind::FormFeed) => {
- self.wtr.write_str(r"\f")
- }
- Special(ast::SpecialLiteralKind::Tab) => self.wtr.write_str(r"\t"),
- Special(ast::SpecialLiteralKind::LineFeed) => {
- self.wtr.write_str(r"\n")
- }
- Special(ast::SpecialLiteralKind::CarriageReturn) => {
- self.wtr.write_str(r"\r")
- }
- Special(ast::SpecialLiteralKind::VerticalTab) => {
- self.wtr.write_str(r"\v")
- }
- Special(ast::SpecialLiteralKind::Space) => {
- self.wtr.write_str(r"\ ")
- }
- }
- }
-
- fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
- use crate::ast::AssertionKind::*;
- match ast.kind {
- StartLine => self.wtr.write_str("^"),
- EndLine => self.wtr.write_str("$"),
- StartText => self.wtr.write_str(r"\A"),
- EndText => self.wtr.write_str(r"\z"),
- WordBoundary => self.wtr.write_str(r"\b"),
- NotWordBoundary => self.wtr.write_str(r"\B"),
- WordBoundaryStart => self.wtr.write_str(r"\b{start}"),
- WordBoundaryEnd => self.wtr.write_str(r"\b{end}"),
- WordBoundaryStartAngle => self.wtr.write_str(r"\<"),
- WordBoundaryEndAngle => self.wtr.write_str(r"\>"),
- WordBoundaryStartHalf => self.wtr.write_str(r"\b{start-half}"),
- WordBoundaryEndHalf => self.wtr.write_str(r"\b{end-half}"),
- }
- }
-
- fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result {
- self.wtr.write_str("(?")?;
- self.fmt_flags(&ast.flags)?;
- self.wtr.write_str(")")?;
- Ok(())
- }
-
- fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
- use crate::ast::{Flag, FlagsItemKind};
-
- for item in &ast.items {
- match item.kind {
- FlagsItemKind::Negation => self.wtr.write_str("-"),
- FlagsItemKind::Flag(ref flag) => match *flag {
- Flag::CaseInsensitive => self.wtr.write_str("i"),
- Flag::MultiLine => self.wtr.write_str("m"),
- Flag::DotMatchesNewLine => self.wtr.write_str("s"),
- Flag::SwapGreed => self.wtr.write_str("U"),
- Flag::Unicode => self.wtr.write_str("u"),
- Flag::CRLF => self.wtr.write_str("R"),
- Flag::IgnoreWhitespace => self.wtr.write_str("x"),
- },
- }?;
- }
- Ok(())
- }
-
- fn fmt_class_bracketed_pre(
- &mut self,
- ast: &ast::ClassBracketed,
- ) -> fmt::Result {
- if ast.negated {
- self.wtr.write_str("[^")
- } else {
- self.wtr.write_str("[")
- }
- }
-
- fn fmt_class_bracketed_post(
- &mut self,
- _ast: &ast::ClassBracketed,
- ) -> fmt::Result {
- self.wtr.write_str("]")
- }
-
- fn fmt_class_set_binary_op_kind(
- &mut self,
- ast: &ast::ClassSetBinaryOpKind,
- ) -> fmt::Result {
- use crate::ast::ClassSetBinaryOpKind::*;
- match *ast {
- Intersection => self.wtr.write_str("&&"),
- Difference => self.wtr.write_str("--"),
- SymmetricDifference => self.wtr.write_str("~~"),
- }
- }
-
- fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
- use crate::ast::ClassPerlKind::*;
- match ast.kind {
- Digit if ast.negated => self.wtr.write_str(r"\D"),
- Digit => self.wtr.write_str(r"\d"),
- Space if ast.negated => self.wtr.write_str(r"\S"),
- Space => self.wtr.write_str(r"\s"),
- Word if ast.negated => self.wtr.write_str(r"\W"),
- Word => self.wtr.write_str(r"\w"),
- }
- }
-
- fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
- use crate::ast::ClassAsciiKind::*;
- match ast.kind {
- Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"),
- Alnum => self.wtr.write_str("[:alnum:]"),
- Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"),
- Alpha => self.wtr.write_str("[:alpha:]"),
- Ascii if ast.negated => self.wtr.write_str("[:^ascii:]"),
- Ascii => self.wtr.write_str("[:ascii:]"),
- Blank if ast.negated => self.wtr.write_str("[:^blank:]"),
- Blank => self.wtr.write_str("[:blank:]"),
- Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"),
- Cntrl => self.wtr.write_str("[:cntrl:]"),
- Digit if ast.negated => self.wtr.write_str("[:^digit:]"),
- Digit => self.wtr.write_str("[:digit:]"),
- Graph if ast.negated => self.wtr.write_str("[:^graph:]"),
- Graph => self.wtr.write_str("[:graph:]"),
- Lower if ast.negated => self.wtr.write_str("[:^lower:]"),
- Lower => self.wtr.write_str("[:lower:]"),
- Print if ast.negated => self.wtr.write_str("[:^print:]"),
- Print => self.wtr.write_str("[:print:]"),
- Punct if ast.negated => self.wtr.write_str("[:^punct:]"),
- Punct => self.wtr.write_str("[:punct:]"),
- Space if ast.negated => self.wtr.write_str("[:^space:]"),
- Space => self.wtr.write_str("[:space:]"),
- Upper if ast.negated => self.wtr.write_str("[:^upper:]"),
- Upper => self.wtr.write_str("[:upper:]"),
- Word if ast.negated => self.wtr.write_str("[:^word:]"),
- Word => self.wtr.write_str("[:word:]"),
- Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"),
- Xdigit => self.wtr.write_str("[:xdigit:]"),
- }
- }
-
- fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
- use crate::ast::ClassUnicodeKind::*;
- use crate::ast::ClassUnicodeOpKind::*;
-
- if ast.negated {
- self.wtr.write_str(r"\P")?;
- } else {
- self.wtr.write_str(r"\p")?;
- }
- match ast.kind {
- OneLetter(c) => self.wtr.write_char(c),
- Named(ref x) => write!(self.wtr, "{{{}}}", x),
- NamedValue { op: Equal, ref name, ref value } => {
- write!(self.wtr, "{{{}={}}}", name, value)
- }
- NamedValue { op: Colon, ref name, ref value } => {
- write!(self.wtr, "{{{}:{}}}", name, value)
- }
- NamedValue { op: NotEqual, ref name, ref value } => {
- write!(self.wtr, "{{{}!={}}}", name, value)
- }
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use alloc::string::String;
-
- use crate::ast::parse::ParserBuilder;
-
- use super::*;
-
- fn roundtrip(given: &str) {
- roundtrip_with(|b| b, given);
- }
-
- fn roundtrip_with<F>(mut f: F, given: &str)
- where
- F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
- {
- let mut builder = ParserBuilder::new();
- f(&mut builder);
- let ast = builder.build().parse(given).unwrap();
-
- let mut printer = Printer::new();
- let mut dst = String::new();
- printer.print(&ast, &mut dst).unwrap();
- assert_eq!(given, dst);
- }
-
- #[test]
- fn print_literal() {
- roundtrip("a");
- roundtrip(r"\[");
- roundtrip_with(|b| b.octal(true), r"\141");
- roundtrip(r"\x61");
- roundtrip(r"\x7F");
- roundtrip(r"\u0061");
- roundtrip(r"\U00000061");
- roundtrip(r"\x{61}");
- roundtrip(r"\x{7F}");
- roundtrip(r"\u{61}");
- roundtrip(r"\U{61}");
-
- roundtrip(r"\a");
- roundtrip(r"\f");
- roundtrip(r"\t");
- roundtrip(r"\n");
- roundtrip(r"\r");
- roundtrip(r"\v");
- roundtrip(r"(?x)\ ");
- }
-
- #[test]
- fn print_dot() {
- roundtrip(".");
- }
-
- #[test]
- fn print_concat() {
- roundtrip("ab");
- roundtrip("abcde");
- roundtrip("a(bcd)ef");
- }
-
- #[test]
- fn print_alternation() {
- roundtrip("a|b");
- roundtrip("a|b|c|d|e");
- roundtrip("|a|b|c|d|e");
- roundtrip("|a|b|c|d|e|");
- roundtrip("a(b|c|d)|e|f");
- }
-
- #[test]
- fn print_assertion() {
- roundtrip(r"^");
- roundtrip(r"$");
- roundtrip(r"\A");
- roundtrip(r"\z");
- roundtrip(r"\b");
- roundtrip(r"\B");
- }
-
- #[test]
- fn print_repetition() {
- roundtrip("a?");
- roundtrip("a??");
- roundtrip("a*");
- roundtrip("a*?");
- roundtrip("a+");
- roundtrip("a+?");
- roundtrip("a{5}");
- roundtrip("a{5}?");
- roundtrip("a{5,}");
- roundtrip("a{5,}?");
- roundtrip("a{5,10}");
- roundtrip("a{5,10}?");
- }
-
- #[test]
- fn print_flags() {
- roundtrip("(?i)");
- roundtrip("(?-i)");
- roundtrip("(?s-i)");
- roundtrip("(?-si)");
- roundtrip("(?siUmux)");
- }
-
- #[test]
- fn print_group() {
- roundtrip("(?i:a)");
- roundtrip("(?P<foo>a)");
- roundtrip("(?<foo>a)");
- roundtrip("(a)");
- }
-
- #[test]
- fn print_class() {
- roundtrip(r"[abc]");
- roundtrip(r"[a-z]");
- roundtrip(r"[^a-z]");
- roundtrip(r"[a-z0-9]");
- roundtrip(r"[-a-z0-9]");
- roundtrip(r"[-a-z0-9]");
- roundtrip(r"[a-z0-9---]");
- roundtrip(r"[a-z&&m-n]");
- roundtrip(r"[[a-z&&m-n]]");
- roundtrip(r"[a-z--m-n]");
- roundtrip(r"[a-z~~m-n]");
- roundtrip(r"[a-z[0-9]]");
- roundtrip(r"[a-z[^0-9]]");
-
- roundtrip(r"\d");
- roundtrip(r"\D");
- roundtrip(r"\s");
- roundtrip(r"\S");
- roundtrip(r"\w");
- roundtrip(r"\W");
-
- roundtrip(r"[[:alnum:]]");
- roundtrip(r"[[:^alnum:]]");
- roundtrip(r"[[:alpha:]]");
- roundtrip(r"[[:^alpha:]]");
- roundtrip(r"[[:ascii:]]");
- roundtrip(r"[[:^ascii:]]");
- roundtrip(r"[[:blank:]]");
- roundtrip(r"[[:^blank:]]");
- roundtrip(r"[[:cntrl:]]");
- roundtrip(r"[[:^cntrl:]]");
- roundtrip(r"[[:digit:]]");
- roundtrip(r"[[:^digit:]]");
- roundtrip(r"[[:graph:]]");
- roundtrip(r"[[:^graph:]]");
- roundtrip(r"[[:lower:]]");
- roundtrip(r"[[:^lower:]]");
- roundtrip(r"[[:print:]]");
- roundtrip(r"[[:^print:]]");
- roundtrip(r"[[:punct:]]");
- roundtrip(r"[[:^punct:]]");
- roundtrip(r"[[:space:]]");
- roundtrip(r"[[:^space:]]");
- roundtrip(r"[[:upper:]]");
- roundtrip(r"[[:^upper:]]");
- roundtrip(r"[[:word:]]");
- roundtrip(r"[[:^word:]]");
- roundtrip(r"[[:xdigit:]]");
- roundtrip(r"[[:^xdigit:]]");
-
- roundtrip(r"\pL");
- roundtrip(r"\PL");
- roundtrip(r"\p{L}");
- roundtrip(r"\P{L}");
- roundtrip(r"\p{X=Y}");
- roundtrip(r"\P{X=Y}");
- roundtrip(r"\p{X:Y}");
- roundtrip(r"\P{X:Y}");
- roundtrip(r"\p{X!=Y}");
- roundtrip(r"\P{X!=Y}");
- }
-}
diff --git a/vendor/regex-syntax/src/ast/visitor.rs b/vendor/regex-syntax/src/ast/visitor.rs
deleted file mode 100644
index c1bb24d9..00000000
--- a/vendor/regex-syntax/src/ast/visitor.rs
+++ /dev/null
@@ -1,522 +0,0 @@
-use alloc::{vec, vec::Vec};
-
-use crate::ast::{self, Ast};
-
-/// A trait for visiting an abstract syntax tree (AST) in depth first order.
-///
-/// The principle aim of this trait is to enable callers to perform case
-/// analysis on an abstract syntax tree without necessarily using recursion.
-/// In particular, this permits callers to do case analysis with constant stack
-/// usage, which can be important since the size of an abstract syntax tree
-/// may be proportional to end user input.
-///
-/// Typical usage of this trait involves providing an implementation and then
-/// running it using the [`visit`] function.
-///
-/// Note that the abstract syntax tree for a regular expression is quite
-/// complex. Unless you specifically need it, you might be able to use the much
-/// simpler [high-level intermediate representation](crate::hir::Hir) and its
-/// [corresponding `Visitor` trait](crate::hir::Visitor) instead.
-pub trait Visitor {
- /// The result of visiting an AST.
- type Output;
- /// An error that visiting an AST might return.
- type Err;
-
- /// All implementors of `Visitor` must provide a `finish` method, which
- /// yields the result of visiting the AST or an error.
- fn finish(self) -> Result<Self::Output, Self::Err>;
-
- /// This method is called before beginning traversal of the AST.
- fn start(&mut self) {}
-
- /// This method is called on an `Ast` before descending into child `Ast`
- /// nodes.
- fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
- Ok(())
- }
-
- /// This method is called on an `Ast` after descending all of its child
- /// `Ast` nodes.
- fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
- Ok(())
- }
-
- /// This method is called between child nodes of an
- /// [`Alternation`](ast::Alternation).
- fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
- Ok(())
- }
-
- /// This method is called between child nodes of a concatenation.
- fn visit_concat_in(&mut self) -> Result<(), Self::Err> {
- Ok(())
- }
-
- /// This method is called on every [`ClassSetItem`](ast::ClassSetItem)
- /// before descending into child nodes.
- fn visit_class_set_item_pre(
- &mut self,
- _ast: &ast::ClassSetItem,
- ) -> Result<(), Self::Err> {
- Ok(())
- }
-
- /// This method is called on every [`ClassSetItem`](ast::ClassSetItem)
- /// after descending into child nodes.
- fn visit_class_set_item_post(
- &mut self,
- _ast: &ast::ClassSetItem,
- ) -> Result<(), Self::Err> {
- Ok(())
- }
-
- /// This method is called on every
- /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) before descending into
- /// child nodes.
- fn visit_class_set_binary_op_pre(
- &mut self,
- _ast: &ast::ClassSetBinaryOp,
- ) -> Result<(), Self::Err> {
- Ok(())
- }
-
- /// This method is called on every
- /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) after descending into child
- /// nodes.
- fn visit_class_set_binary_op_post(
- &mut self,
- _ast: &ast::ClassSetBinaryOp,
- ) -> Result<(), Self::Err> {
- Ok(())
- }
-
- /// This method is called between the left hand and right hand child nodes
- /// of a [`ClassSetBinaryOp`](ast::ClassSetBinaryOp).
- fn visit_class_set_binary_op_in(
- &mut self,
- _ast: &ast::ClassSetBinaryOp,
- ) -> Result<(), Self::Err> {
- Ok(())
- }
-}
-
-/// Executes an implementation of `Visitor` in constant stack space.
-///
-/// This function will visit every node in the given `Ast` while calling the
-/// appropriate methods provided by the [`Visitor`] trait.
-///
-/// The primary use case for this method is when one wants to perform case
-/// analysis over an `Ast` without using a stack size proportional to the depth
-/// of the `Ast`. Namely, this method will instead use constant stack size, but
-/// will use heap space proportional to the size of the `Ast`. This may be
-/// desirable in cases where the size of `Ast` is proportional to end user
-/// input.
-///
-/// If the visitor returns an error at any point, then visiting is stopped and
-/// the error is returned.
-pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> {
- HeapVisitor::new().visit(ast, visitor)
-}
-
-/// HeapVisitor visits every item in an `Ast` recursively using constant stack
-/// size and a heap size proportional to the size of the `Ast`.
-struct HeapVisitor<'a> {
- /// A stack of `Ast` nodes. This is roughly analogous to the call stack
- /// used in a typical recursive visitor.
- stack: Vec<(&'a Ast, Frame<'a>)>,
- /// Similar to the `Ast` stack above, but is used only for character
- /// classes. In particular, character classes embed their own mini
- /// recursive syntax.
- stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>,
-}
-
-/// Represents a single stack frame while performing structural induction over
-/// an `Ast`.
-enum Frame<'a> {
- /// A stack frame allocated just before descending into a repetition
- /// operator's child node.
- Repetition(&'a ast::Repetition),
- /// A stack frame allocated just before descending into a group's child
- /// node.
- Group(&'a ast::Group),
- /// The stack frame used while visiting every child node of a concatenation
- /// of expressions.
- Concat {
- /// The child node we are currently visiting.
- head: &'a Ast,
- /// The remaining child nodes to visit (which may be empty).
- tail: &'a [Ast],
- },
- /// The stack frame used while visiting every child node of an alternation
- /// of expressions.
- Alternation {
- /// The child node we are currently visiting.
- head: &'a Ast,
- /// The remaining child nodes to visit (which may be empty).
- tail: &'a [Ast],
- },
-}
-
-/// Represents a single stack frame while performing structural induction over
-/// a character class.
-enum ClassFrame<'a> {
- /// The stack frame used while visiting every child node of a union of
- /// character class items.
- Union {
- /// The child node we are currently visiting.
- head: &'a ast::ClassSetItem,
- /// The remaining child nodes to visit (which may be empty).
- tail: &'a [ast::ClassSetItem],
- },
- /// The stack frame used while a binary class operation.
- Binary { op: &'a ast::ClassSetBinaryOp },
- /// A stack frame allocated just before descending into a binary operator's
- /// left hand child node.
- BinaryLHS {
- op: &'a ast::ClassSetBinaryOp,
- lhs: &'a ast::ClassSet,
- rhs: &'a ast::ClassSet,
- },
- /// A stack frame allocated just before descending into a binary operator's
- /// right hand child node.
- BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet },
-}
-
-/// A representation of the inductive step when performing structural induction
-/// over a character class.
-///
-/// Note that there is no analogous explicit type for the inductive step for
-/// `Ast` nodes because the inductive step is just an `Ast`. For character
-/// classes, the inductive step can produce one of two possible child nodes:
-/// an item or a binary operation. (An item cannot be a binary operation
-/// because that would imply binary operations can be unioned in the concrete
-/// syntax, which is not possible.)
-enum ClassInduct<'a> {
- Item(&'a ast::ClassSetItem),
- BinaryOp(&'a ast::ClassSetBinaryOp),
-}
-
-impl<'a> HeapVisitor<'a> {
- fn new() -> HeapVisitor<'a> {
- HeapVisitor { stack: vec![], stack_class: vec![] }
- }
-
- fn visit<V: Visitor>(
- &mut self,
- mut ast: &'a Ast,
- mut visitor: V,
- ) -> Result<V::Output, V::Err> {
- self.stack.clear();
- self.stack_class.clear();
-
- visitor.start();
- loop {
- visitor.visit_pre(ast)?;
- if let Some(x) = self.induct(ast, &mut visitor)? {
- let child = x.child();
- self.stack.push((ast, x));
- ast = child;
- continue;
- }
- // No induction means we have a base case, so we can post visit
- // it now.
- visitor.visit_post(ast)?;
-
- // At this point, we now try to pop our call stack until it is
- // either empty or we hit another inductive case.
- loop {
- let (post_ast, frame) = match self.stack.pop() {
- None => return visitor.finish(),
- Some((post_ast, frame)) => (post_ast, frame),
- };
- // If this is a concat/alternate, then we might have additional
- // inductive steps to process.
- if let Some(x) = self.pop(frame) {
- match x {
- Frame::Alternation { .. } => {
- visitor.visit_alternation_in()?;
- }
- Frame::Concat { .. } => {
- visitor.visit_concat_in()?;
- }
- _ => {}
- }
- ast = x.child();
- self.stack.push((post_ast, x));
- break;
- }
- // Otherwise, we've finished visiting all the child nodes for
- // this AST, so we can post visit it now.
- visitor.visit_post(post_ast)?;
- }
- }
- }
-
- /// Build a stack frame for the given AST if one is needed (which occurs if
- /// and only if there are child nodes in the AST). Otherwise, return None.
- ///
- /// If this visits a class, then the underlying visitor implementation may
- /// return an error which will be passed on here.
- fn induct<V: Visitor>(
- &mut self,
- ast: &'a Ast,
- visitor: &mut V,
- ) -> Result<Option<Frame<'a>>, V::Err> {
- Ok(match *ast {
- Ast::ClassBracketed(ref x) => {
- self.visit_class(x, visitor)?;
- None
- }
- Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
- Ast::Group(ref x) => Some(Frame::Group(x)),
- Ast::Concat(ref x) if x.asts.is_empty() => None,
- Ast::Concat(ref x) => {
- Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] })
- }
- Ast::Alternation(ref x) if x.asts.is_empty() => None,
- Ast::Alternation(ref x) => Some(Frame::Alternation {
- head: &x.asts[0],
- tail: &x.asts[1..],
- }),
- _ => None,
- })
- }
-
- /// Pops the given frame. If the frame has an additional inductive step,
- /// then return it, otherwise return `None`.
- fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
- match induct {
- Frame::Repetition(_) => None,
- Frame::Group(_) => None,
- Frame::Concat { tail, .. } => {
- if tail.is_empty() {
- None
- } else {
- Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
- }
- }
- Frame::Alternation { tail, .. } => {
- if tail.is_empty() {
- None
- } else {
- Some(Frame::Alternation {
- head: &tail[0],
- tail: &tail[1..],
- })
- }
- }
- }
- }
-
- fn visit_class<V: Visitor>(
- &mut self,
- ast: &'a ast::ClassBracketed,
- visitor: &mut V,
- ) -> Result<(), V::Err> {
- let mut ast = ClassInduct::from_bracketed(ast);
- loop {
- self.visit_class_pre(&ast, visitor)?;
- if let Some(x) = self.induct_class(&ast) {
- let child = x.child();
- self.stack_class.push((ast, x));
- ast = child;
- continue;
- }
- self.visit_class_post(&ast, visitor)?;
-
- // At this point, we now try to pop our call stack until it is
- // either empty or we hit another inductive case.
- loop {
- let (post_ast, frame) = match self.stack_class.pop() {
- None => return Ok(()),
- Some((post_ast, frame)) => (post_ast, frame),
- };
- // If this is a union or a binary op, then we might have
- // additional inductive steps to process.
- if let Some(x) = self.pop_class(frame) {
- if let ClassFrame::BinaryRHS { ref op, .. } = x {
- visitor.visit_class_set_binary_op_in(op)?;
- }
- ast = x.child();
- self.stack_class.push((post_ast, x));
- break;
- }
- // Otherwise, we've finished visiting all the child nodes for
- // this class node, so we can post visit it now.
- self.visit_class_post(&post_ast, visitor)?;
- }
- }
- }
-
- /// Call the appropriate `Visitor` methods given an inductive step.
- fn visit_class_pre<V: Visitor>(
- &self,
- ast: &ClassInduct<'a>,
- visitor: &mut V,
- ) -> Result<(), V::Err> {
- match *ast {
- ClassInduct::Item(item) => {
- visitor.visit_class_set_item_pre(item)?;
- }
- ClassInduct::BinaryOp(op) => {
- visitor.visit_class_set_binary_op_pre(op)?;
- }
- }
- Ok(())
- }
-
- /// Call the appropriate `Visitor` methods given an inductive step.
- fn visit_class_post<V: Visitor>(
- &self,
- ast: &ClassInduct<'a>,
- visitor: &mut V,
- ) -> Result<(), V::Err> {
- match *ast {
- ClassInduct::Item(item) => {
- visitor.visit_class_set_item_post(item)?;
- }
- ClassInduct::BinaryOp(op) => {
- visitor.visit_class_set_binary_op_post(op)?;
- }
- }
- Ok(())
- }
-
- /// Build a stack frame for the given class node if one is needed (which
- /// occurs if and only if there are child nodes). Otherwise, return None.
- fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> {
- match *ast {
- ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => {
- match x.kind {
- ast::ClassSet::Item(ref item) => {
- Some(ClassFrame::Union { head: item, tail: &[] })
- }
- ast::ClassSet::BinaryOp(ref op) => {
- Some(ClassFrame::Binary { op })
- }
- }
- }
- ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => {
- if x.items.is_empty() {
- None
- } else {
- Some(ClassFrame::Union {
- head: &x.items[0],
- tail: &x.items[1..],
- })
- }
- }
- ClassInduct::BinaryOp(op) => {
- Some(ClassFrame::BinaryLHS { op, lhs: &op.lhs, rhs: &op.rhs })
- }
- _ => None,
- }
- }
-
- /// Pops the given frame. If the frame has an additional inductive step,
- /// then return it, otherwise return `None`.
- fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> {
- match induct {
- ClassFrame::Union { tail, .. } => {
- if tail.is_empty() {
- None
- } else {
- Some(ClassFrame::Union {
- head: &tail[0],
- tail: &tail[1..],
- })
- }
- }
- ClassFrame::Binary { .. } => None,
- ClassFrame::BinaryLHS { op, rhs, .. } => {
- Some(ClassFrame::BinaryRHS { op, rhs })
- }
- ClassFrame::BinaryRHS { .. } => None,
- }
- }
-}
-
-impl<'a> Frame<'a> {
- /// Perform the next inductive step on this frame and return the next
- /// child AST node to visit.
- fn child(&self) -> &'a Ast {
- match *self {
- Frame::Repetition(rep) => &rep.ast,
- Frame::Group(group) => &group.ast,
- Frame::Concat { head, .. } => head,
- Frame::Alternation { head, .. } => head,
- }
- }
-}
-
-impl<'a> ClassFrame<'a> {
- /// Perform the next inductive step on this frame and return the next
- /// child class node to visit.
- fn child(&self) -> ClassInduct<'a> {
- match *self {
- ClassFrame::Union { head, .. } => ClassInduct::Item(head),
- ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op),
- ClassFrame::BinaryLHS { ref lhs, .. } => {
- ClassInduct::from_set(lhs)
- }
- ClassFrame::BinaryRHS { ref rhs, .. } => {
- ClassInduct::from_set(rhs)
- }
- }
- }
-}
-
-impl<'a> ClassInduct<'a> {
- fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> {
- ClassInduct::from_set(&ast.kind)
- }
-
- fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> {
- match *ast {
- ast::ClassSet::Item(ref item) => ClassInduct::Item(item),
- ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op),
- }
- }
-}
-
-impl<'a> core::fmt::Debug for ClassFrame<'a> {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- let x = match *self {
- ClassFrame::Union { .. } => "Union",
- ClassFrame::Binary { .. } => "Binary",
- ClassFrame::BinaryLHS { .. } => "BinaryLHS",
- ClassFrame::BinaryRHS { .. } => "BinaryRHS",
- };
- write!(f, "{}", x)
- }
-}
-
-impl<'a> core::fmt::Debug for ClassInduct<'a> {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- let x = match *self {
- ClassInduct::Item(it) => match *it {
- ast::ClassSetItem::Empty(_) => "Item(Empty)",
- ast::ClassSetItem::Literal(_) => "Item(Literal)",
- ast::ClassSetItem::Range(_) => "Item(Range)",
- ast::ClassSetItem::Ascii(_) => "Item(Ascii)",
- ast::ClassSetItem::Perl(_) => "Item(Perl)",
- ast::ClassSetItem::Unicode(_) => "Item(Unicode)",
- ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)",
- ast::ClassSetItem::Union(_) => "Item(Union)",
- },
- ClassInduct::BinaryOp(it) => match it.kind {
- ast::ClassSetBinaryOpKind::Intersection => {
- "BinaryOp(Intersection)"
- }
- ast::ClassSetBinaryOpKind::Difference => {
- "BinaryOp(Difference)"
- }
- ast::ClassSetBinaryOpKind::SymmetricDifference => {
- "BinaryOp(SymmetricDifference)"
- }
- },
- };
- write!(f, "{}", x)
- }
-}
diff --git a/vendor/regex-syntax/src/debug.rs b/vendor/regex-syntax/src/debug.rs
deleted file mode 100644
index a0b051b4..00000000
--- a/vendor/regex-syntax/src/debug.rs
+++ /dev/null
@@ -1,107 +0,0 @@
-/// A type that wraps a single byte with a convenient fmt::Debug impl that
-/// escapes the byte.
-pub(crate) struct Byte(pub(crate) u8);
-
-impl core::fmt::Debug for Byte {
- fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
- // Special case ASCII space. It's too hard to read otherwise, so
- // put quotes around it. I sometimes wonder whether just '\x20' would
- // be better...
- if self.0 == b' ' {
- return write!(f, "' '");
- }
- // 10 bytes is enough to cover any output from ascii::escape_default.
- let mut bytes = [0u8; 10];
- let mut len = 0;
- for (i, mut b) in core::ascii::escape_default(self.0).enumerate() {
- // capitalize \xab to \xAB
- if i >= 2 && b'a' <= b && b <= b'f' {
- b -= 32;
- }
- bytes[len] = b;
- len += 1;
- }
- write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap())
- }
-}
-
-/// A type that provides a human readable debug impl for arbitrary bytes.
-///
-/// This generally works best when the bytes are presumed to be mostly UTF-8,
-/// but will work for anything.
-///
-/// N.B. This is copied nearly verbatim from regex-automata. Sigh.
-pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]);
-
-impl<'a> core::fmt::Debug for Bytes<'a> {
- fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
- write!(f, "\"")?;
- // This is a sad re-implementation of a similar impl found in bstr.
- let mut bytes = self.0;
- while let Some(result) = utf8_decode(bytes) {
- let ch = match result {
- Ok(ch) => ch,
- Err(byte) => {
- write!(f, r"\x{:02x}", byte)?;
- bytes = &bytes[1..];
- continue;
- }
- };
- bytes = &bytes[ch.len_utf8()..];
- match ch {
- '\0' => write!(f, "\\0")?,
- // ASCII control characters except \0, \n, \r, \t
- '\x01'..='\x08'
- | '\x0b'
- | '\x0c'
- | '\x0e'..='\x19'
- | '\x7f' => {
- write!(f, "\\x{:02x}", u32::from(ch))?;
- }
- '\n' | '\r' | '\t' | _ => {
- write!(f, "{}", ch.escape_debug())?;
- }
- }
- }
- write!(f, "\"")?;
- Ok(())
- }
-}
-
-/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
-///
-/// If no valid encoding of a codepoint exists at the beginning of the given
-/// byte slice, then the first byte is returned instead.
-///
-/// This returns `None` if and only if `bytes` is empty.
-pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
- fn len(byte: u8) -> Option<usize> {
- if byte <= 0x7F {
- return Some(1);
- } else if byte & 0b1100_0000 == 0b1000_0000 {
- return None;
- } else if byte <= 0b1101_1111 {
- Some(2)
- } else if byte <= 0b1110_1111 {
- Some(3)
- } else if byte <= 0b1111_0111 {
- Some(4)
- } else {
- None
- }
- }
-
- if bytes.is_empty() {
- return None;
- }
- let len = match len(bytes[0]) {
- None => return Some(Err(bytes[0])),
- Some(len) if len > bytes.len() => return Some(Err(bytes[0])),
- Some(1) => return Some(Ok(char::from(bytes[0]))),
- Some(len) => len,
- };
- match core::str::from_utf8(&bytes[..len]) {
- Ok(s) => Some(Ok(s.chars().next().unwrap())),
- Err(_) => Some(Err(bytes[0])),
- }
-}
diff --git a/vendor/regex-syntax/src/either.rs b/vendor/regex-syntax/src/either.rs
deleted file mode 100644
index 7ae41e4c..00000000
--- a/vendor/regex-syntax/src/either.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-/// A simple binary sum type.
-///
-/// This is occasionally useful in an ad hoc fashion.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum Either<Left, Right> {
- Left(Left),
- Right(Right),
-}
diff --git a/vendor/regex-syntax/src/error.rs b/vendor/regex-syntax/src/error.rs
deleted file mode 100644
index 98869c4f..00000000
--- a/vendor/regex-syntax/src/error.rs
+++ /dev/null
@@ -1,311 +0,0 @@
-use alloc::{
- format,
- string::{String, ToString},
- vec,
- vec::Vec,
-};
-
-use crate::{ast, hir};
-
-/// This error type encompasses any error that can be returned by this crate.
-///
-/// This error type is marked as `non_exhaustive`. This means that adding a
-/// new variant is not considered a breaking change.
-#[non_exhaustive]
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum Error {
- /// An error that occurred while translating concrete syntax into abstract
- /// syntax (AST).
- Parse(ast::Error),
- /// An error that occurred while translating abstract syntax into a high
- /// level intermediate representation (HIR).
- Translate(hir::Error),
-}
-
-impl From<ast::Error> for Error {
- fn from(err: ast::Error) -> Error {
- Error::Parse(err)
- }
-}
-
-impl From<hir::Error> for Error {
- fn from(err: hir::Error) -> Error {
- Error::Translate(err)
- }
-}
-
-#[cfg(feature = "std")]
-impl std::error::Error for Error {}
-
-impl core::fmt::Display for Error {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- match *self {
- Error::Parse(ref x) => x.fmt(f),
- Error::Translate(ref x) => x.fmt(f),
- }
- }
-}
-
-/// A helper type for formatting nice error messages.
-///
-/// This type is responsible for reporting regex parse errors in a nice human
-/// readable format. Most of its complexity is from interspersing notational
-/// markers pointing out the position where an error occurred.
-#[derive(Debug)]
-pub struct Formatter<'e, E> {
- /// The original regex pattern in which the error occurred.
- pattern: &'e str,
- /// The error kind. It must impl fmt::Display.
- err: &'e E,
- /// The primary span of the error.
- span: &'e ast::Span,
- /// An auxiliary and optional span, in case the error needs to point to
- /// two locations (e.g., when reporting a duplicate capture group name).
- aux_span: Option<&'e ast::Span>,
-}
-
-impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
- fn from(err: &'e ast::Error) -> Self {
- Formatter {
- pattern: err.pattern(),
- err: err.kind(),
- span: err.span(),
- aux_span: err.auxiliary_span(),
- }
- }
-}
-
-impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
- fn from(err: &'e hir::Error) -> Self {
- Formatter {
- pattern: err.pattern(),
- err: err.kind(),
- span: err.span(),
- aux_span: None,
- }
- }
-}
-
-impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- let spans = Spans::from_formatter(self);
- if self.pattern.contains('\n') {
- let divider = repeat_char('~', 79);
-
- writeln!(f, "regex parse error:")?;
- writeln!(f, "{}", divider)?;
- let notated = spans.notate();
- write!(f, "{}", notated)?;
- writeln!(f, "{}", divider)?;
- // If we have error spans that cover multiple lines, then we just
- // note the line numbers.
- if !spans.multi_line.is_empty() {
- let mut notes = vec![];
- for span in &spans.multi_line {
- notes.push(format!(
- "on line {} (column {}) through line {} (column {})",
- span.start.line,
- span.start.column,
- span.end.line,
- span.end.column - 1
- ));
- }
- writeln!(f, "{}", notes.join("\n"))?;
- }
- write!(f, "error: {}", self.err)?;
- } else {
- writeln!(f, "regex parse error:")?;
- let notated = Spans::from_formatter(self).notate();
- write!(f, "{}", notated)?;
- write!(f, "error: {}", self.err)?;
- }
- Ok(())
- }
-}
-
-/// This type represents an arbitrary number of error spans in a way that makes
-/// it convenient to notate the regex pattern. ("Notate" means "point out
-/// exactly where the error occurred in the regex pattern.")
-///
-/// Technically, we can only ever have two spans given our current error
-/// structure. However, after toiling with a specific algorithm for handling
-/// two spans, it became obvious that an algorithm to handle an arbitrary
-/// number of spans was actually much simpler.
-struct Spans<'p> {
- /// The original regex pattern string.
- pattern: &'p str,
- /// The total width that should be used for line numbers. The width is
- /// used for left padding the line numbers for alignment.
- ///
- /// A value of `0` means line numbers should not be displayed. That is,
- /// the pattern is itself only one line.
- line_number_width: usize,
- /// All error spans that occur on a single line. This sequence always has
- /// length equivalent to the number of lines in `pattern`, where the index
- /// of the sequence represents a line number, starting at `0`. The spans
- /// in each line are sorted in ascending order.
- by_line: Vec<Vec<ast::Span>>,
- /// All error spans that occur over one or more lines. That is, the start
- /// and end position of the span have different line numbers. The spans are
- /// sorted in ascending order.
- multi_line: Vec<ast::Span>,
-}
-
-impl<'p> Spans<'p> {
- /// Build a sequence of spans from a formatter.
- fn from_formatter<'e, E: core::fmt::Display>(
- fmter: &'p Formatter<'e, E>,
- ) -> Spans<'p> {
- let mut line_count = fmter.pattern.lines().count();
- // If the pattern ends with a `\n` literal, then our line count is
- // off by one, since a span can occur immediately after the last `\n`,
- // which is consider to be an additional line.
- if fmter.pattern.ends_with('\n') {
- line_count += 1;
- }
- let line_number_width =
- if line_count <= 1 { 0 } else { line_count.to_string().len() };
- let mut spans = Spans {
- pattern: &fmter.pattern,
- line_number_width,
- by_line: vec![vec![]; line_count],
- multi_line: vec![],
- };
- spans.add(fmter.span.clone());
- if let Some(span) = fmter.aux_span {
- spans.add(span.clone());
- }
- spans
- }
-
- /// Add the given span to this sequence, putting it in the right place.
- fn add(&mut self, span: ast::Span) {
- // This is grossly inefficient since we sort after each add, but right
- // now, we only ever add two spans at most.
- if span.is_one_line() {
- let i = span.start.line - 1; // because lines are 1-indexed
- self.by_line[i].push(span);
- self.by_line[i].sort();
- } else {
- self.multi_line.push(span);
- self.multi_line.sort();
- }
- }
-
- /// Notate the pattern string with carents (`^`) pointing at each span
- /// location. This only applies to spans that occur within a single line.
- fn notate(&self) -> String {
- let mut notated = String::new();
- for (i, line) in self.pattern.lines().enumerate() {
- if self.line_number_width > 0 {
- notated.push_str(&self.left_pad_line_number(i + 1));
- notated.push_str(": ");
- } else {
- notated.push_str(" ");
- }
- notated.push_str(line);
- notated.push('\n');
- if let Some(notes) = self.notate_line(i) {
- notated.push_str(&notes);
- notated.push('\n');
- }
- }
- notated
- }
-
- /// Return notes for the line indexed at `i` (zero-based). If there are no
- /// spans for the given line, then `None` is returned. Otherwise, an
- /// appropriately space padded string with correctly positioned `^` is
- /// returned, accounting for line numbers.
- fn notate_line(&self, i: usize) -> Option<String> {
- let spans = &self.by_line[i];
- if spans.is_empty() {
- return None;
- }
- let mut notes = String::new();
- for _ in 0..self.line_number_padding() {
- notes.push(' ');
- }
- let mut pos = 0;
- for span in spans {
- for _ in pos..(span.start.column - 1) {
- notes.push(' ');
- pos += 1;
- }
- let note_len = span.end.column.saturating_sub(span.start.column);
- for _ in 0..core::cmp::max(1, note_len) {
- notes.push('^');
- pos += 1;
- }
- }
- Some(notes)
- }
-
- /// Left pad the given line number with spaces such that it is aligned with
- /// other line numbers.
- fn left_pad_line_number(&self, n: usize) -> String {
- let n = n.to_string();
- let pad = self.line_number_width.checked_sub(n.len()).unwrap();
- let mut result = repeat_char(' ', pad);
- result.push_str(&n);
- result
- }
-
- /// Return the line number padding beginning at the start of each line of
- /// the pattern.
- ///
- /// If the pattern is only one line, then this returns a fixed padding
- /// for visual indentation.
- fn line_number_padding(&self) -> usize {
- if self.line_number_width == 0 {
- 4
- } else {
- 2 + self.line_number_width
- }
- }
-}
-
-fn repeat_char(c: char, count: usize) -> String {
- core::iter::repeat(c).take(count).collect()
-}
-
-#[cfg(test)]
-mod tests {
- use alloc::string::ToString;
-
- use crate::ast::parse::Parser;
-
- fn assert_panic_message(pattern: &str, expected_msg: &str) {
- let result = Parser::new().parse(pattern);
- match result {
- Ok(_) => {
- panic!("regex should not have parsed");
- }
- Err(err) => {
- assert_eq!(err.to_string(), expected_msg.trim());
- }
- }
- }
-
- // See: https://github.com/rust-lang/regex/issues/464
- #[test]
- fn regression_464() {
- let err = Parser::new().parse("a{\n").unwrap_err();
- // This test checks that the error formatter doesn't panic.
- assert!(!err.to_string().is_empty());
- }
-
- // See: https://github.com/rust-lang/regex/issues/545
- #[test]
- fn repetition_quantifier_expects_a_valid_decimal() {
- assert_panic_message(
- r"\\u{[^}]*}",
- r#"
-regex parse error:
- \\u{[^}]*}
- ^
-error: repetition quantifier expects a valid decimal
-"#,
- );
- }
-}
diff --git a/vendor/regex-syntax/src/hir/interval.rs b/vendor/regex-syntax/src/hir/interval.rs
deleted file mode 100644
index d507ee72..00000000
--- a/vendor/regex-syntax/src/hir/interval.rs
+++ /dev/null
@@ -1,564 +0,0 @@
-use core::{char, cmp, fmt::Debug, slice};
-
-use alloc::vec::Vec;
-
-use crate::unicode;
-
-// This module contains an *internal* implementation of interval sets.
-//
-// The primary invariant that interval sets guards is canonical ordering. That
-// is, every interval set contains an ordered sequence of intervals where
-// no two intervals are overlapping or adjacent. While this invariant is
-// occasionally broken within the implementation, it should be impossible for
-// callers to observe it.
-//
-// Since case folding (as implemented below) breaks that invariant, we roll
-// that into this API even though it is a little out of place in an otherwise
-// generic interval set. (Hence the reason why the `unicode` module is imported
-// here.)
-//
-// Some of the implementation complexity here is a result of me wanting to
-// preserve the sequential representation without using additional memory.
-// In many cases, we do use linear extra memory, but it is at most 2x and it
-// is amortized. If we relaxed the memory requirements, this implementation
-// could become much simpler. The extra memory is honestly probably OK, but
-// character classes (especially of the Unicode variety) can become quite
-// large, and it would be nice to keep regex compilation snappy even in debug
-// builds. (In the past, I have been careless with this area of code and it has
-// caused slow regex compilations in debug mode, so this isn't entirely
-// unwarranted.)
-//
-// Tests on this are relegated to the public API of HIR in src/hir.rs.
-
-#[derive(Clone, Debug)]
-pub struct IntervalSet<I> {
- /// A sorted set of non-overlapping ranges.
- ranges: Vec<I>,
- /// While not required at all for correctness, we keep track of whether an
- /// interval set has been case folded or not. This helps us avoid doing
- /// redundant work if, for example, a set has already been cased folded.
- /// And note that whether a set is folded or not is preserved through
- /// all of the pairwise set operations. That is, if both interval sets
- /// have been case folded, then any of difference, union, intersection or
- /// symmetric difference all produce a case folded set.
- ///
- /// Note that when this is true, it *must* be the case that the set is case
- /// folded. But when it's false, the set *may* be case folded. In other
- /// words, we only set this to true when we know it to be case, but we're
- /// okay with it being false if it would otherwise be costly to determine
- /// whether it should be true. This means code cannot assume that a false
- /// value necessarily indicates that the set is not case folded.
- ///
- /// Bottom line: this is a performance optimization.
- folded: bool,
-}
-
-impl<I: Interval> Eq for IntervalSet<I> {}
-
-// We implement PartialEq manually so that we don't consider the set's internal
-// 'folded' property to be part of its identity. The 'folded' property is
-// strictly an optimization.
-impl<I: Interval> PartialEq for IntervalSet<I> {
- fn eq(&self, other: &IntervalSet<I>) -> bool {
- self.ranges.eq(&other.ranges)
- }
-}
-
-impl<I: Interval> IntervalSet<I> {
- /// Create a new set from a sequence of intervals. Each interval is
- /// specified as a pair of bounds, where both bounds are inclusive.
- ///
- /// The given ranges do not need to be in any specific order, and ranges
- /// may overlap.
- pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> {
- let ranges: Vec<I> = intervals.into_iter().collect();
- // An empty set is case folded.
- let folded = ranges.is_empty();
- let mut set = IntervalSet { ranges, folded };
- set.canonicalize();
- set
- }
-
- /// Add a new interval to this set.
- pub fn push(&mut self, interval: I) {
- // TODO: This could be faster. e.g., Push the interval such that
- // it preserves canonicalization.
- self.ranges.push(interval);
- self.canonicalize();
- // We don't know whether the new interval added here is considered
- // case folded, so we conservatively assume that the entire set is
- // no longer case folded if it was previously.
- self.folded = false;
- }
-
- /// Return an iterator over all intervals in this set.
- ///
- /// The iterator yields intervals in ascending order.
- pub fn iter(&self) -> IntervalSetIter<'_, I> {
- IntervalSetIter(self.ranges.iter())
- }
-
- /// Return an immutable slice of intervals in this set.
- ///
- /// The sequence returned is in canonical ordering.
- pub fn intervals(&self) -> &[I] {
- &self.ranges
- }
-
- /// Expand this interval set such that it contains all case folded
- /// characters. For example, if this class consists of the range `a-z`,
- /// then applying case folding will result in the class containing both the
- /// ranges `a-z` and `A-Z`.
- ///
- /// This returns an error if the necessary case mapping data is not
- /// available.
- pub fn case_fold_simple(&mut self) -> Result<(), unicode::CaseFoldError> {
- if self.folded {
- return Ok(());
- }
- let len = self.ranges.len();
- for i in 0..len {
- let range = self.ranges[i];
- if let Err(err) = range.case_fold_simple(&mut self.ranges) {
- self.canonicalize();
- return Err(err);
- }
- }
- self.canonicalize();
- self.folded = true;
- Ok(())
- }
-
- /// Union this set with the given set, in place.
- pub fn union(&mut self, other: &IntervalSet<I>) {
- if other.ranges.is_empty() || self.ranges == other.ranges {
- return;
- }
- // This could almost certainly be done more efficiently.
- self.ranges.extend(&other.ranges);
- self.canonicalize();
- self.folded = self.folded && other.folded;
- }
-
- /// Intersect this set with the given set, in place.
- pub fn intersect(&mut self, other: &IntervalSet<I>) {
- if self.ranges.is_empty() {
- return;
- }
- if other.ranges.is_empty() {
- self.ranges.clear();
- // An empty set is case folded.
- self.folded = true;
- return;
- }
-
- // There should be a way to do this in-place with constant memory,
- // but I couldn't figure out a simple way to do it. So just append
- // the intersection to the end of this range, and then drain it before
- // we're done.
- let drain_end = self.ranges.len();
-
- let mut ita = 0..drain_end;
- let mut itb = 0..other.ranges.len();
- let mut a = ita.next().unwrap();
- let mut b = itb.next().unwrap();
- loop {
- if let Some(ab) = self.ranges[a].intersect(&other.ranges[b]) {
- self.ranges.push(ab);
- }
- let (it, aorb) =
- if self.ranges[a].upper() < other.ranges[b].upper() {
- (&mut ita, &mut a)
- } else {
- (&mut itb, &mut b)
- };
- match it.next() {
- Some(v) => *aorb = v,
- None => break,
- }
- }
- self.ranges.drain(..drain_end);
- self.folded = self.folded && other.folded;
- }
-
- /// Subtract the given set from this set, in place.
- pub fn difference(&mut self, other: &IntervalSet<I>) {
- if self.ranges.is_empty() || other.ranges.is_empty() {
- return;
- }
-
- // This algorithm is (to me) surprisingly complex. A search of the
- // interwebs indicate that this is a potentially interesting problem.
- // Folks seem to suggest interval or segment trees, but I'd like to
- // avoid the overhead (both runtime and conceptual) of that.
- //
- // The following is basically my Shitty First Draft. Therefore, in
- // order to grok it, you probably need to read each line carefully.
- // Simplifications are most welcome!
- //
- // Remember, we can assume the canonical format invariant here, which
- // says that all ranges are sorted, not overlapping and not adjacent in
- // each class.
- let drain_end = self.ranges.len();
- let (mut a, mut b) = (0, 0);
- 'LOOP: while a < drain_end && b < other.ranges.len() {
- // Basically, the easy cases are when neither range overlaps with
- // each other. If the `b` range is less than our current `a`
- // range, then we can skip it and move on.
- if other.ranges[b].upper() < self.ranges[a].lower() {
- b += 1;
- continue;
- }
- // ... similarly for the `a` range. If it's less than the smallest
- // `b` range, then we can add it as-is.
- if self.ranges[a].upper() < other.ranges[b].lower() {
- let range = self.ranges[a];
- self.ranges.push(range);
- a += 1;
- continue;
- }
- // Otherwise, we have overlapping ranges.
- assert!(!self.ranges[a].is_intersection_empty(&other.ranges[b]));
-
- // This part is tricky and was non-obvious to me without looking
- // at explicit examples (see the tests). The trickiness stems from
- // two things: 1) subtracting a range from another range could
- // yield two ranges and 2) after subtracting a range, it's possible
- // that future ranges can have an impact. The loop below advances
- // the `b` ranges until they can't possible impact the current
- // range.
- //
- // For example, if our `a` range is `a-t` and our next three `b`
- // ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply
- // subtraction three times before moving on to the next `a` range.
- let mut range = self.ranges[a];
- while b < other.ranges.len()
- && !range.is_intersection_empty(&other.ranges[b])
- {
- let old_range = range;
- range = match range.difference(&other.ranges[b]) {
- (None, None) => {
- // We lost the entire range, so move on to the next
- // without adding this one.
- a += 1;
- continue 'LOOP;
- }
- (Some(range1), None) | (None, Some(range1)) => range1,
- (Some(range1), Some(range2)) => {
- self.ranges.push(range1);
- range2
- }
- };
- // It's possible that the `b` range has more to contribute
- // here. In particular, if it is greater than the original
- // range, then it might impact the next `a` range *and* it
- // has impacted the current `a` range as much as possible,
- // so we can quit. We don't bump `b` so that the next `a`
- // range can apply it.
- if other.ranges[b].upper() > old_range.upper() {
- break;
- }
- // Otherwise, the next `b` range might apply to the current
- // `a` range.
- b += 1;
- }
- self.ranges.push(range);
- a += 1;
- }
- while a < drain_end {
- let range = self.ranges[a];
- self.ranges.push(range);
- a += 1;
- }
- self.ranges.drain(..drain_end);
- self.folded = self.folded && other.folded;
- }
-
- /// Compute the symmetric difference of the two sets, in place.
- ///
- /// This computes the symmetric difference of two interval sets. This
- /// removes all elements in this set that are also in the given set,
- /// but also adds all elements from the given set that aren't in this
- /// set. That is, the set will contain all elements in either set,
- /// but will not contain any elements that are in both sets.
- pub fn symmetric_difference(&mut self, other: &IntervalSet<I>) {
- // TODO(burntsushi): Fix this so that it amortizes allocation.
- let mut intersection = self.clone();
- intersection.intersect(other);
- self.union(other);
- self.difference(&intersection);
- }
-
- /// Negate this interval set.
- ///
- /// For all `x` where `x` is any element, if `x` was in this set, then it
- /// will not be in this set after negation.
- pub fn negate(&mut self) {
- if self.ranges.is_empty() {
- let (min, max) = (I::Bound::min_value(), I::Bound::max_value());
- self.ranges.push(I::create(min, max));
- // The set containing everything must case folded.
- self.folded = true;
- return;
- }
-
- // There should be a way to do this in-place with constant memory,
- // but I couldn't figure out a simple way to do it. So just append
- // the negation to the end of this range, and then drain it before
- // we're done.
- let drain_end = self.ranges.len();
-
- // We do checked arithmetic below because of the canonical ordering
- // invariant.
- if self.ranges[0].lower() > I::Bound::min_value() {
- let upper = self.ranges[0].lower().decrement();
- self.ranges.push(I::create(I::Bound::min_value(), upper));
- }
- for i in 1..drain_end {
- let lower = self.ranges[i - 1].upper().increment();
- let upper = self.ranges[i].lower().decrement();
- self.ranges.push(I::create(lower, upper));
- }
- if self.ranges[drain_end - 1].upper() < I::Bound::max_value() {
- let lower = self.ranges[drain_end - 1].upper().increment();
- self.ranges.push(I::create(lower, I::Bound::max_value()));
- }
- self.ranges.drain(..drain_end);
- // We don't need to update whether this set is folded or not, because
- // it is conservatively preserved through negation. Namely, if a set
- // is not folded, then it is possible that its negation is folded, for
- // example, [^☃]. But we're fine with assuming that the set is not
- // folded in that case. (`folded` permits false negatives but not false
- // positives.)
- //
- // But what about when a set is folded, is its negation also
- // necessarily folded? Yes. Because if a set is folded, then for every
- // character in the set, it necessarily included its equivalence class
- // of case folded characters. Negating it in turn means that all
- // equivalence classes in the set are negated, and any equivalence
- // class that was previously not in the set is now entirely in the set.
- }
-
- /// Converts this set into a canonical ordering.
- fn canonicalize(&mut self) {
- if self.is_canonical() {
- return;
- }
- self.ranges.sort();
- assert!(!self.ranges.is_empty());
-
- // Is there a way to do this in-place with constant memory? I couldn't
- // figure out a way to do it. So just append the canonicalization to
- // the end of this range, and then drain it before we're done.
- let drain_end = self.ranges.len();
- for oldi in 0..drain_end {
- // If we've added at least one new range, then check if we can
- // merge this range in the previously added range.
- if self.ranges.len() > drain_end {
- let (last, rest) = self.ranges.split_last_mut().unwrap();
- if let Some(union) = last.union(&rest[oldi]) {
- *last = union;
- continue;
- }
- }
- let range = self.ranges[oldi];
- self.ranges.push(range);
- }
- self.ranges.drain(..drain_end);
- }
-
- /// Returns true if and only if this class is in a canonical ordering.
- fn is_canonical(&self) -> bool {
- for pair in self.ranges.windows(2) {
- if pair[0] >= pair[1] {
- return false;
- }
- if pair[0].is_contiguous(&pair[1]) {
- return false;
- }
- }
- true
- }
-}
-
-/// An iterator over intervals.
-#[derive(Debug)]
-pub struct IntervalSetIter<'a, I>(slice::Iter<'a, I>);
-
-impl<'a, I> Iterator for IntervalSetIter<'a, I> {
- type Item = &'a I;
-
- fn next(&mut self) -> Option<&'a I> {
- self.0.next()
- }
-}
-
-pub trait Interval:
- Clone + Copy + Debug + Default + Eq + PartialEq + PartialOrd + Ord
-{
- type Bound: Bound;
-
- fn lower(&self) -> Self::Bound;
- fn upper(&self) -> Self::Bound;
- fn set_lower(&mut self, bound: Self::Bound);
- fn set_upper(&mut self, bound: Self::Bound);
- fn case_fold_simple(
- &self,
- intervals: &mut Vec<Self>,
- ) -> Result<(), unicode::CaseFoldError>;
-
- /// Create a new interval.
- fn create(lower: Self::Bound, upper: Self::Bound) -> Self {
- let mut int = Self::default();
- if lower <= upper {
- int.set_lower(lower);
- int.set_upper(upper);
- } else {
- int.set_lower(upper);
- int.set_upper(lower);
- }
- int
- }
-
- /// Union the given overlapping range into this range.
- ///
- /// If the two ranges aren't contiguous, then this returns `None`.
- fn union(&self, other: &Self) -> Option<Self> {
- if !self.is_contiguous(other) {
- return None;
- }
- let lower = cmp::min(self.lower(), other.lower());
- let upper = cmp::max(self.upper(), other.upper());
- Some(Self::create(lower, upper))
- }
-
- /// Intersect this range with the given range and return the result.
- ///
- /// If the intersection is empty, then this returns `None`.
- fn intersect(&self, other: &Self) -> Option<Self> {
- let lower = cmp::max(self.lower(), other.lower());
- let upper = cmp::min(self.upper(), other.upper());
- if lower <= upper {
- Some(Self::create(lower, upper))
- } else {
- None
- }
- }
-
- /// Subtract the given range from this range and return the resulting
- /// ranges.
- ///
- /// If subtraction would result in an empty range, then no ranges are
- /// returned.
- fn difference(&self, other: &Self) -> (Option<Self>, Option<Self>) {
- if self.is_subset(other) {
- return (None, None);
- }
- if self.is_intersection_empty(other) {
- return (Some(self.clone()), None);
- }
- let add_lower = other.lower() > self.lower();
- let add_upper = other.upper() < self.upper();
- // We know this because !self.is_subset(other) and the ranges have
- // a non-empty intersection.
- assert!(add_lower || add_upper);
- let mut ret = (None, None);
- if add_lower {
- let upper = other.lower().decrement();
- ret.0 = Some(Self::create(self.lower(), upper));
- }
- if add_upper {
- let lower = other.upper().increment();
- let range = Self::create(lower, self.upper());
- if ret.0.is_none() {
- ret.0 = Some(range);
- } else {
- ret.1 = Some(range);
- }
- }
- ret
- }
-
- /// Returns true if and only if the two ranges are contiguous. Two ranges
- /// are contiguous if and only if the ranges are either overlapping or
- /// adjacent.
- fn is_contiguous(&self, other: &Self) -> bool {
- let lower1 = self.lower().as_u32();
- let upper1 = self.upper().as_u32();
- let lower2 = other.lower().as_u32();
- let upper2 = other.upper().as_u32();
- cmp::max(lower1, lower2) <= cmp::min(upper1, upper2).saturating_add(1)
- }
-
- /// Returns true if and only if the intersection of this range and the
- /// other range is empty.
- fn is_intersection_empty(&self, other: &Self) -> bool {
- let (lower1, upper1) = (self.lower(), self.upper());
- let (lower2, upper2) = (other.lower(), other.upper());
- cmp::max(lower1, lower2) > cmp::min(upper1, upper2)
- }
-
- /// Returns true if and only if this range is a subset of the other range.
- fn is_subset(&self, other: &Self) -> bool {
- let (lower1, upper1) = (self.lower(), self.upper());
- let (lower2, upper2) = (other.lower(), other.upper());
- (lower2 <= lower1 && lower1 <= upper2)
- && (lower2 <= upper1 && upper1 <= upper2)
- }
-}
-
-pub trait Bound:
- Copy + Clone + Debug + Eq + PartialEq + PartialOrd + Ord
-{
- fn min_value() -> Self;
- fn max_value() -> Self;
- fn as_u32(self) -> u32;
- fn increment(self) -> Self;
- fn decrement(self) -> Self;
-}
-
-impl Bound for u8 {
- fn min_value() -> Self {
- u8::MIN
- }
- fn max_value() -> Self {
- u8::MAX
- }
- fn as_u32(self) -> u32 {
- u32::from(self)
- }
- fn increment(self) -> Self {
- self.checked_add(1).unwrap()
- }
- fn decrement(self) -> Self {
- self.checked_sub(1).unwrap()
- }
-}
-
-impl Bound for char {
- fn min_value() -> Self {
- '\x00'
- }
- fn max_value() -> Self {
- '\u{10FFFF}'
- }
- fn as_u32(self) -> u32 {
- u32::from(self)
- }
-
- fn increment(self) -> Self {
- match self {
- '\u{D7FF}' => '\u{E000}',
- c => char::from_u32(u32::from(c).checked_add(1).unwrap()).unwrap(),
- }
- }
-
- fn decrement(self) -> Self {
- match self {
- '\u{E000}' => '\u{D7FF}',
- c => char::from_u32(u32::from(c).checked_sub(1).unwrap()).unwrap(),
- }
- }
-}
-
-// Tests for interval sets are written in src/hir.rs against the public API.
diff --git a/vendor/regex-syntax/src/hir/literal.rs b/vendor/regex-syntax/src/hir/literal.rs
deleted file mode 100644
index a5a3737f..00000000
--- a/vendor/regex-syntax/src/hir/literal.rs
+++ /dev/null
@@ -1,3214 +0,0 @@
-/*!
-Provides literal extraction from `Hir` expressions.
-
-An [`Extractor`] pulls literals out of [`Hir`] expressions and returns a
-[`Seq`] of [`Literal`]s.
-
-The purpose of literal extraction is generally to provide avenues for
-optimizing regex searches. The main idea is that substring searches can be an
-order of magnitude faster than a regex search. Therefore, if one can execute
-a substring search to find candidate match locations and only run the regex
-search at those locations, then it is possible for huge improvements in
-performance to be realized.
-
-With that said, literal optimizations are generally a black art because even
-though substring search is generally faster, if the number of candidates
-produced is high, then it can create a lot of overhead by ping-ponging between
-the substring search and the regex search.
-
-Here are some heuristics that might be used to help increase the chances of
-effective literal optimizations:
-
-* Stick to small [`Seq`]s. If you search for too many literals, it's likely
-to lead to substring search that is only a little faster than a regex search,
-and thus the overhead of using literal optimizations in the first place might
-make things slower overall.
-* The literals in your [`Seq`] shouldn't be too short. In general, longer is
-better. A sequence corresponding to single bytes that occur frequently in the
-haystack, for example, is probably a bad literal optimization because it's
-likely to produce many false positive candidates. Longer literals are less
-likely to match, and thus probably produce fewer false positives.
-* If it's possible to estimate the approximate frequency of each byte according
-to some pre-computed background distribution, it is possible to compute a score
-of how "good" a `Seq` is. If a `Seq` isn't good enough, you might consider
-skipping the literal optimization and just use the regex engine.
-
-(It should be noted that there are always pathological cases that can make
-any kind of literal optimization be a net slower result. This is why it
-might be a good idea to be conservative, or to even provide a means for
-literal optimizations to be dynamically disabled if they are determined to be
-ineffective according to some measure.)
-
-You're encouraged to explore the methods on [`Seq`], which permit shrinking
-the size of sequences in a preference-order preserving fashion.
-
-Finally, note that it isn't strictly necessary to use an [`Extractor`]. Namely,
-an `Extractor` only uses public APIs of the [`Seq`] and [`Literal`] types,
-so it is possible to implement your own extractor. For example, for n-grams
-or "inner" literals (i.e., not prefix or suffix literals). The `Extractor`
-is mostly responsible for the case analysis over `Hir` expressions. Much of
-the "trickier" parts are how to combine literal sequences, and that is all
-implemented on [`Seq`].
-*/
-
-use core::{cmp, mem, num::NonZeroUsize};
-
-use alloc::{vec, vec::Vec};
-
-use crate::hir::{self, Hir};
-
-/// Extracts prefix or suffix literal sequences from [`Hir`] expressions.
-///
-/// Literal extraction is based on the following observations:
-///
-/// * Many regexes start with one or a small number of literals.
-/// * Substring search for literals is often much faster (sometimes by an order
-/// of magnitude) than a regex search.
-///
-/// Thus, in many cases, one can search for literals to find candidate starting
-/// locations of a match, and then only run the full regex engine at each such
-/// location instead of over the full haystack.
-///
-/// The main downside of literal extraction is that it can wind up causing a
-/// search to be slower overall. For example, if there are many matches or if
-/// there are many candidates that don't ultimately lead to a match, then a
-/// lot of overhead will be spent in shuffing back-and-forth between substring
-/// search and the regex engine. This is the fundamental reason why literal
-/// optimizations for regex patterns is sometimes considered a "black art."
-///
-/// # Look-around assertions
-///
-/// Literal extraction treats all look-around assertions as-if they match every
-/// empty string. So for example, the regex `\bquux\b` will yield a sequence
-/// containing a single exact literal `quux`. However, not all occurrences
-/// of `quux` correspond to a match a of the regex. For example, `\bquux\b`
-/// does not match `ZquuxZ` anywhere because `quux` does not fall on a word
-/// boundary.
-///
-/// In effect, if your regex contains look-around assertions, then a match of
-/// an exact literal does not necessarily mean the regex overall matches. So
-/// you may still need to run the regex engine in such cases to confirm the
-/// match.
-///
-/// The precise guarantee you get from a literal sequence is: if every literal
-/// in the sequence is exact and the original regex contains zero look-around
-/// assertions, then a preference-order multi-substring search of those
-/// literals will precisely match a preference-order search of the original
-/// regex.
-///
-/// # Example
-///
-/// This shows how to extract prefixes:
-///
-/// ```
-/// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse};
-///
-/// let hir = parse(r"(a|b|c)(x|y|z)[A-Z]+foo")?;
-/// let got = Extractor::new().extract(&hir);
-/// // All literals returned are "inexact" because none of them reach the
-/// // match state.
-/// let expected = Seq::from_iter([
-/// Literal::inexact("ax"),
-/// Literal::inexact("ay"),
-/// Literal::inexact("az"),
-/// Literal::inexact("bx"),
-/// Literal::inexact("by"),
-/// Literal::inexact("bz"),
-/// Literal::inexact("cx"),
-/// Literal::inexact("cy"),
-/// Literal::inexact("cz"),
-/// ]);
-/// assert_eq!(expected, got);
-///
-/// # Ok::<(), Box<dyn std::error::Error>>(())
-/// ```
-///
-/// This shows how to extract suffixes:
-///
-/// ```
-/// use regex_syntax::{
-/// hir::literal::{Extractor, ExtractKind, Literal, Seq},
-/// parse,
-/// };
-///
-/// let hir = parse(r"foo|[A-Z]+bar")?;
-/// let got = Extractor::new().kind(ExtractKind::Suffix).extract(&hir);
-/// // Since 'foo' gets to a match state, it is considered exact. But 'bar'
-/// // does not because of the '[A-Z]+', and thus is marked inexact.
-/// let expected = Seq::from_iter([
-/// Literal::exact("foo"),
-/// Literal::inexact("bar"),
-/// ]);
-/// assert_eq!(expected, got);
-///
-/// # Ok::<(), Box<dyn std::error::Error>>(())
-/// ```
-#[derive(Clone, Debug)]
-pub struct Extractor {
- kind: ExtractKind,
- limit_class: usize,
- limit_repeat: usize,
- limit_literal_len: usize,
- limit_total: usize,
-}
-
-impl Extractor {
- /// Create a new extractor with a default configuration.
- ///
- /// The extractor can be optionally configured before calling
- /// [`Extractor::extract`] to get a literal sequence.
- pub fn new() -> Extractor {
- Extractor {
- kind: ExtractKind::Prefix,
- limit_class: 10,
- limit_repeat: 10,
- limit_literal_len: 100,
- limit_total: 250,
- }
- }
-
- /// Execute the extractor and return a sequence of literals.
- pub fn extract(&self, hir: &Hir) -> Seq {
- use crate::hir::HirKind::*;
-
- match *hir.kind() {
- Empty | Look(_) => Seq::singleton(self::Literal::exact(vec![])),
- Literal(hir::Literal(ref bytes)) => {
- let mut seq =
- Seq::singleton(self::Literal::exact(bytes.to_vec()));
- self.enforce_literal_len(&mut seq);
- seq
- }
- Class(hir::Class::Unicode(ref cls)) => {
- self.extract_class_unicode(cls)
- }
- Class(hir::Class::Bytes(ref cls)) => self.extract_class_bytes(cls),
- Repetition(ref rep) => self.extract_repetition(rep),
- Capture(hir::Capture { ref sub, .. }) => self.extract(sub),
- Concat(ref hirs) => match self.kind {
- ExtractKind::Prefix => self.extract_concat(hirs.iter()),
- ExtractKind::Suffix => self.extract_concat(hirs.iter().rev()),
- },
- Alternation(ref hirs) => {
- // Unlike concat, we always union starting from the beginning,
- // since the beginning corresponds to the highest preference,
- // which doesn't change based on forwards vs reverse.
- self.extract_alternation(hirs.iter())
- }
- }
- }
-
- /// Set the kind of literal sequence to extract from an [`Hir`] expression.
- ///
- /// The default is to extract prefixes, but suffixes can be selected
- /// instead. The contract for prefixes is that every match of the
- /// corresponding `Hir` must start with one of the literals in the sequence
- /// returned. Moreover, the _order_ of the sequence returned corresponds to
- /// the preference order.
- ///
- /// Suffixes satisfy a similar contract in that every match of the
- /// corresponding `Hir` must end with one of the literals in the sequence
- /// returned. However, there is no guarantee that the literals are in
- /// preference order.
- ///
- /// Remember that a sequence can be infinite. For example, unless the
- /// limits are configured to be impractically large, attempting to extract
- /// prefixes (or suffixes) for the pattern `[A-Z]` will return an infinite
- /// sequence. Generally speaking, if the sequence returned is infinite,
- /// then it is presumed to be unwise to do prefix (or suffix) optimizations
- /// for the pattern.
- pub fn kind(&mut self, kind: ExtractKind) -> &mut Extractor {
- self.kind = kind;
- self
- }
-
- /// Configure a limit on the length of the sequence that is permitted for
- /// a character class. If a character class exceeds this limit, then the
- /// sequence returned for it is infinite.
- ///
- /// This prevents classes like `[A-Z]` or `\pL` from getting turned into
- /// huge and likely unproductive sequences of literals.
- ///
- /// # Example
- ///
- /// This example shows how this limit can be lowered to decrease the tolerance
- /// for character classes being turned into literal sequences.
- ///
- /// ```
- /// use regex_syntax::{hir::literal::{Extractor, Seq}, parse};
- ///
- /// let hir = parse(r"[0-9]")?;
- ///
- /// let got = Extractor::new().extract(&hir);
- /// let expected = Seq::new([
- /// "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
- /// ]);
- /// assert_eq!(expected, got);
- ///
- /// // Now let's shrink the limit and see how that changes things.
- /// let got = Extractor::new().limit_class(4).extract(&hir);
- /// let expected = Seq::infinite();
- /// assert_eq!(expected, got);
- ///
- /// # Ok::<(), Box<dyn std::error::Error>>(())
- /// ```
- pub fn limit_class(&mut self, limit: usize) -> &mut Extractor {
- self.limit_class = limit;
- self
- }
-
- /// Configure a limit on the total number of repetitions that is permitted
- /// before literal extraction is stopped.
- ///
- /// This is useful for limiting things like `(abcde){50}`, or more
- /// insidiously, `(?:){1000000000}`. This limit prevents any one single
- /// repetition from adding too much to a literal sequence.
- ///
- /// With this limit set, repetitions that exceed it will be stopped and any
- /// literals extracted up to that point will be made inexact.
- ///
- /// # Example
- ///
- /// This shows how to decrease the limit and compares it with the default.
- ///
- /// ```
- /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse};
- ///
- /// let hir = parse(r"(abc){8}")?;
- ///
- /// let got = Extractor::new().extract(&hir);
- /// let expected = Seq::new(["abcabcabcabcabcabcabcabc"]);
- /// assert_eq!(expected, got);
- ///
- /// // Now let's shrink the limit and see how that changes things.
- /// let got = Extractor::new().limit_repeat(4).extract(&hir);
- /// let expected = Seq::from_iter([
- /// Literal::inexact("abcabcabcabc"),
- /// ]);
- /// assert_eq!(expected, got);
- ///
- /// # Ok::<(), Box<dyn std::error::Error>>(())
- /// ```
- pub fn limit_repeat(&mut self, limit: usize) -> &mut Extractor {
- self.limit_repeat = limit;
- self
- }
-
- /// Configure a limit on the maximum length of any literal in a sequence.
- ///
- /// This is useful for limiting things like `(abcde){5}{5}{5}{5}`. While
- /// each repetition or literal in that regex is small, when all the
- /// repetitions are applied, one ends up with a literal of length `5^4 =
- /// 625`.
- ///
- /// With this limit set, literals that exceed it will be made inexact and
- /// thus prevented from growing.
- ///
- /// # Example
- ///
- /// This shows how to decrease the limit and compares it with the default.
- ///
- /// ```
- /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse};
- ///
- /// let hir = parse(r"(abc){2}{2}{2}")?;
- ///
- /// let got = Extractor::new().extract(&hir);
- /// let expected = Seq::new(["abcabcabcabcabcabcabcabc"]);
- /// assert_eq!(expected, got);
- ///
- /// // Now let's shrink the limit and see how that changes things.
- /// let got = Extractor::new().limit_literal_len(14).extract(&hir);
- /// let expected = Seq::from_iter([
- /// Literal::inexact("abcabcabcabcab"),
- /// ]);
- /// assert_eq!(expected, got);
- ///
- /// # Ok::<(), Box<dyn std::error::Error>>(())
- /// ```
- pub fn limit_literal_len(&mut self, limit: usize) -> &mut Extractor {
- self.limit_literal_len = limit;
- self
- }
-
- /// Configure a limit on the total number of literals that will be
- /// returned.
- ///
- /// This is useful as a practical measure for avoiding the creation of
- /// large sequences of literals. While the extractor will automatically
- /// handle local creations of large sequences (for example, `[A-Z]` yields
- /// an infinite sequence by default), large sequences can be created
- /// through non-local means as well.
- ///
- /// For example, `[ab]{3}{3}` would yield a sequence of length `512 = 2^9`
- /// despite each of the repetitions being small on their own. This limit
- /// thus represents a "catch all" for avoiding locally small sequences from
- /// combining into large sequences.
- ///
- /// # Example
- ///
- /// This example shows how reducing the limit will change the literal
- /// sequence returned.
- ///
- /// ```
- /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse};
- ///
- /// let hir = parse(r"[ab]{2}{2}")?;
- ///
- /// let got = Extractor::new().extract(&hir);
- /// let expected = Seq::new([
- /// "aaaa", "aaab", "aaba", "aabb",
- /// "abaa", "abab", "abba", "abbb",
- /// "baaa", "baab", "baba", "babb",
- /// "bbaa", "bbab", "bbba", "bbbb",
- /// ]);
- /// assert_eq!(expected, got);
- ///
- /// // The default limit is not too big, but big enough to extract all
- /// // literals from '[ab]{2}{2}'. If we shrink the limit to less than 16,
- /// // then we'll get a truncated set. Notice that it returns a sequence of
- /// // length 4 even though our limit was 10. This is because the sequence
- /// // is difficult to increase without blowing the limit. Notice also
- /// // that every literal in the sequence is now inexact because they were
- /// // stripped of some suffix.
- /// let got = Extractor::new().limit_total(10).extract(&hir);
- /// let expected = Seq::from_iter([
- /// Literal::inexact("aa"),
- /// Literal::inexact("ab"),
- /// Literal::inexact("ba"),
- /// Literal::inexact("bb"),
- /// ]);
- /// assert_eq!(expected, got);
- ///
- /// # Ok::<(), Box<dyn std::error::Error>>(())
- /// ```
- pub fn limit_total(&mut self, limit: usize) -> &mut Extractor {
- // Only records the value. The extractor's `cross` and `union` helpers
- // compare prospective sequence lengths against it before combining.
- self.limit_total = limit;
- self
- }
-
-    /// Builds the literal sequence for a concatenation by folding the
-    /// sequences of its child HIR expressions together, in iteration order,
-    /// via cross product.
-    ///
-    /// Extraction stops as soon as the accumulated sequence contains only
-    /// inexact literals.
-    fn extract_concat<'a, I>(&self, it: I) -> Seq
-    where
-        I: Iterator<Item = &'a Hir>,
-    {
-        let mut acc = Seq::singleton(self::Literal::exact(vec![]));
-        for child in it {
-            // A cross product never extends inexact literals, so once every
-            // literal is inexact (this also covers infinite sequences) the
-            // remaining children cannot change the result.
-            if acc.is_inexact() {
-                return acc;
-            }
-            // 'cross' itself decides between forward and reverse crossing
-            // based on whether prefixes or suffixes are being extracted.
-            let mut next = self.extract(child);
-            acc = self.cross(acc, &mut next);
-        }
-        acc
-    }
-
-    /// Builds the literal sequence for an alternation by unioning the
-    /// sequences of its branches, in iteration order.
-    ///
-    /// Extraction stops as soon as the accumulated union becomes infinite.
-    fn extract_alternation<'a, I>(&self, it: I) -> Seq
-    where
-        I: Iterator<Item = &'a Hir>,
-    {
-        let mut acc = Seq::empty();
-        for branch in it {
-            // Unioning anything into an infinite sequence leaves it
-            // infinite, so the remaining branches cannot change the result.
-            if !acc.is_finite() {
-                return acc;
-            }
-            let mut next = self.extract(branch);
-            acc = self.union(acc, &mut next);
-        }
-        acc
-    }
-
-    /// Extract a sequence of literals from the given repetition. This is a
-    /// best-effort process. Some examples:
-    ///
-    /// 'a*' => [inexact(a), exact("")]
-    /// 'a*?' => [exact(""), inexact(a)]
-    /// 'a+' => [inexact(a)]
-    /// 'a{3}' => [exact(aaa)]
-    /// 'a{3,5}' => [inexact(aaa)]
-    ///
-    /// What matters most is tagging each literal as 'exact' or 'inexact'
-    /// correctly. For example, because 'a*' yields an inexact 'a' together
-    /// with an exact empty string, a regex like 'ab*c' produces the literals
-    /// [inexact(ab), exact(ac)], which might actually be a better prefilter
-    /// than just 'a'.
-    fn extract_repetition(&self, rep: &hir::Repetition) -> Seq {
-        let mut subseq = self.extract(&rep.sub);
-        if rep.min == 0 {
-            // 'a?' is equivalent to 'a|' and 'a??' to '|a', so with 'max=1'
-            // the sub-expression's literals can stay exact. Any other
-            // maximum forces them to be inexact.
-            if rep.max != Some(1) {
-                subseq.make_inexact();
-            }
-            let mut empty = Seq::singleton(Literal::exact(vec![]));
-            // For a non-greedy repetition the empty literal goes first.
-            if !rep.greedy {
-                mem::swap(&mut subseq, &mut empty);
-            }
-            return self.union(subseq, &mut empty);
-        }
-
-        // From here on 'min > 0'. Cross the sub-expression's literals with
-        // themselves 'min' times, capped by the configured repeat limit and
-        // stopping early once nothing exact is left to extend.
-        let limit = u32::try_from(self.limit_repeat).unwrap_or(u32::MAX);
-        let mut seq = Seq::singleton(Literal::exact(vec![]));
-        for _ in 0..cmp::min(rep.min, limit) {
-            if seq.is_inexact() {
-                break;
-            }
-            seq = self.cross(seq, &mut subseq.clone());
-        }
-        // The result is exact only for a fixed count ('min == max') that was
-        // not cut short by the repeat limit.
-        let is_fixed_count = rep.max == Some(rep.min);
-        let truncated = usize::try_from(rep.min).is_err() || rep.min > limit;
-        if !is_fixed_count || truncated {
-            seq.make_inexact();
-        }
-        seq
-    }
-
-    /// Expands the given Unicode class into one literal per character when
-    /// the class is within the configured limit. A class over the limit
-    /// produces an infinite sequence instead.
-    fn extract_class_unicode(&self, cls: &hir::ClassUnicode) -> Seq {
-        if self.class_over_limit_unicode(cls) {
-            Seq::infinite()
-        } else {
-            let mut seq = Seq::empty();
-            cls.iter()
-                .flat_map(|range| range.start()..=range.end())
-                .for_each(|ch| seq.push(Literal::from(ch)));
-            self.enforce_literal_len(&mut seq);
-            seq
-        }
-    }
-
-    /// Expands the given byte class into one literal per byte when the class
-    /// is within the configured limit. A class over the limit produces an
-    /// infinite sequence instead.
-    fn extract_class_bytes(&self, cls: &hir::ClassBytes) -> Seq {
-        if self.class_over_limit_bytes(cls) {
-            Seq::infinite()
-        } else {
-            let mut seq = Seq::empty();
-            cls.iter()
-                .flat_map(|range| range.start()..=range.end())
-                .for_each(|byte| seq.push(Literal::from(byte)));
-            self.enforce_literal_len(&mut seq);
-            seq
-        }
-    }
-
-    /// Reports whether the summed lengths of the ranges in the given Unicode
-    /// class exceed this extractor's class limit.
-    fn class_over_limit_unicode(&self, cls: &hir::ClassUnicode) -> bool {
-        let cap = self.limit_class;
-        let mut total = 0;
-        let mut ranges = cls.iter();
-        // Keep adding range lengths only while still within the cap; this
-        // stops as soon as the running total goes over.
-        while total <= cap {
-            match ranges.next() {
-                Some(range) => total += range.len(),
-                None => return false,
-            }
-        }
-        true
-    }
-
-    /// Reports whether the summed lengths of the ranges in the given byte
-    /// class exceed this extractor's class limit.
-    fn class_over_limit_bytes(&self, cls: &hir::ClassBytes) -> bool {
-        let cap = self.limit_class;
-        let mut total = 0;
-        let mut ranges = cls.iter();
-        // Keep adding range lengths only while still within the cap; this
-        // stops as soon as the running total goes over.
-        while total <= cap {
-            match ranges.next() {
-                Some(range) => total += range.len(),
-                None => return false,
-            }
-        }
-        true
-    }
-
-    /// Crosses `seq1` with `seq2` and returns the result.
-    ///
-    /// If the cross product could exceed the configured total limit, `seq2`
-    /// is first made infinite and that infinite sequence is crossed with
-    /// `seq1` instead. The direction of the cross product follows the
-    /// extraction kind, and the literal length limit is applied to the
-    /// result.
-    fn cross(&self, mut seq1: Seq, seq2: &mut Seq) -> Seq {
-        let over_budget = match seq1.max_cross_len(seq2) {
-            Some(len) => len > self.limit_total,
-            None => false,
-        };
-        if over_budget {
-            seq2.make_infinite();
-        }
-        match self.kind {
-            ExtractKind::Suffix => seq1.cross_reverse(seq2),
-            ExtractKind::Prefix => seq1.cross_forward(seq2),
-        }
-        assert!(seq1.len().map_or(true, |x| x <= self.limit_total));
-        self.enforce_literal_len(&mut seq1);
-        seq1
-    }
-
-    /// Unions `seq2` into `seq1` and returns the result.
-    ///
-    /// If the union could exceed the configured total limit, both sequences
-    /// are first trimmed and deduplicated to make room. If that is still not
-    /// enough, `seq2` is made infinite and that infinite sequence is unioned
-    /// with `seq1` instead.
-    fn union(&self, mut seq1: Seq, seq2: &mut Seq) -> Seq {
-        let budget = self.limit_total;
-        let exceeds = |a: &Seq, b: &Seq| match a.max_union_len(b) {
-            Some(len) => len > budget,
-            None => false,
-        };
-        if exceeds(&seq1, &*seq2) {
-            // Try trimming the literals in both sequences to see whether
-            // that makes room for more of them. Trimming literals that are
-            // already present is preferable to unioning with an infinite
-            // sequence, because the latter infects everything and
-            // effectively stops literal extraction in its tracks.
-            //
-            // Why do we keep 4 bytes here? Well, it's a bit of an
-            // abstraction leakage. Downstream, the literals may wind up
-            // getting fed to the Teddy algorithm, which supports searching
-            // literals up to length 4. So that's why we pick that number
-            // here. Arguably this should be a tuneable parameter, but it
-            // seems a little tricky to describe. And I'm still unsure if
-            // this is the right way to go about culling literal sequences.
-            if let ExtractKind::Suffix = self.kind {
-                seq1.keep_last_bytes(4);
-                seq2.keep_last_bytes(4);
-            } else {
-                seq1.keep_first_bytes(4);
-                seq2.keep_first_bytes(4);
-            }
-            seq1.dedup();
-            seq2.dedup();
-            if exceeds(&seq1, &*seq2) {
-                seq2.make_infinite();
-            }
-        }
-        seq1.union(seq2);
-        assert!(seq1.len().map_or(true, |x| x <= self.limit_total));
-        seq1
-    }
-
-    /// Trims the literals in the given sequence to the configured literal
-    /// length limit, keeping leading bytes when extracting prefixes and
-    /// trailing bytes when extracting suffixes.
-    fn enforce_literal_len(&self, seq: &mut Seq) {
-        if let ExtractKind::Suffix = self.kind {
-            seq.keep_last_bytes(self.limit_literal_len);
-        } else {
-            seq.keep_first_bytes(self.limit_literal_len);
-        }
-    }
-}
-
-impl Default for Extractor {
-    /// Equivalent to [`Extractor::new`].
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-/// The kind of literals to extract from an [`Hir`] expression.
-///
-/// The default extraction kind is `Prefix`.
-///
-/// This enum is marked `#[non_exhaustive]`, so code outside this crate that
-/// matches on it needs a wildcard arm.
-#[non_exhaustive]
-#[derive(Clone, Debug)]
-pub enum ExtractKind {
- /// Extracts only prefix literals from a regex.
- Prefix,
- /// Extracts only suffix literals from a regex.
- ///
- /// Note that the sequence returned by suffix literals currently may
- /// not correctly represent leftmost-first or "preference" order match
- /// semantics.
- Suffix,
-}
-
-impl ExtractKind {
-    /// Reports whether this kind is the `Prefix` variant.
-    pub fn is_prefix(&self) -> bool {
-        match *self {
-            ExtractKind::Prefix => true,
-            ExtractKind::Suffix => false,
-        }
-    }
-
-    /// Reports whether this kind is the `Suffix` variant.
-    pub fn is_suffix(&self) -> bool {
-        match *self {
-            ExtractKind::Suffix => true,
-            ExtractKind::Prefix => false,
-        }
-    }
-}
-
-impl Default for ExtractKind {
-    /// Prefix extraction is the default kind.
-    fn default() -> Self {
-        Self::Prefix
-    }
-}
-
-/// A sequence of literals.
-///
-/// A `Seq` is very much like a set in that it represents a union of its
-/// members. That is, it corresponds to a set of literals where at least one
-/// must match in order for a particular [`Hir`] expression to match. (Whether
-/// this corresponds to the entire `Hir` expression, a prefix of it or a suffix
-/// of it depends on how the `Seq` was extracted from the `Hir`.)
-///
-/// It is also unlike a set in that multiple identical literals may appear,
-/// and that the order of the literals in the `Seq` matters. For example, if
-/// the sequence is `[sam, samwise]` and leftmost-first matching is used, then
-/// `samwise` can never match and the sequence is equivalent to `[sam]`.
-///
-/// # States of a sequence
-///
-/// A `Seq` has a few different logical states to consider:
-///
-/// * The sequence can represent "any" literal. When this happens, the set does
-/// not have a finite size. The purpose of this state is to inhibit callers
-/// from making assumptions about what literals are required in order to match
-/// a particular [`Hir`] expression. Generally speaking, when a set is in this
-/// state, literal optimizations are inhibited. A good example of a regex that
-/// will cause this sort of set to appear is `[A-Za-z]`. The character class
-/// is just too big (and also too narrow) to be usefully expanded into 52
-/// different literals. (Note that the decision for when a seq should become
-/// infinite is determined by the caller. A seq itself has no hard-coded
-/// limits.)
-/// * The sequence can be empty, in which case, it is an affirmative statement
-/// that there are no literals that can match the corresponding `Hir`.
-/// Consequently, the `Hir` never matches any input. For example, `[a&&b]`.
-/// * The sequence can be non-empty, in which case, at least one of the
-/// literals must match in order for the corresponding `Hir` to match.
-///
-/// # Example
-///
-/// This example shows how literal sequences can be simplified by stripping
-/// suffixes and minimizing while maintaining preference order.
-///
-/// ```
-/// use regex_syntax::hir::literal::{Literal, Seq};
-///
-/// let mut seq = Seq::new(&[
-/// "farm",
-/// "appliance",
-/// "faraway",
-/// "apple",
-/// "fare",
-/// "gap",
-/// "applicant",
-/// "applaud",
-/// ]);
-/// seq.keep_first_bytes(3);
-/// seq.minimize_by_preference();
-/// // Notice that 'far' comes before 'app', which matches the order in the
-/// // original sequence. This guarantees that leftmost-first semantics are
-/// // not altered by simplifying the set.
-/// let expected = Seq::from_iter([
-/// Literal::inexact("far"),
-/// Literal::inexact("app"),
-/// Literal::exact("gap"),
-/// ]);
-/// assert_eq!(expected, seq);
-/// ```
-// Equality is derived over `literals`, so two seqs compare equal only when
-// both are infinite (`None`) or both hold equal literals in the same order,
-// as determined by `Literal`'s own equality.
-#[derive(Clone, Eq, PartialEq)]
-pub struct Seq {
- /// The members of this seq.
- ///
- /// When `None`, the seq represents all possible literals. That is, it
- /// prevents one from making assumptions about specific literals in the
- /// seq, and forces one to treat it as if any literal might be in the seq.
- ///
- /// Note that `Some(vec![])` is valid and corresponds to the empty seq of
- /// literals, i.e., a regex that can never match. For example, `[a&&b]`.
- /// It is distinct from `Some(vec![""])`, which corresponds to the seq
- /// containing an empty string, which matches at every position.
- literals: Option<Vec<Literal>>,
-}
-
-impl Seq {
-    /// Creates a finite sequence with no literals in it.
-    ///
-    /// Since an empty sequence matches zero literals, it corresponds to a
-    /// regex that itself can never match.
-    #[inline]
-    pub fn empty() -> Seq {
-        let literals = Some(Vec::new());
-        Seq { literals }
-    }
-
- /// Returns a sequence of literals without a finite size and may contain
- /// any literal.
- ///
- /// A sequence without finite size does not reveal anything about the
- /// characteristics of the literals in its set. There are no fixed prefixes
- /// or suffixes, nor are lower or upper bounds on the length of the literals
- /// in the set known.
- ///
- /// This is useful to represent constructs in a regex that are "too big"
- /// to usefully represent as a sequence of literals. For example, `[A-Za-z]`.
- /// When sequences get too big, they lose their discriminating nature and
- /// are more likely to produce false positives, which in turn makes them
- /// less likely to speed up searches.
- ///
- /// More pragmatically, for many regexes, enumerating all possible literals
- /// is itself not possible or might otherwise use too many resources. So
- /// constraining the size of sets during extraction is a practical trade
- /// off to make.
-    #[inline]
-    pub fn infinite() -> Seq {
-        // `None` is the representation of "any literal may be present".
-        let literals = None;
-        Seq { literals }
-    }
-
-    /// Creates a finite sequence whose only member is the given literal.
-    #[inline]
-    pub fn singleton(lit: Literal) -> Seq {
-        let mut literals = Vec::with_capacity(1);
-        literals.push(lit);
-        Seq { literals: Some(literals) }
-    }
-
-    /// Creates a sequence from the given byte strings, treating each one as
-    /// an exact literal.
-    #[inline]
-    pub fn new<I, B>(it: I) -> Seq
-    where
-        I: IntoIterator<Item = B>,
-        B: AsRef<[u8]>,
-    {
-        let exact_lits =
-            it.into_iter().map(|bytes| Literal::exact(bytes.as_ref()));
-        exact_lits.collect::<Seq>()
-    }
-
-    /// Returns the members of this sequence as a slice of literals, or
-    /// `None` if this sequence is infinite.
-    ///
-    /// An empty slice means that there are no literals that can match this
-    /// sequence.
-    #[inline]
-    pub fn literals(&self) -> Option<&[Literal]> {
-        self.literals.as_ref().map(|lits| lits.as_slice())
-    }
-
-    /// Appends a literal to the end of this sequence.
-    ///
-    /// This does nothing when the sequence is not finite.
-    ///
-    /// It also does nothing when the literal given is equivalent to the
-    /// literal most recently added. This reflects a `Seq`'s "set like"
-    /// behavior and is a practical trade off: two adjacent and equivalent
-    /// literals are never needed in the same sequence, _and_ this particular
-    /// case is easy to detect.
-    #[inline]
-    pub fn push(&mut self, lit: Literal) {
-        if let Some(lits) = self.literals.as_mut() {
-            let repeats_last =
-                matches!(lits.last(), Some(prev) if prev == &lit);
-            if !repeats_last {
-                lits.push(lit);
-            }
-        }
-    }
-
-    /// Marks every literal in this sequence as inexact.
-    ///
-    /// This does nothing when the sequence is not finite.
-    #[inline]
-    pub fn make_inexact(&mut self) {
-        if let Some(lits) = self.literals.as_mut() {
-            for lit in lits {
-                lit.make_inexact();
-            }
-        }
-    }
-
-    /// Turns this sequence into an infinite one, discarding any literals it
-    /// currently holds.
-    ///
-    /// This does nothing when the sequence is already infinite.
-    #[inline]
-    pub fn make_infinite(&mut self) {
-        *self = Seq::infinite();
-    }
-
- /// Modify this sequence to contain the cross product between it and the
- /// sequence given.
- ///
- /// The cross product only considers literals in this sequence that are
- /// exact. That is, inexact literals are not extended.
- ///
- /// The literals are always drained from `other`, even if none are used.
- /// This permits callers to reuse the sequence allocation elsewhere.
- ///
- /// If this sequence is infinite, then this is a no-op, regardless of what
- /// `other` contains (and in this case, the literals are still drained from
- /// `other`). If `other` is infinite and this sequence is finite, then no
- /// literals are added, but every literal in this sequence is made inexact.
- /// The exception is when this sequence contains a zero-length literal, in
- /// which case the infiniteness of `other` infects this sequence, and this
- /// sequence is itself made infinite.
- ///
- /// Like [`Seq::union`], this may attempt to deduplicate literals. See
- /// [`Seq::dedup`] for how deduplication deals with exact and inexact
- /// literals.
- ///
- /// # Example
- ///
- /// This example shows basic usage and how exact and inexact literals
- /// interact.
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Literal, Seq};
- ///
- /// let mut seq1 = Seq::from_iter([
- /// Literal::exact("foo"),
- /// Literal::inexact("bar"),
- /// ]);
- /// let mut seq2 = Seq::from_iter([
- /// Literal::inexact("quux"),
- /// Literal::exact("baz"),
- /// ]);
- /// seq1.cross_forward(&mut seq2);
- ///
- /// // The literals are pulled out of seq2.
- /// assert_eq!(Some(0), seq2.len());
- ///
- /// let expected = Seq::from_iter([
- /// Literal::inexact("fooquux"),
- /// Literal::exact("foobaz"),
- /// Literal::inexact("bar"),
- /// ]);
- /// assert_eq!(expected, seq1);
- /// ```
- ///
- /// This example shows the behavior of when `other` is an infinite
- /// sequence.
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Literal, Seq};
- ///
- /// let mut seq1 = Seq::from_iter([
- /// Literal::exact("foo"),
- /// Literal::inexact("bar"),
- /// ]);
- /// let mut seq2 = Seq::infinite();
- /// seq1.cross_forward(&mut seq2);
- ///
- /// // When seq2 is infinite, cross product doesn't add anything, but
- /// // ensures all members of seq1 are inexact.
- /// let expected = Seq::from_iter([
- /// Literal::inexact("foo"),
- /// Literal::inexact("bar"),
- /// ]);
- /// assert_eq!(expected, seq1);
- /// ```
- ///
- /// This example is like the one above, but shows what happens when this
- /// sequence contains an empty string. In this case, an infinite `other`
- /// sequence infects this sequence (because the empty string means that
- /// there are no finite prefixes):
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Literal, Seq};
- ///
- /// let mut seq1 = Seq::from_iter([
- /// Literal::exact("foo"),
- /// Literal::exact(""), // inexact provokes same behavior
- /// Literal::inexact("bar"),
- /// ]);
- /// let mut seq2 = Seq::infinite();
- /// seq1.cross_forward(&mut seq2);
- ///
- /// // seq1 is now infinite!
- /// assert!(!seq1.is_finite());
- /// ```
- ///
- /// This example shows the behavior when this sequence is infinite.
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Literal, Seq};
- ///
- /// let mut seq1 = Seq::infinite();
- /// let mut seq2 = Seq::from_iter([
- /// Literal::exact("foo"),
- /// Literal::inexact("bar"),
- /// ]);
- /// seq1.cross_forward(&mut seq2);
- ///
- /// // seq1 remains unchanged.
- /// assert!(!seq1.is_finite());
- /// // Even though the literals in seq2 weren't used, it was still drained.
- /// assert_eq!(Some(0), seq2.len());
- /// ```
- #[inline]
- pub fn cross_forward(&mut self, other: &mut Seq) {
- // The preamble deals with either side being infinite (including
- // draining `other`) and returns `None` when there is nothing left to do.
- let (lits1, lits2) = match self.cross_preamble(other) {
- None => return,
- Some((lits1, lits2)) => (lits1, lits2),
- };
- // Capacity hint for the rebuilt list; saturating so a huge product
- // cannot overflow.
- let newcap = lits1.len().saturating_mul(lits2.len());
- // Take ownership of the current literals and rebuild `lits1` in place,
- // preserving the order of the original members.
- for selflit in mem::replace(lits1, Vec::with_capacity(newcap)) {
- // Inexact literals are never extended; they are carried over as-is.
- if !selflit.is_exact() {
- lits1.push(selflit);
- continue;
- }
- for otherlit in lits2.iter() {
- let mut newlit = Literal::exact(Vec::with_capacity(
- selflit.len() + otherlit.len(),
- ));
- // Forward direction: `other`'s bytes are appended after `self`'s.
- newlit.extend(&selflit);
- newlit.extend(&otherlit);
- // The combined literal is exact only if both halves were exact.
- if !otherlit.is_exact() {
- newlit.make_inexact();
- }
- lits1.push(newlit);
- }
- }
- // `other` is always left empty, as documented.
- lits2.drain(..);
- self.dedup();
- }
-
- /// Modify this sequence to contain the cross product between it and
- /// the sequence given, where the sequences are treated as suffixes
- /// instead of prefixes. Namely, the sequence `other` is *prepended*
- /// to `self` (as opposed to `other` being *appended* to `self` in
- /// [`Seq::cross_forward`]).
- ///
- /// The cross product only considers literals in this sequence that are
- /// exact. That is, inexact literals are not extended.
- ///
- /// The literals are always drained from `other`, even if none are used.
- /// This permits callers to reuse the sequence allocation elsewhere.
- ///
- /// If this sequence is infinite, then this is a no-op, regardless of what
- /// `other` contains (and in this case, the literals are still drained from
- /// `other`). If `other` is infinite and this sequence is finite, then no
- /// literals are added, but every literal in this sequence is made inexact.
- /// The exception is when this sequence contains a zero-length literal, in
- /// which case the infiniteness of `other` infects this sequence, and this
- /// sequence is itself made infinite.
- ///
- /// Like [`Seq::union`], this may attempt to deduplicate literals. See
- /// [`Seq::dedup`] for how deduplication deals with exact and inexact
- /// literals.
- ///
- /// # Example
- ///
- /// This example shows basic usage and how exact and inexact literals
- /// interact.
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Literal, Seq};
- ///
- /// let mut seq1 = Seq::from_iter([
- /// Literal::exact("foo"),
- /// Literal::inexact("bar"),
- /// ]);
- /// let mut seq2 = Seq::from_iter([
- /// Literal::inexact("quux"),
- /// Literal::exact("baz"),
- /// ]);
- /// seq1.cross_reverse(&mut seq2);
- ///
- /// // The literals are pulled out of seq2.
- /// assert_eq!(Some(0), seq2.len());
- ///
- /// let expected = Seq::from_iter([
- /// Literal::inexact("quuxfoo"),
- /// Literal::inexact("bar"),
- /// Literal::exact("bazfoo"),
- /// ]);
- /// assert_eq!(expected, seq1);
- /// ```
- ///
- /// This example shows the behavior of when `other` is an infinite
- /// sequence.
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Literal, Seq};
- ///
- /// let mut seq1 = Seq::from_iter([
- /// Literal::exact("foo"),
- /// Literal::inexact("bar"),
- /// ]);
- /// let mut seq2 = Seq::infinite();
- /// seq1.cross_reverse(&mut seq2);
- ///
- /// // When seq2 is infinite, cross product doesn't add anything, but
- /// // ensures all members of seq1 are inexact.
- /// let expected = Seq::from_iter([
- /// Literal::inexact("foo"),
- /// Literal::inexact("bar"),
- /// ]);
- /// assert_eq!(expected, seq1);
- /// ```
- ///
- /// This example is like the one above, but shows what happens when this
- /// sequence contains an empty string. In this case, an infinite `other`
- /// sequence infects this sequence (because the empty string means that
- /// there are no finite suffixes):
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Literal, Seq};
- ///
- /// let mut seq1 = Seq::from_iter([
- /// Literal::exact("foo"),
- /// Literal::exact(""), // inexact provokes same behavior
- /// Literal::inexact("bar"),
- /// ]);
- /// let mut seq2 = Seq::infinite();
- /// seq1.cross_reverse(&mut seq2);
- ///
- /// // seq1 is now infinite!
- /// assert!(!seq1.is_finite());
- /// ```
- ///
- /// This example shows the behavior when this sequence is infinite.
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Literal, Seq};
- ///
- /// let mut seq1 = Seq::infinite();
- /// let mut seq2 = Seq::from_iter([
- /// Literal::exact("foo"),
- /// Literal::inexact("bar"),
- /// ]);
- /// seq1.cross_reverse(&mut seq2);
- ///
- /// // seq1 remains unchanged.
- /// assert!(!seq1.is_finite());
- /// // Even though the literals in seq2 weren't used, it was still drained.
- /// assert_eq!(Some(0), seq2.len());
- /// ```
- #[inline]
- pub fn cross_reverse(&mut self, other: &mut Seq) {
- // The preamble deals with either side being infinite (including
- // draining `other`) and returns `None` when there is nothing left to do.
- let (lits1, lits2) = match self.cross_preamble(other) {
- None => return,
- Some((lits1, lits2)) => (lits1, lits2),
- };
- // We basically proceed as we do in 'cross_forward' at this point,
- // except that the outer loop is now 'other' and the inner loop is now
- // 'self'. That's because 'self' corresponds to suffixes and 'other'
- // corresponds to the sequence we want to *prepend* to the suffixes.
- let newcap = lits1.len().saturating_mul(lits2.len());
- let selflits = mem::replace(lits1, Vec::with_capacity(newcap));
- // Draining here is what leaves `other` empty afterwards.
- //
- // NOTE(review): when `other` holds no literals this loop body never
- // runs, so inexact literals of `self` are not carried over, whereas
- // 'cross_forward' keeps them in that case.
- for (i, otherlit) in lits2.drain(..).enumerate() {
- for selflit in selflits.iter() {
- if !selflit.is_exact() {
- // If the suffix isn't exact, then we can't prepend
- // anything to it. However, we still want to keep it. But
- // we only want to keep one of them, to avoid duplication.
- // (The duplication is okay from a correctness perspective,
- // but wasteful.)
- if i == 0 {
- lits1.push(selflit.clone());
- }
- continue;
- }
- let mut newlit = Literal::exact(Vec::with_capacity(
- otherlit.len() + selflit.len(),
- ));
- // Reverse direction: `other`'s bytes go in front of `self`'s.
- newlit.extend(&otherlit);
- newlit.extend(&selflit);
- // The combined literal is exact only if both halves were exact.
- if !otherlit.is_exact() {
- newlit.make_inexact();
- }
- lits1.push(newlit);
- }
- }
- self.dedup();
- }
-
- /// A helper function that corresponds to the subtle preamble for both
- /// `cross_forward` and `cross_reverse`. In effect, it handles the cases
- /// of infinite sequences for both `self` and `other`, as well as ensuring
- /// that literals from `other` are drained even if they aren't used.
- ///
- /// Returns `None` when either sequence is infinite, in which case all
- /// required changes have already been applied and the caller should stop.
- /// Otherwise returns mutable access to the literal lists of `self` and
- /// `other`, in that order.
- fn cross_preamble<'a>(
- &'a mut self,
- other: &'a mut Seq,
- ) -> Option<(&'a mut Vec<Literal>, &'a mut Vec<Literal>)> {
- let lits2 = match other.literals {
- None => {
- // If our current seq contains the empty string and the seq
- // we're adding matches any literal, then it follows that the
- // current seq must now also match any literal.
- //
- // Otherwise, we just have to make sure everything in this
- // sequence is inexact.
- if self.min_literal_len() == Some(0) {
- *self = Seq::infinite();
- } else {
- self.make_inexact();
- }
- return None;
- }
- Some(ref mut lits) => lits,
- };
- let lits1 = match self.literals {
- None => {
- // If we aren't going to make it to the end of this routine
- // where lits2 is drained, then we need to do it now.
- lits2.drain(..);
- return None;
- }
- Some(ref mut lits) => lits,
- };
- Some((lits1, lits2))
- }
-
- /// Unions the `other` sequence into this one.
- ///
- /// The literals are always drained out of the given `other` sequence,
- /// even if they are being unioned into an infinite sequence. This permits
- /// the caller to reuse the `other` sequence in another context.
- ///
- /// Some literal deduping may be performed. If any deduping happens,
- /// any leftmost-first or "preference" order match semantics will be
- /// preserved.
- ///
- /// # Example
- ///
- /// This example shows basic usage.
- ///
- /// ```
- /// use regex_syntax::hir::literal::Seq;
- ///
- /// let mut seq1 = Seq::new(&["foo", "bar"]);
- /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]);
- /// seq1.union(&mut seq2);
- ///
- /// // The literals are pulled out of seq2.
- /// assert_eq!(Some(0), seq2.len());
- ///
- /// // Adjacent literals are deduped, but non-adjacent literals may not be.
- /// assert_eq!(Seq::new(&["foo", "bar", "quux", "foo"]), seq1);
- /// ```
- ///
- /// This example shows that literals are drained from `other` even when
- /// they aren't necessarily used.
- ///
- /// ```
- /// use regex_syntax::hir::literal::Seq;
- ///
- /// let mut seq1 = Seq::infinite();
- /// // Infinite sequences have no finite length.
- /// assert_eq!(None, seq1.len());
- ///
- /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]);
- /// seq1.union(&mut seq2);
- ///
- /// // seq1 is still infinite and seq2 has been drained.
- /// assert_eq!(None, seq1.len());
- /// assert_eq!(Some(0), seq2.len());
- /// ```
-    #[inline]
-    pub fn union(&mut self, other: &mut Seq) {
-        let incoming = match other.literals.as_mut() {
-            Some(theirs) => theirs.drain(..),
-            None => {
-                // Unioning with an infinite sequence always results in an
-                // infinite sequence.
-                self.make_infinite();
-                return;
-            }
-        };
-        // When this sequence is infinite, 'incoming' is simply dropped at
-        // the end of this function, which still empties 'other'.
-        if let Some(ours) = self.literals.as_mut() {
-            ours.extend(incoming);
-            self.dedup();
-        }
-    }
-
- /// Unions the `other` sequence into this one by splicing the `other`
- /// sequence at the position of the first zero-length literal.
- ///
- /// This is useful for preserving preference order semantics when combining
- /// two literal sequences. For example, in the regex `(a||f)+foo`, the
- /// correct preference order prefix sequence is `[a, foo, f]`.
- ///
- /// The literals are always drained out of the given `other` sequence,
- /// even if they are being unioned into an infinite sequence. This permits
- /// the caller to reuse the `other` sequence in another context. Note that
- /// the literals are drained even if no union is performed as well, i.e.,
- /// when this sequence does not contain a zero-length literal.
- ///
- /// Some literal deduping may be performed. If any deduping happens,
- /// any leftmost-first or "preference" order match semantics will be
- /// preserved.
- ///
- /// # Example
- ///
- /// This example shows basic usage.
- ///
- /// ```
- /// use regex_syntax::hir::literal::Seq;
- ///
- /// let mut seq1 = Seq::new(&["a", "", "f", ""]);
- /// let mut seq2 = Seq::new(&["foo"]);
- /// seq1.union_into_empty(&mut seq2);
- ///
- /// // The literals are pulled out of seq2.
- /// assert_eq!(Some(0), seq2.len());
- /// // 'foo' gets spliced into seq1 where the first empty string occurs.
- /// assert_eq!(Seq::new(&["a", "foo", "f"]), seq1);
- /// ```
- ///
- /// This example shows that literals are drained from `other` even when
- /// they aren't necessarily used.
- ///
- /// ```
- /// use regex_syntax::hir::literal::Seq;
- ///
- /// let mut seq1 = Seq::new(&["foo", "bar"]);
- /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]);
- /// seq1.union_into_empty(&mut seq2);
- ///
- /// // seq1 has no zero length literals, so no splicing happens.
- /// assert_eq!(Seq::new(&["foo", "bar"]), seq1);
- /// // Even though no splicing happens, seq2 is still drained.
- /// assert_eq!(Some(0), seq2.len());
- /// ```
- #[inline]
- pub fn union_into_empty(&mut self, other: &mut Seq) {
- // Create the drain up front so that, on every early return below, the
- // literals of a finite `other` are still removed when it is dropped.
- // `None` here means `other` is infinite.
- let lits2 = other.literals.as_mut().map(|lits| lits.drain(..));
- // An infinite `self` stays infinite; nothing to splice into.
- let lits1 = match self.literals {
- None => return,
- Some(ref mut lits) => lits,
- };
- // Without a zero-length literal there is no splice point, so `self` is
- // left unchanged.
- let first_empty = match lits1.iter().position(|m| m.is_empty()) {
- None => return,
- Some(i) => i,
- };
- let lits2 = match lits2 {
- None => {
- // Note that we are only here if we've found an empty literal,
- // which implies that an infinite sequence infects this seq and
- // also turns it into an infinite sequence.
- self.literals = None;
- return;
- }
- Some(lits) => lits,
- };
- // Clearing out the empties needs to come before the splice because
- // the splice might add more empties that we don't want to get rid
- // of. Since we're splicing into the position of the first empty, the
- // 'first_empty' position computed above is still correct.
- lits1.retain(|m| !m.is_empty());
- // An empty range at `first_empty` makes this a pure insertion.
- lits1.splice(first_empty..first_empty, lits2);
- self.dedup();
- }
-
- /// Deduplicate adjacent equivalent literals in this sequence.
- ///
- /// If adjacent literals are equivalent strings but one is exact and the
- /// other inexact, the inexact literal is kept and the exact one is
- /// removed.
- ///
- /// Deduping an infinite sequence is a no-op.
- ///
- /// # Example
- ///
- /// This example shows how literals that are duplicate byte strings but
- /// are not equivalent with respect to exactness are resolved.
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Literal, Seq};
- ///
- /// let mut seq = Seq::from_iter([
- /// Literal::exact("foo"),
- /// Literal::inexact("foo"),
- /// ]);
- /// seq.dedup();
- ///
- /// assert_eq!(Seq::from_iter([Literal::inexact("foo")]), seq);
- /// ```
-    #[inline]
-    pub fn dedup(&mut self) {
-        let lits = match self.literals.as_mut() {
-            Some(lits) => lits,
-            // Infinite sequence: nothing to dedup.
-            None => return,
-        };
-        // `candidate` is the later element and is dropped when the closure
-        // returns true; `kept` is the earlier element that survives.
-        lits.dedup_by(|candidate, kept| {
-            let same_bytes = candidate.as_bytes() == kept.as_bytes();
-            if same_bytes && candidate.is_exact() != kept.is_exact() {
-                // Disagreement on exactness resolves to inexact.
-                candidate.make_inexact();
-                kept.make_inexact();
-            }
-            same_bytes
-        });
-    }
-
- /// Sorts this sequence of literals lexicographically.
- ///
- /// Note that if, before sorting, a literal that is a prefix of another
- /// literal appears after it, then after sorting, the sequence will not
- /// represent the same preference order match semantics. For example,
- /// sorting the sequence `[samwise, sam]` yields the sequence `[sam,
- /// samwise]`. Under preference order semantics, the latter sequence will
- /// never match `samwise` whereas the first sequence can.
- ///
- /// # Example
- ///
- /// This example shows basic usage.
- ///
- /// ```
- /// use regex_syntax::hir::literal::Seq;
- ///
- /// let mut seq = Seq::new(&["foo", "quux", "bar"]);
- /// seq.sort();
- ///
- /// assert_eq!(Seq::new(&["bar", "foo", "quux"]), seq);
- /// ```
-    #[inline]
-    pub fn sort(&mut self) {
-        // Infinite sequences have no literals to order.
-        if let Some(lits) = self.literals.as_mut() {
-            lits.sort();
-        }
-    }
-
- /// Reverses all of the literals in this sequence.
- ///
- /// The order of the sequence itself is preserved.
- ///
- /// # Example
- ///
- /// This example shows basic usage.
- ///
- /// ```
- /// use regex_syntax::hir::literal::Seq;
- ///
- /// let mut seq = Seq::new(&["oof", "rab"]);
- /// seq.reverse_literals();
- /// assert_eq!(Seq::new(&["foo", "bar"]), seq);
- /// ```
-    #[inline]
-    pub fn reverse_literals(&mut self) {
-        // Walk every literal of a finite sequence (none for an infinite one)
-        // and reverse its bytes in place; positions in the seq are untouched.
-        self.literals
-            .iter_mut()
-            .flatten()
-            .for_each(|lit| lit.reverse());
-    }
-
- /// Shrinks this seq to its minimal size while respecting the preference
- /// order of its literals.
- ///
- /// While this routine will remove duplicate literals from this seq, it
- /// will also remove literals that can never match in a leftmost-first or
- /// "preference order" search. Similar to [`Seq::dedup`], if a literal is
- /// deduped, then the one that remains is made inexact.
- ///
- /// This is a no-op on seqs that are empty or not finite.
- ///
- /// # Example
- ///
- /// This example shows the difference between `{sam, samwise}` and
- /// `{samwise, sam}`.
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Literal, Seq};
- ///
- /// // If 'sam' comes before 'samwise' and a preference order search is
- /// // executed, then 'samwise' can never match.
- /// let mut seq = Seq::new(&["sam", "samwise"]);
- /// seq.minimize_by_preference();
- /// assert_eq!(Seq::from_iter([Literal::inexact("sam")]), seq);
- ///
- /// // But if they are reversed, then it's possible for 'samwise' to match
- /// // since it is given higher preference.
- /// let mut seq = Seq::new(&["samwise", "sam"]);
- /// seq.minimize_by_preference();
- /// assert_eq!(Seq::new(&["samwise", "sam"]), seq);
- /// ```
- ///
- /// This example shows that if an empty string is in this seq, then
- /// anything that comes after it can never match.
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Literal, Seq};
- ///
- /// // An empty string is a prefix of all strings, so it automatically
- /// // inhibits any subsequent strings from matching.
- /// let mut seq = Seq::new(&["foo", "bar", "", "quux", "fox"]);
- /// seq.minimize_by_preference();
- /// let expected = Seq::from_iter([
- /// Literal::exact("foo"),
- /// Literal::exact("bar"),
- /// Literal::inexact(""),
- /// ]);
- /// assert_eq!(expected, seq);
- ///
- /// // And of course, if it's at the beginning, then it makes it impossible
- /// // for anything else to match.
- /// let mut seq = Seq::new(&["", "foo", "quux", "fox"]);
- /// seq.minimize_by_preference();
- /// assert_eq!(Seq::from_iter([Literal::inexact("")]), seq);
- /// ```
-    #[inline]
-    pub fn minimize_by_preference(&mut self) {
-        let lits = match self.literals.as_mut() {
-            Some(lits) => lits,
-            None => return,
-        };
-        // `false`: literals that cause another literal to be dropped are
-        // made inexact.
-        PreferenceTrie::minimize(lits, false);
-    }
-
- /// Trims all literals in this seq such that only the first `len` bytes
- /// remain. If a literal has less than or equal to `len` bytes, then it
- /// remains unchanged. Otherwise, it is trimmed and made inexact.
- ///
- /// # Example
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Literal, Seq};
- ///
- /// let mut seq = Seq::new(&["a", "foo", "quux"]);
- /// seq.keep_first_bytes(2);
- ///
- /// let expected = Seq::from_iter([
- /// Literal::exact("a"),
- /// Literal::inexact("fo"),
- /// Literal::inexact("qu"),
- /// ]);
- /// assert_eq!(expected, seq);
- /// ```
-    #[inline]
-    pub fn keep_first_bytes(&mut self, len: usize) {
-        if let Some(lits) = self.literals.as_mut() {
-            lits.iter_mut().for_each(|lit| lit.keep_first_bytes(len));
-        }
-    }
-
- /// Trims all literals in this seq such that only the last `len` bytes
- /// remain. If a literal has less than or equal to `len` bytes, then it
- /// remains unchanged. Otherwise, it is trimmed and made inexact.
- ///
- /// # Example
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Literal, Seq};
- ///
- /// let mut seq = Seq::new(&["a", "foo", "quux"]);
- /// seq.keep_last_bytes(2);
- ///
- /// let expected = Seq::from_iter([
- /// Literal::exact("a"),
- /// Literal::inexact("oo"),
- /// Literal::inexact("ux"),
- /// ]);
- /// assert_eq!(expected, seq);
- /// ```
-    #[inline]
-    pub fn keep_last_bytes(&mut self, len: usize) {
-        if let Some(lits) = self.literals.as_mut() {
-            lits.iter_mut().for_each(|lit| lit.keep_last_bytes(len));
-        }
-    }
-
-    /// Reports whether this sequence holds a finite set of literals.
-    ///
-    /// A `false` result means the sequence is infinite, i.e., it must be
-    /// treated as though it contains every possible literal.
-    #[inline]
-    pub fn is_finite(&self) -> bool {
-        self.literals().is_some()
-    }
-
-    /// Reports whether this sequence is finite and has no literals at all.
-    ///
-    /// Such a sequence never matches anything. Literal extraction only
-    /// produces it when the corresponding regex itself cannot match.
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        match self.len() {
-            Some(count) => count == 0,
-            None => false,
-        }
-    }
-
-    /// Returns how many literals this sequence contains, or `None` when the
-    /// sequence is infinite.
-    #[inline]
-    pub fn len(&self) -> Option<usize> {
-        let lits = self.literals.as_ref()?;
-        Some(lits.len())
-    }
-
-    /// Reports whether every literal in this sequence is exact.
-    ///
-    /// An infinite sequence is never exact, so this returns false for it.
-    #[inline]
-    pub fn is_exact(&self) -> bool {
-        match self.literals() {
-            Some(lits) => lits.iter().all(|lit| lit.is_exact()),
-            None => false,
-        }
-    }
-
-    /// Reports whether every literal in this sequence is inexact.
-    ///
-    /// An infinite sequence counts as inexact, so this returns true for it.
-    #[inline]
-    pub fn is_inexact(&self) -> bool {
-        match self.literals() {
-            // "All inexact" is the same as "none exact".
-            Some(lits) => !lits.iter().any(|lit| lit.is_exact()),
-            None => true,
-        }
-    }
-
-    /// Returns an upper bound on the number of literals that unioning `self`
-    /// with `other` could produce (the sum of both lengths, saturating).
-    ///
-    /// Returns `None` when either sequence is infinite.
-    #[inline]
-    pub fn max_union_len(&self, other: &Seq) -> Option<usize> {
-        match (self.len(), other.len()) {
-            (Some(ours), Some(theirs)) => Some(ours.saturating_add(theirs)),
-            _ => None,
-        }
-    }
-
-    /// Returns an upper bound on the number of literals that the cross
-    /// product of `self` with `other` could produce (the product of both
-    /// lengths, saturating).
-    ///
-    /// Returns `None` when either sequence is infinite.
-    #[inline]
-    pub fn max_cross_len(&self, other: &Seq) -> Option<usize> {
-        match (self.len(), other.len()) {
-            (Some(ours), Some(theirs)) => Some(ours.saturating_mul(theirs)),
-            _ => None,
-        }
-    }
-
-    /// Returns the byte length of the shortest literal in this sequence.
-    ///
-    /// Returns `None` when the sequence is infinite or has no literals.
-    #[inline]
-    pub fn min_literal_len(&self) -> Option<usize> {
-        let lits = self.literals.as_ref()?;
-        lits.iter().map(|lit| lit.len()).min()
-    }
-
-    /// Returns the byte length of the longest literal in this sequence.
-    ///
-    /// Returns `None` when the sequence is infinite or has no literals.
-    #[inline]
-    pub fn max_literal_len(&self) -> Option<usize> {
-        let lits = self.literals.as_ref()?;
-        lits.iter().map(|lit| lit.len()).max()
-    }
-
- /// Returns the longest common prefix from this seq.
- ///
- /// If the seq matches any literal or otherwise contains no literals, then
- /// there is no meaningful prefix and this returns `None`.
- ///
- /// # Example
- ///
- /// This shows some example seqs and their longest common prefix.
- ///
- /// ```
- /// use regex_syntax::hir::literal::Seq;
- ///
- /// let seq = Seq::new(&["foo", "foobar", "fo"]);
- /// assert_eq!(Some(&b"fo"[..]), seq.longest_common_prefix());
- /// let seq = Seq::new(&["foo", "foo"]);
- /// assert_eq!(Some(&b"foo"[..]), seq.longest_common_prefix());
- /// let seq = Seq::new(&["foo", "bar"]);
- /// assert_eq!(Some(&b""[..]), seq.longest_common_prefix());
- /// let seq = Seq::new(&[""]);
- /// assert_eq!(Some(&b""[..]), seq.longest_common_prefix());
- ///
- /// let seq = Seq::infinite();
- /// assert_eq!(None, seq.longest_common_prefix());
- /// let seq = Seq::empty();
- /// assert_eq!(None, seq.longest_common_prefix());
- /// ```
-    #[inline]
-    pub fn longest_common_prefix(&self) -> Option<&[u8]> {
-        // An infinite seq (matches everything) and an empty seq (matches
-        // nothing) have no meaningful common prefix; both bail out here.
-        let lits = self.literals.as_ref()?;
-        let (first, rest) = lits.split_first()?;
-        // Start with the whole first literal and shrink it against each of
-        // the remaining literals in turn.
-        let mut prefix = first.as_bytes();
-        for lit in rest {
-            let shared = prefix
-                .iter()
-                .zip(lit.as_bytes().iter())
-                .take_while(|&(a, b)| a == b)
-                .count();
-            if shared == 0 {
-                // Nothing in common; no later literal can change that.
-                return Some(&[]);
-            }
-            prefix = &prefix[..shared];
-        }
-        Some(prefix)
-    }
-
- /// Returns the longest common suffix from this seq.
- ///
- /// If the seq matches any literal or otherwise contains no literals, then
- /// there is no meaningful suffix and this returns `None`.
- ///
- /// # Example
- ///
- /// This shows some example seqs and their longest common suffix.
- ///
- /// ```
- /// use regex_syntax::hir::literal::Seq;
- ///
- /// let seq = Seq::new(&["oof", "raboof", "of"]);
- /// assert_eq!(Some(&b"of"[..]), seq.longest_common_suffix());
- /// let seq = Seq::new(&["foo", "foo"]);
- /// assert_eq!(Some(&b"foo"[..]), seq.longest_common_suffix());
- /// let seq = Seq::new(&["foo", "bar"]);
- /// assert_eq!(Some(&b""[..]), seq.longest_common_suffix());
- /// let seq = Seq::new(&[""]);
- /// assert_eq!(Some(&b""[..]), seq.longest_common_suffix());
- ///
- /// let seq = Seq::infinite();
- /// assert_eq!(None, seq.longest_common_suffix());
- /// let seq = Seq::empty();
- /// assert_eq!(None, seq.longest_common_suffix());
- /// ```
-    #[inline]
-    pub fn longest_common_suffix(&self) -> Option<&[u8]> {
-        // An infinite seq (matches everything) and an empty seq (matches
-        // nothing) have no meaningful common suffix; both bail out here.
-        let lits = self.literals.as_ref()?;
-        let (first, rest) = lits.split_first()?;
-        // Start with the whole first literal and shrink it from the front
-        // against each of the remaining literals in turn.
-        let mut suffix = first.as_bytes();
-        for lit in rest {
-            let shared = lit
-                .as_bytes()
-                .iter()
-                .rev()
-                .zip(suffix.iter().rev())
-                .take_while(|&(a, b)| a == b)
-                .count();
-            if shared == 0 {
-                // Nothing in common; no later literal can change that.
-                return Some(&[]);
-            }
-            suffix = &suffix[suffix.len() - shared..];
-        }
-        Some(suffix)
-    }
-
- /// Optimizes this seq while treating its literals as prefixes and
- /// respecting the preference order of its literals.
- ///
- /// The specific way "optimization" works is meant to be an implementation
- /// detail, as it essentially represents a set of heuristics. The goal
- /// that optimization tries to accomplish is to make the literals in this
- /// set reflect inputs that will result in a more effective prefilter.
- /// Principally by reducing the false positive rate of candidates found by
- /// the literals in this sequence. That is, when a match of a literal is
- /// found, we would like it to be a strong predictor of the overall match
- /// of the regex. If it isn't, then much time will be spent starting and
- /// stopping the prefilter search and attempting to confirm the match only
- /// to have it fail.
- ///
- /// Some of those heuristics might be:
- ///
- /// * Identifying a common prefix from a larger sequence of literals, and
- /// shrinking the sequence down to that single common prefix.
- /// * Rejecting the sequence entirely if it is believed to result in very
- /// high false positive rate. When this happens, the sequence is made
- /// infinite.
- /// * Shrinking the sequence to a smaller number of literals representing
- /// prefixes, but not shrinking it so much as to make literals too short.
- /// (A sequence with very short literals, of 1 or 2 bytes, will typically
- /// result in a higher false positive rate.)
- ///
- /// Optimization should only be run once extraction is complete. Namely,
- /// optimization may make assumptions that do not compose with other
- /// operations in the middle of extraction. For example, optimization will
- /// reduce `[E(sam), E(samwise)]` to `[E(sam)]`, but such a transformation
- /// is only valid if no other extraction will occur. If other extraction
- /// may occur, then the correct transformation would be to `[I(sam)]`.
- ///
- /// The [`Seq::optimize_for_suffix_by_preference`] does the same thing, but
- /// for suffixes.
- ///
- /// # Example
- ///
- /// This shows how optimization might transform a sequence. Note that
- /// the specific behavior is not a documented guarantee. The heuristics
- /// used are an implementation detail and may change over time in semver
- /// compatible releases.
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Seq, Literal};
- ///
- /// let mut seq = Seq::new(&[
- /// "samantha",
- /// "sam",
- /// "samwise",
- /// "frodo",
- /// ]);
- /// seq.optimize_for_prefix_by_preference();
- /// assert_eq!(Seq::from_iter([
- /// Literal::exact("samantha"),
- /// // Kept exact even though 'samwise' got pruned
- /// // because optimization assumes literal extraction
- /// // has finished.
- /// Literal::exact("sam"),
- /// Literal::exact("frodo"),
- /// ]), seq);
- /// ```
- ///
- /// # Example: optimization may make the sequence infinite
- ///
- /// If the heuristics deem that the sequence could cause a very high false
- /// positive rate, then it may make the sequence infinite, effectively
- /// disabling its use as a prefilter.
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Seq, Literal};
- ///
- /// let mut seq = Seq::new(&[
- /// "samantha",
- /// // An empty string matches at every position,
- /// // thus rendering the prefilter completely
- /// // ineffective.
- /// "",
- /// "sam",
- /// "samwise",
- /// "frodo",
- /// ]);
- /// seq.optimize_for_prefix_by_preference();
- /// assert!(!seq.is_finite());
- /// ```
- ///
- /// Do note that just because there is a `" "` in the sequence, that
- /// doesn't mean the sequence will always be made infinite after it is
- /// optimized. Namely, if the sequence is considered exact (any match
- /// corresponds to an overall match of the original regex), then any match
- /// is an overall match, and so the false positive rate is always `0`.
- ///
- /// To demonstrate this, we remove `samwise` from our sequence. This
- /// results in no optimization happening and all literals remain exact.
- /// Thus the entire sequence is exact, and it is kept as-is, even though
- /// one is an ASCII space:
- ///
- /// ```
- /// use regex_syntax::hir::literal::{Seq, Literal};
- ///
- /// let mut seq = Seq::new(&[
- /// "samantha",
- /// " ",
- /// "sam",
- /// "frodo",
- /// ]);
- /// seq.optimize_for_prefix_by_preference();
- /// assert!(seq.is_finite());
- /// ```
-    #[inline]
-    pub fn optimize_for_prefix_by_preference(&mut self) {
-        // Delegate to the shared optimizer in prefix mode.
-        let treat_as_prefix = true;
-        self.optimize_by_preference(treat_as_prefix);
-    }
-
-    /// Optimizes this seq, interpreting its literals as suffixes while
-    /// honoring their preference order.
-    ///
-    /// Only run optimization after literal extraction has finished.
-    ///
-    /// This is the suffix counterpart of
-    /// [`Seq::optimize_for_prefix_by_preference`]; its documentation
-    /// explains the heuristics in more detail.
-    #[inline]
-    pub fn optimize_for_suffix_by_preference(&mut self) {
-        // Delegate to the shared optimizer in suffix mode.
-        let treat_as_prefix = false;
-        self.optimize_by_preference(treat_as_prefix);
-    }
-
-    /// Shared implementation behind the prefix and suffix optimizers. When
-    /// `prefix` is true the literals are treated as prefixes, otherwise as
-    /// suffixes.
-    fn optimize_by_preference(&mut self, prefix: bool) {
-        // Nothing to do for an infinite sequence.
-        let origlen = match self.len() {
-            Some(len) => len,
-            None => return,
-        };
-        // An empty literal implies a match at every position, and no
-        // prefilter can help with that. Squash the sequence so that nobody
-        // downstream is tempted to use it anyway.
-        if self.min_literal_len() == Some(0) {
-            self.make_infinite();
-            return;
-        }
-        // Begin from the smallest sequence possible. The exactness-retaining
-        // flavor of preference minimization is legal here because
-        // optimization is only expected to run once extraction is complete.
-        if prefix {
-            if let Some(lits) = self.literals.as_mut() {
-                PreferenceTrie::minimize(lits, true);
-            }
-        }
-
-        // A sufficiently long common prefix (or suffix) is a good bet for
-        // the fastest prefilter, since single-substring search is so fast.
-        let common = if prefix {
-            self.longest_common_prefix()
-        } else {
-            self.longest_common_suffix()
-        };
-        // Copy out the length and leading byte so the borrow of `self` ends
-        // before the sequence is mutated below.
-        let common = common.map(|fix| (fix.len(), fix.first().copied()));
-        if let Some((fixlen, lead)) = common {
-            // Special case: a short common prefix whose leading byte is
-            // believed to be rare gets stripped down to that single byte,
-            // which should promote the use of memchr. This only applies
-            // when:
-            //
-            // * there was more than one literal to begin with (memchr
-            //   probably beats searching for 2+ literals, but probably not a
-            //   plain memmem search for one literal), and
-            // * the prefix is short, and thus probably not very
-            //   discriminatory anyway. A longer prefix likely already has a
-            //   low false positive rate.
-            let rare_lead = lead.map_or(false, |byte| rank(byte) < 200);
-            if prefix && origlen > 1 && (1..=3).contains(&fixlen) && rare_lead
-            {
-                self.keep_first_bytes(1);
-                self.dedup();
-                return;
-            }
-            // Only collapse to the common prefix/suffix when the existing
-            // literals don't look great, or when the common part is expected
-            // to be particularly discriminatory.
-            let isfast =
-                self.is_exact() && self.len().map_or(false, |len| len <= 16);
-            let usefix = fixlen > 4 || (fixlen > 1 && !isfast);
-            if usefix {
-                // Trimming every literal to exactly the common length makes
-                // them all equivalent, so 'dedup' leaves a single literal.
-                // Doing it this way avoids an alloc and also preserves the
-                // exactness of the literals (or lack thereof).
-                if prefix {
-                    self.keep_first_bytes(fixlen);
-                } else {
-                    self.keep_last_bytes(fixlen);
-                }
-                self.dedup();
-                assert_eq!(Some(1), self.len());
-                // Deliberately no return: the common prefix/suffix must
-                // still go through the poison check below.
-            }
-        }
-        // An exact sequence is *probably* worth keeping as-is, but not
-        // always. For example, an exact sequence of 100 literals is too big
-        // for Teddy and would end up on Aho-Corasick, meaning no fast
-        // prefilter gets used at all. In such cases it is better to shrink
-        // the sequence and accept false positives. So: remember the exact
-        // sequence now, try shrinking, and fall back to the saved copy if
-        // the shrunken result turns out to be poor.
-        let exact: Option<Seq> = self.is_exact().then(|| self.clone());
-        // Try to shorten the sequence so that better algorithms (such as
-        // Teddy) can be used downstream, without looking for too many
-        // literals.
-        //
-        // Each pair is (bytes to keep per literal, sequence length limit).
-        // For example, (3, 500) means: "if the sequence has more than 500
-        // literals, truncate every literal to at most 3 bytes and then
-        // minimize the sequence."
-        const ATTEMPTS: [(usize, usize); 5] =
-            [(5, 10), (4, 10), (3, 64), (2, 64), (1, 10)];
-        for &(keep, limit) in ATTEMPTS.iter() {
-            match self.len() {
-                // Still too many literals: keep shrinking.
-                Some(len) if len > limit => {}
-                // Small enough (or infinite): stop.
-                _ => break,
-            }
-            if prefix {
-                self.keep_first_bytes(keep);
-                if let Some(lits) = self.literals.as_mut() {
-                    PreferenceTrie::minimize(lits, true);
-                }
-            } else {
-                self.keep_last_bytes(keep);
-            }
-        }
-        // Poison check. A poison literal is short and believed to match very
-        // often, which generally yields a prefilter with a very high false
-        // positive rate and thus worse performance overall.
-        //
-        // This comes last because the shrinking above may have turned a
-        // non-poisonous sequence into a poisonous one. That transition
-        // likely only happens for huge sequences, which are themselves
-        // poisonous.
-        let poisoned = self
-            .literals()
-            .map_or(false, |lits| lits.iter().any(|lit| lit.is_poisonous()));
-        if poisoned {
-            self.make_infinite();
-        }
-        // If we started the shrinking phase with an exact sequence, revert
-        // to it whenever the optimized result:
-        //
-        // * lost its literals entirely (became infinite),
-        // * contains a short literal, which is *probably* not so great, or
-        // * is still "big" (i.e., can't use Teddy).
-        if let Some(exact) = exact {
-            let unusable = !self.is_finite()
-                || self.min_literal_len().map_or(true, |len| len <= 2)
-                || self.len().map_or(true, |len| len > 64);
-            if unusable {
-                *self = exact;
-            }
-        }
-    }
-}
-
-impl core::fmt::Debug for Seq {
-    /// Formats a finite seq as `Seq` followed by the list of its literals,
-    /// and an infinite seq as `Seq[∞]`.
-    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-        f.write_str("Seq")?;
-        match self.literals() {
-            Some(lits) => f.debug_list().entries(lits.iter()).finish(),
-            None => f.write_str("[∞]"),
-        }
-    }
-}
-
-impl FromIterator<Literal> for Seq {
-    /// Builds a seq by pushing each yielded literal, in order, onto an
-    /// initially empty seq.
-    fn from_iter<T: IntoIterator<Item = Literal>>(it: T) -> Seq {
-        it.into_iter().fold(Seq::empty(), |mut seq, literal| {
-            seq.push(literal);
-            seq
-        })
-    }
-}
-
-/// A single literal extracted from an [`Hir`] expression.
-///
-/// A literal pairs a byte string with an "exactness" flag:
-///
-/// * The bytes come with no UTF-8 guarantee. Even when the originating regex
-/// is UTF-8, the extracted literal may not be valid UTF-8. (For example, an
-/// [`Extractor`] limit may trim a literal in the middle of a codepoint.)
-/// * An "exact" literal has not been trimmed and may still be extended. If
-/// a literal is still "exact" once the entire `Hir` expression has been
-/// visited, then the literal leads to a match state. (That does not
-/// necessarily mean every occurrence of the literal is a match of the regex,
-/// because literal extraction ignores look-around assertions.)
-#[derive(Clone, Eq, PartialEq, PartialOrd, Ord)]
-pub struct Literal {
-    bytes: Vec<u8>,
-    exact: bool,
-}
-
-impl Literal {
-    /// Creates an exact literal from the given bytes.
-    #[inline]
-    pub fn exact<B: Into<Vec<u8>>>(bytes: B) -> Literal {
-        let bytes = bytes.into();
-        Literal { bytes, exact: true }
-    }
-
-    /// Creates an inexact literal from the given bytes.
-    #[inline]
-    pub fn inexact<B: Into<Vec<u8>>>(bytes: B) -> Literal {
-        let bytes = bytes.into();
-        Literal { bytes, exact: false }
-    }
-
-    /// Borrows the bytes of this literal.
-    #[inline]
-    pub fn as_bytes(&self) -> &[u8] {
-        self.bytes.as_slice()
-    }
-
-    /// Consumes this literal and returns its bytes.
-    ///
-    /// The "exact" flag is discarded in the process.
-    #[inline]
-    pub fn into_bytes(self) -> Vec<u8> {
-        let Literal { bytes, .. } = self;
-        bytes
-    }
-
-    /// Returns the number of bytes in this literal.
-    #[inline]
-    pub fn len(&self) -> usize {
-        self.bytes.len()
-    }
-
-    /// Returns true if and only if this literal contains no bytes.
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.bytes.is_empty()
-    }
-
-    /// Returns true if and only if this literal is exact.
-    #[inline]
-    pub fn is_exact(&self) -> bool {
-        self.exact
-    }
-
-    /// Flags this literal as inexact.
-    ///
-    /// An inexact literal can never be extended. For example,
-    /// [`Seq::cross_forward`] will not extend inexact literals.
-    #[inline]
-    pub fn make_inexact(&mut self) {
-        self.exact = false;
-    }
-
-    /// Reverses the order of the bytes in this literal.
-    #[inline]
-    pub fn reverse(&mut self) {
-        self.bytes.reverse();
-    }
-
-    /// Appends the bytes of `lit` to this literal.
-    ///
-    /// This does nothing when this literal is inexact.
-    #[inline]
-    pub fn extend(&mut self, lit: &Literal) {
-        if self.exact {
-            self.bytes.extend_from_slice(lit.as_bytes());
-        }
-    }
-
-    /// Keeps only the first `len` bytes of this literal. A literal with
-    /// `len` or fewer bytes is left unchanged. Otherwise it is trimmed and
-    /// marked as inexact.
-    #[inline]
-    pub fn keep_first_bytes(&mut self, len: usize) {
-        if self.len() > len {
-            self.make_inexact();
-            self.bytes.truncate(len);
-        }
-    }
-
-    /// Keeps only the last `len` bytes of this literal. A literal with `len`
-    /// or fewer bytes is left unchanged. Otherwise it is trimmed and marked
-    /// as inexact.
-    #[inline]
-    pub fn keep_last_bytes(&mut self, len: usize) {
-        if self.len() <= len {
-            return;
-        }
-        // Number of leading bytes to discard; cannot underflow because of
-        // the guard above.
-        let excess = self.len() - len;
-        self.make_inexact();
-        self.bytes.drain(..excess);
-    }
-
-    /// Returns true if it is believed that this literal is likely to match
-    /// very frequently, and is thus not a good candidate for a prefilter.
-    fn is_poisonous(&self) -> bool {
-        match self.as_bytes() {
-            // The empty literal matches everywhere.
-            [] => true,
-            // A lone byte is poisonous when its frequency rank is very high.
-            [byte] => rank(*byte) >= 250,
-            _ => false,
-        }
-    }
-}
-
-impl From<u8> for Literal {
-    /// Wraps a single byte in an exact literal.
-    fn from(byte: u8) -> Literal {
-        Literal { bytes: vec![byte], exact: true }
-    }
-}
-
-impl From<char> for Literal {
-    /// Builds an exact literal holding the UTF-8 encoding of `ch`.
-    fn from(ch: char) -> Literal {
-        // Four bytes is enough room for the UTF-8 encoding of any `char`.
-        let mut buf = [0u8; 4];
-        let encoded = ch.encode_utf8(&mut buf);
-        Literal::exact(encoded.as_bytes())
-    }
-}
-
-impl AsRef<[u8]> for Literal {
-    /// Borrows the bytes of this literal.
-    fn as_ref(&self) -> &[u8] {
-        self.bytes.as_slice()
-    }
-}
-
-impl core::fmt::Debug for Literal {
-    /// Formats as a tuple tagged `E` (exact) or `I` (inexact) wrapping the
-    /// literal's bytes.
-    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-        let bytes = crate::debug::Bytes(self.as_bytes());
-        let mut tuple = if self.exact {
-            f.debug_tuple("E")
-        } else {
-            f.debug_tuple("I")
-        };
-        tuple.field(&bytes).finish()
-    }
-}
-
-/// A "preference" trie that rejects literals which can never match in a
-/// leftmost-first or "preference" search.
-///
-/// For example, once 'sam' has been inserted, inserting 'samwise' is
-/// rejected: 'sam' always takes priority, so 'samwise' can never match. In
-/// the other order, inserting 'samwise' first and then 'sam', both are
-/// accepted, since either one can match in a "preference" search.
-///
-/// The trie is only ever used as a "set." The literals of a sequence are
-/// inserted in order, and an `insert` rejects a literal if one of its
-/// prefixes is already in the trie. The "minimal" sequence is then simply
-/// the literals that were successfully inserted. (Since traversal is never
-/// needed, some simplification may be possible here, but this has never
-/// shown up on a profile. The heuristic limits imposed on literal extraction
-/// usually keep the inputs here very small.)
-#[derive(Debug)]
-struct PreferenceTrie {
-    /// All states of the trie. A state's ID is its index in this vector.
-    states: Vec<State>,
-    /// Match information per state. Always the same length as `states` and
-    /// indexed by the same state ID. The state `sid` is a match state if and
-    /// only if `matches[sid].is_some()`, in which case the option holds the
-    /// index of the matching literal. That index is offset by 1 so that it
-    /// fits in a NonZeroUsize.
-    matches: Vec<Option<NonZeroUsize>>,
-    /// The index given to the next literal accepted into this trie. Starts
-    /// at 1 and goes up by 1 for every literal successfully added.
-    next_literal_index: usize,
-}
-
-/// A single state in a trie, with its transitions stored sparsely.
-#[derive(Debug, Default)]
-struct State {
-    /// The transitions leaving this state as `(byte, next state ID)` pairs,
-    /// sorted by byte. Any particular byte has at most one transition.
-    trans: Vec<(u8, usize)>,
-}
-
-impl PreferenceTrie {
-    /// Minimizes the given literals in place while preserving preference
-    /// order semantics.
-    ///
-    /// When `keep_exact` is true, every retained literal keeps its
-    /// exactness. This is useful for a fully extracted `Seq` containing only
-    /// exact literals: nothing will ever need to match after them, and any
-    /// removed literal is guaranteed to never match. When `keep_exact` is
-    /// false, each literal that caused another one to be removed is made
-    /// inexact.
-    fn minimize(literals: &mut Vec<Literal>, keep_exact: bool) {
-        let mut trie = PreferenceTrie {
-            states: vec![],
-            matches: vec![],
-            next_literal_index: 1,
-        };
-        // Positions (within the retained literals) of the literals that
-        // blocked a later literal from being inserted.
-        let mut blockers = vec![];
-        literals.retain_mut(|lit| {
-            let blocker = match trie.insert(lit.as_bytes()) {
-                Ok(_) => return true,
-                Err(blocker) => blocker,
-            };
-            if !keep_exact {
-                // Trie indices are 1-based; convert to a 0-based position.
-                blockers.push(blocker.checked_sub(1).unwrap());
-            }
-            false
-        });
-        for pos in blockers {
-            literals[pos].make_inexact();
-        }
-    }
-
-    /// Attempts to add the given byte string to this trie.
-    ///
-    /// Returns `Ok` with the index assigned to the new literal when it is
-    /// accepted. Returns `Err` with the index of the literal already in the
-    /// trie that blocked the insertion (which implies that it is a prefix of
-    /// the byte string given).
-    ///
-    /// In other words, a byte string is accepted if and only if it could
-    /// match when executing a preference order search.
-    fn insert(&mut self, bytes: &[u8]) -> Result<usize, usize> {
-        let mut sid = self.root();
-        // The empty literal was inserted earlier; it blocks everything.
-        if let Some(owner) = self.matches[sid] {
-            return Err(owner.get());
-        }
-        for &byte in bytes {
-            let slot = self.states[sid]
-                .trans
-                .binary_search_by_key(&byte, |&(b, _)| b);
-            sid = match slot {
-                Ok(at) => {
-                    let next = self.states[sid].trans[at].1;
-                    // Reaching a match state means an earlier literal is a
-                    // prefix of `bytes`.
-                    if let Some(owner) = self.matches[next] {
-                        return Err(owner.get());
-                    }
-                    next
-                }
-                Err(at) => {
-                    // No transition on this byte yet. Add one, inserting at
-                    // `at` to keep the transitions sorted.
-                    let fresh = self.create_state();
-                    self.states[sid].trans.insert(at, (byte, fresh));
-                    fresh
-                }
-            };
-        }
-        let idx = self.next_literal_index;
-        self.next_literal_index += 1;
-        self.matches[sid] = NonZeroUsize::new(idx);
-        Ok(idx)
-    }
-
-    /// Returns the ID of the root state, creating the root if the trie does
-    /// not have any states yet.
-    fn root(&mut self) -> usize {
-        if self.states.is_empty() {
-            self.create_state()
-        } else {
-            0
-        }
-    }
-
-    /// Appends a new empty, non-matching state and returns its ID.
-    fn create_state(&mut self) -> usize {
-        let id = self.states.len();
-        self.matches.push(None);
-        self.states.push(State::default());
-        id
-    }
-}
-
-/// Returns the "rank" of the given byte.
-///
-/// The minimum rank value is `0` and the maximum rank value is `255`.
-///
-/// The rank of a byte is derived from a heuristic background distribution of
-/// relative frequencies of bytes. The heuristic says that lower the rank of a
-/// byte, the less likely that byte is to appear in any arbitrary haystack.
-pub fn rank(byte: u8) -> u8 {
- crate::rank::BYTE_FREQUENCIES[usize::from(byte)]
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- fn parse(pattern: &str) -> Hir {
- crate::ParserBuilder::new().utf8(false).build().parse(pattern).unwrap()
- }
-
- fn prefixes(pattern: &str) -> Seq {
- Extractor::new().kind(ExtractKind::Prefix).extract(&parse(pattern))
- }
-
- fn suffixes(pattern: &str) -> Seq {
- Extractor::new().kind(ExtractKind::Suffix).extract(&parse(pattern))
- }
-
- fn e(pattern: &str) -> (Seq, Seq) {
- (prefixes(pattern), suffixes(pattern))
- }
-
- #[allow(non_snake_case)]
- fn E(x: &str) -> Literal {
- Literal::exact(x.as_bytes())
- }
-
- #[allow(non_snake_case)]
- fn I(x: &str) -> Literal {
- Literal::inexact(x.as_bytes())
- }
-
- fn seq<I: IntoIterator<Item = Literal>>(it: I) -> Seq {
- Seq::from_iter(it)
- }
-
- fn infinite() -> (Seq, Seq) {
- (Seq::infinite(), Seq::infinite())
- }
-
- fn inexact<I1, I2>(it1: I1, it2: I2) -> (Seq, Seq)
- where
- I1: IntoIterator<Item = Literal>,
- I2: IntoIterator<Item = Literal>,
- {
- (Seq::from_iter(it1), Seq::from_iter(it2))
- }
-
- fn exact<B: AsRef<[u8]>, I: IntoIterator<Item = B>>(it: I) -> (Seq, Seq) {
- let s1 = Seq::new(it);
- let s2 = s1.clone();
- (s1, s2)
- }
-
- fn opt<B: AsRef<[u8]>, I: IntoIterator<Item = B>>(it: I) -> (Seq, Seq) {
- let (mut p, mut s) = exact(it);
- p.optimize_for_prefix_by_preference();
- s.optimize_for_suffix_by_preference();
- (p, s)
- }
-
- #[test]
- fn literal() {
- assert_eq!(exact(["a"]), e("a"));
- assert_eq!(exact(["aaaaa"]), e("aaaaa"));
- assert_eq!(exact(["A", "a"]), e("(?i-u)a"));
- assert_eq!(exact(["AB", "Ab", "aB", "ab"]), e("(?i-u)ab"));
- assert_eq!(exact(["abC", "abc"]), e("ab(?i-u)c"));
-
- assert_eq!(exact([b"\xFF"]), e(r"(?-u:\xFF)"));
-
- #[cfg(feature = "unicode-case")]
- {
- assert_eq!(exact(["☃"]), e("☃"));
- assert_eq!(exact(["☃"]), e("(?i)☃"));
- assert_eq!(exact(["☃☃☃☃☃"]), e("☃☃☃☃☃"));
-
- assert_eq!(exact(["Δ"]), e("Δ"));
- assert_eq!(exact(["δ"]), e("δ"));
- assert_eq!(exact(["Δ", "δ"]), e("(?i)Δ"));
- assert_eq!(exact(["Δ", "δ"]), e("(?i)δ"));
-
- assert_eq!(exact(["S", "s", "ſ"]), e("(?i)S"));
- assert_eq!(exact(["S", "s", "ſ"]), e("(?i)s"));
- assert_eq!(exact(["S", "s", "ſ"]), e("(?i)ſ"));
- }
-
- let letters = "ͱͳͷΐάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋ";
- assert_eq!(exact([letters]), e(letters));
- }
-
- #[test]
- fn class() {
- assert_eq!(exact(["a", "b", "c"]), e("[abc]"));
- assert_eq!(exact(["a1b", "a2b", "a3b"]), e("a[123]b"));
- assert_eq!(exact(["δ", "ε"]), e("[εδ]"));
- #[cfg(feature = "unicode-case")]
- {
- assert_eq!(exact(["Δ", "Ε", "δ", "ε", "ϵ"]), e(r"(?i)[εδ]"));
- }
- }
-
- #[test]
- fn look() {
- assert_eq!(exact(["ab"]), e(r"a\Ab"));
- assert_eq!(exact(["ab"]), e(r"a\zb"));
- assert_eq!(exact(["ab"]), e(r"a(?m:^)b"));
- assert_eq!(exact(["ab"]), e(r"a(?m:$)b"));
- assert_eq!(exact(["ab"]), e(r"a\bb"));
- assert_eq!(exact(["ab"]), e(r"a\Bb"));
- assert_eq!(exact(["ab"]), e(r"a(?-u:\b)b"));
- assert_eq!(exact(["ab"]), e(r"a(?-u:\B)b"));
-
- assert_eq!(exact(["ab"]), e(r"^ab"));
- assert_eq!(exact(["ab"]), e(r"$ab"));
- assert_eq!(exact(["ab"]), e(r"(?m:^)ab"));
- assert_eq!(exact(["ab"]), e(r"(?m:$)ab"));
- assert_eq!(exact(["ab"]), e(r"\bab"));
- assert_eq!(exact(["ab"]), e(r"\Bab"));
- assert_eq!(exact(["ab"]), e(r"(?-u:\b)ab"));
- assert_eq!(exact(["ab"]), e(r"(?-u:\B)ab"));
-
- assert_eq!(exact(["ab"]), e(r"ab^"));
- assert_eq!(exact(["ab"]), e(r"ab$"));
- assert_eq!(exact(["ab"]), e(r"ab(?m:^)"));
- assert_eq!(exact(["ab"]), e(r"ab(?m:$)"));
- assert_eq!(exact(["ab"]), e(r"ab\b"));
- assert_eq!(exact(["ab"]), e(r"ab\B"));
- assert_eq!(exact(["ab"]), e(r"ab(?-u:\b)"));
- assert_eq!(exact(["ab"]), e(r"ab(?-u:\B)"));
-
- let expected = (seq([I("aZ"), E("ab")]), seq([I("Zb"), E("ab")]));
- assert_eq!(expected, e(r"^aZ*b"));
- }
-
- #[test]
- fn repetition() {
- assert_eq!(exact(["a", ""]), e(r"a?"));
- assert_eq!(exact(["", "a"]), e(r"a??"));
- assert_eq!(inexact([I("a"), E("")], [I("a"), E("")]), e(r"a*"));
- assert_eq!(inexact([E(""), I("a")], [E(""), I("a")]), e(r"a*?"));
- assert_eq!(inexact([I("a")], [I("a")]), e(r"a+"));
- assert_eq!(inexact([I("a")], [I("a")]), e(r"(a+)+"));
-
- assert_eq!(exact(["ab"]), e(r"aZ{0}b"));
- assert_eq!(exact(["aZb", "ab"]), e(r"aZ?b"));
- assert_eq!(exact(["ab", "aZb"]), e(r"aZ??b"));
- assert_eq!(
- inexact([I("aZ"), E("ab")], [I("Zb"), E("ab")]),
- e(r"aZ*b")
- );
- assert_eq!(
- inexact([E("ab"), I("aZ")], [E("ab"), I("Zb")]),
- e(r"aZ*?b")
- );
- assert_eq!(inexact([I("aZ")], [I("Zb")]), e(r"aZ+b"));
- assert_eq!(inexact([I("aZ")], [I("Zb")]), e(r"aZ+?b"));
-
- assert_eq!(exact(["aZZb"]), e(r"aZ{2}b"));
- assert_eq!(inexact([I("aZZ")], [I("ZZb")]), e(r"aZ{2,3}b"));
-
- assert_eq!(exact(["abc", ""]), e(r"(abc)?"));
- assert_eq!(exact(["", "abc"]), e(r"(abc)??"));
-
- assert_eq!(inexact([I("a"), E("b")], [I("ab"), E("b")]), e(r"a*b"));
- assert_eq!(inexact([E("b"), I("a")], [E("b"), I("ab")]), e(r"a*?b"));
- assert_eq!(inexact([I("ab")], [I("b")]), e(r"ab+"));
- assert_eq!(inexact([I("a"), I("b")], [I("b")]), e(r"a*b+"));
-
- // FIXME: The suffixes for this don't look quite right to me. I think
- // the right suffixes would be: [I(ac), I(bc), E(c)]. The main issue I
- // think is that suffixes are computed by iterating over concatenations
- // in reverse, and then [bc, ac, c] ordering is indeed correct from
- // that perspective. We also test a few more equivalent regexes, and
- // we get the same result, so it is consistent at least I suppose.
- //
- // The reason why this isn't an issue is that it only messes up
- // preference order, and currently, suffixes are never used in a
- // context where preference order matters. For prefixes it matters
- // because we sometimes want to use prefilters without confirmation
- // when all of the literals are exact (and there's no look-around). But
- // we never do that for suffixes. Any time we use suffixes, we always
- // include a confirmation step. If that ever changes, then it's likely
- // this bug will need to be fixed, but last time I looked, it appears
- // hard to do so.
- assert_eq!(
- inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]),
- e(r"a*b*c")
- );
- assert_eq!(
- inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]),
- e(r"(a+)?(b+)?c")
- );
- assert_eq!(
- inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]),
- e(r"(a+|)(b+|)c")
- );
- // A few more similarish but not identical regexes. These may have a
- // similar problem as above.
- assert_eq!(
- inexact(
- [I("a"), I("b"), I("c"), E("")],
- [I("c"), I("b"), I("a"), E("")]
- ),
- e(r"a*b*c*")
- );
- assert_eq!(inexact([I("a"), I("b"), I("c")], [I("c")]), e(r"a*b*c+"));
- assert_eq!(inexact([I("a"), I("b")], [I("bc")]), e(r"a*b+c"));
- assert_eq!(inexact([I("a"), I("b")], [I("c"), I("b")]), e(r"a*b+c*"));
- assert_eq!(inexact([I("ab"), E("a")], [I("b"), E("a")]), e(r"ab*"));
- assert_eq!(
- inexact([I("ab"), E("ac")], [I("bc"), E("ac")]),
- e(r"ab*c")
- );
- assert_eq!(inexact([I("ab")], [I("b")]), e(r"ab+"));
- assert_eq!(inexact([I("ab")], [I("bc")]), e(r"ab+c"));
-
- assert_eq!(
- inexact([I("z"), E("azb")], [I("zazb"), E("azb")]),
- e(r"z*azb")
- );
-
- let expected =
- exact(["aaa", "aab", "aba", "abb", "baa", "bab", "bba", "bbb"]);
- assert_eq!(expected, e(r"[ab]{3}"));
- let expected = inexact(
- [
- I("aaa"),
- I("aab"),
- I("aba"),
- I("abb"),
- I("baa"),
- I("bab"),
- I("bba"),
- I("bbb"),
- ],
- [
- I("aaa"),
- I("aab"),
- I("aba"),
- I("abb"),
- I("baa"),
- I("bab"),
- I("bba"),
- I("bbb"),
- ],
- );
- assert_eq!(expected, e(r"[ab]{3,4}"));
- }
-
- #[test]
- fn concat() {
- let empty: [&str; 0] = [];
-
- assert_eq!(exact(["abcxyz"]), e(r"abc()xyz"));
- assert_eq!(exact(["abcxyz"]), e(r"(abc)(xyz)"));
- assert_eq!(exact(["abcmnoxyz"]), e(r"abc()mno()xyz"));
- assert_eq!(exact(empty), e(r"abc[a&&b]xyz"));
- assert_eq!(exact(["abcxyz"]), e(r"abc[a&&b]*xyz"));
- }
-
- #[test]
- fn alternation() {
- assert_eq!(exact(["abc", "mno", "xyz"]), e(r"abc|mno|xyz"));
- assert_eq!(
- inexact(
- [E("abc"), I("mZ"), E("mo"), E("xyz")],
- [E("abc"), I("Zo"), E("mo"), E("xyz")]
- ),
- e(r"abc|mZ*o|xyz")
- );
- assert_eq!(exact(["abc", "xyz"]), e(r"abc|M[a&&b]N|xyz"));
- assert_eq!(exact(["abc", "MN", "xyz"]), e(r"abc|M[a&&b]*N|xyz"));
-
- assert_eq!(exact(["aaa", "aaaaa"]), e(r"(?:|aa)aaa"));
- assert_eq!(
- inexact(
- [I("aaa"), E(""), I("aaaaa"), E("aa")],
- [I("aaa"), E(""), E("aa")]
- ),
- e(r"(?:|aa)(?:aaa)*")
- );
- assert_eq!(
- inexact(
- [E(""), I("aaa"), E("aa"), I("aaaaa")],
- [E(""), I("aaa"), E("aa")]
- ),
- e(r"(?:|aa)(?:aaa)*?")
- );
-
- assert_eq!(
- inexact([E("a"), I("b"), E("")], [E("a"), I("b"), E("")]),
- e(r"a|b*")
- );
- assert_eq!(inexact([E("a"), I("b")], [E("a"), I("b")]), e(r"a|b+"));
-
- assert_eq!(
- inexact([I("a"), E("b"), E("c")], [I("ab"), E("b"), E("c")]),
- e(r"a*b|c")
- );
-
- assert_eq!(
- inexact(
- [E("a"), E("b"), I("c"), E("")],
- [E("a"), E("b"), I("c"), E("")]
- ),
- e(r"a|(?:b|c*)")
- );
-
- assert_eq!(
- inexact(
- [I("a"), I("b"), E("c"), I("a"), I("ab"), E("c")],
- [I("ac"), I("bc"), E("c"), I("ac"), I("abc"), E("c")],
- ),
- e(r"(a|b)*c|(a|ab)*c")
- );
-
- assert_eq!(
- exact(["abef", "abgh", "cdef", "cdgh"]),
- e(r"(ab|cd)(ef|gh)")
- );
- assert_eq!(
- exact([
- "abefij", "abefkl", "abghij", "abghkl", "cdefij", "cdefkl",
- "cdghij", "cdghkl",
- ]),
- e(r"(ab|cd)(ef|gh)(ij|kl)")
- );
-
- assert_eq!(inexact([E("abab")], [E("abab")]), e(r"(ab){2}"));
-
- assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,3}"));
-
- assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,}"));
- }
-
- #[test]
- fn impossible() {
- let empty: [&str; 0] = [];
-
- assert_eq!(exact(empty), e(r"[a&&b]"));
- assert_eq!(exact(empty), e(r"a[a&&b]"));
- assert_eq!(exact(empty), e(r"[a&&b]b"));
- assert_eq!(exact(empty), e(r"a[a&&b]b"));
- assert_eq!(exact(["a", "b"]), e(r"a|[a&&b]|b"));
- assert_eq!(exact(["a", "b"]), e(r"a|c[a&&b]|b"));
- assert_eq!(exact(["a", "b"]), e(r"a|[a&&b]d|b"));
- assert_eq!(exact(["a", "b"]), e(r"a|c[a&&b]d|b"));
- assert_eq!(exact([""]), e(r"[a&&b]*"));
- assert_eq!(exact(["MN"]), e(r"M[a&&b]*N"));
- }
-
- // This tests patterns that contain something that defeats literal
- // detection, usually because it would blow some limit on the total number
- // of literals that can be returned.
- //
- // The main idea is that when literal extraction sees something that
- // it knows will blow a limit, it replaces it with a marker that says
- // "any literal will match here." While not necessarily true, the
- // over-estimation is just fine for the purposes of literal extraction,
- // because the imprecision doesn't matter: too big is too big.
- //
- // This is one of the trickier parts of literal extraction, since we need
- // to make sure all of our literal extraction operations correctly compose
- // with the markers.
- #[test]
- fn anything() {
- assert_eq!(infinite(), e(r"."));
- assert_eq!(infinite(), e(r"(?s)."));
- assert_eq!(infinite(), e(r"[A-Za-z]"));
- assert_eq!(infinite(), e(r"[A-Z]"));
- assert_eq!(exact([""]), e(r"[A-Z]{0}"));
- assert_eq!(infinite(), e(r"[A-Z]?"));
- assert_eq!(infinite(), e(r"[A-Z]*"));
- assert_eq!(infinite(), e(r"[A-Z]+"));
- assert_eq!((seq([I("1")]), Seq::infinite()), e(r"1[A-Z]"));
- assert_eq!((seq([I("1")]), seq([I("2")])), e(r"1[A-Z]2"));
- assert_eq!((Seq::infinite(), seq([I("123")])), e(r"[A-Z]+123"));
- assert_eq!(infinite(), e(r"[A-Z]+123[A-Z]+"));
- assert_eq!(infinite(), e(r"1|[A-Z]|3"));
- assert_eq!(
- (seq([E("1"), I("2"), E("3")]), Seq::infinite()),
- e(r"1|2[A-Z]|3"),
- );
- assert_eq!(
- (Seq::infinite(), seq([E("1"), I("2"), E("3")])),
- e(r"1|[A-Z]2|3"),
- );
- assert_eq!(
- (seq([E("1"), I("2"), E("4")]), seq([E("1"), I("3"), E("4")])),
- e(r"1|2[A-Z]3|4"),
- );
- assert_eq!((Seq::infinite(), seq([I("2")])), e(r"(?:|1)[A-Z]2"));
- assert_eq!(inexact([I("a")], [I("z")]), e(r"a.z"));
- }
-
- // Like the 'anything' test, but it uses smaller limits in order to test
- // the logic for effectively aborting literal extraction when the seqs get
- // too big.
- #[test]
- fn anything_small_limits() {
- fn prefixes(pattern: &str) -> Seq {
- Extractor::new()
- .kind(ExtractKind::Prefix)
- .limit_total(10)
- .extract(&parse(pattern))
- }
-
- fn suffixes(pattern: &str) -> Seq {
- Extractor::new()
- .kind(ExtractKind::Suffix)
- .limit_total(10)
- .extract(&parse(pattern))
- }
-
- fn e(pattern: &str) -> (Seq, Seq) {
- (prefixes(pattern), suffixes(pattern))
- }
-
- assert_eq!(
- (
- seq([
- I("aaa"),
- I("aab"),
- I("aba"),
- I("abb"),
- I("baa"),
- I("bab"),
- I("bba"),
- I("bbb")
- ]),
- seq([
- I("aaa"),
- I("aab"),
- I("aba"),
- I("abb"),
- I("baa"),
- I("bab"),
- I("bba"),
- I("bbb")
- ])
- ),
- e(r"[ab]{3}{3}")
- );
-
- assert_eq!(infinite(), e(r"ab|cd|ef|gh|ij|kl|mn|op|qr|st|uv|wx|yz"));
- }
-
- #[test]
- fn empty() {
- assert_eq!(exact([""]), e(r""));
- assert_eq!(exact([""]), e(r"^"));
- assert_eq!(exact([""]), e(r"$"));
- assert_eq!(exact([""]), e(r"(?m:^)"));
- assert_eq!(exact([""]), e(r"(?m:$)"));
- assert_eq!(exact([""]), e(r"\b"));
- assert_eq!(exact([""]), e(r"\B"));
- assert_eq!(exact([""]), e(r"(?-u:\b)"));
- assert_eq!(exact([""]), e(r"(?-u:\B)"));
- }
-
- #[test]
- fn odds_and_ends() {
- assert_eq!((Seq::infinite(), seq([I("a")])), e(r".a"));
- assert_eq!((seq([I("a")]), Seq::infinite()), e(r"a."));
- assert_eq!(infinite(), e(r"a|."));
- assert_eq!(infinite(), e(r".|a"));
-
- let pat = r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]";
- let expected = inexact(
- ["Mo'am", "Moam", "Mu'am", "Muam"].map(I),
- [
- "ddafi", "ddafy", "dhafi", "dhafy", "dzafi", "dzafy", "dafi",
- "dafy", "tdafi", "tdafy", "thafi", "thafy", "tzafi", "tzafy",
- "tafi", "tafy", "zdafi", "zdafy", "zhafi", "zhafy", "zzafi",
- "zzafy", "zafi", "zafy",
- ]
- .map(I),
- );
- assert_eq!(expected, e(pat));
-
- assert_eq!(
- (seq(["fn is_", "fn as_"].map(I)), Seq::infinite()),
- e(r"fn is_([A-Z]+)|fn as_([A-Z]+)"),
- );
- assert_eq!(
- inexact([I("foo")], [I("quux")]),
- e(r"foo[A-Z]+bar[A-Z]+quux")
- );
- assert_eq!(infinite(), e(r"[A-Z]+bar[A-Z]+"));
- assert_eq!(
- exact(["Sherlock Holmes"]),
- e(r"(?m)^Sherlock Holmes|Sherlock Holmes$")
- );
-
- assert_eq!(exact(["sa", "sb"]), e(r"\bs(?:[ab])"));
- }
-
- // This tests a specific regex along with some heuristic steps to reduce
- // the sequences extracted. This is meant to roughly correspond to the
- // types of heuristics used to shrink literal sets in practice. (Shrinking
- // is done because you want to balance "spend too much work looking for
- // too many literals" and "spend too much work processing false positive
- // matches from short literals.")
- #[test]
- #[cfg(feature = "unicode-case")]
- fn holmes() {
- let expected = inexact(
- ["HOL", "HOl", "HoL", "Hol", "hOL", "hOl", "hoL", "hol"].map(I),
- [
- "MES", "MEs", "Eſ", "MeS", "Mes", "eſ", "mES", "mEs", "meS",
- "mes",
- ]
- .map(I),
- );
- let (mut prefixes, mut suffixes) = e(r"(?i)Holmes");
- prefixes.keep_first_bytes(3);
- suffixes.keep_last_bytes(3);
- prefixes.minimize_by_preference();
- suffixes.minimize_by_preference();
- assert_eq!(expected, (prefixes, suffixes));
- }
-
- // This tests that we get some kind of literals extracted for a beefier
- // alternation with case insensitive mode enabled. At one point during
- // development, this returned nothing, and motivated some special case
- // code in Extractor::union to try and trim down the literal sequences
- // if the union would blow the limits set.
- #[test]
- #[cfg(feature = "unicode-case")]
- fn holmes_alt() {
- let mut pre =
- prefixes(r"(?i)Sherlock|Holmes|Watson|Irene|Adler|John|Baker");
- assert!(pre.len().unwrap() > 0);
- pre.optimize_for_prefix_by_preference();
- assert!(pre.len().unwrap() > 0);
- }
-
- // See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
- // See: CVE-2022-24713
- //
- // We test this here to ensure literal extraction completes in reasonable
- // time and isn't materially impacted by these sorts of pathological
- // repeats.
- #[test]
- fn crazy_repeats() {
- assert_eq!(inexact([E("")], [E("")]), e(r"(?:){4294967295}"));
- assert_eq!(
- inexact([E("")], [E("")]),
- e(r"(?:){64}{64}{64}{64}{64}{64}")
- );
- assert_eq!(inexact([E("")], [E("")]), e(r"x{0}{4294967295}"));
- assert_eq!(inexact([E("")], [E("")]), e(r"(?:|){4294967295}"));
-
- assert_eq!(
- inexact([E("")], [E("")]),
- e(r"(?:){8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}")
- );
- let repa = "a".repeat(100);
- assert_eq!(
- inexact([I(&repa)], [I(&repa)]),
- e(r"a{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}")
- );
- }
-
- #[test]
- fn huge() {
- let pat = r#"(?-u)
- 2(?:
- [45]\d{3}|
- 7(?:
- 1[0-267]|
- 2[0-289]|
- 3[0-29]|
- 4[01]|
- 5[1-3]|
- 6[013]|
- 7[0178]|
- 91
- )|
- 8(?:
- 0[125]|
- [139][1-6]|
- 2[0157-9]|
- 41|
- 6[1-35]|
- 7[1-5]|
- 8[1-8]|
- 90
- )|
- 9(?:
- 0[0-2]|
- 1[0-4]|
- 2[568]|
- 3[3-6]|
- 5[5-7]|
- 6[0167]|
- 7[15]|
- 8[0146-9]
- )
- )\d{4}|
- 3(?:
- 12?[5-7]\d{2}|
- 0(?:
- 2(?:
- [025-79]\d|
- [348]\d{1,2}
- )|
- 3(?:
- [2-4]\d|
- [56]\d?
- )
- )|
- 2(?:
- 1\d{2}|
- 2(?:
- [12]\d|
- [35]\d{1,2}|
- 4\d?
- )
- )|
- 3(?:
- 1\d{2}|
- 2(?:
- [2356]\d|
- 4\d{1,2}
- )
- )|
- 4(?:
- 1\d{2}|
- 2(?:
- 2\d{1,2}|
- [47]|
- 5\d{2}
- )
- )|
- 5(?:
- 1\d{2}|
- 29
- )|
- [67]1\d{2}|
- 8(?:
- 1\d{2}|
- 2(?:
- 2\d{2}|
- 3|
- 4\d
- )
- )
- )\d{3}|
- 4(?:
- 0(?:
- 2(?:
- [09]\d|
- 7
- )|
- 33\d{2}
- )|
- 1\d{3}|
- 2(?:
- 1\d{2}|
- 2(?:
- [25]\d?|
- [348]\d|
- [67]\d{1,2}
- )
- )|
- 3(?:
- 1\d{2}(?:
- \d{2}
- )?|
- 2(?:
- [045]\d|
- [236-9]\d{1,2}
- )|
- 32\d{2}
- )|
- 4(?:
- [18]\d{2}|
- 2(?:
- [2-46]\d{2}|
- 3
- )|
- 5[25]\d{2}
- )|
- 5(?:
- 1\d{2}|
- 2(?:
- 3\d|
- 5
- )
- )|
- 6(?:
- [18]\d{2}|
- 2(?:
- 3(?:
- \d{2}
- )?|
- [46]\d{1,2}|
- 5\d{2}|
- 7\d
- )|
- 5(?:
- 3\d?|
- 4\d|
- [57]\d{1,2}|
- 6\d{2}|
- 8
- )
- )|
- 71\d{2}|
- 8(?:
- [18]\d{2}|
- 23\d{2}|
- 54\d{2}
- )|
- 9(?:
- [18]\d{2}|
- 2[2-5]\d{2}|
- 53\d{1,2}
- )
- )\d{3}|
- 5(?:
- 02[03489]\d{2}|
- 1\d{2}|
- 2(?:
- 1\d{2}|
- 2(?:
- 2(?:
- \d{2}
- )?|
- [457]\d{2}
- )
- )|
- 3(?:
- 1\d{2}|
- 2(?:
- [37](?:
- \d{2}
- )?|
- [569]\d{2}
- )
- )|
- 4(?:
- 1\d{2}|
- 2[46]\d{2}
- )|
- 5(?:
- 1\d{2}|
- 26\d{1,2}
- )|
- 6(?:
- [18]\d{2}|
- 2|
- 53\d{2}
- )|
- 7(?:
- 1|
- 24
- )\d{2}|
- 8(?:
- 1|
- 26
- )\d{2}|
- 91\d{2}
- )\d{3}|
- 6(?:
- 0(?:
- 1\d{2}|
- 2(?:
- 3\d{2}|
- 4\d{1,2}
- )
- )|
- 2(?:
- 2[2-5]\d{2}|
- 5(?:
- [3-5]\d{2}|
- 7
- )|
- 8\d{2}
- )|
- 3(?:
- 1|
- 2[3478]
- )\d{2}|
- 4(?:
- 1|
- 2[34]
- )\d{2}|
- 5(?:
- 1|
- 2[47]
- )\d{2}|
- 6(?:
- [18]\d{2}|
- 6(?:
- 2(?:
- 2\d|
- [34]\d{2}
- )|
- 5(?:
- [24]\d{2}|
- 3\d|
- 5\d{1,2}
- )
- )
- )|
- 72[2-5]\d{2}|
- 8(?:
- 1\d{2}|
- 2[2-5]\d{2}
- )|
- 9(?:
- 1\d{2}|
- 2[2-6]\d{2}
- )
- )\d{3}|
- 7(?:
- (?:
- 02|
- [3-589]1|
- 6[12]|
- 72[24]
- )\d{2}|
- 21\d{3}|
- 32
- )\d{3}|
- 8(?:
- (?:
- 4[12]|
- [5-7]2|
- 1\d?
- )|
- (?:
- 0|
- 3[12]|
- [5-7]1|
- 217
- )\d
- )\d{4}|
- 9(?:
- [35]1|
- (?:
- [024]2|
- 81
- )\d|
- (?:
- 1|
- [24]1
- )\d{2}
- )\d{3}
- "#;
- // TODO: This is a good candidate of a seq of literals that could be
- // shrunk quite a bit and still be very productive with respect to
- // literal optimizations.
- let (prefixes, suffixes) = e(pat);
- assert!(!suffixes.is_finite());
- assert_eq!(Some(243), prefixes.len());
- }
-
- #[test]
- fn optimize() {
- // This gets a common prefix that isn't too short.
- let (p, s) =
- opt(["foobarfoobar", "foobar", "foobarzfoobar", "foobarfoobar"]);
- assert_eq!(seq([I("foobar")]), p);
- assert_eq!(seq([I("foobar")]), s);
-
- // This also finds a common prefix, but since it's only one byte, it
- // prefers the multiple literals.
- let (p, s) = opt(["abba", "akka", "abccba"]);
- assert_eq!(exact(["abba", "akka", "abccba"]), (p, s));
-
- let (p, s) = opt(["sam", "samwise"]);
- assert_eq!((seq([E("sam")]), seq([E("sam"), E("samwise")])), (p, s));
-
- // The empty string is poisonous, so our seq becomes infinite, even
- // though all literals are exact.
- let (p, s) = opt(["foobarfoo", "foo", "", "foozfoo", "foofoo"]);
- assert!(!p.is_finite());
- assert!(!s.is_finite());
-
- // A space is also poisonous, so our seq becomes infinite. But this
- // only gets triggered when we don't have a completely exact sequence.
- // When the sequence is exact, spaces are okay, since we presume that
- // any prefilter will match a space more quickly than the regex engine.
- // (When the sequence is exact, there's a chance of the prefilter being
- // used without needing the regex engine at all.)
- let mut p = seq([E("foobarfoo"), I("foo"), E(" "), E("foofoo")]);
- p.optimize_for_prefix_by_preference();
- assert!(!p.is_finite());
- }
-}
diff --git a/vendor/regex-syntax/src/hir/mod.rs b/vendor/regex-syntax/src/hir/mod.rs
deleted file mode 100644
index 5db78438..00000000
--- a/vendor/regex-syntax/src/hir/mod.rs
+++ /dev/null
@@ -1,3873 +0,0 @@
-/*!
-Defines a high-level intermediate (HIR) representation for regular expressions.
-
-The HIR is represented by the [`Hir`] type, and it principally constructed via
-[translation](translate) from an [`Ast`](crate::ast::Ast). Alternatively, users
-may use the smart constructors defined on `Hir` to build their own by hand. The
-smart constructors simultaneously simplify and "optimize" the HIR, and are also
-the same routines used by translation.
-
-Most regex engines only have an HIR like this, and usually construct it
-directly from the concrete syntax. This crate however first parses the
-concrete syntax into an `Ast`, and only then creates the HIR from the `Ast`,
-as mentioned above. It's done this way to facilitate better error reporting,
-and to have a structured representation of a regex that faithfully represents
-its concrete syntax. Namely, while an `Hir` value can be converted back to an
-equivalent regex pattern string, it is unlikely to look like the original due
-to its simplified structure.
-*/
-
-use core::{char, cmp};
-
-use alloc::{
- boxed::Box,
- format,
- string::{String, ToString},
- vec,
- vec::Vec,
-};
-
-use crate::{
- ast::Span,
- hir::interval::{Interval, IntervalSet, IntervalSetIter},
- unicode,
-};
-
-pub use crate::{
- hir::visitor::{visit, Visitor},
- unicode::CaseFoldError,
-};
-
-mod interval;
-pub mod literal;
-pub mod print;
-pub mod translate;
-mod visitor;
-
-/// An error that can occur while translating an `Ast` to a `Hir`.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct Error {
- /// The kind of error.
- kind: ErrorKind,
- /// The original pattern that the translator's Ast was parsed from. Every
- /// span in an error is a valid range into this string.
- pattern: String,
- /// The span of this error, derived from the Ast given to the translator.
- span: Span,
-}
-
-impl Error {
- /// Return the type of this error.
- pub fn kind(&self) -> &ErrorKind {
- &self.kind
- }
-
- /// The original pattern string in which this error occurred.
- ///
- /// Every span reported by this error is reported in terms of this string.
- pub fn pattern(&self) -> &str {
- &self.pattern
- }
-
- /// Return the span at which this error occurred.
- pub fn span(&self) -> &Span {
- &self.span
- }
-}
-
-/// The type of an error that occurred while building an `Hir`.
-///
-/// This error type is marked as `non_exhaustive`. This means that adding a
-/// new variant is not considered a breaking change.
-#[non_exhaustive]
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum ErrorKind {
- /// This error occurs when a Unicode feature is used when Unicode
- /// support is disabled. For example `(?-u:\pL)` would trigger this error.
- UnicodeNotAllowed,
- /// This error occurs when translating a pattern that could match a byte
- /// sequence that isn't UTF-8 and `utf8` was enabled.
- InvalidUtf8,
- /// This error occurs when one uses a non-ASCII byte for a line terminator,
- /// but where Unicode mode is enabled and UTF-8 mode is disabled.
- InvalidLineTerminator,
- /// This occurs when an unrecognized Unicode property name could not
- /// be found.
- UnicodePropertyNotFound,
- /// This occurs when an unrecognized Unicode property value could not
- /// be found.
- UnicodePropertyValueNotFound,
- /// This occurs when a Unicode-aware Perl character class (`\w`, `\s` or
- /// `\d`) could not be found. This can occur when the `unicode-perl`
- /// crate feature is not enabled.
- UnicodePerlClassNotFound,
- /// This occurs when the Unicode simple case mapping tables are not
- /// available, and the regular expression required Unicode aware case
- /// insensitivity.
- UnicodeCaseUnavailable,
-}
-
-#[cfg(feature = "std")]
-impl std::error::Error for Error {}
-
-impl core::fmt::Display for Error {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- crate::error::Formatter::from(self).fmt(f)
- }
-}
-
-impl core::fmt::Display for ErrorKind {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- use self::ErrorKind::*;
-
- let msg = match *self {
- UnicodeNotAllowed => "Unicode not allowed here",
- InvalidUtf8 => "pattern can match invalid UTF-8",
- InvalidLineTerminator => "invalid line terminator, must be ASCII",
- UnicodePropertyNotFound => "Unicode property not found",
- UnicodePropertyValueNotFound => "Unicode property value not found",
- UnicodePerlClassNotFound => {
- "Unicode-aware Perl class not found \
- (make sure the unicode-perl feature is enabled)"
- }
- UnicodeCaseUnavailable => {
- "Unicode-aware case insensitivity matching is not available \
- (make sure the unicode-case feature is enabled)"
- }
- };
- f.write_str(msg)
- }
-}
-
-/// A high-level intermediate representation (HIR) for a regular expression.
-///
-/// An HIR value is a combination of a [`HirKind`] and a set of [`Properties`].
-/// An `HirKind` indicates what kind of regular expression it is (a literal,
-/// a repetition, a look-around assertion, etc.), where as a `Properties`
-/// describes various facts about the regular expression. For example, whether
-/// it matches UTF-8 or if it matches the empty string.
-///
-/// The HIR of a regular expression represents an intermediate step between
-/// its abstract syntax (a structured description of the concrete syntax) and
-/// an actual regex matcher. The purpose of HIR is to make regular expressions
-/// easier to analyze. In particular, the AST is much more complex than the
-/// HIR. For example, while an AST supports arbitrarily nested character
-/// classes, the HIR will flatten all nested classes into a single set. The HIR
-/// will also "compile away" every flag present in the concrete syntax. For
-/// example, users of HIR expressions never need to worry about case folding;
-/// it is handled automatically by the translator (e.g., by translating
-/// `(?i:A)` to `[aA]`).
-///
-/// The specific type of an HIR expression can be accessed via its `kind`
-/// or `into_kind` methods. This extra level of indirection exists for two
-/// reasons:
-///
-/// 1. Construction of an HIR expression *must* use the constructor methods on
-/// this `Hir` type instead of building the `HirKind` values directly. This
-/// permits construction to enforce invariants like "concatenations always
-/// consist of two or more sub-expressions."
-/// 2. Every HIR expression contains attributes that are defined inductively,
-/// and can be computed cheaply during the construction process. For example,
-/// one such attribute is whether the expression must match at the beginning of
-/// the haystack.
-///
-/// In particular, if you have an `HirKind` value, then there is intentionally
-/// no way to build an `Hir` value from it. You instead need to do case
-/// analysis on the `HirKind` value and build the `Hir` value using its smart
-/// constructors.
-///
-/// # UTF-8
-///
-/// If the HIR was produced by a translator with
-/// [`TranslatorBuilder::utf8`](translate::TranslatorBuilder::utf8) enabled,
-/// then the HIR is guaranteed to match UTF-8 exclusively for all non-empty
-/// matches.
-///
-/// For empty matches, those can occur at any position. It is the
-/// responsibility of the regex engine to determine whether empty matches are
-/// permitted between the code units of a single codepoint.
-///
-/// # Stack space
-///
-/// This type defines its own destructor that uses constant stack space and
-/// heap space proportional to the size of the HIR.
-///
-/// Also, an `Hir`'s `fmt::Display` implementation prints an HIR as a regular
-/// expression pattern string, and uses constant stack space and heap space
-/// proportional to the size of the `Hir`. The regex it prints is guaranteed to
-/// be _semantically_ equivalent to the original concrete syntax, but it may
-/// look very different. (And potentially not practically readable by a human.)
-///
-/// An `Hir`'s `fmt::Debug` implementation currently does not use constant
-/// stack space. The implementation will also suppress some details (such as
-/// the `Properties` inlined into every `Hir` value to make it less noisy).
-#[derive(Clone, Eq, PartialEq)]
-pub struct Hir {
- /// The underlying HIR kind.
- kind: HirKind,
- /// Analysis info about this HIR, computed during construction.
- props: Properties,
-}
-
-/// Methods for accessing the underlying `HirKind` and `Properties`.
-impl Hir {
- /// Returns a reference to the underlying HIR kind.
- pub fn kind(&self) -> &HirKind {
- &self.kind
- }
-
- /// Consumes ownership of this HIR expression and returns its underlying
- /// `HirKind`.
- pub fn into_kind(mut self) -> HirKind {
- core::mem::replace(&mut self.kind, HirKind::Empty)
- }
-
- /// Returns the properties computed for this `Hir`.
- pub fn properties(&self) -> &Properties {
- &self.props
- }
-
- /// Splits this HIR into its constituent parts.
- ///
- /// This is useful because `let Hir { kind, props } = hir;` does not work
- /// because of `Hir`'s custom `Drop` implementation.
- fn into_parts(mut self) -> (HirKind, Properties) {
- (
- core::mem::replace(&mut self.kind, HirKind::Empty),
- core::mem::replace(&mut self.props, Properties::empty()),
- )
- }
-}
-
-/// Smart constructors for HIR values.
-///
-/// These constructors are called "smart" because they do inductive work or
-/// simplifications. For example, calling `Hir::repetition` with a repetition
-/// like `a{0}` will actually return a `Hir` with a `HirKind::Empty` kind
-/// since it is equivalent to an empty regex. Another example is calling
-/// `Hir::concat(vec![expr])`. Instead of getting a `HirKind::Concat`, you'll
-/// just get back the original `expr` since it's precisely equivalent.
-///
-/// Smart constructors enable maintaining invariants about the HIR data type
-/// while also simulanteously keeping the representation as simple as possible.
-impl Hir {
- /// Returns an empty HIR expression.
- ///
- /// An empty HIR expression always matches, including the empty string.
- #[inline]
- pub fn empty() -> Hir {
- let props = Properties::empty();
- Hir { kind: HirKind::Empty, props }
- }
-
- /// Returns an HIR expression that can never match anything. That is,
- /// the size of the set of strings in the language described by the HIR
- /// returned is `0`.
- ///
- /// This is distinct from [`Hir::empty`] in that the empty string matches
- /// the HIR returned by `Hir::empty`. That is, the set of strings in the
- /// language describe described by `Hir::empty` is non-empty.
- ///
- /// Note that currently, the HIR returned uses an empty character class to
- /// indicate that nothing can match. An equivalent expression that cannot
- /// match is an empty alternation, but all such "fail" expressions are
- /// normalized (via smart constructors) to empty character classes. This is
- /// because empty character classes can be spelled in the concrete syntax
- /// of a regex (e.g., `\P{any}` or `(?-u:[^\x00-\xFF])` or `[a&&b]`), but
- /// empty alternations cannot.
- #[inline]
- pub fn fail() -> Hir {
- let class = Class::Bytes(ClassBytes::empty());
- let props = Properties::class(&class);
- // We can't just call Hir::class here because it defers to Hir::fail
- // in order to canonicalize the Hir value used to represent "cannot
- // match."
- Hir { kind: HirKind::Class(class), props }
- }
-
- /// Creates a literal HIR expression.
- ///
- /// This accepts anything that can be converted into a `Box<[u8]>`.
- ///
- /// Note that there is no mechanism for storing a `char` or a `Box<str>`
- /// in an HIR. Everything is "just bytes." Whether a `Literal` (or
- /// any HIR node) matches valid UTF-8 exclusively can be queried via
- /// [`Properties::is_utf8`].
- ///
- /// # Example
- ///
- /// This example shows that concatenations of `Literal` HIR values will
- /// automatically get flattened and combined together. So for example, even
- /// if you concat multiple `Literal` values that are themselves not valid
- /// UTF-8, they might add up to valid UTF-8. This also demonstrates just
- /// how "smart" Hir's smart constructors are.
- ///
- /// ```
- /// use regex_syntax::hir::{Hir, HirKind, Literal};
- ///
- /// let literals = vec![
- /// Hir::literal([0xE2]),
- /// Hir::literal([0x98]),
- /// Hir::literal([0x83]),
- /// ];
- /// // Each literal, on its own, is invalid UTF-8.
- /// assert!(literals.iter().all(|hir| !hir.properties().is_utf8()));
- ///
- /// let concat = Hir::concat(literals);
- /// // But the concatenation is valid UTF-8!
- /// assert!(concat.properties().is_utf8());
- ///
- /// // And also notice that the literals have been concatenated into a
- /// // single `Literal`, to the point where there is no explicit `Concat`!
- /// let expected = HirKind::Literal(Literal(Box::from("☃".as_bytes())));
- /// assert_eq!(&expected, concat.kind());
- /// ```
- ///
- /// # Example: building a literal from a `char`
- ///
- /// This example shows how to build a single `Hir` literal from a `char`
- /// value. Since a [`Literal`] is just bytes, we just need to UTF-8
- /// encode a `char` value:
- ///
- /// ```
- /// use regex_syntax::hir::{Hir, HirKind, Literal};
- ///
- /// let ch = '☃';
- /// let got = Hir::literal(ch.encode_utf8(&mut [0; 4]).as_bytes());
- ///
- /// let expected = HirKind::Literal(Literal(Box::from("☃".as_bytes())));
- /// assert_eq!(&expected, got.kind());
- /// ```
- #[inline]
- pub fn literal<B: Into<Box<[u8]>>>(lit: B) -> Hir {
- let bytes = lit.into();
- if bytes.is_empty() {
- return Hir::empty();
- }
-
- let lit = Literal(bytes);
- let props = Properties::literal(&lit);
- Hir { kind: HirKind::Literal(lit), props }
- }
-
- /// Creates a class HIR expression. The class may either be defined over
- /// ranges of Unicode codepoints or ranges of raw byte values.
- ///
- /// Note that an empty class is permitted. An empty class is equivalent to
- /// `Hir::fail()`.
- #[inline]
- pub fn class(class: Class) -> Hir {
- if class.is_empty() {
- return Hir::fail();
- } else if let Some(bytes) = class.literal() {
- return Hir::literal(bytes);
- }
- let props = Properties::class(&class);
- Hir { kind: HirKind::Class(class), props }
- }
-
- /// Creates a look-around assertion HIR expression.
- #[inline]
- pub fn look(look: Look) -> Hir {
- let props = Properties::look(look);
- Hir { kind: HirKind::Look(look), props }
- }
-
- /// Creates a repetition HIR expression.
- #[inline]
- pub fn repetition(mut rep: Repetition) -> Hir {
- // If the sub-expression of a repetition can only match the empty
- // string, then we force its maximum to be at most 1.
- if rep.sub.properties().maximum_len() == Some(0) {
- rep.min = cmp::min(rep.min, 1);
- rep.max = rep.max.map(|n| cmp::min(n, 1)).or(Some(1));
- }
- // The regex 'a{0}' is always equivalent to the empty regex. This is
- // true even when 'a' is an expression that never matches anything
- // (like '\P{any}').
- //
- // Additionally, the regex 'a{1}' is always equivalent to 'a'.
- if rep.min == 0 && rep.max == Some(0) {
- return Hir::empty();
- } else if rep.min == 1 && rep.max == Some(1) {
- return *rep.sub;
- }
- let props = Properties::repetition(&rep);
- Hir { kind: HirKind::Repetition(rep), props }
- }
-
- /// Creates a capture HIR expression.
- ///
- /// Note that there is no explicit HIR value for a non-capturing group.
- /// Since a non-capturing group only exists to override precedence in the
- /// concrete syntax and since an HIR already does its own grouping based on
- /// what is parsed, there is no need to explicitly represent non-capturing
- /// groups in the HIR.
- #[inline]
- pub fn capture(capture: Capture) -> Hir {
- let props = Properties::capture(&capture);
- Hir { kind: HirKind::Capture(capture), props }
- }
-
- /// Returns the concatenation of the given expressions.
- ///
- /// This attempts to flatten and simplify the concatenation as appropriate.
- ///
- /// # Example
- ///
- /// This shows a simple example of basic flattening of both concatenations
- /// and literals.
- ///
- /// ```
- /// use regex_syntax::hir::Hir;
- ///
- /// let hir = Hir::concat(vec![
- /// Hir::concat(vec![
- /// Hir::literal([b'a']),
- /// Hir::literal([b'b']),
- /// Hir::literal([b'c']),
- /// ]),
- /// Hir::concat(vec![
- /// Hir::literal([b'x']),
- /// Hir::literal([b'y']),
- /// Hir::literal([b'z']),
- /// ]),
- /// ]);
- /// let expected = Hir::literal("abcxyz".as_bytes());
- /// assert_eq!(expected, hir);
- /// ```
- pub fn concat(subs: Vec<Hir>) -> Hir {
- // We rebuild the concatenation by simplifying it. Would be nice to do
- // it in place, but that seems a little tricky?
- let mut new = vec![];
- // This gobbles up any adjacent literals in a concatenation and smushes
- // them together. Basically, when we see a literal, we add its bytes
- // to 'prior_lit', and whenever we see anything else, we first take
- // any bytes in 'prior_lit' and add it to the 'new' concatenation.
- let mut prior_lit: Option<Vec<u8>> = None;
- for sub in subs {
- let (kind, props) = sub.into_parts();
- match kind {
- HirKind::Literal(Literal(bytes)) => {
- if let Some(ref mut prior_bytes) = prior_lit {
- prior_bytes.extend_from_slice(&bytes);
- } else {
- prior_lit = Some(bytes.to_vec());
- }
- }
- // We also flatten concats that are direct children of another
- // concat. We only need to do this one level deep since
- // Hir::concat is the only way to build concatenations, and so
- // flattening happens inductively.
- HirKind::Concat(subs2) => {
- for sub2 in subs2 {
- let (kind2, props2) = sub2.into_parts();
- match kind2 {
- HirKind::Literal(Literal(bytes)) => {
- if let Some(ref mut prior_bytes) = prior_lit {
- prior_bytes.extend_from_slice(&bytes);
- } else {
- prior_lit = Some(bytes.to_vec());
- }
- }
- kind2 => {
- if let Some(prior_bytes) = prior_lit.take() {
- new.push(Hir::literal(prior_bytes));
- }
- new.push(Hir { kind: kind2, props: props2 });
- }
- }
- }
- }
- // We can just skip empty HIRs.
- HirKind::Empty => {}
- kind => {
- if let Some(prior_bytes) = prior_lit.take() {
- new.push(Hir::literal(prior_bytes));
- }
- new.push(Hir { kind, props });
- }
- }
- }
- if let Some(prior_bytes) = prior_lit.take() {
- new.push(Hir::literal(prior_bytes));
- }
- if new.is_empty() {
- return Hir::empty();
- } else if new.len() == 1 {
- return new.pop().unwrap();
- }
- let props = Properties::concat(&new);
- Hir { kind: HirKind::Concat(new), props }
- }
-
- /// Returns the alternation of the given expressions.
- ///
- /// This flattens and simplifies the alternation as appropriate. This may
- /// include factoring out common prefixes or even rewriting the alternation
- /// as a character class.
- ///
- /// Note that an empty alternation is equivalent to `Hir::fail()`. (It
- /// is not possible for one to write an empty alternation, or even an
- /// alternation with a single sub-expression, in the concrete syntax of a
- /// regex.)
- ///
- /// # Example
- ///
- /// This is a simple example showing how an alternation might get
- /// simplified.
- ///
- /// ```
- /// use regex_syntax::hir::{Hir, Class, ClassUnicode, ClassUnicodeRange};
- ///
- /// let hir = Hir::alternation(vec![
- /// Hir::literal([b'a']),
- /// Hir::literal([b'b']),
- /// Hir::literal([b'c']),
- /// Hir::literal([b'd']),
- /// Hir::literal([b'e']),
- /// Hir::literal([b'f']),
- /// ]);
- /// let expected = Hir::class(Class::Unicode(ClassUnicode::new([
- /// ClassUnicodeRange::new('a', 'f'),
- /// ])));
- /// assert_eq!(expected, hir);
- /// ```
- ///
- /// And another example showing how common prefixes might get factored
- /// out.
- ///
- /// ```
- /// use regex_syntax::hir::{Hir, Class, ClassUnicode, ClassUnicodeRange};
- ///
- /// let hir = Hir::alternation(vec![
- /// Hir::concat(vec![
- /// Hir::literal("abc".as_bytes()),
- /// Hir::class(Class::Unicode(ClassUnicode::new([
- /// ClassUnicodeRange::new('A', 'Z'),
- /// ]))),
- /// ]),
- /// Hir::concat(vec![
- /// Hir::literal("abc".as_bytes()),
- /// Hir::class(Class::Unicode(ClassUnicode::new([
- /// ClassUnicodeRange::new('a', 'z'),
- /// ]))),
- /// ]),
- /// ]);
- /// let expected = Hir::concat(vec![
- /// Hir::literal("abc".as_bytes()),
- /// Hir::alternation(vec![
- /// Hir::class(Class::Unicode(ClassUnicode::new([
- /// ClassUnicodeRange::new('A', 'Z'),
- /// ]))),
- /// Hir::class(Class::Unicode(ClassUnicode::new([
- /// ClassUnicodeRange::new('a', 'z'),
- /// ]))),
- /// ]),
- /// ]);
- /// assert_eq!(expected, hir);
- /// ```
- ///
- /// Note that these sorts of simplifications are not guaranteed.
- pub fn alternation(subs: Vec<Hir>) -> Hir {
- // We rebuild the alternation by simplifying it. We proceed similarly
- // as the concatenation case. But in this case, there's no literal
- // simplification happening. We're just flattening alternations.
- let mut new = Vec::with_capacity(subs.len());
- for sub in subs {
- let (kind, props) = sub.into_parts();
- match kind {
- HirKind::Alternation(subs2) => {
- new.extend(subs2);
- }
- kind => {
- new.push(Hir { kind, props });
- }
- }
- }
- if new.is_empty() {
- return Hir::fail();
- } else if new.len() == 1 {
- return new.pop().unwrap();
- }
- // Now that it's completely flattened, look for the special case of
- // 'char1|char2|...|charN' and collapse that into a class. Note that
- // we look for 'char' first and then bytes. The issue here is that if
- // we find both non-ASCII codepoints and non-ASCII singleton bytes,
- // then it isn't actually possible to smush them into a single class.
- // (Because classes are either "all codepoints" or "all bytes." You
- // can have a class that both matches non-ASCII but valid UTF-8 and
- // invalid UTF-8.) So we look for all chars and then all bytes, and
- // don't handle anything else.
- if let Some(singletons) = singleton_chars(&new) {
- let it = singletons
- .into_iter()
- .map(|ch| ClassUnicodeRange { start: ch, end: ch });
- return Hir::class(Class::Unicode(ClassUnicode::new(it)));
- }
- if let Some(singletons) = singleton_bytes(&new) {
- let it = singletons
- .into_iter()
- .map(|b| ClassBytesRange { start: b, end: b });
- return Hir::class(Class::Bytes(ClassBytes::new(it)));
- }
- // Similar to singleton chars, we can also look for alternations of
- // classes. Those can be smushed into a single class.
- if let Some(cls) = class_chars(&new) {
- return Hir::class(cls);
- }
- if let Some(cls) = class_bytes(&new) {
- return Hir::class(cls);
- }
- // Factor out a common prefix if we can, which might potentially
- // simplify the expression and unlock other optimizations downstream.
- // It also might generally make NFA matching and DFA construction
- // faster by reducing the scope of branching in the regex.
- new = match lift_common_prefix(new) {
- Ok(hir) => return hir,
- Err(unchanged) => unchanged,
- };
- let props = Properties::alternation(&new);
- Hir { kind: HirKind::Alternation(new), props }
- }
-
- /// Returns an HIR expression for `.`.
- ///
- /// * [`Dot::AnyChar`] maps to `(?su-R:.)`.
- /// * [`Dot::AnyByte`] maps to `(?s-Ru:.)`.
- /// * [`Dot::AnyCharExceptLF`] maps to `(?u-Rs:.)`.
- /// * [`Dot::AnyCharExceptCRLF`] maps to `(?Ru-s:.)`.
- /// * [`Dot::AnyByteExceptLF`] maps to `(?-Rsu:.)`.
- /// * [`Dot::AnyByteExceptCRLF`] maps to `(?R-su:.)`.
- ///
- /// # Example
- ///
- /// Note that this is a convenience routine for constructing the correct
- /// character class based on the value of `Dot`. There is no explicit "dot"
- /// HIR value. It is just an abbreviation for a common character class.
- ///
- /// ```
- /// use regex_syntax::hir::{Hir, Dot, Class, ClassBytes, ClassBytesRange};
- ///
- /// let hir = Hir::dot(Dot::AnyByte);
- /// let expected = Hir::class(Class::Bytes(ClassBytes::new([
- /// ClassBytesRange::new(0x00, 0xFF),
- /// ])));
- /// assert_eq!(expected, hir);
- /// ```
- #[inline]
- pub fn dot(dot: Dot) -> Hir {
- match dot {
- Dot::AnyChar => Hir::class(Class::Unicode(ClassUnicode::new([
- ClassUnicodeRange::new('\0', '\u{10FFFF}'),
- ]))),
- Dot::AnyByte => Hir::class(Class::Bytes(ClassBytes::new([
- ClassBytesRange::new(b'\0', b'\xFF'),
- ]))),
- Dot::AnyCharExcept(ch) => {
- let mut cls =
- ClassUnicode::new([ClassUnicodeRange::new(ch, ch)]);
- cls.negate();
- Hir::class(Class::Unicode(cls))
- }
- Dot::AnyCharExceptLF => {
- Hir::class(Class::Unicode(ClassUnicode::new([
- ClassUnicodeRange::new('\0', '\x09'),
- ClassUnicodeRange::new('\x0B', '\u{10FFFF}'),
- ])))
- }
- Dot::AnyCharExceptCRLF => {
- Hir::class(Class::Unicode(ClassUnicode::new([
- ClassUnicodeRange::new('\0', '\x09'),
- ClassUnicodeRange::new('\x0B', '\x0C'),
- ClassUnicodeRange::new('\x0E', '\u{10FFFF}'),
- ])))
- }
- Dot::AnyByteExcept(byte) => {
- let mut cls =
- ClassBytes::new([ClassBytesRange::new(byte, byte)]);
- cls.negate();
- Hir::class(Class::Bytes(cls))
- }
- Dot::AnyByteExceptLF => {
- Hir::class(Class::Bytes(ClassBytes::new([
- ClassBytesRange::new(b'\0', b'\x09'),
- ClassBytesRange::new(b'\x0B', b'\xFF'),
- ])))
- }
- Dot::AnyByteExceptCRLF => {
- Hir::class(Class::Bytes(ClassBytes::new([
- ClassBytesRange::new(b'\0', b'\x09'),
- ClassBytesRange::new(b'\x0B', b'\x0C'),
- ClassBytesRange::new(b'\x0E', b'\xFF'),
- ])))
- }
- }
- }
-}
-
-/// The underlying kind of an arbitrary [`Hir`] expression.
-///
-/// An `HirKind` is principally useful for doing case analysis on the type
-/// of a regular expression. If you're looking to build new `Hir` values,
-/// then you _must_ use the smart constructors defined on `Hir`, like
-/// [`Hir::repetition`], to build new `Hir` values. The API intentionally does
-/// not expose any way of building an `Hir` directly from an `HirKind`.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum HirKind {
- /// The empty regular expression, which matches everything, including the
- /// empty string.
- Empty,
- /// A literalstring that matches exactly these bytes.
- Literal(Literal),
- /// A single character class that matches any of the characters in the
- /// class. A class can either consist of Unicode scalar values as
- /// characters, or it can use bytes.
- ///
- /// A class may be empty. In which case, it matches nothing.
- Class(Class),
- /// A look-around assertion. A look-around match always has zero length.
- Look(Look),
- /// A repetition operation applied to a sub-expression.
- Repetition(Repetition),
- /// A capturing group, which contains a sub-expression.
- Capture(Capture),
- /// A concatenation of expressions.
- ///
- /// A concatenation matches only if each of its sub-expressions match one
- /// after the other.
- ///
- /// Concatenations are guaranteed by `Hir`'s smart constructors to always
- /// have at least two sub-expressions.
- Concat(Vec<Hir>),
- /// An alternation of expressions.
- ///
- /// An alternation matches only if at least one of its sub-expressions
- /// match. If multiple sub-expressions match, then the leftmost is
- /// preferred.
- ///
- /// Alternations are guaranteed by `Hir`'s smart constructors to always
- /// have at least two sub-expressions.
- Alternation(Vec<Hir>),
-}
-
-impl HirKind {
- /// Returns a slice of this kind's sub-expressions, if any.
- pub fn subs(&self) -> &[Hir] {
- use core::slice::from_ref;
-
- match *self {
- HirKind::Empty
- | HirKind::Literal(_)
- | HirKind::Class(_)
- | HirKind::Look(_) => &[],
- HirKind::Repetition(Repetition { ref sub, .. }) => from_ref(sub),
- HirKind::Capture(Capture { ref sub, .. }) => from_ref(sub),
- HirKind::Concat(ref subs) => subs,
- HirKind::Alternation(ref subs) => subs,
- }
- }
-}
-
-impl core::fmt::Debug for Hir {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- self.kind.fmt(f)
- }
-}
-
-/// Print a display representation of this Hir.
-///
-/// The result of this is a valid regular expression pattern string.
-///
-/// This implementation uses constant stack space and heap space proportional
-/// to the size of the `Hir`.
-impl core::fmt::Display for Hir {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- crate::hir::print::Printer::new().print(self, f)
- }
-}
-
-/// The high-level intermediate representation of a literal.
-///
-/// A literal corresponds to `0` or more bytes that should be matched
-/// literally. The smart constructors defined on `Hir` will automatically
-/// concatenate adjacent literals into one literal, and will even automatically
-/// replace empty literals with `Hir::empty()`.
-///
-/// Note that despite a literal being represented by a sequence of bytes, its
-/// `Debug` implementation will attempt to print it as a normal string. (That
-/// is, not a sequence of decimal numbers.)
-#[derive(Clone, Eq, PartialEq)]
-pub struct Literal(pub Box<[u8]>);
-
-impl core::fmt::Debug for Literal {
- fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
- crate::debug::Bytes(&self.0).fmt(f)
- }
-}
-
-/// The high-level intermediate representation of a character class.
-///
-/// A character class corresponds to a set of characters. A character is either
-/// defined by a Unicode scalar value or a byte.
-///
-/// A character class, regardless of its character type, is represented by a
-/// sequence of non-overlapping non-adjacent ranges of characters.
-///
-/// There are no guarantees about which class variant is used. Generally
-/// speaking, the Unicode variat is used whenever a class needs to contain
-/// non-ASCII Unicode scalar values. But the Unicode variant can be used even
-/// when Unicode mode is disabled. For example, at the time of writing, the
-/// regex `(?-u:a|\xc2\xa0)` will compile down to HIR for the Unicode class
-/// `[a\u00A0]` due to optimizations.
-///
-/// Note that `Bytes` variant may be produced even when it exclusively matches
-/// valid UTF-8. This is because a `Bytes` variant represents an intention by
-/// the author of the regular expression to disable Unicode mode, which in turn
-/// impacts the semantics of case insensitive matching. For example, `(?i)k`
-/// and `(?i-u)k` will not match the same set of strings.
-#[derive(Clone, Eq, PartialEq)]
-pub enum Class {
- /// A set of characters represented by Unicode scalar values.
- Unicode(ClassUnicode),
- /// A set of characters represented by arbitrary bytes (one byte per
- /// character).
- Bytes(ClassBytes),
-}
-
-impl Class {
- /// Apply Unicode simple case folding to this character class, in place.
- /// The character class will be expanded to include all simple case folded
- /// character variants.
- ///
- /// If this is a byte oriented character class, then this will be limited
- /// to the ASCII ranges `A-Z` and `a-z`.
- ///
- /// # Panics
- ///
- /// This routine panics when the case mapping data necessary for this
- /// routine to complete is unavailable. This occurs when the `unicode-case`
- /// feature is not enabled and the underlying class is Unicode oriented.
- ///
- /// Callers should prefer using `try_case_fold_simple` instead, which will
- /// return an error instead of panicking.
- pub fn case_fold_simple(&mut self) {
- match *self {
- Class::Unicode(ref mut x) => x.case_fold_simple(),
- Class::Bytes(ref mut x) => x.case_fold_simple(),
- }
- }
-
- /// Apply Unicode simple case folding to this character class, in place.
- /// The character class will be expanded to include all simple case folded
- /// character variants.
- ///
- /// If this is a byte oriented character class, then this will be limited
- /// to the ASCII ranges `A-Z` and `a-z`.
- ///
- /// # Error
- ///
- /// This routine returns an error when the case mapping data necessary
- /// for this routine to complete is unavailable. This occurs when the
- /// `unicode-case` feature is not enabled and the underlying class is
- /// Unicode oriented.
- pub fn try_case_fold_simple(
- &mut self,
- ) -> core::result::Result<(), CaseFoldError> {
- match *self {
- Class::Unicode(ref mut x) => x.try_case_fold_simple()?,
- Class::Bytes(ref mut x) => x.case_fold_simple(),
- }
- Ok(())
- }
-
- /// Negate this character class in place.
- ///
- /// After completion, this character class will contain precisely the
- /// characters that weren't previously in the class.
- pub fn negate(&mut self) {
- match *self {
- Class::Unicode(ref mut x) => x.negate(),
- Class::Bytes(ref mut x) => x.negate(),
- }
- }
-
- /// Returns true if and only if this character class will only ever match
- /// valid UTF-8.
- ///
- /// A character class can match invalid UTF-8 only when the following
- /// conditions are met:
- ///
- /// 1. The translator was configured to permit generating an expression
- /// that can match invalid UTF-8. (By default, this is disabled.)
- /// 2. Unicode mode (via the `u` flag) was disabled either in the concrete
- /// syntax or in the parser builder. By default, Unicode mode is
- /// enabled.
- pub fn is_utf8(&self) -> bool {
- match *self {
- Class::Unicode(_) => true,
- Class::Bytes(ref x) => x.is_ascii(),
- }
- }
-
- /// Returns the length, in bytes, of the smallest string matched by this
- /// character class.
- ///
- /// For non-empty byte oriented classes, this always returns `1`. For
- /// non-empty Unicode oriented classes, this can return `1`, `2`, `3` or
- /// `4`. For empty classes, `None` is returned. It is impossible for `0` to
- /// be returned.
- ///
- /// # Example
- ///
- /// This example shows some examples of regexes and their corresponding
- /// minimum length, if any.
- ///
- /// ```
- /// use regex_syntax::{hir::Properties, parse};
- ///
- /// // The empty string has a min length of 0.
- /// let hir = parse(r"")?;
- /// assert_eq!(Some(0), hir.properties().minimum_len());
- /// // As do other types of regexes that only match the empty string.
- /// let hir = parse(r"^$\b\B")?;
- /// assert_eq!(Some(0), hir.properties().minimum_len());
- /// // A regex that can match the empty string but match more is still 0.
- /// let hir = parse(r"a*")?;
- /// assert_eq!(Some(0), hir.properties().minimum_len());
- /// // A regex that matches nothing has no minimum defined.
- /// let hir = parse(r"[a&&b]")?;
- /// assert_eq!(None, hir.properties().minimum_len());
- /// // Character classes usually have a minimum length of 1.
- /// let hir = parse(r"\w")?;
- /// assert_eq!(Some(1), hir.properties().minimum_len());
- /// // But sometimes Unicode classes might be bigger!
- /// let hir = parse(r"\p{Cyrillic}")?;
- /// assert_eq!(Some(2), hir.properties().minimum_len());
- ///
- /// # Ok::<(), Box<dyn std::error::Error>>(())
- /// ```
- pub fn minimum_len(&self) -> Option<usize> {
- match *self {
- Class::Unicode(ref x) => x.minimum_len(),
- Class::Bytes(ref x) => x.minimum_len(),
- }
- }
-
- /// Returns the length, in bytes, of the longest string matched by this
- /// character class.
- ///
- /// For non-empty byte oriented classes, this always returns `1`. For
- /// non-empty Unicode oriented classes, this can return `1`, `2`, `3` or
- /// `4`. For empty classes, `None` is returned. It is impossible for `0` to
- /// be returned.
- ///
- /// # Example
- ///
- /// This example shows some examples of regexes and their corresponding
- /// maximum length, if any.
- ///
- /// ```
- /// use regex_syntax::{hir::Properties, parse};
- ///
- /// // The empty string has a max length of 0.
- /// let hir = parse(r"")?;
- /// assert_eq!(Some(0), hir.properties().maximum_len());
- /// // As do other types of regexes that only match the empty string.
- /// let hir = parse(r"^$\b\B")?;
- /// assert_eq!(Some(0), hir.properties().maximum_len());
- /// // A regex that matches nothing has no maximum defined.
- /// let hir = parse(r"[a&&b]")?;
- /// assert_eq!(None, hir.properties().maximum_len());
- /// // Bounded repeats work as you expect.
- /// let hir = parse(r"x{2,10}")?;
- /// assert_eq!(Some(10), hir.properties().maximum_len());
- /// // An unbounded repeat means there is no maximum.
- /// let hir = parse(r"x{2,}")?;
- /// assert_eq!(None, hir.properties().maximum_len());
- /// // With Unicode enabled, \w can match up to 4 bytes!
- /// let hir = parse(r"\w")?;
- /// assert_eq!(Some(4), hir.properties().maximum_len());
- /// // Without Unicode enabled, \w matches at most 1 byte.
- /// let hir = parse(r"(?-u)\w")?;
- /// assert_eq!(Some(1), hir.properties().maximum_len());
- ///
- /// # Ok::<(), Box<dyn std::error::Error>>(())
- /// ```
- pub fn maximum_len(&self) -> Option<usize> {
- match *self {
- Class::Unicode(ref x) => x.maximum_len(),
- Class::Bytes(ref x) => x.maximum_len(),
- }
- }
-
- /// Returns true if and only if this character class is empty. That is,
- /// it has no elements.
- ///
- /// An empty character can never match anything, including an empty string.
- pub fn is_empty(&self) -> bool {
- match *self {
- Class::Unicode(ref x) => x.ranges().is_empty(),
- Class::Bytes(ref x) => x.ranges().is_empty(),
- }
- }
-
- /// If this class consists of exactly one element (whether a codepoint or a
- /// byte), then return it as a literal byte string.
- ///
- /// If this class is empty or contains more than one element, then `None`
- /// is returned.
- pub fn literal(&self) -> Option<Vec<u8>> {
- match *self {
- Class::Unicode(ref x) => x.literal(),
- Class::Bytes(ref x) => x.literal(),
- }
- }
-}
-
-impl core::fmt::Debug for Class {
- fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
- use crate::debug::Byte;
-
- let mut fmter = f.debug_set();
- match *self {
- Class::Unicode(ref cls) => {
- for r in cls.ranges().iter() {
- fmter.entry(&(r.start..=r.end));
- }
- }
- Class::Bytes(ref cls) => {
- for r in cls.ranges().iter() {
- fmter.entry(&(Byte(r.start)..=Byte(r.end)));
- }
- }
- }
- fmter.finish()
- }
-}
-
-/// A set of characters represented by Unicode scalar values.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct ClassUnicode {
- set: IntervalSet<ClassUnicodeRange>,
-}
-
-impl ClassUnicode {
- /// Create a new class from a sequence of ranges.
- ///
- /// The given ranges do not need to be in any specific order, and ranges
- /// may overlap. Ranges will automatically be sorted into a canonical
- /// non-overlapping order.
- pub fn new<I>(ranges: I) -> ClassUnicode
- where
- I: IntoIterator<Item = ClassUnicodeRange>,
- {
- ClassUnicode { set: IntervalSet::new(ranges) }
- }
-
- /// Create a new class with no ranges.
- ///
- /// An empty class matches nothing. That is, it is equivalent to
- /// [`Hir::fail`].
- pub fn empty() -> ClassUnicode {
- ClassUnicode::new(vec![])
- }
-
- /// Add a new range to this set.
- pub fn push(&mut self, range: ClassUnicodeRange) {
- self.set.push(range);
- }
-
- /// Return an iterator over all ranges in this class.
- ///
- /// The iterator yields ranges in ascending order.
- pub fn iter(&self) -> ClassUnicodeIter<'_> {
- ClassUnicodeIter(self.set.iter())
- }
-
- /// Return the underlying ranges as a slice.
- pub fn ranges(&self) -> &[ClassUnicodeRange] {
- self.set.intervals()
- }
-
- /// Expand this character class such that it contains all case folded
- /// characters, according to Unicode's "simple" mapping. For example, if
- /// this class consists of the range `a-z`, then applying case folding will
- /// result in the class containing both the ranges `a-z` and `A-Z`.
- ///
- /// # Panics
- ///
- /// This routine panics when the case mapping data necessary for this
- /// routine to complete is unavailable. This occurs when the `unicode-case`
- /// feature is not enabled.
- ///
- /// Callers should prefer using `try_case_fold_simple` instead, which will
- /// return an error instead of panicking.
- pub fn case_fold_simple(&mut self) {
- self.set
- .case_fold_simple()
- .expect("unicode-case feature must be enabled");
- }
-
- /// Expand this character class such that it contains all case folded
- /// characters, according to Unicode's "simple" mapping. For example, if
- /// this class consists of the range `a-z`, then applying case folding will
- /// result in the class containing both the ranges `a-z` and `A-Z`.
- ///
- /// # Error
- ///
- /// This routine returns an error when the case mapping data necessary
- /// for this routine to complete is unavailable. This occurs when the
- /// `unicode-case` feature is not enabled.
- pub fn try_case_fold_simple(
- &mut self,
- ) -> core::result::Result<(), CaseFoldError> {
- self.set.case_fold_simple()
- }
-
- /// Negate this character class.
- ///
- /// For all `c` where `c` is a Unicode scalar value, if `c` was in this
- /// set, then it will not be in this set after negation.
- pub fn negate(&mut self) {
- self.set.negate();
- }
-
- /// Union this character class with the given character class, in place.
- pub fn union(&mut self, other: &ClassUnicode) {
- self.set.union(&other.set);
- }
-
- /// Intersect this character class with the given character class, in
- /// place.
- pub fn intersect(&mut self, other: &ClassUnicode) {
- self.set.intersect(&other.set);
- }
-
- /// Subtract the given character class from this character class, in place.
- pub fn difference(&mut self, other: &ClassUnicode) {
- self.set.difference(&other.set);
- }
-
- /// Compute the symmetric difference of the given character classes, in
- /// place.
- ///
- /// This computes the symmetric difference of two character classes. This
- /// removes all elements in this class that are also in the given class,
- /// but all adds all elements from the given class that aren't in this
- /// class. That is, the class will contain all elements in either class,
- /// but will not contain any elements that are in both classes.
- pub fn symmetric_difference(&mut self, other: &ClassUnicode) {
- self.set.symmetric_difference(&other.set);
- }
-
- /// Returns true if and only if this character class will either match
- /// nothing or only ASCII bytes. Stated differently, this returns false
- /// if and only if this class contains a non-ASCII codepoint.
- pub fn is_ascii(&self) -> bool {
- self.set.intervals().last().map_or(true, |r| r.end <= '\x7F')
- }
-
- /// Returns the length, in bytes, of the smallest string matched by this
- /// character class.
- ///
- /// Returns `None` when the class is empty.
- pub fn minimum_len(&self) -> Option<usize> {
- let first = self.ranges().get(0)?;
- // Correct because c1 < c2 implies c1.len_utf8() < c2.len_utf8().
- Some(first.start.len_utf8())
- }
-
- /// Returns the length, in bytes, of the longest string matched by this
- /// character class.
- ///
- /// Returns `None` when the class is empty.
- pub fn maximum_len(&self) -> Option<usize> {
- let last = self.ranges().last()?;
- // Correct because c1 < c2 implies c1.len_utf8() < c2.len_utf8().
- Some(last.end.len_utf8())
- }
-
- /// If this class consists of exactly one codepoint, then return it as
- /// a literal byte string.
- ///
- /// If this class is empty or contains more than one codepoint, then `None`
- /// is returned.
- pub fn literal(&self) -> Option<Vec<u8>> {
- let rs = self.ranges();
- if rs.len() == 1 && rs[0].start == rs[0].end {
- Some(rs[0].start.encode_utf8(&mut [0; 4]).to_string().into_bytes())
- } else {
- None
- }
- }
-
- /// If this class consists of only ASCII ranges, then return its
- /// corresponding and equivalent byte class.
- pub fn to_byte_class(&self) -> Option<ClassBytes> {
- if !self.is_ascii() {
- return None;
- }
- Some(ClassBytes::new(self.ranges().iter().map(|r| {
- // Since we are guaranteed that our codepoint range is ASCII, the
- // 'u8::try_from' calls below are guaranteed to be correct.
- ClassBytesRange {
- start: u8::try_from(r.start).unwrap(),
- end: u8::try_from(r.end).unwrap(),
- }
- })))
- }
-}
-
-/// An iterator over all ranges in a Unicode character class.
-///
-/// The lifetime `'a` refers to the lifetime of the underlying class.
-#[derive(Debug)]
-pub struct ClassUnicodeIter<'a>(IntervalSetIter<'a, ClassUnicodeRange>);
-
-impl<'a> Iterator for ClassUnicodeIter<'a> {
- type Item = &'a ClassUnicodeRange;
-
- fn next(&mut self) -> Option<&'a ClassUnicodeRange> {
- self.0.next()
- }
-}
-
-/// A single range of characters represented by Unicode scalar values.
-///
-/// The range is closed. That is, the start and end of the range are included
-/// in the range.
-#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
-pub struct ClassUnicodeRange {
- start: char,
- end: char,
-}
-
-impl core::fmt::Debug for ClassUnicodeRange {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- let start = if !self.start.is_whitespace() && !self.start.is_control()
- {
- self.start.to_string()
- } else {
- format!("0x{:X}", u32::from(self.start))
- };
- let end = if !self.end.is_whitespace() && !self.end.is_control() {
- self.end.to_string()
- } else {
- format!("0x{:X}", u32::from(self.end))
- };
- f.debug_struct("ClassUnicodeRange")
- .field("start", &start)
- .field("end", &end)
- .finish()
- }
-}
-
-impl Interval for ClassUnicodeRange {
- type Bound = char;
-
- #[inline]
- fn lower(&self) -> char {
- self.start
- }
- #[inline]
- fn upper(&self) -> char {
- self.end
- }
- #[inline]
- fn set_lower(&mut self, bound: char) {
- self.start = bound;
- }
- #[inline]
- fn set_upper(&mut self, bound: char) {
- self.end = bound;
- }
-
- /// Apply simple case folding to this Unicode scalar value range.
- ///
- /// Additional ranges are appended to the given vector. Canonical ordering
- /// is *not* maintained in the given vector.
- fn case_fold_simple(
- &self,
- ranges: &mut Vec<ClassUnicodeRange>,
- ) -> Result<(), unicode::CaseFoldError> {
- let mut folder = unicode::SimpleCaseFolder::new()?;
- if !folder.overlaps(self.start, self.end) {
- return Ok(());
- }
- let (start, end) = (u32::from(self.start), u32::from(self.end));
- for cp in (start..=end).filter_map(char::from_u32) {
- for &cp_folded in folder.mapping(cp) {
- ranges.push(ClassUnicodeRange::new(cp_folded, cp_folded));
- }
- }
- Ok(())
- }
-}
-
-impl ClassUnicodeRange {
- /// Create a new Unicode scalar value range for a character class.
- ///
- /// The returned range is always in a canonical form. That is, the range
- /// returned always satisfies the invariant that `start <= end`.
- pub fn new(start: char, end: char) -> ClassUnicodeRange {
- ClassUnicodeRange::create(start, end)
- }
-
- /// Return the start of this range.
- ///
- /// The start of a range is always less than or equal to the end of the
- /// range.
- pub fn start(&self) -> char {
- self.start
- }
-
- /// Return the end of this range.
- ///
- /// The end of a range is always greater than or equal to the start of the
- /// range.
- pub fn end(&self) -> char {
- self.end
- }
-
- /// Returns the number of codepoints in this range.
- pub fn len(&self) -> usize {
- let diff = 1 + u32::from(self.end) - u32::from(self.start);
- // This is likely to panic in 16-bit targets since a usize can only fit
- // 2^16. It's not clear what to do here, other than to return an error
- // when building a Unicode class that contains a range whose length
- // overflows usize. (Which, to be honest, is probably quite common on
- // 16-bit targets. For example, this would imply that '.' and '\p{any}'
- // would be impossible to build.)
- usize::try_from(diff).expect("char class len fits in usize")
- }
-}
-
-/// A set of characters represented by arbitrary bytes.
-///
-/// Each byte corresponds to one character.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct ClassBytes {
- set: IntervalSet<ClassBytesRange>,
-}
-
-impl ClassBytes {
- /// Create a new class from a sequence of ranges.
- ///
- /// The given ranges do not need to be in any specific order, and ranges
- /// may overlap. Ranges will automatically be sorted into a canonical
- /// non-overlapping order.
- pub fn new<I>(ranges: I) -> ClassBytes
- where
- I: IntoIterator<Item = ClassBytesRange>,
- {
- ClassBytes { set: IntervalSet::new(ranges) }
- }
-
- /// Create a new class with no ranges.
- ///
- /// An empty class matches nothing. That is, it is equivalent to
- /// [`Hir::fail`].
- pub fn empty() -> ClassBytes {
- ClassBytes::new(vec![])
- }
-
- /// Add a new range to this set.
- pub fn push(&mut self, range: ClassBytesRange) {
- self.set.push(range);
- }
-
- /// Return an iterator over all ranges in this class.
- ///
- /// The iterator yields ranges in ascending order.
- pub fn iter(&self) -> ClassBytesIter<'_> {
- ClassBytesIter(self.set.iter())
- }
-
- /// Return the underlying ranges as a slice.
- pub fn ranges(&self) -> &[ClassBytesRange] {
- self.set.intervals()
- }
-
- /// Expand this character class such that it contains all case folded
- /// characters. For example, if this class consists of the range `a-z`,
- /// then applying case folding will result in the class containing both the
- /// ranges `a-z` and `A-Z`.
- ///
- /// Note that this only applies ASCII case folding, which is limited to the
- /// characters `a-z` and `A-Z`.
- pub fn case_fold_simple(&mut self) {
- self.set.case_fold_simple().expect("ASCII case folding never fails");
- }
-
- /// Negate this byte class.
- ///
- /// For all `b` where `b` is a any byte, if `b` was in this set, then it
- /// will not be in this set after negation.
- pub fn negate(&mut self) {
- self.set.negate();
- }
-
- /// Union this byte class with the given byte class, in place.
- pub fn union(&mut self, other: &ClassBytes) {
- self.set.union(&other.set);
- }
-
- /// Intersect this byte class with the given byte class, in place.
- pub fn intersect(&mut self, other: &ClassBytes) {
- self.set.intersect(&other.set);
- }
-
- /// Subtract the given byte class from this byte class, in place.
- pub fn difference(&mut self, other: &ClassBytes) {
- self.set.difference(&other.set);
- }
-
- /// Compute the symmetric difference of the given byte classes, in place.
- ///
- /// This computes the symmetric difference of two byte classes. This
- /// removes all elements in this class that are also in the given class,
- /// but all adds all elements from the given class that aren't in this
- /// class. That is, the class will contain all elements in either class,
- /// but will not contain any elements that are in both classes.
- pub fn symmetric_difference(&mut self, other: &ClassBytes) {
- self.set.symmetric_difference(&other.set);
- }
-
- /// Returns true if and only if this character class will either match
- /// nothing or only ASCII bytes. Stated differently, this returns false
- /// if and only if this class contains a non-ASCII byte.
- pub fn is_ascii(&self) -> bool {
- self.set.intervals().last().map_or(true, |r| r.end <= 0x7F)
- }
-
- /// Returns the length, in bytes, of the smallest string matched by this
- /// character class.
- ///
- /// Returns `None` when the class is empty.
- pub fn minimum_len(&self) -> Option<usize> {
- if self.ranges().is_empty() {
- None
- } else {
- Some(1)
- }
- }
-
- /// Returns the length, in bytes, of the longest string matched by this
- /// character class.
- ///
- /// Returns `None` when the class is empty.
- pub fn maximum_len(&self) -> Option<usize> {
- if self.ranges().is_empty() {
- None
- } else {
- Some(1)
- }
- }
-
- /// If this class consists of exactly one byte, then return it as
- /// a literal byte string.
- ///
- /// If this class is empty or contains more than one byte, then `None`
- /// is returned.
- pub fn literal(&self) -> Option<Vec<u8>> {
- let rs = self.ranges();
- if rs.len() == 1 && rs[0].start == rs[0].end {
- Some(vec![rs[0].start])
- } else {
- None
- }
- }
-
- /// If this class consists of only ASCII ranges, then return its
- /// corresponding and equivalent Unicode class.
- pub fn to_unicode_class(&self) -> Option<ClassUnicode> {
- if !self.is_ascii() {
- return None;
- }
- Some(ClassUnicode::new(self.ranges().iter().map(|r| {
- // Since we are guaranteed that our byte range is ASCII, the
- // 'char::from' calls below are correct and will not erroneously
- // convert a raw byte value into its corresponding codepoint.
- ClassUnicodeRange {
- start: char::from(r.start),
- end: char::from(r.end),
- }
- })))
- }
-}
-
-/// An iterator over all ranges in a byte character class.
-///
-/// The lifetime `'a` refers to the lifetime of the underlying class.
-#[derive(Debug)]
-pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>);
-
-impl<'a> Iterator for ClassBytesIter<'a> {
- type Item = &'a ClassBytesRange;
-
- fn next(&mut self) -> Option<&'a ClassBytesRange> {
- self.0.next()
- }
-}
-
-/// A single range of characters represented by arbitrary bytes.
-///
-/// The range is closed. That is, the start and end of the range are included
-/// in the range.
-#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
-pub struct ClassBytesRange {
- start: u8,
- end: u8,
-}
-
-impl Interval for ClassBytesRange {
- type Bound = u8;
-
- #[inline]
- fn lower(&self) -> u8 {
- self.start
- }
- #[inline]
- fn upper(&self) -> u8 {
- self.end
- }
- #[inline]
- fn set_lower(&mut self, bound: u8) {
- self.start = bound;
- }
- #[inline]
- fn set_upper(&mut self, bound: u8) {
- self.end = bound;
- }
-
- /// Apply simple case folding to this byte range. Only ASCII case mappings
- /// (for a-z) are applied.
- ///
- /// Additional ranges are appended to the given vector. Canonical ordering
- /// is *not* maintained in the given vector.
- fn case_fold_simple(
- &self,
- ranges: &mut Vec<ClassBytesRange>,
- ) -> Result<(), unicode::CaseFoldError> {
- if !ClassBytesRange::new(b'a', b'z').is_intersection_empty(self) {
- let lower = cmp::max(self.start, b'a');
- let upper = cmp::min(self.end, b'z');
- ranges.push(ClassBytesRange::new(lower - 32, upper - 32));
- }
- if !ClassBytesRange::new(b'A', b'Z').is_intersection_empty(self) {
- let lower = cmp::max(self.start, b'A');
- let upper = cmp::min(self.end, b'Z');
- ranges.push(ClassBytesRange::new(lower + 32, upper + 32));
- }
- Ok(())
- }
-}
-
-impl ClassBytesRange {
- /// Create a new byte range for a character class.
- ///
- /// The returned range is always in a canonical form. That is, the range
- /// returned always satisfies the invariant that `start <= end`.
- pub fn new(start: u8, end: u8) -> ClassBytesRange {
- ClassBytesRange::create(start, end)
- }
-
- /// Return the start of this range.
- ///
- /// The start of a range is always less than or equal to the end of the
- /// range.
- pub fn start(&self) -> u8 {
- self.start
- }
-
- /// Return the end of this range.
- ///
- /// The end of a range is always greater than or equal to the start of the
- /// range.
- pub fn end(&self) -> u8 {
- self.end
- }
-
- /// Returns the number of bytes in this range.
- pub fn len(&self) -> usize {
- usize::from(self.end.checked_sub(self.start).unwrap())
- .checked_add(1)
- .unwrap()
- }
-}
-
-impl core::fmt::Debug for ClassBytesRange {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- f.debug_struct("ClassBytesRange")
- .field("start", &crate::debug::Byte(self.start))
- .field("end", &crate::debug::Byte(self.end))
- .finish()
- }
-}
-
-/// The high-level intermediate representation for a look-around assertion.
-///
-/// An assertion match is always zero-length. Also called an "empty match."
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-pub enum Look {
- /// Match the beginning of text. Specifically, this matches at the starting
- /// position of the input.
- Start = 1 << 0,
- /// Match the end of text. Specifically, this matches at the ending
- /// position of the input.
- End = 1 << 1,
- /// Match the beginning of a line or the beginning of text. Specifically,
- /// this matches at the starting position of the input, or at the position
- /// immediately following a `\n` character.
- StartLF = 1 << 2,
- /// Match the end of a line or the end of text. Specifically, this matches
- /// at the end position of the input, or at the position immediately
- /// preceding a `\n` character.
- EndLF = 1 << 3,
- /// Match the beginning of a line or the beginning of text. Specifically,
- /// this matches at the starting position of the input, or at the position
- /// immediately following either a `\r` or `\n` character, but never after
- /// a `\r` when a `\n` follows.
- StartCRLF = 1 << 4,
- /// Match the end of a line or the end of text. Specifically, this matches
- /// at the end position of the input, or at the position immediately
- /// preceding a `\r` or `\n` character, but never before a `\n` when a `\r`
- /// precedes it.
- EndCRLF = 1 << 5,
- /// Match an ASCII-only word boundary. That is, this matches a position
- /// where the left adjacent character and right adjacent character
- /// correspond to a word and non-word or a non-word and word character.
- WordAscii = 1 << 6,
- /// Match an ASCII-only negation of a word boundary.
- WordAsciiNegate = 1 << 7,
- /// Match a Unicode-aware word boundary. That is, this matches a position
- /// where the left adjacent character and right adjacent character
- /// correspond to a word and non-word or a non-word and word character.
- WordUnicode = 1 << 8,
- /// Match a Unicode-aware negation of a word boundary.
- WordUnicodeNegate = 1 << 9,
- /// Match the start of an ASCII-only word boundary. That is, this matches a
- /// position at either the beginning of the haystack or where the previous
- /// character is not a word character and the following character is a word
- /// character.
- WordStartAscii = 1 << 10,
- /// Match the end of an ASCII-only word boundary. That is, this matches
- /// a position at either the end of the haystack or where the previous
- /// character is a word character and the following character is not a word
- /// character.
- WordEndAscii = 1 << 11,
- /// Match the start of a Unicode word boundary. That is, this matches a
- /// position at either the beginning of the haystack or where the previous
- /// character is not a word character and the following character is a word
- /// character.
- WordStartUnicode = 1 << 12,
- /// Match the end of a Unicode word boundary. That is, this matches a
- /// position at either the end of the haystack or where the previous
- /// character is a word character and the following character is not a word
- /// character.
- WordEndUnicode = 1 << 13,
- /// Match the start half of an ASCII-only word boundary. That is, this
- /// matches a position at either the beginning of the haystack or where the
- /// previous character is not a word character.
- WordStartHalfAscii = 1 << 14,
- /// Match the end half of an ASCII-only word boundary. That is, this
- /// matches a position at either the end of the haystack or where the
- /// following character is not a word character.
- WordEndHalfAscii = 1 << 15,
- /// Match the start half of a Unicode word boundary. That is, this matches
- /// a position at either the beginning of the haystack or where the
- /// previous character is not a word character.
- WordStartHalfUnicode = 1 << 16,
- /// Match the end half of a Unicode word boundary. That is, this matches
- /// a position at either the end of the haystack or where the following
- /// character is not a word character.
- WordEndHalfUnicode = 1 << 17,
-}
-
-impl Look {
- /// Flip the look-around assertion to its equivalent for reverse searches.
- /// For example, `StartLF` gets translated to `EndLF`.
- ///
- /// Some assertions, such as `WordUnicode`, remain the same since they
- /// match the same positions regardless of the direction of the search.
- #[inline]
- pub const fn reversed(self) -> Look {
- match self {
- Look::Start => Look::End,
- Look::End => Look::Start,
- Look::StartLF => Look::EndLF,
- Look::EndLF => Look::StartLF,
- Look::StartCRLF => Look::EndCRLF,
- Look::EndCRLF => Look::StartCRLF,
- Look::WordAscii => Look::WordAscii,
- Look::WordAsciiNegate => Look::WordAsciiNegate,
- Look::WordUnicode => Look::WordUnicode,
- Look::WordUnicodeNegate => Look::WordUnicodeNegate,
- Look::WordStartAscii => Look::WordEndAscii,
- Look::WordEndAscii => Look::WordStartAscii,
- Look::WordStartUnicode => Look::WordEndUnicode,
- Look::WordEndUnicode => Look::WordStartUnicode,
- Look::WordStartHalfAscii => Look::WordEndHalfAscii,
- Look::WordEndHalfAscii => Look::WordStartHalfAscii,
- Look::WordStartHalfUnicode => Look::WordEndHalfUnicode,
- Look::WordEndHalfUnicode => Look::WordStartHalfUnicode,
- }
- }
-
- /// Return the underlying representation of this look-around enumeration
- /// as an integer. Giving the return value to the [`Look::from_repr`]
- /// constructor is guaranteed to return the same look-around variant that
- /// one started with within a semver compatible release of this crate.
- #[inline]
- pub const fn as_repr(self) -> u32 {
- // AFAIK, 'as' is the only way to zero-cost convert an int enum to an
- // actual int.
- self as u32
- }
-
- /// Given the underlying representation of a `Look` value, return the
- /// corresponding `Look` value if the representation is valid. Otherwise
- /// `None` is returned.
- #[inline]
- pub const fn from_repr(repr: u32) -> Option<Look> {
- match repr {
- 0b00_0000_0000_0000_0001 => Some(Look::Start),
- 0b00_0000_0000_0000_0010 => Some(Look::End),
- 0b00_0000_0000_0000_0100 => Some(Look::StartLF),
- 0b00_0000_0000_0000_1000 => Some(Look::EndLF),
- 0b00_0000_0000_0001_0000 => Some(Look::StartCRLF),
- 0b00_0000_0000_0010_0000 => Some(Look::EndCRLF),
- 0b00_0000_0000_0100_0000 => Some(Look::WordAscii),
- 0b00_0000_0000_1000_0000 => Some(Look::WordAsciiNegate),
- 0b00_0000_0001_0000_0000 => Some(Look::WordUnicode),
- 0b00_0000_0010_0000_0000 => Some(Look::WordUnicodeNegate),
- 0b00_0000_0100_0000_0000 => Some(Look::WordStartAscii),
- 0b00_0000_1000_0000_0000 => Some(Look::WordEndAscii),
- 0b00_0001_0000_0000_0000 => Some(Look::WordStartUnicode),
- 0b00_0010_0000_0000_0000 => Some(Look::WordEndUnicode),
- 0b00_0100_0000_0000_0000 => Some(Look::WordStartHalfAscii),
- 0b00_1000_0000_0000_0000 => Some(Look::WordEndHalfAscii),
- 0b01_0000_0000_0000_0000 => Some(Look::WordStartHalfUnicode),
- 0b10_0000_0000_0000_0000 => Some(Look::WordEndHalfUnicode),
- _ => None,
- }
- }
-
- /// Returns a convenient single codepoint representation of this
- /// look-around assertion. Each assertion is guaranteed to be represented
- /// by a distinct character.
- ///
- /// This is useful for succinctly representing a look-around assertion in
- /// human friendly but succinct output intended for a programmer working on
- /// regex internals.
- #[inline]
- pub const fn as_char(self) -> char {
- match self {
- Look::Start => 'A',
- Look::End => 'z',
- Look::StartLF => '^',
- Look::EndLF => '$',
- Look::StartCRLF => 'r',
- Look::EndCRLF => 'R',
- Look::WordAscii => 'b',
- Look::WordAsciiNegate => 'B',
- Look::WordUnicode => '𝛃',
- Look::WordUnicodeNegate => '𝚩',
- Look::WordStartAscii => '<',
- Look::WordEndAscii => '>',
- Look::WordStartUnicode => '〈',
- Look::WordEndUnicode => '〉',
- Look::WordStartHalfAscii => '◁',
- Look::WordEndHalfAscii => '▷',
- Look::WordStartHalfUnicode => '◀',
- Look::WordEndHalfUnicode => '▶',
- }
- }
-}
-
-/// The high-level intermediate representation for a capturing group.
-///
-/// A capturing group always has an index and a child expression. It may
-/// also have a name associated with it (e.g., `(?P<foo>\w)`), but it's not
-/// necessary.
-///
-/// Note that there is no explicit representation of a non-capturing group
-/// in a `Hir`. Instead, non-capturing grouping is handled automatically by
-/// the recursive structure of the `Hir` itself.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct Capture {
- /// The capture index of the capture.
- pub index: u32,
- /// The name of the capture, if it exists.
- pub name: Option<Box<str>>,
- /// The expression inside the capturing group, which may be empty.
- pub sub: Box<Hir>,
-}
-
-/// The high-level intermediate representation of a repetition operator.
-///
-/// A repetition operator permits the repetition of an arbitrary
-/// sub-expression.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct Repetition {
- /// The minimum number of times the sub-expression must be repeated.
- ///
- /// Note that special cases like `?`, `+` and `*` all get translated into
- /// the ranges `{0,1}`, `{1,}` and `{0,}`, respectively.
- ///
- /// When `min` is zero, this expression can match the empty string
- /// regardless of what its sub-expression is.
- pub min: u32,
- /// The maximum number of times the sub-expression may be repeated.
- ///
- /// Note that when `max` is `None`, `min` acts as a lower bound but where
- /// there is no upper bound. For something like `x{5}` where the min and
- /// max are equivalent, `min` will be set to `5` and `max` will be set to
- /// `Some(5)`.
- pub max: Option<u32>,
- /// Whether this repetition operator is greedy or not. A greedy operator
- /// will match as much as it can. A non-greedy operator will match as
- /// little as it can.
- ///
- /// Typically, operators are greedy by default and are only non-greedy when
- /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is
- /// not. However, this can be inverted via the `U` "ungreedy" flag.
- pub greedy: bool,
- /// The expression being repeated.
- pub sub: Box<Hir>,
-}
-
-impl Repetition {
- /// Returns a new repetition with the same `min`, `max` and `greedy`
- /// values, but with its sub-expression replaced with the one given.
- ///
- /// `self` is only borrowed and is left unchanged; the given `sub` is moved
- /// into a fresh `Box` owned by the returned value.
- pub fn with(&self, sub: Hir) -> Repetition {
- Repetition {
- min: self.min,
- max: self.max,
- greedy: self.greedy,
- sub: Box::new(sub),
- }
- }
-}
-
-/// A type describing the different flavors of `.`.
-///
-/// This type is meant to be used with [`Hir::dot`], which is a convenience
-/// routine for building HIR values derived from the `.` regex.
-#[non_exhaustive]
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-pub enum Dot {
- /// Matches the UTF-8 encoding of any Unicode scalar value.
- ///
- /// This is equivalent to `(?su:.)` and also `\p{any}`.
- AnyChar,
- /// Matches any byte value.
- ///
- /// This is equivalent to `(?s-u:.)` and also `(?-u:[\x00-\xFF])`.
- AnyByte,
- /// Matches the UTF-8 encoding of any Unicode scalar value except for the
- /// `char` given.
- ///
- /// This is equivalent to using `(?u-s:.)` with the line terminator set
- /// to a particular ASCII byte. (Because of peculiarities in the regex
- /// engines, a line terminator must be a single byte. It follows that when
- /// UTF-8 mode is enabled, this single byte must also be a Unicode scalar
- /// value. That is, it must be ASCII.)
- ///
- /// (This and `AnyCharExceptLF` both exist because of legacy reasons.
- /// `AnyCharExceptLF` will be dropped in the next breaking change release.)
- AnyCharExcept(char),
- /// Matches the UTF-8 encoding of any Unicode scalar value except for `\n`.
- ///
- /// This is equivalent to `(?u-s:.)` and also `[\p{any}--\n]`.
- AnyCharExceptLF,
- /// Matches the UTF-8 encoding of any Unicode scalar value except for `\r`
- /// and `\n`.
- ///
- /// This is equivalent to `(?uR-s:.)` and also `[\p{any}--\r\n]`.
- AnyCharExceptCRLF,
- /// Matches any byte value except for the `u8` given.
- ///
- /// This is equivalent to using `(?-us:.)` with the line terminator set
- /// to a particular ASCII byte. (Because of peculiarities in the regex
- /// engines, a line terminator must be a single byte. It follows that when
- /// UTF-8 mode is enabled, this single byte must also be a Unicode scalar
- /// value. That is, it must be ASCII.)
- ///
- /// (This and `AnyByteExceptLF` both exist because of legacy reasons.
- /// `AnyByteExceptLF` will be dropped in the next breaking change release.)
- AnyByteExcept(u8),
- /// Matches any byte value except for `\n`.
- ///
- /// This is equivalent to `(?-su:.)` and also `(?-u:[[\x00-\xFF]--\n])`.
- AnyByteExceptLF,
- /// Matches any byte value except for `\r` and `\n`.
- ///
- /// This is equivalent to `(?R-su:.)` and also `(?-u:[[\x00-\xFF]--\r\n])`.
- AnyByteExceptCRLF,
-}
-
-/// A custom `Drop` impl is used for `Hir` such that it uses constant stack
-/// space but heap space proportional to the depth of the total `Hir`.
-///
-/// Instead of letting nested children be dropped recursively, they are moved
-/// onto an explicit, heap-allocated work stack and dropped from there.
-impl Drop for Hir {
- fn drop(&mut self) {
- use core::mem;
-
- // Fast path: leaf expressions, captures/repetitions whose child has no
- // sub-expressions, and empty concatenations/alternations return here,
- // before the work stack below is ever allocated.
- match *self.kind() {
- HirKind::Empty
- | HirKind::Literal(_)
- | HirKind::Class(_)
- | HirKind::Look(_) => return,
- HirKind::Capture(ref x) if x.sub.kind.subs().is_empty() => return,
- HirKind::Repetition(ref x) if x.sub.kind.subs().is_empty() => {
- return
- }
- HirKind::Concat(ref x) if x.is_empty() => return,
- HirKind::Alternation(ref x) if x.is_empty() => return,
- _ => {}
- }
-
- // Move the contents of `self` onto the work stack, leaving an empty
- // `Hir` in its place.
- let mut stack = vec![mem::replace(self, Hir::empty())];
- while let Some(mut expr) = stack.pop() {
- // Detach every child of `expr` and push it onto the stack. By the
- // time `expr` goes out of scope at the end of this iteration, its
- // children have been replaced with `Hir::empty()` or drained away.
- match expr.kind {
- HirKind::Empty
- | HirKind::Literal(_)
- | HirKind::Class(_)
- | HirKind::Look(_) => {}
- HirKind::Capture(ref mut x) => {
- stack.push(mem::replace(&mut x.sub, Hir::empty()));
- }
- HirKind::Repetition(ref mut x) => {
- stack.push(mem::replace(&mut x.sub, Hir::empty()));
- }
- HirKind::Concat(ref mut x) => {
- stack.extend(x.drain(..));
- }
- HirKind::Alternation(ref mut x) => {
- stack.extend(x.drain(..));
- }
- }
- }
- }
-}
-
-/// A type that collects various properties of an HIR value.
-///
-/// Properties are always scalar values and represent metadata that is
-/// computed inductively on an HIR value. Properties are defined for all
-/// HIR values.
-///
-/// All methods on a `Properties` value take constant time and are meant to
-/// be cheap to call.
-///
-/// This is a thin wrapper around a boxed `PropertiesI`, which holds the
-/// actual property values.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct Properties(Box<PropertiesI>);
-
-/// The property definition. It is split out so that we can box it, and
-/// thereby make `Properties` use less stack size. This is kind-of important
-/// because every HIR value has a `Properties` attached to it.
-///
-/// This does have the unfortunate consequence that creating any HIR value
-/// always leads to at least one alloc for properties, but this is generally
-/// true anyway (for pretty much all HirKinds except for look-arounds).
-#[derive(Clone, Debug, Eq, PartialEq)]
-struct PropertiesI {
- /// Length in bytes of the shortest match, or `None` if nothing matches.
- minimum_len: Option<usize>,
- /// Length in bytes of the longest match, or `None` if nothing matches or
- /// there is no upper bound.
- maximum_len: Option<usize>,
- /// Every look-around assertion that appears at least once.
- look_set: LookSet,
- /// Assertions that must be passed before matching any bytes.
- look_set_prefix: LookSet,
- /// Assertions that must be passed after all consuming expressions.
- look_set_suffix: LookSet,
- /// Assertions that may be passed before matching any bytes.
- look_set_prefix_any: LookSet,
- /// Assertions that may be passed at the end of a match.
- look_set_suffix_any: LookSet,
- /// Whether every match is guaranteed to be valid UTF-8.
- utf8: bool,
- /// Total number of explicit capturing groups.
- explicit_captures_len: usize,
- /// Number of explicit capturing groups that appear in every possible
- /// match, or `None` if that number varies.
- static_explicit_captures_len: Option<usize>,
- /// Whether the expression is a literal or a concatenation of literals.
- literal: bool,
- /// Whether the expression is a literal or an alternation of literals.
- alternation_literal: bool,
-}
-
-impl Properties {
- /// Returns the length (in bytes) of the smallest string matched by this
- /// HIR.
- ///
- /// A return value of `0` is possible and occurs when the HIR can match an
- /// empty string.
- ///
- /// `None` is returned when there is no minimum length. This occurs in
- /// precisely the cases where the HIR matches nothing. i.e., The language
- /// the regex matches is empty. An example of such a regex is `\P{any}`.
- #[inline]
- pub fn minimum_len(&self) -> Option<usize> {
- self.0.minimum_len
- }
-
- /// Returns the length (in bytes) of the longest string matched by this
- /// HIR.
- ///
- /// A return value of `0` is possible and occurs when nothing longer than
- /// the empty string is in the language described by this HIR.
- ///
- /// `None` is returned when there is no longest matching string. This
- /// occurs when the HIR matches nothing or when there is no upper bound on
- /// the length of matching strings. Example of such regexes are `\P{any}`
- /// (matches nothing) and `a+` (has no upper bound).
- #[inline]
- pub fn maximum_len(&self) -> Option<usize> {
- self.0.maximum_len
- }
-
- /// Returns a set of all look-around assertions that appear at least once
- /// in this HIR value.
- #[inline]
- pub fn look_set(&self) -> LookSet {
- self.0.look_set
- }
-
- /// Returns a set of all look-around assertions that appear as a prefix for
- /// this HIR value. That is, the set returned corresponds to the set of
- /// assertions that must be passed before matching any bytes in a haystack.
- ///
- /// For example, `hir.look_set_prefix().contains(Look::Start)` returns true
- /// if and only if the HIR is fully anchored at the start.
- #[inline]
- pub fn look_set_prefix(&self) -> LookSet {
- self.0.look_set_prefix
- }
-
- /// Returns a set of all look-around assertions that appear as a _possible_
- /// prefix for this HIR value. That is, the set returned corresponds to the
- /// set of assertions that _may_ be passed before matching any bytes in a
- /// haystack.
- ///
- /// For example, `hir.look_set_prefix_any().contains(Look::Start)` returns
- /// true if and only if it's possible for the regex to match through an
- /// anchored assertion before consuming any input.
- #[inline]
- pub fn look_set_prefix_any(&self) -> LookSet {
- self.0.look_set_prefix_any
- }
-
- /// Returns a set of all look-around assertions that appear as a suffix for
- /// this HIR value. That is, the set returned corresponds to the set of
- /// assertions that must be passed in order to be considered a match after
- /// all other consuming HIR expressions.
- ///
- /// For example, `hir.look_set_suffix().contains(Look::End)` returns true
- /// if and only if the HIR is fully anchored at the end.
- #[inline]
- pub fn look_set_suffix(&self) -> LookSet {
- self.0.look_set_suffix
- }
-
- /// Returns a set of all look-around assertions that appear as a _possible_
- /// suffix for this HIR value. That is, the set returned corresponds to the
- /// set of assertions that _may_ be passed at the end of a match, after all
- /// other consuming HIR expressions.
- ///
- /// For example, `hir.look_set_suffix_any().contains(Look::End)` returns
- /// true if and only if it's possible for the regex to match through an
- /// anchored assertion at the end of a match without consuming any input.
- #[inline]
- pub fn look_set_suffix_any(&self) -> LookSet {
- self.0.look_set_suffix_any
- }
-
- /// Return true if and only if the corresponding HIR will always match
- /// valid UTF-8.
- ///
- /// When this returns false, then it is possible for this HIR expression to
- /// match invalid UTF-8, including by matching between the code units of
- /// a single UTF-8 encoded codepoint.
- ///
- /// Note that this returns true even when the corresponding HIR can match
- /// the empty string. Since an empty string can technically appear between
- /// UTF-8 code units, it is possible for a match to be reported that splits
- /// a codepoint which could in turn be considered matching invalid UTF-8.
- /// However, it is generally assumed that such empty matches are handled
- /// specially by the search routine if it is absolutely required that
- /// matches not split a codepoint.
- ///
- /// # Example
- ///
- /// This code example shows the UTF-8 property of a variety of patterns.
- ///
- /// ```
- /// use regex_syntax::{ParserBuilder, parse};
- ///
- /// // Examples of 'is_utf8() == true'.
- /// assert!(parse(r"a")?.properties().is_utf8());
- /// assert!(parse(r"[^a]")?.properties().is_utf8());
- /// assert!(parse(r".")?.properties().is_utf8());
- /// assert!(parse(r"\W")?.properties().is_utf8());
- /// assert!(parse(r"\b")?.properties().is_utf8());
- /// assert!(parse(r"\B")?.properties().is_utf8());
- /// assert!(parse(r"(?-u)\b")?.properties().is_utf8());
- /// assert!(parse(r"(?-u)\B")?.properties().is_utf8());
- /// // Unicode mode is enabled by default, and in
- /// // that mode, all \x hex escapes are treated as
- /// // codepoints. So this actually matches the UTF-8
- /// // encoding of U+00FF.
- /// assert!(parse(r"\xFF")?.properties().is_utf8());
- ///
- /// // Now we show examples of 'is_utf8() == false'.
- /// // The only way to do this is to force the parser
- /// // to permit invalid UTF-8, otherwise all of these
- /// // would fail to parse!
- /// let parse = |pattern| {
- /// ParserBuilder::new().utf8(false).build().parse(pattern)
- /// };
- /// assert!(!parse(r"(?-u)[^a]")?.properties().is_utf8());
- /// assert!(!parse(r"(?-u).")?.properties().is_utf8());
- /// assert!(!parse(r"(?-u)\W")?.properties().is_utf8());
- /// // Conversely to the equivalent example above,
- /// // when Unicode mode is disabled, \x hex escapes
- /// // are treated as their raw byte values.
- /// assert!(!parse(r"(?-u)\xFF")?.properties().is_utf8());
- /// // Note that just because we disabled UTF-8 in the
- /// // parser doesn't mean we still can't use Unicode.
- /// // It is enabled by default, so \xFF is still
- /// // equivalent to matching the UTF-8 encoding of
- /// // U+00FF by default.
- /// assert!(parse(r"\xFF")?.properties().is_utf8());
- /// // Even though we use raw bytes that individually
- /// // are not valid UTF-8, when combined together, the
- /// // overall expression *does* match valid UTF-8!
- /// assert!(parse(r"(?-u)\xE2\x98\x83")?.properties().is_utf8());
- ///
- /// # Ok::<(), Box<dyn std::error::Error>>(())
- /// ```
- #[inline]
- pub fn is_utf8(&self) -> bool {
- self.0.utf8
- }
-
- /// Returns the total number of explicit capturing groups in the
- /// corresponding HIR.
- ///
- /// Note that this does not include the implicit capturing group
- /// corresponding to the entire match that is typically included by regex
- /// engines.
- ///
- /// # Example
- ///
- /// This method will return `0` for `a` and `1` for `(a)`:
- ///
- /// ```
- /// use regex_syntax::parse;
- ///
- /// assert_eq!(0, parse("a")?.properties().explicit_captures_len());
- /// assert_eq!(1, parse("(a)")?.properties().explicit_captures_len());
- ///
- /// # Ok::<(), Box<dyn std::error::Error>>(())
- /// ```
- #[inline]
- pub fn explicit_captures_len(&self) -> usize {
- self.0.explicit_captures_len
- }
-
- /// Returns the total number of explicit capturing groups that appear in
- /// every possible match.
- ///
- /// If the number of capture groups can vary depending on the match, then
- /// this returns `None`. That is, a value is only returned when the number
- /// of matching groups is invariant or "static."
- ///
- /// Note that this does not include the implicit capturing group
- /// corresponding to the entire match.
- ///
- /// # Example
- ///
- /// This shows a few cases where a static number of capture groups is
- /// available and a few cases where it is not.
- ///
- /// ```
- /// use regex_syntax::parse;
- ///
- /// let len = |pattern| {
- /// parse(pattern).map(|h| {
- /// h.properties().static_explicit_captures_len()
- /// })
- /// };
- ///
- /// assert_eq!(Some(0), len("a")?);
- /// assert_eq!(Some(1), len("(a)")?);
- /// assert_eq!(Some(1), len("(a)|(b)")?);
- /// assert_eq!(Some(2), len("(a)(b)|(c)(d)")?);
- /// assert_eq!(None, len("(a)|b")?);
- /// assert_eq!(None, len("a|(b)")?);
- /// assert_eq!(None, len("(b)*")?);
- /// assert_eq!(Some(1), len("(b)+")?);
- ///
- /// # Ok::<(), Box<dyn std::error::Error>>(())
- /// ```
- #[inline]
- pub fn static_explicit_captures_len(&self) -> Option<usize> {
- self.0.static_explicit_captures_len
- }
-
- /// Return true if and only if this HIR is a simple literal. This is
- /// only true when this HIR expression is either itself a `Literal` or a
- /// concatenation of only `Literal`s.
- ///
- /// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()` and
- /// the empty string are not (even though they contain sub-expressions that
- /// are literals).
- #[inline]
- pub fn is_literal(&self) -> bool {
- self.0.literal
- }
-
- /// Return true if and only if this HIR is either a simple literal or an
- /// alternation of simple literals. This is only
- /// true when this HIR expression is either itself a `Literal` or a
- /// concatenation of only `Literal`s or an alternation of only `Literal`s.
- ///
- /// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternation
- /// literals, but `f+`, `(foo)`, `foo()`, and the empty pattern are not
- /// (even though they contain sub-expressions that are literals).
- #[inline]
- pub fn is_alternation_literal(&self) -> bool {
- self.0.alternation_literal
- }
-
- /// Returns the total amount of heap memory usage, in bytes, used by this
- /// `Properties` value.
- #[inline]
- pub fn memory_usage(&self) -> usize {
- // The only heap allocation owned by a `Properties` is its boxed
- // `PropertiesI`, whose fields are all plain scalar values.
- core::mem::size_of::<PropertiesI>()
- }
-
- /// Returns a new set of properties that corresponds to the union of the
- /// iterator of properties given.
- ///
- /// This is useful when one has multiple `Hir` expressions and wants
- /// to combine them into a single alternation without constructing the
- /// corresponding `Hir`. This routine provides a way of combining the
- /// properties of each `Hir` expression into one set of properties
- /// representing the union of those expressions.
- ///
- /// # Example: union with HIRs that never match
- ///
- /// This example shows that unioning properties together with one that
- /// represents a regex that never matches will "poison" certain attributes,
- /// like the minimum and maximum lengths.
- ///
- /// ```
- /// use regex_syntax::{hir::Properties, parse};
- ///
- /// let hir1 = parse("ab?c?")?;
- /// assert_eq!(Some(1), hir1.properties().minimum_len());
- /// assert_eq!(Some(3), hir1.properties().maximum_len());
- ///
- /// let hir2 = parse(r"[a&&b]")?;
- /// assert_eq!(None, hir2.properties().minimum_len());
- /// assert_eq!(None, hir2.properties().maximum_len());
- ///
- /// let hir3 = parse(r"wxy?z?")?;
- /// assert_eq!(Some(2), hir3.properties().minimum_len());
- /// assert_eq!(Some(4), hir3.properties().maximum_len());
- ///
- /// let unioned = Properties::union([
- /// hir1.properties(),
- /// hir2.properties(),
- /// hir3.properties(),
- /// ]);
- /// assert_eq!(None, unioned.minimum_len());
- /// assert_eq!(None, unioned.maximum_len());
- ///
- /// # Ok::<(), Box<dyn std::error::Error>>(())
- /// ```
- ///
- /// The maximum length can also be "poisoned" by a pattern that has no
- /// upper bound on the length of a match. The minimum length remains
- /// unaffected:
- ///
- /// ```
- /// use regex_syntax::{hir::Properties, parse};
- ///
- /// let hir1 = parse("ab?c?")?;
- /// assert_eq!(Some(1), hir1.properties().minimum_len());
- /// assert_eq!(Some(3), hir1.properties().maximum_len());
- ///
- /// let hir2 = parse(r"a+")?;
- /// assert_eq!(Some(1), hir2.properties().minimum_len());
- /// assert_eq!(None, hir2.properties().maximum_len());
- ///
- /// let hir3 = parse(r"wxy?z?")?;
- /// assert_eq!(Some(2), hir3.properties().minimum_len());
- /// assert_eq!(Some(4), hir3.properties().maximum_len());
- ///
- /// let unioned = Properties::union([
- /// hir1.properties(),
- /// hir2.properties(),
- /// hir3.properties(),
- /// ]);
- /// assert_eq!(Some(1), unioned.minimum_len());
- /// assert_eq!(None, unioned.maximum_len());
- ///
- /// # Ok::<(), Box<dyn std::error::Error>>(())
- /// ```
- pub fn union<I, P>(props: I) -> Properties
- where
- I: IntoIterator<Item = P>,
- P: core::borrow::Borrow<Properties>,
- {
- let mut it = props.into_iter().peekable();
- // While empty alternations aren't possible, we still behave as if they
- // are. When we have an empty alternate, then clearly the look-around
- // prefix and suffix is empty. Otherwise, it is the intersection of all
- // prefixes and suffixes (respectively) of the branches.
- let fix = if it.peek().is_none() {
- LookSet::empty()
- } else {
- LookSet::full()
- };
- // And also, an empty alternate means we have 0 static capture groups,
- // but we otherwise start with the number corresponding to the first
- // alternate. If any subsequent alternate has a different number of
- // static capture groups, then we overall have a variation and not a
- // static number of groups.
- let static_explicit_captures_len =
- it.peek().and_then(|p| p.borrow().static_explicit_captures_len());
- // The base case is an empty alternation, which matches nothing.
- // Note though that empty alternations aren't possible, because the
- // Hir::alternation smart constructor rewrites those as empty character
- // classes.
- let mut props = PropertiesI {
- minimum_len: None,
- maximum_len: None,
- look_set: LookSet::empty(),
- look_set_prefix: fix,
- look_set_suffix: fix,
- look_set_prefix_any: LookSet::empty(),
- look_set_suffix_any: LookSet::empty(),
- utf8: true,
- explicit_captures_len: 0,
- static_explicit_captures_len,
- literal: false,
- alternation_literal: true,
- };
- // Once a branch reports `None` for a length, the corresponding result
- // stays `None` no matter what later branches report.
- let (mut min_poisoned, mut max_poisoned) = (false, false);
- // Handle properties that need to visit every child hir.
- for prop in it {
- let p = prop.borrow();
- props.look_set.set_union(p.look_set());
- props.look_set_prefix.set_intersect(p.look_set_prefix());
- props.look_set_suffix.set_intersect(p.look_set_suffix());
- props.look_set_prefix_any.set_union(p.look_set_prefix_any());
- props.look_set_suffix_any.set_union(p.look_set_suffix_any());
- props.utf8 = props.utf8 && p.is_utf8();
- props.explicit_captures_len = props
- .explicit_captures_len
- .saturating_add(p.explicit_captures_len());
- if props.static_explicit_captures_len
- != p.static_explicit_captures_len()
- {
- props.static_explicit_captures_len = None;
- }
- // The union is an alternation literal only if every branch is
- // itself a plain literal.
- props.alternation_literal =
- props.alternation_literal && p.is_literal();
- if !min_poisoned {
- if let Some(xmin) = p.minimum_len() {
- if props.minimum_len.map_or(true, |pmin| xmin < pmin) {
- props.minimum_len = Some(xmin);
- }
- } else {
- props.minimum_len = None;
- min_poisoned = true;
- }
- }
- if !max_poisoned {
- if let Some(xmax) = p.maximum_len() {
- if props.maximum_len.map_or(true, |pmax| xmax > pmax) {
- props.maximum_len = Some(xmax);
- }
- } else {
- props.maximum_len = None;
- max_poisoned = true;
- }
- }
- }
- Properties(Box::new(props))
- }
-}
-
-impl Properties {
- /// Create a new set of HIR properties for an empty regex.
- fn empty() -> Properties {
- let inner = PropertiesI {
- minimum_len: Some(0),
- maximum_len: Some(0),
- look_set: LookSet::empty(),
- look_set_prefix: LookSet::empty(),
- look_set_suffix: LookSet::empty(),
- look_set_prefix_any: LookSet::empty(),
- look_set_suffix_any: LookSet::empty(),
- // It is debatable whether an empty regex always matches at valid
- // UTF-8 boundaries. Strictly speaking, at a byte oriented view,
- // it is clearly false. There are, for example, many empty strings
- // between the bytes encoding a '☃'.
- //
- // However, when Unicode mode is enabled, the fundamental atom
- // of matching is really a codepoint. And in that scenario, an
- // empty regex is defined to only match at valid UTF-8 boundaries
- // and to never split a codepoint. It just so happens that this
- // enforcement is somewhat tricky to do for regexes that match
- // the empty string inside regex engines themselves. It usually
- // requires some layer above the regex engine to filter out such
- // matches.
- //
- // In any case, 'true' is really the only coherent option. If it
- // were false, for example, then 'a*' would also need to be false
- // since it too can match the empty string.
- utf8: true,
- explicit_captures_len: 0,
- static_explicit_captures_len: Some(0),
- literal: false,
- alternation_literal: false,
- };
- Properties(Box::new(inner))
- }
-
- /// Create a new set of HIR properties for a literal regex.
- fn literal(lit: &Literal) -> Properties {
- let inner = PropertiesI {
- minimum_len: Some(lit.0.len()),
- maximum_len: Some(lit.0.len()),
- look_set: LookSet::empty(),
- look_set_prefix: LookSet::empty(),
- look_set_suffix: LookSet::empty(),
- look_set_prefix_any: LookSet::empty(),
- look_set_suffix_any: LookSet::empty(),
- utf8: core::str::from_utf8(&lit.0).is_ok(),
- explicit_captures_len: 0,
- static_explicit_captures_len: Some(0),
- literal: true,
- alternation_literal: true,
- };
- Properties(Box::new(inner))
- }
-
- /// Create a new set of HIR properties for a character class.
- fn class(class: &Class) -> Properties {
- let inner = PropertiesI {
- minimum_len: class.minimum_len(),
- maximum_len: class.maximum_len(),
- look_set: LookSet::empty(),
- look_set_prefix: LookSet::empty(),
- look_set_suffix: LookSet::empty(),
- look_set_prefix_any: LookSet::empty(),
- look_set_suffix_any: LookSet::empty(),
- utf8: class.is_utf8(),
- explicit_captures_len: 0,
- static_explicit_captures_len: Some(0),
- literal: false,
- alternation_literal: false,
- };
- Properties(Box::new(inner))
- }
-
- /// Create a new set of HIR properties for a look-around assertion.
- fn look(look: Look) -> Properties {
- let inner = PropertiesI {
- minimum_len: Some(0),
- maximum_len: Some(0),
- look_set: LookSet::singleton(look),
- look_set_prefix: LookSet::singleton(look),
- look_set_suffix: LookSet::singleton(look),
- look_set_prefix_any: LookSet::singleton(look),
- look_set_suffix_any: LookSet::singleton(look),
- // This requires a little explanation. Basically, we don't consider
- // matching an empty string to be equivalent to matching invalid
- // UTF-8, even though technically matching every empty string will
- // split the UTF-8 encoding of a single codepoint when treating a
- // UTF-8 encoded string as a sequence of bytes. Our defense here is
- // that in such a case, a codepoint should logically be treated as
- // the fundamental atom for matching, and thus the only valid match
- // points are between codepoints and not bytes.
- //
- // More practically, this is true here because it's also true
- // for 'Hir::empty()', otherwise something like 'a*' would be
- // considered to match invalid UTF-8. That in turn makes this
- // property borderline useless.
- utf8: true,
- explicit_captures_len: 0,
- static_explicit_captures_len: Some(0),
- literal: false,
- alternation_literal: false,
- };
- Properties(Box::new(inner))
- }
-
- /// Create a new set of HIR properties for a repetition.
- fn repetition(rep: &Repetition) -> Properties {
- let p = rep.sub.properties();
- // The minimum is only a lower bound, so saturating on overflow is
- // acceptable here.
- let minimum_len = p.minimum_len().map(|child_min| {
- let rep_min = usize::try_from(rep.min).unwrap_or(usize::MAX);
- child_min.saturating_mul(rep_min)
- });
- // The maximum becomes `None` when the repetition is unbounded, when
- // the child has no maximum, or when the product overflows.
- let maximum_len = rep.max.and_then(|rep_max| {
- let rep_max = usize::try_from(rep_max).ok()?;
- let child_max = p.maximum_len()?;
- child_max.checked_mul(rep_max)
- });
-
- let mut inner = PropertiesI {
- minimum_len,
- maximum_len,
- look_set: p.look_set(),
- look_set_prefix: LookSet::empty(),
- look_set_suffix: LookSet::empty(),
- look_set_prefix_any: p.look_set_prefix_any(),
- look_set_suffix_any: p.look_set_suffix_any(),
- utf8: p.is_utf8(),
- explicit_captures_len: p.explicit_captures_len(),
- static_explicit_captures_len: p.static_explicit_captures_len(),
- literal: false,
- alternation_literal: false,
- };
- // If the repetition operator can match the empty string, then its
- // lookset prefix and suffixes themselves remain empty since they are
- // no longer required to match.
- if rep.min > 0 {
- inner.look_set_prefix = p.look_set_prefix();
- inner.look_set_suffix = p.look_set_suffix();
- }
- // If the static captures len of the sub-expression is not known or
- // is greater than zero, then it automatically propagates to the
- // repetition, regardless of the repetition. Otherwise, it might
- // change, but only when the repetition can match 0 times.
- if rep.min == 0
- && inner.static_explicit_captures_len.map_or(false, |len| len > 0)
- {
- // If we require a match 0 times, then our captures len is
- // guaranteed to be zero. Otherwise, if we *can* match the empty
- // string, then it's impossible to know how many captures will be
- // in the resulting match.
- if rep.max == Some(0) {
- inner.static_explicit_captures_len = Some(0);
- } else {
- inner.static_explicit_captures_len = None;
- }
- }
- Properties(Box::new(inner))
- }
-
- /// Create a new set of HIR properties for a capture.
- fn capture(capture: &Capture) -> Properties {
- let p = capture.sub.properties();
- Properties(Box::new(PropertiesI {
- explicit_captures_len: p.explicit_captures_len().saturating_add(1),
- static_explicit_captures_len: p
- .static_explicit_captures_len()
- .map(|len| len.saturating_add(1)),
- literal: false,
- alternation_literal: false,
- // Every other property is copied unchanged from the sub-expression.
- ..*p.0.clone()
- }))
- }
-
- /// Create a new set of HIR properties for a concatenation.
- fn concat(concat: &[Hir]) -> Properties {
- // The base case is an empty concatenation, which matches the empty
- // string. Note though that empty concatenations aren't possible,
- // because the Hir::concat smart constructor rewrites those as
- // Hir::empty.
- let mut props = PropertiesI {
- minimum_len: Some(0),
- maximum_len: Some(0),
- look_set: LookSet::empty(),
- look_set_prefix: LookSet::empty(),
- look_set_suffix: LookSet::empty(),
- look_set_prefix_any: LookSet::empty(),
- look_set_suffix_any: LookSet::empty(),
- utf8: true,
- explicit_captures_len: 0,
- static_explicit_captures_len: Some(0),
- literal: true,
- alternation_literal: true,
- };
- // Handle properties that need to visit every child hir.
- for x in concat.iter() {
- let p = x.properties();
- props.look_set.set_union(p.look_set());
- props.utf8 = props.utf8 && p.is_utf8();
- props.explicit_captures_len = props
- .explicit_captures_len
- .saturating_add(p.explicit_captures_len());
- // The static count is the sum of the children's static counts, and
- // becomes `None` as soon as any child's count is `None`.
- props.static_explicit_captures_len = p
- .static_explicit_captures_len()
- .and_then(|len1| {
- Some((len1, props.static_explicit_captures_len?))
- })
- .and_then(|(len1, len2)| Some(len1.saturating_add(len2)));
- props.literal = props.literal && p.is_literal();
- props.alternation_literal =
- props.alternation_literal && p.is_alternation_literal();
- if let Some(minimum_len) = props.minimum_len {
- match p.minimum_len() {
- None => props.minimum_len = None,
- Some(len) => {
- // We use saturating arithmetic here because the
- // minimum is just a lower bound. We can't go any
- // higher than what our number types permit.
- props.minimum_len =
- Some(minimum_len.saturating_add(len));
- }
- }
- }
- if let Some(maximum_len) = props.maximum_len {
- match p.maximum_len() {
- None => props.maximum_len = None,
- Some(len) => {
- // Unlike the minimum, an overflowing maximum is
- // reported as `None` rather than saturated.
- props.maximum_len = maximum_len.checked_add(len)
- }
- }
- }
- }
- // Handle the prefix properties, which only requires visiting
- // child exprs until one matches more than the empty string.
- let mut it = concat.iter();
- while let Some(x) = it.next() {
- props.look_set_prefix.set_union(x.properties().look_set_prefix());
- props
- .look_set_prefix_any
- .set_union(x.properties().look_set_prefix_any());
- if x.properties().maximum_len().map_or(true, |x| x > 0) {
- break;
- }
- }
- // Same thing for the suffix properties, but in reverse.
- let mut it = concat.iter().rev();
- while let Some(x) = it.next() {
- props.look_set_suffix.set_union(x.properties().look_set_suffix());
- props
- .look_set_suffix_any
- .set_union(x.properties().look_set_suffix_any());
- if x.properties().maximum_len().map_or(true, |x| x > 0) {
- break;
- }
- }
- Properties(Box::new(props))
- }
-
- /// Create a new set of HIR properties for an alternation.
- ///
- /// This is the union of the properties of every branch.
- fn alternation(alts: &[Hir]) -> Properties {
- Properties::union(alts.iter().map(|hir| hir.properties()))
- }
-}
-
-/// A set of look-around assertions.
-///
-/// This is useful for efficiently tracking look-around assertions. For
-/// example, an [`Hir`] provides properties that return `LookSet`s.
-#[derive(Clone, Copy, Default, Eq, PartialEq)]
-pub struct LookSet {
- /// The underlying representation of this set. It is exposed to make it
- /// possible to store it somewhere efficiently. The representation is that
- /// of a bitset, where each assertion occupies bit `i` where `i =
- /// Look::as_repr()`.
- ///
- /// Note that users of this internal representation must permit the full
- /// range of `u32` values to be represented. For example, even if the
- /// current implementation only makes use of some of the least significant
- /// bits, it may use more bits in a future semver compatible release.
- pub bits: u32,
-}
-
-impl LookSet {
- /// Create an empty set of look-around assertions.
- #[inline]
- pub fn empty() -> LookSet {
- LookSet { bits: 0 }
- }
-
- /// Create a full set of look-around assertions.
- ///
- /// This set contains all possible look-around assertions.
- #[inline]
- pub fn full() -> LookSet {
- LookSet { bits: !0 }
- }
-
- /// Create a look-around set containing the look-around assertion given.
- ///
- /// This is a convenience routine for creating an empty set and inserting
- /// one look-around assertions.
- #[inline]
- pub fn singleton(look: Look) -> LookSet {
- LookSet::empty().insert(look)
- }
-
- /// Returns the total number of look-around assertions in this set.
- #[inline]
- pub fn len(self) -> usize {
- // OK because max value always fits in a u8, which in turn always
- // fits in a usize, regardless of target.
- usize::try_from(self.bits.count_ones()).unwrap()
- }
-
- /// Returns true if and only if this set is empty.
- #[inline]
- pub fn is_empty(self) -> bool {
- self.len() == 0
- }
-
- /// Returns true if and only if the given look-around assertion is in this
- /// set.
- #[inline]
- pub fn contains(self, look: Look) -> bool {
- self.bits & look.as_repr() != 0
- }
-
- /// Returns true if and only if this set contains any anchor assertions.
- /// This includes both "start/end of haystack" and "start/end of line."
- #[inline]
- pub fn contains_anchor(&self) -> bool {
- self.contains_anchor_haystack() || self.contains_anchor_line()
- }
-
- /// Returns true if and only if this set contains any "start/end of
- /// haystack" anchors. This doesn't include "start/end of line" anchors.
- #[inline]
- pub fn contains_anchor_haystack(&self) -> bool {
- self.contains(Look::Start) || self.contains(Look::End)
- }
-
- /// Returns true if and only if this set contains any "start/end of line"
- /// anchors. This doesn't include "start/end of haystack" anchors. This
- /// includes both `\n` line anchors and CRLF (`\r\n`) aware line anchors.
- #[inline]
- pub fn contains_anchor_line(&self) -> bool {
- self.contains(Look::StartLF)
- || self.contains(Look::EndLF)
- || self.contains(Look::StartCRLF)
- || self.contains(Look::EndCRLF)
- }
-
- /// Returns true if and only if this set contains any "start/end of line"
- /// anchors that only treat `\n` as line terminators. This does not include
- /// haystack anchors or CRLF aware line anchors.
- #[inline]
- pub fn contains_anchor_lf(&self) -> bool {
- self.contains(Look::StartLF) || self.contains(Look::EndLF)
- }
-
- /// Returns true if and only if this set contains any "start/end of line"
- /// anchors that are CRLF-aware. This doesn't include "start/end of
- /// haystack" or "start/end of line-feed" anchors.
- #[inline]
- pub fn contains_anchor_crlf(&self) -> bool {
- self.contains(Look::StartCRLF) || self.contains(Look::EndCRLF)
- }
-
- /// Returns true if and only if this set contains any word boundary or
- /// negated word boundary assertions. This include both Unicode and ASCII
- /// word boundaries.
- #[inline]
- pub fn contains_word(self) -> bool {
- self.contains_word_unicode() || self.contains_word_ascii()
- }
-
- /// Returns true if and only if this set contains any Unicode word boundary
- /// or negated Unicode word boundary assertions.
- #[inline]
- pub fn contains_word_unicode(self) -> bool {
- self.contains(Look::WordUnicode)
- || self.contains(Look::WordUnicodeNegate)
- || self.contains(Look::WordStartUnicode)
- || self.contains(Look::WordEndUnicode)
- || self.contains(Look::WordStartHalfUnicode)
- || self.contains(Look::WordEndHalfUnicode)
- }
-
- /// Returns true if and only if this set contains any ASCII word boundary
- /// or negated ASCII word boundary assertions.
- #[inline]
- pub fn contains_word_ascii(self) -> bool {
- self.contains(Look::WordAscii)
- || self.contains(Look::WordAsciiNegate)
- || self.contains(Look::WordStartAscii)
- || self.contains(Look::WordEndAscii)
- || self.contains(Look::WordStartHalfAscii)
- || self.contains(Look::WordEndHalfAscii)
- }
-
- /// Returns an iterator over all of the look-around assertions in this set.
- #[inline]
- pub fn iter(self) -> LookSetIter {
- LookSetIter { set: self }
- }
-
- /// Return a new set that is equivalent to the original, but with the given
- /// assertion added to it. If the assertion is already in the set, then the
- /// returned set is equivalent to the original.
- #[inline]
- pub fn insert(self, look: Look) -> LookSet {
- LookSet { bits: self.bits | look.as_repr() }
- }
-
- /// Updates this set in place with the result of inserting the given
- /// assertion into this set.
- #[inline]
- pub fn set_insert(&mut self, look: Look) {
- *self = self.insert(look);
- }
-
- /// Return a new set that is equivalent to the original, but with the given
- /// assertion removed from it. If the assertion is not in the set, then the
- /// returned set is equivalent to the original.
- #[inline]
- pub fn remove(self, look: Look) -> LookSet {
- LookSet { bits: self.bits & !look.as_repr() }
- }
-
- /// Updates this set in place with the result of removing the given
- /// assertion from this set.
- #[inline]
- pub fn set_remove(&mut self, look: Look) {
- *self = self.remove(look);
- }
-
- /// Returns a new set that is the result of subtracting the given set from
- /// this set.
- #[inline]
- pub fn subtract(self, other: LookSet) -> LookSet {
- LookSet { bits: self.bits & !other.bits }
- }
-
- /// Updates this set in place with the result of subtracting the given set
- /// from this set.
- #[inline]
- pub fn set_subtract(&mut self, other: LookSet) {
- *self = self.subtract(other);
- }
-
- /// Returns a new set that is the union of this and the one given.
- #[inline]
- pub fn union(self, other: LookSet) -> LookSet {
- LookSet { bits: self.bits | other.bits }
- }
-
- /// Updates this set in place with the result of unioning it with the one
- /// given.
- #[inline]
- pub fn set_union(&mut self, other: LookSet) {
- *self = self.union(other);
- }
-
- /// Returns a new set that is the intersection of this and the one given.
- #[inline]
- pub fn intersect(self, other: LookSet) -> LookSet {
- LookSet { bits: self.bits & other.bits }
- }
-
- /// Updates this set in place with the result of intersecting it with the
- /// one given.
- #[inline]
- pub fn set_intersect(&mut self, other: LookSet) {
- *self = self.intersect(other);
- }
-
- /// Return a `LookSet` from the slice given as a native endian 32-bit
- /// integer.
- ///
- /// # Panics
- ///
- /// This panics if `slice.len() < 4`.
- #[inline]
- pub fn read_repr(slice: &[u8]) -> LookSet {
- let bits = u32::from_ne_bytes(slice[..4].try_into().unwrap());
- LookSet { bits }
- }
-
- /// Write a `LookSet` as a native endian 32-bit integer to the beginning
- /// of the slice given.
- ///
- /// # Panics
- ///
- /// This panics if `slice.len() < 4`.
- #[inline]
- pub fn write_repr(self, slice: &mut [u8]) {
- let raw = self.bits.to_ne_bytes();
- slice[0] = raw[0];
- slice[1] = raw[1];
- slice[2] = raw[2];
- slice[3] = raw[3];
- }
-}
-
-impl core::fmt::Debug for LookSet {
- fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
- if self.is_empty() {
- return write!(f, "∅");
- }
- for look in self.iter() {
- write!(f, "{}", look.as_char())?;
- }
- Ok(())
- }
-}
-
-/// An iterator over all look-around assertions in a [`LookSet`].
-///
-/// This iterator is created by [`LookSet::iter`].
-#[derive(Clone, Debug)]
-pub struct LookSetIter {
- set: LookSet,
-}
-
-impl Iterator for LookSetIter {
- type Item = Look;
-
- #[inline]
- fn next(&mut self) -> Option<Look> {
- if self.set.is_empty() {
- return None;
- }
- // We'll never have more than u8::MAX distinct look-around assertions,
- // so 'bit' will always fit into a u16.
- let bit = u16::try_from(self.set.bits.trailing_zeros()).unwrap();
- let look = Look::from_repr(1 << bit)?;
- self.set = self.set.remove(look);
- Some(look)
- }
-}
-
-/// Given a sequence of HIR values where each value corresponds to a Unicode
-/// class (or an all-ASCII byte class), return a single Unicode class
-/// corresponding to the union of the classes found.
-fn class_chars(hirs: &[Hir]) -> Option<Class> {
- let mut cls = ClassUnicode::new(vec![]);
- for hir in hirs.iter() {
- match *hir.kind() {
- HirKind::Class(Class::Unicode(ref cls2)) => {
- cls.union(cls2);
- }
- HirKind::Class(Class::Bytes(ref cls2)) => {
- cls.union(&cls2.to_unicode_class()?);
- }
- _ => return None,
- };
- }
- Some(Class::Unicode(cls))
-}
-
-/// Given a sequence of HIR values where each value corresponds to a byte class
-/// (or an all-ASCII Unicode class), return a single byte class corresponding
-/// to the union of the classes found.
-fn class_bytes(hirs: &[Hir]) -> Option<Class> {
- let mut cls = ClassBytes::new(vec![]);
- for hir in hirs.iter() {
- match *hir.kind() {
- HirKind::Class(Class::Unicode(ref cls2)) => {
- cls.union(&cls2.to_byte_class()?);
- }
- HirKind::Class(Class::Bytes(ref cls2)) => {
- cls.union(cls2);
- }
- _ => return None,
- };
- }
- Some(Class::Bytes(cls))
-}
-
-/// Given a sequence of HIR values where each value corresponds to a literal
-/// that is a single `char`, return that sequence of `char`s. Otherwise return
-/// None. No deduplication is done.
-fn singleton_chars(hirs: &[Hir]) -> Option<Vec<char>> {
- let mut singletons = vec![];
- for hir in hirs.iter() {
- let literal = match *hir.kind() {
- HirKind::Literal(Literal(ref bytes)) => bytes,
- _ => return None,
- };
- let ch = match crate::debug::utf8_decode(literal) {
- None => return None,
- Some(Err(_)) => return None,
- Some(Ok(ch)) => ch,
- };
- if literal.len() != ch.len_utf8() {
- return None;
- }
- singletons.push(ch);
- }
- Some(singletons)
-}
-
-/// Given a sequence of HIR values where each value corresponds to a literal
-/// that is a single byte, return that sequence of bytes. Otherwise return
-/// None. No deduplication is done.
-fn singleton_bytes(hirs: &[Hir]) -> Option<Vec<u8>> {
- let mut singletons = vec![];
- for hir in hirs.iter() {
- let literal = match *hir.kind() {
- HirKind::Literal(Literal(ref bytes)) => bytes,
- _ => return None,
- };
- if literal.len() != 1 {
- return None;
- }
- singletons.push(literal[0]);
- }
- Some(singletons)
-}
-
-/// Looks for a common prefix in the list of alternation branches given. If one
-/// is found, then an equivalent but (hopefully) simplified Hir is returned.
-/// Otherwise, the original given list of branches is returned unmodified.
-///
-/// This is not quite as good as it could be. Right now, it requires that
-/// all branches are 'Concat' expressions. It also doesn't do well with
-/// literals. For example, given 'foofoo|foobar', it will not refactor it to
-/// 'foo(?:foo|bar)' because literals are flattened into their own special
-/// concatenation. (One wonders if perhaps 'Literal' should be a single atom
-/// instead of a string of bytes because of this. Otherwise, handling the
-/// current representation in this routine will be pretty gnarly. Sigh.)
-fn lift_common_prefix(hirs: Vec<Hir>) -> Result<Hir, Vec<Hir>> {
- if hirs.len() <= 1 {
- return Err(hirs);
- }
- let mut prefix = match hirs[0].kind() {
- HirKind::Concat(ref xs) => &**xs,
- _ => return Err(hirs),
- };
- if prefix.is_empty() {
- return Err(hirs);
- }
- for h in hirs.iter().skip(1) {
- let concat = match h.kind() {
- HirKind::Concat(ref xs) => xs,
- _ => return Err(hirs),
- };
- let common_len = prefix
- .iter()
- .zip(concat.iter())
- .take_while(|(x, y)| x == y)
- .count();
- prefix = &prefix[..common_len];
- if prefix.is_empty() {
- return Err(hirs);
- }
- }
- let len = prefix.len();
- assert_ne!(0, len);
- let mut prefix_concat = vec![];
- let mut suffix_alts = vec![];
- for h in hirs {
- let mut concat = match h.into_kind() {
- HirKind::Concat(xs) => xs,
- // We required all sub-expressions to be
- // concats above, so we're only here if we
- // have a concat.
- _ => unreachable!(),
- };
- suffix_alts.push(Hir::concat(concat.split_off(len)));
- if prefix_concat.is_empty() {
- prefix_concat = concat;
- }
- }
- let mut concat = prefix_concat;
- concat.push(Hir::alternation(suffix_alts));
- Ok(Hir::concat(concat))
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- fn uclass(ranges: &[(char, char)]) -> ClassUnicode {
- let ranges: Vec<ClassUnicodeRange> = ranges
- .iter()
- .map(|&(s, e)| ClassUnicodeRange::new(s, e))
- .collect();
- ClassUnicode::new(ranges)
- }
-
- fn bclass(ranges: &[(u8, u8)]) -> ClassBytes {
- let ranges: Vec<ClassBytesRange> =
- ranges.iter().map(|&(s, e)| ClassBytesRange::new(s, e)).collect();
- ClassBytes::new(ranges)
- }
-
- fn uranges(cls: &ClassUnicode) -> Vec<(char, char)> {
- cls.iter().map(|x| (x.start(), x.end())).collect()
- }
-
- #[cfg(feature = "unicode-case")]
- fn ucasefold(cls: &ClassUnicode) -> ClassUnicode {
- let mut cls_ = cls.clone();
- cls_.case_fold_simple();
- cls_
- }
-
- fn uunion(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
- let mut cls_ = cls1.clone();
- cls_.union(cls2);
- cls_
- }
-
- fn uintersect(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
- let mut cls_ = cls1.clone();
- cls_.intersect(cls2);
- cls_
- }
-
- fn udifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
- let mut cls_ = cls1.clone();
- cls_.difference(cls2);
- cls_
- }
-
- fn usymdifference(
- cls1: &ClassUnicode,
- cls2: &ClassUnicode,
- ) -> ClassUnicode {
- let mut cls_ = cls1.clone();
- cls_.symmetric_difference(cls2);
- cls_
- }
-
- fn unegate(cls: &ClassUnicode) -> ClassUnicode {
- let mut cls_ = cls.clone();
- cls_.negate();
- cls_
- }
-
- fn branges(cls: &ClassBytes) -> Vec<(u8, u8)> {
- cls.iter().map(|x| (x.start(), x.end())).collect()
- }
-
- fn bcasefold(cls: &ClassBytes) -> ClassBytes {
- let mut cls_ = cls.clone();
- cls_.case_fold_simple();
- cls_
- }
-
- fn bunion(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
- let mut cls_ = cls1.clone();
- cls_.union(cls2);
- cls_
- }
-
- fn bintersect(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
- let mut cls_ = cls1.clone();
- cls_.intersect(cls2);
- cls_
- }
-
- fn bdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
- let mut cls_ = cls1.clone();
- cls_.difference(cls2);
- cls_
- }
-
- fn bsymdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
- let mut cls_ = cls1.clone();
- cls_.symmetric_difference(cls2);
- cls_
- }
-
- fn bnegate(cls: &ClassBytes) -> ClassBytes {
- let mut cls_ = cls.clone();
- cls_.negate();
- cls_
- }
-
- #[test]
- fn class_range_canonical_unicode() {
- let range = ClassUnicodeRange::new('\u{00FF}', '\0');
- assert_eq!('\0', range.start());
- assert_eq!('\u{00FF}', range.end());
- }
-
- #[test]
- fn class_range_canonical_bytes() {
- let range = ClassBytesRange::new(b'\xFF', b'\0');
- assert_eq!(b'\0', range.start());
- assert_eq!(b'\xFF', range.end());
- }
-
- #[test]
- fn class_canonicalize_unicode() {
- let cls = uclass(&[('a', 'c'), ('x', 'z')]);
- let expected = vec![('a', 'c'), ('x', 'z')];
- assert_eq!(expected, uranges(&cls));
-
- let cls = uclass(&[('x', 'z'), ('a', 'c')]);
- let expected = vec![('a', 'c'), ('x', 'z')];
- assert_eq!(expected, uranges(&cls));
-
- let cls = uclass(&[('x', 'z'), ('w', 'y')]);
- let expected = vec![('w', 'z')];
- assert_eq!(expected, uranges(&cls));
-
- let cls = uclass(&[
- ('c', 'f'),
- ('a', 'g'),
- ('d', 'j'),
- ('a', 'c'),
- ('m', 'p'),
- ('l', 's'),
- ]);
- let expected = vec![('a', 'j'), ('l', 's')];
- assert_eq!(expected, uranges(&cls));
-
- let cls = uclass(&[('x', 'z'), ('u', 'w')]);
- let expected = vec![('u', 'z')];
- assert_eq!(expected, uranges(&cls));
-
- let cls = uclass(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]);
- let expected = vec![('\x00', '\u{10FFFF}')];
- assert_eq!(expected, uranges(&cls));
-
- let cls = uclass(&[('a', 'a'), ('b', 'b')]);
- let expected = vec![('a', 'b')];
- assert_eq!(expected, uranges(&cls));
- }
-
- #[test]
- fn class_canonicalize_bytes() {
- let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]);
- let expected = vec![(b'a', b'c'), (b'x', b'z')];
- assert_eq!(expected, branges(&cls));
-
- let cls = bclass(&[(b'x', b'z'), (b'a', b'c')]);
- let expected = vec![(b'a', b'c'), (b'x', b'z')];
- assert_eq!(expected, branges(&cls));
-
- let cls = bclass(&[(b'x', b'z'), (b'w', b'y')]);
- let expected = vec![(b'w', b'z')];
- assert_eq!(expected, branges(&cls));
-
- let cls = bclass(&[
- (b'c', b'f'),
- (b'a', b'g'),
- (b'd', b'j'),
- (b'a', b'c'),
- (b'm', b'p'),
- (b'l', b's'),
- ]);
- let expected = vec![(b'a', b'j'), (b'l', b's')];
- assert_eq!(expected, branges(&cls));
-
- let cls = bclass(&[(b'x', b'z'), (b'u', b'w')]);
- let expected = vec![(b'u', b'z')];
- assert_eq!(expected, branges(&cls));
-
- let cls = bclass(&[(b'\x00', b'\xFF'), (b'\x00', b'\xFF')]);
- let expected = vec![(b'\x00', b'\xFF')];
- assert_eq!(expected, branges(&cls));
-
- let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]);
- let expected = vec![(b'a', b'b')];
- assert_eq!(expected, branges(&cls));
- }
-
- #[test]
- #[cfg(feature = "unicode-case")]
- fn class_case_fold_unicode() {
- let cls = uclass(&[
- ('C', 'F'),
- ('A', 'G'),
- ('D', 'J'),
- ('A', 'C'),
- ('M', 'P'),
- ('L', 'S'),
- ('c', 'f'),
- ]);
- let expected = uclass(&[
- ('A', 'J'),
- ('L', 'S'),
- ('a', 'j'),
- ('l', 's'),
- ('\u{17F}', '\u{17F}'),
- ]);
- assert_eq!(expected, ucasefold(&cls));
-
- let cls = uclass(&[('A', 'Z')]);
- let expected = uclass(&[
- ('A', 'Z'),
- ('a', 'z'),
- ('\u{17F}', '\u{17F}'),
- ('\u{212A}', '\u{212A}'),
- ]);
- assert_eq!(expected, ucasefold(&cls));
-
- let cls = uclass(&[('a', 'z')]);
- let expected = uclass(&[
- ('A', 'Z'),
- ('a', 'z'),
- ('\u{17F}', '\u{17F}'),
- ('\u{212A}', '\u{212A}'),
- ]);
- assert_eq!(expected, ucasefold(&cls));
-
- let cls = uclass(&[('A', 'A'), ('_', '_')]);
- let expected = uclass(&[('A', 'A'), ('_', '_'), ('a', 'a')]);
- assert_eq!(expected, ucasefold(&cls));
-
- let cls = uclass(&[('A', 'A'), ('=', '=')]);
- let expected = uclass(&[('=', '='), ('A', 'A'), ('a', 'a')]);
- assert_eq!(expected, ucasefold(&cls));
-
- let cls = uclass(&[('\x00', '\x10')]);
- assert_eq!(cls, ucasefold(&cls));
-
- let cls = uclass(&[('k', 'k')]);
- let expected =
- uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')]);
- assert_eq!(expected, ucasefold(&cls));
-
- let cls = uclass(&[('@', '@')]);
- assert_eq!(cls, ucasefold(&cls));
- }
-
- #[test]
- #[cfg(not(feature = "unicode-case"))]
- fn class_case_fold_unicode_disabled() {
- let mut cls = uclass(&[
- ('C', 'F'),
- ('A', 'G'),
- ('D', 'J'),
- ('A', 'C'),
- ('M', 'P'),
- ('L', 'S'),
- ('c', 'f'),
- ]);
- assert!(cls.try_case_fold_simple().is_err());
- }
-
- #[test]
- #[should_panic]
- #[cfg(not(feature = "unicode-case"))]
- fn class_case_fold_unicode_disabled_panics() {
- let mut cls = uclass(&[
- ('C', 'F'),
- ('A', 'G'),
- ('D', 'J'),
- ('A', 'C'),
- ('M', 'P'),
- ('L', 'S'),
- ('c', 'f'),
- ]);
- cls.case_fold_simple();
- }
-
- #[test]
- fn class_case_fold_bytes() {
- let cls = bclass(&[
- (b'C', b'F'),
- (b'A', b'G'),
- (b'D', b'J'),
- (b'A', b'C'),
- (b'M', b'P'),
- (b'L', b'S'),
- (b'c', b'f'),
- ]);
- let expected =
- bclass(&[(b'A', b'J'), (b'L', b'S'), (b'a', b'j'), (b'l', b's')]);
- assert_eq!(expected, bcasefold(&cls));
-
- let cls = bclass(&[(b'A', b'Z')]);
- let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]);
- assert_eq!(expected, bcasefold(&cls));
-
- let cls = bclass(&[(b'a', b'z')]);
- let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]);
- assert_eq!(expected, bcasefold(&cls));
-
- let cls = bclass(&[(b'A', b'A'), (b'_', b'_')]);
- let expected = bclass(&[(b'A', b'A'), (b'_', b'_'), (b'a', b'a')]);
- assert_eq!(expected, bcasefold(&cls));
-
- let cls = bclass(&[(b'A', b'A'), (b'=', b'=')]);
- let expected = bclass(&[(b'=', b'='), (b'A', b'A'), (b'a', b'a')]);
- assert_eq!(expected, bcasefold(&cls));
-
- let cls = bclass(&[(b'\x00', b'\x10')]);
- assert_eq!(cls, bcasefold(&cls));
-
- let cls = bclass(&[(b'k', b'k')]);
- let expected = bclass(&[(b'K', b'K'), (b'k', b'k')]);
- assert_eq!(expected, bcasefold(&cls));
-
- let cls = bclass(&[(b'@', b'@')]);
- assert_eq!(cls, bcasefold(&cls));
- }
-
- #[test]
- fn class_negate_unicode() {
- let cls = uclass(&[('a', 'a')]);
- let expected = uclass(&[('\x00', '\x60'), ('\x62', '\u{10FFFF}')]);
- assert_eq!(expected, unegate(&cls));
-
- let cls = uclass(&[('a', 'a'), ('b', 'b')]);
- let expected = uclass(&[('\x00', '\x60'), ('\x63', '\u{10FFFF}')]);
- assert_eq!(expected, unegate(&cls));
-
- let cls = uclass(&[('a', 'c'), ('x', 'z')]);
- let expected = uclass(&[
- ('\x00', '\x60'),
- ('\x64', '\x77'),
- ('\x7B', '\u{10FFFF}'),
- ]);
- assert_eq!(expected, unegate(&cls));
-
- let cls = uclass(&[('\x00', 'a')]);
- let expected = uclass(&[('\x62', '\u{10FFFF}')]);
- assert_eq!(expected, unegate(&cls));
-
- let cls = uclass(&[('a', '\u{10FFFF}')]);
- let expected = uclass(&[('\x00', '\x60')]);
- assert_eq!(expected, unegate(&cls));
-
- let cls = uclass(&[('\x00', '\u{10FFFF}')]);
- let expected = uclass(&[]);
- assert_eq!(expected, unegate(&cls));
-
- let cls = uclass(&[]);
- let expected = uclass(&[('\x00', '\u{10FFFF}')]);
- assert_eq!(expected, unegate(&cls));
-
- let cls =
- uclass(&[('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')]);
- let expected = uclass(&[('\u{10FFFE}', '\u{10FFFE}')]);
- assert_eq!(expected, unegate(&cls));
-
- let cls = uclass(&[('\x00', '\u{D7FF}')]);
- let expected = uclass(&[('\u{E000}', '\u{10FFFF}')]);
- assert_eq!(expected, unegate(&cls));
-
- let cls = uclass(&[('\x00', '\u{D7FE}')]);
- let expected = uclass(&[('\u{D7FF}', '\u{10FFFF}')]);
- assert_eq!(expected, unegate(&cls));
-
- let cls = uclass(&[('\u{E000}', '\u{10FFFF}')]);
- let expected = uclass(&[('\x00', '\u{D7FF}')]);
- assert_eq!(expected, unegate(&cls));
-
- let cls = uclass(&[('\u{E001}', '\u{10FFFF}')]);
- let expected = uclass(&[('\x00', '\u{E000}')]);
- assert_eq!(expected, unegate(&cls));
- }
-
- #[test]
- fn class_negate_bytes() {
- let cls = bclass(&[(b'a', b'a')]);
- let expected = bclass(&[(b'\x00', b'\x60'), (b'\x62', b'\xFF')]);
- assert_eq!(expected, bnegate(&cls));
-
- let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]);
- let expected = bclass(&[(b'\x00', b'\x60'), (b'\x63', b'\xFF')]);
- assert_eq!(expected, bnegate(&cls));
-
- let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]);
- let expected = bclass(&[
- (b'\x00', b'\x60'),
- (b'\x64', b'\x77'),
- (b'\x7B', b'\xFF'),
- ]);
- assert_eq!(expected, bnegate(&cls));
-
- let cls = bclass(&[(b'\x00', b'a')]);
- let expected = bclass(&[(b'\x62', b'\xFF')]);
- assert_eq!(expected, bnegate(&cls));
-
- let cls = bclass(&[(b'a', b'\xFF')]);
- let expected = bclass(&[(b'\x00', b'\x60')]);
- assert_eq!(expected, bnegate(&cls));
-
- let cls = bclass(&[(b'\x00', b'\xFF')]);
- let expected = bclass(&[]);
- assert_eq!(expected, bnegate(&cls));
-
- let cls = bclass(&[]);
- let expected = bclass(&[(b'\x00', b'\xFF')]);
- assert_eq!(expected, bnegate(&cls));
-
- let cls = bclass(&[(b'\x00', b'\xFD'), (b'\xFF', b'\xFF')]);
- let expected = bclass(&[(b'\xFE', b'\xFE')]);
- assert_eq!(expected, bnegate(&cls));
- }
-
- #[test]
- fn class_union_unicode() {
- let cls1 = uclass(&[('a', 'g'), ('m', 't'), ('A', 'C')]);
- let cls2 = uclass(&[('a', 'z')]);
- let expected = uclass(&[('a', 'z'), ('A', 'C')]);
- assert_eq!(expected, uunion(&cls1, &cls2));
- }
-
- #[test]
- fn class_union_bytes() {
- let cls1 = bclass(&[(b'a', b'g'), (b'm', b't'), (b'A', b'C')]);
- let cls2 = bclass(&[(b'a', b'z')]);
- let expected = bclass(&[(b'a', b'z'), (b'A', b'C')]);
- assert_eq!(expected, bunion(&cls1, &cls2));
- }
-
- #[test]
- fn class_intersect_unicode() {
- let cls1 = uclass(&[]);
- let cls2 = uclass(&[('a', 'a')]);
- let expected = uclass(&[]);
- assert_eq!(expected, uintersect(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'a')]);
- let cls2 = uclass(&[('a', 'a')]);
- let expected = uclass(&[('a', 'a')]);
- assert_eq!(expected, uintersect(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'a')]);
- let cls2 = uclass(&[('b', 'b')]);
- let expected = uclass(&[]);
- assert_eq!(expected, uintersect(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'a')]);
- let cls2 = uclass(&[('a', 'c')]);
- let expected = uclass(&[('a', 'a')]);
- assert_eq!(expected, uintersect(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'b')]);
- let cls2 = uclass(&[('a', 'c')]);
- let expected = uclass(&[('a', 'b')]);
- assert_eq!(expected, uintersect(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'b')]);
- let cls2 = uclass(&[('b', 'c')]);
- let expected = uclass(&[('b', 'b')]);
- assert_eq!(expected, uintersect(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'b')]);
- let cls2 = uclass(&[('c', 'd')]);
- let expected = uclass(&[]);
- assert_eq!(expected, uintersect(&cls1, &cls2));
-
- let cls1 = uclass(&[('b', 'c')]);
- let cls2 = uclass(&[('a', 'd')]);
- let expected = uclass(&[('b', 'c')]);
- assert_eq!(expected, uintersect(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
- let cls2 = uclass(&[('a', 'h')]);
- let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
- assert_eq!(expected, uintersect(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
- let cls2 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
- let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
- assert_eq!(expected, uintersect(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'b'), ('g', 'h')]);
- let cls2 = uclass(&[('d', 'e'), ('k', 'l')]);
- let expected = uclass(&[]);
- assert_eq!(expected, uintersect(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
- let cls2 = uclass(&[('h', 'h')]);
- let expected = uclass(&[('h', 'h')]);
- assert_eq!(expected, uintersect(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'b'), ('e', 'f'), ('i', 'j')]);
- let cls2 = uclass(&[('c', 'd'), ('g', 'h'), ('k', 'l')]);
- let expected = uclass(&[]);
- assert_eq!(expected, uintersect(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'b'), ('c', 'd'), ('e', 'f')]);
- let cls2 = uclass(&[('b', 'c'), ('d', 'e'), ('f', 'g')]);
- let expected = uclass(&[('b', 'f')]);
- assert_eq!(expected, uintersect(&cls1, &cls2));
- }
-
- #[test]
- fn class_intersect_bytes() {
- let cls1 = bclass(&[]);
- let cls2 = bclass(&[(b'a', b'a')]);
- let expected = bclass(&[]);
- assert_eq!(expected, bintersect(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'a')]);
- let cls2 = bclass(&[(b'a', b'a')]);
- let expected = bclass(&[(b'a', b'a')]);
- assert_eq!(expected, bintersect(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'a')]);
- let cls2 = bclass(&[(b'b', b'b')]);
- let expected = bclass(&[]);
- assert_eq!(expected, bintersect(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'a')]);
- let cls2 = bclass(&[(b'a', b'c')]);
- let expected = bclass(&[(b'a', b'a')]);
- assert_eq!(expected, bintersect(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'b')]);
- let cls2 = bclass(&[(b'a', b'c')]);
- let expected = bclass(&[(b'a', b'b')]);
- assert_eq!(expected, bintersect(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'b')]);
- let cls2 = bclass(&[(b'b', b'c')]);
- let expected = bclass(&[(b'b', b'b')]);
- assert_eq!(expected, bintersect(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'b')]);
- let cls2 = bclass(&[(b'c', b'd')]);
- let expected = bclass(&[]);
- assert_eq!(expected, bintersect(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'b', b'c')]);
- let cls2 = bclass(&[(b'a', b'd')]);
- let expected = bclass(&[(b'b', b'c')]);
- assert_eq!(expected, bintersect(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
- let cls2 = bclass(&[(b'a', b'h')]);
- let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
- assert_eq!(expected, bintersect(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
- let cls2 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
- let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
- assert_eq!(expected, bintersect(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'b'), (b'g', b'h')]);
- let cls2 = bclass(&[(b'd', b'e'), (b'k', b'l')]);
- let expected = bclass(&[]);
- assert_eq!(expected, bintersect(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
- let cls2 = bclass(&[(b'h', b'h')]);
- let expected = bclass(&[(b'h', b'h')]);
- assert_eq!(expected, bintersect(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'b'), (b'e', b'f'), (b'i', b'j')]);
- let cls2 = bclass(&[(b'c', b'd'), (b'g', b'h'), (b'k', b'l')]);
- let expected = bclass(&[]);
- assert_eq!(expected, bintersect(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'b'), (b'c', b'd'), (b'e', b'f')]);
- let cls2 = bclass(&[(b'b', b'c'), (b'd', b'e'), (b'f', b'g')]);
- let expected = bclass(&[(b'b', b'f')]);
- assert_eq!(expected, bintersect(&cls1, &cls2));
- }
-
- #[test]
- fn class_difference_unicode() {
- let cls1 = uclass(&[('a', 'a')]);
- let cls2 = uclass(&[('a', 'a')]);
- let expected = uclass(&[]);
- assert_eq!(expected, udifference(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'a')]);
- let cls2 = uclass(&[]);
- let expected = uclass(&[('a', 'a')]);
- assert_eq!(expected, udifference(&cls1, &cls2));
-
- let cls1 = uclass(&[]);
- let cls2 = uclass(&[('a', 'a')]);
- let expected = uclass(&[]);
- assert_eq!(expected, udifference(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'z')]);
- let cls2 = uclass(&[('a', 'a')]);
- let expected = uclass(&[('b', 'z')]);
- assert_eq!(expected, udifference(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'z')]);
- let cls2 = uclass(&[('z', 'z')]);
- let expected = uclass(&[('a', 'y')]);
- assert_eq!(expected, udifference(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'z')]);
- let cls2 = uclass(&[('m', 'm')]);
- let expected = uclass(&[('a', 'l'), ('n', 'z')]);
- assert_eq!(expected, udifference(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
- let cls2 = uclass(&[('a', 'z')]);
- let expected = uclass(&[]);
- assert_eq!(expected, udifference(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
- let cls2 = uclass(&[('d', 'v')]);
- let expected = uclass(&[('a', 'c')]);
- assert_eq!(expected, udifference(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
- let cls2 = uclass(&[('b', 'g'), ('s', 'u')]);
- let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]);
- assert_eq!(expected, udifference(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
- let cls2 = uclass(&[('b', 'd'), ('e', 'g'), ('s', 'u')]);
- let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]);
- assert_eq!(expected, udifference(&cls1, &cls2));
-
- let cls1 = uclass(&[('x', 'z')]);
- let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]);
- let expected = uclass(&[('x', 'z')]);
- assert_eq!(expected, udifference(&cls1, &cls2));
-
- let cls1 = uclass(&[('a', 'z')]);
- let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]);
- let expected = uclass(&[('d', 'd'), ('h', 'r'), ('v', 'z')]);
- assert_eq!(expected, udifference(&cls1, &cls2));
- }
-
- #[test]
- fn class_difference_bytes() {
- let cls1 = bclass(&[(b'a', b'a')]);
- let cls2 = bclass(&[(b'a', b'a')]);
- let expected = bclass(&[]);
- assert_eq!(expected, bdifference(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'a')]);
- let cls2 = bclass(&[]);
- let expected = bclass(&[(b'a', b'a')]);
- assert_eq!(expected, bdifference(&cls1, &cls2));
-
- let cls1 = bclass(&[]);
- let cls2 = bclass(&[(b'a', b'a')]);
- let expected = bclass(&[]);
- assert_eq!(expected, bdifference(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'z')]);
- let cls2 = bclass(&[(b'a', b'a')]);
- let expected = bclass(&[(b'b', b'z')]);
- assert_eq!(expected, bdifference(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'z')]);
- let cls2 = bclass(&[(b'z', b'z')]);
- let expected = bclass(&[(b'a', b'y')]);
- assert_eq!(expected, bdifference(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'z')]);
- let cls2 = bclass(&[(b'm', b'm')]);
- let expected = bclass(&[(b'a', b'l'), (b'n', b'z')]);
- assert_eq!(expected, bdifference(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
- let cls2 = bclass(&[(b'a', b'z')]);
- let expected = bclass(&[]);
- assert_eq!(expected, bdifference(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
- let cls2 = bclass(&[(b'd', b'v')]);
- let expected = bclass(&[(b'a', b'c')]);
- assert_eq!(expected, bdifference(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
- let cls2 = bclass(&[(b'b', b'g'), (b's', b'u')]);
- let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]);
- assert_eq!(expected, bdifference(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
- let cls2 = bclass(&[(b'b', b'd'), (b'e', b'g'), (b's', b'u')]);
- let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]);
- assert_eq!(expected, bdifference(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'x', b'z')]);
- let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]);
- let expected = bclass(&[(b'x', b'z')]);
- assert_eq!(expected, bdifference(&cls1, &cls2));
-
- let cls1 = bclass(&[(b'a', b'z')]);
- let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]);
- let expected = bclass(&[(b'd', b'd'), (b'h', b'r'), (b'v', b'z')]);
- assert_eq!(expected, bdifference(&cls1, &cls2));
- }
-
- #[test]
- fn class_symmetric_difference_unicode() {
- let cls1 = uclass(&[('a', 'm')]);
- let cls2 = uclass(&[('g', 't')]);
- let expected = uclass(&[('a', 'f'), ('n', 't')]);
- assert_eq!(expected, usymdifference(&cls1, &cls2));
- }
-
- #[test]
- fn class_symmetric_difference_bytes() {
- let cls1 = bclass(&[(b'a', b'm')]);
- let cls2 = bclass(&[(b'g', b't')]);
- let expected = bclass(&[(b'a', b'f'), (b'n', b't')]);
- assert_eq!(expected, bsymdifference(&cls1, &cls2));
- }
-
- // We use a thread with an explicit stack size to test that our destructor
- // for Hir can handle arbitrarily sized expressions in constant stack
- // space. In case we run on a platform without threads (WASM?), we limit
- // this test to Windows/Unix.
- #[test]
- #[cfg(any(unix, windows))]
- fn no_stack_overflow_on_drop() {
- use std::thread;
-
- let run = || {
- let mut expr = Hir::empty();
- for _ in 0..100 {
- expr = Hir::capture(Capture {
- index: 1,
- name: None,
- sub: Box::new(expr),
- });
- expr = Hir::repetition(Repetition {
- min: 0,
- max: Some(1),
- greedy: true,
- sub: Box::new(expr),
- });
-
- expr = Hir {
- kind: HirKind::Concat(vec![expr]),
- props: Properties::empty(),
- };
- expr = Hir {
- kind: HirKind::Alternation(vec![expr]),
- props: Properties::empty(),
- };
- }
- assert!(!matches!(*expr.kind(), HirKind::Empty));
- };
-
- // We run our test on a thread with a small stack size so we can
- // force the issue more easily.
- //
- // NOTE(2023-03-21): See the corresponding test in 'crate::ast::tests'
- // for context on the specific stack size chosen here.
- thread::Builder::new()
- .stack_size(16 << 10)
- .spawn(run)
- .unwrap()
- .join()
- .unwrap();
- }
-
- #[test]
- fn look_set_iter() {
- let set = LookSet::empty();
- assert_eq!(0, set.iter().count());
-
- let set = LookSet::full();
- assert_eq!(18, set.iter().count());
-
- let set =
- LookSet::empty().insert(Look::StartLF).insert(Look::WordUnicode);
- assert_eq!(2, set.iter().count());
-
- let set = LookSet::empty().insert(Look::StartLF);
- assert_eq!(1, set.iter().count());
-
- let set = LookSet::empty().insert(Look::WordAsciiNegate);
- assert_eq!(1, set.iter().count());
- }
-
- #[test]
- fn look_set_debug() {
- let res = format!("{:?}", LookSet::empty());
- assert_eq!("∅", res);
- let res = format!("{:?}", LookSet::full());
- assert_eq!("Az^$rRbB𝛃𝚩<>〈〉◁▷◀▶", res);
- }
-}
diff --git a/vendor/regex-syntax/src/hir/print.rs b/vendor/regex-syntax/src/hir/print.rs
deleted file mode 100644
index dfa6d403..00000000
--- a/vendor/regex-syntax/src/hir/print.rs
+++ /dev/null
@@ -1,608 +0,0 @@
-/*!
-This module provides a regular expression printer for `Hir`.
-*/
-
-use core::fmt;
-
-use crate::{
- hir::{
- self,
- visitor::{self, Visitor},
- Hir, HirKind,
- },
- is_meta_character,
-};
-
-/// A builder for constructing a printer.
-///
-/// Note that since a printer doesn't have any configuration knobs, this type
-/// remains unexported.
-#[derive(Clone, Debug)]
-struct PrinterBuilder {
- _priv: (),
-}
-
-impl Default for PrinterBuilder {
- fn default() -> PrinterBuilder {
- PrinterBuilder::new()
- }
-}
-
-impl PrinterBuilder {
- fn new() -> PrinterBuilder {
- PrinterBuilder { _priv: () }
- }
-
- fn build(&self) -> Printer {
- Printer { _priv: () }
- }
-}
-
-/// A printer for a regular expression's high-level intermediate
-/// representation.
-///
-/// A printer converts a high-level intermediate representation (HIR) to a
-/// regular expression pattern string. This particular printer uses constant
-/// stack space and heap space proportional to the size of the HIR.
-///
-/// Since this printer is only using the HIR, the pattern it prints will likely
-/// not resemble the original pattern at all. For example, a pattern like
-/// `\pL` will have its entire class written out.
-///
-/// The purpose of this printer is to provide a means to mutate an HIR and then
-/// build a regular expression from the result of that mutation. (A regex
-/// library could provide a constructor from this HIR explicitly, but that
-/// creates an unnecessary public coupling between the regex library and this
-/// specific HIR representation.)
-#[derive(Debug)]
-pub struct Printer {
- _priv: (),
-}
-
-impl Printer {
- /// Create a new printer.
- pub fn new() -> Printer {
- PrinterBuilder::new().build()
- }
-
- /// Print the given `Hir` to the given writer. The writer must implement
- /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
- /// here are a `fmt::Formatter` (which is available in `fmt::Display`
- /// implementations) or a `&mut String`.
- pub fn print<W: fmt::Write>(&mut self, hir: &Hir, wtr: W) -> fmt::Result {
- visitor::visit(hir, Writer { wtr })
- }
-}
-
-#[derive(Debug)]
-struct Writer<W> {
- wtr: W,
-}
-
-impl<W: fmt::Write> Visitor for Writer<W> {
- type Output = ();
- type Err = fmt::Error;
-
- fn finish(self) -> fmt::Result {
- Ok(())
- }
-
- fn visit_pre(&mut self, hir: &Hir) -> fmt::Result {
- match *hir.kind() {
- HirKind::Empty => {
- // Technically an empty sub-expression could be "printed" by
- // just ignoring it, but in practice, you could have a
- // repetition operator attached to an empty expression, and you
- // really need something in the concrete syntax to make that
- // work as you'd expect.
- self.wtr.write_str(r"(?:)")?;
- }
- // Repetition operators are strictly suffix oriented.
- HirKind::Repetition(_) => {}
- HirKind::Literal(hir::Literal(ref bytes)) => {
- // See the comment on the 'Concat' and 'Alternation' case below
- // for why we put parens here. Literals are, conceptually,
- // a special case of concatenation where each element is a
- // character. The HIR flattens this into a Box<[u8]>, but we
- // still need to treat it like a concatenation for correct
- // printing. As a special case, we don't write parens if there
- // is only one character. One character means there is no
- // concat so we don't need parens. Adding parens would still be
- // correct, but we drop them here because it tends to create
- // rather noisy regexes even in simple cases.
- let result = core::str::from_utf8(bytes);
- let len = result.map_or(bytes.len(), |s| s.chars().count());
- if len > 1 {
- self.wtr.write_str(r"(?:")?;
- }
- match result {
- Ok(string) => {
- for c in string.chars() {
- self.write_literal_char(c)?;
- }
- }
- Err(_) => {
- for &b in bytes.iter() {
- self.write_literal_byte(b)?;
- }
- }
- }
- if len > 1 {
- self.wtr.write_str(r")")?;
- }
- }
- HirKind::Class(hir::Class::Unicode(ref cls)) => {
- if cls.ranges().is_empty() {
- return self.wtr.write_str("[a&&b]");
- }
- self.wtr.write_str("[")?;
- for range in cls.iter() {
- if range.start() == range.end() {
- self.write_literal_char(range.start())?;
- } else if u32::from(range.start()) + 1
- == u32::from(range.end())
- {
- self.write_literal_char(range.start())?;
- self.write_literal_char(range.end())?;
- } else {
- self.write_literal_char(range.start())?;
- self.wtr.write_str("-")?;
- self.write_literal_char(range.end())?;
- }
- }
- self.wtr.write_str("]")?;
- }
- HirKind::Class(hir::Class::Bytes(ref cls)) => {
- if cls.ranges().is_empty() {
- return self.wtr.write_str("[a&&b]");
- }
- self.wtr.write_str("(?-u:[")?;
- for range in cls.iter() {
- if range.start() == range.end() {
- self.write_literal_class_byte(range.start())?;
- } else if range.start() + 1 == range.end() {
- self.write_literal_class_byte(range.start())?;
- self.write_literal_class_byte(range.end())?;
- } else {
- self.write_literal_class_byte(range.start())?;
- self.wtr.write_str("-")?;
- self.write_literal_class_byte(range.end())?;
- }
- }
- self.wtr.write_str("])")?;
- }
- HirKind::Look(ref look) => match *look {
- hir::Look::Start => {
- self.wtr.write_str(r"\A")?;
- }
- hir::Look::End => {
- self.wtr.write_str(r"\z")?;
- }
- hir::Look::StartLF => {
- self.wtr.write_str("(?m:^)")?;
- }
- hir::Look::EndLF => {
- self.wtr.write_str("(?m:$)")?;
- }
- hir::Look::StartCRLF => {
- self.wtr.write_str("(?mR:^)")?;
- }
- hir::Look::EndCRLF => {
- self.wtr.write_str("(?mR:$)")?;
- }
- hir::Look::WordAscii => {
- self.wtr.write_str(r"(?-u:\b)")?;
- }
- hir::Look::WordAsciiNegate => {
- self.wtr.write_str(r"(?-u:\B)")?;
- }
- hir::Look::WordUnicode => {
- self.wtr.write_str(r"\b")?;
- }
- hir::Look::WordUnicodeNegate => {
- self.wtr.write_str(r"\B")?;
- }
- hir::Look::WordStartAscii => {
- self.wtr.write_str(r"(?-u:\b{start})")?;
- }
- hir::Look::WordEndAscii => {
- self.wtr.write_str(r"(?-u:\b{end})")?;
- }
- hir::Look::WordStartUnicode => {
- self.wtr.write_str(r"\b{start}")?;
- }
- hir::Look::WordEndUnicode => {
- self.wtr.write_str(r"\b{end}")?;
- }
- hir::Look::WordStartHalfAscii => {
- self.wtr.write_str(r"(?-u:\b{start-half})")?;
- }
- hir::Look::WordEndHalfAscii => {
- self.wtr.write_str(r"(?-u:\b{end-half})")?;
- }
- hir::Look::WordStartHalfUnicode => {
- self.wtr.write_str(r"\b{start-half}")?;
- }
- hir::Look::WordEndHalfUnicode => {
- self.wtr.write_str(r"\b{end-half}")?;
- }
- },
- HirKind::Capture(hir::Capture { ref name, .. }) => {
- self.wtr.write_str("(")?;
- if let Some(ref name) = *name {
- write!(self.wtr, "?P<{}>", name)?;
- }
- }
- // Why do this? Wrapping concats and alts in non-capturing groups
- // is not *always* necessary, but is sometimes necessary. For
- // example, 'concat(a, alt(b, c))' should be written as 'a(?:b|c)'
- // and not 'ab|c'. The former is clearly the intended meaning, but
- // the latter is actually 'alt(concat(a, b), c)'.
- //
- // It would be possible to only group these things in cases where
- // it's strictly necessary, but it requires knowing the parent
- // expression. And since this technique is simpler and always
- // correct, we take this route. More to the point, it is a non-goal
- // of an HIR printer to show a nice easy-to-read regex. Indeed,
- // its construction forbids it from doing so. Therefore, inserting
- // extra groups where they aren't necessary is perfectly okay.
- HirKind::Concat(_) | HirKind::Alternation(_) => {
- self.wtr.write_str(r"(?:")?;
- }
- }
- Ok(())
- }
-
- fn visit_post(&mut self, hir: &Hir) -> fmt::Result {
- match *hir.kind() {
- // Handled during visit_pre
- HirKind::Empty
- | HirKind::Literal(_)
- | HirKind::Class(_)
- | HirKind::Look(_) => {}
- HirKind::Repetition(ref x) => {
- match (x.min, x.max) {
- (0, Some(1)) => {
- self.wtr.write_str("?")?;
- }
- (0, None) => {
- self.wtr.write_str("*")?;
- }
- (1, None) => {
- self.wtr.write_str("+")?;
- }
- (1, Some(1)) => {
- // 'a{1}' and 'a{1}?' are exactly equivalent to 'a'.
- return Ok(());
- }
- (m, None) => {
- write!(self.wtr, "{{{},}}", m)?;
- }
- (m, Some(n)) if m == n => {
- write!(self.wtr, "{{{}}}", m)?;
- // a{m} and a{m}? are always exactly equivalent.
- return Ok(());
- }
- (m, Some(n)) => {
- write!(self.wtr, "{{{},{}}}", m, n)?;
- }
- }
- if !x.greedy {
- self.wtr.write_str("?")?;
- }
- }
- HirKind::Capture(_)
- | HirKind::Concat(_)
- | HirKind::Alternation(_) => {
- self.wtr.write_str(r")")?;
- }
- }
- Ok(())
- }
-
- fn visit_alternation_in(&mut self) -> fmt::Result {
- self.wtr.write_str("|")
- }
-}
-
-impl<W: fmt::Write> Writer<W> {
- fn write_literal_char(&mut self, c: char) -> fmt::Result {
- if is_meta_character(c) {
- self.wtr.write_str("\\")?;
- }
- self.wtr.write_char(c)
- }
-
- fn write_literal_byte(&mut self, b: u8) -> fmt::Result {
- if b <= 0x7F && !b.is_ascii_control() && !b.is_ascii_whitespace() {
- self.write_literal_char(char::try_from(b).unwrap())
- } else {
- write!(self.wtr, "(?-u:\\x{:02X})", b)
- }
- }
-
- fn write_literal_class_byte(&mut self, b: u8) -> fmt::Result {
- if b <= 0x7F && !b.is_ascii_control() && !b.is_ascii_whitespace() {
- self.write_literal_char(char::try_from(b).unwrap())
- } else {
- write!(self.wtr, "\\x{:02X}", b)
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use alloc::{
- boxed::Box,
- string::{String, ToString},
- };
-
- use crate::ParserBuilder;
-
- use super::*;
-
- fn roundtrip(given: &str, expected: &str) {
- roundtrip_with(|b| b, given, expected);
- }
-
- fn roundtrip_bytes(given: &str, expected: &str) {
- roundtrip_with(|b| b.utf8(false), given, expected);
- }
-
- fn roundtrip_with<F>(mut f: F, given: &str, expected: &str)
- where
- F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
- {
- let mut builder = ParserBuilder::new();
- f(&mut builder);
- let hir = builder.build().parse(given).unwrap();
-
- let mut printer = Printer::new();
- let mut dst = String::new();
- printer.print(&hir, &mut dst).unwrap();
-
- // Check that the result is actually valid.
- builder.build().parse(&dst).unwrap();
-
- assert_eq!(expected, dst);
- }
-
- #[test]
- fn print_literal() {
- roundtrip("a", "a");
- roundtrip(r"\xff", "\u{FF}");
- roundtrip_bytes(r"\xff", "\u{FF}");
- roundtrip_bytes(r"(?-u)\xff", r"(?-u:\xFF)");
- roundtrip("☃", "☃");
- }
-
- #[test]
- fn print_class() {
- roundtrip(r"[a]", r"a");
- roundtrip(r"[ab]", r"[ab]");
- roundtrip(r"[a-z]", r"[a-z]");
- roundtrip(r"[a-z--b-c--x-y]", r"[ad-wz]");
- roundtrip(r"[^\x01-\u{10FFFF}]", "\u{0}");
- roundtrip(r"[-]", r"\-");
- roundtrip(r"[☃-⛄]", r"[☃-⛄]");
-
- roundtrip(r"(?-u)[a]", r"a");
- roundtrip(r"(?-u)[ab]", r"(?-u:[ab])");
- roundtrip(r"(?-u)[a-z]", r"(?-u:[a-z])");
- roundtrip_bytes(r"(?-u)[a-\xFF]", r"(?-u:[a-\xFF])");
-
- // The following test that the printer escapes meta characters
- // in character classes.
- roundtrip(r"[\[]", r"\[");
- roundtrip(r"[Z-_]", r"[Z-_]");
- roundtrip(r"[Z-_--Z]", r"[\[-_]");
-
- // The following test that the printer escapes meta characters
- // in byte oriented character classes.
- roundtrip_bytes(r"(?-u)[\[]", r"\[");
- roundtrip_bytes(r"(?-u)[Z-_]", r"(?-u:[Z-_])");
- roundtrip_bytes(r"(?-u)[Z-_--Z]", r"(?-u:[\[-_])");
-
- // This tests that an empty character class is correctly roundtripped.
- #[cfg(feature = "unicode-gencat")]
- roundtrip(r"\P{any}", r"[a&&b]");
- roundtrip_bytes(r"(?-u)[^\x00-\xFF]", r"[a&&b]");
- }
-
- #[test]
- fn print_anchor() {
- roundtrip(r"^", r"\A");
- roundtrip(r"$", r"\z");
- roundtrip(r"(?m)^", r"(?m:^)");
- roundtrip(r"(?m)$", r"(?m:$)");
- }
-
- #[test]
- fn print_word_boundary() {
- roundtrip(r"\b", r"\b");
- roundtrip(r"\B", r"\B");
- roundtrip(r"(?-u)\b", r"(?-u:\b)");
- roundtrip_bytes(r"(?-u)\B", r"(?-u:\B)");
- }
-
- #[test]
- fn print_repetition() {
- roundtrip("a?", "a?");
- roundtrip("a??", "a??");
- roundtrip("(?U)a?", "a??");
-
- roundtrip("a*", "a*");
- roundtrip("a*?", "a*?");
- roundtrip("(?U)a*", "a*?");
-
- roundtrip("a+", "a+");
- roundtrip("a+?", "a+?");
- roundtrip("(?U)a+", "a+?");
-
- roundtrip("a{1}", "a");
- roundtrip("a{2}", "a{2}");
- roundtrip("a{1,}", "a+");
- roundtrip("a{1,5}", "a{1,5}");
- roundtrip("a{1}?", "a");
- roundtrip("a{2}?", "a{2}");
- roundtrip("a{1,}?", "a+?");
- roundtrip("a{1,5}?", "a{1,5}?");
- roundtrip("(?U)a{1}", "a");
- roundtrip("(?U)a{2}", "a{2}");
- roundtrip("(?U)a{1,}", "a+?");
- roundtrip("(?U)a{1,5}", "a{1,5}?");
-
- // Test that various zero-length repetitions always translate to an
- // empty regex. This is more a property of HIR's smart constructors
- // than the printer though.
- roundtrip("a{0}", "(?:)");
- roundtrip("(?:ab){0}", "(?:)");
- #[cfg(feature = "unicode-gencat")]
- {
- roundtrip(r"\p{any}{0}", "(?:)");
- roundtrip(r"\P{any}{0}", "(?:)");
- }
- }
-
- #[test]
- fn print_group() {
- roundtrip("()", "((?:))");
- roundtrip("(?P<foo>)", "(?P<foo>(?:))");
- roundtrip("(?:)", "(?:)");
-
- roundtrip("(a)", "(a)");
- roundtrip("(?P<foo>a)", "(?P<foo>a)");
- roundtrip("(?:a)", "a");
-
- roundtrip("((((a))))", "((((a))))");
- }
-
- #[test]
- fn print_alternation() {
- roundtrip("|", "(?:(?:)|(?:))");
- roundtrip("||", "(?:(?:)|(?:)|(?:))");
-
- roundtrip("a|b", "[ab]");
- roundtrip("ab|cd", "(?:(?:ab)|(?:cd))");
- roundtrip("a|b|c", "[a-c]");
- roundtrip("ab|cd|ef", "(?:(?:ab)|(?:cd)|(?:ef))");
- roundtrip("foo|bar|quux", "(?:(?:foo)|(?:bar)|(?:quux))");
- }
-
- // This is a regression test that stresses a peculiarity of how the HIR
- // is both constructed and printed. Namely, it is legal for a repetition
- // to directly contain a concatenation. This particular construct isn't
- // really possible to build from the concrete syntax directly, since you'd
- // be forced to put the concatenation into (at least) a non-capturing
- // group. Concurrently, the printer doesn't consider this case and just
- // kind of naively prints the child expression and tacks on the repetition
- // operator.
- //
- // As a result, if you attached '+' to a 'concat(a, b)', the printer gives
- // you 'ab+', but clearly it really should be '(?:ab)+'.
- //
- // This bug isn't easy to surface because most ways of building an HIR
- // come directly from the concrete syntax, and as mentioned above, it just
- // isn't possible to build this kind of HIR from the concrete syntax.
- // Nevertheless, this is definitely a bug.
- //
- // See: https://github.com/rust-lang/regex/issues/731
- #[test]
- fn regression_repetition_concat() {
- let expr = Hir::concat(alloc::vec![
- Hir::literal("x".as_bytes()),
- Hir::repetition(hir::Repetition {
- min: 1,
- max: None,
- greedy: true,
- sub: Box::new(Hir::literal("ab".as_bytes())),
- }),
- Hir::literal("y".as_bytes()),
- ]);
- assert_eq!(r"(?:x(?:ab)+y)", expr.to_string());
-
- let expr = Hir::concat(alloc::vec![
- Hir::look(hir::Look::Start),
- Hir::repetition(hir::Repetition {
- min: 1,
- max: None,
- greedy: true,
- sub: Box::new(Hir::concat(alloc::vec![
- Hir::look(hir::Look::Start),
- Hir::look(hir::Look::End),
- ])),
- }),
- Hir::look(hir::Look::End),
- ]);
- assert_eq!(r"(?:\A\A\z\z)", expr.to_string());
- }
-
- // Just like regression_repetition_concat, but with the repetition using
- // an alternation as a child expression instead.
- //
- // See: https://github.com/rust-lang/regex/issues/731
- #[test]
- fn regression_repetition_alternation() {
- let expr = Hir::concat(alloc::vec![
- Hir::literal("ab".as_bytes()),
- Hir::repetition(hir::Repetition {
- min: 1,
- max: None,
- greedy: true,
- sub: Box::new(Hir::alternation(alloc::vec![
- Hir::literal("cd".as_bytes()),
- Hir::literal("ef".as_bytes()),
- ])),
- }),
- Hir::literal("gh".as_bytes()),
- ]);
- assert_eq!(r"(?:(?:ab)(?:(?:cd)|(?:ef))+(?:gh))", expr.to_string());
-
- let expr = Hir::concat(alloc::vec![
- Hir::look(hir::Look::Start),
- Hir::repetition(hir::Repetition {
- min: 1,
- max: None,
- greedy: true,
- sub: Box::new(Hir::alternation(alloc::vec![
- Hir::look(hir::Look::Start),
- Hir::look(hir::Look::End),
- ])),
- }),
- Hir::look(hir::Look::End),
- ]);
- assert_eq!(r"(?:\A(?:\A|\z)\z)", expr.to_string());
- }
-
- // This regression test is very similar in flavor to
- // regression_repetition_concat in that the root of the issue lies in a
- // peculiarity of how the HIR is represented and how the printer writes it
- // out. Like the other regression, this one is also rooted in the fact that
- // you can't produce the peculiar HIR from the concrete syntax. Namely, you
- // just can't have a 'concat(a, alt(b, c))' because the 'alt' will normally
- // be in (at least) a non-capturing group. Why? Because the '|' has very
- // low precedence (lower than concatenation), and so something like 'ab|c'
- // is actually 'alt(ab, c)'.
- //
- // See: https://github.com/rust-lang/regex/issues/516
- #[test]
- fn regression_alternation_concat() {
- let expr = Hir::concat(alloc::vec![
- Hir::literal("ab".as_bytes()),
- Hir::alternation(alloc::vec![
- Hir::literal("mn".as_bytes()),
- Hir::literal("xy".as_bytes()),
- ]),
- ]);
- assert_eq!(r"(?:(?:ab)(?:(?:mn)|(?:xy)))", expr.to_string());
-
- let expr = Hir::concat(alloc::vec![
- Hir::look(hir::Look::Start),
- Hir::alternation(alloc::vec![
- Hir::look(hir::Look::Start),
- Hir::look(hir::Look::End),
- ]),
- ]);
- assert_eq!(r"(?:\A(?:\A|\z))", expr.to_string());
- }
-}
diff --git a/vendor/regex-syntax/src/hir/translate.rs b/vendor/regex-syntax/src/hir/translate.rs
deleted file mode 100644
index e8e5a881..00000000
--- a/vendor/regex-syntax/src/hir/translate.rs
+++ /dev/null
@@ -1,3744 +0,0 @@
-/*!
-Defines a translator that converts an `Ast` to an `Hir`.
-*/
-
-use core::cell::{Cell, RefCell};
-
-use alloc::{boxed::Box, string::ToString, vec, vec::Vec};
-
-use crate::{
- ast::{self, Ast, Span, Visitor},
- either::Either,
- hir::{self, Error, ErrorKind, Hir, HirKind},
- unicode::{self, ClassQuery},
-};
-
-type Result<T> = core::result::Result<T, Error>;
-
-/// A builder for constructing an AST->HIR translator.
-#[derive(Clone, Debug)]
-pub struct TranslatorBuilder {
- utf8: bool,
- line_terminator: u8,
- flags: Flags,
-}
-
-impl Default for TranslatorBuilder {
- fn default() -> TranslatorBuilder {
- TranslatorBuilder::new()
- }
-}
-
-impl TranslatorBuilder {
- /// Create a new translator builder with a default configuration.
- pub fn new() -> TranslatorBuilder {
- TranslatorBuilder {
- utf8: true,
- line_terminator: b'\n',
- flags: Flags::default(),
- }
- }
-
- /// Build a translator using the current configuration.
- pub fn build(&self) -> Translator {
- Translator {
- stack: RefCell::new(vec![]),
- flags: Cell::new(self.flags),
- utf8: self.utf8,
- line_terminator: self.line_terminator,
- }
- }
-
- /// When disabled, translation will permit the construction of a regular
- /// expression that may match invalid UTF-8.
- ///
- /// When enabled (the default), the translator is guaranteed to produce an
- /// expression that, for non-empty matches, will only ever produce spans
- /// that are entirely valid UTF-8 (otherwise, the translator will return an
- /// error).
- ///
- /// Perhaps surprisingly, when UTF-8 is enabled, an empty regex or even
- /// a negated ASCII word boundary (uttered as `(?-u:\B)` in the concrete
- /// syntax) will be allowed even though they can produce matches that split
- /// a UTF-8 encoded codepoint. This only applies to zero-width or "empty"
- /// matches, and it is expected that the regex engine itself must handle
- /// these cases if necessary (perhaps by suppressing any zero-width matches
- /// that split a codepoint).
- pub fn utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
- self.utf8 = yes;
- self
- }
-
- /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`.
- ///
- /// Namely, instead of `.` (by default) matching everything except for `\n`,
- /// this will cause `.` to match everything except for the byte given.
- ///
- /// If `.` is used in a context where Unicode mode is enabled and this byte
- /// isn't ASCII, then an error will be returned. When Unicode mode is
- /// disabled, then any byte is permitted, but will return an error if UTF-8
- /// mode is enabled and it is a non-ASCII byte.
- ///
- /// In short, any ASCII value for a line terminator is always okay. But a
- /// non-ASCII byte might result in an error depending on whether Unicode
- /// mode or UTF-8 mode are enabled.
- ///
- /// Note that if `R` mode is enabled then it always takes precedence and
- /// the line terminator will be treated as `\r` and `\n` simultaneously.
- ///
- /// Note also that this *doesn't* impact the look-around assertions
- /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional
- /// configuration in the regex engine itself.
- pub fn line_terminator(&mut self, byte: u8) -> &mut TranslatorBuilder {
- self.line_terminator = byte;
- self
- }
-
- /// Enable or disable the case insensitive flag (`i`) by default.
- pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
- self.flags.case_insensitive = if yes { Some(true) } else { None };
- self
- }
-
- /// Enable or disable the multi-line matching flag (`m`) by default.
- pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
- self.flags.multi_line = if yes { Some(true) } else { None };
- self
- }
-
- /// Enable or disable the "dot matches any character" flag (`s`) by
- /// default.
- pub fn dot_matches_new_line(
- &mut self,
- yes: bool,
- ) -> &mut TranslatorBuilder {
- self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
- self
- }
-
- /// Enable or disable the CRLF mode flag (`R`) by default.
- pub fn crlf(&mut self, yes: bool) -> &mut TranslatorBuilder {
- self.flags.crlf = if yes { Some(true) } else { None };
- self
- }
-
- /// Enable or disable the "swap greed" flag (`U`) by default.
- pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
- self.flags.swap_greed = if yes { Some(true) } else { None };
- self
- }
-
- /// Enable or disable the Unicode flag (`u`) by default.
- pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
- self.flags.unicode = if yes { None } else { Some(false) };
- self
- }
-}
-
-/// A translator maps abstract syntax to a high level intermediate
-/// representation.
-///
- /// A translator may benefit from reuse. That is, a translator can translate
-/// many abstract syntax trees.
-///
-/// A `Translator` can be configured in more detail via a
-/// [`TranslatorBuilder`].
-#[derive(Clone, Debug)]
-pub struct Translator {
- /// Our call stack, but on the heap.
- stack: RefCell<Vec<HirFrame>>,
- /// The current flag settings.
- flags: Cell<Flags>,
- /// Whether we're allowed to produce HIR that can match arbitrary bytes.
- utf8: bool,
- /// The line terminator to use for `.`.
- line_terminator: u8,
-}
-
-impl Translator {
- /// Create a new translator using the default configuration.
- pub fn new() -> Translator {
- TranslatorBuilder::new().build()
- }
-
- /// Translate the given abstract syntax tree (AST) into a high level
- /// intermediate representation (HIR).
- ///
- /// If there was a problem doing the translation, then an HIR-specific
- /// error is returned.
- ///
- /// The original pattern string used to produce the `Ast` *must* also be
- /// provided. The translator does not use the pattern string during any
- /// correct translation, but it is used for error reporting.
- pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
- ast::visit(ast, TranslatorI::new(self, pattern))
- }
-}
-
-/// An HirFrame is a single stack frame, represented explicitly, which is
-/// created for each item in the Ast that we traverse.
-///
-/// Note that technically, this type doesn't represent our entire stack
-/// frame. In particular, the Ast visitor represents any state associated with
-/// traversing the Ast itself.
-#[derive(Clone, Debug)]
-enum HirFrame {
- /// An arbitrary HIR expression. These get pushed whenever we hit a base
- /// case in the Ast. They get popped after an inductive (i.e., recursive)
- /// step is complete.
- Expr(Hir),
- /// A literal that is being constructed, character by character, from the
- /// AST. We need this because the AST gives each individual character its
- /// own node. So as we see characters, we peek at the top-most HirFrame.
- /// If it's a literal, then we add to it. Otherwise, we push a new literal.
- /// When it comes time to pop it, we convert it to an Hir via Hir::literal.
- Literal(Vec<u8>),
- /// A Unicode character class. This frame is mutated as we descend into
- /// the Ast of a character class (which is itself its own mini recursive
- /// structure).
- ClassUnicode(hir::ClassUnicode),
- /// A byte-oriented character class. This frame is mutated as we descend
- /// into the Ast of a character class (which is itself its own mini
- /// recursive structure).
- ///
- /// Byte character classes are created when Unicode mode (`u`) is disabled.
- /// If `utf8` is enabled (the default), then a byte character class is only
- /// permitted to match ASCII text.
- ClassBytes(hir::ClassBytes),
- /// This is pushed whenever a repetition is observed. After visiting every
- /// sub-expression in the repetition, the translator's stack is expected to
- /// have this sentinel at the top.
- ///
- /// This sentinel only exists to stop other things (like flattening
- /// literals) from reaching across repetition operators.
- Repetition,
- /// This is pushed on to the stack upon first seeing any kind of capture,
- /// indicated by parentheses (including non-capturing groups). It is popped
- /// upon leaving a group.
- Group {
- /// The old active flags when this group was opened.
- ///
- /// If this group sets flags, then the new active flags are set to the
- /// result of merging the old flags with the flags introduced by this
- /// group. If the group doesn't set any flags, then this is simply
- /// equivalent to whatever flags were set when the group was opened.
- ///
- /// When this group is popped, the active flags should be restored to
- /// the flags set here.
- ///
- /// The "active" flags correspond to whatever flags are set in the
- /// Translator.
- old_flags: Flags,
- },
- /// This is pushed whenever a concatenation is observed. After visiting
- /// every sub-expression in the concatenation, the translator's stack is
- /// popped until it sees a Concat frame.
- Concat,
- /// This is pushed whenever an alternation is observed. After visiting
- /// every sub-expression in the alternation, the translator's stack is
- /// popped until it sees an Alternation frame.
- Alternation,
- /// This is pushed immediately before each sub-expression in an
- /// alternation. This separates the branches of an alternation on the
- /// stack and prevents literal flattening from reaching across alternation
- /// branches.
- ///
- /// It is popped after each expression in a branch until an 'Alternation'
- /// frame is observed when doing a post visit on an alternation.
- AlternationBranch,
-}
-
-impl HirFrame {
- /// Assert that the current stack frame is an Hir expression and return it.
- fn unwrap_expr(self) -> Hir {
- match self {
- HirFrame::Expr(expr) => expr,
- HirFrame::Literal(lit) => Hir::literal(lit),
- _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
- }
- }
-
- /// Assert that the current stack frame is a Unicode class expression and
- /// return it.
- fn unwrap_class_unicode(self) -> hir::ClassUnicode {
- match self {
- HirFrame::ClassUnicode(cls) => cls,
- _ => panic!(
- "tried to unwrap Unicode class \
- from HirFrame, got: {:?}",
- self
- ),
- }
- }
-
- /// Assert that the current stack frame is a byte class expression and
- /// return it.
- fn unwrap_class_bytes(self) -> hir::ClassBytes {
- match self {
- HirFrame::ClassBytes(cls) => cls,
- _ => panic!(
- "tried to unwrap byte class \
- from HirFrame, got: {:?}",
- self
- ),
- }
- }
-
- /// Assert that the current stack frame is a repetition sentinel. If it
- /// isn't, then panic.
- fn unwrap_repetition(self) {
- match self {
- HirFrame::Repetition => {}
- _ => {
- panic!(
- "tried to unwrap repetition from HirFrame, got: {:?}",
- self
- )
- }
- }
- }
-
- /// Assert that the current stack frame is a group indicator and return
- /// its corresponding flags (the flags that were active at the time the
- /// group was entered).
- fn unwrap_group(self) -> Flags {
- match self {
- HirFrame::Group { old_flags } => old_flags,
- _ => {
- panic!("tried to unwrap group from HirFrame, got: {:?}", self)
- }
- }
- }
-
- /// Assert that the current stack frame is an alternation pipe sentinel. If
- /// it isn't, then panic.
- fn unwrap_alternation_pipe(self) {
- match self {
- HirFrame::AlternationBranch => {}
- _ => {
- panic!(
- "tried to unwrap alt pipe from HirFrame, got: {:?}",
- self
- )
- }
- }
- }
-}
-
-impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
- type Output = Hir;
- type Err = Error;
-
- fn finish(self) -> Result<Hir> {
- // ... otherwise, we should have exactly one HIR on the stack.
- assert_eq!(self.trans().stack.borrow().len(), 1);
- Ok(self.pop().unwrap().unwrap_expr())
- }
-
- fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
- match *ast {
- Ast::ClassBracketed(_) => {
- if self.flags().unicode() {
- let cls = hir::ClassUnicode::empty();
- self.push(HirFrame::ClassUnicode(cls));
- } else {
- let cls = hir::ClassBytes::empty();
- self.push(HirFrame::ClassBytes(cls));
- }
- }
- Ast::Repetition(_) => self.push(HirFrame::Repetition),
- Ast::Group(ref x) => {
- let old_flags = x
- .flags()
- .map(|ast| self.set_flags(ast))
- .unwrap_or_else(|| self.flags());
- self.push(HirFrame::Group { old_flags });
- }
- Ast::Concat(_) => {
- self.push(HirFrame::Concat);
- }
- Ast::Alternation(ref x) => {
- self.push(HirFrame::Alternation);
- if !x.asts.is_empty() {
- self.push(HirFrame::AlternationBranch);
- }
- }
- _ => {}
- }
- Ok(())
- }
-
- fn visit_post(&mut self, ast: &Ast) -> Result<()> {
- match *ast {
- Ast::Empty(_) => {
- self.push(HirFrame::Expr(Hir::empty()));
- }
- Ast::Flags(ref x) => {
- self.set_flags(&x.flags);
- // Flags in the AST are generally considered directives and
- // not actual sub-expressions. However, they can be used in
- // the concrete syntax like `((?i))`, and we need some kind of
- // indication of an expression there, and Empty is the correct
- // choice.
- //
- // There can also be things like `(?i)+`, but we rule those out
- // in the parser. In the future, we might allow them for
- // consistency's sake.
- self.push(HirFrame::Expr(Hir::empty()));
- }
- Ast::Literal(ref x) => match self.ast_literal_to_scalar(x)? {
- Either::Right(byte) => self.push_byte(byte),
- Either::Left(ch) => match self.case_fold_char(x.span, ch)? {
- None => self.push_char(ch),
- Some(expr) => self.push(HirFrame::Expr(expr)),
- },
- },
- Ast::Dot(ref span) => {
- self.push(HirFrame::Expr(self.hir_dot(**span)?));
- }
- Ast::Assertion(ref x) => {
- self.push(HirFrame::Expr(self.hir_assertion(x)?));
- }
- Ast::ClassPerl(ref x) => {
- if self.flags().unicode() {
- let cls = self.hir_perl_unicode_class(x)?;
- let hcls = hir::Class::Unicode(cls);
- self.push(HirFrame::Expr(Hir::class(hcls)));
- } else {
- let cls = self.hir_perl_byte_class(x)?;
- let hcls = hir::Class::Bytes(cls);
- self.push(HirFrame::Expr(Hir::class(hcls)));
- }
- }
- Ast::ClassUnicode(ref x) => {
- let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
- self.push(HirFrame::Expr(Hir::class(cls)));
- }
- Ast::ClassBracketed(ref ast) => {
- if self.flags().unicode() {
- let mut cls = self.pop().unwrap().unwrap_class_unicode();
- self.unicode_fold_and_negate(
- &ast.span,
- ast.negated,
- &mut cls,
- )?;
- let expr = Hir::class(hir::Class::Unicode(cls));
- self.push(HirFrame::Expr(expr));
- } else {
- let mut cls = self.pop().unwrap().unwrap_class_bytes();
- self.bytes_fold_and_negate(
- &ast.span,
- ast.negated,
- &mut cls,
- )?;
- let expr = Hir::class(hir::Class::Bytes(cls));
- self.push(HirFrame::Expr(expr));
- }
- }
- Ast::Repetition(ref x) => {
- let expr = self.pop().unwrap().unwrap_expr();
- self.pop().unwrap().unwrap_repetition();
- self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
- }
- Ast::Group(ref x) => {
- let expr = self.pop().unwrap().unwrap_expr();
- let old_flags = self.pop().unwrap().unwrap_group();
- self.trans().flags.set(old_flags);
- self.push(HirFrame::Expr(self.hir_capture(x, expr)));
- }
- Ast::Concat(_) => {
- let mut exprs = vec![];
- while let Some(expr) = self.pop_concat_expr() {
- if !matches!(*expr.kind(), HirKind::Empty) {
- exprs.push(expr);
- }
- }
- exprs.reverse();
- self.push(HirFrame::Expr(Hir::concat(exprs)));
- }
- Ast::Alternation(_) => {
- let mut exprs = vec![];
- while let Some(expr) = self.pop_alt_expr() {
- self.pop().unwrap().unwrap_alternation_pipe();
- exprs.push(expr);
- }
- exprs.reverse();
- self.push(HirFrame::Expr(Hir::alternation(exprs)));
- }
- }
- Ok(())
- }
-
- fn visit_alternation_in(&mut self) -> Result<()> {
- self.push(HirFrame::AlternationBranch);
- Ok(())
- }
-
- fn visit_class_set_item_pre(
- &mut self,
- ast: &ast::ClassSetItem,
- ) -> Result<()> {
- match *ast {
- ast::ClassSetItem::Bracketed(_) => {
- if self.flags().unicode() {
- let cls = hir::ClassUnicode::empty();
- self.push(HirFrame::ClassUnicode(cls));
- } else {
- let cls = hir::ClassBytes::empty();
- self.push(HirFrame::ClassBytes(cls));
- }
- }
- // We needn't handle the Union case here since the visitor will
- // do it for us.
- _ => {}
- }
- Ok(())
- }
-
- fn visit_class_set_item_post(
- &mut self,
- ast: &ast::ClassSetItem,
- ) -> Result<()> {
- match *ast {
- ast::ClassSetItem::Empty(_) => {}
- ast::ClassSetItem::Literal(ref x) => {
- if self.flags().unicode() {
- let mut cls = self.pop().unwrap().unwrap_class_unicode();
- cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
- self.push(HirFrame::ClassUnicode(cls));
- } else {
- let mut cls = self.pop().unwrap().unwrap_class_bytes();
- let byte = self.class_literal_byte(x)?;
- cls.push(hir::ClassBytesRange::new(byte, byte));
- self.push(HirFrame::ClassBytes(cls));
- }
- }
- ast::ClassSetItem::Range(ref x) => {
- if self.flags().unicode() {
- let mut cls = self.pop().unwrap().unwrap_class_unicode();
- cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
- self.push(HirFrame::ClassUnicode(cls));
- } else {
- let mut cls = self.pop().unwrap().unwrap_class_bytes();
- let start = self.class_literal_byte(&x.start)?;
- let end = self.class_literal_byte(&x.end)?;
- cls.push(hir::ClassBytesRange::new(start, end));
- self.push(HirFrame::ClassBytes(cls));
- }
- }
- ast::ClassSetItem::Ascii(ref x) => {
- if self.flags().unicode() {
- let xcls = self.hir_ascii_unicode_class(x)?;
- let mut cls = self.pop().unwrap().unwrap_class_unicode();
- cls.union(&xcls);
- self.push(HirFrame::ClassUnicode(cls));
- } else {
- let xcls = self.hir_ascii_byte_class(x)?;
- let mut cls = self.pop().unwrap().unwrap_class_bytes();
- cls.union(&xcls);
- self.push(HirFrame::ClassBytes(cls));
- }
- }
- ast::ClassSetItem::Unicode(ref x) => {
- let xcls = self.hir_unicode_class(x)?;
- let mut cls = self.pop().unwrap().unwrap_class_unicode();
- cls.union(&xcls);
- self.push(HirFrame::ClassUnicode(cls));
- }
- ast::ClassSetItem::Perl(ref x) => {
- if self.flags().unicode() {
- let xcls = self.hir_perl_unicode_class(x)?;
- let mut cls = self.pop().unwrap().unwrap_class_unicode();
- cls.union(&xcls);
- self.push(HirFrame::ClassUnicode(cls));
- } else {
- let xcls = self.hir_perl_byte_class(x)?;
- let mut cls = self.pop().unwrap().unwrap_class_bytes();
- cls.union(&xcls);
- self.push(HirFrame::ClassBytes(cls));
- }
- }
- ast::ClassSetItem::Bracketed(ref ast) => {
- if self.flags().unicode() {
- let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
- self.unicode_fold_and_negate(
- &ast.span,
- ast.negated,
- &mut cls1,
- )?;
-
- let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
- cls2.union(&cls1);
- self.push(HirFrame::ClassUnicode(cls2));
- } else {
- let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
- self.bytes_fold_and_negate(
- &ast.span,
- ast.negated,
- &mut cls1,
- )?;
-
- let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
- cls2.union(&cls1);
- self.push(HirFrame::ClassBytes(cls2));
- }
- }
- // This is handled automatically by the visitor.
- ast::ClassSetItem::Union(_) => {}
- }
- Ok(())
- }
-
- fn visit_class_set_binary_op_pre(
- &mut self,
- _op: &ast::ClassSetBinaryOp,
- ) -> Result<()> {
- if self.flags().unicode() {
- let cls = hir::ClassUnicode::empty();
- self.push(HirFrame::ClassUnicode(cls));
- } else {
- let cls = hir::ClassBytes::empty();
- self.push(HirFrame::ClassBytes(cls));
- }
- Ok(())
- }
-
- fn visit_class_set_binary_op_in(
- &mut self,
- _op: &ast::ClassSetBinaryOp,
- ) -> Result<()> {
- if self.flags().unicode() {
- let cls = hir::ClassUnicode::empty();
- self.push(HirFrame::ClassUnicode(cls));
- } else {
- let cls = hir::ClassBytes::empty();
- self.push(HirFrame::ClassBytes(cls));
- }
- Ok(())
- }
-
- fn visit_class_set_binary_op_post(
- &mut self,
- op: &ast::ClassSetBinaryOp,
- ) -> Result<()> {
- use crate::ast::ClassSetBinaryOpKind::*;
-
- if self.flags().unicode() {
- let mut rhs = self.pop().unwrap().unwrap_class_unicode();
- let mut lhs = self.pop().unwrap().unwrap_class_unicode();
- let mut cls = self.pop().unwrap().unwrap_class_unicode();
- if self.flags().case_insensitive() {
- rhs.try_case_fold_simple().map_err(|_| {
- self.error(
- op.rhs.span().clone(),
- ErrorKind::UnicodeCaseUnavailable,
- )
- })?;
- lhs.try_case_fold_simple().map_err(|_| {
- self.error(
- op.lhs.span().clone(),
- ErrorKind::UnicodeCaseUnavailable,
- )
- })?;
- }
- match op.kind {
- Intersection => lhs.intersect(&rhs),
- Difference => lhs.difference(&rhs),
- SymmetricDifference => lhs.symmetric_difference(&rhs),
- }
- cls.union(&lhs);
- self.push(HirFrame::ClassUnicode(cls));
- } else {
- let mut rhs = self.pop().unwrap().unwrap_class_bytes();
- let mut lhs = self.pop().unwrap().unwrap_class_bytes();
- let mut cls = self.pop().unwrap().unwrap_class_bytes();
- if self.flags().case_insensitive() {
- rhs.case_fold_simple();
- lhs.case_fold_simple();
- }
- match op.kind {
- Intersection => lhs.intersect(&rhs),
- Difference => lhs.difference(&rhs),
- SymmetricDifference => lhs.symmetric_difference(&rhs),
- }
- cls.union(&lhs);
- self.push(HirFrame::ClassBytes(cls));
- }
- Ok(())
- }
-}
-
-/// The internal implementation of a translator.
-///
-/// This type is responsible for carrying around the original pattern string,
-/// which is not tied to the internal state of a translator.
-///
-/// A TranslatorI exists for the time it takes to translate a single Ast.
-#[derive(Clone, Debug)]
-struct TranslatorI<'t, 'p> {
- trans: &'t Translator,
- pattern: &'p str,
-}
-
-impl<'t, 'p> TranslatorI<'t, 'p> {
- /// Build a new internal translator.
- fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
- TranslatorI { trans, pattern }
- }
-
- /// Return a reference to the underlying translator.
- fn trans(&self) -> &Translator {
- &self.trans
- }
-
- /// Push the given frame on to the call stack.
- fn push(&self, frame: HirFrame) {
- self.trans().stack.borrow_mut().push(frame);
- }
-
- /// Push the given literal char on to the call stack.
- ///
- /// If the top-most element of the stack is a literal, then the char
- /// is appended to the end of that literal. Otherwise, a new literal
- /// containing just the given char is pushed to the top of the stack.
- fn push_char(&self, ch: char) {
- let mut buf = [0; 4];
- let bytes = ch.encode_utf8(&mut buf).as_bytes();
- let mut stack = self.trans().stack.borrow_mut();
- if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() {
- literal.extend_from_slice(bytes);
- } else {
- stack.push(HirFrame::Literal(bytes.to_vec()));
- }
- }
-
- /// Push the given literal byte on to the call stack.
- ///
- /// If the top-most element of the stack is a literal, then the byte
- /// is appended to the end of that literal. Otherwise, a new literal
- /// containing just the given byte is pushed to the top of the stack.
- fn push_byte(&self, byte: u8) {
- let mut stack = self.trans().stack.borrow_mut();
- if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() {
- literal.push(byte);
- } else {
- stack.push(HirFrame::Literal(vec![byte]));
- }
- }
-
- /// Pop the top of the call stack. If the call stack is empty, return None.
- fn pop(&self) -> Option<HirFrame> {
- self.trans().stack.borrow_mut().pop()
- }
-
- /// Pop an HIR expression from the top of the stack for a concatenation.
- ///
- /// This returns None if the stack is empty or when a concat frame is seen.
- /// Otherwise, it panics if it could not find an HIR expression.
- fn pop_concat_expr(&self) -> Option<Hir> {
- let frame = self.pop()?;
- match frame {
- HirFrame::Concat => None,
- HirFrame::Expr(expr) => Some(expr),
- HirFrame::Literal(lit) => Some(Hir::literal(lit)),
- HirFrame::ClassUnicode(_) => {
- unreachable!("expected expr or concat, got Unicode class")
- }
- HirFrame::ClassBytes(_) => {
- unreachable!("expected expr or concat, got byte class")
- }
- HirFrame::Repetition => {
- unreachable!("expected expr or concat, got repetition")
- }
- HirFrame::Group { .. } => {
- unreachable!("expected expr or concat, got group")
- }
- HirFrame::Alternation => {
- unreachable!("expected expr or concat, got alt marker")
- }
- HirFrame::AlternationBranch => {
- unreachable!("expected expr or concat, got alt branch marker")
- }
- }
- }
-
- /// Pop an HIR expression from the top of the stack for an alternation.
- ///
- /// This returns None if the stack is empty or when an alternation frame is
- /// seen. Otherwise, it panics if it could not find an HIR expression.
- fn pop_alt_expr(&self) -> Option<Hir> {
- let frame = self.pop()?;
- match frame {
- HirFrame::Alternation => None,
- HirFrame::Expr(expr) => Some(expr),
- HirFrame::Literal(lit) => Some(Hir::literal(lit)),
- HirFrame::ClassUnicode(_) => {
- unreachable!("expected expr or alt, got Unicode class")
- }
- HirFrame::ClassBytes(_) => {
- unreachable!("expected expr or alt, got byte class")
- }
- HirFrame::Repetition => {
- unreachable!("expected expr or alt, got repetition")
- }
- HirFrame::Group { .. } => {
- unreachable!("expected expr or alt, got group")
- }
- HirFrame::Concat => {
- unreachable!("expected expr or alt, got concat marker")
- }
- HirFrame::AlternationBranch => {
- unreachable!("expected expr or alt, got alt branch marker")
- }
- }
- }
-
- /// Create a new error with the given span and error type.
- fn error(&self, span: Span, kind: ErrorKind) -> Error {
- Error { kind, pattern: self.pattern.to_string(), span }
- }
-
- /// Return a copy of the active flags.
- fn flags(&self) -> Flags {
- self.trans().flags.get()
- }
-
- /// Set the flags of this translator from the flags set in the given AST.
- /// Then, return the old flags.
- fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
- let old_flags = self.flags();
- let mut new_flags = Flags::from_ast(ast_flags);
- new_flags.merge(&old_flags);
- self.trans().flags.set(new_flags);
- old_flags
- }
-
- /// Convert an Ast literal to its scalar representation.
- ///
- /// When Unicode mode is enabled, then this always succeeds and returns a
- /// `char` (Unicode scalar value).
- ///
- /// When Unicode mode is disabled, then a `char` will still be returned
- /// whenever possible. A byte is returned only when invalid UTF-8 is
- /// allowed and when the byte is not ASCII. Otherwise, a non-ASCII byte
- /// will result in an error when invalid UTF-8 is not allowed.
- fn ast_literal_to_scalar(
- &self,
- lit: &ast::Literal,
- ) -> Result<Either<char, u8>> {
- if self.flags().unicode() {
- return Ok(Either::Left(lit.c));
- }
- let byte = match lit.byte() {
- None => return Ok(Either::Left(lit.c)),
- Some(byte) => byte,
- };
- if byte <= 0x7F {
- return Ok(Either::Left(char::try_from(byte).unwrap()));
- }
- if self.trans().utf8 {
- return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
- }
- Ok(Either::Right(byte))
- }
-
- fn case_fold_char(&self, span: Span, c: char) -> Result<Option<Hir>> {
- if !self.flags().case_insensitive() {
- return Ok(None);
- }
- if self.flags().unicode() {
- // If case folding won't do anything, then don't bother trying.
- let map = unicode::SimpleCaseFolder::new()
- .map(|f| f.overlaps(c, c))
- .map_err(|_| {
- self.error(span, ErrorKind::UnicodeCaseUnavailable)
- })?;
- if !map {
- return Ok(None);
- }
- let mut cls =
- hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
- c, c,
- )]);
- cls.try_case_fold_simple().map_err(|_| {
- self.error(span, ErrorKind::UnicodeCaseUnavailable)
- })?;
- Ok(Some(Hir::class(hir::Class::Unicode(cls))))
- } else {
- if !c.is_ascii() {
- return Ok(None);
- }
- // If case folding won't do anything, then don't bother trying.
- match c {
- 'A'..='Z' | 'a'..='z' => {}
- _ => return Ok(None),
- }
- let mut cls =
- hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
- // OK because 'c.len_utf8() == 1' which in turn implies
- // that 'c' is ASCII.
- u8::try_from(c).unwrap(),
- u8::try_from(c).unwrap(),
- )]);
- cls.case_fold_simple();
- Ok(Some(Hir::class(hir::Class::Bytes(cls))))
- }
- }
-
- fn hir_dot(&self, span: Span) -> Result<Hir> {
- let (utf8, lineterm, flags) =
- (self.trans().utf8, self.trans().line_terminator, self.flags());
- if utf8 && (!flags.unicode() || !lineterm.is_ascii()) {
- return Err(self.error(span, ErrorKind::InvalidUtf8));
- }
- let dot = if flags.dot_matches_new_line() {
- if flags.unicode() {
- hir::Dot::AnyChar
- } else {
- hir::Dot::AnyByte
- }
- } else {
- if flags.unicode() {
- if flags.crlf() {
- hir::Dot::AnyCharExceptCRLF
- } else {
- if !lineterm.is_ascii() {
- return Err(
- self.error(span, ErrorKind::InvalidLineTerminator)
- );
- }
- hir::Dot::AnyCharExcept(char::from(lineterm))
- }
- } else {
- if flags.crlf() {
- hir::Dot::AnyByteExceptCRLF
- } else {
- hir::Dot::AnyByteExcept(lineterm)
- }
- }
- };
- Ok(Hir::dot(dot))
- }
-
- fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
- let unicode = self.flags().unicode();
- let multi_line = self.flags().multi_line();
- let crlf = self.flags().crlf();
- Ok(match asst.kind {
- ast::AssertionKind::StartLine => Hir::look(if multi_line {
- if crlf {
- hir::Look::StartCRLF
- } else {
- hir::Look::StartLF
- }
- } else {
- hir::Look::Start
- }),
- ast::AssertionKind::EndLine => Hir::look(if multi_line {
- if crlf {
- hir::Look::EndCRLF
- } else {
- hir::Look::EndLF
- }
- } else {
- hir::Look::End
- }),
- ast::AssertionKind::StartText => Hir::look(hir::Look::Start),
- ast::AssertionKind::EndText => Hir::look(hir::Look::End),
- ast::AssertionKind::WordBoundary => Hir::look(if unicode {
- hir::Look::WordUnicode
- } else {
- hir::Look::WordAscii
- }),
- ast::AssertionKind::NotWordBoundary => Hir::look(if unicode {
- hir::Look::WordUnicodeNegate
- } else {
- hir::Look::WordAsciiNegate
- }),
- ast::AssertionKind::WordBoundaryStart
- | ast::AssertionKind::WordBoundaryStartAngle => {
- Hir::look(if unicode {
- hir::Look::WordStartUnicode
- } else {
- hir::Look::WordStartAscii
- })
- }
- ast::AssertionKind::WordBoundaryEnd
- | ast::AssertionKind::WordBoundaryEndAngle => {
- Hir::look(if unicode {
- hir::Look::WordEndUnicode
- } else {
- hir::Look::WordEndAscii
- })
- }
- ast::AssertionKind::WordBoundaryStartHalf => {
- Hir::look(if unicode {
- hir::Look::WordStartHalfUnicode
- } else {
- hir::Look::WordStartHalfAscii
- })
- }
- ast::AssertionKind::WordBoundaryEndHalf => Hir::look(if unicode {
- hir::Look::WordEndHalfUnicode
- } else {
- hir::Look::WordEndHalfAscii
- }),
- })
- }
-
- fn hir_capture(&self, group: &ast::Group, expr: Hir) -> Hir {
- let (index, name) = match group.kind {
- ast::GroupKind::CaptureIndex(index) => (index, None),
- ast::GroupKind::CaptureName { ref name, .. } => {
- (name.index, Some(name.name.clone().into_boxed_str()))
- }
- // The HIR doesn't need to use non-capturing groups, since the way
- // in which the data type is defined handles this automatically.
- ast::GroupKind::NonCapturing(_) => return expr,
- };
- Hir::capture(hir::Capture { index, name, sub: Box::new(expr) })
- }
-
- fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
- let (min, max) = match rep.op.kind {
- ast::RepetitionKind::ZeroOrOne => (0, Some(1)),
- ast::RepetitionKind::ZeroOrMore => (0, None),
- ast::RepetitionKind::OneOrMore => (1, None),
- ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
- (m, Some(m))
- }
- ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
- (m, None)
- }
- ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
- m,
- n,
- )) => (m, Some(n)),
- };
- let greedy =
- if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
- Hir::repetition(hir::Repetition {
- min,
- max,
- greedy,
- sub: Box::new(expr),
- })
- }
-
- fn hir_unicode_class(
- &self,
- ast_class: &ast::ClassUnicode,
- ) -> Result<hir::ClassUnicode> {
- use crate::ast::ClassUnicodeKind::*;
-
- if !self.flags().unicode() {
- return Err(
- self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
- );
- }
- let query = match ast_class.kind {
- OneLetter(name) => ClassQuery::OneLetter(name),
- Named(ref name) => ClassQuery::Binary(name),
- NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
- property_name: name,
- property_value: value,
- },
- };
- let mut result = self.convert_unicode_class_error(
- &ast_class.span,
- unicode::class(query),
- );
- if let Ok(ref mut class) = result {
- self.unicode_fold_and_negate(
- &ast_class.span,
- ast_class.negated,
- class,
- )?;
- }
- result
- }
-
- fn hir_ascii_unicode_class(
- &self,
- ast: &ast::ClassAscii,
- ) -> Result<hir::ClassUnicode> {
- let mut cls = hir::ClassUnicode::new(
- ascii_class_as_chars(&ast.kind)
- .map(|(s, e)| hir::ClassUnicodeRange::new(s, e)),
- );
- self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
- Ok(cls)
- }
-
- fn hir_ascii_byte_class(
- &self,
- ast: &ast::ClassAscii,
- ) -> Result<hir::ClassBytes> {
- let mut cls = hir::ClassBytes::new(
- ascii_class(&ast.kind)
- .map(|(s, e)| hir::ClassBytesRange::new(s, e)),
- );
- self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
- Ok(cls)
- }
-
- fn hir_perl_unicode_class(
- &self,
- ast_class: &ast::ClassPerl,
- ) -> Result<hir::ClassUnicode> {
- use crate::ast::ClassPerlKind::*;
-
- assert!(self.flags().unicode());
- let result = match ast_class.kind {
- Digit => unicode::perl_digit(),
- Space => unicode::perl_space(),
- Word => unicode::perl_word(),
- };
- let mut class =
- self.convert_unicode_class_error(&ast_class.span, result)?;
- // We needn't apply case folding here because the Perl Unicode classes
- // are already closed under Unicode simple case folding.
- if ast_class.negated {
- class.negate();
- }
- Ok(class)
- }
-
- fn hir_perl_byte_class(
- &self,
- ast_class: &ast::ClassPerl,
- ) -> Result<hir::ClassBytes> {
- use crate::ast::ClassPerlKind::*;
-
- assert!(!self.flags().unicode());
- let mut class = match ast_class.kind {
- Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
- Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
- Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
- };
- // We needn't apply case folding here because the Perl ASCII classes
- // are already closed (under ASCII case folding).
- if ast_class.negated {
- class.negate();
- }
- // Negating a Perl byte class is likely to cause it to match invalid
- // UTF-8. That's only OK if the translator is configured to allow such
- // things.
- if self.trans().utf8 && !class.is_ascii() {
- return Err(self.error(ast_class.span, ErrorKind::InvalidUtf8));
- }
- Ok(class)
- }
-
- /// Converts the given Unicode specific error to an HIR translation error.
- ///
- /// The span given should approximate the position at which an error would
- /// occur.
- fn convert_unicode_class_error(
- &self,
- span: &Span,
- result: core::result::Result<hir::ClassUnicode, unicode::Error>,
- ) -> Result<hir::ClassUnicode> {
- result.map_err(|err| {
- let sp = span.clone();
- match err {
- unicode::Error::PropertyNotFound => {
- self.error(sp, ErrorKind::UnicodePropertyNotFound)
- }
- unicode::Error::PropertyValueNotFound => {
- self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
- }
- unicode::Error::PerlClassNotFound => {
- self.error(sp, ErrorKind::UnicodePerlClassNotFound)
- }
- }
- })
- }
-
- fn unicode_fold_and_negate(
- &self,
- span: &Span,
- negated: bool,
- class: &mut hir::ClassUnicode,
- ) -> Result<()> {
- // Note that we must apply case folding before negation!
- // Consider `(?i)[^x]`. If we applied negation first, then
- // the result would be the character class that matched any
- // Unicode scalar value.
- if self.flags().case_insensitive() {
- class.try_case_fold_simple().map_err(|_| {
- self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
- })?;
- }
- if negated {
- class.negate();
- }
- Ok(())
- }
-
- fn bytes_fold_and_negate(
- &self,
- span: &Span,
- negated: bool,
- class: &mut hir::ClassBytes,
- ) -> Result<()> {
- // Note that we must apply case folding before negation!
- // Consider `(?i)[^x]`. If we applied negation first, then
- // the result would be the character class that matched any
- // Unicode scalar value.
- if self.flags().case_insensitive() {
- class.case_fold_simple();
- }
- if negated {
- class.negate();
- }
- if self.trans().utf8 && !class.is_ascii() {
- return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
- }
- Ok(())
- }
-
- /// Return a scalar byte value suitable for use as a literal in a byte
- /// character class.
- fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
- match self.ast_literal_to_scalar(ast)? {
- Either::Right(byte) => Ok(byte),
- Either::Left(ch) => {
- if ch.is_ascii() {
- Ok(u8::try_from(ch).unwrap())
- } else {
- // We can't feasibly support Unicode in
- // byte oriented classes. Byte classes don't
- // do Unicode case folding.
- Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
- }
- }
- }
- }
-}
-
/// The flags of a regular expression as seen by a translator at one point
/// during translation.
///
/// Every flag has three possible states: absent (`None`), present but
/// disabled (`Some(false)`) or present and enabled (`Some(true)`).
#[derive(Clone, Copy, Debug, Default)]
struct Flags {
    case_insensitive: Option<bool>,
    multi_line: Option<bool>,
    dot_matches_new_line: Option<bool>,
    swap_greed: Option<bool>,
    unicode: Option<bool>,
    crlf: Option<bool>,
    // There is deliberately no `ignore_whitespace` field: that flag is
    // dealt with entirely by the parser.
}
-
-impl Flags {
- fn from_ast(ast: &ast::Flags) -> Flags {
- let mut flags = Flags::default();
- let mut enable = true;
- for item in &ast.items {
- match item.kind {
- ast::FlagsItemKind::Negation => {
- enable = false;
- }
- ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
- flags.case_insensitive = Some(enable);
- }
- ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
- flags.multi_line = Some(enable);
- }
- ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
- flags.dot_matches_new_line = Some(enable);
- }
- ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
- flags.swap_greed = Some(enable);
- }
- ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
- flags.unicode = Some(enable);
- }
- ast::FlagsItemKind::Flag(ast::Flag::CRLF) => {
- flags.crlf = Some(enable);
- }
- ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
- }
- }
- flags
- }
-
- fn merge(&mut self, previous: &Flags) {
- if self.case_insensitive.is_none() {
- self.case_insensitive = previous.case_insensitive;
- }
- if self.multi_line.is_none() {
- self.multi_line = previous.multi_line;
- }
- if self.dot_matches_new_line.is_none() {
- self.dot_matches_new_line = previous.dot_matches_new_line;
- }
- if self.swap_greed.is_none() {
- self.swap_greed = previous.swap_greed;
- }
- if self.unicode.is_none() {
- self.unicode = previous.unicode;
- }
- if self.crlf.is_none() {
- self.crlf = previous.crlf;
- }
- }
-
- fn case_insensitive(&self) -> bool {
- self.case_insensitive.unwrap_or(false)
- }
-
- fn multi_line(&self) -> bool {
- self.multi_line.unwrap_or(false)
- }
-
- fn dot_matches_new_line(&self) -> bool {
- self.dot_matches_new_line.unwrap_or(false)
- }
-
- fn swap_greed(&self) -> bool {
- self.swap_greed.unwrap_or(false)
- }
-
- fn unicode(&self) -> bool {
- self.unicode.unwrap_or(true)
- }
-
- fn crlf(&self) -> bool {
- self.crlf.unwrap_or(false)
- }
-}
-
-fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
- let ranges: Vec<_> = ascii_class(kind)
- .map(|(s, e)| hir::ClassBytesRange::new(s, e))
- .collect();
- hir::ClassBytes::new(ranges)
-}
-
-fn ascii_class(kind: &ast::ClassAsciiKind) -> impl Iterator<Item = (u8, u8)> {
- use crate::ast::ClassAsciiKind::*;
-
- let slice: &'static [(u8, u8)] = match *kind {
- Alnum => &[(b'0', b'9'), (b'A', b'Z'), (b'a', b'z')],
- Alpha => &[(b'A', b'Z'), (b'a', b'z')],
- Ascii => &[(b'\x00', b'\x7F')],
- Blank => &[(b'\t', b'\t'), (b' ', b' ')],
- Cntrl => &[(b'\x00', b'\x1F'), (b'\x7F', b'\x7F')],
- Digit => &[(b'0', b'9')],
- Graph => &[(b'!', b'~')],
- Lower => &[(b'a', b'z')],
- Print => &[(b' ', b'~')],
- Punct => &[(b'!', b'/'), (b':', b'@'), (b'[', b'`'), (b'{', b'~')],
- Space => &[
- (b'\t', b'\t'),
- (b'\n', b'\n'),
- (b'\x0B', b'\x0B'),
- (b'\x0C', b'\x0C'),
- (b'\r', b'\r'),
- (b' ', b' '),
- ],
- Upper => &[(b'A', b'Z')],
- Word => &[(b'0', b'9'), (b'A', b'Z'), (b'_', b'_'), (b'a', b'z')],
- Xdigit => &[(b'0', b'9'), (b'A', b'F'), (b'a', b'f')],
- };
- slice.iter().copied()
-}
-
-fn ascii_class_as_chars(
- kind: &ast::ClassAsciiKind,
-) -> impl Iterator<Item = (char, char)> {
- ascii_class(kind).map(|(s, e)| (char::from(s), char::from(e)))
-}
-
-#[cfg(test)]
-mod tests {
- use crate::{
- ast::{parse::ParserBuilder, Position},
- hir::{Look, Properties},
- };
-
- use super::*;
-
- // We create these errors to compare with real hir::Errors in the tests.
- // We define equality between TestError and hir::Error to disregard the
- // pattern string in hir::Error, which is annoying to provide in tests.
- #[derive(Clone, Debug)]
- struct TestError {
- span: Span,
- kind: hir::ErrorKind,
- }
-
- impl PartialEq<hir::Error> for TestError {
- fn eq(&self, other: &hir::Error) -> bool {
- self.span == other.span && self.kind == other.kind
- }
- }
-
- impl PartialEq<TestError> for hir::Error {
- fn eq(&self, other: &TestError) -> bool {
- self.span == other.span && self.kind == other.kind
- }
- }
-
- fn parse(pattern: &str) -> Ast {
- ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
- }
-
- fn t(pattern: &str) -> Hir {
- TranslatorBuilder::new()
- .utf8(true)
- .build()
- .translate(pattern, &parse(pattern))
- .unwrap()
- }
-
- fn t_err(pattern: &str) -> hir::Error {
- TranslatorBuilder::new()
- .utf8(true)
- .build()
- .translate(pattern, &parse(pattern))
- .unwrap_err()
- }
-
- fn t_bytes(pattern: &str) -> Hir {
- TranslatorBuilder::new()
- .utf8(false)
- .build()
- .translate(pattern, &parse(pattern))
- .unwrap()
- }
-
- fn props(pattern: &str) -> Properties {
- t(pattern).properties().clone()
- }
-
- fn props_bytes(pattern: &str) -> Properties {
- t_bytes(pattern).properties().clone()
- }
-
- fn hir_lit(s: &str) -> Hir {
- hir_blit(s.as_bytes())
- }
-
- fn hir_blit(s: &[u8]) -> Hir {
- Hir::literal(s)
- }
-
- fn hir_capture(index: u32, expr: Hir) -> Hir {
- Hir::capture(hir::Capture { index, name: None, sub: Box::new(expr) })
- }
-
- fn hir_capture_name(index: u32, name: &str, expr: Hir) -> Hir {
- Hir::capture(hir::Capture {
- index,
- name: Some(name.into()),
- sub: Box::new(expr),
- })
- }
-
- fn hir_quest(greedy: bool, expr: Hir) -> Hir {
- Hir::repetition(hir::Repetition {
- min: 0,
- max: Some(1),
- greedy,
- sub: Box::new(expr),
- })
- }
-
- fn hir_star(greedy: bool, expr: Hir) -> Hir {
- Hir::repetition(hir::Repetition {
- min: 0,
- max: None,
- greedy,
- sub: Box::new(expr),
- })
- }
-
- fn hir_plus(greedy: bool, expr: Hir) -> Hir {
- Hir::repetition(hir::Repetition {
- min: 1,
- max: None,
- greedy,
- sub: Box::new(expr),
- })
- }
-
- fn hir_range(greedy: bool, min: u32, max: Option<u32>, expr: Hir) -> Hir {
- Hir::repetition(hir::Repetition {
- min,
- max,
- greedy,
- sub: Box::new(expr),
- })
- }
-
- fn hir_alt(alts: Vec<Hir>) -> Hir {
- Hir::alternation(alts)
- }
-
- fn hir_cat(exprs: Vec<Hir>) -> Hir {
- Hir::concat(exprs)
- }
-
- #[allow(dead_code)]
- fn hir_uclass_query(query: ClassQuery<'_>) -> Hir {
- Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
- }
-
- #[allow(dead_code)]
- fn hir_uclass_perl_word() -> Hir {
- Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
- }
-
- fn hir_ascii_uclass(kind: &ast::ClassAsciiKind) -> Hir {
- Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(
- ascii_class_as_chars(kind)
- .map(|(s, e)| hir::ClassUnicodeRange::new(s, e)),
- )))
- }
-
- fn hir_ascii_bclass(kind: &ast::ClassAsciiKind) -> Hir {
- Hir::class(hir::Class::Bytes(hir::ClassBytes::new(
- ascii_class(kind).map(|(s, e)| hir::ClassBytesRange::new(s, e)),
- )))
- }
-
- fn hir_uclass(ranges: &[(char, char)]) -> Hir {
- Hir::class(uclass(ranges))
- }
-
- fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
- Hir::class(bclass(ranges))
- }
-
- fn hir_case_fold(expr: Hir) -> Hir {
- match expr.into_kind() {
- HirKind::Class(mut cls) => {
- cls.case_fold_simple();
- Hir::class(cls)
- }
- _ => panic!("cannot case fold non-class Hir expr"),
- }
- }
-
- fn hir_negate(expr: Hir) -> Hir {
- match expr.into_kind() {
- HirKind::Class(mut cls) => {
- cls.negate();
- Hir::class(cls)
- }
- _ => panic!("cannot negate non-class Hir expr"),
- }
- }
-
- fn uclass(ranges: &[(char, char)]) -> hir::Class {
- let ranges: Vec<hir::ClassUnicodeRange> = ranges
- .iter()
- .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
- .collect();
- hir::Class::Unicode(hir::ClassUnicode::new(ranges))
- }
-
- fn bclass(ranges: &[(u8, u8)]) -> hir::Class {
- let ranges: Vec<hir::ClassBytesRange> = ranges
- .iter()
- .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
- .collect();
- hir::Class::Bytes(hir::ClassBytes::new(ranges))
- }
-
- #[cfg(feature = "unicode-case")]
- fn class_case_fold(mut cls: hir::Class) -> Hir {
- cls.case_fold_simple();
- Hir::class(cls)
- }
-
- fn class_negate(mut cls: hir::Class) -> Hir {
- cls.negate();
- Hir::class(cls)
- }
-
- #[allow(dead_code)]
- fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
- use crate::hir::Class::{Bytes, Unicode};
-
- match (expr1.into_kind(), expr2.into_kind()) {
- (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
- c1.union(&c2);
- Hir::class(hir::Class::Unicode(c1))
- }
- (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
- c1.union(&c2);
- Hir::class(hir::Class::Bytes(c1))
- }
- _ => panic!("cannot union non-class Hir exprs"),
- }
- }
-
- #[allow(dead_code)]
- fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
- use crate::hir::Class::{Bytes, Unicode};
-
- match (expr1.into_kind(), expr2.into_kind()) {
- (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
- c1.difference(&c2);
- Hir::class(hir::Class::Unicode(c1))
- }
- (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
- c1.difference(&c2);
- Hir::class(hir::Class::Bytes(c1))
- }
- _ => panic!("cannot difference non-class Hir exprs"),
- }
- }
-
- fn hir_look(look: hir::Look) -> Hir {
- Hir::look(look)
- }
-
- #[test]
- fn empty() {
- assert_eq!(t(""), Hir::empty());
- assert_eq!(t("(?i)"), Hir::empty());
- assert_eq!(t("()"), hir_capture(1, Hir::empty()));
- assert_eq!(t("(?:)"), Hir::empty());
- assert_eq!(t("(?P<wat>)"), hir_capture_name(1, "wat", Hir::empty()));
- assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
- assert_eq!(
- t("()|()"),
- hir_alt(vec![
- hir_capture(1, Hir::empty()),
- hir_capture(2, Hir::empty()),
- ])
- );
- assert_eq!(
- t("(|b)"),
- hir_capture(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
- );
- assert_eq!(
- t("(a|)"),
- hir_capture(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
- );
- assert_eq!(
- t("(a||c)"),
- hir_capture(
- 1,
- hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
- )
- );
- assert_eq!(
- t("(||)"),
- hir_capture(
- 1,
- hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
- )
- );
- }
-
- #[test]
- fn literal() {
- assert_eq!(t("a"), hir_lit("a"));
- assert_eq!(t("(?-u)a"), hir_lit("a"));
- assert_eq!(t("☃"), hir_lit("☃"));
- assert_eq!(t("abcd"), hir_lit("abcd"));
-
- assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
- assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
- assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
- assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));
-
- assert_eq!(t("(?-u)☃"), hir_lit("☃"));
- assert_eq!(
- t_err(r"(?-u)\xFF"),
- TestError {
- kind: hir::ErrorKind::InvalidUtf8,
- span: Span::new(
- Position::new(5, 1, 6),
- Position::new(9, 1, 10)
- ),
- }
- );
- }
-
- #[test]
- fn literal_case_insensitive() {
- #[cfg(feature = "unicode-case")]
- assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
- #[cfg(feature = "unicode-case")]
- assert_eq!(t("(?i:a)"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("a(?i)a(?-i)a"),
- hir_cat(vec![
- hir_lit("a"),
- hir_uclass(&[('A', 'A'), ('a', 'a')]),
- hir_lit("a"),
- ])
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?i)ab@c"),
- hir_cat(vec![
- hir_uclass(&[('A', 'A'), ('a', 'a')]),
- hir_uclass(&[('B', 'B'), ('b', 'b')]),
- hir_lit("@"),
- hir_uclass(&[('C', 'C'), ('c', 'c')]),
- ])
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?i)β"),
- hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
- );
-
- assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?-u)a(?i)a(?-i)a"),
- hir_cat(vec![
- hir_lit("a"),
- hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
- hir_lit("a"),
- ])
- );
- assert_eq!(
- t("(?i-u)ab@c"),
- hir_cat(vec![
- hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
- hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
- hir_lit("@"),
- hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
- ])
- );
-
- assert_eq!(
- t_bytes("(?i-u)a"),
- hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
- );
- assert_eq!(
- t_bytes("(?i-u)\x61"),
- hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
- );
- assert_eq!(
- t_bytes(r"(?i-u)\x61"),
- hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
- );
- assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));
-
- assert_eq!(t("(?i-u)β"), hir_lit("β"),);
- }
-
- #[test]
- fn dot() {
- assert_eq!(
- t("."),
- hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')])
- );
- assert_eq!(
- t("(?R)."),
- hir_uclass(&[
- ('\0', '\t'),
- ('\x0B', '\x0C'),
- ('\x0E', '\u{10FFFF}'),
- ])
- );
- assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}')]));
- assert_eq!(t("(?Rs)."), hir_uclass(&[('\0', '\u{10FFFF}')]));
- assert_eq!(
- t_bytes("(?-u)."),
- hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')])
- );
- assert_eq!(
- t_bytes("(?R-u)."),
- hir_bclass(&[
- (b'\0', b'\t'),
- (b'\x0B', b'\x0C'),
- (b'\x0E', b'\xFF'),
- ])
- );
- assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
- assert_eq!(t_bytes("(?Rs-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
-
- // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
- assert_eq!(
- t_err("(?-u)."),
- TestError {
- kind: hir::ErrorKind::InvalidUtf8,
- span: Span::new(
- Position::new(5, 1, 6),
- Position::new(6, 1, 7)
- ),
- }
- );
- assert_eq!(
- t_err("(?R-u)."),
- TestError {
- kind: hir::ErrorKind::InvalidUtf8,
- span: Span::new(
- Position::new(6, 1, 7),
- Position::new(7, 1, 8)
- ),
- }
- );
- assert_eq!(
- t_err("(?s-u)."),
- TestError {
- kind: hir::ErrorKind::InvalidUtf8,
- span: Span::new(
- Position::new(6, 1, 7),
- Position::new(7, 1, 8)
- ),
- }
- );
- assert_eq!(
- t_err("(?Rs-u)."),
- TestError {
- kind: hir::ErrorKind::InvalidUtf8,
- span: Span::new(
- Position::new(7, 1, 8),
- Position::new(8, 1, 9)
- ),
- }
- );
- }
-
- #[test]
- fn assertions() {
- assert_eq!(t("^"), hir_look(hir::Look::Start));
- assert_eq!(t("$"), hir_look(hir::Look::End));
- assert_eq!(t(r"\A"), hir_look(hir::Look::Start));
- assert_eq!(t(r"\z"), hir_look(hir::Look::End));
- assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF));
- assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF));
- assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start));
- assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End));
-
- assert_eq!(t(r"\b"), hir_look(hir::Look::WordUnicode));
- assert_eq!(t(r"\B"), hir_look(hir::Look::WordUnicodeNegate));
- assert_eq!(t(r"(?-u)\b"), hir_look(hir::Look::WordAscii));
- assert_eq!(t(r"(?-u)\B"), hir_look(hir::Look::WordAsciiNegate));
- }
-
- #[test]
- fn group() {
- assert_eq!(t("(a)"), hir_capture(1, hir_lit("a")));
- assert_eq!(
- t("(a)(b)"),
- hir_cat(vec![
- hir_capture(1, hir_lit("a")),
- hir_capture(2, hir_lit("b")),
- ])
- );
- assert_eq!(
- t("(a)|(b)"),
- hir_alt(vec![
- hir_capture(1, hir_lit("a")),
- hir_capture(2, hir_lit("b")),
- ])
- );
- assert_eq!(t("(?P<foo>)"), hir_capture_name(1, "foo", Hir::empty()));
- assert_eq!(t("(?P<foo>a)"), hir_capture_name(1, "foo", hir_lit("a")));
- assert_eq!(
- t("(?P<foo>a)(?P<bar>b)"),
- hir_cat(vec![
- hir_capture_name(1, "foo", hir_lit("a")),
- hir_capture_name(2, "bar", hir_lit("b")),
- ])
- );
- assert_eq!(t("(?:)"), Hir::empty());
- assert_eq!(t("(?:a)"), hir_lit("a"));
- assert_eq!(
- t("(?:a)(b)"),
- hir_cat(vec![hir_lit("a"), hir_capture(1, hir_lit("b")),])
- );
- assert_eq!(
- t("(a)(?:b)(c)"),
- hir_cat(vec![
- hir_capture(1, hir_lit("a")),
- hir_lit("b"),
- hir_capture(2, hir_lit("c")),
- ])
- );
- assert_eq!(
- t("(a)(?P<foo>b)(c)"),
- hir_cat(vec![
- hir_capture(1, hir_lit("a")),
- hir_capture_name(2, "foo", hir_lit("b")),
- hir_capture(3, hir_lit("c")),
- ])
- );
- assert_eq!(t("()"), hir_capture(1, Hir::empty()));
- assert_eq!(t("((?i))"), hir_capture(1, Hir::empty()));
- assert_eq!(t("((?x))"), hir_capture(1, Hir::empty()));
- assert_eq!(
- t("(((?x)))"),
- hir_capture(1, hir_capture(2, Hir::empty()))
- );
- }
-
- #[test]
- fn line_anchors() {
- assert_eq!(t("^"), hir_look(hir::Look::Start));
- assert_eq!(t("$"), hir_look(hir::Look::End));
- assert_eq!(t(r"\A"), hir_look(hir::Look::Start));
- assert_eq!(t(r"\z"), hir_look(hir::Look::End));
-
- assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start));
- assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End));
- assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF));
- assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF));
-
- assert_eq!(t(r"(?R)\A"), hir_look(hir::Look::Start));
- assert_eq!(t(r"(?R)\z"), hir_look(hir::Look::End));
- assert_eq!(t("(?R)^"), hir_look(hir::Look::Start));
- assert_eq!(t("(?R)$"), hir_look(hir::Look::End));
-
- assert_eq!(t(r"(?Rm)\A"), hir_look(hir::Look::Start));
- assert_eq!(t(r"(?Rm)\z"), hir_look(hir::Look::End));
- assert_eq!(t("(?Rm)^"), hir_look(hir::Look::StartCRLF));
- assert_eq!(t("(?Rm)$"), hir_look(hir::Look::EndCRLF));
- }
-
- #[test]
- fn flags() {
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?i:a)a"),
- hir_cat(
- vec![hir_uclass(&[('A', 'A'), ('a', 'a')]), hir_lit("a"),]
- )
- );
- assert_eq!(
- t("(?i-u:a)β"),
- hir_cat(vec![
- hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
- hir_lit("β"),
- ])
- );
- assert_eq!(
- t("(?:(?i-u)a)b"),
- hir_cat(vec![
- hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
- hir_lit("b"),
- ])
- );
- assert_eq!(
- t("((?i-u)a)b"),
- hir_cat(vec![
- hir_capture(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
- hir_lit("b"),
- ])
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?i)(?-i:a)a"),
- hir_cat(
- vec![hir_lit("a"), hir_uclass(&[('A', 'A'), ('a', 'a')]),]
- )
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?im)a^"),
- hir_cat(vec![
- hir_uclass(&[('A', 'A'), ('a', 'a')]),
- hir_look(hir::Look::StartLF),
- ])
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?im)a^(?i-m)a^"),
- hir_cat(vec![
- hir_uclass(&[('A', 'A'), ('a', 'a')]),
- hir_look(hir::Look::StartLF),
- hir_uclass(&[('A', 'A'), ('a', 'a')]),
- hir_look(hir::Look::Start),
- ])
- );
- assert_eq!(
- t("(?U)a*a*?(?-U)a*a*?"),
- hir_cat(vec![
- hir_star(false, hir_lit("a")),
- hir_star(true, hir_lit("a")),
- hir_star(true, hir_lit("a")),
- hir_star(false, hir_lit("a")),
- ])
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?:a(?i)a)a"),
- hir_cat(vec![
- hir_cat(vec![
- hir_lit("a"),
- hir_uclass(&[('A', 'A'), ('a', 'a')]),
- ]),
- hir_lit("a"),
- ])
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?i)(?:a(?-i)a)a"),
- hir_cat(vec![
- hir_cat(vec![
- hir_uclass(&[('A', 'A'), ('a', 'a')]),
- hir_lit("a"),
- ]),
- hir_uclass(&[('A', 'A'), ('a', 'a')]),
- ])
- );
- }
-
- #[test]
- fn escape() {
- assert_eq!(
- t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
- hir_lit(r"\.+*?()|[]{}^$#")
- );
- }
-
- #[test]
- fn repetition() {
- assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
- assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
- assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
- assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
- assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
- assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
-
- assert_eq!(t("a{1}"), hir_range(true, 1, Some(1), hir_lit("a"),));
- assert_eq!(t("a{1,}"), hir_range(true, 1, None, hir_lit("a"),));
- assert_eq!(t("a{1,2}"), hir_range(true, 1, Some(2), hir_lit("a"),));
- assert_eq!(t("a{1}?"), hir_range(false, 1, Some(1), hir_lit("a"),));
- assert_eq!(t("a{1,}?"), hir_range(false, 1, None, hir_lit("a"),));
- assert_eq!(t("a{1,2}?"), hir_range(false, 1, Some(2), hir_lit("a"),));
-
- assert_eq!(
- t("ab?"),
- hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
- );
- assert_eq!(t("(ab)?"), hir_quest(true, hir_capture(1, hir_lit("ab"))));
- assert_eq!(
- t("a|b?"),
- hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
- );
- }
-
- #[test]
- fn cat_alt() {
- let a = || hir_look(hir::Look::Start);
- let b = || hir_look(hir::Look::End);
- let c = || hir_look(hir::Look::WordUnicode);
- let d = || hir_look(hir::Look::WordUnicodeNegate);
-
- assert_eq!(t("(^$)"), hir_capture(1, hir_cat(vec![a(), b()])));
- assert_eq!(t("^|$"), hir_alt(vec![a(), b()]));
- assert_eq!(t(r"^|$|\b"), hir_alt(vec![a(), b(), c()]));
- assert_eq!(
- t(r"^$|$\b|\b\B"),
- hir_alt(vec![
- hir_cat(vec![a(), b()]),
- hir_cat(vec![b(), c()]),
- hir_cat(vec![c(), d()]),
- ])
- );
- assert_eq!(t("(^|$)"), hir_capture(1, hir_alt(vec![a(), b()])));
- assert_eq!(
- t(r"(^|$|\b)"),
- hir_capture(1, hir_alt(vec![a(), b(), c()]))
- );
- assert_eq!(
- t(r"(^$|$\b|\b\B)"),
- hir_capture(
- 1,
- hir_alt(vec![
- hir_cat(vec![a(), b()]),
- hir_cat(vec![b(), c()]),
- hir_cat(vec![c(), d()]),
- ])
- )
- );
- assert_eq!(
- t(r"(^$|($\b|(\b\B)))"),
- hir_capture(
- 1,
- hir_alt(vec![
- hir_cat(vec![a(), b()]),
- hir_capture(
- 2,
- hir_alt(vec![
- hir_cat(vec![b(), c()]),
- hir_capture(3, hir_cat(vec![c(), d()])),
- ])
- ),
- ])
- )
- );
- }
-
- // Tests the HIR transformation of things like '[a-z]|[A-Z]' into
- // '[A-Za-z]'. In other words, an alternation of just classes is always
- // equivalent to a single class corresponding to the union of the branches
- // in that class. (Unless some branches match invalid UTF-8 and others
- // match non-ASCII Unicode.)
- #[test]
- fn cat_class_flattened() {
- assert_eq!(t(r"[a-z]|[A-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
- // Combining all of the letter properties should give us the one giant
- // letter property.
- #[cfg(feature = "unicode-gencat")]
- assert_eq!(
- t(r"(?x)
- \p{Lowercase_Letter}
- |\p{Uppercase_Letter}
- |\p{Titlecase_Letter}
- |\p{Modifier_Letter}
- |\p{Other_Letter}
- "),
- hir_uclass_query(ClassQuery::Binary("letter"))
- );
- // Byte classes that can truly match invalid UTF-8 cannot be combined
- // with Unicode classes.
- assert_eq!(
- t_bytes(r"[Δδ]|(?-u:[\x90-\xFF])|[Λλ]"),
- hir_alt(vec![
- hir_uclass(&[('Δ', 'Δ'), ('δ', 'δ')]),
- hir_bclass(&[(b'\x90', b'\xFF')]),
- hir_uclass(&[('Λ', 'Λ'), ('λ', 'λ')]),
- ])
- );
- // Byte classes on their own can be combined, even if some are ASCII
- // and others are invalid UTF-8.
- assert_eq!(
- t_bytes(r"[a-z]|(?-u:[\x90-\xFF])|[A-Z]"),
- hir_bclass(&[(b'A', b'Z'), (b'a', b'z'), (b'\x90', b'\xFF')]),
- );
- }
-
- #[test]
- fn class_ascii() {
- assert_eq!(
- t("[[:alnum:]]"),
- hir_ascii_uclass(&ast::ClassAsciiKind::Alnum)
- );
- assert_eq!(
- t("[[:alpha:]]"),
- hir_ascii_uclass(&ast::ClassAsciiKind::Alpha)
- );
- assert_eq!(
- t("[[:ascii:]]"),
- hir_ascii_uclass(&ast::ClassAsciiKind::Ascii)
- );
- assert_eq!(
- t("[[:blank:]]"),
- hir_ascii_uclass(&ast::ClassAsciiKind::Blank)
- );
- assert_eq!(
- t("[[:cntrl:]]"),
- hir_ascii_uclass(&ast::ClassAsciiKind::Cntrl)
- );
- assert_eq!(
- t("[[:digit:]]"),
- hir_ascii_uclass(&ast::ClassAsciiKind::Digit)
- );
- assert_eq!(
- t("[[:graph:]]"),
- hir_ascii_uclass(&ast::ClassAsciiKind::Graph)
- );
- assert_eq!(
- t("[[:lower:]]"),
- hir_ascii_uclass(&ast::ClassAsciiKind::Lower)
- );
- assert_eq!(
- t("[[:print:]]"),
- hir_ascii_uclass(&ast::ClassAsciiKind::Print)
- );
- assert_eq!(
- t("[[:punct:]]"),
- hir_ascii_uclass(&ast::ClassAsciiKind::Punct)
- );
- assert_eq!(
- t("[[:space:]]"),
- hir_ascii_uclass(&ast::ClassAsciiKind::Space)
- );
- assert_eq!(
- t("[[:upper:]]"),
- hir_ascii_uclass(&ast::ClassAsciiKind::Upper)
- );
- assert_eq!(
- t("[[:word:]]"),
- hir_ascii_uclass(&ast::ClassAsciiKind::Word)
- );
- assert_eq!(
- t("[[:xdigit:]]"),
- hir_ascii_uclass(&ast::ClassAsciiKind::Xdigit)
- );
-
- assert_eq!(
- t("[[:^lower:]]"),
- hir_negate(hir_ascii_uclass(&ast::ClassAsciiKind::Lower))
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?i)[[:lower:]]"),
- hir_uclass(&[
- ('A', 'Z'),
- ('a', 'z'),
- ('\u{17F}', '\u{17F}'),
- ('\u{212A}', '\u{212A}'),
- ])
- );
-
- assert_eq!(
- t("(?-u)[[:lower:]]"),
- hir_ascii_bclass(&ast::ClassAsciiKind::Lower)
- );
- assert_eq!(
- t("(?i-u)[[:lower:]]"),
- hir_case_fold(hir_ascii_bclass(&ast::ClassAsciiKind::Lower))
- );
-
- assert_eq!(
- t_err("(?-u)[[:^lower:]]"),
- TestError {
- kind: hir::ErrorKind::InvalidUtf8,
- span: Span::new(
- Position::new(6, 1, 7),
- Position::new(16, 1, 17)
- ),
- }
- );
- assert_eq!(
- t_err("(?i-u)[[:^lower:]]"),
- TestError {
- kind: hir::ErrorKind::InvalidUtf8,
- span: Span::new(
- Position::new(7, 1, 8),
- Position::new(17, 1, 18)
- ),
- }
- );
- }
-
- #[test]
- fn class_ascii_multiple() {
- // See: https://github.com/rust-lang/regex/issues/680
- assert_eq!(
- t("[[:alnum:][:^ascii:]]"),
- hir_union(
- hir_ascii_uclass(&ast::ClassAsciiKind::Alnum),
- hir_uclass(&[('\u{80}', '\u{10FFFF}')]),
- ),
- );
- assert_eq!(
- t_bytes("(?-u)[[:alnum:][:^ascii:]]"),
- hir_union(
- hir_ascii_bclass(&ast::ClassAsciiKind::Alnum),
- hir_bclass(&[(0x80, 0xFF)]),
- ),
- );
- }
-
- #[test]
- #[cfg(feature = "unicode-perl")]
- fn class_perl_unicode() {
- // Unicode
- assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
- assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
- assert_eq!(t(r"\w"), hir_uclass_perl_word());
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t(r"(?i)\d"),
- hir_uclass_query(ClassQuery::Binary("digit"))
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t(r"(?i)\s"),
- hir_uclass_query(ClassQuery::Binary("space"))
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
-
- // Unicode, negated
- assert_eq!(
- t(r"\D"),
- hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
- );
- assert_eq!(
- t(r"\S"),
- hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
- );
- assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t(r"(?i)\D"),
- hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t(r"(?i)\S"),
- hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
- }
-
- #[test]
- fn class_perl_ascii() {
- // ASCII only
- assert_eq!(
- t(r"(?-u)\d"),
- hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
- );
- assert_eq!(
- t(r"(?-u)\s"),
- hir_ascii_bclass(&ast::ClassAsciiKind::Space)
- );
- assert_eq!(
- t(r"(?-u)\w"),
- hir_ascii_bclass(&ast::ClassAsciiKind::Word)
- );
- assert_eq!(
- t(r"(?i-u)\d"),
- hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
- );
- assert_eq!(
- t(r"(?i-u)\s"),
- hir_ascii_bclass(&ast::ClassAsciiKind::Space)
- );
- assert_eq!(
- t(r"(?i-u)\w"),
- hir_ascii_bclass(&ast::ClassAsciiKind::Word)
- );
-
- // ASCII only, negated
- assert_eq!(
- t_bytes(r"(?-u)\D"),
- hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
- );
- assert_eq!(
- t_bytes(r"(?-u)\S"),
- hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space))
- );
- assert_eq!(
- t_bytes(r"(?-u)\W"),
- hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
- );
- assert_eq!(
- t_bytes(r"(?i-u)\D"),
- hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
- );
- assert_eq!(
- t_bytes(r"(?i-u)\S"),
- hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space))
- );
- assert_eq!(
- t_bytes(r"(?i-u)\W"),
- hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
- );
-
- // ASCII only, negated, with UTF-8 mode enabled.
- // In this case, negating any Perl class results in an error because
- // all such classes can match invalid UTF-8.
- assert_eq!(
- t_err(r"(?-u)\D"),
- TestError {
- kind: hir::ErrorKind::InvalidUtf8,
- span: Span::new(
- Position::new(5, 1, 6),
- Position::new(7, 1, 8),
- ),
- },
- );
- assert_eq!(
- t_err(r"(?-u)\S"),
- TestError {
- kind: hir::ErrorKind::InvalidUtf8,
- span: Span::new(
- Position::new(5, 1, 6),
- Position::new(7, 1, 8),
- ),
- },
- );
- assert_eq!(
- t_err(r"(?-u)\W"),
- TestError {
- kind: hir::ErrorKind::InvalidUtf8,
- span: Span::new(
- Position::new(5, 1, 6),
- Position::new(7, 1, 8),
- ),
- },
- );
- assert_eq!(
- t_err(r"(?i-u)\D"),
- TestError {
- kind: hir::ErrorKind::InvalidUtf8,
- span: Span::new(
- Position::new(6, 1, 7),
- Position::new(8, 1, 9),
- ),
- },
- );
- assert_eq!(
- t_err(r"(?i-u)\S"),
- TestError {
- kind: hir::ErrorKind::InvalidUtf8,
- span: Span::new(
- Position::new(6, 1, 7),
- Position::new(8, 1, 9),
- ),
- },
- );
- assert_eq!(
- t_err(r"(?i-u)\W"),
- TestError {
- kind: hir::ErrorKind::InvalidUtf8,
- span: Span::new(
- Position::new(6, 1, 7),
- Position::new(8, 1, 9),
- ),
- },
- );
- }
-
- #[test]
- #[cfg(not(feature = "unicode-perl"))]
- fn class_perl_word_disabled() {
- assert_eq!(
- t_err(r"\w"),
- TestError {
- kind: hir::ErrorKind::UnicodePerlClassNotFound,
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(2, 1, 3)
- ),
- }
- );
- }
-
- #[test]
- #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
- fn class_perl_space_disabled() {
- assert_eq!(
- t_err(r"\s"),
- TestError {
- kind: hir::ErrorKind::UnicodePerlClassNotFound,
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(2, 1, 3)
- ),
- }
- );
- }
-
- #[test]
- #[cfg(all(
- not(feature = "unicode-perl"),
- not(feature = "unicode-gencat")
- ))]
- fn class_perl_digit_disabled() {
- assert_eq!(
- t_err(r"\d"),
- TestError {
- kind: hir::ErrorKind::UnicodePerlClassNotFound,
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(2, 1, 3)
- ),
- }
- );
- }
-
- #[test]
- #[cfg(feature = "unicode-gencat")]
- fn class_unicode_gencat() {
- assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
- assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
- assert_eq!(
- t(r"\p{Separator}"),
- hir_uclass_query(ClassQuery::Binary("Z"))
- );
- assert_eq!(
- t(r"\p{se PaRa ToR}"),
- hir_uclass_query(ClassQuery::Binary("Z"))
- );
- assert_eq!(
- t(r"\p{gc:Separator}"),
- hir_uclass_query(ClassQuery::Binary("Z"))
- );
- assert_eq!(
- t(r"\p{gc=Separator}"),
- hir_uclass_query(ClassQuery::Binary("Z"))
- );
- assert_eq!(
- t(r"\p{Other}"),
- hir_uclass_query(ClassQuery::Binary("Other"))
- );
- assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));
-
- assert_eq!(
- t(r"\PZ"),
- hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
- );
- assert_eq!(
- t(r"\P{separator}"),
- hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
- );
- assert_eq!(
- t(r"\P{gc!=separator}"),
- hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
- );
-
- assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
- assert_eq!(
- t(r"\p{assigned}"),
- hir_uclass_query(ClassQuery::Binary("Assigned"))
- );
- assert_eq!(
- t(r"\p{ascii}"),
- hir_uclass_query(ClassQuery::Binary("ASCII"))
- );
- assert_eq!(
- t(r"\p{gc:any}"),
- hir_uclass_query(ClassQuery::Binary("Any"))
- );
- assert_eq!(
- t(r"\p{gc:assigned}"),
- hir_uclass_query(ClassQuery::Binary("Assigned"))
- );
- assert_eq!(
- t(r"\p{gc:ascii}"),
- hir_uclass_query(ClassQuery::Binary("ASCII"))
- );
-
- assert_eq!(
- t_err(r"(?-u)\pZ"),
- TestError {
- kind: hir::ErrorKind::UnicodeNotAllowed,
- span: Span::new(
- Position::new(5, 1, 6),
- Position::new(8, 1, 9)
- ),
- }
- );
- assert_eq!(
- t_err(r"(?-u)\p{Separator}"),
- TestError {
- kind: hir::ErrorKind::UnicodeNotAllowed,
- span: Span::new(
- Position::new(5, 1, 6),
- Position::new(18, 1, 19)
- ),
- }
- );
- assert_eq!(
- t_err(r"\pE"),
- TestError {
- kind: hir::ErrorKind::UnicodePropertyNotFound,
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(3, 1, 4)
- ),
- }
- );
- assert_eq!(
- t_err(r"\p{Foo}"),
- TestError {
- kind: hir::ErrorKind::UnicodePropertyNotFound,
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(7, 1, 8)
- ),
- }
- );
- assert_eq!(
- t_err(r"\p{gc:Foo}"),
- TestError {
- kind: hir::ErrorKind::UnicodePropertyValueNotFound,
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(10, 1, 11)
- ),
- }
- );
- }
-
- #[test]
- #[cfg(not(feature = "unicode-gencat"))]
- fn class_unicode_gencat_disabled() {
- assert_eq!(
- t_err(r"\p{Separator}"),
- TestError {
- kind: hir::ErrorKind::UnicodePropertyNotFound,
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(13, 1, 14)
- ),
- }
- );
-
- assert_eq!(
- t_err(r"\p{Any}"),
- TestError {
- kind: hir::ErrorKind::UnicodePropertyNotFound,
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(7, 1, 8)
- ),
- }
- );
- }
-
- #[test]
- #[cfg(feature = "unicode-script")]
- fn class_unicode_script() {
- assert_eq!(
- t(r"\p{Greek}"),
- hir_uclass_query(ClassQuery::Binary("Greek"))
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t(r"(?i)\p{Greek}"),
- hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t(r"(?i)\P{Greek}"),
- hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
- "Greek"
- ))))
- );
-
- assert_eq!(
- t_err(r"\p{sc:Foo}"),
- TestError {
- kind: hir::ErrorKind::UnicodePropertyValueNotFound,
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(10, 1, 11)
- ),
- }
- );
- assert_eq!(
- t_err(r"\p{scx:Foo}"),
- TestError {
- kind: hir::ErrorKind::UnicodePropertyValueNotFound,
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(11, 1, 12)
- ),
- }
- );
- }
-
- #[test]
- #[cfg(not(feature = "unicode-script"))]
- fn class_unicode_script_disabled() {
- assert_eq!(
- t_err(r"\p{Greek}"),
- TestError {
- kind: hir::ErrorKind::UnicodePropertyNotFound,
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(9, 1, 10)
- ),
- }
- );
-
- assert_eq!(
- t_err(r"\p{scx:Greek}"),
- TestError {
- kind: hir::ErrorKind::UnicodePropertyNotFound,
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(13, 1, 14)
- ),
- }
- );
- }
-
- #[test]
- #[cfg(feature = "unicode-age")]
- fn class_unicode_age() {
- assert_eq!(
- t_err(r"\p{age:Foo}"),
- TestError {
- kind: hir::ErrorKind::UnicodePropertyValueNotFound,
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(11, 1, 12)
- ),
- }
- );
- }
-
- #[test]
- #[cfg(feature = "unicode-gencat")]
- fn class_unicode_any_empty() {
- assert_eq!(t(r"\P{any}"), hir_uclass(&[]),);
- }
-
- #[test]
- #[cfg(not(feature = "unicode-age"))]
- fn class_unicode_age_disabled() {
- assert_eq!(
- t_err(r"\p{age:3.0}"),
- TestError {
- kind: hir::ErrorKind::UnicodePropertyNotFound,
- span: Span::new(
- Position::new(0, 1, 1),
- Position::new(11, 1, 12)
- ),
- }
- );
- }
-
- #[test]
- fn class_bracketed() {
- assert_eq!(t("[a]"), hir_lit("a"));
- assert_eq!(t("[ab]"), hir_uclass(&[('a', 'b')]));
- assert_eq!(t("[^[a]]"), class_negate(uclass(&[('a', 'a')])));
- assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
- assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
- assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
- assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
- assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
- assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
- #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
- assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
- #[cfg(feature = "unicode-gencat")]
- assert_eq!(
- t(r"[\pZ]"),
- hir_uclass_query(ClassQuery::Binary("separator"))
- );
- #[cfg(feature = "unicode-gencat")]
- assert_eq!(
- t(r"[\p{separator}]"),
- hir_uclass_query(ClassQuery::Binary("separator"))
- );
- #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
- assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
- #[cfg(feature = "unicode-gencat")]
- assert_eq!(
- t(r"[^\PZ]"),
- hir_uclass_query(ClassQuery::Binary("separator"))
- );
- #[cfg(feature = "unicode-gencat")]
- assert_eq!(
- t(r"[^\P{separator}]"),
- hir_uclass_query(ClassQuery::Binary("separator"))
- );
- #[cfg(all(
- feature = "unicode-case",
- any(feature = "unicode-perl", feature = "unicode-gencat")
- ))]
- assert_eq!(
- t(r"(?i)[^\D]"),
- hir_uclass_query(ClassQuery::Binary("digit"))
- );
- #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
- assert_eq!(
- t(r"(?i)[^\P{greek}]"),
- hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
- );
-
- assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
- assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
- assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
-
- #[cfg(feature = "unicode-case")]
- assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?i)[k]"),
- hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?i)[β]"),
- hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
- );
- assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
-
- assert_eq!(t("[^a]"), class_negate(uclass(&[('a', 'a')])));
- assert_eq!(t(r"[^\x00]"), class_negate(uclass(&[('\0', '\0')])));
- assert_eq!(
- t_bytes("(?-u)[^a]"),
- class_negate(bclass(&[(b'a', b'a')]))
- );
- #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
- assert_eq!(
- t(r"[^\d]"),
- hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
- );
- #[cfg(feature = "unicode-gencat")]
- assert_eq!(
- t(r"[^\pZ]"),
- hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
- );
- #[cfg(feature = "unicode-gencat")]
- assert_eq!(
- t(r"[^\p{separator}]"),
- hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
- );
- #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
- assert_eq!(
- t(r"(?i)[^\p{greek}]"),
- hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
- "greek"
- ))))
- );
- #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
- assert_eq!(
- t(r"(?i)[\P{greek}]"),
- hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
- "greek"
- ))))
- );
-
- // Test some weird cases.
- assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
-
- assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
- assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
- assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
- assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
- assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
-
- assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
- assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
- assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
- assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
- assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
-
- assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
- assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
- assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
- assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
- assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
-
- assert_eq!(
- t_err("(?-u)[^a]"),
- TestError {
- kind: hir::ErrorKind::InvalidUtf8,
- span: Span::new(
- Position::new(5, 1, 6),
- Position::new(9, 1, 10)
- ),
- }
- );
- #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
- assert_eq!(t(r"[^\s\S]"), hir_uclass(&[]),);
- #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
- assert_eq!(t_bytes(r"(?-u)[^\s\S]"), hir_bclass(&[]),);
- }
-
- #[test]
- fn class_bracketed_union() {
- assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
- #[cfg(feature = "unicode-gencat")]
- assert_eq!(
- t(r"[a\pZb]"),
- hir_union(
- hir_uclass(&[('a', 'b')]),
- hir_uclass_query(ClassQuery::Binary("separator"))
- )
- );
- #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
- assert_eq!(
- t(r"[\pZ\p{Greek}]"),
- hir_union(
- hir_uclass_query(ClassQuery::Binary("greek")),
- hir_uclass_query(ClassQuery::Binary("separator"))
- )
- );
- #[cfg(all(
- feature = "unicode-age",
- feature = "unicode-gencat",
- feature = "unicode-script"
- ))]
- assert_eq!(
- t(r"[\p{age:3.0}\pZ\p{Greek}]"),
- hir_union(
- hir_uclass_query(ClassQuery::ByValue {
- property_name: "age",
- property_value: "3.0",
- }),
- hir_union(
- hir_uclass_query(ClassQuery::Binary("greek")),
- hir_uclass_query(ClassQuery::Binary("separator"))
- )
- )
- );
- #[cfg(all(
- feature = "unicode-age",
- feature = "unicode-gencat",
- feature = "unicode-script"
- ))]
- assert_eq!(
- t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
- hir_union(
- hir_uclass_query(ClassQuery::ByValue {
- property_name: "age",
- property_value: "3.0",
- }),
- hir_union(
- hir_uclass_query(ClassQuery::Binary("cyrillic")),
- hir_union(
- hir_uclass_query(ClassQuery::Binary("greek")),
- hir_uclass_query(ClassQuery::Binary("separator"))
- )
- )
- )
- );
-
- #[cfg(all(
- feature = "unicode-age",
- feature = "unicode-case",
- feature = "unicode-gencat",
- feature = "unicode-script"
- ))]
- assert_eq!(
- t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
- hir_case_fold(hir_union(
- hir_uclass_query(ClassQuery::ByValue {
- property_name: "age",
- property_value: "3.0",
- }),
- hir_union(
- hir_uclass_query(ClassQuery::Binary("greek")),
- hir_uclass_query(ClassQuery::Binary("separator"))
- )
- ))
- );
- #[cfg(all(
- feature = "unicode-age",
- feature = "unicode-gencat",
- feature = "unicode-script"
- ))]
- assert_eq!(
- t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
- hir_negate(hir_union(
- hir_uclass_query(ClassQuery::ByValue {
- property_name: "age",
- property_value: "3.0",
- }),
- hir_union(
- hir_uclass_query(ClassQuery::Binary("greek")),
- hir_uclass_query(ClassQuery::Binary("separator"))
- )
- ))
- );
- #[cfg(all(
- feature = "unicode-age",
- feature = "unicode-case",
- feature = "unicode-gencat",
- feature = "unicode-script"
- ))]
- assert_eq!(
- t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
- hir_negate(hir_case_fold(hir_union(
- hir_uclass_query(ClassQuery::ByValue {
- property_name: "age",
- property_value: "3.0",
- }),
- hir_union(
- hir_uclass_query(ClassQuery::Binary("greek")),
- hir_uclass_query(ClassQuery::Binary("separator"))
- )
- )))
- );
- }
-
- #[test]
- fn class_bracketed_nested() {
- assert_eq!(t(r"[a[^c]]"), class_negate(uclass(&[('c', 'c')])));
- assert_eq!(t(r"[a-b[^c]]"), class_negate(uclass(&[('c', 'c')])));
- assert_eq!(t(r"[a-c[^c]]"), class_negate(uclass(&[])));
-
- assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
- assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));
-
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t(r"(?i)[a[^c]]"),
- hir_negate(class_case_fold(uclass(&[('c', 'c')])))
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t(r"(?i)[a-b[^c]]"),
- hir_negate(class_case_fold(uclass(&[('c', 'c')])))
- );
-
- #[cfg(feature = "unicode-case")]
- assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t(r"(?i)[^a-b[^c]]"),
- hir_uclass(&[('C', 'C'), ('c', 'c')])
- );
-
- assert_eq!(t(r"[^a-c[^c]]"), hir_uclass(&[]),);
- #[cfg(feature = "unicode-case")]
- assert_eq!(t(r"(?i)[^a-c[^c]]"), hir_uclass(&[]),);
- }
-
- #[test]
- fn class_bracketed_intersect() {
- assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
- assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
- assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
- assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
- assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
- assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
- assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
- assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
- assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
-
- assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
- assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
- assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
- assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
- assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
- assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
-
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?i)[abc&&b-c]"),
- hir_case_fold(hir_uclass(&[('b', 'c')]))
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?i)[abc&&[b-c]]"),
- hir_case_fold(hir_uclass(&[('b', 'c')]))
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?i)[[abc]&&[b-c]]"),
- hir_case_fold(hir_uclass(&[('b', 'c')]))
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?i)[a-z&&b-y&&c-x]"),
- hir_case_fold(hir_uclass(&[('c', 'x')]))
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?i)[c-da-b&&a-d]"),
- hir_case_fold(hir_uclass(&[('a', 'd')]))
- );
- #[cfg(feature = "unicode-case")]
- assert_eq!(
- t("(?i)[a-d&&c-da-b]"),
- hir_case_fold(hir_uclass(&[('a', 'd')]))
- );
-
- assert_eq!(
- t("(?i-u)[abc&&b-c]"),
- hir_case_fold(hir_bclass(&[(b'b', b'c')]))
- );
- assert_eq!(
- t("(?i-u)[abc&&[b-c]]"),
- hir_case_fold(hir_bclass(&[(b'b', b'c')]))
- );
- assert_eq!(
- t("(?i-u)[[abc]&&[b-c]]"),
- hir_case_fold(hir_bclass(&[(b'b', b'c')]))
- );
- assert_eq!(
- t("(?i-u)[a-z&&b-y&&c-x]"),
- hir_case_fold(hir_bclass(&[(b'c', b'x')]))
- );
- assert_eq!(
- t("(?i-u)[c-da-b&&a-d]"),
- hir_case_fold(hir_bclass(&[(b'a', b'd')]))
- );
- assert_eq!(
- t("(?i-u)[a-d&&c-da-b]"),
- hir_case_fold(hir_bclass(&[(b'a', b'd')]))
- );
-
- // In `[a^]`, `^` does not need to be escaped, so it makes sense that
- // `^` is also allowed to be unescaped after `&&`.
- assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
- // `]` needs to be escaped after `&&` since it's not at start of class.
- assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
- assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
- assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
- assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
- // Test precedence.
- assert_eq!(
- t(r"[a-w&&[^c-g]z]"),
- hir_uclass(&[('a', 'b'), ('h', 'w')])
- );
- }
-
- #[test]
- fn class_bracketed_intersect_negate() {
- #[cfg(feature = "unicode-perl")]
- assert_eq!(
- t(r"[^\w&&\d]"),
- hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
- );
- assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
- #[cfg(feature = "unicode-perl")]
- assert_eq!(
- t(r"[^[\w&&\d]]"),
- hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
- );
- #[cfg(feature = "unicode-perl")]
- assert_eq!(
- t(r"[^[^\w&&\d]]"),
- hir_uclass_query(ClassQuery::Binary("digit"))
- );
- #[cfg(feature = "unicode-perl")]
- assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
-
- #[cfg(feature = "unicode-perl")]
- assert_eq!(
- t_bytes(r"(?-u)[^\w&&\d]"),
- hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
- );
- assert_eq!(
- t_bytes(r"(?-u)[^[a-z&&a-c]]"),
- hir_negate(hir_bclass(&[(b'a', b'c')]))
- );
- assert_eq!(
- t_bytes(r"(?-u)[^[\w&&\d]]"),
- hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
- );
- assert_eq!(
- t_bytes(r"(?-u)[^[^\w&&\d]]"),
- hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
- );
- assert_eq!(
- t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
- hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
- );
- }
-
- #[test]
- fn class_bracketed_difference() {
- #[cfg(feature = "unicode-gencat")]
- assert_eq!(
- t(r"[\pL--[:ascii:]]"),
- hir_difference(
- hir_uclass_query(ClassQuery::Binary("letter")),
- hir_uclass(&[('\0', '\x7F')])
- )
- );
-
- assert_eq!(
- t(r"(?-u)[[:alpha:]--[:lower:]]"),
- hir_bclass(&[(b'A', b'Z')])
- );
- }
-
- #[test]
- fn class_bracketed_symmetric_difference() {
- #[cfg(feature = "unicode-script")]
- assert_eq!(
- t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
- // Class({
- // '·'..='·',
- // '\u{300}'..='\u{301}',
- // '\u{304}'..='\u{304}',
- // '\u{306}'..='\u{306}',
- // '\u{308}'..='\u{308}',
- // '\u{313}'..='\u{313}',
- // '\u{342}'..='\u{342}',
- // '\u{345}'..='\u{345}',
- // 'ʹ'..='ʹ',
- // '\u{1dc0}'..='\u{1dc1}',
- // '⁝'..='⁝',
- // })
- hir_uclass(&[
- ('·', '·'),
- ('\u{0300}', '\u{0301}'),
- ('\u{0304}', '\u{0304}'),
- ('\u{0306}', '\u{0306}'),
- ('\u{0308}', '\u{0308}'),
- ('\u{0313}', '\u{0313}'),
- ('\u{0342}', '\u{0342}'),
- ('\u{0345}', '\u{0345}'),
- ('ʹ', 'ʹ'),
- ('\u{1DC0}', '\u{1DC1}'),
- ('⁝', '⁝'),
- ])
- );
- assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
-
- assert_eq!(
- t(r"(?-u)[a-g~~c-j]"),
- hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
- );
- }
-
- #[test]
- fn ignore_whitespace() {
- assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
- assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
- assert_eq!(
- t(r"(?x)\x # comment
-{ # comment
- 53 # comment
-} #comment"),
- hir_lit("S")
- );
-
- assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
- assert_eq!(
- t(r"(?x)\x # comment
- 53 # comment"),
- hir_lit("S")
- );
- assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
-
- #[cfg(feature = "unicode-gencat")]
- assert_eq!(
- t(r"(?x)\p # comment
-{ # comment
- Separator # comment
-} # comment"),
- hir_uclass_query(ClassQuery::Binary("separator"))
- );
-
- assert_eq!(
- t(r"(?x)a # comment
-{ # comment
- 5 # comment
- , # comment
- 10 # comment
-} # comment"),
- hir_range(true, 5, Some(10), hir_lit("a"))
- );
-
- assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a "));
- }
-
- #[test]
- fn analysis_is_utf8() {
- // Positive examples.
- assert!(props_bytes(r"a").is_utf8());
- assert!(props_bytes(r"ab").is_utf8());
- assert!(props_bytes(r"(?-u)a").is_utf8());
- assert!(props_bytes(r"(?-u)ab").is_utf8());
- assert!(props_bytes(r"\xFF").is_utf8());
- assert!(props_bytes(r"\xFF\xFF").is_utf8());
- assert!(props_bytes(r"[^a]").is_utf8());
- assert!(props_bytes(r"[^a][^a]").is_utf8());
- assert!(props_bytes(r"\b").is_utf8());
- assert!(props_bytes(r"\B").is_utf8());
- assert!(props_bytes(r"(?-u)\b").is_utf8());
- assert!(props_bytes(r"(?-u)\B").is_utf8());
-
- // Negative examples.
- assert!(!props_bytes(r"(?-u)\xFF").is_utf8());
- assert!(!props_bytes(r"(?-u)\xFF\xFF").is_utf8());
- assert!(!props_bytes(r"(?-u)[^a]").is_utf8());
- assert!(!props_bytes(r"(?-u)[^a][^a]").is_utf8());
- }
-
- #[test]
- fn analysis_captures_len() {
- assert_eq!(0, props(r"a").explicit_captures_len());
- assert_eq!(0, props(r"(?:a)").explicit_captures_len());
- assert_eq!(0, props(r"(?i-u:a)").explicit_captures_len());
- assert_eq!(0, props(r"(?i-u)a").explicit_captures_len());
- assert_eq!(1, props(r"(a)").explicit_captures_len());
- assert_eq!(1, props(r"(?P<foo>a)").explicit_captures_len());
- assert_eq!(1, props(r"()").explicit_captures_len());
- assert_eq!(1, props(r"()a").explicit_captures_len());
- assert_eq!(1, props(r"(a)+").explicit_captures_len());
- assert_eq!(2, props(r"(a)(b)").explicit_captures_len());
- assert_eq!(2, props(r"(a)|(b)").explicit_captures_len());
- assert_eq!(2, props(r"((a))").explicit_captures_len());
- assert_eq!(1, props(r"([a&&b])").explicit_captures_len());
- }
-
- #[test]
- fn analysis_static_captures_len() {
- let len = |pattern| props(pattern).static_explicit_captures_len();
- assert_eq!(Some(0), len(r""));
- assert_eq!(Some(0), len(r"foo|bar"));
- assert_eq!(None, len(r"(foo)|bar"));
- assert_eq!(None, len(r"foo|(bar)"));
- assert_eq!(Some(1), len(r"(foo|bar)"));
- assert_eq!(Some(1), len(r"(a|b|c|d|e|f)"));
- assert_eq!(Some(1), len(r"(a)|(b)|(c)|(d)|(e)|(f)"));
- assert_eq!(Some(2), len(r"(a)(b)|(c)(d)|(e)(f)"));
- assert_eq!(Some(6), len(r"(a)(b)(c)(d)(e)(f)"));
- assert_eq!(Some(3), len(r"(a)(b)(extra)|(a)(b)()"));
- assert_eq!(Some(3), len(r"(a)(b)((?:extra)?)"));
- assert_eq!(None, len(r"(a)(b)(extra)?"));
- assert_eq!(Some(1), len(r"(foo)|(bar)"));
- assert_eq!(Some(2), len(r"(foo)(bar)"));
- assert_eq!(Some(2), len(r"(foo)+(bar)"));
- assert_eq!(None, len(r"(foo)*(bar)"));
- assert_eq!(Some(0), len(r"(foo)?{0}"));
- assert_eq!(None, len(r"(foo)?{1}"));
- assert_eq!(Some(1), len(r"(foo){1}"));
- assert_eq!(Some(1), len(r"(foo){1,}"));
- assert_eq!(Some(1), len(r"(foo){1,}?"));
- assert_eq!(None, len(r"(foo){1,}??"));
- assert_eq!(None, len(r"(foo){0,}"));
- assert_eq!(Some(1), len(r"(foo)(?:bar)"));
- assert_eq!(Some(2), len(r"(foo(?:bar)+)(?:baz(boo))"));
- assert_eq!(Some(2), len(r"(?P<bar>foo)(?:bar)(bal|loon)"));
- assert_eq!(
- Some(2),
- len(r#"<(a)[^>]+href="([^"]+)"|<(img)[^>]+src="([^"]+)""#)
- );
- }
-
- #[test]
- fn analysis_is_all_assertions() {
- // Positive examples.
- let p = props(r"\b");
- assert!(!p.look_set().is_empty());
- assert_eq!(p.minimum_len(), Some(0));
-
- let p = props(r"\B");
- assert!(!p.look_set().is_empty());
- assert_eq!(p.minimum_len(), Some(0));
-
- let p = props(r"^");
- assert!(!p.look_set().is_empty());
- assert_eq!(p.minimum_len(), Some(0));
-
- let p = props(r"$");
- assert!(!p.look_set().is_empty());
- assert_eq!(p.minimum_len(), Some(0));
-
- let p = props(r"\A");
- assert!(!p.look_set().is_empty());
- assert_eq!(p.minimum_len(), Some(0));
-
- let p = props(r"\z");
- assert!(!p.look_set().is_empty());
- assert_eq!(p.minimum_len(), Some(0));
-
- let p = props(r"$^\z\A\b\B");
- assert!(!p.look_set().is_empty());
- assert_eq!(p.minimum_len(), Some(0));
-
- let p = props(r"$|^|\z|\A|\b|\B");
- assert!(!p.look_set().is_empty());
- assert_eq!(p.minimum_len(), Some(0));
-
- let p = props(r"^$|$^");
- assert!(!p.look_set().is_empty());
- assert_eq!(p.minimum_len(), Some(0));
-
- let p = props(r"((\b)+())*^");
- assert!(!p.look_set().is_empty());
- assert_eq!(p.minimum_len(), Some(0));
-
- // Negative examples.
- let p = props(r"^a");
- assert!(!p.look_set().is_empty());
- assert_eq!(p.minimum_len(), Some(1));
- }
-
- #[test]
- fn analysis_look_set_prefix_any() {
- let p = props(r"(?-u)(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))");
- assert!(p.look_set_prefix_any().contains(Look::WordAscii));
- }
-
- #[test]
- fn analysis_is_anchored() {
- let is_start = |p| props(p).look_set_prefix().contains(Look::Start);
- let is_end = |p| props(p).look_set_suffix().contains(Look::End);
-
- // Positive examples.
- assert!(is_start(r"^"));
- assert!(is_end(r"$"));
-
- assert!(is_start(r"^^"));
- assert!(props(r"$$").look_set_suffix().contains(Look::End));
-
- assert!(is_start(r"^$"));
- assert!(is_end(r"^$"));
-
- assert!(is_start(r"^foo"));
- assert!(is_end(r"foo$"));
-
- assert!(is_start(r"^foo|^bar"));
- assert!(is_end(r"foo$|bar$"));
-
- assert!(is_start(r"^(foo|bar)"));
- assert!(is_end(r"(foo|bar)$"));
-
- assert!(is_start(r"^+"));
- assert!(is_end(r"$+"));
- assert!(is_start(r"^++"));
- assert!(is_end(r"$++"));
- assert!(is_start(r"(^)+"));
- assert!(is_end(r"($)+"));
-
- assert!(is_start(r"$^"));
- assert!(is_start(r"$^"));
- assert!(is_start(r"$^|^$"));
- assert!(is_end(r"$^|^$"));
-
- assert!(is_start(r"\b^"));
- assert!(is_end(r"$\b"));
- assert!(is_start(r"^(?m:^)"));
- assert!(is_end(r"(?m:$)$"));
- assert!(is_start(r"(?m:^)^"));
- assert!(is_end(r"$(?m:$)"));
-
- // Negative examples.
- assert!(!is_start(r"(?m)^"));
- assert!(!is_end(r"(?m)$"));
- assert!(!is_start(r"(?m:^$)|$^"));
- assert!(!is_end(r"(?m:^$)|$^"));
- assert!(!is_start(r"$^|(?m:^$)"));
- assert!(!is_end(r"$^|(?m:^$)"));
-
- assert!(!is_start(r"a^"));
- assert!(!is_start(r"$a"));
-
- assert!(!is_end(r"a^"));
- assert!(!is_end(r"$a"));
-
- assert!(!is_start(r"^foo|bar"));
- assert!(!is_end(r"foo|bar$"));
-
- assert!(!is_start(r"^*"));
- assert!(!is_end(r"$*"));
- assert!(!is_start(r"^*+"));
- assert!(!is_end(r"$*+"));
- assert!(!is_start(r"^+*"));
- assert!(!is_end(r"$+*"));
- assert!(!is_start(r"(^)*"));
- assert!(!is_end(r"($)*"));
- }
-
- #[test]
- fn analysis_is_any_anchored() {
- let is_start = |p| props(p).look_set().contains(Look::Start);
- let is_end = |p| props(p).look_set().contains(Look::End);
-
- // Positive examples.
- assert!(is_start(r"^"));
- assert!(is_end(r"$"));
- assert!(is_start(r"\A"));
- assert!(is_end(r"\z"));
-
- // Negative examples.
- assert!(!is_start(r"(?m)^"));
- assert!(!is_end(r"(?m)$"));
- assert!(!is_start(r"$"));
- assert!(!is_end(r"^"));
- }
-
- #[test]
- fn analysis_can_empty() {
- // Positive examples.
- let assert_empty =
- |p| assert_eq!(Some(0), props_bytes(p).minimum_len());
- assert_empty(r"");
- assert_empty(r"()");
- assert_empty(r"()*");
- assert_empty(r"()+");
- assert_empty(r"()?");
- assert_empty(r"a*");
- assert_empty(r"a?");
- assert_empty(r"a{0}");
- assert_empty(r"a{0,}");
- assert_empty(r"a{0,1}");
- assert_empty(r"a{0,10}");
- #[cfg(feature = "unicode-gencat")]
- assert_empty(r"\pL*");
- assert_empty(r"a*|b");
- assert_empty(r"b|a*");
- assert_empty(r"a|");
- assert_empty(r"|a");
- assert_empty(r"a||b");
- assert_empty(r"a*a?(abcd)*");
- assert_empty(r"^");
- assert_empty(r"$");
- assert_empty(r"(?m)^");
- assert_empty(r"(?m)$");
- assert_empty(r"\A");
- assert_empty(r"\z");
- assert_empty(r"\B");
- assert_empty(r"(?-u)\B");
- assert_empty(r"\b");
- assert_empty(r"(?-u)\b");
-
- // Negative examples.
- let assert_non_empty =
- |p| assert_ne!(Some(0), props_bytes(p).minimum_len());
- assert_non_empty(r"a+");
- assert_non_empty(r"a{1}");
- assert_non_empty(r"a{1,}");
- assert_non_empty(r"a{1,2}");
- assert_non_empty(r"a{1,10}");
- assert_non_empty(r"b|a");
- assert_non_empty(r"a*a+(abcd)*");
- #[cfg(feature = "unicode-gencat")]
- assert_non_empty(r"\P{any}");
- assert_non_empty(r"[a--a]");
- assert_non_empty(r"[a&&b]");
- }
-
- #[test]
- fn analysis_is_literal() {
- // Positive examples.
- assert!(props(r"a").is_literal());
- assert!(props(r"ab").is_literal());
- assert!(props(r"abc").is_literal());
- assert!(props(r"(?m)abc").is_literal());
- assert!(props(r"(?:a)").is_literal());
- assert!(props(r"foo(?:a)").is_literal());
- assert!(props(r"(?:a)foo").is_literal());
- assert!(props(r"[a]").is_literal());
-
- // Negative examples.
- assert!(!props(r"").is_literal());
- assert!(!props(r"^").is_literal());
- assert!(!props(r"a|b").is_literal());
- assert!(!props(r"(a)").is_literal());
- assert!(!props(r"a+").is_literal());
- assert!(!props(r"foo(a)").is_literal());
- assert!(!props(r"(a)foo").is_literal());
- assert!(!props(r"[ab]").is_literal());
- }
-
- #[test]
- fn analysis_is_alternation_literal() {
- // Positive examples.
- assert!(props(r"a").is_alternation_literal());
- assert!(props(r"ab").is_alternation_literal());
- assert!(props(r"abc").is_alternation_literal());
- assert!(props(r"(?m)abc").is_alternation_literal());
- assert!(props(r"foo|bar").is_alternation_literal());
- assert!(props(r"foo|bar|baz").is_alternation_literal());
- assert!(props(r"[a]").is_alternation_literal());
- assert!(props(r"(?:ab)|cd").is_alternation_literal());
- assert!(props(r"ab|(?:cd)").is_alternation_literal());
-
- // Negative examples.
- assert!(!props(r"").is_alternation_literal());
- assert!(!props(r"^").is_alternation_literal());
- assert!(!props(r"(a)").is_alternation_literal());
- assert!(!props(r"a+").is_alternation_literal());
- assert!(!props(r"foo(a)").is_alternation_literal());
- assert!(!props(r"(a)foo").is_alternation_literal());
- assert!(!props(r"[ab]").is_alternation_literal());
- assert!(!props(r"[ab]|b").is_alternation_literal());
- assert!(!props(r"a|[ab]").is_alternation_literal());
- assert!(!props(r"(a)|b").is_alternation_literal());
- assert!(!props(r"a|(b)").is_alternation_literal());
- assert!(!props(r"a|b").is_alternation_literal());
- assert!(!props(r"a|b|c").is_alternation_literal());
- assert!(!props(r"[a]|b").is_alternation_literal());
- assert!(!props(r"a|[b]").is_alternation_literal());
- assert!(!props(r"(?:a)|b").is_alternation_literal());
- assert!(!props(r"a|(?:b)").is_alternation_literal());
- assert!(!props(r"(?:z|xx)@|xx").is_alternation_literal());
- }
-
- // This tests that the smart Hir::repetition constructors does some basic
- // simplifications.
- #[test]
- fn smart_repetition() {
- assert_eq!(t(r"a{0}"), Hir::empty());
- assert_eq!(t(r"a{1}"), hir_lit("a"));
- assert_eq!(t(r"\B{32111}"), hir_look(hir::Look::WordUnicodeNegate));
- }
-
- // This tests that the smart Hir::concat constructor simplifies the given
- // exprs in a way we expect.
- #[test]
- fn smart_concat() {
- assert_eq!(t(""), Hir::empty());
- assert_eq!(t("(?:)"), Hir::empty());
- assert_eq!(t("abc"), hir_lit("abc"));
- assert_eq!(t("(?:foo)(?:bar)"), hir_lit("foobar"));
- assert_eq!(t("quux(?:foo)(?:bar)baz"), hir_lit("quuxfoobarbaz"));
- assert_eq!(
- t("foo(?:bar^baz)quux"),
- hir_cat(vec![
- hir_lit("foobar"),
- hir_look(hir::Look::Start),
- hir_lit("bazquux"),
- ])
- );
- assert_eq!(
- t("foo(?:ba(?:r^b)az)quux"),
- hir_cat(vec![
- hir_lit("foobar"),
- hir_look(hir::Look::Start),
- hir_lit("bazquux"),
- ])
- );
- }
-
- // This tests that the smart Hir::alternation constructor simplifies the
- // given exprs in a way we expect.
- #[test]
- fn smart_alternation() {
- assert_eq!(
- t("(?:foo)|(?:bar)"),
- hir_alt(vec![hir_lit("foo"), hir_lit("bar")])
- );
- assert_eq!(
- t("quux|(?:abc|def|xyz)|baz"),
- hir_alt(vec![
- hir_lit("quux"),
- hir_lit("abc"),
- hir_lit("def"),
- hir_lit("xyz"),
- hir_lit("baz"),
- ])
- );
- assert_eq!(
- t("quux|(?:abc|(?:def|mno)|xyz)|baz"),
- hir_alt(vec![
- hir_lit("quux"),
- hir_lit("abc"),
- hir_lit("def"),
- hir_lit("mno"),
- hir_lit("xyz"),
- hir_lit("baz"),
- ])
- );
- assert_eq!(
- t("a|b|c|d|e|f|x|y|z"),
- hir_uclass(&[('a', 'f'), ('x', 'z')]),
- );
- // Tests that we lift common prefixes out of an alternation.
- assert_eq!(
- t("[A-Z]foo|[A-Z]quux"),
- hir_cat(vec![
- hir_uclass(&[('A', 'Z')]),
- hir_alt(vec![hir_lit("foo"), hir_lit("quux")]),
- ]),
- );
- assert_eq!(
- t("[A-Z][A-Z]|[A-Z]quux"),
- hir_cat(vec![
- hir_uclass(&[('A', 'Z')]),
- hir_alt(vec![hir_uclass(&[('A', 'Z')]), hir_lit("quux")]),
- ]),
- );
- assert_eq!(
- t("[A-Z][A-Z]|[A-Z][A-Z]quux"),
- hir_cat(vec![
- hir_uclass(&[('A', 'Z')]),
- hir_uclass(&[('A', 'Z')]),
- hir_alt(vec![Hir::empty(), hir_lit("quux")]),
- ]),
- );
- assert_eq!(
- t("[A-Z]foo|[A-Z]foobar"),
- hir_cat(vec![
- hir_uclass(&[('A', 'Z')]),
- hir_alt(vec![hir_lit("foo"), hir_lit("foobar")]),
- ]),
- );
- }
-
- #[test]
- fn regression_alt_empty_concat() {
- use crate::ast::{self, Ast};
-
- let span = Span::splat(Position::new(0, 0, 0));
- let ast = Ast::alternation(ast::Alternation {
- span,
- asts: vec![Ast::concat(ast::Concat { span, asts: vec![] })],
- });
-
- let mut t = Translator::new();
- assert_eq!(Ok(Hir::empty()), t.translate("", &ast));
- }
-
- #[test]
- fn regression_empty_alt() {
- use crate::ast::{self, Ast};
-
- let span = Span::splat(Position::new(0, 0, 0));
- let ast = Ast::concat(ast::Concat {
- span,
- asts: vec![Ast::alternation(ast::Alternation {
- span,
- asts: vec![],
- })],
- });
-
- let mut t = Translator::new();
- assert_eq!(Ok(Hir::fail()), t.translate("", &ast));
- }
-
- #[test]
- fn regression_singleton_alt() {
- use crate::{
- ast::{self, Ast},
- hir::Dot,
- };
-
- let span = Span::splat(Position::new(0, 0, 0));
- let ast = Ast::concat(ast::Concat {
- span,
- asts: vec![Ast::alternation(ast::Alternation {
- span,
- asts: vec![Ast::dot(span)],
- })],
- });
-
- let mut t = Translator::new();
- assert_eq!(Ok(Hir::dot(Dot::AnyCharExceptLF)), t.translate("", &ast));
- }
-
- // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63168
- #[test]
- fn regression_fuzz_match() {
- let pat = "[(\u{6} \0-\u{afdf5}] \0 ";
- let ast = ParserBuilder::new()
- .octal(false)
- .ignore_whitespace(true)
- .build()
- .parse(pat)
- .unwrap();
- let hir = TranslatorBuilder::new()
- .utf8(true)
- .case_insensitive(false)
- .multi_line(false)
- .dot_matches_new_line(false)
- .swap_greed(true)
- .unicode(true)
- .build()
- .translate(pat, &ast)
- .unwrap();
- assert_eq!(
- hir,
- Hir::concat(vec![
- hir_uclass(&[('\0', '\u{afdf5}')]),
- hir_lit("\0"),
- ])
- );
- }
-
- // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63155
- #[cfg(feature = "unicode")]
- #[test]
- fn regression_fuzz_difference1() {
- let pat = r"\W\W|\W[^\v--\W\W\P{Script_Extensions:Pau_Cin_Hau}\u10A1A1-\U{3E3E3}--~~~~--~~~~~~~~------~~~~~~--~~~~~~]*";
- let _ = t(pat); // shouldn't panic
- }
-
- // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63153
- #[test]
- fn regression_fuzz_char_decrement1() {
- let pat = "w[w[^w?\rw\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\r\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0*\0\0\u{1}\0]\0\0-*\0][^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0x\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\0\0*??\0\u{7f}{2}\u{10}??\0\0\0\0\0\0\0\0\0\u{3}\0\0\0}\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\u{1}\0]\0\u{1}\u{1}H-i]-]\0\0\0\0\u{1}\0]\0\0\0\u{1}\0]\0\0-*\0\0\0\0\u{1}9-\u{7f}]\0'|-\u{7f}]\0'|(?i-ux)[-\u{7f}]\0'\u{3}\0\0\0}\0-*\0]<D\0\0\0\0\0\0\u{1}]\0\0\0\0]\0\0-*\0]\0\0 ";
- let _ = t(pat); // shouldn't panic
- }
-}
diff --git a/vendor/regex-syntax/src/hir/visitor.rs b/vendor/regex-syntax/src/hir/visitor.rs
deleted file mode 100644
index f30f0a16..00000000
--- a/vendor/regex-syntax/src/hir/visitor.rs
+++ /dev/null
@@ -1,215 +0,0 @@
-use alloc::{vec, vec::Vec};
-
-use crate::hir::{self, Hir, HirKind};
-
-/// A trait for visiting the high-level IR (HIR) in depth first order.
-///
-/// The principle aim of this trait is to enable callers to perform case
-/// analysis on a high-level intermediate representation of a regular
-/// expression without necessarily using recursion. In particular, this permits
-/// callers to do case analysis with constant stack usage, which can be
-/// important since the size of an HIR may be proportional to end user input.
-///
-/// Typical usage of this trait involves providing an implementation and then
-/// running it using the [`visit`] function.
-pub trait Visitor {
- /// The result of visiting an HIR.
- type Output;
- /// An error that visiting an HIR might return.
- type Err;
-
- /// All implementors of `Visitor` must provide a `finish` method, which
- /// yields the result of visiting the HIR or an error.
- fn finish(self) -> Result<Self::Output, Self::Err>;
-
- /// This method is called before beginning traversal of the HIR.
- fn start(&mut self) {}
-
- /// This method is called on an `Hir` before descending into child `Hir`
- /// nodes.
- fn visit_pre(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
- Ok(())
- }
-
- /// This method is called on an `Hir` after descending all of its child
- /// `Hir` nodes.
- fn visit_post(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
- Ok(())
- }
-
- /// This method is called between child nodes of an alternation.
- fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
- Ok(())
- }
-
- /// This method is called between child nodes of a concatenation.
- fn visit_concat_in(&mut self) -> Result<(), Self::Err> {
- Ok(())
- }
-}
-
-/// Executes an implementation of `Visitor` in constant stack space.
-///
-/// This function will visit every node in the given `Hir` while calling
-/// appropriate methods provided by the [`Visitor`] trait.
-///
-/// The primary use case for this method is when one wants to perform case
-/// analysis over an `Hir` without using a stack size proportional to the depth
-/// of the `Hir`. Namely, this method will instead use constant stack space,
-/// but will use heap space proportional to the size of the `Hir`. This may be
-/// desirable in cases where the size of `Hir` is proportional to end user
-/// input.
-///
-/// If the visitor returns an error at any point, then visiting is stopped and
-/// the error is returned.
-pub fn visit<V: Visitor>(hir: &Hir, visitor: V) -> Result<V::Output, V::Err> {
- HeapVisitor::new().visit(hir, visitor)
-}
-
-/// HeapVisitor visits every item in an `Hir` recursively using constant stack
-/// size and a heap size proportional to the size of the `Hir`.
-struct HeapVisitor<'a> {
- /// A stack of `Hir` nodes. This is roughly analogous to the call stack
- /// used in a typical recursive visitor.
- stack: Vec<(&'a Hir, Frame<'a>)>,
-}
-
-/// Represents a single stack frame while performing structural induction over
-/// an `Hir`.
-enum Frame<'a> {
- /// A stack frame allocated just before descending into a repetition
- /// operator's child node.
- Repetition(&'a hir::Repetition),
- /// A stack frame allocated just before descending into a capture's child
- /// node.
- Capture(&'a hir::Capture),
- /// The stack frame used while visiting every child node of a concatenation
- /// of expressions.
- Concat {
- /// The child node we are currently visiting.
- head: &'a Hir,
- /// The remaining child nodes to visit (which may be empty).
- tail: &'a [Hir],
- },
- /// The stack frame used while visiting every child node of an alternation
- /// of expressions.
- Alternation {
- /// The child node we are currently visiting.
- head: &'a Hir,
- /// The remaining child nodes to visit (which may be empty).
- tail: &'a [Hir],
- },
-}
-
-impl<'a> HeapVisitor<'a> {
- fn new() -> HeapVisitor<'a> {
- HeapVisitor { stack: vec![] }
- }
-
- fn visit<V: Visitor>(
- &mut self,
- mut hir: &'a Hir,
- mut visitor: V,
- ) -> Result<V::Output, V::Err> {
- self.stack.clear();
-
- visitor.start();
- loop {
- visitor.visit_pre(hir)?;
- if let Some(x) = self.induct(hir) {
- let child = x.child();
- self.stack.push((hir, x));
- hir = child;
- continue;
- }
- // No induction means we have a base case, so we can post visit
- // it now.
- visitor.visit_post(hir)?;
-
- // At this point, we now try to pop our call stack until it is
- // either empty or we hit another inductive case.
- loop {
- let (post_hir, frame) = match self.stack.pop() {
- None => return visitor.finish(),
- Some((post_hir, frame)) => (post_hir, frame),
- };
- // If this is a concat/alternate, then we might have additional
- // inductive steps to process.
- if let Some(x) = self.pop(frame) {
- match x {
- Frame::Alternation { .. } => {
- visitor.visit_alternation_in()?;
- }
- Frame::Concat { .. } => {
- visitor.visit_concat_in()?;
- }
- _ => {}
- }
- hir = x.child();
- self.stack.push((post_hir, x));
- break;
- }
- // Otherwise, we've finished visiting all the child nodes for
- // this HIR, so we can post visit it now.
- visitor.visit_post(post_hir)?;
- }
- }
- }
-
- /// Build a stack frame for the given HIR if one is needed (which occurs if
- /// and only if there are child nodes in the HIR). Otherwise, return None.
- fn induct(&mut self, hir: &'a Hir) -> Option<Frame<'a>> {
- match *hir.kind() {
- HirKind::Repetition(ref x) => Some(Frame::Repetition(x)),
- HirKind::Capture(ref x) => Some(Frame::Capture(x)),
- HirKind::Concat(ref x) if x.is_empty() => None,
- HirKind::Concat(ref x) => {
- Some(Frame::Concat { head: &x[0], tail: &x[1..] })
- }
- HirKind::Alternation(ref x) if x.is_empty() => None,
- HirKind::Alternation(ref x) => {
- Some(Frame::Alternation { head: &x[0], tail: &x[1..] })
- }
- _ => None,
- }
- }
-
- /// Pops the given frame. If the frame has an additional inductive step,
- /// then return it, otherwise return `None`.
- fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
- match induct {
- Frame::Repetition(_) => None,
- Frame::Capture(_) => None,
- Frame::Concat { tail, .. } => {
- if tail.is_empty() {
- None
- } else {
- Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
- }
- }
- Frame::Alternation { tail, .. } => {
- if tail.is_empty() {
- None
- } else {
- Some(Frame::Alternation {
- head: &tail[0],
- tail: &tail[1..],
- })
- }
- }
- }
- }
-}
-
-impl<'a> Frame<'a> {
- /// Perform the next inductive step on this frame and return the next
- /// child HIR node to visit.
- fn child(&self) -> &'a Hir {
- match *self {
- Frame::Repetition(rep) => &rep.sub,
- Frame::Capture(capture) => &capture.sub,
- Frame::Concat { head, .. } => head,
- Frame::Alternation { head, .. } => head,
- }
- }
-}
diff --git a/vendor/regex-syntax/src/lib.rs b/vendor/regex-syntax/src/lib.rs
deleted file mode 100644
index 20f25db7..00000000
--- a/vendor/regex-syntax/src/lib.rs
+++ /dev/null
@@ -1,431 +0,0 @@
-/*!
-This crate provides a robust regular expression parser.
-
-This crate defines two primary types:
-
-* [`Ast`](ast::Ast) is the abstract syntax of a regular expression.
- An abstract syntax corresponds to a *structured representation* of the
- concrete syntax of a regular expression, where the concrete syntax is the
- pattern string itself (e.g., `foo(bar)+`). Given some abstract syntax, it
- can be converted back to the original concrete syntax (modulo some details,
- like whitespace). To a first approximation, the abstract syntax is complex
- and difficult to analyze.
-* [`Hir`](hir::Hir) is the high-level intermediate representation
- ("HIR" or "high-level IR" for short) of regular expression. It corresponds to
- an intermediate state of a regular expression that sits between the abstract
- syntax and the low level compiled opcodes that are eventually responsible for
- executing a regular expression search. Given some high-level IR, it is not
- possible to produce the original concrete syntax (although it is possible to
- produce an equivalent concrete syntax, but it will likely scarcely resemble
- the original pattern). To a first approximation, the high-level IR is simple
- and easy to analyze.
-
-These two types come with conversion routines:
-
-* An [`ast::parse::Parser`] converts concrete syntax (a `&str`) to an
-[`Ast`](ast::Ast).
-* A [`hir::translate::Translator`] converts an [`Ast`](ast::Ast) to a
-[`Hir`](hir::Hir).
-
-As a convenience, the above two conversion routines are combined into one via
-the top-level [`Parser`] type. This `Parser` will first convert your pattern to
-an `Ast` and then convert the `Ast` to an `Hir`. It's also exposed as top-level
-[`parse`] free function.
-
-
-# Example
-
-This example shows how to parse a pattern string into its HIR:
-
-```
-use regex_syntax::{hir::Hir, parse};
-
-let hir = parse("a|b")?;
-assert_eq!(hir, Hir::alternation(vec![
- Hir::literal("a".as_bytes()),
- Hir::literal("b".as_bytes()),
-]));
-# Ok::<(), Box<dyn std::error::Error>>(())
-```
-
-
-# Concrete syntax supported
-
-The concrete syntax is documented as part of the public API of the
-[`regex` crate](https://docs.rs/regex/%2A/regex/#syntax).
-
-
-# Input safety
-
-A key feature of this library is that it is safe to use with end user facing
-input. This plays a significant role in the internal implementation. In
-particular:
-
-1. Parsers provide a `nest_limit` option that permits callers to control how
- deeply nested a regular expression is allowed to be. This makes it possible
- to do case analysis over an `Ast` or an `Hir` using recursion without
- worrying about stack overflow.
-2. Since relying on a particular stack size is brittle, this crate goes to
- great lengths to ensure that all interactions with both the `Ast` and the
- `Hir` do not use recursion. Namely, they use constant stack space and heap
- space proportional to the size of the original pattern string (in bytes).
- This includes the type's corresponding destructors. (One exception to this
- is literal extraction, but this will eventually get fixed.)
-
-
-# Error reporting
-
-The `Display` implementations on all `Error` types exposed in this library
-provide nice human readable errors that are suitable for showing to end users
-in a monospace font.
-
-
-# Literal extraction
-
-This crate provides limited support for [literal extraction from `Hir`
-values](hir::literal). Be warned that literal extraction uses recursion, and
-therefore, stack size proportional to the size of the `Hir`.
-
-The purpose of literal extraction is to speed up searches. That is, if you
-know a regular expression must match a prefix or suffix literal, then it is
-often quicker to search for instances of that literal, and then confirm or deny
-the match using the full regular expression engine. These optimizations are
-done automatically in the `regex` crate.
-
-
-# Crate features
-
-An important feature provided by this crate is its Unicode support. This
-includes things like case folding, boolean properties, general categories,
-scripts and Unicode-aware support for the Perl classes `\w`, `\s` and `\d`.
-However, a downside of this support is that it requires bundling several
-Unicode data tables that are substantial in size.
-
-A fair number of use cases do not require full Unicode support. For this
-reason, this crate exposes a number of features to control which Unicode
-data is available.
-
-If a regular expression attempts to use a Unicode feature that is not available
-because the corresponding crate feature was disabled, then translating that
-regular expression to an `Hir` will return an error. (It is still possible
-construct an `Ast` for such a regular expression, since Unicode data is not
-used until translation to an `Hir`.) Stated differently, enabling or disabling
-any of the features below can only add or subtract from the total set of valid
-regular expressions. Enabling or disabling a feature will never modify the
-match semantics of a regular expression.
-
-The following features are available:
-
-* **std** -
- Enables support for the standard library. This feature is enabled by default.
- When disabled, only `core` and `alloc` are used. Otherwise, enabling `std`
- generally just enables `std::error::Error` trait impls for the various error
- types.
-* **unicode** -
- Enables all Unicode features. This feature is enabled by default, and will
- always cover all Unicode features, even if more are added in the future.
-* **unicode-age** -
- Provide the data for the
- [Unicode `Age` property](https://www.unicode.org/reports/tr44/tr44-24.html#Character_Age).
- This makes it possible to use classes like `\p{Age:6.0}` to refer to all
- codepoints first introduced in Unicode 6.0
-* **unicode-bool** -
- Provide the data for numerous Unicode boolean properties. The full list
- is not included here, but contains properties like `Alphabetic`, `Emoji`,
- `Lowercase`, `Math`, `Uppercase` and `White_Space`.
-* **unicode-case** -
- Provide the data for case insensitive matching using
- [Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches).
-* **unicode-gencat** -
- Provide the data for
- [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values).
- This includes, but is not limited to, `Decimal_Number`, `Letter`,
- `Math_Symbol`, `Number` and `Punctuation`.
-* **unicode-perl** -
- Provide the data for supporting the Unicode-aware Perl character classes,
- corresponding to `\w`, `\s` and `\d`. This is also necessary for using
- Unicode-aware word boundary assertions. Note that if this feature is
- disabled, the `\s` and `\d` character classes are still available if the
- `unicode-bool` and `unicode-gencat` features are enabled, respectively.
-* **unicode-script** -
- Provide the data for
- [Unicode scripts and script extensions](https://www.unicode.org/reports/tr24/).
- This includes, but is not limited to, `Arabic`, `Cyrillic`, `Hebrew`,
- `Latin` and `Thai`.
-* **unicode-segment** -
- Provide the data necessary to provide the properties used to implement the
- [Unicode text segmentation algorithms](https://www.unicode.org/reports/tr29/).
- This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and
- `\p{sb=ATerm}`.
-* **arbitrary** -
- Enabling this feature introduces a public dependency on the
- [`arbitrary`](https://crates.io/crates/arbitrary)
- crate. Namely, it implements the `Arbitrary` trait from that crate for the
- [`Ast`](crate::ast::Ast) type. This feature is disabled by default.
-*/
-
-#![no_std]
-#![forbid(unsafe_code)]
-#![deny(missing_docs, rustdoc::broken_intra_doc_links)]
-#![warn(missing_debug_implementations)]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
-
-#[cfg(any(test, feature = "std"))]
-extern crate std;
-
-extern crate alloc;
-
-pub use crate::{
- error::Error,
- parser::{parse, Parser, ParserBuilder},
- unicode::UnicodeWordError,
-};
-
-use alloc::string::String;
-
-pub mod ast;
-mod debug;
-mod either;
-mod error;
-pub mod hir;
-mod parser;
-mod rank;
-mod unicode;
-mod unicode_tables;
-pub mod utf8;
-
-/// Escapes all regular expression meta characters in `text`.
-///
-/// The string returned may be safely used as a literal in a regular
-/// expression.
-pub fn escape(text: &str) -> String {
- let mut quoted = String::new();
- escape_into(text, &mut quoted);
- quoted
-}
-
-/// Escapes all meta characters in `text` and writes the result into `buf`.
-///
-/// This will append escape characters into the given buffer. The characters
-/// that are appended are safe to use as a literal in a regular expression.
-pub fn escape_into(text: &str, buf: &mut String) {
- buf.reserve(text.len());
- for c in text.chars() {
- if is_meta_character(c) {
- buf.push('\\');
- }
- buf.push(c);
- }
-}
-
-/// Returns true if the given character has significance in a regex.
-///
-/// Generally speaking, these are the only characters which _must_ be escaped
-/// in order to match their literal meaning. For example, to match a literal
-/// `|`, one could write `\|`. Sometimes escaping isn't always necessary. For
-/// example, `-` is treated as a meta character because of its significance
-/// for writing ranges inside of character classes, but the regex `-` will
-/// match a literal `-` because `-` has no special meaning outside of character
-/// classes.
-///
-/// In order to determine whether a character may be escaped at all, the
-/// [`is_escapeable_character`] routine should be used. The difference between
-/// `is_meta_character` and `is_escapeable_character` is that the latter will
-/// return true for some characters that are _not_ meta characters. For
-/// example, `%` and `\%` both match a literal `%` in all contexts. In other
-/// words, `is_escapeable_character` includes "superfluous" escapes.
-///
-/// Note that the set of characters for which this function returns `true` or
-/// `false` is fixed and won't change in a semver compatible release. (In this
-/// case, "semver compatible release" actually refers to the `regex` crate
-/// itself, since reducing or expanding the set of meta characters would be a
-/// breaking change for not just `regex-syntax` but also `regex` itself.)
-///
-/// # Example
-///
-/// ```
-/// use regex_syntax::is_meta_character;
-///
-/// assert!(is_meta_character('?'));
-/// assert!(is_meta_character('-'));
-/// assert!(is_meta_character('&'));
-/// assert!(is_meta_character('#'));
-///
-/// assert!(!is_meta_character('%'));
-/// assert!(!is_meta_character('/'));
-/// assert!(!is_meta_character('!'));
-/// assert!(!is_meta_character('"'));
-/// assert!(!is_meta_character('e'));
-/// ```
-pub fn is_meta_character(c: char) -> bool {
- match c {
- '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{'
- | '}' | '^' | '$' | '#' | '&' | '-' | '~' => true,
- _ => false,
- }
-}
-
-/// Returns true if the given character can be escaped in a regex.
-///
-/// This returns true in all cases that `is_meta_character` returns true, but
-/// also returns true in some cases where `is_meta_character` returns false.
-/// For example, `%` is not a meta character, but it is escapeable. That is,
-/// `%` and `\%` both match a literal `%` in all contexts.
-///
-/// The purpose of this routine is to provide knowledge about what characters
-/// may be escaped. Namely, most regex engines permit "superfluous" escapes
-/// where characters without any special significance may be escaped even
-/// though there is no actual _need_ to do so.
-///
-/// This will return false for some characters. For example, `e` is not
-/// escapeable. Therefore, `\e` will either result in a parse error (which is
-/// true today), or it could backwards compatibly evolve into a new construct
-/// with its own meaning. Indeed, that is the purpose of banning _some_
-/// superfluous escapes: it provides a way to evolve the syntax in a compatible
-/// manner.
-///
-/// # Example
-///
-/// ```
-/// use regex_syntax::is_escapeable_character;
-///
-/// assert!(is_escapeable_character('?'));
-/// assert!(is_escapeable_character('-'));
-/// assert!(is_escapeable_character('&'));
-/// assert!(is_escapeable_character('#'));
-/// assert!(is_escapeable_character('%'));
-/// assert!(is_escapeable_character('/'));
-/// assert!(is_escapeable_character('!'));
-/// assert!(is_escapeable_character('"'));
-///
-/// assert!(!is_escapeable_character('e'));
-/// ```
-pub fn is_escapeable_character(c: char) -> bool {
- // Certainly escapeable if it's a meta character.
- if is_meta_character(c) {
- return true;
- }
- // Any character that isn't ASCII is definitely not escapeable. There's
- // no real need to allow things like \☃ right?
- if !c.is_ascii() {
- return false;
- }
- // Otherwise, we basically say that everything is escapeable unless it's a
- // letter or digit. Things like \3 are either octal (when enabled) or an
- // error, and we should keep it that way. Otherwise, letters are reserved
- // for adding new syntax in a backwards compatible way.
- match c {
- '0'..='9' | 'A'..='Z' | 'a'..='z' => false,
- // While not currently supported, we keep these as not escapeable to
- // give us some flexibility with respect to supporting the \< and
- // \> word boundary assertions in the future. By rejecting them as
- // escapeable, \< and \> will result in a parse error. Thus, we can
- // turn them into something else in the future without it being a
- // backwards incompatible change.
- //
- // OK, now we support \< and \>, and we need to retain them as *not*
- // escapeable here since the escape sequence is significant.
- '<' | '>' => false,
- _ => true,
- }
-}
-
-/// Returns true if and only if the given character is a Unicode word
-/// character.
-///
-/// A Unicode word character is defined by
-/// [UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties).
-/// In particular, a character
-/// is considered a word character if it is in either of the `Alphabetic` or
-/// `Join_Control` properties, or is in one of the `Decimal_Number`, `Mark`
-/// or `Connector_Punctuation` general categories.
-///
-/// # Panics
-///
-/// If the `unicode-perl` feature is not enabled, then this function
-/// panics. For this reason, it is recommended that callers use
-/// [`try_is_word_character`] instead.
-pub fn is_word_character(c: char) -> bool {
- try_is_word_character(c).expect("unicode-perl feature must be enabled")
-}
-
-/// Returns true if and only if the given character is a Unicode word
-/// character.
-///
-/// A Unicode word character is defined by
-/// [UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties).
-/// In particular, a character
-/// is considered a word character if it is in either of the `Alphabetic` or
-/// `Join_Control` properties, or is in one of the `Decimal_Number`, `Mark`
-/// or `Connector_Punctuation` general categories.
-///
-/// # Errors
-///
-/// If the `unicode-perl` feature is not enabled, then this function always
-/// returns an error.
-pub fn try_is_word_character(
- c: char,
-) -> core::result::Result<bool, UnicodeWordError> {
- unicode::is_word_character(c)
-}
-
-/// Returns true if and only if the given character is an ASCII word character.
-///
-/// An ASCII word character is defined by the following character class:
-/// `[_0-9a-zA-Z]`.
-pub fn is_word_byte(c: u8) -> bool {
- match c {
- b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' => true,
- _ => false,
- }
-}
-
-#[cfg(test)]
-mod tests {
- use alloc::string::ToString;
-
- use super::*;
-
- #[test]
- fn escape_meta() {
- assert_eq!(
- escape(r"\.+*?()|[]{}^$#&-~"),
- r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~".to_string()
- );
- }
-
- #[test]
- fn word_byte() {
- assert!(is_word_byte(b'a'));
- assert!(!is_word_byte(b'-'));
- }
-
- #[test]
- #[cfg(feature = "unicode-perl")]
- fn word_char() {
- assert!(is_word_character('a'), "ASCII");
- assert!(is_word_character('à'), "Latin-1");
- assert!(is_word_character('β'), "Greek");
- assert!(is_word_character('\u{11011}'), "Brahmi (Unicode 6.0)");
- assert!(is_word_character('\u{11611}'), "Modi (Unicode 7.0)");
- assert!(is_word_character('\u{11711}'), "Ahom (Unicode 8.0)");
- assert!(is_word_character('\u{17828}'), "Tangut (Unicode 9.0)");
- assert!(is_word_character('\u{1B1B1}'), "Nushu (Unicode 10.0)");
- assert!(is_word_character('\u{16E40}'), "Medefaidrin (Unicode 11.0)");
- assert!(!is_word_character('-'));
- assert!(!is_word_character('☃'));
- }
-
- #[test]
- #[should_panic]
- #[cfg(not(feature = "unicode-perl"))]
- fn word_char_disabled_panic() {
- assert!(is_word_character('a'));
- }
-
- #[test]
- #[cfg(not(feature = "unicode-perl"))]
- fn word_char_disabled_error() {
- assert!(try_is_word_character('a').is_err());
- }
-}
diff --git a/vendor/regex-syntax/src/parser.rs b/vendor/regex-syntax/src/parser.rs
deleted file mode 100644
index f482b846..00000000
--- a/vendor/regex-syntax/src/parser.rs
+++ /dev/null
@@ -1,254 +0,0 @@
-use crate::{ast, hir, Error};
-
-/// A convenience routine for parsing a regex using default options.
-///
-/// This is equivalent to `Parser::new().parse(pattern)`.
-///
-/// If you need to set non-default options, then use a [`ParserBuilder`].
-///
-/// This routine returns an [`Hir`](hir::Hir) value. Namely, it automatically
-/// parses the pattern as an [`Ast`](ast::Ast) and then invokes the translator
-/// to convert the `Ast` into an `Hir`. If you need access to the `Ast`, then
-/// you should use a [`ast::parse::Parser`].
-pub fn parse(pattern: &str) -> Result<hir::Hir, Error> {
- Parser::new().parse(pattern)
-}
-
-/// A builder for a regular expression parser.
-///
-/// This builder permits modifying configuration options for the parser.
-///
-/// This type combines the builder options for both the [AST
-/// `ParserBuilder`](ast::parse::ParserBuilder) and the [HIR
-/// `TranslatorBuilder`](hir::translate::TranslatorBuilder).
-#[derive(Clone, Debug, Default)]
-pub struct ParserBuilder {
- ast: ast::parse::ParserBuilder,
- hir: hir::translate::TranslatorBuilder,
-}
-
-impl ParserBuilder {
- /// Create a new parser builder with a default configuration.
- pub fn new() -> ParserBuilder {
- ParserBuilder::default()
- }
-
- /// Build a parser from this configuration with the given pattern.
- pub fn build(&self) -> Parser {
- Parser { ast: self.ast.build(), hir: self.hir.build() }
- }
-
- /// Set the nesting limit for this parser.
- ///
- /// The nesting limit controls how deep the abstract syntax tree is allowed
- /// to be. If the AST exceeds the given limit (e.g., with too many nested
- /// groups), then an error is returned by the parser.
- ///
- /// The purpose of this limit is to act as a heuristic to prevent stack
- /// overflow for consumers that do structural induction on an `Ast` using
- /// explicit recursion. While this crate never does this (instead using
- /// constant stack space and moving the call stack to the heap), other
- /// crates may.
- ///
- /// This limit is not checked until the entire Ast is parsed. Therefore,
- /// if callers want to put a limit on the amount of heap space used, then
- /// they should impose a limit on the length, in bytes, of the concrete
- /// pattern string. In particular, this is viable since this parser
- /// implementation will limit itself to heap space proportional to the
- /// length of the pattern string.
- ///
- /// Note that a nest limit of `0` will return a nest limit error for most
- /// patterns but not all. For example, a nest limit of `0` permits `a` but
- /// not `ab`, since `ab` requires a concatenation, which results in a nest
- /// depth of `1`. In general, a nest limit is not something that manifests
- /// in an obvious way in the concrete syntax, therefore, it should not be
- /// used in a granular way.
- pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
- self.ast.nest_limit(limit);
- self
- }
-
- /// Whether to support octal syntax or not.
- ///
- /// Octal syntax is a little-known way of uttering Unicode codepoints in
- /// a regular expression. For example, `a`, `\x61`, `\u0061` and
- /// `\141` are all equivalent regular expressions, where the last example
- /// shows octal syntax.
- ///
- /// While supporting octal syntax isn't in and of itself a problem, it does
- /// make good error messages harder. That is, in PCRE based regex engines,
- /// syntax like `\0` invokes a backreference, which is explicitly
- /// unsupported in Rust's regex engine. However, many users expect it to
- /// be supported. Therefore, when octal support is disabled, the error
- /// message will explicitly mention that backreferences aren't supported.
- ///
- /// Octal syntax is disabled by default.
- pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
- self.ast.octal(yes);
- self
- }
-
- /// When disabled, translation will permit the construction of a regular
- /// expression that may match invalid UTF-8.
- ///
- /// When enabled (the default), the translator is guaranteed to produce an
- /// expression that, for non-empty matches, will only ever produce spans
- /// that are entirely valid UTF-8 (otherwise, the translator will return an
- /// error).
- ///
- /// Perhaps surprisingly, when UTF-8 is enabled, an empty regex or even
- /// a negated ASCII word boundary (uttered as `(?-u:\B)` in the concrete
- /// syntax) will be allowed even though they can produce matches that split
- /// a UTF-8 encoded codepoint. This only applies to zero-width or "empty"
- /// matches, and it is expected that the regex engine itself must handle
- /// these cases if necessary (perhaps by suppressing any zero-width matches
- /// that split a codepoint).
- pub fn utf8(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.utf8(yes);
- self
- }
-
- /// Enable verbose mode in the regular expression.
- ///
- /// When enabled, verbose mode permits insignificant whitespace in many
- /// places in the regular expression, as well as comments. Comments are
- /// started using `#` and continue until the end of the line.
- ///
- /// By default, this is disabled. It may be selectively enabled in the
- /// regular expression by using the `x` flag regardless of this setting.
- pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
- self.ast.ignore_whitespace(yes);
- self
- }
-
- /// Enable or disable the case insensitive flag by default.
- ///
- /// By default this is disabled. It may alternatively be selectively
- /// enabled in the regular expression itself via the `i` flag.
- pub fn case_insensitive(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.case_insensitive(yes);
- self
- }
-
- /// Enable or disable the multi-line matching flag by default.
- ///
- /// By default this is disabled. It may alternatively be selectively
- /// enabled in the regular expression itself via the `m` flag.
- pub fn multi_line(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.multi_line(yes);
- self
- }
-
- /// Enable or disable the "dot matches any character" flag by default.
- ///
- /// By default this is disabled. It may alternatively be selectively
- /// enabled in the regular expression itself via the `s` flag.
- pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.dot_matches_new_line(yes);
- self
- }
-
- /// Enable or disable the CRLF mode flag by default.
- ///
- /// By default this is disabled. It may alternatively be selectively
- /// enabled in the regular expression itself via the `R` flag.
- ///
- /// When CRLF mode is enabled, the following happens:
- ///
- /// * Unless `dot_matches_new_line` is enabled, `.` will match any character
- /// except for `\r` and `\n`.
- /// * When `multi_line` mode is enabled, `^` and `$` will treat `\r\n`,
- /// `\r` and `\n` as line terminators. And in particular, neither will
- /// match between a `\r` and a `\n`.
- pub fn crlf(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.crlf(yes);
- self
- }
-
- /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`.
- ///
- /// Namely, instead of `.` (by default) matching everything except for `\n`,
- /// this will cause `.` to match everything except for the byte given.
- ///
- /// If `.` is used in a context where Unicode mode is enabled and this byte
- /// isn't ASCII, then an error will be returned. When Unicode mode is
- /// disabled, then any byte is permitted, but will return an error if UTF-8
- /// mode is enabled and it is a non-ASCII byte.
- ///
- /// In short, any ASCII value for a line terminator is always okay. But a
- /// non-ASCII byte might result in an error depending on whether Unicode
- /// mode or UTF-8 mode are enabled.
- ///
- /// Note that if `R` mode is enabled then it always takes precedence and
- /// the line terminator will be treated as `\r` and `\n` simultaneously.
- ///
- /// Note also that this *doesn't* impact the look-around assertions
- /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional
- /// configuration in the regex engine itself.
- pub fn line_terminator(&mut self, byte: u8) -> &mut ParserBuilder {
- self.hir.line_terminator(byte);
- self
- }
-
- /// Enable or disable the "swap greed" flag by default.
- ///
- /// By default this is disabled. It may alternatively be selectively
- /// enabled in the regular expression itself via the `U` flag.
- pub fn swap_greed(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.swap_greed(yes);
- self
- }
-
- /// Enable or disable the Unicode flag (`u`) by default.
- ///
- /// By default this is **enabled**. It may alternatively be selectively
- /// disabled in the regular expression itself via the `u` flag.
- ///
- /// Note that unless `utf8` is disabled (it's enabled by default), a
- /// regular expression will fail to parse if Unicode mode is disabled and a
- /// sub-expression could possibly match invalid UTF-8.
- pub fn unicode(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.unicode(yes);
- self
- }
-}
-
-/// A convenience parser for regular expressions.
-///
-/// This parser takes as input a regular expression pattern string (the
-/// "concrete syntax") and returns a high-level intermediate representation
-/// (the HIR) suitable for most types of analysis. In particular, this parser
-/// hides the intermediate state of producing an AST (the "abstract syntax").
-/// The AST is itself far more complex than the HIR, so this parser serves as a
-/// convenience for never having to deal with it at all.
-///
-/// If callers have more fine grained use cases that need an AST, then please
-/// see the [`ast::parse`] module.
-///
-/// A `Parser` can be configured in more detail via a [`ParserBuilder`].
-#[derive(Clone, Debug)]
-pub struct Parser {
- ast: ast::parse::Parser,
- hir: hir::translate::Translator,
-}
-
-impl Parser {
- /// Create a new parser with a default configuration.
- ///
- /// The parser can be run with `parse` method. The parse method returns
- /// a high level intermediate representation of the given regular
- /// expression.
- ///
- /// To set configuration options on the parser, use [`ParserBuilder`].
- pub fn new() -> Parser {
- ParserBuilder::new().build()
- }
-
- /// Parse the regular expression into a high level intermediate
- /// representation.
- pub fn parse(&mut self, pattern: &str) -> Result<hir::Hir, Error> {
- let ast = self.ast.parse(pattern)?;
- let hir = self.hir.translate(pattern, &ast)?;
- Ok(hir)
- }
-}
diff --git a/vendor/regex-syntax/src/rank.rs b/vendor/regex-syntax/src/rank.rs
deleted file mode 100644
index ccb25a20..00000000
--- a/vendor/regex-syntax/src/rank.rs
+++ /dev/null
@@ -1,258 +0,0 @@
-pub(crate) const BYTE_FREQUENCIES: [u8; 256] = [
- 55, // '\x00'
- 52, // '\x01'
- 51, // '\x02'
- 50, // '\x03'
- 49, // '\x04'
- 48, // '\x05'
- 47, // '\x06'
- 46, // '\x07'
- 45, // '\x08'
- 103, // '\t'
- 242, // '\n'
- 66, // '\x0b'
- 67, // '\x0c'
- 229, // '\r'
- 44, // '\x0e'
- 43, // '\x0f'
- 42, // '\x10'
- 41, // '\x11'
- 40, // '\x12'
- 39, // '\x13'
- 38, // '\x14'
- 37, // '\x15'
- 36, // '\x16'
- 35, // '\x17'
- 34, // '\x18'
- 33, // '\x19'
- 56, // '\x1a'
- 32, // '\x1b'
- 31, // '\x1c'
- 30, // '\x1d'
- 29, // '\x1e'
- 28, // '\x1f'
- 255, // ' '
- 148, // '!'
- 164, // '"'
- 149, // '#'
- 136, // '$'
- 160, // '%'
- 155, // '&'
- 173, // "'"
- 221, // '('
- 222, // ')'
- 134, // '*'
- 122, // '+'
- 232, // ','
- 202, // '-'
- 215, // '.'
- 224, // '/'
- 208, // '0'
- 220, // '1'
- 204, // '2'
- 187, // '3'
- 183, // '4'
- 179, // '5'
- 177, // '6'
- 168, // '7'
- 178, // '8'
- 200, // '9'
- 226, // ':'
- 195, // ';'
- 154, // '<'
- 184, // '='
- 174, // '>'
- 126, // '?'
- 120, // '@'
- 191, // 'A'
- 157, // 'B'
- 194, // 'C'
- 170, // 'D'
- 189, // 'E'
- 162, // 'F'
- 161, // 'G'
- 150, // 'H'
- 193, // 'I'
- 142, // 'J'
- 137, // 'K'
- 171, // 'L'
- 176, // 'M'
- 185, // 'N'
- 167, // 'O'
- 186, // 'P'
- 112, // 'Q'
- 175, // 'R'
- 192, // 'S'
- 188, // 'T'
- 156, // 'U'
- 140, // 'V'
- 143, // 'W'
- 123, // 'X'
- 133, // 'Y'
- 128, // 'Z'
- 147, // '['
- 138, // '\\'
- 146, // ']'
- 114, // '^'
- 223, // '_'
- 151, // '`'
- 249, // 'a'
- 216, // 'b'
- 238, // 'c'
- 236, // 'd'
- 253, // 'e'
- 227, // 'f'
- 218, // 'g'
- 230, // 'h'
- 247, // 'i'
- 135, // 'j'
- 180, // 'k'
- 241, // 'l'
- 233, // 'm'
- 246, // 'n'
- 244, // 'o'
- 231, // 'p'
- 139, // 'q'
- 245, // 'r'
- 243, // 's'
- 251, // 't'
- 235, // 'u'
- 201, // 'v'
- 196, // 'w'
- 240, // 'x'
- 214, // 'y'
- 152, // 'z'
- 182, // '{'
- 205, // '|'
- 181, // '}'
- 127, // '~'
- 27, // '\x7f'
- 212, // '\x80'
- 211, // '\x81'
- 210, // '\x82'
- 213, // '\x83'
- 228, // '\x84'
- 197, // '\x85'
- 169, // '\x86'
- 159, // '\x87'
- 131, // '\x88'
- 172, // '\x89'
- 105, // '\x8a'
- 80, // '\x8b'
- 98, // '\x8c'
- 96, // '\x8d'
- 97, // '\x8e'
- 81, // '\x8f'
- 207, // '\x90'
- 145, // '\x91'
- 116, // '\x92'
- 115, // '\x93'
- 144, // '\x94'
- 130, // '\x95'
- 153, // '\x96'
- 121, // '\x97'
- 107, // '\x98'
- 132, // '\x99'
- 109, // '\x9a'
- 110, // '\x9b'
- 124, // '\x9c'
- 111, // '\x9d'
- 82, // '\x9e'
- 108, // '\x9f'
- 118, // '\xa0'
- 141, // '¡'
- 113, // '¢'
- 129, // '£'
- 119, // '¤'
- 125, // '¥'
- 165, // '¦'
- 117, // '§'
- 92, // '¨'
- 106, // '©'
- 83, // 'ª'
- 72, // '«'
- 99, // '¬'
- 93, // '\xad'
- 65, // '®'
- 79, // '¯'
- 166, // '°'
- 237, // '±'
- 163, // '²'
- 199, // '³'
- 190, // '´'
- 225, // 'µ'
- 209, // '¶'
- 203, // '·'
- 198, // '¸'
- 217, // '¹'
- 219, // 'º'
- 206, // '»'
- 234, // '¼'
- 248, // '½'
- 158, // '¾'
- 239, // '¿'
- 255, // 'À'
- 255, // 'Á'
- 255, // 'Â'
- 255, // 'Ã'
- 255, // 'Ä'
- 255, // 'Å'
- 255, // 'Æ'
- 255, // 'Ç'
- 255, // 'È'
- 255, // 'É'
- 255, // 'Ê'
- 255, // 'Ë'
- 255, // 'Ì'
- 255, // 'Í'
- 255, // 'Î'
- 255, // 'Ï'
- 255, // 'Ð'
- 255, // 'Ñ'
- 255, // 'Ò'
- 255, // 'Ó'
- 255, // 'Ô'
- 255, // 'Õ'
- 255, // 'Ö'
- 255, // '×'
- 255, // 'Ø'
- 255, // 'Ù'
- 255, // 'Ú'
- 255, // 'Û'
- 255, // 'Ü'
- 255, // 'Ý'
- 255, // 'Þ'
- 255, // 'ß'
- 255, // 'à'
- 255, // 'á'
- 255, // 'â'
- 255, // 'ã'
- 255, // 'ä'
- 255, // 'å'
- 255, // 'æ'
- 255, // 'ç'
- 255, // 'è'
- 255, // 'é'
- 255, // 'ê'
- 255, // 'ë'
- 255, // 'ì'
- 255, // 'í'
- 255, // 'î'
- 255, // 'ï'
- 255, // 'ð'
- 255, // 'ñ'
- 255, // 'ò'
- 255, // 'ó'
- 255, // 'ô'
- 255, // 'õ'
- 255, // 'ö'
- 255, // '÷'
- 255, // 'ø'
- 255, // 'ù'
- 255, // 'ú'
- 255, // 'û'
- 255, // 'ü'
- 255, // 'ý'
- 255, // 'þ'
- 255, // 'ÿ'
-];
diff --git a/vendor/regex-syntax/src/unicode.rs b/vendor/regex-syntax/src/unicode.rs
deleted file mode 100644
index 07f78194..00000000
--- a/vendor/regex-syntax/src/unicode.rs
+++ /dev/null
@@ -1,1041 +0,0 @@
-use alloc::{
- string::{String, ToString},
- vec::Vec,
-};
-
-use crate::hir;
-
-/// An inclusive range of codepoints from a generated file (hence the static
-/// lifetime).
-type Range = &'static [(char, char)];
-
-/// An error that occurs when dealing with Unicode.
-///
-/// We don't impl the Error trait here because these always get converted
-/// into other public errors. (This error type isn't exported.)
-#[derive(Debug)]
-pub enum Error {
- PropertyNotFound,
- PropertyValueNotFound,
- // Not used when unicode-perl is enabled.
- #[allow(dead_code)]
- PerlClassNotFound,
-}
-
-/// An error that occurs when Unicode-aware simple case folding fails.
-///
-/// This error can occur when the case mapping tables necessary for Unicode
-/// aware case folding are unavailable. This only occurs when the
-/// `unicode-case` feature is disabled. (The feature is enabled by default.)
-#[derive(Debug)]
-pub struct CaseFoldError(());
-
-#[cfg(feature = "std")]
-impl std::error::Error for CaseFoldError {}
-
-impl core::fmt::Display for CaseFoldError {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- write!(
- f,
- "Unicode-aware case folding is not available \
- (probably because the unicode-case feature is not enabled)"
- )
- }
-}
-
-/// An error that occurs when the Unicode-aware `\w` class is unavailable.
-///
-/// This error can occur when the data tables necessary for the Unicode aware
-/// Perl character class `\w` are unavailable. This only occurs when the
-/// `unicode-perl` feature is disabled. (The feature is enabled by default.)
-#[derive(Debug)]
-pub struct UnicodeWordError(());
-
-#[cfg(feature = "std")]
-impl std::error::Error for UnicodeWordError {}
-
-impl core::fmt::Display for UnicodeWordError {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
- write!(
- f,
- "Unicode-aware \\w class is not available \
- (probably because the unicode-perl feature is not enabled)"
- )
- }
-}
-
-/// A state oriented traverser of the simple case folding table.
-///
-/// A case folder can be constructed via `SimpleCaseFolder::new()`, which will
-/// return an error if the underlying case folding table is unavailable.
-///
-/// After construction, it is expected that callers will use
-/// `SimpleCaseFolder::mapping` by calling it with codepoints in strictly
-/// increasing order. For example, calling it on `b` and then on `a` is illegal
-/// and will result in a panic.
-///
-/// The main idea of this type is that it tries hard to make mapping lookups
-/// fast by exploiting the structure of the underlying table, and the ordering
-/// assumption enables this.
-#[derive(Debug)]
-pub struct SimpleCaseFolder {
- /// The simple case fold table. It's a sorted association list, where the
- /// keys are Unicode scalar values and the values are the corresponding
- /// equivalence class (not including the key) of the "simple" case folded
- /// Unicode scalar values.
- table: &'static [(char, &'static [char])],
- /// The last codepoint that was used for a lookup.
- last: Option<char>,
- /// The index to the entry in `table` corresponding to the smallest key `k`
- /// such that `k > k0`, where `k0` is the most recent key lookup. Note that
- /// in particular, `k0` may not be in the table!
- next: usize,
-}
-
-impl SimpleCaseFolder {
- /// Create a new simple case folder, returning an error if the underlying
- /// case folding table is unavailable.
- pub fn new() -> Result<SimpleCaseFolder, CaseFoldError> {
- #[cfg(not(feature = "unicode-case"))]
- {
- Err(CaseFoldError(()))
- }
- #[cfg(feature = "unicode-case")]
- {
- Ok(SimpleCaseFolder {
- table: crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE,
- last: None,
- next: 0,
- })
- }
- }
-
- /// Return the equivalence class of case folded codepoints for the given
- /// codepoint. The equivalence class returned never includes the codepoint
- /// given. If the given codepoint has no case folded codepoints (i.e.,
- /// no entry in the underlying case folding table), then this returns an
- /// empty slice.
- ///
- /// # Panics
- ///
- /// This panics when called with a `c` that is less than or equal to the
- /// previous call. In other words, callers need to use this method with
- /// strictly increasing values of `c`.
- pub fn mapping(&mut self, c: char) -> &'static [char] {
- if let Some(last) = self.last {
- assert!(
- last < c,
- "got codepoint U+{:X} which occurs before \
- last codepoint U+{:X}",
- u32::from(c),
- u32::from(last),
- );
- }
- self.last = Some(c);
- if self.next >= self.table.len() {
- return &[];
- }
- let (k, v) = self.table[self.next];
- if k == c {
- self.next += 1;
- return v;
- }
- match self.get(c) {
- Err(i) => {
- self.next = i;
- &[]
- }
- Ok(i) => {
- // Since we require lookups to proceed
- // in order, anything we find should be
- // after whatever we thought might be
- // next. Otherwise, the caller is either
- // going out of order or we would have
- // found our next key at 'self.next'.
- assert!(i > self.next);
- self.next = i + 1;
- self.table[i].1
- }
- }
- }
-
- /// Returns true if and only if the given range overlaps with any region
- /// of the underlying case folding table. That is, when true, there exists
- /// at least one codepoint in the inclusive range `[start, end]` that has
- /// a non-trivial equivalence class of case folded codepoints. Conversely,
- /// when this returns false, all codepoints in the range `[start, end]`
- /// correspond to the trivial equivalence class of case folded codepoints,
- /// i.e., itself.
- ///
- /// This is useful to call before iterating over the codepoints in the
- /// range and looking up the mapping for each. If you know none of the
- /// mappings will return anything, then you might be able to skip doing it
- /// altogether.
- ///
- /// # Panics
- ///
- /// This panics when `end < start`.
- pub fn overlaps(&self, start: char, end: char) -> bool {
- use core::cmp::Ordering;
-
- assert!(start <= end);
- self.table
- .binary_search_by(|&(c, _)| {
- if start <= c && c <= end {
- Ordering::Equal
- } else if c > end {
- Ordering::Greater
- } else {
- Ordering::Less
- }
- })
- .is_ok()
- }
-
- /// Returns the index at which `c` occurs in the simple case fold table. If
- /// `c` does not occur, then this returns an `i` such that `table[i-1].0 <
- /// c` and `table[i].0 > c`.
- fn get(&self, c: char) -> Result<usize, usize> {
- self.table.binary_search_by_key(&c, |&(c1, _)| c1)
- }
-}
-
-/// A query for finding a character class defined by Unicode. This supports
-/// either use of a property name directly, or lookup by property value. The
-/// former generally refers to Binary properties (see UTS#44, Table 8), but
-/// as a special exception (see UTS#18, Section 1.2) both general categories
-/// (an enumeration) and scripts (a catalog) are supported as if each of their
-/// possible values were a binary property.
-///
-/// In all circumstances, property names and values are normalized and
-/// canonicalized. That is, `GC == gc == GeneralCategory == general_category`.
-///
-/// The lifetime `'a` refers to the shorter of the lifetimes of property name
-/// and property value.
-#[derive(Debug)]
-pub enum ClassQuery<'a> {
- /// Return a class corresponding to a Unicode binary property, named by
- /// a single letter.
- OneLetter(char),
- /// Return a class corresponding to a Unicode binary property.
- ///
- /// Note that, by special exception (see UTS#18, Section 1.2), both
- /// general category values and script values are permitted here as if
- /// they were a binary property.
- Binary(&'a str),
- /// Return a class corresponding to all codepoints whose property
- /// (identified by `property_name`) corresponds to the given value
- /// (identified by `property_value`).
- ByValue {
- /// A property name.
- property_name: &'a str,
- /// A property value.
- property_value: &'a str,
- },
-}
-
-impl<'a> ClassQuery<'a> {
- fn canonicalize(&self) -> Result<CanonicalClassQuery, Error> {
- match *self {
- ClassQuery::OneLetter(c) => self.canonical_binary(&c.to_string()),
- ClassQuery::Binary(name) => self.canonical_binary(name),
- ClassQuery::ByValue { property_name, property_value } => {
- let property_name = symbolic_name_normalize(property_name);
- let property_value = symbolic_name_normalize(property_value);
-
- let canon_name = match canonical_prop(&property_name)? {
- None => return Err(Error::PropertyNotFound),
- Some(canon_name) => canon_name,
- };
- Ok(match canon_name {
- "General_Category" => {
- let canon = match canonical_gencat(&property_value)? {
- None => return Err(Error::PropertyValueNotFound),
- Some(canon) => canon,
- };
- CanonicalClassQuery::GeneralCategory(canon)
- }
- "Script" => {
- let canon = match canonical_script(&property_value)? {
- None => return Err(Error::PropertyValueNotFound),
- Some(canon) => canon,
- };
- CanonicalClassQuery::Script(canon)
- }
- _ => {
- let vals = match property_values(canon_name)? {
- None => return Err(Error::PropertyValueNotFound),
- Some(vals) => vals,
- };
- let canon_val =
- match canonical_value(vals, &property_value) {
- None => {
- return Err(Error::PropertyValueNotFound)
- }
- Some(canon_val) => canon_val,
- };
- CanonicalClassQuery::ByValue {
- property_name: canon_name,
- property_value: canon_val,
- }
- }
- })
- }
- }
- }
-
- fn canonical_binary(
- &self,
- name: &str,
- ) -> Result<CanonicalClassQuery, Error> {
- let norm = symbolic_name_normalize(name);
-
- // This is a special case where 'cf' refers to the 'Format' general
- // category, but where the 'cf' abbreviation is also an abbreviation
- // for the 'Case_Folding' property. But we want to treat it as
- // a general category. (Currently, we don't even support the
- // 'Case_Folding' property. But if we do in the future, users will be
- // required to spell it out.)
- //
- // Also 'sc' refers to the 'Currency_Symbol' general category, but is
- // also the abbreviation for the 'Script' property. So we avoid calling
- // 'canonical_prop' for it too, which would erroneously normalize it
- // to 'Script'.
- //
- // Another case: 'lc' is an abbreviation for the 'Cased_Letter'
- // general category, but is also an abbreviation for the 'Lowercase_Mapping'
- // property. We don't currently support the latter, so as with 'cf'
- // above, we treat 'lc' as 'Cased_Letter'.
- if norm != "cf" && norm != "sc" && norm != "lc" {
- if let Some(canon) = canonical_prop(&norm)? {
- return Ok(CanonicalClassQuery::Binary(canon));
- }
- }
- if let Some(canon) = canonical_gencat(&norm)? {
- return Ok(CanonicalClassQuery::GeneralCategory(canon));
- }
- if let Some(canon) = canonical_script(&norm)? {
- return Ok(CanonicalClassQuery::Script(canon));
- }
- Err(Error::PropertyNotFound)
- }
-}
-
-/// Like ClassQuery, but its parameters have been canonicalized. This also
-/// differentiates binary properties from flattened general categories and
-/// scripts.
-#[derive(Debug, Eq, PartialEq)]
-enum CanonicalClassQuery {
- /// The canonical binary property name.
- Binary(&'static str),
- /// The canonical general category name.
- GeneralCategory(&'static str),
- /// The canonical script name.
- Script(&'static str),
- /// An arbitrary association between property and value, both of which
- /// have been canonicalized.
- ///
- /// Note that by construction, the property name of ByValue will never
- /// be General_Category or Script. Those two cases are subsumed by the
- /// eponymous variants.
- ByValue {
- /// The canonical property name.
- property_name: &'static str,
- /// The canonical property value.
- property_value: &'static str,
- },
-}
-
-/// Looks up a Unicode class given a query. If one doesn't exist, then
-/// `None` is returned.
-pub fn class(query: ClassQuery<'_>) -> Result<hir::ClassUnicode, Error> {
- use self::CanonicalClassQuery::*;
-
- match query.canonicalize()? {
- Binary(name) => bool_property(name),
- GeneralCategory(name) => gencat(name),
- Script(name) => script(name),
- ByValue { property_name: "Age", property_value } => {
- let mut class = hir::ClassUnicode::empty();
- for set in ages(property_value)? {
- class.union(&hir_class(set));
- }
- Ok(class)
- }
- ByValue { property_name: "Script_Extensions", property_value } => {
- script_extension(property_value)
- }
- ByValue {
- property_name: "Grapheme_Cluster_Break",
- property_value,
- } => gcb(property_value),
- ByValue { property_name: "Sentence_Break", property_value } => {
- sb(property_value)
- }
- ByValue { property_name: "Word_Break", property_value } => {
- wb(property_value)
- }
- _ => {
- // What else should we support?
- Err(Error::PropertyNotFound)
- }
- }
-}
-
-/// Returns a Unicode aware class for \w.
-///
-/// This returns an error if the data is not available for \w.
-pub fn perl_word() -> Result<hir::ClassUnicode, Error> {
- #[cfg(not(feature = "unicode-perl"))]
- fn imp() -> Result<hir::ClassUnicode, Error> {
- Err(Error::PerlClassNotFound)
- }
-
- #[cfg(feature = "unicode-perl")]
- fn imp() -> Result<hir::ClassUnicode, Error> {
- use crate::unicode_tables::perl_word::PERL_WORD;
- Ok(hir_class(PERL_WORD))
- }
-
- imp()
-}
-
-/// Returns a Unicode aware class for \s.
-///
-/// This returns an error if the data is not available for \s.
-pub fn perl_space() -> Result<hir::ClassUnicode, Error> {
- #[cfg(not(any(feature = "unicode-perl", feature = "unicode-bool")))]
- fn imp() -> Result<hir::ClassUnicode, Error> {
- Err(Error::PerlClassNotFound)
- }
-
- #[cfg(all(feature = "unicode-perl", not(feature = "unicode-bool")))]
- fn imp() -> Result<hir::ClassUnicode, Error> {
- use crate::unicode_tables::perl_space::WHITE_SPACE;
- Ok(hir_class(WHITE_SPACE))
- }
-
- #[cfg(feature = "unicode-bool")]
- fn imp() -> Result<hir::ClassUnicode, Error> {
- use crate::unicode_tables::property_bool::WHITE_SPACE;
- Ok(hir_class(WHITE_SPACE))
- }
-
- imp()
-}
-
-/// Returns a Unicode aware class for \d.
-///
-/// This returns an error if the data is not available for \d.
-pub fn perl_digit() -> Result<hir::ClassUnicode, Error> {
- #[cfg(not(any(feature = "unicode-perl", feature = "unicode-gencat")))]
- fn imp() -> Result<hir::ClassUnicode, Error> {
- Err(Error::PerlClassNotFound)
- }
-
- #[cfg(all(feature = "unicode-perl", not(feature = "unicode-gencat")))]
- fn imp() -> Result<hir::ClassUnicode, Error> {
- use crate::unicode_tables::perl_decimal::DECIMAL_NUMBER;
- Ok(hir_class(DECIMAL_NUMBER))
- }
-
- #[cfg(feature = "unicode-gencat")]
- fn imp() -> Result<hir::ClassUnicode, Error> {
- use crate::unicode_tables::general_category::DECIMAL_NUMBER;
- Ok(hir_class(DECIMAL_NUMBER))
- }
-
- imp()
-}
-
-/// Build a Unicode HIR class from a sequence of Unicode scalar value ranges.
-pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode {
- let hir_ranges: Vec<hir::ClassUnicodeRange> = ranges
- .iter()
- .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
- .collect();
- hir::ClassUnicode::new(hir_ranges)
-}
-
-/// Returns true only if the given codepoint is in the `\w` character class.
-///
-/// If the `unicode-perl` feature is not enabled, then this returns an error.
-pub fn is_word_character(c: char) -> Result<bool, UnicodeWordError> {
- #[cfg(not(feature = "unicode-perl"))]
- fn imp(_: char) -> Result<bool, UnicodeWordError> {
- Err(UnicodeWordError(()))
- }
-
- #[cfg(feature = "unicode-perl")]
- fn imp(c: char) -> Result<bool, UnicodeWordError> {
- use crate::{is_word_byte, unicode_tables::perl_word::PERL_WORD};
-
- if u8::try_from(c).map_or(false, is_word_byte) {
- return Ok(true);
- }
- Ok(PERL_WORD
- .binary_search_by(|&(start, end)| {
- use core::cmp::Ordering;
-
- if start <= c && c <= end {
- Ordering::Equal
- } else if start > c {
- Ordering::Greater
- } else {
- Ordering::Less
- }
- })
- .is_ok())
- }
-
- imp(c)
-}
-
-/// A mapping of property values for a specific property.
-///
-/// The first element of each tuple is a normalized property value while the
-/// second element of each tuple is the corresponding canonical property
-/// value.
-type PropertyValues = &'static [(&'static str, &'static str)];
-
-fn canonical_gencat(
- normalized_value: &str,
-) -> Result<Option<&'static str>, Error> {
- Ok(match normalized_value {
- "any" => Some("Any"),
- "assigned" => Some("Assigned"),
- "ascii" => Some("ASCII"),
- _ => {
- let gencats = property_values("General_Category")?.unwrap();
- canonical_value(gencats, normalized_value)
- }
- })
-}
-
-fn canonical_script(
- normalized_value: &str,
-) -> Result<Option<&'static str>, Error> {
- let scripts = property_values("Script")?.unwrap();
- Ok(canonical_value(scripts, normalized_value))
-}
-
-/// Find the canonical property name for the given normalized property name.
-///
-/// If no such property exists, then `None` is returned.
-///
-/// The normalized property name must have been normalized according to
-/// UAX44 LM3, which can be done using `symbolic_name_normalize`.
-///
-/// If the property names data is not available, then an error is returned.
-fn canonical_prop(
- normalized_name: &str,
-) -> Result<Option<&'static str>, Error> {
- #[cfg(not(any(
- feature = "unicode-age",
- feature = "unicode-bool",
- feature = "unicode-gencat",
- feature = "unicode-perl",
- feature = "unicode-script",
- feature = "unicode-segment",
- )))]
- fn imp(_: &str) -> Result<Option<&'static str>, Error> {
- Err(Error::PropertyNotFound)
- }
-
- #[cfg(any(
- feature = "unicode-age",
- feature = "unicode-bool",
- feature = "unicode-gencat",
- feature = "unicode-perl",
- feature = "unicode-script",
- feature = "unicode-segment",
- ))]
- fn imp(name: &str) -> Result<Option<&'static str>, Error> {
- use crate::unicode_tables::property_names::PROPERTY_NAMES;
-
- Ok(PROPERTY_NAMES
- .binary_search_by_key(&name, |&(n, _)| n)
- .ok()
- .map(|i| PROPERTY_NAMES[i].1))
- }
-
- imp(normalized_name)
-}
-
-/// Find the canonical property value for the given normalized property
-/// value.
-///
-/// The given property values should correspond to the values for the property
-/// under question, which can be found using `property_values`.
-///
-/// If no such property value exists, then `None` is returned.
-///
-/// The normalized property value must have been normalized according to
-/// UAX44 LM3, which can be done using `symbolic_name_normalize`.
-fn canonical_value(
- vals: PropertyValues,
- normalized_value: &str,
-) -> Option<&'static str> {
- vals.binary_search_by_key(&normalized_value, |&(n, _)| n)
- .ok()
- .map(|i| vals[i].1)
-}
-
-/// Return the table of property values for the given property name.
-///
-/// If the property values data is not available, then an error is returned.
-fn property_values(
- canonical_property_name: &'static str,
-) -> Result<Option<PropertyValues>, Error> {
- #[cfg(not(any(
- feature = "unicode-age",
- feature = "unicode-bool",
- feature = "unicode-gencat",
- feature = "unicode-perl",
- feature = "unicode-script",
- feature = "unicode-segment",
- )))]
- fn imp(_: &'static str) -> Result<Option<PropertyValues>, Error> {
- Err(Error::PropertyValueNotFound)
- }
-
- #[cfg(any(
- feature = "unicode-age",
- feature = "unicode-bool",
- feature = "unicode-gencat",
- feature = "unicode-perl",
- feature = "unicode-script",
- feature = "unicode-segment",
- ))]
- fn imp(name: &'static str) -> Result<Option<PropertyValues>, Error> {
- use crate::unicode_tables::property_values::PROPERTY_VALUES;
-
- Ok(PROPERTY_VALUES
- .binary_search_by_key(&name, |&(n, _)| n)
- .ok()
- .map(|i| PROPERTY_VALUES[i].1))
- }
-
- imp(canonical_property_name)
-}
-
-// This is only used in some cases, but small enough to just let it be dead
-// instead of figuring out (and maintaining) the right set of features.
-#[allow(dead_code)]
-fn property_set(
- name_map: &'static [(&'static str, Range)],
- canonical: &'static str,
-) -> Option<Range> {
- name_map
- .binary_search_by_key(&canonical, |x| x.0)
- .ok()
- .map(|i| name_map[i].1)
-}
-
-/// Returns an iterator over Unicode Age sets. Each item corresponds to a set
-/// of codepoints that were added in a particular revision of Unicode. The
-/// iterator yields items in chronological order.
-///
-/// If the given age value isn't valid or if the data isn't available, then an
-/// error is returned instead.
-fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>, Error> {
- #[cfg(not(feature = "unicode-age"))]
- fn imp(_: &str) -> Result<impl Iterator<Item = Range>, Error> {
- use core::option::IntoIter;
- Err::<IntoIter<Range>, _>(Error::PropertyNotFound)
- }
-
- #[cfg(feature = "unicode-age")]
- fn imp(canonical_age: &str) -> Result<impl Iterator<Item = Range>, Error> {
- use crate::unicode_tables::age;
-
- const AGES: &[(&str, Range)] = &[
- ("V1_1", age::V1_1),
- ("V2_0", age::V2_0),
- ("V2_1", age::V2_1),
- ("V3_0", age::V3_0),
- ("V3_1", age::V3_1),
- ("V3_2", age::V3_2),
- ("V4_0", age::V4_0),
- ("V4_1", age::V4_1),
- ("V5_0", age::V5_0),
- ("V5_1", age::V5_1),
- ("V5_2", age::V5_2),
- ("V6_0", age::V6_0),
- ("V6_1", age::V6_1),
- ("V6_2", age::V6_2),
- ("V6_3", age::V6_3),
- ("V7_0", age::V7_0),
- ("V8_0", age::V8_0),
- ("V9_0", age::V9_0),
- ("V10_0", age::V10_0),
- ("V11_0", age::V11_0),
- ("V12_0", age::V12_0),
- ("V12_1", age::V12_1),
- ("V13_0", age::V13_0),
- ("V14_0", age::V14_0),
- ("V15_0", age::V15_0),
- ("V15_1", age::V15_1),
- ("V16_0", age::V16_0),
- ];
- assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");
-
- let pos = AGES.iter().position(|&(age, _)| canonical_age == age);
- match pos {
- None => Err(Error::PropertyValueNotFound),
- Some(i) => Ok(AGES[..=i].iter().map(|&(_, classes)| classes)),
- }
- }
-
- imp(canonical_age)
-}
-
-/// Returns the Unicode HIR class corresponding to the given general category.
-///
-/// Name canonicalization is assumed to be performed by the caller.
-///
-/// If the given general category could not be found, or if the general
-/// category data is not available, then an error is returned.
-fn gencat(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> {
- #[cfg(not(feature = "unicode-gencat"))]
- fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> {
- Err(Error::PropertyNotFound)
- }
-
- #[cfg(feature = "unicode-gencat")]
- fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> {
- use crate::unicode_tables::general_category::BY_NAME;
- match name {
- "ASCII" => Ok(hir_class(&[('\0', '\x7F')])),
- "Any" => Ok(hir_class(&[('\0', '\u{10FFFF}')])),
- "Assigned" => {
- let mut cls = gencat("Unassigned")?;
- cls.negate();
- Ok(cls)
- }
- name => property_set(BY_NAME, name)
- .map(hir_class)
- .ok_or(Error::PropertyValueNotFound),
- }
- }
-
- match canonical_name {
- "Decimal_Number" => perl_digit(),
- name => imp(name),
- }
-}
-
-/// Returns the Unicode HIR class corresponding to the given script.
-///
-/// Name canonicalization is assumed to be performed by the caller.
-///
-/// If the given script could not be found, or if the script data is not
-/// available, then an error is returned.
-fn script(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> {
- #[cfg(not(feature = "unicode-script"))]
- fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> {
- Err(Error::PropertyNotFound)
- }
-
- #[cfg(feature = "unicode-script")]
- fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> {
- use crate::unicode_tables::script::BY_NAME;
- property_set(BY_NAME, name)
- .map(hir_class)
- .ok_or(Error::PropertyValueNotFound)
- }
-
- imp(canonical_name)
-}
-
-/// Returns the Unicode HIR class corresponding to the given script extension.
-///
-/// Name canonicalization is assumed to be performed by the caller.
-///
-/// If the given script extension could not be found, or if the script data is
-/// not available, then an error is returned.
-fn script_extension(
- canonical_name: &'static str,
-) -> Result<hir::ClassUnicode, Error> {
- #[cfg(not(feature = "unicode-script"))]
- fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> {
- Err(Error::PropertyNotFound)
- }
-
- #[cfg(feature = "unicode-script")]
- fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> {
- use crate::unicode_tables::script_extension::BY_NAME;
- property_set(BY_NAME, name)
- .map(hir_class)
- .ok_or(Error::PropertyValueNotFound)
- }
-
- imp(canonical_name)
-}
-
-/// Returns the Unicode HIR class corresponding to the given Unicode boolean
-/// property.
-///
-/// Name canonicalization is assumed to be performed by the caller.
-///
-/// If the given boolean property could not be found, or if the boolean
-/// property data is not available, then an error is returned.
-fn bool_property(
- canonical_name: &'static str,
-) -> Result<hir::ClassUnicode, Error> {
- #[cfg(not(feature = "unicode-bool"))]
- fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> {
- Err(Error::PropertyNotFound)
- }
-
- #[cfg(feature = "unicode-bool")]
- fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> {
- use crate::unicode_tables::property_bool::BY_NAME;
- property_set(BY_NAME, name)
- .map(hir_class)
- .ok_or(Error::PropertyNotFound)
- }
-
- match canonical_name {
- "Decimal_Number" => perl_digit(),
- "White_Space" => perl_space(),
- name => imp(name),
- }
-}
-
-/// Returns the Unicode HIR class corresponding to the given grapheme cluster
-/// break property.
-///
-/// Name canonicalization is assumed to be performed by the caller.
-///
-/// If the given property could not be found, or if the corresponding data is
-/// not available, then an error is returned.
-fn gcb(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> {
- #[cfg(not(feature = "unicode-segment"))]
- fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> {
- Err(Error::PropertyNotFound)
- }
-
- #[cfg(feature = "unicode-segment")]
- fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> {
- use crate::unicode_tables::grapheme_cluster_break::BY_NAME;
- property_set(BY_NAME, name)
- .map(hir_class)
- .ok_or(Error::PropertyValueNotFound)
- }
-
- imp(canonical_name)
-}
-
-/// Returns the Unicode HIR class corresponding to the given word break
-/// property.
-///
-/// Name canonicalization is assumed to be performed by the caller.
-///
-/// If the given property could not be found, or if the corresponding data is
-/// not available, then an error is returned.
-fn wb(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> {
- #[cfg(not(feature = "unicode-segment"))]
- fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> {
- Err(Error::PropertyNotFound)
- }
-
- #[cfg(feature = "unicode-segment")]
- fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> {
- use crate::unicode_tables::word_break::BY_NAME;
- property_set(BY_NAME, name)
- .map(hir_class)
- .ok_or(Error::PropertyValueNotFound)
- }
-
- imp(canonical_name)
-}
-
-/// Returns the Unicode HIR class corresponding to the given sentence
-/// break property.
-///
-/// Name canonicalization is assumed to be performed by the caller.
-///
-/// If the given property could not be found, or if the corresponding data is
-/// not available, then an error is returned.
-fn sb(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> {
- #[cfg(not(feature = "unicode-segment"))]
- fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> {
- Err(Error::PropertyNotFound)
- }
-
- #[cfg(feature = "unicode-segment")]
- fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> {
- use crate::unicode_tables::sentence_break::BY_NAME;
- property_set(BY_NAME, name)
- .map(hir_class)
- .ok_or(Error::PropertyValueNotFound)
- }
-
- imp(canonical_name)
-}
-
-/// Like symbolic_name_normalize_bytes, but operates on a string.
-fn symbolic_name_normalize(x: &str) -> String {
- let mut tmp = x.as_bytes().to_vec();
- let len = symbolic_name_normalize_bytes(&mut tmp).len();
- tmp.truncate(len);
- // This should always succeed because `symbolic_name_normalize_bytes`
- // guarantees that `&tmp[..len]` is always valid UTF-8.
- //
- // N.B. We could avoid the additional UTF-8 check here, but it's unlikely
- // to be worth skipping the additional safety check. A benchmark must
- // justify it first.
- String::from_utf8(tmp).unwrap()
-}
-
-/// Normalize the given symbolic name in place according to UAX44-LM3.
-///
-/// A "symbolic name" typically corresponds to property names and property
-/// value aliases. Note, though, that it should not be applied to property
-/// string values.
-///
-/// The slice returned is guaranteed to be valid UTF-8 for all possible values
-/// of `slice`.
-///
-/// See: https://unicode.org/reports/tr44/#UAX44-LM3
-fn symbolic_name_normalize_bytes(slice: &mut [u8]) -> &mut [u8] {
- // I couldn't find a place in the standard that specified that property
- // names/aliases had a particular structure (unlike character names), but
- // we assume that it's ASCII only and drop anything that isn't ASCII.
- let mut start = 0;
- let mut starts_with_is = false;
- if slice.len() >= 2 {
- // Ignore any "is" prefix.
- starts_with_is = slice[0..2] == b"is"[..]
- || slice[0..2] == b"IS"[..]
- || slice[0..2] == b"iS"[..]
- || slice[0..2] == b"Is"[..];
- if starts_with_is {
- start = 2;
- }
- }
- let mut next_write = 0;
- for i in start..slice.len() {
- // VALIDITY ARGUMENT: To guarantee that the resulting slice is valid
- // UTF-8, we ensure that the slice contains only ASCII bytes. In
- // particular, we drop every non-ASCII byte from the normalized string.
- let b = slice[i];
- if b == b' ' || b == b'_' || b == b'-' {
- continue;
- } else if b'A' <= b && b <= b'Z' {
- slice[next_write] = b + (b'a' - b'A');
- next_write += 1;
- } else if b <= 0x7F {
- slice[next_write] = b;
- next_write += 1;
- }
- }
- // Special case: ISO_Comment has a 'isc' abbreviation. Since we generally
- // ignore 'is' prefixes, the 'isc' abbreviation gets caught in the cross
- // fire and ends up creating an alias for 'c' to 'ISO_Comment', but it
- // is actually an alias for the 'Other' general category.
- if starts_with_is && next_write == 1 && slice[0] == b'c' {
- slice[0] = b'i';
- slice[1] = b's';
- slice[2] = b'c';
- next_write = 3;
- }
- &mut slice[..next_write]
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[cfg(feature = "unicode-case")]
- fn simple_fold_ok(c: char) -> impl Iterator<Item = char> {
- SimpleCaseFolder::new().unwrap().mapping(c).iter().copied()
- }
-
- #[cfg(feature = "unicode-case")]
- fn contains_case_map(start: char, end: char) -> bool {
- SimpleCaseFolder::new().unwrap().overlaps(start, end)
- }
-
- #[test]
- #[cfg(feature = "unicode-case")]
- fn simple_fold_k() {
- let xs: Vec<char> = simple_fold_ok('k').collect();
- assert_eq!(xs, alloc::vec!['K', 'K']);
-
- let xs: Vec<char> = simple_fold_ok('K').collect();
- assert_eq!(xs, alloc::vec!['k', 'K']);
-
- let xs: Vec<char> = simple_fold_ok('K').collect();
- assert_eq!(xs, alloc::vec!['K', 'k']);
- }
-
- #[test]
- #[cfg(feature = "unicode-case")]
- fn simple_fold_a() {
- let xs: Vec<char> = simple_fold_ok('a').collect();
- assert_eq!(xs, alloc::vec!['A']);
-
- let xs: Vec<char> = simple_fold_ok('A').collect();
- assert_eq!(xs, alloc::vec!['a']);
- }
-
- #[test]
- #[cfg(not(feature = "unicode-case"))]
- fn simple_fold_disabled() {
- assert!(SimpleCaseFolder::new().is_err());
- }
-
- #[test]
- #[cfg(feature = "unicode-case")]
- fn range_contains() {
- assert!(contains_case_map('A', 'A'));
- assert!(contains_case_map('Z', 'Z'));
- assert!(contains_case_map('A', 'Z'));
- assert!(contains_case_map('@', 'A'));
- assert!(contains_case_map('Z', '['));
- assert!(contains_case_map('☃', 'Ⰰ'));
-
- assert!(!contains_case_map('[', '['));
- assert!(!contains_case_map('[', '`'));
-
- assert!(!contains_case_map('☃', '☃'));
- }
-
- #[test]
- #[cfg(feature = "unicode-gencat")]
- fn regression_466() {
- use super::{CanonicalClassQuery, ClassQuery};
-
- let q = ClassQuery::OneLetter('C');
- assert_eq!(
- q.canonicalize().unwrap(),
- CanonicalClassQuery::GeneralCategory("Other")
- );
- }
-
- #[test]
- fn sym_normalize() {
- let sym_norm = symbolic_name_normalize;
-
- assert_eq!(sym_norm("Line_Break"), "linebreak");
- assert_eq!(sym_norm("Line-break"), "linebreak");
- assert_eq!(sym_norm("linebreak"), "linebreak");
- assert_eq!(sym_norm("BA"), "ba");
- assert_eq!(sym_norm("ba"), "ba");
- assert_eq!(sym_norm("Greek"), "greek");
- assert_eq!(sym_norm("isGreek"), "greek");
- assert_eq!(sym_norm("IS_Greek"), "greek");
- assert_eq!(sym_norm("isc"), "isc");
- assert_eq!(sym_norm("is c"), "isc");
- assert_eq!(sym_norm("is_c"), "isc");
- }
-
- #[test]
- fn valid_utf8_symbolic() {
- let mut x = b"abc\xFFxyz".to_vec();
- let y = symbolic_name_normalize_bytes(&mut x);
- assert_eq!(y, b"abcxyz");
- }
-}
diff --git a/vendor/regex-syntax/src/unicode_tables/LICENSE-UNICODE b/vendor/regex-syntax/src/unicode_tables/LICENSE-UNICODE
deleted file mode 100644
index b82826bd..00000000
--- a/vendor/regex-syntax/src/unicode_tables/LICENSE-UNICODE
+++ /dev/null
@@ -1,57 +0,0 @@
-UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
-
-Unicode Data Files include all data files under the directories
-http://www.unicode.org/Public/, http://www.unicode.org/reports/,
-http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
-http://www.unicode.org/utility/trac/browser/.
-
-Unicode Data Files do not include PDF online code charts under the
-directory http://www.unicode.org/Public/.
-
-Software includes any source code published in the Unicode Standard
-or under the directories
-http://www.unicode.org/Public/, http://www.unicode.org/reports/,
-http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
-http://www.unicode.org/utility/trac/browser/.
-
-NOTICE TO USER: Carefully read the following legal agreement.
-BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
-DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
-YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
-TERMS AND CONDITIONS OF THIS AGREEMENT.
-IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
-THE DATA FILES OR SOFTWARE.
-
-COPYRIGHT AND PERMISSION NOTICE
-
-Copyright © 1991-2018 Unicode, Inc. All rights reserved.
-Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of the Unicode data files and any associated documentation
-(the "Data Files") or Unicode software and any associated documentation
-(the "Software") to deal in the Data Files or Software
-without restriction, including without limitation the rights to use,
-copy, modify, merge, publish, distribute, and/or sell copies of
-the Data Files or Software, and to permit persons to whom the Data Files
-or Software are furnished to do so, provided that either
-(a) this copyright and permission notice appear with all copies
-of the Data Files or Software, or
-(b) this copyright and permission notice appear in associated
-Documentation.
-
-THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
-ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
-WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT OF THIRD PARTY RIGHTS.
-IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
-NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
-DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
-DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
-TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
-PERFORMANCE OF THE DATA FILES OR SOFTWARE.
-
-Except as contained in this notice, the name of a copyright holder
-shall not be used in advertising or otherwise to promote the sale,
-use or other dealings in these Data Files or Software without prior
-written authorization of the copyright holder.
diff --git a/vendor/regex-syntax/src/unicode_tables/age.rs b/vendor/regex-syntax/src/unicode_tables/age.rs
deleted file mode 100644
index 466510c9..00000000
--- a/vendor/regex-syntax/src/unicode_tables/age.rs
+++ /dev/null
@@ -1,1846 +0,0 @@
-// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
-//
-// ucd-generate age ucd-16.0.0 --chars
-//
-// Unicode version: 16.0.0.
-//
-// ucd-generate 0.3.1 is available on crates.io.
-
-pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
- ("V10_0", V10_0),
- ("V11_0", V11_0),
- ("V12_0", V12_0),
- ("V12_1", V12_1),
- ("V13_0", V13_0),
- ("V14_0", V14_0),
- ("V15_0", V15_0),
- ("V15_1", V15_1),
- ("V16_0", V16_0),
- ("V1_1", V1_1),
- ("V2_0", V2_0),
- ("V2_1", V2_1),
- ("V3_0", V3_0),
- ("V3_1", V3_1),
- ("V3_2", V3_2),
- ("V4_0", V4_0),
- ("V4_1", V4_1),
- ("V5_0", V5_0),
- ("V5_1", V5_1),
- ("V5_2", V5_2),
- ("V6_0", V6_0),
- ("V6_1", V6_1),
- ("V6_2", V6_2),
- ("V6_3", V6_3),
- ("V7_0", V7_0),
- ("V8_0", V8_0),
- ("V9_0", V9_0),
-];
-
-pub const V10_0: &'static [(char, char)] = &[
- ('ࡠ', 'ࡪ'),
- ('ৼ', '৽'),
- ('\u{afa}', '\u{aff}'),
- ('\u{d00}', '\u{d00}'),
- ('\u{d3b}', '\u{d3c}'),
- ('᳷', '᳷'),
- ('\u{1df6}', '\u{1df9}'),
- ('₿', '₿'),
- ('⏿', '⏿'),
- ('⯒', '⯒'),
- ('⹅', '⹉'),
- ('ㄮ', 'ㄮ'),
- ('鿖', '鿪'),
- ('𐌭', '𐌯'),
- ('𑨀', '\u{11a47}'),
- ('𑩐', '𑪃'),
- ('𑪆', '𑪜'),
- ('𑪞', '𑪢'),
- ('𑴀', '𑴆'),
- ('𑴈', '𑴉'),
- ('𑴋', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d47}'),
- ('𑵐', '𑵙'),
- ('𖿡', '𖿡'),
- ('𛀂', '𛄞'),
- ('𛅰', '𛋻'),
- ('🉠', '🉥'),
- ('🛓', '🛔'),
- ('🛷', '🛸'),
- ('🤀', '🤋'),
- ('🤟', '🤟'),
- ('🤨', '🤯'),
- ('🤱', '🤲'),
- ('🥌', '🥌'),
- ('🥟', '🥫'),
- ('🦒', '🦗'),
- ('🧐', '🧦'),
- ('𬺰', '𮯠'),
-];
-
-pub const V11_0: &'static [(char, char)] = &[
- ('ՠ', 'ՠ'),
- ('ֈ', 'ֈ'),
- ('ׯ', 'ׯ'),
- ('\u{7fd}', '߿'),
- ('\u{8d3}', '\u{8d3}'),
- ('\u{9fe}', '\u{9fe}'),
- ('੶', '੶'),
- ('\u{c04}', '\u{c04}'),
- ('಄', '಄'),
- ('ᡸ', 'ᡸ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('⮺', '⮼'),
- ('⯓', '⯫'),
- ('⯰', '⯾'),
- ('⹊', '⹎'),
- ('ㄯ', 'ㄯ'),
- ('鿫', '鿯'),
- ('ꞯ', 'ꞯ'),
- ('Ꞹ', 'ꞹ'),
- ('ꣾ', '\u{a8ff}'),
- ('𐨴', '𐨵'),
- ('𐩈', '𐩈'),
- ('𐴀', '\u{10d27}'),
- ('𐴰', '𐴹'),
- ('𐼀', '𐼧'),
- ('𐼰', '𐽙'),
- ('\u{110cd}', '\u{110cd}'),
- ('𑅄', '𑅆'),
- ('\u{1133b}', '\u{1133b}'),
- ('\u{1145e}', '\u{1145e}'),
- ('𑜚', '𑜚'),
- ('𑠀', '𑠻'),
- ('𑪝', '𑪝'),
- ('𑵠', '𑵥'),
- ('𑵧', '𑵨'),
- ('𑵪', '𑶎'),
- ('\u{11d90}', '\u{11d91}'),
- ('𑶓', '𑶘'),
- ('𑶠', '𑶩'),
- ('𑻠', '𑻸'),
- ('𖹀', '𖺚'),
- ('𘟭', '𘟱'),
- ('𝋠', '𝋳'),
- ('𝍲', '𝍸'),
- ('𞱱', '𞲴'),
- ('🄯', '🄯'),
- ('🛹', '🛹'),
- ('🟕', '🟘'),
- ('🥍', '🥏'),
- ('🥬', '🥰'),
- ('🥳', '🥶'),
- ('🥺', '🥺'),
- ('🥼', '🥿'),
- ('🦘', '🦢'),
- ('🦰', '🦹'),
- ('🧁', '🧂'),
- ('🧧', '🧿'),
- ('🩠', '🩭'),
-];
-
-pub const V12_0: &'static [(char, char)] = &[
- ('౷', '౷'),
- ('ຆ', 'ຆ'),
- ('ຉ', 'ຉ'),
- ('ຌ', 'ຌ'),
- ('ຎ', 'ຓ'),
- ('ຘ', 'ຘ'),
- ('ຠ', 'ຠ'),
- ('ຨ', 'ຩ'),
- ('ຬ', 'ຬ'),
- ('\u{eba}', '\u{eba}'),
- ('ᳺ', 'ᳺ'),
- ('⯉', '⯉'),
- ('⯿', '⯿'),
- ('⹏', '⹏'),
- ('Ꞻ', 'ꞿ'),
- ('Ꟃ', 'Ᶎ'),
- ('ꭦ', 'ꭧ'),
- ('𐿠', '𐿶'),
- ('𑑟', '𑑟'),
- ('𑚸', '𑚸'),
- ('𑦠', '𑦧'),
- ('𑦪', '\u{119d7}'),
- ('\u{119da}', '𑧤'),
- ('𑪄', '𑪅'),
- ('𑿀', '𑿱'),
- ('𑿿', '𑿿'),
- ('\u{13430}', '\u{13438}'),
- ('𖽅', '𖽊'),
- ('\u{16f4f}', '\u{16f4f}'),
- ('𖽿', '𖾇'),
- ('𖿢', '𖿣'),
- ('𘟲', '𘟷'),
- ('𛅐', '𛅒'),
- ('𛅤', '𛅧'),
- ('𞄀', '𞄬'),
- ('\u{1e130}', '𞄽'),
- ('𞅀', '𞅉'),
- ('𞅎', '𞅏'),
- ('𞋀', '𞋹'),
- ('𞋿', '𞋿'),
- ('𞥋', '𞥋'),
- ('𞴁', '𞴽'),
- ('🅬', '🅬'),
- ('🛕', '🛕'),
- ('🛺', '🛺'),
- ('🟠', '🟫'),
- ('🤍', '🤏'),
- ('🤿', '🤿'),
- ('🥱', '🥱'),
- ('🥻', '🥻'),
- ('🦥', '🦪'),
- ('🦮', '🦯'),
- ('🦺', '🦿'),
- ('🧃', '🧊'),
- ('🧍', '🧏'),
- ('🨀', '🩓'),
- ('🩰', '🩳'),
- ('🩸', '🩺'),
- ('🪀', '🪂'),
- ('🪐', '🪕'),
-];
-
-pub const V12_1: &'static [(char, char)] = &[('㋿', '㋿')];
-
-pub const V13_0: &'static [(char, char)] = &[
- ('ࢾ', 'ࣇ'),
- ('\u{b55}', '\u{b55}'),
- ('ഄ', 'ഄ'),
- ('\u{d81}', '\u{d81}'),
- ('\u{1abf}', '\u{1ac0}'),
- ('⮗', '⮗'),
- ('⹐', '⹒'),
- ('ㆻ', 'ㆿ'),
- ('䶶', '䶿'),
- ('鿰', '鿼'),
- ('Ꟈ', 'ꟊ'),
- ('Ꟶ', 'ꟶ'),
- ('\u{a82c}', '\u{a82c}'),
- ('ꭨ', '꭫'),
- ('𐆜', '𐆜'),
- ('𐺀', '𐺩'),
- ('\u{10eab}', '𐺭'),
- ('𐺰', '𐺱'),
- ('𐾰', '𐿋'),
- ('𑅇', '𑅇'),
- ('𑇎', '\u{111cf}'),
- ('𑑚', '𑑚'),
- ('𑑠', '𑑡'),
- ('𑤀', '𑤆'),
- ('𑤉', '𑤉'),
- ('𑤌', '𑤓'),
- ('𑤕', '𑤖'),
- ('𑤘', '𑤵'),
- ('𑤷', '𑤸'),
- ('\u{1193b}', '𑥆'),
- ('𑥐', '𑥙'),
- ('𑾰', '𑾰'),
- ('\u{16fe4}', '\u{16fe4}'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('𘫳', '𘳕'),
- ('𘴀', '𘴈'),
- ('🄍', '🄏'),
- ('🅭', '🅯'),
- ('🆭', '🆭'),
- ('🛖', '🛗'),
- ('🛻', '🛼'),
- ('🢰', '🢱'),
- ('🤌', '🤌'),
- ('🥲', '🥲'),
- ('🥷', '🥸'),
- ('🦣', '🦤'),
- ('🦫', '🦭'),
- ('🧋', '🧋'),
- ('🩴', '🩴'),
- ('🪃', '🪆'),
- ('🪖', '🪨'),
- ('🪰', '🪶'),
- ('🫀', '🫂'),
- ('🫐', '🫖'),
- ('🬀', '🮒'),
- ('🮔', '🯊'),
- ('🯰', '🯹'),
- ('𪛗', '𪛝'),
- ('𰀀', '𱍊'),
-];
-
-pub const V14_0: &'static [(char, char)] = &[
- ('؝', '؝'),
- ('ࡰ', 'ࢎ'),
- ('\u{890}', '\u{891}'),
- ('\u{898}', '\u{89f}'),
- ('ࢵ', 'ࢵ'),
- ('ࣈ', '\u{8d2}'),
- ('\u{c3c}', '\u{c3c}'),
- ('ౝ', 'ౝ'),
- ('ೝ', 'ೝ'),
- ('ᜍ', 'ᜍ'),
- ('\u{1715}', '\u{1715}'),
- ('ᜟ', 'ᜟ'),
- ('\u{180f}', '\u{180f}'),
- ('\u{1ac1}', '\u{1ace}'),
- ('ᭌ', 'ᭌ'),
- ('᭽', '᭾'),
- ('\u{1dfa}', '\u{1dfa}'),
- ('⃀', '⃀'),
- ('Ⱟ', 'Ⱟ'),
- ('ⱟ', 'ⱟ'),
- ('⹓', '⹝'),
- ('鿽', '鿿'),
- ('Ꟁ', 'ꟁ'),
- ('Ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'ꟙ'),
- ('ꟲ', 'ꟴ'),
- ('﯂', '﯂'),
- ('﵀', '﵏'),
- ('﷏', '﷏'),
- ('﷾', '﷿'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𐽰', '𐾉'),
- ('\u{11070}', '𑁵'),
- ('\u{110c2}', '\u{110c2}'),
- ('𑚹', '𑚹'),
- ('𑝀', '𑝆'),
- ('𑪰', '𑪿'),
- ('𒾐', '𒿲'),
- ('𖩰', '𖪾'),
- ('𖫀', '𖫉'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('𛄟', '𛄢'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('𜽐', '𜿃'),
- ('𝇩', '𝇪'),
- ('𝼀', '𝼞'),
- ('𞊐', '\u{1e2ae}'),
- ('𞟠', '𞟦'),
- ('𞟨', '𞟫'),
- ('𞟭', '𞟮'),
- ('𞟰', '𞟾'),
- ('🛝', '🛟'),
- ('🟰', '🟰'),
- ('🥹', '🥹'),
- ('🧌', '🧌'),
- ('🩻', '🩼'),
- ('🪩', '🪬'),
- ('🪷', '🪺'),
- ('🫃', '🫅'),
- ('🫗', '🫙'),
- ('🫠', '🫧'),
- ('🫰', '🫶'),
- ('𪛞', '𪛟'),
- ('𫜵', '𫜸'),
-];
-
-pub const V15_0: &'static [(char, char)] = &[
- ('ೳ', 'ೳ'),
- ('\u{ece}', '\u{ece}'),
- ('\u{10efd}', '\u{10eff}'),
- ('𑈿', '\u{11241}'),
- ('𑬀', '𑬉'),
- ('\u{11f00}', '𑼐'),
- ('𑼒', '\u{11f3a}'),
- ('𑼾', '𑽙'),
- ('𓐯', '𓐯'),
- ('\u{13439}', '\u{13455}'),
- ('𛄲', '𛄲'),
- ('𛅕', '𛅕'),
- ('𝋀', '𝋓'),
- ('𝼥', '𝼪'),
- ('𞀰', '𞁭'),
- ('\u{1e08f}', '\u{1e08f}'),
- ('𞓐', '𞓹'),
- ('🛜', '🛜'),
- ('🝴', '🝶'),
- ('🝻', '🝿'),
- ('🟙', '🟙'),
- ('🩵', '🩷'),
- ('🪇', '🪈'),
- ('🪭', '🪯'),
- ('🪻', '🪽'),
- ('🪿', '🪿'),
- ('🫎', '🫏'),
- ('🫚', '🫛'),
- ('🫨', '🫨'),
- ('🫷', '🫸'),
- ('𫜹', '𫜹'),
- ('𱍐', '𲎯'),
-];
-
-pub const V15_1: &'static [(char, char)] =
- &[('⿼', '⿿'), ('㇯', '㇯'), ('𮯰', '𮹝')];
-
-pub const V16_0: &'static [(char, char)] = &[
- ('\u{897}', '\u{897}'),
- ('᭎', '᭏'),
- ('᭿', '᭿'),
- ('Ᲊ', 'ᲊ'),
- ('␧', '␩'),
- ('㇤', '㇥'),
- ('Ɤ', 'ꟍ'),
- ('Ꟛ', 'Ƛ'),
- ('𐗀', '𐗳'),
- ('𐵀', '𐵥'),
- ('\u{10d69}', '𐶅'),
- ('𐶎', '𐶏'),
- ('𐻂', '𐻄'),
- ('\u{10efc}', '\u{10efc}'),
- ('𑎀', '𑎉'),
- ('𑎋', '𑎋'),
- ('𑎎', '𑎎'),
- ('𑎐', '𑎵'),
- ('𑎷', '\u{113c0}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '𑏊'),
- ('𑏌', '𑏕'),
- ('𑏗', '𑏘'),
- ('\u{113e1}', '\u{113e2}'),
- ('𑛐', '𑛣'),
- ('𑯀', '𑯡'),
- ('𑯰', '𑯹'),
- ('\u{11f5a}', '\u{11f5a}'),
- ('𓑠', '𔏺'),
- ('𖄀', '𖄹'),
- ('𖵀', '𖵹'),
- ('𘳿', '𘳿'),
- ('𜰀', '𜳹'),
- ('𜴀', '𜺳'),
- ('𞗐', '𞗺'),
- ('𞗿', '𞗿'),
- ('🢲', '🢻'),
- ('🣀', '🣁'),
- ('🪉', '🪉'),
- ('🪏', '🪏'),
- ('🪾', '🪾'),
- ('🫆', '🫆'),
- ('🫜', '🫜'),
- ('🫟', '🫟'),
- ('🫩', '🫩'),
- ('🯋', '🯯'),
-];
-
-pub const V1_1: &'static [(char, char)] = &[
- ('\0', 'ǵ'),
- ('Ǻ', 'ȗ'),
- ('ɐ', 'ʨ'),
- ('ʰ', '˞'),
- ('ˠ', '˩'),
- ('\u{300}', '\u{345}'),
- ('\u{360}', '\u{361}'),
- ('ʹ', '͵'),
- ('ͺ', 'ͺ'),
- (';', ';'),
- ('΄', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', 'ώ'),
- ('ϐ', 'ϖ'),
- ('Ϛ', 'Ϛ'),
- ('Ϝ', 'Ϝ'),
- ('Ϟ', 'Ϟ'),
- ('Ϡ', 'Ϡ'),
- ('Ϣ', 'ϳ'),
- ('Ё', 'Ќ'),
- ('Ў', 'я'),
- ('ё', 'ќ'),
- ('ў', '\u{486}'),
- ('Ґ', 'ӄ'),
- ('Ӈ', 'ӈ'),
- ('Ӌ', 'ӌ'),
- ('Ӑ', 'ӫ'),
- ('Ӯ', 'ӵ'),
- ('Ӹ', 'ӹ'),
- ('Ա', 'Ֆ'),
- ('ՙ', '՟'),
- ('ա', 'և'),
- ('։', '։'),
- ('\u{5b0}', '\u{5b9}'),
- ('\u{5bb}', '׃'),
- ('א', 'ת'),
- ('װ', '״'),
- ('،', '،'),
- ('؛', '؛'),
- ('؟', '؟'),
- ('ء', 'غ'),
- ('ـ', '\u{652}'),
- ('٠', '٭'),
- ('\u{670}', 'ڷ'),
- ('ں', 'ھ'),
- ('ۀ', 'ێ'),
- ('ې', '\u{6ed}'),
- ('۰', '۹'),
- ('\u{901}', 'ः'),
- ('अ', 'ह'),
- ('\u{93c}', '\u{94d}'),
- ('ॐ', '\u{954}'),
- ('क़', '॰'),
- ('\u{981}', 'ঃ'),
- ('অ', 'ঌ'),
- ('এ', 'ঐ'),
- ('ও', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('\u{9bc}', '\u{9bc}'),
- ('\u{9be}', '\u{9c4}'),
- ('ে', 'ৈ'),
- ('ো', '\u{9cd}'),
- ('\u{9d7}', '\u{9d7}'),
- ('ড়', 'ঢ়'),
- ('য়', '\u{9e3}'),
- ('০', '৺'),
- ('\u{a02}', '\u{a02}'),
- ('ਅ', 'ਊ'),
- ('ਏ', 'ਐ'),
- ('ਓ', 'ਨ'),
- ('ਪ', 'ਰ'),
- ('ਲ', 'ਲ਼'),
- ('ਵ', 'ਸ਼'),
- ('ਸ', 'ਹ'),
- ('\u{a3c}', '\u{a3c}'),
- ('ਾ', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4d}'),
- ('ਖ਼', 'ੜ'),
- ('ਫ਼', 'ਫ਼'),
- ('੦', 'ੴ'),
- ('\u{a81}', 'ઃ'),
- ('અ', 'ઋ'),
- ('ઍ', 'ઍ'),
- ('એ', 'ઑ'),
- ('ઓ', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('\u{abc}', '\u{ac5}'),
- ('\u{ac7}', 'ૉ'),
- ('ો', '\u{acd}'),
- ('ૐ', 'ૐ'),
- ('ૠ', 'ૠ'),
- ('૦', '૯'),
- ('\u{b01}', 'ଃ'),
- ('ଅ', 'ଌ'),
- ('ଏ', 'ଐ'),
- ('ଓ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଶ', 'ହ'),
- ('\u{b3c}', '\u{b43}'),
- ('େ', 'ୈ'),
- ('ୋ', '\u{b4d}'),
- ('\u{b56}', '\u{b57}'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', 'ୡ'),
- ('୦', '୰'),
- ('\u{b82}', 'ஃ'),
- ('அ', 'ஊ'),
- ('எ', 'ஐ'),
- ('ஒ', 'க'),
- ('ங', 'ச'),
- ('ஜ', 'ஜ'),
- ('ஞ', 'ட'),
- ('ண', 'த'),
- ('ந', 'ப'),
- ('ம', 'வ'),
- ('ஷ', 'ஹ'),
- ('\u{bbe}', 'ூ'),
- ('ெ', 'ை'),
- ('ொ', '\u{bcd}'),
- ('\u{bd7}', '\u{bd7}'),
- ('௧', '௲'),
- ('ఁ', 'ః'),
- ('అ', 'ఌ'),
- ('ఎ', 'ఐ'),
- ('ఒ', 'న'),
- ('ప', 'ళ'),
- ('వ', 'హ'),
- ('\u{c3e}', 'ౄ'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4d}'),
- ('\u{c55}', '\u{c56}'),
- ('ౠ', 'ౡ'),
- ('౦', '౯'),
- ('ಂ', 'ಃ'),
- ('ಅ', 'ಌ'),
- ('ಎ', 'ಐ'),
- ('ಒ', 'ನ'),
- ('ಪ', 'ಳ'),
- ('ವ', 'ಹ'),
- ('ಾ', 'ೄ'),
- ('\u{cc6}', '\u{cc8}'),
- ('\u{cca}', '\u{ccd}'),
- ('\u{cd5}', '\u{cd6}'),
- ('ೞ', 'ೞ'),
- ('ೠ', 'ೡ'),
- ('೦', '೯'),
- ('ം', 'ഃ'),
- ('അ', 'ഌ'),
- ('എ', 'ഐ'),
- ('ഒ', 'ന'),
- ('പ', 'ഹ'),
- ('\u{d3e}', '\u{d43}'),
- ('െ', 'ൈ'),
- ('ൊ', '\u{d4d}'),
- ('\u{d57}', '\u{d57}'),
- ('ൠ', 'ൡ'),
- ('൦', '൯'),
- ('ก', '\u{e3a}'),
- ('฿', '๛'),
- ('ກ', 'ຂ'),
- ('ຄ', 'ຄ'),
- ('ງ', 'ຈ'),
- ('ຊ', 'ຊ'),
- ('ຍ', 'ຍ'),
- ('ດ', 'ທ'),
- ('ນ', 'ຟ'),
- ('ມ', 'ຣ'),
- ('ລ', 'ລ'),
- ('ວ', 'ວ'),
- ('ສ', 'ຫ'),
- ('ອ', '\u{eb9}'),
- ('\u{ebb}', 'ຽ'),
- ('ເ', 'ໄ'),
- ('ໆ', 'ໆ'),
- ('\u{ec8}', '\u{ecd}'),
- ('໐', '໙'),
- ('ໜ', 'ໝ'),
- ('Ⴀ', 'Ⴥ'),
- ('ა', 'ჶ'),
- ('჻', '჻'),
- ('ᄀ', 'ᅙ'),
- ('ᅟ', 'ᆢ'),
- ('ᆨ', 'ᇹ'),
- ('Ḁ', 'ẚ'),
- ('Ạ', 'ỹ'),
- ('ἀ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ῄ'),
- ('ῆ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('῝', '`'),
- ('ῲ', 'ῴ'),
- ('ῶ', '῾'),
- ('\u{2000}', '\u{202e}'),
- ('‰', '⁆'),
- ('\u{206a}', '⁰'),
- ('⁴', '₎'),
- ('₠', '₪'),
- ('\u{20d0}', '\u{20e1}'),
- ('℀', 'ℸ'),
- ('⅓', 'ↂ'),
- ('←', '⇪'),
- ('∀', '⋱'),
- ('⌀', '⌀'),
- ('⌂', '⍺'),
- ('␀', '␤'),
- ('⑀', '⑊'),
- ('①', '⓪'),
- ('─', '▕'),
- ('■', '◯'),
- ('☀', '☓'),
- ('☚', '♯'),
- ('✁', '✄'),
- ('✆', '✉'),
- ('✌', '✧'),
- ('✩', '❋'),
- ('❍', '❍'),
- ('❏', '❒'),
- ('❖', '❖'),
- ('❘', '❞'),
- ('❡', '❧'),
- ('❶', '➔'),
- ('➘', '➯'),
- ('➱', '➾'),
- ('\u{3000}', '〷'),
- ('〿', '〿'),
- ('ぁ', 'ゔ'),
- ('\u{3099}', 'ゞ'),
- ('ァ', 'ヾ'),
- ('ㄅ', 'ㄬ'),
- ('ㄱ', 'ㆎ'),
- ('㆐', '㆟'),
- ('㈀', '㈜'),
- ('㈠', '㉃'),
- ('㉠', '㉻'),
- ('㉿', '㊰'),
- ('㋀', '㋋'),
- ('㋐', '㋾'),
- ('㌀', '㍶'),
- ('㍻', '㏝'),
- ('㏠', '㏾'),
- ('一', '龥'),
- ('\u{e000}', '鶴'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('\u{fb1e}', 'זּ'),
- ('טּ', 'לּ'),
- ('מּ', 'מּ'),
- ('נּ', 'סּ'),
- ('ףּ', 'פּ'),
- ('צּ', 'ﮱ'),
- ('ﯓ', '﴿'),
- ('ﵐ', 'ﶏ'),
- ('ﶒ', 'ﷇ'),
- ('ﷰ', 'ﷻ'),
- ('\u{fe20}', '\u{fe23}'),
- ('︰', '﹄'),
- ('﹉', '﹒'),
- ('﹔', '﹦'),
- ('﹨', '﹫'),
- ('ﹰ', 'ﹲ'),
- ('ﹴ', 'ﹴ'),
- ('ﹶ', 'ﻼ'),
- ('\u{feff}', '\u{feff}'),
- ('!', '~'),
- ('。', 'ᄒ'),
- ('ᅡ', 'ᅦ'),
- ('ᅧ', 'ᅬ'),
- ('ᅭ', 'ᅲ'),
- ('ᅳ', 'ᅵ'),
- ('¢', '₩'),
- ('│', '○'),
- ('�', '\u{ffff}'),
-];
-
-pub const V2_0: &'static [(char, char)] = &[
- ('\u{591}', '\u{5a1}'),
- ('\u{5a3}', '\u{5af}'),
- ('\u{5c4}', '\u{5c4}'),
- ('ༀ', 'ཇ'),
- ('ཉ', 'ཀྵ'),
- ('\u{f71}', 'ྋ'),
- ('\u{f90}', '\u{f95}'),
- ('\u{f97}', '\u{f97}'),
- ('\u{f99}', '\u{fad}'),
- ('\u{fb1}', '\u{fb7}'),
- ('\u{fb9}', '\u{fb9}'),
- ('ẛ', 'ẛ'),
- ('₫', '₫'),
- ('가', '힣'),
- ('\u{1fffe}', '\u{1ffff}'),
- ('\u{2fffe}', '\u{2ffff}'),
- ('\u{3fffe}', '\u{3ffff}'),
- ('\u{4fffe}', '\u{4ffff}'),
- ('\u{5fffe}', '\u{5ffff}'),
- ('\u{6fffe}', '\u{6ffff}'),
- ('\u{7fffe}', '\u{7ffff}'),
- ('\u{8fffe}', '\u{8ffff}'),
- ('\u{9fffe}', '\u{9ffff}'),
- ('\u{afffe}', '\u{affff}'),
- ('\u{bfffe}', '\u{bffff}'),
- ('\u{cfffe}', '\u{cffff}'),
- ('\u{dfffe}', '\u{dffff}'),
- ('\u{efffe}', '\u{10ffff}'),
-];
-
-pub const V2_1: &'static [(char, char)] = &[('€', '€'), ('', '')];
-
-pub const V3_0: &'static [(char, char)] = &[
- ('Ƕ', 'ǹ'),
- ('Ș', 'ȟ'),
- ('Ȣ', 'ȳ'),
- ('ʩ', 'ʭ'),
- ('˟', '˟'),
- ('˪', 'ˮ'),
- ('\u{346}', '\u{34e}'),
- ('\u{362}', '\u{362}'),
- ('ϗ', 'ϗ'),
- ('ϛ', 'ϛ'),
- ('ϝ', 'ϝ'),
- ('ϟ', 'ϟ'),
- ('ϡ', 'ϡ'),
- ('Ѐ', 'Ѐ'),
- ('Ѝ', 'Ѝ'),
- ('ѐ', 'ѐ'),
- ('ѝ', 'ѝ'),
- ('\u{488}', '\u{489}'),
- ('Ҍ', 'ҏ'),
- ('Ӭ', 'ӭ'),
- ('֊', '֊'),
- ('\u{653}', '\u{655}'),
- ('ڸ', 'ڹ'),
- ('ڿ', 'ڿ'),
- ('ۏ', 'ۏ'),
- ('ۺ', '۾'),
- ('܀', '܍'),
- ('\u{70f}', 'ܬ'),
- ('\u{730}', '\u{74a}'),
- ('ހ', '\u{7b0}'),
- ('ං', 'ඃ'),
- ('අ', 'ඖ'),
- ('ක', 'න'),
- ('ඳ', 'ර'),
- ('ල', 'ල'),
- ('ව', 'ෆ'),
- ('\u{dca}', '\u{dca}'),
- ('\u{dcf}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('ෘ', '\u{ddf}'),
- ('ෲ', '෴'),
- ('ཪ', 'ཪ'),
- ('\u{f96}', '\u{f96}'),
- ('\u{fae}', '\u{fb0}'),
- ('\u{fb8}', '\u{fb8}'),
- ('\u{fba}', '\u{fbc}'),
- ('྾', '࿌'),
- ('࿏', '࿏'),
- ('က', 'အ'),
- ('ဣ', 'ဧ'),
- ('ဩ', 'ဪ'),
- ('ာ', '\u{1032}'),
- ('\u{1036}', '\u{1039}'),
- ('၀', '\u{1059}'),
- ('ሀ', 'ሆ'),
- ('ለ', 'ቆ'),
- ('ቈ', 'ቈ'),
- ('ቊ', 'ቍ'),
- ('ቐ', 'ቖ'),
- ('ቘ', 'ቘ'),
- ('ቚ', 'ቝ'),
- ('በ', 'ኆ'),
- ('ኈ', 'ኈ'),
- ('ኊ', 'ኍ'),
- ('ነ', 'ኮ'),
- ('ኰ', 'ኰ'),
- ('ኲ', 'ኵ'),
- ('ኸ', 'ኾ'),
- ('ዀ', 'ዀ'),
- ('ዂ', 'ዅ'),
- ('ወ', 'ዎ'),
- ('ዐ', 'ዖ'),
- ('ዘ', 'ዮ'),
- ('ደ', 'ጎ'),
- ('ጐ', 'ጐ'),
- ('ጒ', 'ጕ'),
- ('ጘ', 'ጞ'),
- ('ጠ', 'ፆ'),
- ('ፈ', 'ፚ'),
- ('፡', '፼'),
- ('Ꭰ', 'Ᏼ'),
- ('ᐁ', 'ᙶ'),
- ('\u{1680}', '᚜'),
- ('ᚠ', 'ᛰ'),
- ('ក', 'ៜ'),
- ('០', '៩'),
- ('᠀', '\u{180e}'),
- ('᠐', '᠙'),
- ('ᠠ', 'ᡷ'),
- ('ᢀ', '\u{18a9}'),
- ('\u{202f}', '\u{202f}'),
- ('⁈', '⁍'),
- ('₭', '₯'),
- ('\u{20e2}', '\u{20e3}'),
- ('ℹ', '℺'),
- ('Ↄ', 'Ↄ'),
- ('⇫', '⇳'),
- ('⌁', '⌁'),
- ('⍻', '⍻'),
- ('⍽', '⎚'),
- ('␥', '␦'),
- ('◰', '◷'),
- ('☙', '☙'),
- ('♰', '♱'),
- ('⠀', '⣿'),
- ('⺀', '⺙'),
- ('⺛', '⻳'),
- ('⼀', '⿕'),
- ('⿰', '⿻'),
- ('〸', '〺'),
- ('〾', '〾'),
- ('ㆠ', 'ㆷ'),
- ('㐀', '䶵'),
- ('ꀀ', 'ꒌ'),
- ('꒐', '꒡'),
- ('꒤', '꒳'),
- ('꒵', '꓀'),
- ('꓂', '꓄'),
- ('꓆', '꓆'),
- ('יִ', 'יִ'),
- ('\u{fff9}', '\u{fffb}'),
-];
-
-pub const V3_1: &'static [(char, char)] = &[
- ('ϴ', 'ϵ'),
- ('\u{fdd0}', '\u{fdef}'),
- ('𐌀', '𐌞'),
- ('𐌠', '𐌣'),
- ('𐌰', '𐍊'),
- ('𐐀', '𐐥'),
- ('𐐨', '𐑍'),
- ('𝀀', '𝃵'),
- ('𝄀', '𝄦'),
- ('𝄪', '𝇝'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓀'),
- ('𝓂', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚣'),
- ('𝚨', '𝟉'),
- ('𝟎', '𝟿'),
- ('𠀀', '𪛖'),
- ('丽', '𪘀'),
- ('\u{e0001}', '\u{e0001}'),
- ('\u{e0020}', '\u{e007f}'),
-];
-
-pub const V3_2: &'static [(char, char)] = &[
- ('Ƞ', 'Ƞ'),
- ('\u{34f}', '\u{34f}'),
- ('\u{363}', '\u{36f}'),
- ('Ϙ', 'ϙ'),
- ('϶', '϶'),
- ('Ҋ', 'ҋ'),
- ('Ӆ', 'ӆ'),
- ('Ӊ', 'ӊ'),
- ('Ӎ', 'ӎ'),
- ('Ԁ', 'ԏ'),
- ('ٮ', 'ٯ'),
- ('ޱ', 'ޱ'),
- ('ჷ', 'ჸ'),
- ('ᜀ', 'ᜌ'),
- ('ᜎ', '\u{1714}'),
- ('ᜠ', '᜶'),
- ('ᝀ', '\u{1753}'),
- ('ᝠ', 'ᝬ'),
- ('ᝮ', 'ᝰ'),
- ('\u{1772}', '\u{1773}'),
- ('⁇', '⁇'),
- ('⁎', '⁒'),
- ('⁗', '⁗'),
- ('\u{205f}', '\u{2063}'),
- ('ⁱ', 'ⁱ'),
- ('₰', '₱'),
- ('\u{20e4}', '\u{20ea}'),
- ('ℽ', '⅋'),
- ('⇴', '⇿'),
- ('⋲', '⋿'),
- ('⍼', '⍼'),
- ('⎛', '⏎'),
- ('⓫', '⓾'),
- ('▖', '▟'),
- ('◸', '◿'),
- ('☖', '☗'),
- ('♲', '♽'),
- ('⚀', '⚉'),
- ('❨', '❵'),
- ('⟐', '⟫'),
- ('⟰', '⟿'),
- ('⤀', '⫿'),
- ('〻', '〽'),
- ('ゕ', 'ゖ'),
- ('ゟ', '゠'),
- ('ヿ', 'ヿ'),
- ('ㇰ', 'ㇿ'),
- ('㉑', '㉟'),
- ('㊱', '㊿'),
- ('꒢', '꒣'),
- ('꒴', '꒴'),
- ('꓁', '꓁'),
- ('꓅', '꓅'),
- ('侮', '頻'),
- ('﷼', '﷼'),
- ('\u{fe00}', '\u{fe0f}'),
- ('﹅', '﹆'),
- ('ﹳ', 'ﹳ'),
- ('⦅', '⦆'),
-];
-
-pub const V4_0: &'static [(char, char)] = &[
- ('ȡ', 'ȡ'),
- ('ȴ', 'ȶ'),
- ('ʮ', 'ʯ'),
- ('˯', '˿'),
- ('\u{350}', '\u{357}'),
- ('\u{35d}', '\u{35f}'),
- ('Ϸ', 'ϻ'),
- ('\u{600}', '\u{603}'),
- ('؍', '\u{615}'),
- ('\u{656}', '\u{658}'),
- ('ۮ', 'ۯ'),
- ('ۿ', 'ۿ'),
- ('ܭ', 'ܯ'),
- ('ݍ', 'ݏ'),
- ('ऄ', 'ऄ'),
- ('ঽ', 'ঽ'),
- ('\u{a01}', '\u{a01}'),
- ('ਃ', 'ਃ'),
- ('ઌ', 'ઌ'),
- ('ૡ', '\u{ae3}'),
- ('૱', '૱'),
- ('ଵ', 'ଵ'),
- ('ୱ', 'ୱ'),
- ('௳', '௺'),
- ('\u{cbc}', 'ಽ'),
- ('\u{17dd}', '\u{17dd}'),
- ('៰', '៹'),
- ('ᤀ', 'ᤜ'),
- ('\u{1920}', 'ᤫ'),
- ('ᤰ', '\u{193b}'),
- ('᥀', '᥀'),
- ('᥄', 'ᥭ'),
- ('ᥰ', 'ᥴ'),
- ('᧠', '᧿'),
- ('ᴀ', 'ᵫ'),
- ('⁓', '⁔'),
- ('℻', '℻'),
- ('⏏', '⏐'),
- ('⓿', '⓿'),
- ('☔', '☕'),
- ('⚊', '⚑'),
- ('⚠', '⚡'),
- ('⬀', '⬍'),
- ('㈝', '㈞'),
- ('㉐', '㉐'),
- ('㉼', '㉽'),
- ('㋌', '㋏'),
- ('㍷', '㍺'),
- ('㏞', '㏟'),
- ('㏿', '㏿'),
- ('䷀', '䷿'),
- ('﷽', '﷽'),
- ('﹇', '﹈'),
- ('𐀀', '𐀋'),
- ('𐀍', '𐀦'),
- ('𐀨', '𐀺'),
- ('𐀼', '𐀽'),
- ('𐀿', '𐁍'),
- ('𐁐', '𐁝'),
- ('𐂀', '𐃺'),
- ('𐄀', '𐄂'),
- ('𐄇', '𐄳'),
- ('𐄷', '𐄿'),
- ('𐎀', '𐎝'),
- ('𐎟', '𐎟'),
- ('𐐦', '𐐧'),
- ('𐑎', '𐒝'),
- ('𐒠', '𐒩'),
- ('𐠀', '𐠅'),
- ('𐠈', '𐠈'),
- ('𐠊', '𐠵'),
- ('𐠷', '𐠸'),
- ('𐠼', '𐠼'),
- ('𐠿', '𐠿'),
- ('𝌀', '𝍖'),
- ('𝓁', '𝓁'),
- ('\u{e0100}', '\u{e01ef}'),
-];
-
-pub const V4_1: &'static [(char, char)] = &[
- ('ȷ', 'Ɂ'),
- ('\u{358}', '\u{35c}'),
- ('ϼ', 'Ͽ'),
- ('Ӷ', 'ӷ'),
- ('\u{5a2}', '\u{5a2}'),
- ('\u{5c5}', '\u{5c7}'),
- ('؋', '؋'),
- ('؞', '؞'),
- ('\u{659}', '\u{65e}'),
- ('ݐ', 'ݭ'),
- ('ॽ', 'ॽ'),
- ('ৎ', 'ৎ'),
- ('ஶ', 'ஶ'),
- ('௦', '௦'),
- ('࿐', '࿑'),
- ('ჹ', 'ჺ'),
- ('ჼ', 'ჼ'),
- ('ሇ', 'ሇ'),
- ('ቇ', 'ቇ'),
- ('ኇ', 'ኇ'),
- ('ኯ', 'ኯ'),
- ('ዏ', 'ዏ'),
- ('ዯ', 'ዯ'),
- ('ጏ', 'ጏ'),
- ('ጟ', 'ጟ'),
- ('ፇ', 'ፇ'),
- ('\u{135f}', '፠'),
- ('ᎀ', '᎙'),
- ('ᦀ', 'ᦩ'),
- ('ᦰ', 'ᧉ'),
- ('᧐', '᧙'),
- ('᧞', '᧟'),
- ('ᨀ', '\u{1a1b}'),
- ('᨞', '᨟'),
- ('ᵬ', '\u{1dc3}'),
- ('⁕', '⁖'),
- ('⁘', '⁞'),
- ('ₐ', 'ₔ'),
- ('₲', '₵'),
- ('\u{20eb}', '\u{20eb}'),
- ('ℼ', 'ℼ'),
- ('⅌', '⅌'),
- ('⏑', '⏛'),
- ('☘', '☘'),
- ('♾', '♿'),
- ('⚒', '⚜'),
- ('⚢', '⚱'),
- ('⟀', '⟆'),
- ('⬎', '⬓'),
- ('Ⰰ', 'Ⱞ'),
- ('ⰰ', 'ⱞ'),
- ('Ⲁ', '⳪'),
- ('⳹', 'ⴥ'),
- ('ⴰ', 'ⵥ'),
- ('ⵯ', 'ⵯ'),
- ('ⶀ', 'ⶖ'),
- ('ⶠ', 'ⶦ'),
- ('ⶨ', 'ⶮ'),
- ('ⶰ', 'ⶶ'),
- ('ⶸ', 'ⶾ'),
- ('ⷀ', 'ⷆ'),
- ('ⷈ', 'ⷎ'),
- ('ⷐ', 'ⷖ'),
- ('ⷘ', 'ⷞ'),
- ('⸀', '⸗'),
- ('⸜', '⸝'),
- ('㇀', '㇏'),
- ('㉾', '㉾'),
- ('龦', '龻'),
- ('꜀', '꜖'),
- ('ꠀ', '꠫'),
- ('並', '龎'),
- ('︐', '︙'),
- ('𐅀', '𐆊'),
- ('𐎠', '𐏃'),
- ('𐏈', '𐏕'),
- ('𐨀', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '𐨓'),
- ('𐨕', '𐨗'),
- ('𐨙', '𐨳'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '𐩇'),
- ('𐩐', '𐩘'),
- ('𝈀', '𝉅'),
- ('𝚤', '𝚥'),
-];
-
-pub const V5_0: &'static [(char, char)] = &[
- ('ɂ', 'ɏ'),
- ('ͻ', 'ͽ'),
- ('ӏ', 'ӏ'),
- ('Ӻ', 'ӿ'),
- ('Ԑ', 'ԓ'),
- ('\u{5ba}', '\u{5ba}'),
- ('߀', 'ߺ'),
- ('ॻ', 'ॼ'),
- ('ॾ', 'ॿ'),
- ('\u{ce2}', '\u{ce3}'),
- ('ೱ', 'ೲ'),
- ('\u{1b00}', 'ᭋ'),
- ('᭐', '᭼'),
- ('\u{1dc4}', '\u{1dca}'),
- ('\u{1dfe}', '\u{1dff}'),
- ('\u{20ec}', '\u{20ef}'),
- ('⅍', 'ⅎ'),
- ('ↄ', 'ↄ'),
- ('⏜', '⏧'),
- ('⚲', '⚲'),
- ('⟇', '⟊'),
- ('⬔', '⬚'),
- ('⬠', '⬣'),
- ('Ⱡ', 'ⱬ'),
- ('ⱴ', 'ⱷ'),
- ('ꜗ', 'ꜚ'),
- ('꜠', '꜡'),
- ('ꡀ', '꡷'),
- ('𐤀', '𐤙'),
- ('𐤟', '𐤟'),
- ('𒀀', '𒍮'),
- ('𒐀', '𒑢'),
- ('𒑰', '𒑳'),
- ('𝍠', '𝍱'),
- ('𝟊', '𝟋'),
-];
-
-pub const V5_1: &'static [(char, char)] = &[
- ('Ͱ', 'ͳ'),
- ('Ͷ', 'ͷ'),
- ('Ϗ', 'Ϗ'),
- ('\u{487}', '\u{487}'),
- ('Ԕ', 'ԣ'),
- ('؆', '؊'),
- ('\u{616}', '\u{61a}'),
- ('ػ', 'ؿ'),
- ('ݮ', 'ݿ'),
- ('ॱ', 'ॲ'),
- ('\u{a51}', '\u{a51}'),
- ('\u{a75}', '\u{a75}'),
- ('\u{b44}', '\u{b44}'),
- ('\u{b62}', '\u{b63}'),
- ('ௐ', 'ௐ'),
- ('ఽ', 'ఽ'),
- ('ౘ', 'ౙ'),
- ('\u{c62}', '\u{c63}'),
- ('౸', '౿'),
- ('ഽ', 'ഽ'),
- ('\u{d44}', '\u{d44}'),
- ('\u{d62}', '\u{d63}'),
- ('൰', '൵'),
- ('൹', 'ൿ'),
- ('ཫ', 'ཬ'),
- ('࿎', '࿎'),
- ('࿒', '࿔'),
- ('ဢ', 'ဢ'),
- ('ဨ', 'ဨ'),
- ('ါ', 'ါ'),
- ('\u{1033}', '\u{1035}'),
- ('\u{103a}', 'ဿ'),
- ('ၚ', '႙'),
- ('႞', '႟'),
- ('ᢪ', 'ᢪ'),
- ('\u{1b80}', '\u{1baa}'),
- ('ᮮ', '᮹'),
- ('ᰀ', '\u{1c37}'),
- ('᰻', '᱉'),
- ('ᱍ', '᱿'),
- ('\u{1dcb}', '\u{1de6}'),
- ('ẜ', 'ẟ'),
- ('Ỻ', 'ỿ'),
- ('\u{2064}', '\u{2064}'),
- ('\u{20f0}', '\u{20f0}'),
- ('⅏', '⅏'),
- ('ↅ', 'ↈ'),
- ('⚝', '⚝'),
- ('⚳', '⚼'),
- ('⛀', '⛃'),
- ('⟌', '⟌'),
- ('⟬', '⟯'),
- ('⬛', '⬟'),
- ('⬤', '⭌'),
- ('⭐', '⭔'),
- ('Ɑ', 'Ɐ'),
- ('ⱱ', 'ⱳ'),
- ('ⱸ', 'ⱽ'),
- ('\u{2de0}', '\u{2dff}'),
- ('⸘', '⸛'),
- ('⸞', '⸰'),
- ('ㄭ', 'ㄭ'),
- ('㇐', '㇣'),
- ('龼', '鿃'),
- ('ꔀ', 'ꘫ'),
- ('Ꙁ', 'ꙟ'),
- ('Ꙣ', '꙳'),
- ('\u{a67c}', 'ꚗ'),
- ('ꜛ', 'ꜟ'),
- ('Ꜣ', 'ꞌ'),
- ('ꟻ', 'ꟿ'),
- ('ꢀ', '\u{a8c4}'),
- ('꣎', '꣙'),
- ('꤀', '\u{a953}'),
- ('꥟', '꥟'),
- ('ꨀ', '\u{aa36}'),
- ('ꩀ', 'ꩍ'),
- ('꩐', '꩙'),
- ('꩜', '꩟'),
- ('\u{fe24}', '\u{fe26}'),
- ('𐆐', '𐆛'),
- ('𐇐', '\u{101fd}'),
- ('𐊀', '𐊜'),
- ('𐊠', '𐋐'),
- ('𐤠', '𐤹'),
- ('𐤿', '𐤿'),
- ('𝄩', '𝄩'),
- ('🀀', '🀫'),
- ('🀰', '🂓'),
-];
-
-pub const V5_2: &'static [(char, char)] = &[
- ('Ԥ', 'ԥ'),
- ('ࠀ', '\u{82d}'),
- ('࠰', '࠾'),
- ('\u{900}', '\u{900}'),
- ('ॎ', 'ॎ'),
- ('\u{955}', '\u{955}'),
- ('ॹ', 'ॺ'),
- ('৻', '৻'),
- ('࿕', '࿘'),
- ('ႚ', '\u{109d}'),
- ('ᅚ', 'ᅞ'),
- ('ᆣ', 'ᆧ'),
- ('ᇺ', 'ᇿ'),
- ('᐀', '᐀'),
- ('ᙷ', 'ᙿ'),
- ('ᢰ', 'ᣵ'),
- ('ᦪ', 'ᦫ'),
- ('᧚', '᧚'),
- ('ᨠ', '\u{1a5e}'),
- ('\u{1a60}', '\u{1a7c}'),
- ('\u{1a7f}', '᪉'),
- ('᪐', '᪙'),
- ('᪠', '᪭'),
- ('\u{1cd0}', 'ᳲ'),
- ('\u{1dfd}', '\u{1dfd}'),
- ('₶', '₸'),
- ('⅐', '⅒'),
- ('↉', '↉'),
- ('⏨', '⏨'),
- ('⚞', '⚟'),
- ('⚽', '⚿'),
- ('⛄', '⛍'),
- ('⛏', '⛡'),
- ('⛣', '⛣'),
- ('⛨', '⛿'),
- ('❗', '❗'),
- ('⭕', '⭙'),
- ('Ɒ', 'Ɒ'),
- ('Ȿ', 'Ɀ'),
- ('Ⳬ', '\u{2cf1}'),
- ('⸱', '⸱'),
- ('㉄', '㉏'),
- ('鿄', '鿋'),
- ('ꓐ', '꓿'),
- ('ꚠ', '꛷'),
- ('꠰', '꠹'),
- ('\u{a8e0}', 'ꣻ'),
- ('ꥠ', 'ꥼ'),
- ('\u{a980}', '꧍'),
- ('ꧏ', '꧙'),
- ('꧞', '꧟'),
- ('ꩠ', 'ꩻ'),
- ('ꪀ', 'ꫂ'),
- ('ꫛ', '꫟'),
- ('ꯀ', '\u{abed}'),
- ('꯰', '꯹'),
- ('ힰ', 'ퟆ'),
- ('ퟋ', 'ퟻ'),
- ('恵', '舘'),
- ('𐡀', '𐡕'),
- ('𐡗', '𐡟'),
- ('𐤚', '𐤛'),
- ('𐩠', '𐩿'),
- ('𐬀', '𐬵'),
- ('𐬹', '𐭕'),
- ('𐭘', '𐭲'),
- ('𐭸', '𐭿'),
- ('𐰀', '𐱈'),
- ('𐹠', '𐹾'),
- ('\u{11080}', '𑃁'),
- ('𓀀', '𓐮'),
- ('🄀', '🄊'),
- ('🄐', '🄮'),
- ('🄱', '🄱'),
- ('🄽', '🄽'),
- ('🄿', '🄿'),
- ('🅂', '🅂'),
- ('🅆', '🅆'),
- ('🅊', '🅎'),
- ('🅗', '🅗'),
- ('🅟', '🅟'),
- ('🅹', '🅹'),
- ('🅻', '🅼'),
- ('🅿', '🅿'),
- ('🆊', '🆍'),
- ('🆐', '🆐'),
- ('🈀', '🈀'),
- ('🈐', '🈱'),
- ('🉀', '🉈'),
- ('𪜀', '𫜴'),
-];
-
-pub const V6_0: &'static [(char, char)] = &[
- ('Ԧ', 'ԧ'),
- ('ؠ', 'ؠ'),
- ('\u{65f}', '\u{65f}'),
- ('ࡀ', '\u{85b}'),
- ('࡞', '࡞'),
- ('\u{93a}', 'ऻ'),
- ('ॏ', 'ॏ'),
- ('\u{956}', '\u{957}'),
- ('ॳ', 'ॷ'),
- ('୲', '୷'),
- ('ഩ', 'ഩ'),
- ('ഺ', 'ഺ'),
- ('ൎ', 'ൎ'),
- ('ྌ', '\u{f8f}'),
- ('࿙', '࿚'),
- ('\u{135d}', '\u{135e}'),
- ('ᯀ', '\u{1bf3}'),
- ('᯼', '᯿'),
- ('\u{1dfc}', '\u{1dfc}'),
- ('ₕ', 'ₜ'),
- ('₹', '₹'),
- ('⏩', '⏳'),
- ('⛎', '⛎'),
- ('⛢', '⛢'),
- ('⛤', '⛧'),
- ('✅', '✅'),
- ('✊', '✋'),
- ('✨', '✨'),
- ('❌', '❌'),
- ('❎', '❎'),
- ('❓', '❕'),
- ('❟', '❠'),
- ('➕', '➗'),
- ('➰', '➰'),
- ('➿', '➿'),
- ('⟎', '⟏'),
- ('⵰', '⵰'),
- ('\u{2d7f}', '\u{2d7f}'),
- ('ㆸ', 'ㆺ'),
- ('Ꙡ', 'ꙡ'),
- ('Ɥ', 'ꞎ'),
- ('Ꞑ', 'ꞑ'),
- ('Ꞡ', 'ꞩ'),
- ('ꟺ', 'ꟺ'),
- ('ꬁ', 'ꬆ'),
- ('ꬉ', 'ꬎ'),
- ('ꬑ', 'ꬖ'),
- ('ꬠ', 'ꬦ'),
- ('ꬨ', 'ꬮ'),
- ('﮲', '﯁'),
- ('𑀀', '𑁍'),
- ('𑁒', '𑁯'),
- ('𖠀', '𖨸'),
- ('𛀀', '𛀁'),
- ('🂠', '🂮'),
- ('🂱', '🂾'),
- ('🃁', '🃏'),
- ('🃑', '🃟'),
- ('🄰', '🄰'),
- ('🄲', '🄼'),
- ('🄾', '🄾'),
- ('🅀', '🅁'),
- ('🅃', '🅅'),
- ('🅇', '🅉'),
- ('🅏', '🅖'),
- ('🅘', '🅞'),
- ('🅠', '🅩'),
- ('🅰', '🅸'),
- ('🅺', '🅺'),
- ('🅽', '🅾'),
- ('🆀', '🆉'),
- ('🆎', '🆏'),
- ('🆑', '🆚'),
- ('🇦', '🇿'),
- ('🈁', '🈂'),
- ('🈲', '🈺'),
- ('🉐', '🉑'),
- ('🌀', '🌠'),
- ('🌰', '🌵'),
- ('🌷', '🍼'),
- ('🎀', '🎓'),
- ('🎠', '🏄'),
- ('🏆', '🏊'),
- ('🏠', '🏰'),
- ('🐀', '🐾'),
- ('👀', '👀'),
- ('👂', '📷'),
- ('📹', '📼'),
- ('🔀', '🔽'),
- ('🕐', '🕧'),
- ('🗻', '🗿'),
- ('😁', '😐'),
- ('😒', '😔'),
- ('😖', '😖'),
- ('😘', '😘'),
- ('😚', '😚'),
- ('😜', '😞'),
- ('😠', '😥'),
- ('😨', '😫'),
- ('😭', '😭'),
- ('😰', '😳'),
- ('😵', '🙀'),
- ('🙅', '🙏'),
- ('🚀', '🛅'),
- ('🜀', '🝳'),
- ('𫝀', '𫠝'),
-];
-
-pub const V6_1: &'static [(char, char)] = &[
- ('֏', '֏'),
- ('\u{604}', '\u{604}'),
- ('ࢠ', 'ࢠ'),
- ('ࢢ', 'ࢬ'),
- ('\u{8e4}', '\u{8fe}'),
- ('૰', '૰'),
- ('ໞ', 'ໟ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ჽ', 'ჿ'),
- ('\u{1bab}', '\u{1bad}'),
- ('ᮺ', 'ᮿ'),
- ('᳀', '᳇'),
- ('ᳳ', 'ᳶ'),
- ('⟋', '⟋'),
- ('⟍', '⟍'),
- ('Ⳳ', 'ⳳ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ⵦ', 'ⵧ'),
- ('⸲', '⸻'),
- ('鿌', '鿌'),
- ('\u{a674}', '\u{a67b}'),
- ('\u{a69f}', '\u{a69f}'),
- ('Ꞓ', 'ꞓ'),
- ('Ɦ', 'Ɦ'),
- ('ꟸ', 'ꟹ'),
- ('ꫠ', '\u{aaf6}'),
- ('郞', '隷'),
- ('𐦀', '𐦷'),
- ('𐦾', '𐦿'),
- ('𑃐', '𑃨'),
- ('𑃰', '𑃹'),
- ('\u{11100}', '\u{11134}'),
- ('𑄶', '𑅃'),
- ('\u{11180}', '𑇈'),
- ('𑇐', '𑇙'),
- ('𑚀', '\u{116b7}'),
- ('𑛀', '𑛉'),
- ('𖼀', '𖽄'),
- ('𖽐', '𖽾'),
- ('\u{16f8f}', '𖾟'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('𞻰', '𞻱'),
- ('🅪', '🅫'),
- ('🕀', '🕃'),
- ('😀', '😀'),
- ('😑', '😑'),
- ('😕', '😕'),
- ('😗', '😗'),
- ('😙', '😙'),
- ('😛', '😛'),
- ('😟', '😟'),
- ('😦', '😧'),
- ('😬', '😬'),
- ('😮', '😯'),
- ('😴', '😴'),
-];
-
-pub const V6_2: &'static [(char, char)] = &[('₺', '₺')];
-
-pub const V6_3: &'static [(char, char)] =
- &[('\u{61c}', '\u{61c}'), ('\u{2066}', '\u{2069}')];
-
-pub const V7_0: &'static [(char, char)] = &[
- ('Ϳ', 'Ϳ'),
- ('Ԩ', 'ԯ'),
- ('֍', '֎'),
- ('\u{605}', '\u{605}'),
- ('ࢡ', 'ࢡ'),
- ('ࢭ', 'ࢲ'),
- ('\u{8ff}', '\u{8ff}'),
- ('ॸ', 'ॸ'),
- ('ঀ', 'ঀ'),
- ('\u{c00}', '\u{c00}'),
- ('ఴ', 'ఴ'),
- ('\u{c81}', '\u{c81}'),
- ('\u{d01}', '\u{d01}'),
- ('෦', '෯'),
- ('ᛱ', 'ᛸ'),
- ('ᤝ', 'ᤞ'),
- ('\u{1ab0}', '\u{1abe}'),
- ('\u{1cf8}', '\u{1cf9}'),
- ('\u{1de7}', '\u{1df5}'),
- ('₻', '₽'),
- ('⏴', '⏺'),
- ('✀', '✀'),
- ('⭍', '⭏'),
- ('⭚', '⭳'),
- ('⭶', '⮕'),
- ('⮘', '⮹'),
- ('⮽', '⯈'),
- ('⯊', '⯑'),
- ('⸼', '⹂'),
- ('Ꚙ', 'ꚝ'),
- ('ꞔ', 'ꞟ'),
- ('Ɜ', 'Ɬ'),
- ('Ʞ', 'Ʇ'),
- ('ꟷ', 'ꟷ'),
- ('ꧠ', 'ꧾ'),
- ('\u{aa7c}', 'ꩿ'),
- ('ꬰ', 'ꭟ'),
- ('ꭤ', 'ꭥ'),
- ('\u{fe27}', '\u{fe2d}'),
- ('𐆋', '𐆌'),
- ('𐆠', '𐆠'),
- ('\u{102e0}', '𐋻'),
- ('𐌟', '𐌟'),
- ('𐍐', '\u{1037a}'),
- ('𐔀', '𐔧'),
- ('𐔰', '𐕣'),
- ('𐕯', '𐕯'),
- ('𐘀', '𐜶'),
- ('𐝀', '𐝕'),
- ('𐝠', '𐝧'),
- ('𐡠', '𐢞'),
- ('𐢧', '𐢯'),
- ('𐪀', '𐪟'),
- ('𐫀', '\u{10ae6}'),
- ('𐫫', '𐫶'),
- ('𐮀', '𐮑'),
- ('𐮙', '𐮜'),
- ('𐮩', '𐮯'),
- ('\u{1107f}', '\u{1107f}'),
- ('𑅐', '𑅶'),
- ('𑇍', '𑇍'),
- ('𑇚', '𑇚'),
- ('𑇡', '𑇴'),
- ('𑈀', '𑈑'),
- ('𑈓', '𑈽'),
- ('𑊰', '\u{112ea}'),
- ('𑋰', '𑋹'),
- ('\u{11301}', '𑌃'),
- ('𑌅', '𑌌'),
- ('𑌏', '𑌐'),
- ('𑌓', '𑌨'),
- ('𑌪', '𑌰'),
- ('𑌲', '𑌳'),
- ('𑌵', '𑌹'),
- ('\u{1133c}', '𑍄'),
- ('𑍇', '𑍈'),
- ('𑍋', '\u{1134d}'),
- ('\u{11357}', '\u{11357}'),
- ('𑍝', '𑍣'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
- ('𑒀', '𑓇'),
- ('𑓐', '𑓙'),
- ('𑖀', '\u{115b5}'),
- ('𑖸', '𑗉'),
- ('𑘀', '𑙄'),
- ('𑙐', '𑙙'),
- ('𑢠', '𑣲'),
- ('𑣿', '𑣿'),
- ('𑫀', '𑫸'),
- ('𒍯', '𒎘'),
- ('𒑣', '𒑮'),
- ('𒑴', '𒑴'),
- ('𖩀', '𖩞'),
- ('𖩠', '𖩩'),
- ('𖩮', '𖩯'),
- ('𖫐', '𖫭'),
- ('\u{16af0}', '𖫵'),
- ('𖬀', '𖭅'),
- ('𖭐', '𖭙'),
- ('𖭛', '𖭡'),
- ('𖭣', '𖭷'),
- ('𖭽', '𖮏'),
- ('𛰀', '𛱪'),
- ('𛱰', '𛱼'),
- ('𛲀', '𛲈'),
- ('𛲐', '𛲙'),
- ('𛲜', '\u{1bca3}'),
- ('𞠀', '𞣄'),
- ('𞣇', '\u{1e8d6}'),
- ('🂿', '🂿'),
- ('🃠', '🃵'),
- ('🄋', '🄌'),
- ('🌡', '🌬'),
- ('🌶', '🌶'),
- ('🍽', '🍽'),
- ('🎔', '🎟'),
- ('🏅', '🏅'),
- ('🏋', '🏎'),
- ('🏔', '🏟'),
- ('🏱', '🏷'),
- ('🐿', '🐿'),
- ('👁', '👁'),
- ('📸', '📸'),
- ('📽', '📾'),
- ('🔾', '🔿'),
- ('🕄', '🕊'),
- ('🕨', '🕹'),
- ('🕻', '🖣'),
- ('🖥', '🗺'),
- ('🙁', '🙂'),
- ('🙐', '🙿'),
- ('🛆', '🛏'),
- ('🛠', '🛬'),
- ('🛰', '🛳'),
- ('🞀', '🟔'),
- ('🠀', '🠋'),
- ('🠐', '🡇'),
- ('🡐', '🡙'),
- ('🡠', '🢇'),
- ('🢐', '🢭'),
-];
-
-pub const V8_0: &'static [(char, char)] = &[
- ('ࢳ', 'ࢴ'),
- ('\u{8e3}', '\u{8e3}'),
- ('ૹ', 'ૹ'),
- ('ౚ', 'ౚ'),
- ('ൟ', 'ൟ'),
- ('Ᏽ', 'Ᏽ'),
- ('ᏸ', 'ᏽ'),
- ('₾', '₾'),
- ('↊', '↋'),
- ('⯬', '⯯'),
- ('鿍', '鿕'),
- ('\u{a69e}', '\u{a69e}'),
- ('ꞏ', 'ꞏ'),
- ('Ʝ', 'ꞷ'),
- ('꣼', 'ꣽ'),
- ('ꭠ', 'ꭣ'),
- ('ꭰ', 'ꮿ'),
- ('\u{fe2e}', '\u{fe2f}'),
- ('𐣠', '𐣲'),
- ('𐣴', '𐣵'),
- ('𐣻', '𐣿'),
- ('𐦼', '𐦽'),
- ('𐧀', '𐧏'),
- ('𐧒', '𐧿'),
- ('𐲀', '𐲲'),
- ('𐳀', '𐳲'),
- ('𐳺', '𐳿'),
- ('\u{111c9}', '\u{111cc}'),
- ('𑇛', '𑇟'),
- ('𑊀', '𑊆'),
- ('𑊈', '𑊈'),
- ('𑊊', '𑊍'),
- ('𑊏', '𑊝'),
- ('𑊟', '𑊩'),
- ('\u{11300}', '\u{11300}'),
- ('𑍐', '𑍐'),
- ('𑗊', '\u{115dd}'),
- ('𑜀', '𑜙'),
- ('\u{1171d}', '\u{1172b}'),
- ('𑜰', '𑜿'),
- ('𒎙', '𒎙'),
- ('𒒀', '𒕃'),
- ('𔐀', '𔙆'),
- ('𝇞', '𝇨'),
- ('𝠀', '𝪋'),
- ('\u{1da9b}', '\u{1da9f}'),
- ('\u{1daa1}', '\u{1daaf}'),
- ('🌭', '🌯'),
- ('🍾', '🍿'),
- ('🏏', '🏓'),
- ('🏸', '🏿'),
- ('📿', '📿'),
- ('🕋', '🕏'),
- ('🙃', '🙄'),
- ('🛐', '🛐'),
- ('🤐', '🤘'),
- ('🦀', '🦄'),
- ('🧀', '🧀'),
- ('𫠠', '𬺡'),
-];
-
-pub const V9_0: &'static [(char, char)] = &[
- ('ࢶ', 'ࢽ'),
- ('\u{8d4}', '\u{8e2}'),
- ('ಀ', 'ಀ'),
- ('൏', '൏'),
- ('ൔ', 'ൖ'),
- ('൘', '൞'),
- ('൶', '൸'),
- ('ᲀ', 'ᲈ'),
- ('\u{1dfb}', '\u{1dfb}'),
- ('⏻', '⏾'),
- ('⹃', '⹄'),
- ('Ɪ', 'Ɪ'),
- ('\u{a8c5}', '\u{a8c5}'),
- ('𐆍', '𐆎'),
- ('𐒰', '𐓓'),
- ('𐓘', '𐓻'),
- ('\u{1123e}', '\u{1123e}'),
- ('𑐀', '𑑙'),
- ('𑑛', '𑑛'),
- ('𑑝', '𑑝'),
- ('𑙠', '𑙬'),
- ('𑰀', '𑰈'),
- ('𑰊', '\u{11c36}'),
- ('\u{11c38}', '𑱅'),
- ('𑱐', '𑱬'),
- ('𑱰', '𑲏'),
- ('\u{11c92}', '\u{11ca7}'),
- ('𑲩', '\u{11cb6}'),
- ('𖿠', '𖿠'),
- ('𗀀', '𘟬'),
- ('𘠀', '𘫲'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
- ('𞤀', '\u{1e94a}'),
- ('𞥐', '𞥙'),
- ('𞥞', '𞥟'),
- ('🆛', '🆬'),
- ('🈻', '🈻'),
- ('🕺', '🕺'),
- ('🖤', '🖤'),
- ('🛑', '🛒'),
- ('🛴', '🛶'),
- ('🤙', '🤞'),
- ('🤠', '🤧'),
- ('🤰', '🤰'),
- ('🤳', '🤾'),
- ('🥀', '🥋'),
- ('🥐', '🥞'),
- ('🦅', '🦑'),
-];
diff --git a/vendor/regex-syntax/src/unicode_tables/case_folding_simple.rs b/vendor/regex-syntax/src/unicode_tables/case_folding_simple.rs
deleted file mode 100644
index 07f6ff2f..00000000
--- a/vendor/regex-syntax/src/unicode_tables/case_folding_simple.rs
+++ /dev/null
@@ -1,2948 +0,0 @@
-// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
-//
-// ucd-generate case-folding-simple ucd-16.0.0 --chars --all-pairs
-//
-// Unicode version: 16.0.0.
-//
-// ucd-generate 0.3.1 is available on crates.io.
-
-pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
- ('A', &['a']),
- ('B', &['b']),
- ('C', &['c']),
- ('D', &['d']),
- ('E', &['e']),
- ('F', &['f']),
- ('G', &['g']),
- ('H', &['h']),
- ('I', &['i']),
- ('J', &['j']),
- ('K', &['k', 'K']),
- ('L', &['l']),
- ('M', &['m']),
- ('N', &['n']),
- ('O', &['o']),
- ('P', &['p']),
- ('Q', &['q']),
- ('R', &['r']),
- ('S', &['s', 'ſ']),
- ('T', &['t']),
- ('U', &['u']),
- ('V', &['v']),
- ('W', &['w']),
- ('X', &['x']),
- ('Y', &['y']),
- ('Z', &['z']),
- ('a', &['A']),
- ('b', &['B']),
- ('c', &['C']),
- ('d', &['D']),
- ('e', &['E']),
- ('f', &['F']),
- ('g', &['G']),
- ('h', &['H']),
- ('i', &['I']),
- ('j', &['J']),
- ('k', &['K', 'K']),
- ('l', &['L']),
- ('m', &['M']),
- ('n', &['N']),
- ('o', &['O']),
- ('p', &['P']),
- ('q', &['Q']),
- ('r', &['R']),
- ('s', &['S', 'ſ']),
- ('t', &['T']),
- ('u', &['U']),
- ('v', &['V']),
- ('w', &['W']),
- ('x', &['X']),
- ('y', &['Y']),
- ('z', &['Z']),
- ('µ', &['Μ', 'μ']),
- ('À', &['à']),
- ('Á', &['á']),
- ('Â', &['â']),
- ('Ã', &['ã']),
- ('Ä', &['ä']),
- ('Å', &['å', 'Å']),
- ('Æ', &['æ']),
- ('Ç', &['ç']),
- ('È', &['è']),
- ('É', &['é']),
- ('Ê', &['ê']),
- ('Ë', &['ë']),
- ('Ì', &['ì']),
- ('Í', &['í']),
- ('Î', &['î']),
- ('Ï', &['ï']),
- ('Ð', &['ð']),
- ('Ñ', &['ñ']),
- ('Ò', &['ò']),
- ('Ó', &['ó']),
- ('Ô', &['ô']),
- ('Õ', &['õ']),
- ('Ö', &['ö']),
- ('Ø', &['ø']),
- ('Ù', &['ù']),
- ('Ú', &['ú']),
- ('Û', &['û']),
- ('Ü', &['ü']),
- ('Ý', &['ý']),
- ('Þ', &['þ']),
- ('ß', &['ẞ']),
- ('à', &['À']),
- ('á', &['Á']),
- ('â', &['Â']),
- ('ã', &['Ã']),
- ('ä', &['Ä']),
- ('å', &['Å', 'Å']),
- ('æ', &['Æ']),
- ('ç', &['Ç']),
- ('è', &['È']),
- ('é', &['É']),
- ('ê', &['Ê']),
- ('ë', &['Ë']),
- ('ì', &['Ì']),
- ('í', &['Í']),
- ('î', &['Î']),
- ('ï', &['Ï']),
- ('ð', &['Ð']),
- ('ñ', &['Ñ']),
- ('ò', &['Ò']),
- ('ó', &['Ó']),
- ('ô', &['Ô']),
- ('õ', &['Õ']),
- ('ö', &['Ö']),
- ('ø', &['Ø']),
- ('ù', &['Ù']),
- ('ú', &['Ú']),
- ('û', &['Û']),
- ('ü', &['Ü']),
- ('ý', &['Ý']),
- ('þ', &['Þ']),
- ('ÿ', &['Ÿ']),
- ('Ā', &['ā']),
- ('ā', &['Ā']),
- ('Ă', &['ă']),
- ('ă', &['Ă']),
- ('Ą', &['ą']),
- ('ą', &['Ą']),
- ('Ć', &['ć']),
- ('ć', &['Ć']),
- ('Ĉ', &['ĉ']),
- ('ĉ', &['Ĉ']),
- ('Ċ', &['ċ']),
- ('ċ', &['Ċ']),
- ('Č', &['č']),
- ('č', &['Č']),
- ('Ď', &['ď']),
- ('ď', &['Ď']),
- ('Đ', &['đ']),
- ('đ', &['Đ']),
- ('Ē', &['ē']),
- ('ē', &['Ē']),
- ('Ĕ', &['ĕ']),
- ('ĕ', &['Ĕ']),
- ('Ė', &['ė']),
- ('ė', &['Ė']),
- ('Ę', &['ę']),
- ('ę', &['Ę']),
- ('Ě', &['ě']),
- ('ě', &['Ě']),
- ('Ĝ', &['ĝ']),
- ('ĝ', &['Ĝ']),
- ('Ğ', &['ğ']),
- ('ğ', &['Ğ']),
- ('Ġ', &['ġ']),
- ('ġ', &['Ġ']),
- ('Ģ', &['ģ']),
- ('ģ', &['Ģ']),
- ('Ĥ', &['ĥ']),
- ('ĥ', &['Ĥ']),
- ('Ħ', &['ħ']),
- ('ħ', &['Ħ']),
- ('Ĩ', &['ĩ']),
- ('ĩ', &['Ĩ']),
- ('Ī', &['ī']),
- ('ī', &['Ī']),
- ('Ĭ', &['ĭ']),
- ('ĭ', &['Ĭ']),
- ('Į', &['į']),
- ('į', &['Į']),
- ('IJ', &['ij']),
- ('ij', &['IJ']),
- ('Ĵ', &['ĵ']),
- ('ĵ', &['Ĵ']),
- ('Ķ', &['ķ']),
- ('ķ', &['Ķ']),
- ('Ĺ', &['ĺ']),
- ('ĺ', &['Ĺ']),
- ('Ļ', &['ļ']),
- ('ļ', &['Ļ']),
- ('Ľ', &['ľ']),
- ('ľ', &['Ľ']),
- ('Ŀ', &['ŀ']),
- ('ŀ', &['Ŀ']),
- ('Ł', &['ł']),
- ('ł', &['Ł']),
- ('Ń', &['ń']),
- ('ń', &['Ń']),
- ('Ņ', &['ņ']),
- ('ņ', &['Ņ']),
- ('Ň', &['ň']),
- ('ň', &['Ň']),
- ('Ŋ', &['ŋ']),
- ('ŋ', &['Ŋ']),
- ('Ō', &['ō']),
- ('ō', &['Ō']),
- ('Ŏ', &['ŏ']),
- ('ŏ', &['Ŏ']),
- ('Ő', &['ő']),
- ('ő', &['Ő']),
- ('Œ', &['œ']),
- ('œ', &['Œ']),
- ('Ŕ', &['ŕ']),
- ('ŕ', &['Ŕ']),
- ('Ŗ', &['ŗ']),
- ('ŗ', &['Ŗ']),
- ('Ř', &['ř']),
- ('ř', &['Ř']),
- ('Ś', &['ś']),
- ('ś', &['Ś']),
- ('Ŝ', &['ŝ']),
- ('ŝ', &['Ŝ']),
- ('Ş', &['ş']),
- ('ş', &['Ş']),
- ('Š', &['š']),
- ('š', &['Š']),
- ('Ţ', &['ţ']),
- ('ţ', &['Ţ']),
- ('Ť', &['ť']),
- ('ť', &['Ť']),
- ('Ŧ', &['ŧ']),
- ('ŧ', &['Ŧ']),
- ('Ũ', &['ũ']),
- ('ũ', &['Ũ']),
- ('Ū', &['ū']),
- ('ū', &['Ū']),
- ('Ŭ', &['ŭ']),
- ('ŭ', &['Ŭ']),
- ('Ů', &['ů']),
- ('ů', &['Ů']),
- ('Ű', &['ű']),
- ('ű', &['Ű']),
- ('Ų', &['ų']),
- ('ų', &['Ų']),
- ('Ŵ', &['ŵ']),
- ('ŵ', &['Ŵ']),
- ('Ŷ', &['ŷ']),
- ('ŷ', &['Ŷ']),
- ('Ÿ', &['ÿ']),
- ('Ź', &['ź']),
- ('ź', &['Ź']),
- ('Ż', &['ż']),
- ('ż', &['Ż']),
- ('Ž', &['ž']),
- ('ž', &['Ž']),
- ('ſ', &['S', 's']),
- ('ƀ', &['Ƀ']),
- ('Ɓ', &['ɓ']),
- ('Ƃ', &['ƃ']),
- ('ƃ', &['Ƃ']),
- ('Ƅ', &['ƅ']),
- ('ƅ', &['Ƅ']),
- ('Ɔ', &['ɔ']),
- ('Ƈ', &['ƈ']),
- ('ƈ', &['Ƈ']),
- ('Ɖ', &['ɖ']),
- ('Ɗ', &['ɗ']),
- ('Ƌ', &['ƌ']),
- ('ƌ', &['Ƌ']),
- ('Ǝ', &['ǝ']),
- ('Ə', &['ə']),
- ('Ɛ', &['ɛ']),
- ('Ƒ', &['ƒ']),
- ('ƒ', &['Ƒ']),
- ('Ɠ', &['ɠ']),
- ('Ɣ', &['ɣ']),
- ('ƕ', &['Ƕ']),
- ('Ɩ', &['ɩ']),
- ('Ɨ', &['ɨ']),
- ('Ƙ', &['ƙ']),
- ('ƙ', &['Ƙ']),
- ('ƚ', &['Ƚ']),
- ('ƛ', &['Ƛ']),
- ('Ɯ', &['ɯ']),
- ('Ɲ', &['ɲ']),
- ('ƞ', &['Ƞ']),
- ('Ɵ', &['ɵ']),
- ('Ơ', &['ơ']),
- ('ơ', &['Ơ']),
- ('Ƣ', &['ƣ']),
- ('ƣ', &['Ƣ']),
- ('Ƥ', &['ƥ']),
- ('ƥ', &['Ƥ']),
- ('Ʀ', &['ʀ']),
- ('Ƨ', &['ƨ']),
- ('ƨ', &['Ƨ']),
- ('Ʃ', &['ʃ']),
- ('Ƭ', &['ƭ']),
- ('ƭ', &['Ƭ']),
- ('Ʈ', &['ʈ']),
- ('Ư', &['ư']),
- ('ư', &['Ư']),
- ('Ʊ', &['ʊ']),
- ('Ʋ', &['ʋ']),
- ('Ƴ', &['ƴ']),
- ('ƴ', &['Ƴ']),
- ('Ƶ', &['ƶ']),
- ('ƶ', &['Ƶ']),
- ('Ʒ', &['ʒ']),
- ('Ƹ', &['ƹ']),
- ('ƹ', &['Ƹ']),
- ('Ƽ', &['ƽ']),
- ('ƽ', &['Ƽ']),
- ('ƿ', &['Ƿ']),
- ('DŽ', &['Dž', 'dž']),
- ('Dž', &['DŽ', 'dž']),
- ('dž', &['DŽ', 'Dž']),
- ('LJ', &['Lj', 'lj']),
- ('Lj', &['LJ', 'lj']),
- ('lj', &['LJ', 'Lj']),
- ('NJ', &['Nj', 'nj']),
- ('Nj', &['NJ', 'nj']),
- ('nj', &['NJ', 'Nj']),
- ('Ǎ', &['ǎ']),
- ('ǎ', &['Ǎ']),
- ('Ǐ', &['ǐ']),
- ('ǐ', &['Ǐ']),
- ('Ǒ', &['ǒ']),
- ('ǒ', &['Ǒ']),
- ('Ǔ', &['ǔ']),
- ('ǔ', &['Ǔ']),
- ('Ǖ', &['ǖ']),
- ('ǖ', &['Ǖ']),
- ('Ǘ', &['ǘ']),
- ('ǘ', &['Ǘ']),
- ('Ǚ', &['ǚ']),
- ('ǚ', &['Ǚ']),
- ('Ǜ', &['ǜ']),
- ('ǜ', &['Ǜ']),
- ('ǝ', &['Ǝ']),
- ('Ǟ', &['ǟ']),
- ('ǟ', &['Ǟ']),
- ('Ǡ', &['ǡ']),
- ('ǡ', &['Ǡ']),
- ('Ǣ', &['ǣ']),
- ('ǣ', &['Ǣ']),
- ('Ǥ', &['ǥ']),
- ('ǥ', &['Ǥ']),
- ('Ǧ', &['ǧ']),
- ('ǧ', &['Ǧ']),
- ('Ǩ', &['ǩ']),
- ('ǩ', &['Ǩ']),
- ('Ǫ', &['ǫ']),
- ('ǫ', &['Ǫ']),
- ('Ǭ', &['ǭ']),
- ('ǭ', &['Ǭ']),
- ('Ǯ', &['ǯ']),
- ('ǯ', &['Ǯ']),
- ('DZ', &['Dz', 'dz']),
- ('Dz', &['DZ', 'dz']),
- ('dz', &['DZ', 'Dz']),
- ('Ǵ', &['ǵ']),
- ('ǵ', &['Ǵ']),
- ('Ƕ', &['ƕ']),
- ('Ƿ', &['ƿ']),
- ('Ǹ', &['ǹ']),
- ('ǹ', &['Ǹ']),
- ('Ǻ', &['ǻ']),
- ('ǻ', &['Ǻ']),
- ('Ǽ', &['ǽ']),
- ('ǽ', &['Ǽ']),
- ('Ǿ', &['ǿ']),
- ('ǿ', &['Ǿ']),
- ('Ȁ', &['ȁ']),
- ('ȁ', &['Ȁ']),
- ('Ȃ', &['ȃ']),
- ('ȃ', &['Ȃ']),
- ('Ȅ', &['ȅ']),
- ('ȅ', &['Ȅ']),
- ('Ȇ', &['ȇ']),
- ('ȇ', &['Ȇ']),
- ('Ȉ', &['ȉ']),
- ('ȉ', &['Ȉ']),
- ('Ȋ', &['ȋ']),
- ('ȋ', &['Ȋ']),
- ('Ȍ', &['ȍ']),
- ('ȍ', &['Ȍ']),
- ('Ȏ', &['ȏ']),
- ('ȏ', &['Ȏ']),
- ('Ȑ', &['ȑ']),
- ('ȑ', &['Ȑ']),
- ('Ȓ', &['ȓ']),
- ('ȓ', &['Ȓ']),
- ('Ȕ', &['ȕ']),
- ('ȕ', &['Ȕ']),
- ('Ȗ', &['ȗ']),
- ('ȗ', &['Ȗ']),
- ('Ș', &['ș']),
- ('ș', &['Ș']),
- ('Ț', &['ț']),
- ('ț', &['Ț']),
- ('Ȝ', &['ȝ']),
- ('ȝ', &['Ȝ']),
- ('Ȟ', &['ȟ']),
- ('ȟ', &['Ȟ']),
- ('Ƞ', &['ƞ']),
- ('Ȣ', &['ȣ']),
- ('ȣ', &['Ȣ']),
- ('Ȥ', &['ȥ']),
- ('ȥ', &['Ȥ']),
- ('Ȧ', &['ȧ']),
- ('ȧ', &['Ȧ']),
- ('Ȩ', &['ȩ']),
- ('ȩ', &['Ȩ']),
- ('Ȫ', &['ȫ']),
- ('ȫ', &['Ȫ']),
- ('Ȭ', &['ȭ']),
- ('ȭ', &['Ȭ']),
- ('Ȯ', &['ȯ']),
- ('ȯ', &['Ȯ']),
- ('Ȱ', &['ȱ']),
- ('ȱ', &['Ȱ']),
- ('Ȳ', &['ȳ']),
- ('ȳ', &['Ȳ']),
- ('Ⱥ', &['ⱥ']),
- ('Ȼ', &['ȼ']),
- ('ȼ', &['Ȼ']),
- ('Ƚ', &['ƚ']),
- ('Ⱦ', &['ⱦ']),
- ('ȿ', &['Ȿ']),
- ('ɀ', &['Ɀ']),
- ('Ɂ', &['ɂ']),
- ('ɂ', &['Ɂ']),
- ('Ƀ', &['ƀ']),
- ('Ʉ', &['ʉ']),
- ('Ʌ', &['ʌ']),
- ('Ɇ', &['ɇ']),
- ('ɇ', &['Ɇ']),
- ('Ɉ', &['ɉ']),
- ('ɉ', &['Ɉ']),
- ('Ɋ', &['ɋ']),
- ('ɋ', &['Ɋ']),
- ('Ɍ', &['ɍ']),
- ('ɍ', &['Ɍ']),
- ('Ɏ', &['ɏ']),
- ('ɏ', &['Ɏ']),
- ('ɐ', &['Ɐ']),
- ('ɑ', &['Ɑ']),
- ('ɒ', &['Ɒ']),
- ('ɓ', &['Ɓ']),
- ('ɔ', &['Ɔ']),
- ('ɖ', &['Ɖ']),
- ('ɗ', &['Ɗ']),
- ('ə', &['Ə']),
- ('ɛ', &['Ɛ']),
- ('ɜ', &['Ɜ']),
- ('ɠ', &['Ɠ']),
- ('ɡ', &['Ɡ']),
- ('ɣ', &['Ɣ']),
- ('ɤ', &['Ɤ']),
- ('ɥ', &['Ɥ']),
- ('ɦ', &['Ɦ']),
- ('ɨ', &['Ɨ']),
- ('ɩ', &['Ɩ']),
- ('ɪ', &['Ɪ']),
- ('ɫ', &['Ɫ']),
- ('ɬ', &['Ɬ']),
- ('ɯ', &['Ɯ']),
- ('ɱ', &['Ɱ']),
- ('ɲ', &['Ɲ']),
- ('ɵ', &['Ɵ']),
- ('ɽ', &['Ɽ']),
- ('ʀ', &['Ʀ']),
- ('ʂ', &['Ʂ']),
- ('ʃ', &['Ʃ']),
- ('ʇ', &['Ʇ']),
- ('ʈ', &['Ʈ']),
- ('ʉ', &['Ʉ']),
- ('ʊ', &['Ʊ']),
- ('ʋ', &['Ʋ']),
- ('ʌ', &['Ʌ']),
- ('ʒ', &['Ʒ']),
- ('ʝ', &['Ʝ']),
- ('ʞ', &['Ʞ']),
- ('\u{345}', &['Ι', 'ι', 'ι']),
- ('Ͱ', &['ͱ']),
- ('ͱ', &['Ͱ']),
- ('Ͳ', &['ͳ']),
- ('ͳ', &['Ͳ']),
- ('Ͷ', &['ͷ']),
- ('ͷ', &['Ͷ']),
- ('ͻ', &['Ͻ']),
- ('ͼ', &['Ͼ']),
- ('ͽ', &['Ͽ']),
- ('Ϳ', &['ϳ']),
- ('Ά', &['ά']),
- ('Έ', &['έ']),
- ('Ή', &['ή']),
- ('Ί', &['ί']),
- ('Ό', &['ό']),
- ('Ύ', &['ύ']),
- ('Ώ', &['ώ']),
- ('ΐ', &['ΐ']),
- ('Α', &['α']),
- ('Β', &['β', 'ϐ']),
- ('Γ', &['γ']),
- ('Δ', &['δ']),
- ('Ε', &['ε', 'ϵ']),
- ('Ζ', &['ζ']),
- ('Η', &['η']),
- ('Θ', &['θ', 'ϑ', 'ϴ']),
- ('Ι', &['\u{345}', 'ι', 'ι']),
- ('Κ', &['κ', 'ϰ']),
- ('Λ', &['λ']),
- ('Μ', &['µ', 'μ']),
- ('Ν', &['ν']),
- ('Ξ', &['ξ']),
- ('Ο', &['ο']),
- ('Π', &['π', 'ϖ']),
- ('Ρ', &['ρ', 'ϱ']),
- ('Σ', &['ς', 'σ']),
- ('Τ', &['τ']),
- ('Υ', &['υ']),
- ('Φ', &['φ', 'ϕ']),
- ('Χ', &['χ']),
- ('Ψ', &['ψ']),
- ('Ω', &['ω', 'Ω']),
- ('Ϊ', &['ϊ']),
- ('Ϋ', &['ϋ']),
- ('ά', &['Ά']),
- ('έ', &['Έ']),
- ('ή', &['Ή']),
- ('ί', &['Ί']),
- ('ΰ', &['ΰ']),
- ('α', &['Α']),
- ('β', &['Β', 'ϐ']),
- ('γ', &['Γ']),
- ('δ', &['Δ']),
- ('ε', &['Ε', 'ϵ']),
- ('ζ', &['Ζ']),
- ('η', &['Η']),
- ('θ', &['Θ', 'ϑ', 'ϴ']),
- ('ι', &['\u{345}', 'Ι', 'ι']),
- ('κ', &['Κ', 'ϰ']),
- ('λ', &['Λ']),
- ('μ', &['µ', 'Μ']),
- ('ν', &['Ν']),
- ('ξ', &['Ξ']),
- ('ο', &['Ο']),
- ('π', &['Π', 'ϖ']),
- ('ρ', &['Ρ', 'ϱ']),
- ('ς', &['Σ', 'σ']),
- ('σ', &['Σ', 'ς']),
- ('τ', &['Τ']),
- ('υ', &['Υ']),
- ('φ', &['Φ', 'ϕ']),
- ('χ', &['Χ']),
- ('ψ', &['Ψ']),
- ('ω', &['Ω', 'Ω']),
- ('ϊ', &['Ϊ']),
- ('ϋ', &['Ϋ']),
- ('ό', &['Ό']),
- ('ύ', &['Ύ']),
- ('ώ', &['Ώ']),
- ('Ϗ', &['ϗ']),
- ('ϐ', &['Β', 'β']),
- ('ϑ', &['Θ', 'θ', 'ϴ']),
- ('ϕ', &['Φ', 'φ']),
- ('ϖ', &['Π', 'π']),
- ('ϗ', &['Ϗ']),
- ('Ϙ', &['ϙ']),
- ('ϙ', &['Ϙ']),
- ('Ϛ', &['ϛ']),
- ('ϛ', &['Ϛ']),
- ('Ϝ', &['ϝ']),
- ('ϝ', &['Ϝ']),
- ('Ϟ', &['ϟ']),
- ('ϟ', &['Ϟ']),
- ('Ϡ', &['ϡ']),
- ('ϡ', &['Ϡ']),
- ('Ϣ', &['ϣ']),
- ('ϣ', &['Ϣ']),
- ('Ϥ', &['ϥ']),
- ('ϥ', &['Ϥ']),
- ('Ϧ', &['ϧ']),
- ('ϧ', &['Ϧ']),
- ('Ϩ', &['ϩ']),
- ('ϩ', &['Ϩ']),
- ('Ϫ', &['ϫ']),
- ('ϫ', &['Ϫ']),
- ('Ϭ', &['ϭ']),
- ('ϭ', &['Ϭ']),
- ('Ϯ', &['ϯ']),
- ('ϯ', &['Ϯ']),
- ('ϰ', &['Κ', 'κ']),
- ('ϱ', &['Ρ', 'ρ']),
- ('ϲ', &['Ϲ']),
- ('ϳ', &['Ϳ']),
- ('ϴ', &['Θ', 'θ', 'ϑ']),
- ('ϵ', &['Ε', 'ε']),
- ('Ϸ', &['ϸ']),
- ('ϸ', &['Ϸ']),
- ('Ϲ', &['ϲ']),
- ('Ϻ', &['ϻ']),
- ('ϻ', &['Ϻ']),
- ('Ͻ', &['ͻ']),
- ('Ͼ', &['ͼ']),
- ('Ͽ', &['ͽ']),
- ('Ѐ', &['ѐ']),
- ('Ё', &['ё']),
- ('Ђ', &['ђ']),
- ('Ѓ', &['ѓ']),
- ('Є', &['є']),
- ('Ѕ', &['ѕ']),
- ('І', &['і']),
- ('Ї', &['ї']),
- ('Ј', &['ј']),
- ('Љ', &['љ']),
- ('Њ', &['њ']),
- ('Ћ', &['ћ']),
- ('Ќ', &['ќ']),
- ('Ѝ', &['ѝ']),
- ('Ў', &['ў']),
- ('Џ', &['џ']),
- ('А', &['а']),
- ('Б', &['б']),
- ('В', &['в', 'ᲀ']),
- ('Г', &['г']),
- ('Д', &['д', 'ᲁ']),
- ('Е', &['е']),
- ('Ж', &['ж']),
- ('З', &['з']),
- ('И', &['и']),
- ('Й', &['й']),
- ('К', &['к']),
- ('Л', &['л']),
- ('М', &['м']),
- ('Н', &['н']),
- ('О', &['о', 'ᲂ']),
- ('П', &['п']),
- ('Р', &['р']),
- ('С', &['с', 'ᲃ']),
- ('Т', &['т', 'ᲄ', 'ᲅ']),
- ('У', &['у']),
- ('Ф', &['ф']),
- ('Х', &['х']),
- ('Ц', &['ц']),
- ('Ч', &['ч']),
- ('Ш', &['ш']),
- ('Щ', &['щ']),
- ('Ъ', &['ъ', 'ᲆ']),
- ('Ы', &['ы']),
- ('Ь', &['ь']),
- ('Э', &['э']),
- ('Ю', &['ю']),
- ('Я', &['я']),
- ('а', &['А']),
- ('б', &['Б']),
- ('в', &['В', 'ᲀ']),
- ('г', &['Г']),
- ('д', &['Д', 'ᲁ']),
- ('е', &['Е']),
- ('ж', &['Ж']),
- ('з', &['З']),
- ('и', &['И']),
- ('й', &['Й']),
- ('к', &['К']),
- ('л', &['Л']),
- ('м', &['М']),
- ('н', &['Н']),
- ('о', &['О', 'ᲂ']),
- ('п', &['П']),
- ('р', &['Р']),
- ('с', &['С', 'ᲃ']),
- ('т', &['Т', 'ᲄ', 'ᲅ']),
- ('у', &['У']),
- ('ф', &['Ф']),
- ('х', &['Х']),
- ('ц', &['Ц']),
- ('ч', &['Ч']),
- ('ш', &['Ш']),
- ('щ', &['Щ']),
- ('ъ', &['Ъ', 'ᲆ']),
- ('ы', &['Ы']),
- ('ь', &['Ь']),
- ('э', &['Э']),
- ('ю', &['Ю']),
- ('я', &['Я']),
- ('ѐ', &['Ѐ']),
- ('ё', &['Ё']),
- ('ђ', &['Ђ']),
- ('ѓ', &['Ѓ']),
- ('є', &['Є']),
- ('ѕ', &['Ѕ']),
- ('і', &['І']),
- ('ї', &['Ї']),
- ('ј', &['Ј']),
- ('љ', &['Љ']),
- ('њ', &['Њ']),
- ('ћ', &['Ћ']),
- ('ќ', &['Ќ']),
- ('ѝ', &['Ѝ']),
- ('ў', &['Ў']),
- ('џ', &['Џ']),
- ('Ѡ', &['ѡ']),
- ('ѡ', &['Ѡ']),
- ('Ѣ', &['ѣ', 'ᲇ']),
- ('ѣ', &['Ѣ', 'ᲇ']),
- ('Ѥ', &['ѥ']),
- ('ѥ', &['Ѥ']),
- ('Ѧ', &['ѧ']),
- ('ѧ', &['Ѧ']),
- ('Ѩ', &['ѩ']),
- ('ѩ', &['Ѩ']),
- ('Ѫ', &['ѫ']),
- ('ѫ', &['Ѫ']),
- ('Ѭ', &['ѭ']),
- ('ѭ', &['Ѭ']),
- ('Ѯ', &['ѯ']),
- ('ѯ', &['Ѯ']),
- ('Ѱ', &['ѱ']),
- ('ѱ', &['Ѱ']),
- ('Ѳ', &['ѳ']),
- ('ѳ', &['Ѳ']),
- ('Ѵ', &['ѵ']),
- ('ѵ', &['Ѵ']),
- ('Ѷ', &['ѷ']),
- ('ѷ', &['Ѷ']),
- ('Ѹ', &['ѹ']),
- ('ѹ', &['Ѹ']),
- ('Ѻ', &['ѻ']),
- ('ѻ', &['Ѻ']),
- ('Ѽ', &['ѽ']),
- ('ѽ', &['Ѽ']),
- ('Ѿ', &['ѿ']),
- ('ѿ', &['Ѿ']),
- ('Ҁ', &['ҁ']),
- ('ҁ', &['Ҁ']),
- ('Ҋ', &['ҋ']),
- ('ҋ', &['Ҋ']),
- ('Ҍ', &['ҍ']),
- ('ҍ', &['Ҍ']),
- ('Ҏ', &['ҏ']),
- ('ҏ', &['Ҏ']),
- ('Ґ', &['ґ']),
- ('ґ', &['Ґ']),
- ('Ғ', &['ғ']),
- ('ғ', &['Ғ']),
- ('Ҕ', &['ҕ']),
- ('ҕ', &['Ҕ']),
- ('Җ', &['җ']),
- ('җ', &['Җ']),
- ('Ҙ', &['ҙ']),
- ('ҙ', &['Ҙ']),
- ('Қ', &['қ']),
- ('қ', &['Қ']),
- ('Ҝ', &['ҝ']),
- ('ҝ', &['Ҝ']),
- ('Ҟ', &['ҟ']),
- ('ҟ', &['Ҟ']),
- ('Ҡ', &['ҡ']),
- ('ҡ', &['Ҡ']),
- ('Ң', &['ң']),
- ('ң', &['Ң']),
- ('Ҥ', &['ҥ']),
- ('ҥ', &['Ҥ']),
- ('Ҧ', &['ҧ']),
- ('ҧ', &['Ҧ']),
- ('Ҩ', &['ҩ']),
- ('ҩ', &['Ҩ']),
- ('Ҫ', &['ҫ']),
- ('ҫ', &['Ҫ']),
- ('Ҭ', &['ҭ']),
- ('ҭ', &['Ҭ']),
- ('Ү', &['ү']),
- ('ү', &['Ү']),
- ('Ұ', &['ұ']),
- ('ұ', &['Ұ']),
- ('Ҳ', &['ҳ']),
- ('ҳ', &['Ҳ']),
- ('Ҵ', &['ҵ']),
- ('ҵ', &['Ҵ']),
- ('Ҷ', &['ҷ']),
- ('ҷ', &['Ҷ']),
- ('Ҹ', &['ҹ']),
- ('ҹ', &['Ҹ']),
- ('Һ', &['һ']),
- ('һ', &['Һ']),
- ('Ҽ', &['ҽ']),
- ('ҽ', &['Ҽ']),
- ('Ҿ', &['ҿ']),
- ('ҿ', &['Ҿ']),
- ('Ӏ', &['ӏ']),
- ('Ӂ', &['ӂ']),
- ('ӂ', &['Ӂ']),
- ('Ӄ', &['ӄ']),
- ('ӄ', &['Ӄ']),
- ('Ӆ', &['ӆ']),
- ('ӆ', &['Ӆ']),
- ('Ӈ', &['ӈ']),
- ('ӈ', &['Ӈ']),
- ('Ӊ', &['ӊ']),
- ('ӊ', &['Ӊ']),
- ('Ӌ', &['ӌ']),
- ('ӌ', &['Ӌ']),
- ('Ӎ', &['ӎ']),
- ('ӎ', &['Ӎ']),
- ('ӏ', &['Ӏ']),
- ('Ӑ', &['ӑ']),
- ('ӑ', &['Ӑ']),
- ('Ӓ', &['ӓ']),
- ('ӓ', &['Ӓ']),
- ('Ӕ', &['ӕ']),
- ('ӕ', &['Ӕ']),
- ('Ӗ', &['ӗ']),
- ('ӗ', &['Ӗ']),
- ('Ә', &['ә']),
- ('ә', &['Ә']),
- ('Ӛ', &['ӛ']),
- ('ӛ', &['Ӛ']),
- ('Ӝ', &['ӝ']),
- ('ӝ', &['Ӝ']),
- ('Ӟ', &['ӟ']),
- ('ӟ', &['Ӟ']),
- ('Ӡ', &['ӡ']),
- ('ӡ', &['Ӡ']),
- ('Ӣ', &['ӣ']),
- ('ӣ', &['Ӣ']),
- ('Ӥ', &['ӥ']),
- ('ӥ', &['Ӥ']),
- ('Ӧ', &['ӧ']),
- ('ӧ', &['Ӧ']),
- ('Ө', &['ө']),
- ('ө', &['Ө']),
- ('Ӫ', &['ӫ']),
- ('ӫ', &['Ӫ']),
- ('Ӭ', &['ӭ']),
- ('ӭ', &['Ӭ']),
- ('Ӯ', &['ӯ']),
- ('ӯ', &['Ӯ']),
- ('Ӱ', &['ӱ']),
- ('ӱ', &['Ӱ']),
- ('Ӳ', &['ӳ']),
- ('ӳ', &['Ӳ']),
- ('Ӵ', &['ӵ']),
- ('ӵ', &['Ӵ']),
- ('Ӷ', &['ӷ']),
- ('ӷ', &['Ӷ']),
- ('Ӹ', &['ӹ']),
- ('ӹ', &['Ӹ']),
- ('Ӻ', &['ӻ']),
- ('ӻ', &['Ӻ']),
- ('Ӽ', &['ӽ']),
- ('ӽ', &['Ӽ']),
- ('Ӿ', &['ӿ']),
- ('ӿ', &['Ӿ']),
- ('Ԁ', &['ԁ']),
- ('ԁ', &['Ԁ']),
- ('Ԃ', &['ԃ']),
- ('ԃ', &['Ԃ']),
- ('Ԅ', &['ԅ']),
- ('ԅ', &['Ԅ']),
- ('Ԇ', &['ԇ']),
- ('ԇ', &['Ԇ']),
- ('Ԉ', &['ԉ']),
- ('ԉ', &['Ԉ']),
- ('Ԋ', &['ԋ']),
- ('ԋ', &['Ԋ']),
- ('Ԍ', &['ԍ']),
- ('ԍ', &['Ԍ']),
- ('Ԏ', &['ԏ']),
- ('ԏ', &['Ԏ']),
- ('Ԑ', &['ԑ']),
- ('ԑ', &['Ԑ']),
- ('Ԓ', &['ԓ']),
- ('ԓ', &['Ԓ']),
- ('Ԕ', &['ԕ']),
- ('ԕ', &['Ԕ']),
- ('Ԗ', &['ԗ']),
- ('ԗ', &['Ԗ']),
- ('Ԙ', &['ԙ']),
- ('ԙ', &['Ԙ']),
- ('Ԛ', &['ԛ']),
- ('ԛ', &['Ԛ']),
- ('Ԝ', &['ԝ']),
- ('ԝ', &['Ԝ']),
- ('Ԟ', &['ԟ']),
- ('ԟ', &['Ԟ']),
- ('Ԡ', &['ԡ']),
- ('ԡ', &['Ԡ']),
- ('Ԣ', &['ԣ']),
- ('ԣ', &['Ԣ']),
- ('Ԥ', &['ԥ']),
- ('ԥ', &['Ԥ']),
- ('Ԧ', &['ԧ']),
- ('ԧ', &['Ԧ']),
- ('Ԩ', &['ԩ']),
- ('ԩ', &['Ԩ']),
- ('Ԫ', &['ԫ']),
- ('ԫ', &['Ԫ']),
- ('Ԭ', &['ԭ']),
- ('ԭ', &['Ԭ']),
- ('Ԯ', &['ԯ']),
- ('ԯ', &['Ԯ']),
- ('Ա', &['ա']),
- ('Բ', &['բ']),
- ('Գ', &['գ']),
- ('Դ', &['դ']),
- ('Ե', &['ե']),
- ('Զ', &['զ']),
- ('Է', &['է']),
- ('Ը', &['ը']),
- ('Թ', &['թ']),
- ('Ժ', &['ժ']),
- ('Ի', &['ի']),
- ('Լ', &['լ']),
- ('Խ', &['խ']),
- ('Ծ', &['ծ']),
- ('Կ', &['կ']),
- ('Հ', &['հ']),
- ('Ձ', &['ձ']),
- ('Ղ', &['ղ']),
- ('Ճ', &['ճ']),
- ('Մ', &['մ']),
- ('Յ', &['յ']),
- ('Ն', &['ն']),
- ('Շ', &['շ']),
- ('Ո', &['ո']),
- ('Չ', &['չ']),
- ('Պ', &['պ']),
- ('Ջ', &['ջ']),
- ('Ռ', &['ռ']),
- ('Ս', &['ս']),
- ('Վ', &['վ']),
- ('Տ', &['տ']),
- ('Ր', &['ր']),
- ('Ց', &['ց']),
- ('Ւ', &['ւ']),
- ('Փ', &['փ']),
- ('Ք', &['ք']),
- ('Օ', &['օ']),
- ('Ֆ', &['ֆ']),
- ('ա', &['Ա']),
- ('բ', &['Բ']),
- ('գ', &['Գ']),
- ('դ', &['Դ']),
- ('ե', &['Ե']),
- ('զ', &['Զ']),
- ('է', &['Է']),
- ('ը', &['Ը']),
- ('թ', &['Թ']),
- ('ժ', &['Ժ']),
- ('ի', &['Ի']),
- ('լ', &['Լ']),
- ('խ', &['Խ']),
- ('ծ', &['Ծ']),
- ('կ', &['Կ']),
- ('հ', &['Հ']),
- ('ձ', &['Ձ']),
- ('ղ', &['Ղ']),
- ('ճ', &['Ճ']),
- ('մ', &['Մ']),
- ('յ', &['Յ']),
- ('ն', &['Ն']),
- ('շ', &['Շ']),
- ('ո', &['Ո']),
- ('չ', &['Չ']),
- ('պ', &['Պ']),
- ('ջ', &['Ջ']),
- ('ռ', &['Ռ']),
- ('ս', &['Ս']),
- ('վ', &['Վ']),
- ('տ', &['Տ']),
- ('ր', &['Ր']),
- ('ց', &['Ց']),
- ('ւ', &['Ւ']),
- ('փ', &['Փ']),
- ('ք', &['Ք']),
- ('օ', &['Օ']),
- ('ֆ', &['Ֆ']),
- ('Ⴀ', &['ⴀ']),
- ('Ⴁ', &['ⴁ']),
- ('Ⴂ', &['ⴂ']),
- ('Ⴃ', &['ⴃ']),
- ('Ⴄ', &['ⴄ']),
- ('Ⴅ', &['ⴅ']),
- ('Ⴆ', &['ⴆ']),
- ('Ⴇ', &['ⴇ']),
- ('Ⴈ', &['ⴈ']),
- ('Ⴉ', &['ⴉ']),
- ('Ⴊ', &['ⴊ']),
- ('Ⴋ', &['ⴋ']),
- ('Ⴌ', &['ⴌ']),
- ('Ⴍ', &['ⴍ']),
- ('Ⴎ', &['ⴎ']),
- ('Ⴏ', &['ⴏ']),
- ('Ⴐ', &['ⴐ']),
- ('Ⴑ', &['ⴑ']),
- ('Ⴒ', &['ⴒ']),
- ('Ⴓ', &['ⴓ']),
- ('Ⴔ', &['ⴔ']),
- ('Ⴕ', &['ⴕ']),
- ('Ⴖ', &['ⴖ']),
- ('Ⴗ', &['ⴗ']),
- ('Ⴘ', &['ⴘ']),
- ('Ⴙ', &['ⴙ']),
- ('Ⴚ', &['ⴚ']),
- ('Ⴛ', &['ⴛ']),
- ('Ⴜ', &['ⴜ']),
- ('Ⴝ', &['ⴝ']),
- ('Ⴞ', &['ⴞ']),
- ('Ⴟ', &['ⴟ']),
- ('Ⴠ', &['ⴠ']),
- ('Ⴡ', &['ⴡ']),
- ('Ⴢ', &['ⴢ']),
- ('Ⴣ', &['ⴣ']),
- ('Ⴤ', &['ⴤ']),
- ('Ⴥ', &['ⴥ']),
- ('Ⴧ', &['ⴧ']),
- ('Ⴭ', &['ⴭ']),
- ('ა', &['Ა']),
- ('ბ', &['Ბ']),
- ('გ', &['Გ']),
- ('დ', &['Დ']),
- ('ე', &['Ე']),
- ('ვ', &['Ვ']),
- ('ზ', &['Ზ']),
- ('თ', &['Თ']),
- ('ი', &['Ი']),
- ('კ', &['Კ']),
- ('ლ', &['Ლ']),
- ('მ', &['Მ']),
- ('ნ', &['Ნ']),
- ('ო', &['Ო']),
- ('პ', &['Პ']),
- ('ჟ', &['Ჟ']),
- ('რ', &['Რ']),
- ('ს', &['Ს']),
- ('ტ', &['Ტ']),
- ('უ', &['Უ']),
- ('ფ', &['Ფ']),
- ('ქ', &['Ქ']),
- ('ღ', &['Ღ']),
- ('ყ', &['Ყ']),
- ('შ', &['Შ']),
- ('ჩ', &['Ჩ']),
- ('ც', &['Ც']),
- ('ძ', &['Ძ']),
- ('წ', &['Წ']),
- ('ჭ', &['Ჭ']),
- ('ხ', &['Ხ']),
- ('ჯ', &['Ჯ']),
- ('ჰ', &['Ჰ']),
- ('ჱ', &['Ჱ']),
- ('ჲ', &['Ჲ']),
- ('ჳ', &['Ჳ']),
- ('ჴ', &['Ჴ']),
- ('ჵ', &['Ჵ']),
- ('ჶ', &['Ჶ']),
- ('ჷ', &['Ჷ']),
- ('ჸ', &['Ჸ']),
- ('ჹ', &['Ჹ']),
- ('ჺ', &['Ჺ']),
- ('ჽ', &['Ჽ']),
- ('ჾ', &['Ჾ']),
- ('ჿ', &['Ჿ']),
- ('Ꭰ', &['ꭰ']),
- ('Ꭱ', &['ꭱ']),
- ('Ꭲ', &['ꭲ']),
- ('Ꭳ', &['ꭳ']),
- ('Ꭴ', &['ꭴ']),
- ('Ꭵ', &['ꭵ']),
- ('Ꭶ', &['ꭶ']),
- ('Ꭷ', &['ꭷ']),
- ('Ꭸ', &['ꭸ']),
- ('Ꭹ', &['ꭹ']),
- ('Ꭺ', &['ꭺ']),
- ('Ꭻ', &['ꭻ']),
- ('Ꭼ', &['ꭼ']),
- ('Ꭽ', &['ꭽ']),
- ('Ꭾ', &['ꭾ']),
- ('Ꭿ', &['ꭿ']),
- ('Ꮀ', &['ꮀ']),
- ('Ꮁ', &['ꮁ']),
- ('Ꮂ', &['ꮂ']),
- ('Ꮃ', &['ꮃ']),
- ('Ꮄ', &['ꮄ']),
- ('Ꮅ', &['ꮅ']),
- ('Ꮆ', &['ꮆ']),
- ('Ꮇ', &['ꮇ']),
- ('Ꮈ', &['ꮈ']),
- ('Ꮉ', &['ꮉ']),
- ('Ꮊ', &['ꮊ']),
- ('Ꮋ', &['ꮋ']),
- ('Ꮌ', &['ꮌ']),
- ('Ꮍ', &['ꮍ']),
- ('Ꮎ', &['ꮎ']),
- ('Ꮏ', &['ꮏ']),
- ('Ꮐ', &['ꮐ']),
- ('Ꮑ', &['ꮑ']),
- ('Ꮒ', &['ꮒ']),
- ('Ꮓ', &['ꮓ']),
- ('Ꮔ', &['ꮔ']),
- ('Ꮕ', &['ꮕ']),
- ('Ꮖ', &['ꮖ']),
- ('Ꮗ', &['ꮗ']),
- ('Ꮘ', &['ꮘ']),
- ('Ꮙ', &['ꮙ']),
- ('Ꮚ', &['ꮚ']),
- ('Ꮛ', &['ꮛ']),
- ('Ꮜ', &['ꮜ']),
- ('Ꮝ', &['ꮝ']),
- ('Ꮞ', &['ꮞ']),
- ('Ꮟ', &['ꮟ']),
- ('Ꮠ', &['ꮠ']),
- ('Ꮡ', &['ꮡ']),
- ('Ꮢ', &['ꮢ']),
- ('Ꮣ', &['ꮣ']),
- ('Ꮤ', &['ꮤ']),
- ('Ꮥ', &['ꮥ']),
- ('Ꮦ', &['ꮦ']),
- ('Ꮧ', &['ꮧ']),
- ('Ꮨ', &['ꮨ']),
- ('Ꮩ', &['ꮩ']),
- ('Ꮪ', &['ꮪ']),
- ('Ꮫ', &['ꮫ']),
- ('Ꮬ', &['ꮬ']),
- ('Ꮭ', &['ꮭ']),
- ('Ꮮ', &['ꮮ']),
- ('Ꮯ', &['ꮯ']),
- ('Ꮰ', &['ꮰ']),
- ('Ꮱ', &['ꮱ']),
- ('Ꮲ', &['ꮲ']),
- ('Ꮳ', &['ꮳ']),
- ('Ꮴ', &['ꮴ']),
- ('Ꮵ', &['ꮵ']),
- ('Ꮶ', &['ꮶ']),
- ('Ꮷ', &['ꮷ']),
- ('Ꮸ', &['ꮸ']),
- ('Ꮹ', &['ꮹ']),
- ('Ꮺ', &['ꮺ']),
- ('Ꮻ', &['ꮻ']),
- ('Ꮼ', &['ꮼ']),
- ('Ꮽ', &['ꮽ']),
- ('Ꮾ', &['ꮾ']),
- ('Ꮿ', &['ꮿ']),
- ('Ᏸ', &['ᏸ']),
- ('Ᏹ', &['ᏹ']),
- ('Ᏺ', &['ᏺ']),
- ('Ᏻ', &['ᏻ']),
- ('Ᏼ', &['ᏼ']),
- ('Ᏽ', &['ᏽ']),
- ('ᏸ', &['Ᏸ']),
- ('ᏹ', &['Ᏹ']),
- ('ᏺ', &['Ᏺ']),
- ('ᏻ', &['Ᏻ']),
- ('ᏼ', &['Ᏼ']),
- ('ᏽ', &['Ᏽ']),
- ('ᲀ', &['В', 'в']),
- ('ᲁ', &['Д', 'д']),
- ('ᲂ', &['О', 'о']),
- ('ᲃ', &['С', 'с']),
- ('ᲄ', &['Т', 'т', 'ᲅ']),
- ('ᲅ', &['Т', 'т', 'ᲄ']),
- ('ᲆ', &['Ъ', 'ъ']),
- ('ᲇ', &['Ѣ', 'ѣ']),
- ('ᲈ', &['Ꙋ', 'ꙋ']),
- ('Ᲊ', &['ᲊ']),
- ('ᲊ', &['Ᲊ']),
- ('Ა', &['ა']),
- ('Ბ', &['ბ']),
- ('Გ', &['გ']),
- ('Დ', &['დ']),
- ('Ე', &['ე']),
- ('Ვ', &['ვ']),
- ('Ზ', &['ზ']),
- ('Თ', &['თ']),
- ('Ი', &['ი']),
- ('Კ', &['კ']),
- ('Ლ', &['ლ']),
- ('Მ', &['მ']),
- ('Ნ', &['ნ']),
- ('Ო', &['ო']),
- ('Პ', &['პ']),
- ('Ჟ', &['ჟ']),
- ('Რ', &['რ']),
- ('Ს', &['ს']),
- ('Ტ', &['ტ']),
- ('Უ', &['უ']),
- ('Ფ', &['ფ']),
- ('Ქ', &['ქ']),
- ('Ღ', &['ღ']),
- ('Ყ', &['ყ']),
- ('Შ', &['შ']),
- ('Ჩ', &['ჩ']),
- ('Ც', &['ც']),
- ('Ძ', &['ძ']),
- ('Წ', &['წ']),
- ('Ჭ', &['ჭ']),
- ('Ხ', &['ხ']),
- ('Ჯ', &['ჯ']),
- ('Ჰ', &['ჰ']),
- ('Ჱ', &['ჱ']),
- ('Ჲ', &['ჲ']),
- ('Ჳ', &['ჳ']),
- ('Ჴ', &['ჴ']),
- ('Ჵ', &['ჵ']),
- ('Ჶ', &['ჶ']),
- ('Ჷ', &['ჷ']),
- ('Ჸ', &['ჸ']),
- ('Ჹ', &['ჹ']),
- ('Ჺ', &['ჺ']),
- ('Ჽ', &['ჽ']),
- ('Ჾ', &['ჾ']),
- ('Ჿ', &['ჿ']),
- ('ᵹ', &['Ᵹ']),
- ('ᵽ', &['Ᵽ']),
- ('ᶎ', &['Ᶎ']),
- ('Ḁ', &['ḁ']),
- ('ḁ', &['Ḁ']),
- ('Ḃ', &['ḃ']),
- ('ḃ', &['Ḃ']),
- ('Ḅ', &['ḅ']),
- ('ḅ', &['Ḅ']),
- ('Ḇ', &['ḇ']),
- ('ḇ', &['Ḇ']),
- ('Ḉ', &['ḉ']),
- ('ḉ', &['Ḉ']),
- ('Ḋ', &['ḋ']),
- ('ḋ', &['Ḋ']),
- ('Ḍ', &['ḍ']),
- ('ḍ', &['Ḍ']),
- ('Ḏ', &['ḏ']),
- ('ḏ', &['Ḏ']),
- ('Ḑ', &['ḑ']),
- ('ḑ', &['Ḑ']),
- ('Ḓ', &['ḓ']),
- ('ḓ', &['Ḓ']),
- ('Ḕ', &['ḕ']),
- ('ḕ', &['Ḕ']),
- ('Ḗ', &['ḗ']),
- ('ḗ', &['Ḗ']),
- ('Ḙ', &['ḙ']),
- ('ḙ', &['Ḙ']),
- ('Ḛ', &['ḛ']),
- ('ḛ', &['Ḛ']),
- ('Ḝ', &['ḝ']),
- ('ḝ', &['Ḝ']),
- ('Ḟ', &['ḟ']),
- ('ḟ', &['Ḟ']),
- ('Ḡ', &['ḡ']),
- ('ḡ', &['Ḡ']),
- ('Ḣ', &['ḣ']),
- ('ḣ', &['Ḣ']),
- ('Ḥ', &['ḥ']),
- ('ḥ', &['Ḥ']),
- ('Ḧ', &['ḧ']),
- ('ḧ', &['Ḧ']),
- ('Ḩ', &['ḩ']),
- ('ḩ', &['Ḩ']),
- ('Ḫ', &['ḫ']),
- ('ḫ', &['Ḫ']),
- ('Ḭ', &['ḭ']),
- ('ḭ', &['Ḭ']),
- ('Ḯ', &['ḯ']),
- ('ḯ', &['Ḯ']),
- ('Ḱ', &['ḱ']),
- ('ḱ', &['Ḱ']),
- ('Ḳ', &['ḳ']),
- ('ḳ', &['Ḳ']),
- ('Ḵ', &['ḵ']),
- ('ḵ', &['Ḵ']),
- ('Ḷ', &['ḷ']),
- ('ḷ', &['Ḷ']),
- ('Ḹ', &['ḹ']),
- ('ḹ', &['Ḹ']),
- ('Ḻ', &['ḻ']),
- ('ḻ', &['Ḻ']),
- ('Ḽ', &['ḽ']),
- ('ḽ', &['Ḽ']),
- ('Ḿ', &['ḿ']),
- ('ḿ', &['Ḿ']),
- ('Ṁ', &['ṁ']),
- ('ṁ', &['Ṁ']),
- ('Ṃ', &['ṃ']),
- ('ṃ', &['Ṃ']),
- ('Ṅ', &['ṅ']),
- ('ṅ', &['Ṅ']),
- ('Ṇ', &['ṇ']),
- ('ṇ', &['Ṇ']),
- ('Ṉ', &['ṉ']),
- ('ṉ', &['Ṉ']),
- ('Ṋ', &['ṋ']),
- ('ṋ', &['Ṋ']),
- ('Ṍ', &['ṍ']),
- ('ṍ', &['Ṍ']),
- ('Ṏ', &['ṏ']),
- ('ṏ', &['Ṏ']),
- ('Ṑ', &['ṑ']),
- ('ṑ', &['Ṑ']),
- ('Ṓ', &['ṓ']),
- ('ṓ', &['Ṓ']),
- ('Ṕ', &['ṕ']),
- ('ṕ', &['Ṕ']),
- ('Ṗ', &['ṗ']),
- ('ṗ', &['Ṗ']),
- ('Ṙ', &['ṙ']),
- ('ṙ', &['Ṙ']),
- ('Ṛ', &['ṛ']),
- ('ṛ', &['Ṛ']),
- ('Ṝ', &['ṝ']),
- ('ṝ', &['Ṝ']),
- ('Ṟ', &['ṟ']),
- ('ṟ', &['Ṟ']),
- ('Ṡ', &['ṡ', 'ẛ']),
- ('ṡ', &['Ṡ', 'ẛ']),
- ('Ṣ', &['ṣ']),
- ('ṣ', &['Ṣ']),
- ('Ṥ', &['ṥ']),
- ('ṥ', &['Ṥ']),
- ('Ṧ', &['ṧ']),
- ('ṧ', &['Ṧ']),
- ('Ṩ', &['ṩ']),
- ('ṩ', &['Ṩ']),
- ('Ṫ', &['ṫ']),
- ('ṫ', &['Ṫ']),
- ('Ṭ', &['ṭ']),
- ('ṭ', &['Ṭ']),
- ('Ṯ', &['ṯ']),
- ('ṯ', &['Ṯ']),
- ('Ṱ', &['ṱ']),
- ('ṱ', &['Ṱ']),
- ('Ṳ', &['ṳ']),
- ('ṳ', &['Ṳ']),
- ('Ṵ', &['ṵ']),
- ('ṵ', &['Ṵ']),
- ('Ṷ', &['ṷ']),
- ('ṷ', &['Ṷ']),
- ('Ṹ', &['ṹ']),
- ('ṹ', &['Ṹ']),
- ('Ṻ', &['ṻ']),
- ('ṻ', &['Ṻ']),
- ('Ṽ', &['ṽ']),
- ('ṽ', &['Ṽ']),
- ('Ṿ', &['ṿ']),
- ('ṿ', &['Ṿ']),
- ('Ẁ', &['ẁ']),
- ('ẁ', &['Ẁ']),
- ('Ẃ', &['ẃ']),
- ('ẃ', &['Ẃ']),
- ('Ẅ', &['ẅ']),
- ('ẅ', &['Ẅ']),
- ('Ẇ', &['ẇ']),
- ('ẇ', &['Ẇ']),
- ('Ẉ', &['ẉ']),
- ('ẉ', &['Ẉ']),
- ('Ẋ', &['ẋ']),
- ('ẋ', &['Ẋ']),
- ('Ẍ', &['ẍ']),
- ('ẍ', &['Ẍ']),
- ('Ẏ', &['ẏ']),
- ('ẏ', &['Ẏ']),
- ('Ẑ', &['ẑ']),
- ('ẑ', &['Ẑ']),
- ('Ẓ', &['ẓ']),
- ('ẓ', &['Ẓ']),
- ('Ẕ', &['ẕ']),
- ('ẕ', &['Ẕ']),
- ('ẛ', &['Ṡ', 'ṡ']),
- ('ẞ', &['ß']),
- ('Ạ', &['ạ']),
- ('ạ', &['Ạ']),
- ('Ả', &['ả']),
- ('ả', &['Ả']),
- ('Ấ', &['ấ']),
- ('ấ', &['Ấ']),
- ('Ầ', &['ầ']),
- ('ầ', &['Ầ']),
- ('Ẩ', &['ẩ']),
- ('ẩ', &['Ẩ']),
- ('Ẫ', &['ẫ']),
- ('ẫ', &['Ẫ']),
- ('Ậ', &['ậ']),
- ('ậ', &['Ậ']),
- ('Ắ', &['ắ']),
- ('ắ', &['Ắ']),
- ('Ằ', &['ằ']),
- ('ằ', &['Ằ']),
- ('Ẳ', &['ẳ']),
- ('ẳ', &['Ẳ']),
- ('Ẵ', &['ẵ']),
- ('ẵ', &['Ẵ']),
- ('Ặ', &['ặ']),
- ('ặ', &['Ặ']),
- ('Ẹ', &['ẹ']),
- ('ẹ', &['Ẹ']),
- ('Ẻ', &['ẻ']),
- ('ẻ', &['Ẻ']),
- ('Ẽ', &['ẽ']),
- ('ẽ', &['Ẽ']),
- ('Ế', &['ế']),
- ('ế', &['Ế']),
- ('Ề', &['ề']),
- ('ề', &['Ề']),
- ('Ể', &['ể']),
- ('ể', &['Ể']),
- ('Ễ', &['ễ']),
- ('ễ', &['Ễ']),
- ('Ệ', &['ệ']),
- ('ệ', &['Ệ']),
- ('Ỉ', &['ỉ']),
- ('ỉ', &['Ỉ']),
- ('Ị', &['ị']),
- ('ị', &['Ị']),
- ('Ọ', &['ọ']),
- ('ọ', &['Ọ']),
- ('Ỏ', &['ỏ']),
- ('ỏ', &['Ỏ']),
- ('Ố', &['ố']),
- ('ố', &['Ố']),
- ('Ồ', &['ồ']),
- ('ồ', &['Ồ']),
- ('Ổ', &['ổ']),
- ('ổ', &['Ổ']),
- ('Ỗ', &['ỗ']),
- ('ỗ', &['Ỗ']),
- ('Ộ', &['ộ']),
- ('ộ', &['Ộ']),
- ('Ớ', &['ớ']),
- ('ớ', &['Ớ']),
- ('Ờ', &['ờ']),
- ('ờ', &['Ờ']),
- ('Ở', &['ở']),
- ('ở', &['Ở']),
- ('Ỡ', &['ỡ']),
- ('ỡ', &['Ỡ']),
- ('Ợ', &['ợ']),
- ('ợ', &['Ợ']),
- ('Ụ', &['ụ']),
- ('ụ', &['Ụ']),
- ('Ủ', &['ủ']),
- ('ủ', &['Ủ']),
- ('Ứ', &['ứ']),
- ('ứ', &['Ứ']),
- ('Ừ', &['ừ']),
- ('ừ', &['Ừ']),
- ('Ử', &['ử']),
- ('ử', &['Ử']),
- ('Ữ', &['ữ']),
- ('ữ', &['Ữ']),
- ('Ự', &['ự']),
- ('ự', &['Ự']),
- ('Ỳ', &['ỳ']),
- ('ỳ', &['Ỳ']),
- ('Ỵ', &['ỵ']),
- ('ỵ', &['Ỵ']),
- ('Ỷ', &['ỷ']),
- ('ỷ', &['Ỷ']),
- ('Ỹ', &['ỹ']),
- ('ỹ', &['Ỹ']),
- ('Ỻ', &['ỻ']),
- ('ỻ', &['Ỻ']),
- ('Ỽ', &['ỽ']),
- ('ỽ', &['Ỽ']),
- ('Ỿ', &['ỿ']),
- ('ỿ', &['Ỿ']),
- ('ἀ', &['Ἀ']),
- ('ἁ', &['Ἁ']),
- ('ἂ', &['Ἂ']),
- ('ἃ', &['Ἃ']),
- ('ἄ', &['Ἄ']),
- ('ἅ', &['Ἅ']),
- ('ἆ', &['Ἆ']),
- ('ἇ', &['Ἇ']),
- ('Ἀ', &['ἀ']),
- ('Ἁ', &['ἁ']),
- ('Ἂ', &['ἂ']),
- ('Ἃ', &['ἃ']),
- ('Ἄ', &['ἄ']),
- ('Ἅ', &['ἅ']),
- ('Ἆ', &['ἆ']),
- ('Ἇ', &['ἇ']),
- ('ἐ', &['Ἐ']),
- ('ἑ', &['Ἑ']),
- ('ἒ', &['Ἒ']),
- ('ἓ', &['Ἓ']),
- ('ἔ', &['Ἔ']),
- ('ἕ', &['Ἕ']),
- ('Ἐ', &['ἐ']),
- ('Ἑ', &['ἑ']),
- ('Ἒ', &['ἒ']),
- ('Ἓ', &['ἓ']),
- ('Ἔ', &['ἔ']),
- ('Ἕ', &['ἕ']),
- ('ἠ', &['Ἠ']),
- ('ἡ', &['Ἡ']),
- ('ἢ', &['Ἢ']),
- ('ἣ', &['Ἣ']),
- ('ἤ', &['Ἤ']),
- ('ἥ', &['Ἥ']),
- ('ἦ', &['Ἦ']),
- ('ἧ', &['Ἧ']),
- ('Ἠ', &['ἠ']),
- ('Ἡ', &['ἡ']),
- ('Ἢ', &['ἢ']),
- ('Ἣ', &['ἣ']),
- ('Ἤ', &['ἤ']),
- ('Ἥ', &['ἥ']),
- ('Ἦ', &['ἦ']),
- ('Ἧ', &['ἧ']),
- ('ἰ', &['Ἰ']),
- ('ἱ', &['Ἱ']),
- ('ἲ', &['Ἲ']),
- ('ἳ', &['Ἳ']),
- ('ἴ', &['Ἴ']),
- ('ἵ', &['Ἵ']),
- ('ἶ', &['Ἶ']),
- ('ἷ', &['Ἷ']),
- ('Ἰ', &['ἰ']),
- ('Ἱ', &['ἱ']),
- ('Ἲ', &['ἲ']),
- ('Ἳ', &['ἳ']),
- ('Ἴ', &['ἴ']),
- ('Ἵ', &['ἵ']),
- ('Ἶ', &['ἶ']),
- ('Ἷ', &['ἷ']),
- ('ὀ', &['Ὀ']),
- ('ὁ', &['Ὁ']),
- ('ὂ', &['Ὂ']),
- ('ὃ', &['Ὃ']),
- ('ὄ', &['Ὄ']),
- ('ὅ', &['Ὅ']),
- ('Ὀ', &['ὀ']),
- ('Ὁ', &['ὁ']),
- ('Ὂ', &['ὂ']),
- ('Ὃ', &['ὃ']),
- ('Ὄ', &['ὄ']),
- ('Ὅ', &['ὅ']),
- ('ὑ', &['Ὑ']),
- ('ὓ', &['Ὓ']),
- ('ὕ', &['Ὕ']),
- ('ὗ', &['Ὗ']),
- ('Ὑ', &['ὑ']),
- ('Ὓ', &['ὓ']),
- ('Ὕ', &['ὕ']),
- ('Ὗ', &['ὗ']),
- ('ὠ', &['Ὠ']),
- ('ὡ', &['Ὡ']),
- ('ὢ', &['Ὢ']),
- ('ὣ', &['Ὣ']),
- ('ὤ', &['Ὤ']),
- ('ὥ', &['Ὥ']),
- ('ὦ', &['Ὦ']),
- ('ὧ', &['Ὧ']),
- ('Ὠ', &['ὠ']),
- ('Ὡ', &['ὡ']),
- ('Ὢ', &['ὢ']),
- ('Ὣ', &['ὣ']),
- ('Ὤ', &['ὤ']),
- ('Ὥ', &['ὥ']),
- ('Ὦ', &['ὦ']),
- ('Ὧ', &['ὧ']),
- ('ὰ', &['Ὰ']),
- ('ά', &['Ά']),
- ('ὲ', &['Ὲ']),
- ('έ', &['Έ']),
- ('ὴ', &['Ὴ']),
- ('ή', &['Ή']),
- ('ὶ', &['Ὶ']),
- ('ί', &['Ί']),
- ('ὸ', &['Ὸ']),
- ('ό', &['Ό']),
- ('ὺ', &['Ὺ']),
- ('ύ', &['Ύ']),
- ('ὼ', &['Ὼ']),
- ('ώ', &['Ώ']),
- ('ᾀ', &['ᾈ']),
- ('ᾁ', &['ᾉ']),
- ('ᾂ', &['ᾊ']),
- ('ᾃ', &['ᾋ']),
- ('ᾄ', &['ᾌ']),
- ('ᾅ', &['ᾍ']),
- ('ᾆ', &['ᾎ']),
- ('ᾇ', &['ᾏ']),
- ('ᾈ', &['ᾀ']),
- ('ᾉ', &['ᾁ']),
- ('ᾊ', &['ᾂ']),
- ('ᾋ', &['ᾃ']),
- ('ᾌ', &['ᾄ']),
- ('ᾍ', &['ᾅ']),
- ('ᾎ', &['ᾆ']),
- ('ᾏ', &['ᾇ']),
- ('ᾐ', &['ᾘ']),
- ('ᾑ', &['ᾙ']),
- ('ᾒ', &['ᾚ']),
- ('ᾓ', &['ᾛ']),
- ('ᾔ', &['ᾜ']),
- ('ᾕ', &['ᾝ']),
- ('ᾖ', &['ᾞ']),
- ('ᾗ', &['ᾟ']),
- ('ᾘ', &['ᾐ']),
- ('ᾙ', &['ᾑ']),
- ('ᾚ', &['ᾒ']),
- ('ᾛ', &['ᾓ']),
- ('ᾜ', &['ᾔ']),
- ('ᾝ', &['ᾕ']),
- ('ᾞ', &['ᾖ']),
- ('ᾟ', &['ᾗ']),
- ('ᾠ', &['ᾨ']),
- ('ᾡ', &['ᾩ']),
- ('ᾢ', &['ᾪ']),
- ('ᾣ', &['ᾫ']),
- ('ᾤ', &['ᾬ']),
- ('ᾥ', &['ᾭ']),
- ('ᾦ', &['ᾮ']),
- ('ᾧ', &['ᾯ']),
- ('ᾨ', &['ᾠ']),
- ('ᾩ', &['ᾡ']),
- ('ᾪ', &['ᾢ']),
- ('ᾫ', &['ᾣ']),
- ('ᾬ', &['ᾤ']),
- ('ᾭ', &['ᾥ']),
- ('ᾮ', &['ᾦ']),
- ('ᾯ', &['ᾧ']),
- ('ᾰ', &['Ᾰ']),
- ('ᾱ', &['Ᾱ']),
- ('ᾳ', &['ᾼ']),
- ('Ᾰ', &['ᾰ']),
- ('Ᾱ', &['ᾱ']),
- ('Ὰ', &['ὰ']),
- ('Ά', &['ά']),
- ('ᾼ', &['ᾳ']),
- ('ι', &['\u{345}', 'Ι', 'ι']),
- ('ῃ', &['ῌ']),
- ('Ὲ', &['ὲ']),
- ('Έ', &['έ']),
- ('Ὴ', &['ὴ']),
- ('Ή', &['ή']),
- ('ῌ', &['ῃ']),
- ('ῐ', &['Ῐ']),
- ('ῑ', &['Ῑ']),
- ('ΐ', &['ΐ']),
- ('Ῐ', &['ῐ']),
- ('Ῑ', &['ῑ']),
- ('Ὶ', &['ὶ']),
- ('Ί', &['ί']),
- ('ῠ', &['Ῠ']),
- ('ῡ', &['Ῡ']),
- ('ΰ', &['ΰ']),
- ('ῥ', &['Ῥ']),
- ('Ῠ', &['ῠ']),
- ('Ῡ', &['ῡ']),
- ('Ὺ', &['ὺ']),
- ('Ύ', &['ύ']),
- ('Ῥ', &['ῥ']),
- ('ῳ', &['ῼ']),
- ('Ὸ', &['ὸ']),
- ('Ό', &['ό']),
- ('Ὼ', &['ὼ']),
- ('Ώ', &['ώ']),
- ('ῼ', &['ῳ']),
- ('Ω', &['Ω', 'ω']),
- ('K', &['K', 'k']),
- ('Å', &['Å', 'å']),
- ('Ⅎ', &['ⅎ']),
- ('ⅎ', &['Ⅎ']),
- ('Ⅰ', &['ⅰ']),
- ('Ⅱ', &['ⅱ']),
- ('Ⅲ', &['ⅲ']),
- ('Ⅳ', &['ⅳ']),
- ('Ⅴ', &['ⅴ']),
- ('Ⅵ', &['ⅵ']),
- ('Ⅶ', &['ⅶ']),
- ('Ⅷ', &['ⅷ']),
- ('Ⅸ', &['ⅸ']),
- ('Ⅹ', &['ⅹ']),
- ('Ⅺ', &['ⅺ']),
- ('Ⅻ', &['ⅻ']),
- ('Ⅼ', &['ⅼ']),
- ('Ⅽ', &['ⅽ']),
- ('Ⅾ', &['ⅾ']),
- ('Ⅿ', &['ⅿ']),
- ('ⅰ', &['Ⅰ']),
- ('ⅱ', &['Ⅱ']),
- ('ⅲ', &['Ⅲ']),
- ('ⅳ', &['Ⅳ']),
- ('ⅴ', &['Ⅴ']),
- ('ⅵ', &['Ⅵ']),
- ('ⅶ', &['Ⅶ']),
- ('ⅷ', &['Ⅷ']),
- ('ⅸ', &['Ⅸ']),
- ('ⅹ', &['Ⅹ']),
- ('ⅺ', &['Ⅺ']),
- ('ⅻ', &['Ⅻ']),
- ('ⅼ', &['Ⅼ']),
- ('ⅽ', &['Ⅽ']),
- ('ⅾ', &['Ⅾ']),
- ('ⅿ', &['Ⅿ']),
- ('Ↄ', &['ↄ']),
- ('ↄ', &['Ↄ']),
- ('Ⓐ', &['ⓐ']),
- ('Ⓑ', &['ⓑ']),
- ('Ⓒ', &['ⓒ']),
- ('Ⓓ', &['ⓓ']),
- ('Ⓔ', &['ⓔ']),
- ('Ⓕ', &['ⓕ']),
- ('Ⓖ', &['ⓖ']),
- ('Ⓗ', &['ⓗ']),
- ('Ⓘ', &['ⓘ']),
- ('Ⓙ', &['ⓙ']),
- ('Ⓚ', &['ⓚ']),
- ('Ⓛ', &['ⓛ']),
- ('Ⓜ', &['ⓜ']),
- ('Ⓝ', &['ⓝ']),
- ('Ⓞ', &['ⓞ']),
- ('Ⓟ', &['ⓟ']),
- ('Ⓠ', &['ⓠ']),
- ('Ⓡ', &['ⓡ']),
- ('Ⓢ', &['ⓢ']),
- ('Ⓣ', &['ⓣ']),
- ('Ⓤ', &['ⓤ']),
- ('Ⓥ', &['ⓥ']),
- ('Ⓦ', &['ⓦ']),
- ('Ⓧ', &['ⓧ']),
- ('Ⓨ', &['ⓨ']),
- ('Ⓩ', &['ⓩ']),
- ('ⓐ', &['Ⓐ']),
- ('ⓑ', &['Ⓑ']),
- ('ⓒ', &['Ⓒ']),
- ('ⓓ', &['Ⓓ']),
- ('ⓔ', &['Ⓔ']),
- ('ⓕ', &['Ⓕ']),
- ('ⓖ', &['Ⓖ']),
- ('ⓗ', &['Ⓗ']),
- ('ⓘ', &['Ⓘ']),
- ('ⓙ', &['Ⓙ']),
- ('ⓚ', &['Ⓚ']),
- ('ⓛ', &['Ⓛ']),
- ('ⓜ', &['Ⓜ']),
- ('ⓝ', &['Ⓝ']),
- ('ⓞ', &['Ⓞ']),
- ('ⓟ', &['Ⓟ']),
- ('ⓠ', &['Ⓠ']),
- ('ⓡ', &['Ⓡ']),
- ('ⓢ', &['Ⓢ']),
- ('ⓣ', &['Ⓣ']),
- ('ⓤ', &['Ⓤ']),
- ('ⓥ', &['Ⓥ']),
- ('ⓦ', &['Ⓦ']),
- ('ⓧ', &['Ⓧ']),
- ('ⓨ', &['Ⓨ']),
- ('ⓩ', &['Ⓩ']),
- ('Ⰰ', &['ⰰ']),
- ('Ⰱ', &['ⰱ']),
- ('Ⰲ', &['ⰲ']),
- ('Ⰳ', &['ⰳ']),
- ('Ⰴ', &['ⰴ']),
- ('Ⰵ', &['ⰵ']),
- ('Ⰶ', &['ⰶ']),
- ('Ⰷ', &['ⰷ']),
- ('Ⰸ', &['ⰸ']),
- ('Ⰹ', &['ⰹ']),
- ('Ⰺ', &['ⰺ']),
- ('Ⰻ', &['ⰻ']),
- ('Ⰼ', &['ⰼ']),
- ('Ⰽ', &['ⰽ']),
- ('Ⰾ', &['ⰾ']),
- ('Ⰿ', &['ⰿ']),
- ('Ⱀ', &['ⱀ']),
- ('Ⱁ', &['ⱁ']),
- ('Ⱂ', &['ⱂ']),
- ('Ⱃ', &['ⱃ']),
- ('Ⱄ', &['ⱄ']),
- ('Ⱅ', &['ⱅ']),
- ('Ⱆ', &['ⱆ']),
- ('Ⱇ', &['ⱇ']),
- ('Ⱈ', &['ⱈ']),
- ('Ⱉ', &['ⱉ']),
- ('Ⱊ', &['ⱊ']),
- ('Ⱋ', &['ⱋ']),
- ('Ⱌ', &['ⱌ']),
- ('Ⱍ', &['ⱍ']),
- ('Ⱎ', &['ⱎ']),
- ('Ⱏ', &['ⱏ']),
- ('Ⱐ', &['ⱐ']),
- ('Ⱑ', &['ⱑ']),
- ('Ⱒ', &['ⱒ']),
- ('Ⱓ', &['ⱓ']),
- ('Ⱔ', &['ⱔ']),
- ('Ⱕ', &['ⱕ']),
- ('Ⱖ', &['ⱖ']),
- ('Ⱗ', &['ⱗ']),
- ('Ⱘ', &['ⱘ']),
- ('Ⱙ', &['ⱙ']),
- ('Ⱚ', &['ⱚ']),
- ('Ⱛ', &['ⱛ']),
- ('Ⱜ', &['ⱜ']),
- ('Ⱝ', &['ⱝ']),
- ('Ⱞ', &['ⱞ']),
- ('Ⱟ', &['ⱟ']),
- ('ⰰ', &['Ⰰ']),
- ('ⰱ', &['Ⰱ']),
- ('ⰲ', &['Ⰲ']),
- ('ⰳ', &['Ⰳ']),
- ('ⰴ', &['Ⰴ']),
- ('ⰵ', &['Ⰵ']),
- ('ⰶ', &['Ⰶ']),
- ('ⰷ', &['Ⰷ']),
- ('ⰸ', &['Ⰸ']),
- ('ⰹ', &['Ⰹ']),
- ('ⰺ', &['Ⰺ']),
- ('ⰻ', &['Ⰻ']),
- ('ⰼ', &['Ⰼ']),
- ('ⰽ', &['Ⰽ']),
- ('ⰾ', &['Ⰾ']),
- ('ⰿ', &['Ⰿ']),
- ('ⱀ', &['Ⱀ']),
- ('ⱁ', &['Ⱁ']),
- ('ⱂ', &['Ⱂ']),
- ('ⱃ', &['Ⱃ']),
- ('ⱄ', &['Ⱄ']),
- ('ⱅ', &['Ⱅ']),
- ('ⱆ', &['Ⱆ']),
- ('ⱇ', &['Ⱇ']),
- ('ⱈ', &['Ⱈ']),
- ('ⱉ', &['Ⱉ']),
- ('ⱊ', &['Ⱊ']),
- ('ⱋ', &['Ⱋ']),
- ('ⱌ', &['Ⱌ']),
- ('ⱍ', &['Ⱍ']),
- ('ⱎ', &['Ⱎ']),
- ('ⱏ', &['Ⱏ']),
- ('ⱐ', &['Ⱐ']),
- ('ⱑ', &['Ⱑ']),
- ('ⱒ', &['Ⱒ']),
- ('ⱓ', &['Ⱓ']),
- ('ⱔ', &['Ⱔ']),
- ('ⱕ', &['Ⱕ']),
- ('ⱖ', &['Ⱖ']),
- ('ⱗ', &['Ⱗ']),
- ('ⱘ', &['Ⱘ']),
- ('ⱙ', &['Ⱙ']),
- ('ⱚ', &['Ⱚ']),
- ('ⱛ', &['Ⱛ']),
- ('ⱜ', &['Ⱜ']),
- ('ⱝ', &['Ⱝ']),
- ('ⱞ', &['Ⱞ']),
- ('ⱟ', &['Ⱟ']),
- ('Ⱡ', &['ⱡ']),
- ('ⱡ', &['Ⱡ']),
- ('Ɫ', &['ɫ']),
- ('Ᵽ', &['ᵽ']),
- ('Ɽ', &['ɽ']),
- ('ⱥ', &['Ⱥ']),
- ('ⱦ', &['Ⱦ']),
- ('Ⱨ', &['ⱨ']),
- ('ⱨ', &['Ⱨ']),
- ('Ⱪ', &['ⱪ']),
- ('ⱪ', &['Ⱪ']),
- ('Ⱬ', &['ⱬ']),
- ('ⱬ', &['Ⱬ']),
- ('Ɑ', &['ɑ']),
- ('Ɱ', &['ɱ']),
- ('Ɐ', &['ɐ']),
- ('Ɒ', &['ɒ']),
- ('Ⱳ', &['ⱳ']),
- ('ⱳ', &['Ⱳ']),
- ('Ⱶ', &['ⱶ']),
- ('ⱶ', &['Ⱶ']),
- ('Ȿ', &['ȿ']),
- ('Ɀ', &['ɀ']),
- ('Ⲁ', &['ⲁ']),
- ('ⲁ', &['Ⲁ']),
- ('Ⲃ', &['ⲃ']),
- ('ⲃ', &['Ⲃ']),
- ('Ⲅ', &['ⲅ']),
- ('ⲅ', &['Ⲅ']),
- ('Ⲇ', &['ⲇ']),
- ('ⲇ', &['Ⲇ']),
- ('Ⲉ', &['ⲉ']),
- ('ⲉ', &['Ⲉ']),
- ('Ⲋ', &['ⲋ']),
- ('ⲋ', &['Ⲋ']),
- ('Ⲍ', &['ⲍ']),
- ('ⲍ', &['Ⲍ']),
- ('Ⲏ', &['ⲏ']),
- ('ⲏ', &['Ⲏ']),
- ('Ⲑ', &['ⲑ']),
- ('ⲑ', &['Ⲑ']),
- ('Ⲓ', &['ⲓ']),
- ('ⲓ', &['Ⲓ']),
- ('Ⲕ', &['ⲕ']),
- ('ⲕ', &['Ⲕ']),
- ('Ⲗ', &['ⲗ']),
- ('ⲗ', &['Ⲗ']),
- ('Ⲙ', &['ⲙ']),
- ('ⲙ', &['Ⲙ']),
- ('Ⲛ', &['ⲛ']),
- ('ⲛ', &['Ⲛ']),
- ('Ⲝ', &['ⲝ']),
- ('ⲝ', &['Ⲝ']),
- ('Ⲟ', &['ⲟ']),
- ('ⲟ', &['Ⲟ']),
- ('Ⲡ', &['ⲡ']),
- ('ⲡ', &['Ⲡ']),
- ('Ⲣ', &['ⲣ']),
- ('ⲣ', &['Ⲣ']),
- ('Ⲥ', &['ⲥ']),
- ('ⲥ', &['Ⲥ']),
- ('Ⲧ', &['ⲧ']),
- ('ⲧ', &['Ⲧ']),
- ('Ⲩ', &['ⲩ']),
- ('ⲩ', &['Ⲩ']),
- ('Ⲫ', &['ⲫ']),
- ('ⲫ', &['Ⲫ']),
- ('Ⲭ', &['ⲭ']),
- ('ⲭ', &['Ⲭ']),
- ('Ⲯ', &['ⲯ']),
- ('ⲯ', &['Ⲯ']),
- ('Ⲱ', &['ⲱ']),
- ('ⲱ', &['Ⲱ']),
- ('Ⲳ', &['ⲳ']),
- ('ⲳ', &['Ⲳ']),
- ('Ⲵ', &['ⲵ']),
- ('ⲵ', &['Ⲵ']),
- ('Ⲷ', &['ⲷ']),
- ('ⲷ', &['Ⲷ']),
- ('Ⲹ', &['ⲹ']),
- ('ⲹ', &['Ⲹ']),
- ('Ⲻ', &['ⲻ']),
- ('ⲻ', &['Ⲻ']),
- ('Ⲽ', &['ⲽ']),
- ('ⲽ', &['Ⲽ']),
- ('Ⲿ', &['ⲿ']),
- ('ⲿ', &['Ⲿ']),
- ('Ⳁ', &['ⳁ']),
- ('ⳁ', &['Ⳁ']),
- ('Ⳃ', &['ⳃ']),
- ('ⳃ', &['Ⳃ']),
- ('Ⳅ', &['ⳅ']),
- ('ⳅ', &['Ⳅ']),
- ('Ⳇ', &['ⳇ']),
- ('ⳇ', &['Ⳇ']),
- ('Ⳉ', &['ⳉ']),
- ('ⳉ', &['Ⳉ']),
- ('Ⳋ', &['ⳋ']),
- ('ⳋ', &['Ⳋ']),
- ('Ⳍ', &['ⳍ']),
- ('ⳍ', &['Ⳍ']),
- ('Ⳏ', &['ⳏ']),
- ('ⳏ', &['Ⳏ']),
- ('Ⳑ', &['ⳑ']),
- ('ⳑ', &['Ⳑ']),
- ('Ⳓ', &['ⳓ']),
- ('ⳓ', &['Ⳓ']),
- ('Ⳕ', &['ⳕ']),
- ('ⳕ', &['Ⳕ']),
- ('Ⳗ', &['ⳗ']),
- ('ⳗ', &['Ⳗ']),
- ('Ⳙ', &['ⳙ']),
- ('ⳙ', &['Ⳙ']),
- ('Ⳛ', &['ⳛ']),
- ('ⳛ', &['Ⳛ']),
- ('Ⳝ', &['ⳝ']),
- ('ⳝ', &['Ⳝ']),
- ('Ⳟ', &['ⳟ']),
- ('ⳟ', &['Ⳟ']),
- ('Ⳡ', &['ⳡ']),
- ('ⳡ', &['Ⳡ']),
- ('Ⳣ', &['ⳣ']),
- ('ⳣ', &['Ⳣ']),
- ('Ⳬ', &['ⳬ']),
- ('ⳬ', &['Ⳬ']),
- ('Ⳮ', &['ⳮ']),
- ('ⳮ', &['Ⳮ']),
- ('Ⳳ', &['ⳳ']),
- ('ⳳ', &['Ⳳ']),
- ('ⴀ', &['Ⴀ']),
- ('ⴁ', &['Ⴁ']),
- ('ⴂ', &['Ⴂ']),
- ('ⴃ', &['Ⴃ']),
- ('ⴄ', &['Ⴄ']),
- ('ⴅ', &['Ⴅ']),
- ('ⴆ', &['Ⴆ']),
- ('ⴇ', &['Ⴇ']),
- ('ⴈ', &['Ⴈ']),
- ('ⴉ', &['Ⴉ']),
- ('ⴊ', &['Ⴊ']),
- ('ⴋ', &['Ⴋ']),
- ('ⴌ', &['Ⴌ']),
- ('ⴍ', &['Ⴍ']),
- ('ⴎ', &['Ⴎ']),
- ('ⴏ', &['Ⴏ']),
- ('ⴐ', &['Ⴐ']),
- ('ⴑ', &['Ⴑ']),
- ('ⴒ', &['Ⴒ']),
- ('ⴓ', &['Ⴓ']),
- ('ⴔ', &['Ⴔ']),
- ('ⴕ', &['Ⴕ']),
- ('ⴖ', &['Ⴖ']),
- ('ⴗ', &['Ⴗ']),
- ('ⴘ', &['Ⴘ']),
- ('ⴙ', &['Ⴙ']),
- ('ⴚ', &['Ⴚ']),
- ('ⴛ', &['Ⴛ']),
- ('ⴜ', &['Ⴜ']),
- ('ⴝ', &['Ⴝ']),
- ('ⴞ', &['Ⴞ']),
- ('ⴟ', &['Ⴟ']),
- ('ⴠ', &['Ⴠ']),
- ('ⴡ', &['Ⴡ']),
- ('ⴢ', &['Ⴢ']),
- ('ⴣ', &['Ⴣ']),
- ('ⴤ', &['Ⴤ']),
- ('ⴥ', &['Ⴥ']),
- ('ⴧ', &['Ⴧ']),
- ('ⴭ', &['Ⴭ']),
- ('Ꙁ', &['ꙁ']),
- ('ꙁ', &['Ꙁ']),
- ('Ꙃ', &['ꙃ']),
- ('ꙃ', &['Ꙃ']),
- ('Ꙅ', &['ꙅ']),
- ('ꙅ', &['Ꙅ']),
- ('Ꙇ', &['ꙇ']),
- ('ꙇ', &['Ꙇ']),
- ('Ꙉ', &['ꙉ']),
- ('ꙉ', &['Ꙉ']),
- ('Ꙋ', &['ᲈ', 'ꙋ']),
- ('ꙋ', &['ᲈ', 'Ꙋ']),
- ('Ꙍ', &['ꙍ']),
- ('ꙍ', &['Ꙍ']),
- ('Ꙏ', &['ꙏ']),
- ('ꙏ', &['Ꙏ']),
- ('Ꙑ', &['ꙑ']),
- ('ꙑ', &['Ꙑ']),
- ('Ꙓ', &['ꙓ']),
- ('ꙓ', &['Ꙓ']),
- ('Ꙕ', &['ꙕ']),
- ('ꙕ', &['Ꙕ']),
- ('Ꙗ', &['ꙗ']),
- ('ꙗ', &['Ꙗ']),
- ('Ꙙ', &['ꙙ']),
- ('ꙙ', &['Ꙙ']),
- ('Ꙛ', &['ꙛ']),
- ('ꙛ', &['Ꙛ']),
- ('Ꙝ', &['ꙝ']),
- ('ꙝ', &['Ꙝ']),
- ('Ꙟ', &['ꙟ']),
- ('ꙟ', &['Ꙟ']),
- ('Ꙡ', &['ꙡ']),
- ('ꙡ', &['Ꙡ']),
- ('Ꙣ', &['ꙣ']),
- ('ꙣ', &['Ꙣ']),
- ('Ꙥ', &['ꙥ']),
- ('ꙥ', &['Ꙥ']),
- ('Ꙧ', &['ꙧ']),
- ('ꙧ', &['Ꙧ']),
- ('Ꙩ', &['ꙩ']),
- ('ꙩ', &['Ꙩ']),
- ('Ꙫ', &['ꙫ']),
- ('ꙫ', &['Ꙫ']),
- ('Ꙭ', &['ꙭ']),
- ('ꙭ', &['Ꙭ']),
- ('Ꚁ', &['ꚁ']),
- ('ꚁ', &['Ꚁ']),
- ('Ꚃ', &['ꚃ']),
- ('ꚃ', &['Ꚃ']),
- ('Ꚅ', &['ꚅ']),
- ('ꚅ', &['Ꚅ']),
- ('Ꚇ', &['ꚇ']),
- ('ꚇ', &['Ꚇ']),
- ('Ꚉ', &['ꚉ']),
- ('ꚉ', &['Ꚉ']),
- ('Ꚋ', &['ꚋ']),
- ('ꚋ', &['Ꚋ']),
- ('Ꚍ', &['ꚍ']),
- ('ꚍ', &['Ꚍ']),
- ('Ꚏ', &['ꚏ']),
- ('ꚏ', &['Ꚏ']),
- ('Ꚑ', &['ꚑ']),
- ('ꚑ', &['Ꚑ']),
- ('Ꚓ', &['ꚓ']),
- ('ꚓ', &['Ꚓ']),
- ('Ꚕ', &['ꚕ']),
- ('ꚕ', &['Ꚕ']),
- ('Ꚗ', &['ꚗ']),
- ('ꚗ', &['Ꚗ']),
- ('Ꚙ', &['ꚙ']),
- ('ꚙ', &['Ꚙ']),
- ('Ꚛ', &['ꚛ']),
- ('ꚛ', &['Ꚛ']),
- ('Ꜣ', &['ꜣ']),
- ('ꜣ', &['Ꜣ']),
- ('Ꜥ', &['ꜥ']),
- ('ꜥ', &['Ꜥ']),
- ('Ꜧ', &['ꜧ']),
- ('ꜧ', &['Ꜧ']),
- ('Ꜩ', &['ꜩ']),
- ('ꜩ', &['Ꜩ']),
- ('Ꜫ', &['ꜫ']),
- ('ꜫ', &['Ꜫ']),
- ('Ꜭ', &['ꜭ']),
- ('ꜭ', &['Ꜭ']),
- ('Ꜯ', &['ꜯ']),
- ('ꜯ', &['Ꜯ']),
- ('Ꜳ', &['ꜳ']),
- ('ꜳ', &['Ꜳ']),
- ('Ꜵ', &['ꜵ']),
- ('ꜵ', &['Ꜵ']),
- ('Ꜷ', &['ꜷ']),
- ('ꜷ', &['Ꜷ']),
- ('Ꜹ', &['ꜹ']),
- ('ꜹ', &['Ꜹ']),
- ('Ꜻ', &['ꜻ']),
- ('ꜻ', &['Ꜻ']),
- ('Ꜽ', &['ꜽ']),
- ('ꜽ', &['Ꜽ']),
- ('Ꜿ', &['ꜿ']),
- ('ꜿ', &['Ꜿ']),
- ('Ꝁ', &['ꝁ']),
- ('ꝁ', &['Ꝁ']),
- ('Ꝃ', &['ꝃ']),
- ('ꝃ', &['Ꝃ']),
- ('Ꝅ', &['ꝅ']),
- ('ꝅ', &['Ꝅ']),
- ('Ꝇ', &['ꝇ']),
- ('ꝇ', &['Ꝇ']),
- ('Ꝉ', &['ꝉ']),
- ('ꝉ', &['Ꝉ']),
- ('Ꝋ', &['ꝋ']),
- ('ꝋ', &['Ꝋ']),
- ('Ꝍ', &['ꝍ']),
- ('ꝍ', &['Ꝍ']),
- ('Ꝏ', &['ꝏ']),
- ('ꝏ', &['Ꝏ']),
- ('Ꝑ', &['ꝑ']),
- ('ꝑ', &['Ꝑ']),
- ('Ꝓ', &['ꝓ']),
- ('ꝓ', &['Ꝓ']),
- ('Ꝕ', &['ꝕ']),
- ('ꝕ', &['Ꝕ']),
- ('Ꝗ', &['ꝗ']),
- ('ꝗ', &['Ꝗ']),
- ('Ꝙ', &['ꝙ']),
- ('ꝙ', &['Ꝙ']),
- ('Ꝛ', &['ꝛ']),
- ('ꝛ', &['Ꝛ']),
- ('Ꝝ', &['ꝝ']),
- ('ꝝ', &['Ꝝ']),
- ('Ꝟ', &['ꝟ']),
- ('ꝟ', &['Ꝟ']),
- ('Ꝡ', &['ꝡ']),
- ('ꝡ', &['Ꝡ']),
- ('Ꝣ', &['ꝣ']),
- ('ꝣ', &['Ꝣ']),
- ('Ꝥ', &['ꝥ']),
- ('ꝥ', &['Ꝥ']),
- ('Ꝧ', &['ꝧ']),
- ('ꝧ', &['Ꝧ']),
- ('Ꝩ', &['ꝩ']),
- ('ꝩ', &['Ꝩ']),
- ('Ꝫ', &['ꝫ']),
- ('ꝫ', &['Ꝫ']),
- ('Ꝭ', &['ꝭ']),
- ('ꝭ', &['Ꝭ']),
- ('Ꝯ', &['ꝯ']),
- ('ꝯ', &['Ꝯ']),
- ('Ꝺ', &['ꝺ']),
- ('ꝺ', &['Ꝺ']),
- ('Ꝼ', &['ꝼ']),
- ('ꝼ', &['Ꝼ']),
- ('Ᵹ', &['ᵹ']),
- ('Ꝿ', &['ꝿ']),
- ('ꝿ', &['Ꝿ']),
- ('Ꞁ', &['ꞁ']),
- ('ꞁ', &['Ꞁ']),
- ('Ꞃ', &['ꞃ']),
- ('ꞃ', &['Ꞃ']),
- ('Ꞅ', &['ꞅ']),
- ('ꞅ', &['Ꞅ']),
- ('Ꞇ', &['ꞇ']),
- ('ꞇ', &['Ꞇ']),
- ('Ꞌ', &['ꞌ']),
- ('ꞌ', &['Ꞌ']),
- ('Ɥ', &['ɥ']),
- ('Ꞑ', &['ꞑ']),
- ('ꞑ', &['Ꞑ']),
- ('Ꞓ', &['ꞓ']),
- ('ꞓ', &['Ꞓ']),
- ('ꞔ', &['Ꞔ']),
- ('Ꞗ', &['ꞗ']),
- ('ꞗ', &['Ꞗ']),
- ('Ꞙ', &['ꞙ']),
- ('ꞙ', &['Ꞙ']),
- ('Ꞛ', &['ꞛ']),
- ('ꞛ', &['Ꞛ']),
- ('Ꞝ', &['ꞝ']),
- ('ꞝ', &['Ꞝ']),
- ('Ꞟ', &['ꞟ']),
- ('ꞟ', &['Ꞟ']),
- ('Ꞡ', &['ꞡ']),
- ('ꞡ', &['Ꞡ']),
- ('Ꞣ', &['ꞣ']),
- ('ꞣ', &['Ꞣ']),
- ('Ꞥ', &['ꞥ']),
- ('ꞥ', &['Ꞥ']),
- ('Ꞧ', &['ꞧ']),
- ('ꞧ', &['Ꞧ']),
- ('Ꞩ', &['ꞩ']),
- ('ꞩ', &['Ꞩ']),
- ('Ɦ', &['ɦ']),
- ('Ɜ', &['ɜ']),
- ('Ɡ', &['ɡ']),
- ('Ɬ', &['ɬ']),
- ('Ɪ', &['ɪ']),
- ('Ʞ', &['ʞ']),
- ('Ʇ', &['ʇ']),
- ('Ʝ', &['ʝ']),
- ('Ꭓ', &['ꭓ']),
- ('Ꞵ', &['ꞵ']),
- ('ꞵ', &['Ꞵ']),
- ('Ꞷ', &['ꞷ']),
- ('ꞷ', &['Ꞷ']),
- ('Ꞹ', &['ꞹ']),
- ('ꞹ', &['Ꞹ']),
- ('Ꞻ', &['ꞻ']),
- ('ꞻ', &['Ꞻ']),
- ('Ꞽ', &['ꞽ']),
- ('ꞽ', &['Ꞽ']),
- ('Ꞿ', &['ꞿ']),
- ('ꞿ', &['Ꞿ']),
- ('Ꟁ', &['ꟁ']),
- ('ꟁ', &['Ꟁ']),
- ('Ꟃ', &['ꟃ']),
- ('ꟃ', &['Ꟃ']),
- ('Ꞔ', &['ꞔ']),
- ('Ʂ', &['ʂ']),
- ('Ᶎ', &['ᶎ']),
- ('Ꟈ', &['ꟈ']),
- ('ꟈ', &['Ꟈ']),
- ('Ꟊ', &['ꟊ']),
- ('ꟊ', &['Ꟊ']),
- ('Ɤ', &['ɤ']),
- ('Ꟍ', &['ꟍ']),
- ('ꟍ', &['Ꟍ']),
- ('Ꟑ', &['ꟑ']),
- ('ꟑ', &['Ꟑ']),
- ('Ꟗ', &['ꟗ']),
- ('ꟗ', &['Ꟗ']),
- ('Ꟙ', &['ꟙ']),
- ('ꟙ', &['Ꟙ']),
- ('Ꟛ', &['ꟛ']),
- ('ꟛ', &['Ꟛ']),
- ('Ƛ', &['ƛ']),
- ('Ꟶ', &['ꟶ']),
- ('ꟶ', &['Ꟶ']),
- ('ꭓ', &['Ꭓ']),
- ('ꭰ', &['Ꭰ']),
- ('ꭱ', &['Ꭱ']),
- ('ꭲ', &['Ꭲ']),
- ('ꭳ', &['Ꭳ']),
- ('ꭴ', &['Ꭴ']),
- ('ꭵ', &['Ꭵ']),
- ('ꭶ', &['Ꭶ']),
- ('ꭷ', &['Ꭷ']),
- ('ꭸ', &['Ꭸ']),
- ('ꭹ', &['Ꭹ']),
- ('ꭺ', &['Ꭺ']),
- ('ꭻ', &['Ꭻ']),
- ('ꭼ', &['Ꭼ']),
- ('ꭽ', &['Ꭽ']),
- ('ꭾ', &['Ꭾ']),
- ('ꭿ', &['Ꭿ']),
- ('ꮀ', &['Ꮀ']),
- ('ꮁ', &['Ꮁ']),
- ('ꮂ', &['Ꮂ']),
- ('ꮃ', &['Ꮃ']),
- ('ꮄ', &['Ꮄ']),
- ('ꮅ', &['Ꮅ']),
- ('ꮆ', &['Ꮆ']),
- ('ꮇ', &['Ꮇ']),
- ('ꮈ', &['Ꮈ']),
- ('ꮉ', &['Ꮉ']),
- ('ꮊ', &['Ꮊ']),
- ('ꮋ', &['Ꮋ']),
- ('ꮌ', &['Ꮌ']),
- ('ꮍ', &['Ꮍ']),
- ('ꮎ', &['Ꮎ']),
- ('ꮏ', &['Ꮏ']),
- ('ꮐ', &['Ꮐ']),
- ('ꮑ', &['Ꮑ']),
- ('ꮒ', &['Ꮒ']),
- ('ꮓ', &['Ꮓ']),
- ('ꮔ', &['Ꮔ']),
- ('ꮕ', &['Ꮕ']),
- ('ꮖ', &['Ꮖ']),
- ('ꮗ', &['Ꮗ']),
- ('ꮘ', &['Ꮘ']),
- ('ꮙ', &['Ꮙ']),
- ('ꮚ', &['Ꮚ']),
- ('ꮛ', &['Ꮛ']),
- ('ꮜ', &['Ꮜ']),
- ('ꮝ', &['Ꮝ']),
- ('ꮞ', &['Ꮞ']),
- ('ꮟ', &['Ꮟ']),
- ('ꮠ', &['Ꮠ']),
- ('ꮡ', &['Ꮡ']),
- ('ꮢ', &['Ꮢ']),
- ('ꮣ', &['Ꮣ']),
- ('ꮤ', &['Ꮤ']),
- ('ꮥ', &['Ꮥ']),
- ('ꮦ', &['Ꮦ']),
- ('ꮧ', &['Ꮧ']),
- ('ꮨ', &['Ꮨ']),
- ('ꮩ', &['Ꮩ']),
- ('ꮪ', &['Ꮪ']),
- ('ꮫ', &['Ꮫ']),
- ('ꮬ', &['Ꮬ']),
- ('ꮭ', &['Ꮭ']),
- ('ꮮ', &['Ꮮ']),
- ('ꮯ', &['Ꮯ']),
- ('ꮰ', &['Ꮰ']),
- ('ꮱ', &['Ꮱ']),
- ('ꮲ', &['Ꮲ']),
- ('ꮳ', &['Ꮳ']),
- ('ꮴ', &['Ꮴ']),
- ('ꮵ', &['Ꮵ']),
- ('ꮶ', &['Ꮶ']),
- ('ꮷ', &['Ꮷ']),
- ('ꮸ', &['Ꮸ']),
- ('ꮹ', &['Ꮹ']),
- ('ꮺ', &['Ꮺ']),
- ('ꮻ', &['Ꮻ']),
- ('ꮼ', &['Ꮼ']),
- ('ꮽ', &['Ꮽ']),
- ('ꮾ', &['Ꮾ']),
- ('ꮿ', &['Ꮿ']),
- ('ſt', &['st']),
- ('st', &['ſt']),
- ('A', &['a']),
- ('B', &['b']),
- ('C', &['c']),
- ('D', &['d']),
- ('E', &['e']),
- ('F', &['f']),
- ('G', &['g']),
- ('H', &['h']),
- ('I', &['i']),
- ('J', &['j']),
- ('K', &['k']),
- ('L', &['l']),
- ('M', &['m']),
- ('N', &['n']),
- ('O', &['o']),
- ('P', &['p']),
- ('Q', &['q']),
- ('R', &['r']),
- ('S', &['s']),
- ('T', &['t']),
- ('U', &['u']),
- ('V', &['v']),
- ('W', &['w']),
- ('X', &['x']),
- ('Y', &['y']),
- ('Z', &['z']),
- ('a', &['A']),
- ('b', &['B']),
- ('c', &['C']),
- ('d', &['D']),
- ('e', &['E']),
- ('f', &['F']),
- ('g', &['G']),
- ('h', &['H']),
- ('i', &['I']),
- ('j', &['J']),
- ('k', &['K']),
- ('l', &['L']),
- ('m', &['M']),
- ('n', &['N']),
- ('o', &['O']),
- ('p', &['P']),
- ('q', &['Q']),
- ('r', &['R']),
- ('s', &['S']),
- ('t', &['T']),
- ('u', &['U']),
- ('v', &['V']),
- ('w', &['W']),
- ('x', &['X']),
- ('y', &['Y']),
- ('z', &['Z']),
- ('𐐀', &['𐐨']),
- ('𐐁', &['𐐩']),
- ('𐐂', &['𐐪']),
- ('𐐃', &['𐐫']),
- ('𐐄', &['𐐬']),
- ('𐐅', &['𐐭']),
- ('𐐆', &['𐐮']),
- ('𐐇', &['𐐯']),
- ('𐐈', &['𐐰']),
- ('𐐉', &['𐐱']),
- ('𐐊', &['𐐲']),
- ('𐐋', &['𐐳']),
- ('𐐌', &['𐐴']),
- ('𐐍', &['𐐵']),
- ('𐐎', &['𐐶']),
- ('𐐏', &['𐐷']),
- ('𐐐', &['𐐸']),
- ('𐐑', &['𐐹']),
- ('𐐒', &['𐐺']),
- ('𐐓', &['𐐻']),
- ('𐐔', &['𐐼']),
- ('𐐕', &['𐐽']),
- ('𐐖', &['𐐾']),
- ('𐐗', &['𐐿']),
- ('𐐘', &['𐑀']),
- ('𐐙', &['𐑁']),
- ('𐐚', &['𐑂']),
- ('𐐛', &['𐑃']),
- ('𐐜', &['𐑄']),
- ('𐐝', &['𐑅']),
- ('𐐞', &['𐑆']),
- ('𐐟', &['𐑇']),
- ('𐐠', &['𐑈']),
- ('𐐡', &['𐑉']),
- ('𐐢', &['𐑊']),
- ('𐐣', &['𐑋']),
- ('𐐤', &['𐑌']),
- ('𐐥', &['𐑍']),
- ('𐐦', &['𐑎']),
- ('𐐧', &['𐑏']),
- ('𐐨', &['𐐀']),
- ('𐐩', &['𐐁']),
- ('𐐪', &['𐐂']),
- ('𐐫', &['𐐃']),
- ('𐐬', &['𐐄']),
- ('𐐭', &['𐐅']),
- ('𐐮', &['𐐆']),
- ('𐐯', &['𐐇']),
- ('𐐰', &['𐐈']),
- ('𐐱', &['𐐉']),
- ('𐐲', &['𐐊']),
- ('𐐳', &['𐐋']),
- ('𐐴', &['𐐌']),
- ('𐐵', &['𐐍']),
- ('𐐶', &['𐐎']),
- ('𐐷', &['𐐏']),
- ('𐐸', &['𐐐']),
- ('𐐹', &['𐐑']),
- ('𐐺', &['𐐒']),
- ('𐐻', &['𐐓']),
- ('𐐼', &['𐐔']),
- ('𐐽', &['𐐕']),
- ('𐐾', &['𐐖']),
- ('𐐿', &['𐐗']),
- ('𐑀', &['𐐘']),
- ('𐑁', &['𐐙']),
- ('𐑂', &['𐐚']),
- ('𐑃', &['𐐛']),
- ('𐑄', &['𐐜']),
- ('𐑅', &['𐐝']),
- ('𐑆', &['𐐞']),
- ('𐑇', &['𐐟']),
- ('𐑈', &['𐐠']),
- ('𐑉', &['𐐡']),
- ('𐑊', &['𐐢']),
- ('𐑋', &['𐐣']),
- ('𐑌', &['𐐤']),
- ('𐑍', &['𐐥']),
- ('𐑎', &['𐐦']),
- ('𐑏', &['𐐧']),
- ('𐒰', &['𐓘']),
- ('𐒱', &['𐓙']),
- ('𐒲', &['𐓚']),
- ('𐒳', &['𐓛']),
- ('𐒴', &['𐓜']),
- ('𐒵', &['𐓝']),
- ('𐒶', &['𐓞']),
- ('𐒷', &['𐓟']),
- ('𐒸', &['𐓠']),
- ('𐒹', &['𐓡']),
- ('𐒺', &['𐓢']),
- ('𐒻', &['𐓣']),
- ('𐒼', &['𐓤']),
- ('𐒽', &['𐓥']),
- ('𐒾', &['𐓦']),
- ('𐒿', &['𐓧']),
- ('𐓀', &['𐓨']),
- ('𐓁', &['𐓩']),
- ('𐓂', &['𐓪']),
- ('𐓃', &['𐓫']),
- ('𐓄', &['𐓬']),
- ('𐓅', &['𐓭']),
- ('𐓆', &['𐓮']),
- ('𐓇', &['𐓯']),
- ('𐓈', &['𐓰']),
- ('𐓉', &['𐓱']),
- ('𐓊', &['𐓲']),
- ('𐓋', &['𐓳']),
- ('𐓌', &['𐓴']),
- ('𐓍', &['𐓵']),
- ('𐓎', &['𐓶']),
- ('𐓏', &['𐓷']),
- ('𐓐', &['𐓸']),
- ('𐓑', &['𐓹']),
- ('𐓒', &['𐓺']),
- ('𐓓', &['𐓻']),
- ('𐓘', &['𐒰']),
- ('𐓙', &['𐒱']),
- ('𐓚', &['𐒲']),
- ('𐓛', &['𐒳']),
- ('𐓜', &['𐒴']),
- ('𐓝', &['𐒵']),
- ('𐓞', &['𐒶']),
- ('𐓟', &['𐒷']),
- ('𐓠', &['𐒸']),
- ('𐓡', &['𐒹']),
- ('𐓢', &['𐒺']),
- ('𐓣', &['𐒻']),
- ('𐓤', &['𐒼']),
- ('𐓥', &['𐒽']),
- ('𐓦', &['𐒾']),
- ('𐓧', &['𐒿']),
- ('𐓨', &['𐓀']),
- ('𐓩', &['𐓁']),
- ('𐓪', &['𐓂']),
- ('𐓫', &['𐓃']),
- ('𐓬', &['𐓄']),
- ('𐓭', &['𐓅']),
- ('𐓮', &['𐓆']),
- ('𐓯', &['𐓇']),
- ('𐓰', &['𐓈']),
- ('𐓱', &['𐓉']),
- ('𐓲', &['𐓊']),
- ('𐓳', &['𐓋']),
- ('𐓴', &['𐓌']),
- ('𐓵', &['𐓍']),
- ('𐓶', &['𐓎']),
- ('𐓷', &['𐓏']),
- ('𐓸', &['𐓐']),
- ('𐓹', &['𐓑']),
- ('𐓺', &['𐓒']),
- ('𐓻', &['𐓓']),
- ('𐕰', &['𐖗']),
- ('𐕱', &['𐖘']),
- ('𐕲', &['𐖙']),
- ('𐕳', &['𐖚']),
- ('𐕴', &['𐖛']),
- ('𐕵', &['𐖜']),
- ('𐕶', &['𐖝']),
- ('𐕷', &['𐖞']),
- ('𐕸', &['𐖟']),
- ('𐕹', &['𐖠']),
- ('𐕺', &['𐖡']),
- ('𐕼', &['𐖣']),
- ('𐕽', &['𐖤']),
- ('𐕾', &['𐖥']),
- ('𐕿', &['𐖦']),
- ('𐖀', &['𐖧']),
- ('𐖁', &['𐖨']),
- ('𐖂', &['𐖩']),
- ('𐖃', &['𐖪']),
- ('𐖄', &['𐖫']),
- ('𐖅', &['𐖬']),
- ('𐖆', &['𐖭']),
- ('𐖇', &['𐖮']),
- ('𐖈', &['𐖯']),
- ('𐖉', &['𐖰']),
- ('𐖊', &['𐖱']),
- ('𐖌', &['𐖳']),
- ('𐖍', &['𐖴']),
- ('𐖎', &['𐖵']),
- ('𐖏', &['𐖶']),
- ('𐖐', &['𐖷']),
- ('𐖑', &['𐖸']),
- ('𐖒', &['𐖹']),
- ('𐖔', &['𐖻']),
- ('𐖕', &['𐖼']),
- ('𐖗', &['𐕰']),
- ('𐖘', &['𐕱']),
- ('𐖙', &['𐕲']),
- ('𐖚', &['𐕳']),
- ('𐖛', &['𐕴']),
- ('𐖜', &['𐕵']),
- ('𐖝', &['𐕶']),
- ('𐖞', &['𐕷']),
- ('𐖟', &['𐕸']),
- ('𐖠', &['𐕹']),
- ('𐖡', &['𐕺']),
- ('𐖣', &['𐕼']),
- ('𐖤', &['𐕽']),
- ('𐖥', &['𐕾']),
- ('𐖦', &['𐕿']),
- ('𐖧', &['𐖀']),
- ('𐖨', &['𐖁']),
- ('𐖩', &['𐖂']),
- ('𐖪', &['𐖃']),
- ('𐖫', &['𐖄']),
- ('𐖬', &['𐖅']),
- ('𐖭', &['𐖆']),
- ('𐖮', &['𐖇']),
- ('𐖯', &['𐖈']),
- ('𐖰', &['𐖉']),
- ('𐖱', &['𐖊']),
- ('𐖳', &['𐖌']),
- ('𐖴', &['𐖍']),
- ('𐖵', &['𐖎']),
- ('𐖶', &['𐖏']),
- ('𐖷', &['𐖐']),
- ('𐖸', &['𐖑']),
- ('𐖹', &['𐖒']),
- ('𐖻', &['𐖔']),
- ('𐖼', &['𐖕']),
- ('𐲀', &['𐳀']),
- ('𐲁', &['𐳁']),
- ('𐲂', &['𐳂']),
- ('𐲃', &['𐳃']),
- ('𐲄', &['𐳄']),
- ('𐲅', &['𐳅']),
- ('𐲆', &['𐳆']),
- ('𐲇', &['𐳇']),
- ('𐲈', &['𐳈']),
- ('𐲉', &['𐳉']),
- ('𐲊', &['𐳊']),
- ('𐲋', &['𐳋']),
- ('𐲌', &['𐳌']),
- ('𐲍', &['𐳍']),
- ('𐲎', &['𐳎']),
- ('𐲏', &['𐳏']),
- ('𐲐', &['𐳐']),
- ('𐲑', &['𐳑']),
- ('𐲒', &['𐳒']),
- ('𐲓', &['𐳓']),
- ('𐲔', &['𐳔']),
- ('𐲕', &['𐳕']),
- ('𐲖', &['𐳖']),
- ('𐲗', &['𐳗']),
- ('𐲘', &['𐳘']),
- ('𐲙', &['𐳙']),
- ('𐲚', &['𐳚']),
- ('𐲛', &['𐳛']),
- ('𐲜', &['𐳜']),
- ('𐲝', &['𐳝']),
- ('𐲞', &['𐳞']),
- ('𐲟', &['𐳟']),
- ('𐲠', &['𐳠']),
- ('𐲡', &['𐳡']),
- ('𐲢', &['𐳢']),
- ('𐲣', &['𐳣']),
- ('𐲤', &['𐳤']),
- ('𐲥', &['𐳥']),
- ('𐲦', &['𐳦']),
- ('𐲧', &['𐳧']),
- ('𐲨', &['𐳨']),
- ('𐲩', &['𐳩']),
- ('𐲪', &['𐳪']),
- ('𐲫', &['𐳫']),
- ('𐲬', &['𐳬']),
- ('𐲭', &['𐳭']),
- ('𐲮', &['𐳮']),
- ('𐲯', &['𐳯']),
- ('𐲰', &['𐳰']),
- ('𐲱', &['𐳱']),
- ('𐲲', &['𐳲']),
- ('𐳀', &['𐲀']),
- ('𐳁', &['𐲁']),
- ('𐳂', &['𐲂']),
- ('𐳃', &['𐲃']),
- ('𐳄', &['𐲄']),
- ('𐳅', &['𐲅']),
- ('𐳆', &['𐲆']),
- ('𐳇', &['𐲇']),
- ('𐳈', &['𐲈']),
- ('𐳉', &['𐲉']),
- ('𐳊', &['𐲊']),
- ('𐳋', &['𐲋']),
- ('𐳌', &['𐲌']),
- ('𐳍', &['𐲍']),
- ('𐳎', &['𐲎']),
- ('𐳏', &['𐲏']),
- ('𐳐', &['𐲐']),
- ('𐳑', &['𐲑']),
- ('𐳒', &['𐲒']),
- ('𐳓', &['𐲓']),
- ('𐳔', &['𐲔']),
- ('𐳕', &['𐲕']),
- ('𐳖', &['𐲖']),
- ('𐳗', &['𐲗']),
- ('𐳘', &['𐲘']),
- ('𐳙', &['𐲙']),
- ('𐳚', &['𐲚']),
- ('𐳛', &['𐲛']),
- ('𐳜', &['𐲜']),
- ('𐳝', &['𐲝']),
- ('𐳞', &['𐲞']),
- ('𐳟', &['𐲟']),
- ('𐳠', &['𐲠']),
- ('𐳡', &['𐲡']),
- ('𐳢', &['𐲢']),
- ('𐳣', &['𐲣']),
- ('𐳤', &['𐲤']),
- ('𐳥', &['𐲥']),
- ('𐳦', &['𐲦']),
- ('𐳧', &['𐲧']),
- ('𐳨', &['𐲨']),
- ('𐳩', &['𐲩']),
- ('𐳪', &['𐲪']),
- ('𐳫', &['𐲫']),
- ('𐳬', &['𐲬']),
- ('𐳭', &['𐲭']),
- ('𐳮', &['𐲮']),
- ('𐳯', &['𐲯']),
- ('𐳰', &['𐲰']),
- ('𐳱', &['𐲱']),
- ('𐳲', &['𐲲']),
- ('𐵐', &['𐵰']),
- ('𐵑', &['𐵱']),
- ('𐵒', &['𐵲']),
- ('𐵓', &['𐵳']),
- ('𐵔', &['𐵴']),
- ('𐵕', &['𐵵']),
- ('𐵖', &['𐵶']),
- ('𐵗', &['𐵷']),
- ('𐵘', &['𐵸']),
- ('𐵙', &['𐵹']),
- ('𐵚', &['𐵺']),
- ('𐵛', &['𐵻']),
- ('𐵜', &['𐵼']),
- ('𐵝', &['𐵽']),
- ('𐵞', &['𐵾']),
- ('𐵟', &['𐵿']),
- ('𐵠', &['𐶀']),
- ('𐵡', &['𐶁']),
- ('𐵢', &['𐶂']),
- ('𐵣', &['𐶃']),
- ('𐵤', &['𐶄']),
- ('𐵥', &['𐶅']),
- ('𐵰', &['𐵐']),
- ('𐵱', &['𐵑']),
- ('𐵲', &['𐵒']),
- ('𐵳', &['𐵓']),
- ('𐵴', &['𐵔']),
- ('𐵵', &['𐵕']),
- ('𐵶', &['𐵖']),
- ('𐵷', &['𐵗']),
- ('𐵸', &['𐵘']),
- ('𐵹', &['𐵙']),
- ('𐵺', &['𐵚']),
- ('𐵻', &['𐵛']),
- ('𐵼', &['𐵜']),
- ('𐵽', &['𐵝']),
- ('𐵾', &['𐵞']),
- ('𐵿', &['𐵟']),
- ('𐶀', &['𐵠']),
- ('𐶁', &['𐵡']),
- ('𐶂', &['𐵢']),
- ('𐶃', &['𐵣']),
- ('𐶄', &['𐵤']),
- ('𐶅', &['𐵥']),
- ('𑢠', &['𑣀']),
- ('𑢡', &['𑣁']),
- ('𑢢', &['𑣂']),
- ('𑢣', &['𑣃']),
- ('𑢤', &['𑣄']),
- ('𑢥', &['𑣅']),
- ('𑢦', &['𑣆']),
- ('𑢧', &['𑣇']),
- ('𑢨', &['𑣈']),
- ('𑢩', &['𑣉']),
- ('𑢪', &['𑣊']),
- ('𑢫', &['𑣋']),
- ('𑢬', &['𑣌']),
- ('𑢭', &['𑣍']),
- ('𑢮', &['𑣎']),
- ('𑢯', &['𑣏']),
- ('𑢰', &['𑣐']),
- ('𑢱', &['𑣑']),
- ('𑢲', &['𑣒']),
- ('𑢳', &['𑣓']),
- ('𑢴', &['𑣔']),
- ('𑢵', &['𑣕']),
- ('𑢶', &['𑣖']),
- ('𑢷', &['𑣗']),
- ('𑢸', &['𑣘']),
- ('𑢹', &['𑣙']),
- ('𑢺', &['𑣚']),
- ('𑢻', &['𑣛']),
- ('𑢼', &['𑣜']),
- ('𑢽', &['𑣝']),
- ('𑢾', &['𑣞']),
- ('𑢿', &['𑣟']),
- ('𑣀', &['𑢠']),
- ('𑣁', &['𑢡']),
- ('𑣂', &['𑢢']),
- ('𑣃', &['𑢣']),
- ('𑣄', &['𑢤']),
- ('𑣅', &['𑢥']),
- ('𑣆', &['𑢦']),
- ('𑣇', &['𑢧']),
- ('𑣈', &['𑢨']),
- ('𑣉', &['𑢩']),
- ('𑣊', &['𑢪']),
- ('𑣋', &['𑢫']),
- ('𑣌', &['𑢬']),
- ('𑣍', &['𑢭']),
- ('𑣎', &['𑢮']),
- ('𑣏', &['𑢯']),
- ('𑣐', &['𑢰']),
- ('𑣑', &['𑢱']),
- ('𑣒', &['𑢲']),
- ('𑣓', &['𑢳']),
- ('𑣔', &['𑢴']),
- ('𑣕', &['𑢵']),
- ('𑣖', &['𑢶']),
- ('𑣗', &['𑢷']),
- ('𑣘', &['𑢸']),
- ('𑣙', &['𑢹']),
- ('𑣚', &['𑢺']),
- ('𑣛', &['𑢻']),
- ('𑣜', &['𑢼']),
- ('𑣝', &['𑢽']),
- ('𑣞', &['𑢾']),
- ('𑣟', &['𑢿']),
- ('𖹀', &['𖹠']),
- ('𖹁', &['𖹡']),
- ('𖹂', &['𖹢']),
- ('𖹃', &['𖹣']),
- ('𖹄', &['𖹤']),
- ('𖹅', &['𖹥']),
- ('𖹆', &['𖹦']),
- ('𖹇', &['𖹧']),
- ('𖹈', &['𖹨']),
- ('𖹉', &['𖹩']),
- ('𖹊', &['𖹪']),
- ('𖹋', &['𖹫']),
- ('𖹌', &['𖹬']),
- ('𖹍', &['𖹭']),
- ('𖹎', &['𖹮']),
- ('𖹏', &['𖹯']),
- ('𖹐', &['𖹰']),
- ('𖹑', &['𖹱']),
- ('𖹒', &['𖹲']),
- ('𖹓', &['𖹳']),
- ('𖹔', &['𖹴']),
- ('𖹕', &['𖹵']),
- ('𖹖', &['𖹶']),
- ('𖹗', &['𖹷']),
- ('𖹘', &['𖹸']),
- ('𖹙', &['𖹹']),
- ('𖹚', &['𖹺']),
- ('𖹛', &['𖹻']),
- ('𖹜', &['𖹼']),
- ('𖹝', &['𖹽']),
- ('𖹞', &['𖹾']),
- ('𖹟', &['𖹿']),
- ('𖹠', &['𖹀']),
- ('𖹡', &['𖹁']),
- ('𖹢', &['𖹂']),
- ('𖹣', &['𖹃']),
- ('𖹤', &['𖹄']),
- ('𖹥', &['𖹅']),
- ('𖹦', &['𖹆']),
- ('𖹧', &['𖹇']),
- ('𖹨', &['𖹈']),
- ('𖹩', &['𖹉']),
- ('𖹪', &['𖹊']),
- ('𖹫', &['𖹋']),
- ('𖹬', &['𖹌']),
- ('𖹭', &['𖹍']),
- ('𖹮', &['𖹎']),
- ('𖹯', &['𖹏']),
- ('𖹰', &['𖹐']),
- ('𖹱', &['𖹑']),
- ('𖹲', &['𖹒']),
- ('𖹳', &['𖹓']),
- ('𖹴', &['𖹔']),
- ('𖹵', &['𖹕']),
- ('𖹶', &['𖹖']),
- ('𖹷', &['𖹗']),
- ('𖹸', &['𖹘']),
- ('𖹹', &['𖹙']),
- ('𖹺', &['𖹚']),
- ('𖹻', &['𖹛']),
- ('𖹼', &['𖹜']),
- ('𖹽', &['𖹝']),
- ('𖹾', &['𖹞']),
- ('𖹿', &['𖹟']),
- ('𞤀', &['𞤢']),
- ('𞤁', &['𞤣']),
- ('𞤂', &['𞤤']),
- ('𞤃', &['𞤥']),
- ('𞤄', &['𞤦']),
- ('𞤅', &['𞤧']),
- ('𞤆', &['𞤨']),
- ('𞤇', &['𞤩']),
- ('𞤈', &['𞤪']),
- ('𞤉', &['𞤫']),
- ('𞤊', &['𞤬']),
- ('𞤋', &['𞤭']),
- ('𞤌', &['𞤮']),
- ('𞤍', &['𞤯']),
- ('𞤎', &['𞤰']),
- ('𞤏', &['𞤱']),
- ('𞤐', &['𞤲']),
- ('𞤑', &['𞤳']),
- ('𞤒', &['𞤴']),
- ('𞤓', &['𞤵']),
- ('𞤔', &['𞤶']),
- ('𞤕', &['𞤷']),
- ('𞤖', &['𞤸']),
- ('𞤗', &['𞤹']),
- ('𞤘', &['𞤺']),
- ('𞤙', &['𞤻']),
- ('𞤚', &['𞤼']),
- ('𞤛', &['𞤽']),
- ('𞤜', &['𞤾']),
- ('𞤝', &['𞤿']),
- ('𞤞', &['𞥀']),
- ('𞤟', &['𞥁']),
- ('𞤠', &['𞥂']),
- ('𞤡', &['𞥃']),
- ('𞤢', &['𞤀']),
- ('𞤣', &['𞤁']),
- ('𞤤', &['𞤂']),
- ('𞤥', &['𞤃']),
- ('𞤦', &['𞤄']),
- ('𞤧', &['𞤅']),
- ('𞤨', &['𞤆']),
- ('𞤩', &['𞤇']),
- ('𞤪', &['𞤈']),
- ('𞤫', &['𞤉']),
- ('𞤬', &['𞤊']),
- ('𞤭', &['𞤋']),
- ('𞤮', &['𞤌']),
- ('𞤯', &['𞤍']),
- ('𞤰', &['𞤎']),
- ('𞤱', &['𞤏']),
- ('𞤲', &['𞤐']),
- ('𞤳', &['𞤑']),
- ('𞤴', &['𞤒']),
- ('𞤵', &['𞤓']),
- ('𞤶', &['𞤔']),
- ('𞤷', &['𞤕']),
- ('𞤸', &['𞤖']),
- ('𞤹', &['𞤗']),
- ('𞤺', &['𞤘']),
- ('𞤻', &['𞤙']),
- ('𞤼', &['𞤚']),
- ('𞤽', &['𞤛']),
- ('𞤾', &['𞤜']),
- ('𞤿', &['𞤝']),
- ('𞥀', &['𞤞']),
- ('𞥁', &['𞤟']),
- ('𞥂', &['𞤠']),
- ('𞥃', &['𞤡']),
-];
diff --git a/vendor/regex-syntax/src/unicode_tables/general_category.rs b/vendor/regex-syntax/src/unicode_tables/general_category.rs
deleted file mode 100644
index 6ff6b538..00000000
--- a/vendor/regex-syntax/src/unicode_tables/general_category.rs
+++ /dev/null
@@ -1,6717 +0,0 @@
-// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
-//
-// ucd-generate general-category ucd-16.0.0 --chars --exclude surrogate
-//
-// Unicode version: 16.0.0.
-//
-// ucd-generate 0.3.1 is available on crates.io.
-
-pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
- ("Cased_Letter", CASED_LETTER),
- ("Close_Punctuation", CLOSE_PUNCTUATION),
- ("Connector_Punctuation", CONNECTOR_PUNCTUATION),
- ("Control", CONTROL),
- ("Currency_Symbol", CURRENCY_SYMBOL),
- ("Dash_Punctuation", DASH_PUNCTUATION),
- ("Decimal_Number", DECIMAL_NUMBER),
- ("Enclosing_Mark", ENCLOSING_MARK),
- ("Final_Punctuation", FINAL_PUNCTUATION),
- ("Format", FORMAT),
- ("Initial_Punctuation", INITIAL_PUNCTUATION),
- ("Letter", LETTER),
- ("Letter_Number", LETTER_NUMBER),
- ("Line_Separator", LINE_SEPARATOR),
- ("Lowercase_Letter", LOWERCASE_LETTER),
- ("Mark", MARK),
- ("Math_Symbol", MATH_SYMBOL),
- ("Modifier_Letter", MODIFIER_LETTER),
- ("Modifier_Symbol", MODIFIER_SYMBOL),
- ("Nonspacing_Mark", NONSPACING_MARK),
- ("Number", NUMBER),
- ("Open_Punctuation", OPEN_PUNCTUATION),
- ("Other", OTHER),
- ("Other_Letter", OTHER_LETTER),
- ("Other_Number", OTHER_NUMBER),
- ("Other_Punctuation", OTHER_PUNCTUATION),
- ("Other_Symbol", OTHER_SYMBOL),
- ("Paragraph_Separator", PARAGRAPH_SEPARATOR),
- ("Private_Use", PRIVATE_USE),
- ("Punctuation", PUNCTUATION),
- ("Separator", SEPARATOR),
- ("Space_Separator", SPACE_SEPARATOR),
- ("Spacing_Mark", SPACING_MARK),
- ("Symbol", SYMBOL),
- ("Titlecase_Letter", TITLECASE_LETTER),
- ("Unassigned", UNASSIGNED),
- ("Uppercase_Letter", UPPERCASE_LETTER),
-];
-
-pub const CASED_LETTER: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('a', 'z'),
- ('µ', 'µ'),
- ('À', 'Ö'),
- ('Ø', 'ö'),
- ('ø', 'ƺ'),
- ('Ƽ', 'ƿ'),
- ('DŽ', 'ʓ'),
- ('ʕ', 'ʯ'),
- ('Ͱ', 'ͳ'),
- ('Ͷ', 'ͷ'),
- ('ͻ', 'ͽ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', 'ϵ'),
- ('Ϸ', 'ҁ'),
- ('Ҋ', 'ԯ'),
- ('Ա', 'Ֆ'),
- ('ՠ', 'ֈ'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ა', 'ჺ'),
- ('ჽ', 'ჿ'),
- ('Ꭰ', 'Ᏽ'),
- ('ᏸ', 'ᏽ'),
- ('ᲀ', 'ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('ᴀ', 'ᴫ'),
- ('ᵫ', 'ᵷ'),
- ('ᵹ', 'ᶚ'),
- ('Ḁ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ᾼ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῌ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('ῠ', 'Ῥ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῼ'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℊ', 'ℓ'),
- ('ℕ', 'ℕ'),
- ('ℙ', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('Ω', 'Ω'),
- ('ℨ', 'ℨ'),
- ('K', 'ℭ'),
- ('ℯ', 'ℴ'),
- ('ℹ', 'ℹ'),
- ('ℼ', 'ℿ'),
- ('ⅅ', 'ⅉ'),
- ('ⅎ', 'ⅎ'),
- ('Ↄ', 'ↄ'),
- ('Ⰰ', 'ⱻ'),
- ('Ȿ', 'ⳤ'),
- ('Ⳬ', 'ⳮ'),
- ('Ⳳ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('Ꙁ', 'ꙭ'),
- ('Ꚁ', 'ꚛ'),
- ('Ꜣ', 'ꝯ'),
- ('ꝱ', 'ꞇ'),
- ('Ꞌ', 'ꞎ'),
- ('Ꞑ', 'ꟍ'),
- ('Ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'Ƛ'),
- ('Ꟶ', 'ꟶ'),
- ('ꟺ', 'ꟺ'),
- ('ꬰ', 'ꭚ'),
- ('ꭠ', 'ꭨ'),
- ('ꭰ', 'ꮿ'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('A', 'Z'),
- ('a', 'z'),
- ('𐐀', '𐑏'),
- ('𐒰', '𐓓'),
- ('𐓘', '𐓻'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐲀', '𐲲'),
- ('𐳀', '𐳲'),
- ('𐵐', '𐵥'),
- ('𐵰', '𐶅'),
- ('𑢠', '𑣟'),
- ('𖹀', '𖹿'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝛀'),
- ('𝛂', '𝛚'),
- ('𝛜', '𝛺'),
- ('𝛼', '𝜔'),
- ('𝜖', '𝜴'),
- ('𝜶', '𝝎'),
- ('𝝐', '𝝮'),
- ('𝝰', '𝞈'),
- ('𝞊', '𝞨'),
- ('𝞪', '𝟂'),
- ('𝟄', '𝟋'),
- ('𝼀', '𝼉'),
- ('𝼋', '𝼞'),
- ('𝼥', '𝼪'),
- ('𞤀', '𞥃'),
-];
-
-pub const CLOSE_PUNCTUATION: &'static [(char, char)] = &[
- (')', ')'),
- (']', ']'),
- ('}', '}'),
- ('༻', '༻'),
- ('༽', '༽'),
- ('᚜', '᚜'),
- ('⁆', '⁆'),
- ('⁾', '⁾'),
- ('₎', '₎'),
- ('⌉', '⌉'),
- ('⌋', '⌋'),
- ('〉', '〉'),
- ('❩', '❩'),
- ('❫', '❫'),
- ('❭', '❭'),
- ('❯', '❯'),
- ('❱', '❱'),
- ('❳', '❳'),
- ('❵', '❵'),
- ('⟆', '⟆'),
- ('⟧', '⟧'),
- ('⟩', '⟩'),
- ('⟫', '⟫'),
- ('⟭', '⟭'),
- ('⟯', '⟯'),
- ('⦄', '⦄'),
- ('⦆', '⦆'),
- ('⦈', '⦈'),
- ('⦊', '⦊'),
- ('⦌', '⦌'),
- ('⦎', '⦎'),
- ('⦐', '⦐'),
- ('⦒', '⦒'),
- ('⦔', '⦔'),
- ('⦖', '⦖'),
- ('⦘', '⦘'),
- ('⧙', '⧙'),
- ('⧛', '⧛'),
- ('⧽', '⧽'),
- ('⸣', '⸣'),
- ('⸥', '⸥'),
- ('⸧', '⸧'),
- ('⸩', '⸩'),
- ('⹖', '⹖'),
- ('⹘', '⹘'),
- ('⹚', '⹚'),
- ('⹜', '⹜'),
- ('〉', '〉'),
- ('》', '》'),
- ('」', '」'),
- ('』', '』'),
- ('】', '】'),
- ('〕', '〕'),
- ('〗', '〗'),
- ('〙', '〙'),
- ('〛', '〛'),
- ('〞', '〟'),
- ('﴾', '﴾'),
- ('︘', '︘'),
- ('︶', '︶'),
- ('︸', '︸'),
- ('︺', '︺'),
- ('︼', '︼'),
- ('︾', '︾'),
- ('﹀', '﹀'),
- ('﹂', '﹂'),
- ('﹄', '﹄'),
- ('﹈', '﹈'),
- ('﹚', '﹚'),
- ('﹜', '﹜'),
- ('﹞', '﹞'),
- (')', ')'),
- (']', ']'),
- ('}', '}'),
- ('⦆', '⦆'),
- ('」', '」'),
-];
-
-pub const CONNECTOR_PUNCTUATION: &'static [(char, char)] = &[
- ('_', '_'),
- ('‿', '⁀'),
- ('⁔', '⁔'),
- ('︳', '︴'),
- ('﹍', '﹏'),
- ('_', '_'),
-];
-
-pub const CONTROL: &'static [(char, char)] =
- &[('\0', '\u{1f}'), ('\u{7f}', '\u{9f}')];
-
-pub const CURRENCY_SYMBOL: &'static [(char, char)] = &[
- ('$', '$'),
- ('¢', '¥'),
- ('֏', '֏'),
- ('؋', '؋'),
- ('߾', '߿'),
- ('৲', '৳'),
- ('৻', '৻'),
- ('૱', '૱'),
- ('௹', '௹'),
- ('฿', '฿'),
- ('៛', '៛'),
- ('₠', '⃀'),
- ('꠸', '꠸'),
- ('﷼', '﷼'),
- ('﹩', '﹩'),
- ('$', '$'),
- ('¢', '£'),
- ('¥', '₩'),
- ('𑿝', '𑿠'),
- ('𞋿', '𞋿'),
- ('𞲰', '𞲰'),
-];
-
-pub const DASH_PUNCTUATION: &'static [(char, char)] = &[
- ('-', '-'),
- ('֊', '֊'),
- ('־', '־'),
- ('᐀', '᐀'),
- ('᠆', '᠆'),
- ('‐', '―'),
- ('⸗', '⸗'),
- ('⸚', '⸚'),
- ('⸺', '⸻'),
- ('⹀', '⹀'),
- ('⹝', '⹝'),
- ('〜', '〜'),
- ('〰', '〰'),
- ('゠', '゠'),
- ('︱', '︲'),
- ('﹘', '﹘'),
- ('﹣', '﹣'),
- ('-', '-'),
- ('𐵮', '𐵮'),
- ('𐺭', '𐺭'),
-];
-
-pub const DECIMAL_NUMBER: &'static [(char, char)] = &[
- ('0', '9'),
- ('٠', '٩'),
- ('۰', '۹'),
- ('߀', '߉'),
- ('०', '९'),
- ('০', '৯'),
- ('੦', '੯'),
- ('૦', '૯'),
- ('୦', '୯'),
- ('௦', '௯'),
- ('౦', '౯'),
- ('೦', '೯'),
- ('൦', '൯'),
- ('෦', '෯'),
- ('๐', '๙'),
- ('໐', '໙'),
- ('༠', '༩'),
- ('၀', '၉'),
- ('႐', '႙'),
- ('០', '៩'),
- ('᠐', '᠙'),
- ('᥆', '᥏'),
- ('᧐', '᧙'),
- ('᪀', '᪉'),
- ('᪐', '᪙'),
- ('᭐', '᭙'),
- ('᮰', '᮹'),
- ('᱀', '᱉'),
- ('᱐', '᱙'),
- ('꘠', '꘩'),
- ('꣐', '꣙'),
- ('꤀', '꤉'),
- ('꧐', '꧙'),
- ('꧰', '꧹'),
- ('꩐', '꩙'),
- ('꯰', '꯹'),
- ('0', '9'),
- ('𐒠', '𐒩'),
- ('𐴰', '𐴹'),
- ('𐵀', '𐵉'),
- ('𑁦', '𑁯'),
- ('𑃰', '𑃹'),
- ('𑄶', '𑄿'),
- ('𑇐', '𑇙'),
- ('𑋰', '𑋹'),
- ('𑑐', '𑑙'),
- ('𑓐', '𑓙'),
- ('𑙐', '𑙙'),
- ('𑛀', '𑛉'),
- ('𑛐', '𑛣'),
- ('𑜰', '𑜹'),
- ('𑣠', '𑣩'),
- ('𑥐', '𑥙'),
- ('𑯰', '𑯹'),
- ('𑱐', '𑱙'),
- ('𑵐', '𑵙'),
- ('𑶠', '𑶩'),
- ('𑽐', '𑽙'),
- ('𖄰', '𖄹'),
- ('𖩠', '𖩩'),
- ('𖫀', '𖫉'),
- ('𖭐', '𖭙'),
- ('𖵰', '𖵹'),
- ('𜳰', '𜳹'),
- ('𝟎', '𝟿'),
- ('𞅀', '𞅉'),
- ('𞋰', '𞋹'),
- ('𞓰', '𞓹'),
- ('𞗱', '𞗺'),
- ('𞥐', '𞥙'),
- ('🯰', '🯹'),
-];
-
-pub const ENCLOSING_MARK: &'static [(char, char)] = &[
- ('\u{488}', '\u{489}'),
- ('\u{1abe}', '\u{1abe}'),
- ('\u{20dd}', '\u{20e0}'),
- ('\u{20e2}', '\u{20e4}'),
- ('\u{a670}', '\u{a672}'),
-];
-
-pub const FINAL_PUNCTUATION: &'static [(char, char)] = &[
- ('»', '»'),
- ('’', '’'),
- ('”', '”'),
- ('›', '›'),
- ('⸃', '⸃'),
- ('⸅', '⸅'),
- ('⸊', '⸊'),
- ('⸍', '⸍'),
- ('⸝', '⸝'),
- ('⸡', '⸡'),
-];
-
-pub const FORMAT: &'static [(char, char)] = &[
- ('\u{ad}', '\u{ad}'),
- ('\u{600}', '\u{605}'),
- ('\u{61c}', '\u{61c}'),
- ('\u{6dd}', '\u{6dd}'),
- ('\u{70f}', '\u{70f}'),
- ('\u{890}', '\u{891}'),
- ('\u{8e2}', '\u{8e2}'),
- ('\u{180e}', '\u{180e}'),
- ('\u{200b}', '\u{200f}'),
- ('\u{202a}', '\u{202e}'),
- ('\u{2060}', '\u{2064}'),
- ('\u{2066}', '\u{206f}'),
- ('\u{feff}', '\u{feff}'),
- ('\u{fff9}', '\u{fffb}'),
- ('\u{110bd}', '\u{110bd}'),
- ('\u{110cd}', '\u{110cd}'),
- ('\u{13430}', '\u{1343f}'),
- ('\u{1bca0}', '\u{1bca3}'),
- ('\u{1d173}', '\u{1d17a}'),
- ('\u{e0001}', '\u{e0001}'),
- ('\u{e0020}', '\u{e007f}'),
-];
-
-pub const INITIAL_PUNCTUATION: &'static [(char, char)] = &[
- ('«', '«'),
- ('‘', '‘'),
- ('‛', '“'),
- ('‟', '‟'),
- ('‹', '‹'),
- ('⸂', '⸂'),
- ('⸄', '⸄'),
- ('⸉', '⸉'),
- ('⸌', '⸌'),
- ('⸜', '⸜'),
- ('⸠', '⸠'),
-];
-
-pub const LETTER: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('a', 'z'),
- ('ª', 'ª'),
- ('µ', 'µ'),
- ('º', 'º'),
- ('À', 'Ö'),
- ('Ø', 'ö'),
- ('ø', 'ˁ'),
- ('ˆ', 'ˑ'),
- ('ˠ', 'ˤ'),
- ('ˬ', 'ˬ'),
- ('ˮ', 'ˮ'),
- ('Ͱ', 'ʹ'),
- ('Ͷ', 'ͷ'),
- ('ͺ', 'ͽ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', 'ϵ'),
- ('Ϸ', 'ҁ'),
- ('Ҋ', 'ԯ'),
- ('Ա', 'Ֆ'),
- ('ՙ', 'ՙ'),
- ('ՠ', 'ֈ'),
- ('א', 'ת'),
- ('ׯ', 'ײ'),
- ('ؠ', 'ي'),
- ('ٮ', 'ٯ'),
- ('ٱ', 'ۓ'),
- ('ە', 'ە'),
- ('ۥ', 'ۦ'),
- ('ۮ', 'ۯ'),
- ('ۺ', 'ۼ'),
- ('ۿ', 'ۿ'),
- ('ܐ', 'ܐ'),
- ('ܒ', 'ܯ'),
- ('ݍ', 'ޥ'),
- ('ޱ', 'ޱ'),
- ('ߊ', 'ߪ'),
- ('ߴ', 'ߵ'),
- ('ߺ', 'ߺ'),
- ('ࠀ', 'ࠕ'),
- ('ࠚ', 'ࠚ'),
- ('ࠤ', 'ࠤ'),
- ('ࠨ', 'ࠨ'),
- ('ࡀ', 'ࡘ'),
- ('ࡠ', 'ࡪ'),
- ('ࡰ', 'ࢇ'),
- ('ࢉ', 'ࢎ'),
- ('ࢠ', 'ࣉ'),
- ('ऄ', 'ह'),
- ('ऽ', 'ऽ'),
- ('ॐ', 'ॐ'),
- ('क़', 'ॡ'),
- ('ॱ', 'ঀ'),
- ('অ', 'ঌ'),
- ('এ', 'ঐ'),
- ('ও', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('ঽ', 'ঽ'),
- ('ৎ', 'ৎ'),
- ('ড়', 'ঢ়'),
- ('য়', 'ৡ'),
- ('ৰ', 'ৱ'),
- ('ৼ', 'ৼ'),
- ('ਅ', 'ਊ'),
- ('ਏ', 'ਐ'),
- ('ਓ', 'ਨ'),
- ('ਪ', 'ਰ'),
- ('ਲ', 'ਲ਼'),
- ('ਵ', 'ਸ਼'),
- ('ਸ', 'ਹ'),
- ('ਖ਼', 'ੜ'),
- ('ਫ਼', 'ਫ਼'),
- ('ੲ', 'ੴ'),
- ('અ', 'ઍ'),
- ('એ', 'ઑ'),
- ('ઓ', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('ઽ', 'ઽ'),
- ('ૐ', 'ૐ'),
- ('ૠ', 'ૡ'),
- ('ૹ', 'ૹ'),
- ('ଅ', 'ଌ'),
- ('ଏ', 'ଐ'),
- ('ଓ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଵ', 'ହ'),
- ('ଽ', 'ଽ'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', 'ୡ'),
- ('ୱ', 'ୱ'),
- ('ஃ', 'ஃ'),
- ('அ', 'ஊ'),
- ('எ', 'ஐ'),
- ('ஒ', 'க'),
- ('ங', 'ச'),
- ('ஜ', 'ஜ'),
- ('ஞ', 'ட'),
- ('ண', 'த'),
- ('ந', 'ப'),
- ('ம', 'ஹ'),
- ('ௐ', 'ௐ'),
- ('అ', 'ఌ'),
- ('ఎ', 'ఐ'),
- ('ఒ', 'న'),
- ('ప', 'హ'),
- ('ఽ', 'ఽ'),
- ('ౘ', 'ౚ'),
- ('ౝ', 'ౝ'),
- ('ౠ', 'ౡ'),
- ('ಀ', 'ಀ'),
- ('ಅ', 'ಌ'),
- ('ಎ', 'ಐ'),
- ('ಒ', 'ನ'),
- ('ಪ', 'ಳ'),
- ('ವ', 'ಹ'),
- ('ಽ', 'ಽ'),
- ('ೝ', 'ೞ'),
- ('ೠ', 'ೡ'),
- ('ೱ', 'ೲ'),
- ('ഄ', 'ഌ'),
- ('എ', 'ഐ'),
- ('ഒ', 'ഺ'),
- ('ഽ', 'ഽ'),
- ('ൎ', 'ൎ'),
- ('ൔ', 'ൖ'),
- ('ൟ', 'ൡ'),
- ('ൺ', 'ൿ'),
- ('අ', 'ඖ'),
- ('ක', 'න'),
- ('ඳ', 'ර'),
- ('ල', 'ල'),
- ('ව', 'ෆ'),
- ('ก', 'ะ'),
- ('า', 'ำ'),
- ('เ', 'ๆ'),
- ('ກ', 'ຂ'),
- ('ຄ', 'ຄ'),
- ('ຆ', 'ຊ'),
- ('ຌ', 'ຣ'),
- ('ລ', 'ລ'),
- ('ວ', 'ະ'),
- ('າ', 'ຳ'),
- ('ຽ', 'ຽ'),
- ('ເ', 'ໄ'),
- ('ໆ', 'ໆ'),
- ('ໜ', 'ໟ'),
- ('ༀ', 'ༀ'),
- ('ཀ', 'ཇ'),
- ('ཉ', 'ཬ'),
- ('ྈ', 'ྌ'),
- ('က', 'ဪ'),
- ('ဿ', 'ဿ'),
- ('ၐ', 'ၕ'),
- ('ၚ', 'ၝ'),
- ('ၡ', 'ၡ'),
- ('ၥ', 'ၦ'),
- ('ၮ', 'ၰ'),
- ('ၵ', 'ႁ'),
- ('ႎ', 'ႎ'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ა', 'ჺ'),
- ('ჼ', 'ቈ'),
- ('ቊ', 'ቍ'),
- ('ቐ', 'ቖ'),
- ('ቘ', 'ቘ'),
- ('ቚ', 'ቝ'),
- ('በ', 'ኈ'),
- ('ኊ', 'ኍ'),
- ('ነ', 'ኰ'),
- ('ኲ', 'ኵ'),
- ('ኸ', 'ኾ'),
- ('ዀ', 'ዀ'),
- ('ዂ', 'ዅ'),
- ('ወ', 'ዖ'),
- ('ዘ', 'ጐ'),
- ('ጒ', 'ጕ'),
- ('ጘ', 'ፚ'),
- ('ᎀ', 'ᎏ'),
- ('Ꭰ', 'Ᏽ'),
- ('ᏸ', 'ᏽ'),
- ('ᐁ', 'ᙬ'),
- ('ᙯ', 'ᙿ'),
- ('ᚁ', 'ᚚ'),
- ('ᚠ', 'ᛪ'),
- ('ᛱ', 'ᛸ'),
- ('ᜀ', 'ᜑ'),
- ('ᜟ', 'ᜱ'),
- ('ᝀ', 'ᝑ'),
- ('ᝠ', 'ᝬ'),
- ('ᝮ', 'ᝰ'),
- ('ក', 'ឳ'),
- ('ៗ', 'ៗ'),
- ('ៜ', 'ៜ'),
- ('ᠠ', 'ᡸ'),
- ('ᢀ', 'ᢄ'),
- ('ᢇ', 'ᢨ'),
- ('ᢪ', 'ᢪ'),
- ('ᢰ', 'ᣵ'),
- ('ᤀ', 'ᤞ'),
- ('ᥐ', 'ᥭ'),
- ('ᥰ', 'ᥴ'),
- ('ᦀ', 'ᦫ'),
- ('ᦰ', 'ᧉ'),
- ('ᨀ', 'ᨖ'),
- ('ᨠ', 'ᩔ'),
- ('ᪧ', 'ᪧ'),
- ('ᬅ', 'ᬳ'),
- ('ᭅ', 'ᭌ'),
- ('ᮃ', 'ᮠ'),
- ('ᮮ', 'ᮯ'),
- ('ᮺ', 'ᯥ'),
- ('ᰀ', 'ᰣ'),
- ('ᱍ', 'ᱏ'),
- ('ᱚ', 'ᱽ'),
- ('ᲀ', 'ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('ᳩ', 'ᳬ'),
- ('ᳮ', 'ᳳ'),
- ('ᳵ', 'ᳶ'),
- ('ᳺ', 'ᳺ'),
- ('ᴀ', 'ᶿ'),
- ('Ḁ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ᾼ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῌ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('ῠ', 'Ῥ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῼ'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℊ', 'ℓ'),
- ('ℕ', 'ℕ'),
- ('ℙ', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('Ω', 'Ω'),
- ('ℨ', 'ℨ'),
- ('K', 'ℭ'),
- ('ℯ', 'ℹ'),
- ('ℼ', 'ℿ'),
- ('ⅅ', 'ⅉ'),
- ('ⅎ', 'ⅎ'),
- ('Ↄ', 'ↄ'),
- ('Ⰰ', 'ⳤ'),
- ('Ⳬ', 'ⳮ'),
- ('Ⳳ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ⴰ', 'ⵧ'),
- ('ⵯ', 'ⵯ'),
- ('ⶀ', 'ⶖ'),
- ('ⶠ', 'ⶦ'),
- ('ⶨ', 'ⶮ'),
- ('ⶰ', 'ⶶ'),
- ('ⶸ', 'ⶾ'),
- ('ⷀ', 'ⷆ'),
- ('ⷈ', 'ⷎ'),
- ('ⷐ', 'ⷖ'),
- ('ⷘ', 'ⷞ'),
- ('ⸯ', 'ⸯ'),
- ('々', '〆'),
- ('〱', '〵'),
- ('〻', '〼'),
- ('ぁ', 'ゖ'),
- ('ゝ', 'ゟ'),
- ('ァ', 'ヺ'),
- ('ー', 'ヿ'),
- ('ㄅ', 'ㄯ'),
- ('ㄱ', 'ㆎ'),
- ('ㆠ', 'ㆿ'),
- ('ㇰ', 'ㇿ'),
- ('㐀', '䶿'),
- ('一', 'ꒌ'),
- ('ꓐ', 'ꓽ'),
- ('ꔀ', 'ꘌ'),
- ('ꘐ', 'ꘟ'),
- ('ꘪ', 'ꘫ'),
- ('Ꙁ', 'ꙮ'),
- ('ꙿ', 'ꚝ'),
- ('ꚠ', 'ꛥ'),
- ('ꜗ', 'ꜟ'),
- ('Ꜣ', 'ꞈ'),
- ('Ꞌ', 'ꟍ'),
- ('Ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'Ƛ'),
- ('ꟲ', 'ꠁ'),
- ('ꠃ', 'ꠅ'),
- ('ꠇ', 'ꠊ'),
- ('ꠌ', 'ꠢ'),
- ('ꡀ', 'ꡳ'),
- ('ꢂ', 'ꢳ'),
- ('ꣲ', 'ꣷ'),
- ('ꣻ', 'ꣻ'),
- ('ꣽ', 'ꣾ'),
- ('ꤊ', 'ꤥ'),
- ('ꤰ', 'ꥆ'),
- ('ꥠ', 'ꥼ'),
- ('ꦄ', 'ꦲ'),
- ('ꧏ', 'ꧏ'),
- ('ꧠ', 'ꧤ'),
- ('ꧦ', 'ꧯ'),
- ('ꧺ', 'ꧾ'),
- ('ꨀ', 'ꨨ'),
- ('ꩀ', 'ꩂ'),
- ('ꩄ', 'ꩋ'),
- ('ꩠ', 'ꩶ'),
- ('ꩺ', 'ꩺ'),
- ('ꩾ', 'ꪯ'),
- ('ꪱ', 'ꪱ'),
- ('ꪵ', 'ꪶ'),
- ('ꪹ', 'ꪽ'),
- ('ꫀ', 'ꫀ'),
- ('ꫂ', 'ꫂ'),
- ('ꫛ', 'ꫝ'),
- ('ꫠ', 'ꫪ'),
- ('ꫲ', 'ꫴ'),
- ('ꬁ', 'ꬆ'),
- ('ꬉ', 'ꬎ'),
- ('ꬑ', 'ꬖ'),
- ('ꬠ', 'ꬦ'),
- ('ꬨ', 'ꬮ'),
- ('ꬰ', 'ꭚ'),
- ('ꭜ', 'ꭩ'),
- ('ꭰ', 'ꯢ'),
- ('가', '힣'),
- ('ힰ', 'ퟆ'),
- ('ퟋ', 'ퟻ'),
- ('豈', '舘'),
- ('並', '龎'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('יִ', 'יִ'),
- ('ײַ', 'ﬨ'),
- ('שׁ', 'זּ'),
- ('טּ', 'לּ'),
- ('מּ', 'מּ'),
- ('נּ', 'סּ'),
- ('ףּ', 'פּ'),
- ('צּ', 'ﮱ'),
- ('ﯓ', 'ﴽ'),
- ('ﵐ', 'ﶏ'),
- ('ﶒ', 'ﷇ'),
- ('ﷰ', 'ﷻ'),
- ('ﹰ', 'ﹴ'),
- ('ﹶ', 'ﻼ'),
- ('A', 'Z'),
- ('a', 'z'),
- ('ヲ', 'ᄒ'),
- ('ᅡ', 'ᅦ'),
- ('ᅧ', 'ᅬ'),
- ('ᅭ', 'ᅲ'),
- ('ᅳ', 'ᅵ'),
- ('𐀀', '𐀋'),
- ('𐀍', '𐀦'),
- ('𐀨', '𐀺'),
- ('𐀼', '𐀽'),
- ('𐀿', '𐁍'),
- ('𐁐', '𐁝'),
- ('𐂀', '𐃺'),
- ('𐊀', '𐊜'),
- ('𐊠', '𐋐'),
- ('𐌀', '𐌟'),
- ('𐌭', '𐍀'),
- ('𐍂', '𐍉'),
- ('𐍐', '𐍵'),
- ('𐎀', '𐎝'),
- ('𐎠', '𐏃'),
- ('𐏈', '𐏏'),
- ('𐐀', '𐒝'),
- ('𐒰', '𐓓'),
- ('𐓘', '𐓻'),
- ('𐔀', '𐔧'),
- ('𐔰', '𐕣'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐗀', '𐗳'),
- ('𐘀', '𐜶'),
- ('𐝀', '𐝕'),
- ('𐝠', '𐝧'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𐠀', '𐠅'),
- ('𐠈', '𐠈'),
- ('𐠊', '𐠵'),
- ('𐠷', '𐠸'),
- ('𐠼', '𐠼'),
- ('𐠿', '𐡕'),
- ('𐡠', '𐡶'),
- ('𐢀', '𐢞'),
- ('𐣠', '𐣲'),
- ('𐣴', '𐣵'),
- ('𐤀', '𐤕'),
- ('𐤠', '𐤹'),
- ('𐦀', '𐦷'),
- ('𐦾', '𐦿'),
- ('𐨀', '𐨀'),
- ('𐨐', '𐨓'),
- ('𐨕', '𐨗'),
- ('𐨙', '𐨵'),
- ('𐩠', '𐩼'),
- ('𐪀', '𐪜'),
- ('𐫀', '𐫇'),
- ('𐫉', '𐫤'),
- ('𐬀', '𐬵'),
- ('𐭀', '𐭕'),
- ('𐭠', '𐭲'),
- ('𐮀', '𐮑'),
- ('𐰀', '𐱈'),
- ('𐲀', '𐲲'),
- ('𐳀', '𐳲'),
- ('𐴀', '𐴣'),
- ('𐵊', '𐵥'),
- ('𐵯', '𐶅'),
- ('𐺀', '𐺩'),
- ('𐺰', '𐺱'),
- ('𐻂', '𐻄'),
- ('𐼀', '𐼜'),
- ('𐼧', '𐼧'),
- ('𐼰', '𐽅'),
- ('𐽰', '𐾁'),
- ('𐾰', '𐿄'),
- ('𐿠', '𐿶'),
- ('𑀃', '𑀷'),
- ('𑁱', '𑁲'),
- ('𑁵', '𑁵'),
- ('𑂃', '𑂯'),
- ('𑃐', '𑃨'),
- ('𑄃', '𑄦'),
- ('𑅄', '𑅄'),
- ('𑅇', '𑅇'),
- ('𑅐', '𑅲'),
- ('𑅶', '𑅶'),
- ('𑆃', '𑆲'),
- ('𑇁', '𑇄'),
- ('𑇚', '𑇚'),
- ('𑇜', '𑇜'),
- ('𑈀', '𑈑'),
- ('𑈓', '𑈫'),
- ('𑈿', '𑉀'),
- ('𑊀', '𑊆'),
- ('𑊈', '𑊈'),
- ('𑊊', '𑊍'),
- ('𑊏', '𑊝'),
- ('𑊟', '𑊨'),
- ('𑊰', '𑋞'),
- ('𑌅', '𑌌'),
- ('𑌏', '𑌐'),
- ('𑌓', '𑌨'),
- ('𑌪', '𑌰'),
- ('𑌲', '𑌳'),
- ('𑌵', '𑌹'),
- ('𑌽', '𑌽'),
- ('𑍐', '𑍐'),
- ('𑍝', '𑍡'),
- ('𑎀', '𑎉'),
- ('𑎋', '𑎋'),
- ('𑎎', '𑎎'),
- ('𑎐', '𑎵'),
- ('𑎷', '𑎷'),
- ('𑏑', '𑏑'),
- ('𑏓', '𑏓'),
- ('𑐀', '𑐴'),
- ('𑑇', '𑑊'),
- ('𑑟', '𑑡'),
- ('𑒀', '𑒯'),
- ('𑓄', '𑓅'),
- ('𑓇', '𑓇'),
- ('𑖀', '𑖮'),
- ('𑗘', '𑗛'),
- ('𑘀', '𑘯'),
- ('𑙄', '𑙄'),
- ('𑚀', '𑚪'),
- ('𑚸', '𑚸'),
- ('𑜀', '𑜚'),
- ('𑝀', '𑝆'),
- ('𑠀', '𑠫'),
- ('𑢠', '𑣟'),
- ('𑣿', '𑤆'),
- ('𑤉', '𑤉'),
- ('𑤌', '𑤓'),
- ('𑤕', '𑤖'),
- ('𑤘', '𑤯'),
- ('𑤿', '𑤿'),
- ('𑥁', '𑥁'),
- ('𑦠', '𑦧'),
- ('𑦪', '𑧐'),
- ('𑧡', '𑧡'),
- ('𑧣', '𑧣'),
- ('𑨀', '𑨀'),
- ('𑨋', '𑨲'),
- ('𑨺', '𑨺'),
- ('𑩐', '𑩐'),
- ('𑩜', '𑪉'),
- ('𑪝', '𑪝'),
- ('𑪰', '𑫸'),
- ('𑯀', '𑯠'),
- ('𑰀', '𑰈'),
- ('𑰊', '𑰮'),
- ('𑱀', '𑱀'),
- ('𑱲', '𑲏'),
- ('𑴀', '𑴆'),
- ('𑴈', '𑴉'),
- ('𑴋', '𑴰'),
- ('𑵆', '𑵆'),
- ('𑵠', '𑵥'),
- ('𑵧', '𑵨'),
- ('𑵪', '𑶉'),
- ('𑶘', '𑶘'),
- ('𑻠', '𑻲'),
- ('𑼂', '𑼂'),
- ('𑼄', '𑼐'),
- ('𑼒', '𑼳'),
- ('𑾰', '𑾰'),
- ('𒀀', '𒎙'),
- ('𒒀', '𒕃'),
- ('𒾐', '𒿰'),
- ('𓀀', '𓐯'),
- ('𓑁', '𓑆'),
- ('𓑠', '𔏺'),
- ('𔐀', '𔙆'),
- ('𖄀', '𖄝'),
- ('𖠀', '𖨸'),
- ('𖩀', '𖩞'),
- ('𖩰', '𖪾'),
- ('𖫐', '𖫭'),
- ('𖬀', '𖬯'),
- ('𖭀', '𖭃'),
- ('𖭣', '𖭷'),
- ('𖭽', '𖮏'),
- ('𖵀', '𖵬'),
- ('𖹀', '𖹿'),
- ('𖼀', '𖽊'),
- ('𖽐', '𖽐'),
- ('𖾓', '𖾟'),
- ('𖿠', '𖿡'),
- ('𖿣', '𖿣'),
- ('𗀀', '𘟷'),
- ('𘠀', '𘳕'),
- ('𘳿', '𘴈'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('𛀀', '𛄢'),
- ('𛄲', '𛄲'),
- ('𛅐', '𛅒'),
- ('𛅕', '𛅕'),
- ('𛅤', '𛅧'),
- ('𛅰', '𛋻'),
- ('𛰀', '𛱪'),
- ('𛱰', '𛱼'),
- ('𛲀', '𛲈'),
- ('𛲐', '𛲙'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝛀'),
- ('𝛂', '𝛚'),
- ('𝛜', '𝛺'),
- ('𝛼', '𝜔'),
- ('𝜖', '𝜴'),
- ('𝜶', '𝝎'),
- ('𝝐', '𝝮'),
- ('𝝰', '𝞈'),
- ('𝞊', '𝞨'),
- ('𝞪', '𝟂'),
- ('𝟄', '𝟋'),
- ('𝼀', '𝼞'),
- ('𝼥', '𝼪'),
- ('𞀰', '𞁭'),
- ('𞄀', '𞄬'),
- ('𞄷', '𞄽'),
- ('𞅎', '𞅎'),
- ('𞊐', '𞊭'),
- ('𞋀', '𞋫'),
- ('𞓐', '𞓫'),
- ('𞗐', '𞗭'),
- ('𞗰', '𞗰'),
- ('𞟠', '𞟦'),
- ('𞟨', '𞟫'),
- ('𞟭', '𞟮'),
- ('𞟰', '𞟾'),
- ('𞠀', '𞣄'),
- ('𞤀', '𞥃'),
- ('𞥋', '𞥋'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('𠀀', '𪛟'),
- ('𪜀', '𫜹'),
- ('𫝀', '𫠝'),
- ('𫠠', '𬺡'),
- ('𬺰', '𮯠'),
- ('𮯰', '𮹝'),
- ('丽', '𪘀'),
- ('𰀀', '𱍊'),
- ('𱍐', '𲎯'),
-];
-
-pub const LETTER_NUMBER: &'static [(char, char)] = &[
- ('ᛮ', 'ᛰ'),
- ('Ⅰ', 'ↂ'),
- ('ↅ', 'ↈ'),
- ('〇', '〇'),
- ('〡', '〩'),
- ('〸', '〺'),
- ('ꛦ', 'ꛯ'),
- ('𐅀', '𐅴'),
- ('𐍁', '𐍁'),
- ('𐍊', '𐍊'),
- ('𐏑', '𐏕'),
- ('𒐀', '𒑮'),
-];
-
-pub const LINE_SEPARATOR: &'static [(char, char)] =
- &[('\u{2028}', '\u{2028}')];
-
-pub const LOWERCASE_LETTER: &'static [(char, char)] = &[
- ('a', 'z'),
- ('µ', 'µ'),
- ('ß', 'ö'),
- ('ø', 'ÿ'),
- ('ā', 'ā'),
- ('ă', 'ă'),
- ('ą', 'ą'),
- ('ć', 'ć'),
- ('ĉ', 'ĉ'),
- ('ċ', 'ċ'),
- ('č', 'č'),
- ('ď', 'ď'),
- ('đ', 'đ'),
- ('ē', 'ē'),
- ('ĕ', 'ĕ'),
- ('ė', 'ė'),
- ('ę', 'ę'),
- ('ě', 'ě'),
- ('ĝ', 'ĝ'),
- ('ğ', 'ğ'),
- ('ġ', 'ġ'),
- ('ģ', 'ģ'),
- ('ĥ', 'ĥ'),
- ('ħ', 'ħ'),
- ('ĩ', 'ĩ'),
- ('ī', 'ī'),
- ('ĭ', 'ĭ'),
- ('į', 'į'),
- ('ı', 'ı'),
- ('ij', 'ij'),
- ('ĵ', 'ĵ'),
- ('ķ', 'ĸ'),
- ('ĺ', 'ĺ'),
- ('ļ', 'ļ'),
- ('ľ', 'ľ'),
- ('ŀ', 'ŀ'),
- ('ł', 'ł'),
- ('ń', 'ń'),
- ('ņ', 'ņ'),
- ('ň', 'ʼn'),
- ('ŋ', 'ŋ'),
- ('ō', 'ō'),
- ('ŏ', 'ŏ'),
- ('ő', 'ő'),
- ('œ', 'œ'),
- ('ŕ', 'ŕ'),
- ('ŗ', 'ŗ'),
- ('ř', 'ř'),
- ('ś', 'ś'),
- ('ŝ', 'ŝ'),
- ('ş', 'ş'),
- ('š', 'š'),
- ('ţ', 'ţ'),
- ('ť', 'ť'),
- ('ŧ', 'ŧ'),
- ('ũ', 'ũ'),
- ('ū', 'ū'),
- ('ŭ', 'ŭ'),
- ('ů', 'ů'),
- ('ű', 'ű'),
- ('ų', 'ų'),
- ('ŵ', 'ŵ'),
- ('ŷ', 'ŷ'),
- ('ź', 'ź'),
- ('ż', 'ż'),
- ('ž', 'ƀ'),
- ('ƃ', 'ƃ'),
- ('ƅ', 'ƅ'),
- ('ƈ', 'ƈ'),
- ('ƌ', 'ƍ'),
- ('ƒ', 'ƒ'),
- ('ƕ', 'ƕ'),
- ('ƙ', 'ƛ'),
- ('ƞ', 'ƞ'),
- ('ơ', 'ơ'),
- ('ƣ', 'ƣ'),
- ('ƥ', 'ƥ'),
- ('ƨ', 'ƨ'),
- ('ƪ', 'ƫ'),
- ('ƭ', 'ƭ'),
- ('ư', 'ư'),
- ('ƴ', 'ƴ'),
- ('ƶ', 'ƶ'),
- ('ƹ', 'ƺ'),
- ('ƽ', 'ƿ'),
- ('dž', 'dž'),
- ('lj', 'lj'),
- ('nj', 'nj'),
- ('ǎ', 'ǎ'),
- ('ǐ', 'ǐ'),
- ('ǒ', 'ǒ'),
- ('ǔ', 'ǔ'),
- ('ǖ', 'ǖ'),
- ('ǘ', 'ǘ'),
- ('ǚ', 'ǚ'),
- ('ǜ', 'ǝ'),
- ('ǟ', 'ǟ'),
- ('ǡ', 'ǡ'),
- ('ǣ', 'ǣ'),
- ('ǥ', 'ǥ'),
- ('ǧ', 'ǧ'),
- ('ǩ', 'ǩ'),
- ('ǫ', 'ǫ'),
- ('ǭ', 'ǭ'),
- ('ǯ', 'ǰ'),
- ('dz', 'dz'),
- ('ǵ', 'ǵ'),
- ('ǹ', 'ǹ'),
- ('ǻ', 'ǻ'),
- ('ǽ', 'ǽ'),
- ('ǿ', 'ǿ'),
- ('ȁ', 'ȁ'),
- ('ȃ', 'ȃ'),
- ('ȅ', 'ȅ'),
- ('ȇ', 'ȇ'),
- ('ȉ', 'ȉ'),
- ('ȋ', 'ȋ'),
- ('ȍ', 'ȍ'),
- ('ȏ', 'ȏ'),
- ('ȑ', 'ȑ'),
- ('ȓ', 'ȓ'),
- ('ȕ', 'ȕ'),
- ('ȗ', 'ȗ'),
- ('ș', 'ș'),
- ('ț', 'ț'),
- ('ȝ', 'ȝ'),
- ('ȟ', 'ȟ'),
- ('ȡ', 'ȡ'),
- ('ȣ', 'ȣ'),
- ('ȥ', 'ȥ'),
- ('ȧ', 'ȧ'),
- ('ȩ', 'ȩ'),
- ('ȫ', 'ȫ'),
- ('ȭ', 'ȭ'),
- ('ȯ', 'ȯ'),
- ('ȱ', 'ȱ'),
- ('ȳ', 'ȹ'),
- ('ȼ', 'ȼ'),
- ('ȿ', 'ɀ'),
- ('ɂ', 'ɂ'),
- ('ɇ', 'ɇ'),
- ('ɉ', 'ɉ'),
- ('ɋ', 'ɋ'),
- ('ɍ', 'ɍ'),
- ('ɏ', 'ʓ'),
- ('ʕ', 'ʯ'),
- ('ͱ', 'ͱ'),
- ('ͳ', 'ͳ'),
- ('ͷ', 'ͷ'),
- ('ͻ', 'ͽ'),
- ('ΐ', 'ΐ'),
- ('ά', 'ώ'),
- ('ϐ', 'ϑ'),
- ('ϕ', 'ϗ'),
- ('ϙ', 'ϙ'),
- ('ϛ', 'ϛ'),
- ('ϝ', 'ϝ'),
- ('ϟ', 'ϟ'),
- ('ϡ', 'ϡ'),
- ('ϣ', 'ϣ'),
- ('ϥ', 'ϥ'),
- ('ϧ', 'ϧ'),
- ('ϩ', 'ϩ'),
- ('ϫ', 'ϫ'),
- ('ϭ', 'ϭ'),
- ('ϯ', 'ϳ'),
- ('ϵ', 'ϵ'),
- ('ϸ', 'ϸ'),
- ('ϻ', 'ϼ'),
- ('а', 'џ'),
- ('ѡ', 'ѡ'),
- ('ѣ', 'ѣ'),
- ('ѥ', 'ѥ'),
- ('ѧ', 'ѧ'),
- ('ѩ', 'ѩ'),
- ('ѫ', 'ѫ'),
- ('ѭ', 'ѭ'),
- ('ѯ', 'ѯ'),
- ('ѱ', 'ѱ'),
- ('ѳ', 'ѳ'),
- ('ѵ', 'ѵ'),
- ('ѷ', 'ѷ'),
- ('ѹ', 'ѹ'),
- ('ѻ', 'ѻ'),
- ('ѽ', 'ѽ'),
- ('ѿ', 'ѿ'),
- ('ҁ', 'ҁ'),
- ('ҋ', 'ҋ'),
- ('ҍ', 'ҍ'),
- ('ҏ', 'ҏ'),
- ('ґ', 'ґ'),
- ('ғ', 'ғ'),
- ('ҕ', 'ҕ'),
- ('җ', 'җ'),
- ('ҙ', 'ҙ'),
- ('қ', 'қ'),
- ('ҝ', 'ҝ'),
- ('ҟ', 'ҟ'),
- ('ҡ', 'ҡ'),
- ('ң', 'ң'),
- ('ҥ', 'ҥ'),
- ('ҧ', 'ҧ'),
- ('ҩ', 'ҩ'),
- ('ҫ', 'ҫ'),
- ('ҭ', 'ҭ'),
- ('ү', 'ү'),
- ('ұ', 'ұ'),
- ('ҳ', 'ҳ'),
- ('ҵ', 'ҵ'),
- ('ҷ', 'ҷ'),
- ('ҹ', 'ҹ'),
- ('һ', 'һ'),
- ('ҽ', 'ҽ'),
- ('ҿ', 'ҿ'),
- ('ӂ', 'ӂ'),
- ('ӄ', 'ӄ'),
- ('ӆ', 'ӆ'),
- ('ӈ', 'ӈ'),
- ('ӊ', 'ӊ'),
- ('ӌ', 'ӌ'),
- ('ӎ', 'ӏ'),
- ('ӑ', 'ӑ'),
- ('ӓ', 'ӓ'),
- ('ӕ', 'ӕ'),
- ('ӗ', 'ӗ'),
- ('ә', 'ә'),
- ('ӛ', 'ӛ'),
- ('ӝ', 'ӝ'),
- ('ӟ', 'ӟ'),
- ('ӡ', 'ӡ'),
- ('ӣ', 'ӣ'),
- ('ӥ', 'ӥ'),
- ('ӧ', 'ӧ'),
- ('ө', 'ө'),
- ('ӫ', 'ӫ'),
- ('ӭ', 'ӭ'),
- ('ӯ', 'ӯ'),
- ('ӱ', 'ӱ'),
- ('ӳ', 'ӳ'),
- ('ӵ', 'ӵ'),
- ('ӷ', 'ӷ'),
- ('ӹ', 'ӹ'),
- ('ӻ', 'ӻ'),
- ('ӽ', 'ӽ'),
- ('ӿ', 'ӿ'),
- ('ԁ', 'ԁ'),
- ('ԃ', 'ԃ'),
- ('ԅ', 'ԅ'),
- ('ԇ', 'ԇ'),
- ('ԉ', 'ԉ'),
- ('ԋ', 'ԋ'),
- ('ԍ', 'ԍ'),
- ('ԏ', 'ԏ'),
- ('ԑ', 'ԑ'),
- ('ԓ', 'ԓ'),
- ('ԕ', 'ԕ'),
- ('ԗ', 'ԗ'),
- ('ԙ', 'ԙ'),
- ('ԛ', 'ԛ'),
- ('ԝ', 'ԝ'),
- ('ԟ', 'ԟ'),
- ('ԡ', 'ԡ'),
- ('ԣ', 'ԣ'),
- ('ԥ', 'ԥ'),
- ('ԧ', 'ԧ'),
- ('ԩ', 'ԩ'),
- ('ԫ', 'ԫ'),
- ('ԭ', 'ԭ'),
- ('ԯ', 'ԯ'),
- ('ՠ', 'ֈ'),
- ('ა', 'ჺ'),
- ('ჽ', 'ჿ'),
- ('ᏸ', 'ᏽ'),
- ('ᲀ', 'ᲈ'),
- ('ᲊ', 'ᲊ'),
- ('ᴀ', 'ᴫ'),
- ('ᵫ', 'ᵷ'),
- ('ᵹ', 'ᶚ'),
- ('ḁ', 'ḁ'),
- ('ḃ', 'ḃ'),
- ('ḅ', 'ḅ'),
- ('ḇ', 'ḇ'),
- ('ḉ', 'ḉ'),
- ('ḋ', 'ḋ'),
- ('ḍ', 'ḍ'),
- ('ḏ', 'ḏ'),
- ('ḑ', 'ḑ'),
- ('ḓ', 'ḓ'),
- ('ḕ', 'ḕ'),
- ('ḗ', 'ḗ'),
- ('ḙ', 'ḙ'),
- ('ḛ', 'ḛ'),
- ('ḝ', 'ḝ'),
- ('ḟ', 'ḟ'),
- ('ḡ', 'ḡ'),
- ('ḣ', 'ḣ'),
- ('ḥ', 'ḥ'),
- ('ḧ', 'ḧ'),
- ('ḩ', 'ḩ'),
- ('ḫ', 'ḫ'),
- ('ḭ', 'ḭ'),
- ('ḯ', 'ḯ'),
- ('ḱ', 'ḱ'),
- ('ḳ', 'ḳ'),
- ('ḵ', 'ḵ'),
- ('ḷ', 'ḷ'),
- ('ḹ', 'ḹ'),
- ('ḻ', 'ḻ'),
- ('ḽ', 'ḽ'),
- ('ḿ', 'ḿ'),
- ('ṁ', 'ṁ'),
- ('ṃ', 'ṃ'),
- ('ṅ', 'ṅ'),
- ('ṇ', 'ṇ'),
- ('ṉ', 'ṉ'),
- ('ṋ', 'ṋ'),
- ('ṍ', 'ṍ'),
- ('ṏ', 'ṏ'),
- ('ṑ', 'ṑ'),
- ('ṓ', 'ṓ'),
- ('ṕ', 'ṕ'),
- ('ṗ', 'ṗ'),
- ('ṙ', 'ṙ'),
- ('ṛ', 'ṛ'),
- ('ṝ', 'ṝ'),
- ('ṟ', 'ṟ'),
- ('ṡ', 'ṡ'),
- ('ṣ', 'ṣ'),
- ('ṥ', 'ṥ'),
- ('ṧ', 'ṧ'),
- ('ṩ', 'ṩ'),
- ('ṫ', 'ṫ'),
- ('ṭ', 'ṭ'),
- ('ṯ', 'ṯ'),
- ('ṱ', 'ṱ'),
- ('ṳ', 'ṳ'),
- ('ṵ', 'ṵ'),
- ('ṷ', 'ṷ'),
- ('ṹ', 'ṹ'),
- ('ṻ', 'ṻ'),
- ('ṽ', 'ṽ'),
- ('ṿ', 'ṿ'),
- ('ẁ', 'ẁ'),
- ('ẃ', 'ẃ'),
- ('ẅ', 'ẅ'),
- ('ẇ', 'ẇ'),
- ('ẉ', 'ẉ'),
- ('ẋ', 'ẋ'),
- ('ẍ', 'ẍ'),
- ('ẏ', 'ẏ'),
- ('ẑ', 'ẑ'),
- ('ẓ', 'ẓ'),
- ('ẕ', 'ẝ'),
- ('ẟ', 'ẟ'),
- ('ạ', 'ạ'),
- ('ả', 'ả'),
- ('ấ', 'ấ'),
- ('ầ', 'ầ'),
- ('ẩ', 'ẩ'),
- ('ẫ', 'ẫ'),
- ('ậ', 'ậ'),
- ('ắ', 'ắ'),
- ('ằ', 'ằ'),
- ('ẳ', 'ẳ'),
- ('ẵ', 'ẵ'),
- ('ặ', 'ặ'),
- ('ẹ', 'ẹ'),
- ('ẻ', 'ẻ'),
- ('ẽ', 'ẽ'),
- ('ế', 'ế'),
- ('ề', 'ề'),
- ('ể', 'ể'),
- ('ễ', 'ễ'),
- ('ệ', 'ệ'),
- ('ỉ', 'ỉ'),
- ('ị', 'ị'),
- ('ọ', 'ọ'),
- ('ỏ', 'ỏ'),
- ('ố', 'ố'),
- ('ồ', 'ồ'),
- ('ổ', 'ổ'),
- ('ỗ', 'ỗ'),
- ('ộ', 'ộ'),
- ('ớ', 'ớ'),
- ('ờ', 'ờ'),
- ('ở', 'ở'),
- ('ỡ', 'ỡ'),
- ('ợ', 'ợ'),
- ('ụ', 'ụ'),
- ('ủ', 'ủ'),
- ('ứ', 'ứ'),
- ('ừ', 'ừ'),
- ('ử', 'ử'),
- ('ữ', 'ữ'),
- ('ự', 'ự'),
- ('ỳ', 'ỳ'),
- ('ỵ', 'ỵ'),
- ('ỷ', 'ỷ'),
- ('ỹ', 'ỹ'),
- ('ỻ', 'ỻ'),
- ('ỽ', 'ỽ'),
- ('ỿ', 'ἇ'),
- ('ἐ', 'ἕ'),
- ('ἠ', 'ἧ'),
- ('ἰ', 'ἷ'),
- ('ὀ', 'ὅ'),
- ('ὐ', 'ὗ'),
- ('ὠ', 'ὧ'),
- ('ὰ', 'ώ'),
- ('ᾀ', 'ᾇ'),
- ('ᾐ', 'ᾗ'),
- ('ᾠ', 'ᾧ'),
- ('ᾰ', 'ᾴ'),
- ('ᾶ', 'ᾷ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῇ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'ῗ'),
- ('ῠ', 'ῧ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῷ'),
- ('ℊ', 'ℊ'),
- ('ℎ', 'ℏ'),
- ('ℓ', 'ℓ'),
- ('ℯ', 'ℯ'),
- ('ℴ', 'ℴ'),
- ('ℹ', 'ℹ'),
- ('ℼ', 'ℽ'),
- ('ⅆ', 'ⅉ'),
- ('ⅎ', 'ⅎ'),
- ('ↄ', 'ↄ'),
- ('ⰰ', 'ⱟ'),
- ('ⱡ', 'ⱡ'),
- ('ⱥ', 'ⱦ'),
- ('ⱨ', 'ⱨ'),
- ('ⱪ', 'ⱪ'),
- ('ⱬ', 'ⱬ'),
- ('ⱱ', 'ⱱ'),
- ('ⱳ', 'ⱴ'),
- ('ⱶ', 'ⱻ'),
- ('ⲁ', 'ⲁ'),
- ('ⲃ', 'ⲃ'),
- ('ⲅ', 'ⲅ'),
- ('ⲇ', 'ⲇ'),
- ('ⲉ', 'ⲉ'),
- ('ⲋ', 'ⲋ'),
- ('ⲍ', 'ⲍ'),
- ('ⲏ', 'ⲏ'),
- ('ⲑ', 'ⲑ'),
- ('ⲓ', 'ⲓ'),
- ('ⲕ', 'ⲕ'),
- ('ⲗ', 'ⲗ'),
- ('ⲙ', 'ⲙ'),
- ('ⲛ', 'ⲛ'),
- ('ⲝ', 'ⲝ'),
- ('ⲟ', 'ⲟ'),
- ('ⲡ', 'ⲡ'),
- ('ⲣ', 'ⲣ'),
- ('ⲥ', 'ⲥ'),
- ('ⲧ', 'ⲧ'),
- ('ⲩ', 'ⲩ'),
- ('ⲫ', 'ⲫ'),
- ('ⲭ', 'ⲭ'),
- ('ⲯ', 'ⲯ'),
- ('ⲱ', 'ⲱ'),
- ('ⲳ', 'ⲳ'),
- ('ⲵ', 'ⲵ'),
- ('ⲷ', 'ⲷ'),
- ('ⲹ', 'ⲹ'),
- ('ⲻ', 'ⲻ'),
- ('ⲽ', 'ⲽ'),
- ('ⲿ', 'ⲿ'),
- ('ⳁ', 'ⳁ'),
- ('ⳃ', 'ⳃ'),
- ('ⳅ', 'ⳅ'),
- ('ⳇ', 'ⳇ'),
- ('ⳉ', 'ⳉ'),
- ('ⳋ', 'ⳋ'),
- ('ⳍ', 'ⳍ'),
- ('ⳏ', 'ⳏ'),
- ('ⳑ', 'ⳑ'),
- ('ⳓ', 'ⳓ'),
- ('ⳕ', 'ⳕ'),
- ('ⳗ', 'ⳗ'),
- ('ⳙ', 'ⳙ'),
- ('ⳛ', 'ⳛ'),
- ('ⳝ', 'ⳝ'),
- ('ⳟ', 'ⳟ'),
- ('ⳡ', 'ⳡ'),
- ('ⳣ', 'ⳤ'),
- ('ⳬ', 'ⳬ'),
- ('ⳮ', 'ⳮ'),
- ('ⳳ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ꙁ', 'ꙁ'),
- ('ꙃ', 'ꙃ'),
- ('ꙅ', 'ꙅ'),
- ('ꙇ', 'ꙇ'),
- ('ꙉ', 'ꙉ'),
- ('ꙋ', 'ꙋ'),
- ('ꙍ', 'ꙍ'),
- ('ꙏ', 'ꙏ'),
- ('ꙑ', 'ꙑ'),
- ('ꙓ', 'ꙓ'),
- ('ꙕ', 'ꙕ'),
- ('ꙗ', 'ꙗ'),
- ('ꙙ', 'ꙙ'),
- ('ꙛ', 'ꙛ'),
- ('ꙝ', 'ꙝ'),
- ('ꙟ', 'ꙟ'),
- ('ꙡ', 'ꙡ'),
- ('ꙣ', 'ꙣ'),
- ('ꙥ', 'ꙥ'),
- ('ꙧ', 'ꙧ'),
- ('ꙩ', 'ꙩ'),
- ('ꙫ', 'ꙫ'),
- ('ꙭ', 'ꙭ'),
- ('ꚁ', 'ꚁ'),
- ('ꚃ', 'ꚃ'),
- ('ꚅ', 'ꚅ'),
- ('ꚇ', 'ꚇ'),
- ('ꚉ', 'ꚉ'),
- ('ꚋ', 'ꚋ'),
- ('ꚍ', 'ꚍ'),
- ('ꚏ', 'ꚏ'),
- ('ꚑ', 'ꚑ'),
- ('ꚓ', 'ꚓ'),
- ('ꚕ', 'ꚕ'),
- ('ꚗ', 'ꚗ'),
- ('ꚙ', 'ꚙ'),
- ('ꚛ', 'ꚛ'),
- ('ꜣ', 'ꜣ'),
- ('ꜥ', 'ꜥ'),
- ('ꜧ', 'ꜧ'),
- ('ꜩ', 'ꜩ'),
- ('ꜫ', 'ꜫ'),
- ('ꜭ', 'ꜭ'),
- ('ꜯ', 'ꜱ'),
- ('ꜳ', 'ꜳ'),
- ('ꜵ', 'ꜵ'),
- ('ꜷ', 'ꜷ'),
- ('ꜹ', 'ꜹ'),
- ('ꜻ', 'ꜻ'),
- ('ꜽ', 'ꜽ'),
- ('ꜿ', 'ꜿ'),
- ('ꝁ', 'ꝁ'),
- ('ꝃ', 'ꝃ'),
- ('ꝅ', 'ꝅ'),
- ('ꝇ', 'ꝇ'),
- ('ꝉ', 'ꝉ'),
- ('ꝋ', 'ꝋ'),
- ('ꝍ', 'ꝍ'),
- ('ꝏ', 'ꝏ'),
- ('ꝑ', 'ꝑ'),
- ('ꝓ', 'ꝓ'),
- ('ꝕ', 'ꝕ'),
- ('ꝗ', 'ꝗ'),
- ('ꝙ', 'ꝙ'),
- ('ꝛ', 'ꝛ'),
- ('ꝝ', 'ꝝ'),
- ('ꝟ', 'ꝟ'),
- ('ꝡ', 'ꝡ'),
- ('ꝣ', 'ꝣ'),
- ('ꝥ', 'ꝥ'),
- ('ꝧ', 'ꝧ'),
- ('ꝩ', 'ꝩ'),
- ('ꝫ', 'ꝫ'),
- ('ꝭ', 'ꝭ'),
- ('ꝯ', 'ꝯ'),
- ('ꝱ', 'ꝸ'),
- ('ꝺ', 'ꝺ'),
- ('ꝼ', 'ꝼ'),
- ('ꝿ', 'ꝿ'),
- ('ꞁ', 'ꞁ'),
- ('ꞃ', 'ꞃ'),
- ('ꞅ', 'ꞅ'),
- ('ꞇ', 'ꞇ'),
- ('ꞌ', 'ꞌ'),
- ('ꞎ', 'ꞎ'),
- ('ꞑ', 'ꞑ'),
- ('ꞓ', 'ꞕ'),
- ('ꞗ', 'ꞗ'),
- ('ꞙ', 'ꞙ'),
- ('ꞛ', 'ꞛ'),
- ('ꞝ', 'ꞝ'),
- ('ꞟ', 'ꞟ'),
- ('ꞡ', 'ꞡ'),
- ('ꞣ', 'ꞣ'),
- ('ꞥ', 'ꞥ'),
- ('ꞧ', 'ꞧ'),
- ('ꞩ', 'ꞩ'),
- ('ꞯ', 'ꞯ'),
- ('ꞵ', 'ꞵ'),
- ('ꞷ', 'ꞷ'),
- ('ꞹ', 'ꞹ'),
- ('ꞻ', 'ꞻ'),
- ('ꞽ', 'ꞽ'),
- ('ꞿ', 'ꞿ'),
- ('ꟁ', 'ꟁ'),
- ('ꟃ', 'ꟃ'),
- ('ꟈ', 'ꟈ'),
- ('ꟊ', 'ꟊ'),
- ('ꟍ', 'ꟍ'),
- ('ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'ꟕ'),
- ('ꟗ', 'ꟗ'),
- ('ꟙ', 'ꟙ'),
- ('ꟛ', 'ꟛ'),
- ('ꟶ', 'ꟶ'),
- ('ꟺ', 'ꟺ'),
- ('ꬰ', 'ꭚ'),
- ('ꭠ', 'ꭨ'),
- ('ꭰ', 'ꮿ'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('a', 'z'),
- ('𐐨', '𐑏'),
- ('𐓘', '𐓻'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐳀', '𐳲'),
- ('𐵰', '𐶅'),
- ('𑣀', '𑣟'),
- ('𖹠', '𖹿'),
- ('𝐚', '𝐳'),
- ('𝑎', '𝑔'),
- ('𝑖', '𝑧'),
- ('𝒂', '𝒛'),
- ('𝒶', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝓏'),
- ('𝓪', '𝔃'),
- ('𝔞', '𝔷'),
- ('𝕒', '𝕫'),
- ('𝖆', '𝖟'),
- ('𝖺', '𝗓'),
- ('𝗮', '𝘇'),
- ('𝘢', '𝘻'),
- ('𝙖', '𝙯'),
- ('𝚊', '𝚥'),
- ('𝛂', '𝛚'),
- ('𝛜', '𝛡'),
- ('𝛼', '𝜔'),
- ('𝜖', '𝜛'),
- ('𝜶', '𝝎'),
- ('𝝐', '𝝕'),
- ('𝝰', '𝞈'),
- ('𝞊', '𝞏'),
- ('𝞪', '𝟂'),
- ('𝟄', '𝟉'),
- ('𝟋', '𝟋'),
- ('𝼀', '𝼉'),
- ('𝼋', '𝼞'),
- ('𝼥', '𝼪'),
- ('𞤢', '𞥃'),
-];
-
-pub const MARK: &'static [(char, char)] = &[
- ('\u{300}', '\u{36f}'),
- ('\u{483}', '\u{489}'),
- ('\u{591}', '\u{5bd}'),
- ('\u{5bf}', '\u{5bf}'),
- ('\u{5c1}', '\u{5c2}'),
- ('\u{5c4}', '\u{5c5}'),
- ('\u{5c7}', '\u{5c7}'),
- ('\u{610}', '\u{61a}'),
- ('\u{64b}', '\u{65f}'),
- ('\u{670}', '\u{670}'),
- ('\u{6d6}', '\u{6dc}'),
- ('\u{6df}', '\u{6e4}'),
- ('\u{6e7}', '\u{6e8}'),
- ('\u{6ea}', '\u{6ed}'),
- ('\u{711}', '\u{711}'),
- ('\u{730}', '\u{74a}'),
- ('\u{7a6}', '\u{7b0}'),
- ('\u{7eb}', '\u{7f3}'),
- ('\u{7fd}', '\u{7fd}'),
- ('\u{816}', '\u{819}'),
- ('\u{81b}', '\u{823}'),
- ('\u{825}', '\u{827}'),
- ('\u{829}', '\u{82d}'),
- ('\u{859}', '\u{85b}'),
- ('\u{897}', '\u{89f}'),
- ('\u{8ca}', '\u{8e1}'),
- ('\u{8e3}', 'ः'),
- ('\u{93a}', '\u{93c}'),
- ('ा', 'ॏ'),
- ('\u{951}', '\u{957}'),
- ('\u{962}', '\u{963}'),
- ('\u{981}', 'ঃ'),
- ('\u{9bc}', '\u{9bc}'),
- ('\u{9be}', '\u{9c4}'),
- ('ে', 'ৈ'),
- ('ো', '\u{9cd}'),
- ('\u{9d7}', '\u{9d7}'),
- ('\u{9e2}', '\u{9e3}'),
- ('\u{9fe}', '\u{9fe}'),
- ('\u{a01}', 'ਃ'),
- ('\u{a3c}', '\u{a3c}'),
- ('ਾ', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4d}'),
- ('\u{a51}', '\u{a51}'),
- ('\u{a70}', '\u{a71}'),
- ('\u{a75}', '\u{a75}'),
- ('\u{a81}', 'ઃ'),
- ('\u{abc}', '\u{abc}'),
- ('ા', '\u{ac5}'),
- ('\u{ac7}', 'ૉ'),
- ('ો', '\u{acd}'),
- ('\u{ae2}', '\u{ae3}'),
- ('\u{afa}', '\u{aff}'),
- ('\u{b01}', 'ଃ'),
- ('\u{b3c}', '\u{b3c}'),
- ('\u{b3e}', '\u{b44}'),
- ('େ', 'ୈ'),
- ('ୋ', '\u{b4d}'),
- ('\u{b55}', '\u{b57}'),
- ('\u{b62}', '\u{b63}'),
- ('\u{b82}', '\u{b82}'),
- ('\u{bbe}', 'ூ'),
- ('ெ', 'ை'),
- ('ொ', '\u{bcd}'),
- ('\u{bd7}', '\u{bd7}'),
- ('\u{c00}', '\u{c04}'),
- ('\u{c3c}', '\u{c3c}'),
- ('\u{c3e}', 'ౄ'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4d}'),
- ('\u{c55}', '\u{c56}'),
- ('\u{c62}', '\u{c63}'),
- ('\u{c81}', 'ಃ'),
- ('\u{cbc}', '\u{cbc}'),
- ('ಾ', 'ೄ'),
- ('\u{cc6}', '\u{cc8}'),
- ('\u{cca}', '\u{ccd}'),
- ('\u{cd5}', '\u{cd6}'),
- ('\u{ce2}', '\u{ce3}'),
- ('ೳ', 'ೳ'),
- ('\u{d00}', 'ഃ'),
- ('\u{d3b}', '\u{d3c}'),
- ('\u{d3e}', '\u{d44}'),
- ('െ', 'ൈ'),
- ('ൊ', '\u{d4d}'),
- ('\u{d57}', '\u{d57}'),
- ('\u{d62}', '\u{d63}'),
- ('\u{d81}', 'ඃ'),
- ('\u{dca}', '\u{dca}'),
- ('\u{dcf}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('ෘ', '\u{ddf}'),
- ('ෲ', 'ෳ'),
- ('\u{e31}', '\u{e31}'),
- ('\u{e34}', '\u{e3a}'),
- ('\u{e47}', '\u{e4e}'),
- ('\u{eb1}', '\u{eb1}'),
- ('\u{eb4}', '\u{ebc}'),
- ('\u{ec8}', '\u{ece}'),
- ('\u{f18}', '\u{f19}'),
- ('\u{f35}', '\u{f35}'),
- ('\u{f37}', '\u{f37}'),
- ('\u{f39}', '\u{f39}'),
- ('༾', '༿'),
- ('\u{f71}', '\u{f84}'),
- ('\u{f86}', '\u{f87}'),
- ('\u{f8d}', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('\u{fc6}', '\u{fc6}'),
- ('ါ', '\u{103e}'),
- ('ၖ', '\u{1059}'),
- ('\u{105e}', '\u{1060}'),
- ('ၢ', 'ၤ'),
- ('ၧ', 'ၭ'),
- ('\u{1071}', '\u{1074}'),
- ('\u{1082}', '\u{108d}'),
- ('ႏ', 'ႏ'),
- ('ႚ', '\u{109d}'),
- ('\u{135d}', '\u{135f}'),
- ('\u{1712}', '\u{1715}'),
- ('\u{1732}', '\u{1734}'),
- ('\u{1752}', '\u{1753}'),
- ('\u{1772}', '\u{1773}'),
- ('\u{17b4}', '\u{17d3}'),
- ('\u{17dd}', '\u{17dd}'),
- ('\u{180b}', '\u{180d}'),
- ('\u{180f}', '\u{180f}'),
- ('\u{1885}', '\u{1886}'),
- ('\u{18a9}', '\u{18a9}'),
- ('\u{1920}', 'ᤫ'),
- ('ᤰ', '\u{193b}'),
- ('\u{1a17}', '\u{1a1b}'),
- ('ᩕ', '\u{1a5e}'),
- ('\u{1a60}', '\u{1a7c}'),
- ('\u{1a7f}', '\u{1a7f}'),
- ('\u{1ab0}', '\u{1ace}'),
- ('\u{1b00}', 'ᬄ'),
- ('\u{1b34}', '\u{1b44}'),
- ('\u{1b6b}', '\u{1b73}'),
- ('\u{1b80}', 'ᮂ'),
- ('ᮡ', '\u{1bad}'),
- ('\u{1be6}', '\u{1bf3}'),
- ('ᰤ', '\u{1c37}'),
- ('\u{1cd0}', '\u{1cd2}'),
- ('\u{1cd4}', '\u{1ce8}'),
- ('\u{1ced}', '\u{1ced}'),
- ('\u{1cf4}', '\u{1cf4}'),
- ('᳷', '\u{1cf9}'),
- ('\u{1dc0}', '\u{1dff}'),
- ('\u{20d0}', '\u{20f0}'),
- ('\u{2cef}', '\u{2cf1}'),
- ('\u{2d7f}', '\u{2d7f}'),
- ('\u{2de0}', '\u{2dff}'),
- ('\u{302a}', '\u{302f}'),
- ('\u{3099}', '\u{309a}'),
- ('\u{a66f}', '\u{a672}'),
- ('\u{a674}', '\u{a67d}'),
- ('\u{a69e}', '\u{a69f}'),
- ('\u{a6f0}', '\u{a6f1}'),
- ('\u{a802}', '\u{a802}'),
- ('\u{a806}', '\u{a806}'),
- ('\u{a80b}', '\u{a80b}'),
- ('ꠣ', 'ꠧ'),
- ('\u{a82c}', '\u{a82c}'),
- ('ꢀ', 'ꢁ'),
- ('ꢴ', '\u{a8c5}'),
- ('\u{a8e0}', '\u{a8f1}'),
- ('\u{a8ff}', '\u{a8ff}'),
- ('\u{a926}', '\u{a92d}'),
- ('\u{a947}', '\u{a953}'),
- ('\u{a980}', 'ꦃ'),
- ('\u{a9b3}', '\u{a9c0}'),
- ('\u{a9e5}', '\u{a9e5}'),
- ('\u{aa29}', '\u{aa36}'),
- ('\u{aa43}', '\u{aa43}'),
- ('\u{aa4c}', 'ꩍ'),
- ('ꩻ', 'ꩽ'),
- ('\u{aab0}', '\u{aab0}'),
- ('\u{aab2}', '\u{aab4}'),
- ('\u{aab7}', '\u{aab8}'),
- ('\u{aabe}', '\u{aabf}'),
- ('\u{aac1}', '\u{aac1}'),
- ('ꫫ', 'ꫯ'),
- ('ꫵ', '\u{aaf6}'),
- ('ꯣ', 'ꯪ'),
- ('꯬', '\u{abed}'),
- ('\u{fb1e}', '\u{fb1e}'),
- ('\u{fe00}', '\u{fe0f}'),
- ('\u{fe20}', '\u{fe2f}'),
- ('\u{101fd}', '\u{101fd}'),
- ('\u{102e0}', '\u{102e0}'),
- ('\u{10376}', '\u{1037a}'),
- ('\u{10a01}', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '\u{10a0f}'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '\u{10a3f}'),
- ('\u{10ae5}', '\u{10ae6}'),
- ('\u{10d24}', '\u{10d27}'),
- ('\u{10d69}', '\u{10d6d}'),
- ('\u{10eab}', '\u{10eac}'),
- ('\u{10efc}', '\u{10eff}'),
- ('\u{10f46}', '\u{10f50}'),
- ('\u{10f82}', '\u{10f85}'),
- ('𑀀', '𑀂'),
- ('\u{11038}', '\u{11046}'),
- ('\u{11070}', '\u{11070}'),
- ('\u{11073}', '\u{11074}'),
- ('\u{1107f}', '𑂂'),
- ('𑂰', '\u{110ba}'),
- ('\u{110c2}', '\u{110c2}'),
- ('\u{11100}', '\u{11102}'),
- ('\u{11127}', '\u{11134}'),
- ('𑅅', '𑅆'),
- ('\u{11173}', '\u{11173}'),
- ('\u{11180}', '𑆂'),
- ('𑆳', '\u{111c0}'),
- ('\u{111c9}', '\u{111cc}'),
- ('𑇎', '\u{111cf}'),
- ('𑈬', '\u{11237}'),
- ('\u{1123e}', '\u{1123e}'),
- ('\u{11241}', '\u{11241}'),
- ('\u{112df}', '\u{112ea}'),
- ('\u{11300}', '𑌃'),
- ('\u{1133b}', '\u{1133c}'),
- ('\u{1133e}', '𑍄'),
- ('𑍇', '𑍈'),
- ('𑍋', '\u{1134d}'),
- ('\u{11357}', '\u{11357}'),
- ('𑍢', '𑍣'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
- ('\u{113b8}', '\u{113c0}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '𑏊'),
- ('𑏌', '\u{113d0}'),
- ('\u{113d2}', '\u{113d2}'),
- ('\u{113e1}', '\u{113e2}'),
- ('𑐵', '\u{11446}'),
- ('\u{1145e}', '\u{1145e}'),
- ('\u{114b0}', '\u{114c3}'),
- ('\u{115af}', '\u{115b5}'),
- ('𑖸', '\u{115c0}'),
- ('\u{115dc}', '\u{115dd}'),
- ('𑘰', '\u{11640}'),
- ('\u{116ab}', '\u{116b7}'),
- ('\u{1171d}', '\u{1172b}'),
- ('𑠬', '\u{1183a}'),
- ('\u{11930}', '𑤵'),
- ('𑤷', '𑤸'),
- ('\u{1193b}', '\u{1193e}'),
- ('𑥀', '𑥀'),
- ('𑥂', '\u{11943}'),
- ('𑧑', '\u{119d7}'),
- ('\u{119da}', '\u{119e0}'),
- ('𑧤', '𑧤'),
- ('\u{11a01}', '\u{11a0a}'),
- ('\u{11a33}', '𑨹'),
- ('\u{11a3b}', '\u{11a3e}'),
- ('\u{11a47}', '\u{11a47}'),
- ('\u{11a51}', '\u{11a5b}'),
- ('\u{11a8a}', '\u{11a99}'),
- ('𑰯', '\u{11c36}'),
- ('\u{11c38}', '\u{11c3f}'),
- ('\u{11c92}', '\u{11ca7}'),
- ('𑲩', '\u{11cb6}'),
- ('\u{11d31}', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d45}'),
- ('\u{11d47}', '\u{11d47}'),
- ('𑶊', '𑶎'),
- ('\u{11d90}', '\u{11d91}'),
- ('𑶓', '\u{11d97}'),
- ('\u{11ef3}', '𑻶'),
- ('\u{11f00}', '\u{11f01}'),
- ('𑼃', '𑼃'),
- ('𑼴', '\u{11f3a}'),
- ('𑼾', '\u{11f42}'),
- ('\u{11f5a}', '\u{11f5a}'),
- ('\u{13440}', '\u{13440}'),
- ('\u{13447}', '\u{13455}'),
- ('\u{1611e}', '\u{1612f}'),
- ('\u{16af0}', '\u{16af4}'),
- ('\u{16b30}', '\u{16b36}'),
- ('\u{16f4f}', '\u{16f4f}'),
- ('𖽑', '𖾇'),
- ('\u{16f8f}', '\u{16f92}'),
- ('\u{16fe4}', '\u{16fe4}'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('\u{1bc9d}', '\u{1bc9e}'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('\u{1d165}', '\u{1d169}'),
- ('\u{1d16d}', '\u{1d172}'),
- ('\u{1d17b}', '\u{1d182}'),
- ('\u{1d185}', '\u{1d18b}'),
- ('\u{1d1aa}', '\u{1d1ad}'),
- ('\u{1d242}', '\u{1d244}'),
- ('\u{1da00}', '\u{1da36}'),
- ('\u{1da3b}', '\u{1da6c}'),
- ('\u{1da75}', '\u{1da75}'),
- ('\u{1da84}', '\u{1da84}'),
- ('\u{1da9b}', '\u{1da9f}'),
- ('\u{1daa1}', '\u{1daaf}'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
- ('\u{1e08f}', '\u{1e08f}'),
- ('\u{1e130}', '\u{1e136}'),
- ('\u{1e2ae}', '\u{1e2ae}'),
- ('\u{1e2ec}', '\u{1e2ef}'),
- ('\u{1e4ec}', '\u{1e4ef}'),
- ('\u{1e5ee}', '\u{1e5ef}'),
- ('\u{1e8d0}', '\u{1e8d6}'),
- ('\u{1e944}', '\u{1e94a}'),
- ('\u{e0100}', '\u{e01ef}'),
-];
-
-pub const MATH_SYMBOL: &'static [(char, char)] = &[
- ('+', '+'),
- ('<', '>'),
- ('|', '|'),
- ('~', '~'),
- ('¬', '¬'),
- ('±', '±'),
- ('×', '×'),
- ('÷', '÷'),
- ('϶', '϶'),
- ('؆', '؈'),
- ('⁄', '⁄'),
- ('⁒', '⁒'),
- ('⁺', '⁼'),
- ('₊', '₌'),
- ('℘', '℘'),
- ('⅀', '⅄'),
- ('⅋', '⅋'),
- ('←', '↔'),
- ('↚', '↛'),
- ('↠', '↠'),
- ('↣', '↣'),
- ('↦', '↦'),
- ('↮', '↮'),
- ('⇎', '⇏'),
- ('⇒', '⇒'),
- ('⇔', '⇔'),
- ('⇴', '⋿'),
- ('⌠', '⌡'),
- ('⍼', '⍼'),
- ('⎛', '⎳'),
- ('⏜', '⏡'),
- ('▷', '▷'),
- ('◁', '◁'),
- ('◸', '◿'),
- ('♯', '♯'),
- ('⟀', '⟄'),
- ('⟇', '⟥'),
- ('⟰', '⟿'),
- ('⤀', '⦂'),
- ('⦙', '⧗'),
- ('⧜', '⧻'),
- ('⧾', '⫿'),
- ('⬰', '⭄'),
- ('⭇', '⭌'),
- ('﬩', '﬩'),
- ('﹢', '﹢'),
- ('﹤', '﹦'),
- ('+', '+'),
- ('<', '>'),
- ('|', '|'),
- ('~', '~'),
- ('¬', '¬'),
- ('←', '↓'),
- ('𐶎', '𐶏'),
- ('𝛁', '𝛁'),
- ('𝛛', '𝛛'),
- ('𝛻', '𝛻'),
- ('𝜕', '𝜕'),
- ('𝜵', '𝜵'),
- ('𝝏', '𝝏'),
- ('𝝯', '𝝯'),
- ('𝞉', '𝞉'),
- ('𝞩', '𝞩'),
- ('𝟃', '𝟃'),
- ('𞻰', '𞻱'),
-];
-
-pub const MODIFIER_LETTER: &'static [(char, char)] = &[
- ('ʰ', 'ˁ'),
- ('ˆ', 'ˑ'),
- ('ˠ', 'ˤ'),
- ('ˬ', 'ˬ'),
- ('ˮ', 'ˮ'),
- ('ʹ', 'ʹ'),
- ('ͺ', 'ͺ'),
- ('ՙ', 'ՙ'),
- ('ـ', 'ـ'),
- ('ۥ', 'ۦ'),
- ('ߴ', 'ߵ'),
- ('ߺ', 'ߺ'),
- ('ࠚ', 'ࠚ'),
- ('ࠤ', 'ࠤ'),
- ('ࠨ', 'ࠨ'),
- ('ࣉ', 'ࣉ'),
- ('ॱ', 'ॱ'),
- ('ๆ', 'ๆ'),
- ('ໆ', 'ໆ'),
- ('ჼ', 'ჼ'),
- ('ៗ', 'ៗ'),
- ('ᡃ', 'ᡃ'),
- ('ᪧ', 'ᪧ'),
- ('ᱸ', 'ᱽ'),
- ('ᴬ', 'ᵪ'),
- ('ᵸ', 'ᵸ'),
- ('ᶛ', 'ᶿ'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('ⱼ', 'ⱽ'),
- ('ⵯ', 'ⵯ'),
- ('ⸯ', 'ⸯ'),
- ('々', '々'),
- ('〱', '〵'),
- ('〻', '〻'),
- ('ゝ', 'ゞ'),
- ('ー', 'ヾ'),
- ('ꀕ', 'ꀕ'),
- ('ꓸ', 'ꓽ'),
- ('ꘌ', 'ꘌ'),
- ('ꙿ', 'ꙿ'),
- ('ꚜ', 'ꚝ'),
- ('ꜗ', 'ꜟ'),
- ('ꝰ', 'ꝰ'),
- ('ꞈ', 'ꞈ'),
- ('ꟲ', 'ꟴ'),
- ('ꟸ', 'ꟹ'),
- ('ꧏ', 'ꧏ'),
- ('ꧦ', 'ꧦ'),
- ('ꩰ', 'ꩰ'),
- ('ꫝ', 'ꫝ'),
- ('ꫳ', 'ꫴ'),
- ('ꭜ', 'ꭟ'),
- ('ꭩ', 'ꭩ'),
- ('ー', 'ー'),
- ('\u{ff9e}', '\u{ff9f}'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𐵎', '𐵎'),
- ('𐵯', '𐵯'),
- ('𖭀', '𖭃'),
- ('𖵀', '𖵂'),
- ('𖵫', '𖵬'),
- ('𖾓', '𖾟'),
- ('𖿠', '𖿡'),
- ('𖿣', '𖿣'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('𞀰', '𞁭'),
- ('𞄷', '𞄽'),
- ('𞓫', '𞓫'),
- ('𞥋', '𞥋'),
-];
-
-pub const MODIFIER_SYMBOL: &'static [(char, char)] = &[
- ('^', '^'),
- ('`', '`'),
- ('¨', '¨'),
- ('¯', '¯'),
- ('´', '´'),
- ('¸', '¸'),
- ('˂', '˅'),
- ('˒', '˟'),
- ('˥', '˫'),
- ('˭', '˭'),
- ('˯', '˿'),
- ('͵', '͵'),
- ('΄', '΅'),
- ('࢈', '࢈'),
- ('᾽', '᾽'),
- ('᾿', '῁'),
- ('῍', '῏'),
- ('῝', '῟'),
- ('῭', '`'),
- ('´', '῾'),
- ('゛', '゜'),
- ('꜀', '꜖'),
- ('꜠', '꜡'),
- ('꞉', '꞊'),
- ('꭛', '꭛'),
- ('꭪', '꭫'),
- ('﮲', '﯂'),
- ('^', '^'),
- ('`', '`'),
- (' ̄', ' ̄'),
- ('🏻', '🏿'),
-];
-
-pub const NONSPACING_MARK: &'static [(char, char)] = &[
- ('\u{300}', '\u{36f}'),
- ('\u{483}', '\u{487}'),
- ('\u{591}', '\u{5bd}'),
- ('\u{5bf}', '\u{5bf}'),
- ('\u{5c1}', '\u{5c2}'),
- ('\u{5c4}', '\u{5c5}'),
- ('\u{5c7}', '\u{5c7}'),
- ('\u{610}', '\u{61a}'),
- ('\u{64b}', '\u{65f}'),
- ('\u{670}', '\u{670}'),
- ('\u{6d6}', '\u{6dc}'),
- ('\u{6df}', '\u{6e4}'),
- ('\u{6e7}', '\u{6e8}'),
- ('\u{6ea}', '\u{6ed}'),
- ('\u{711}', '\u{711}'),
- ('\u{730}', '\u{74a}'),
- ('\u{7a6}', '\u{7b0}'),
- ('\u{7eb}', '\u{7f3}'),
- ('\u{7fd}', '\u{7fd}'),
- ('\u{816}', '\u{819}'),
- ('\u{81b}', '\u{823}'),
- ('\u{825}', '\u{827}'),
- ('\u{829}', '\u{82d}'),
- ('\u{859}', '\u{85b}'),
- ('\u{897}', '\u{89f}'),
- ('\u{8ca}', '\u{8e1}'),
- ('\u{8e3}', '\u{902}'),
- ('\u{93a}', '\u{93a}'),
- ('\u{93c}', '\u{93c}'),
- ('\u{941}', '\u{948}'),
- ('\u{94d}', '\u{94d}'),
- ('\u{951}', '\u{957}'),
- ('\u{962}', '\u{963}'),
- ('\u{981}', '\u{981}'),
- ('\u{9bc}', '\u{9bc}'),
- ('\u{9c1}', '\u{9c4}'),
- ('\u{9cd}', '\u{9cd}'),
- ('\u{9e2}', '\u{9e3}'),
- ('\u{9fe}', '\u{9fe}'),
- ('\u{a01}', '\u{a02}'),
- ('\u{a3c}', '\u{a3c}'),
- ('\u{a41}', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4d}'),
- ('\u{a51}', '\u{a51}'),
- ('\u{a70}', '\u{a71}'),
- ('\u{a75}', '\u{a75}'),
- ('\u{a81}', '\u{a82}'),
- ('\u{abc}', '\u{abc}'),
- ('\u{ac1}', '\u{ac5}'),
- ('\u{ac7}', '\u{ac8}'),
- ('\u{acd}', '\u{acd}'),
- ('\u{ae2}', '\u{ae3}'),
- ('\u{afa}', '\u{aff}'),
- ('\u{b01}', '\u{b01}'),
- ('\u{b3c}', '\u{b3c}'),
- ('\u{b3f}', '\u{b3f}'),
- ('\u{b41}', '\u{b44}'),
- ('\u{b4d}', '\u{b4d}'),
- ('\u{b55}', '\u{b56}'),
- ('\u{b62}', '\u{b63}'),
- ('\u{b82}', '\u{b82}'),
- ('\u{bc0}', '\u{bc0}'),
- ('\u{bcd}', '\u{bcd}'),
- ('\u{c00}', '\u{c00}'),
- ('\u{c04}', '\u{c04}'),
- ('\u{c3c}', '\u{c3c}'),
- ('\u{c3e}', '\u{c40}'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4d}'),
- ('\u{c55}', '\u{c56}'),
- ('\u{c62}', '\u{c63}'),
- ('\u{c81}', '\u{c81}'),
- ('\u{cbc}', '\u{cbc}'),
- ('\u{cbf}', '\u{cbf}'),
- ('\u{cc6}', '\u{cc6}'),
- ('\u{ccc}', '\u{ccd}'),
- ('\u{ce2}', '\u{ce3}'),
- ('\u{d00}', '\u{d01}'),
- ('\u{d3b}', '\u{d3c}'),
- ('\u{d41}', '\u{d44}'),
- ('\u{d4d}', '\u{d4d}'),
- ('\u{d62}', '\u{d63}'),
- ('\u{d81}', '\u{d81}'),
- ('\u{dca}', '\u{dca}'),
- ('\u{dd2}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('\u{e31}', '\u{e31}'),
- ('\u{e34}', '\u{e3a}'),
- ('\u{e47}', '\u{e4e}'),
- ('\u{eb1}', '\u{eb1}'),
- ('\u{eb4}', '\u{ebc}'),
- ('\u{ec8}', '\u{ece}'),
- ('\u{f18}', '\u{f19}'),
- ('\u{f35}', '\u{f35}'),
- ('\u{f37}', '\u{f37}'),
- ('\u{f39}', '\u{f39}'),
- ('\u{f71}', '\u{f7e}'),
- ('\u{f80}', '\u{f84}'),
- ('\u{f86}', '\u{f87}'),
- ('\u{f8d}', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('\u{fc6}', '\u{fc6}'),
- ('\u{102d}', '\u{1030}'),
- ('\u{1032}', '\u{1037}'),
- ('\u{1039}', '\u{103a}'),
- ('\u{103d}', '\u{103e}'),
- ('\u{1058}', '\u{1059}'),
- ('\u{105e}', '\u{1060}'),
- ('\u{1071}', '\u{1074}'),
- ('\u{1082}', '\u{1082}'),
- ('\u{1085}', '\u{1086}'),
- ('\u{108d}', '\u{108d}'),
- ('\u{109d}', '\u{109d}'),
- ('\u{135d}', '\u{135f}'),
- ('\u{1712}', '\u{1714}'),
- ('\u{1732}', '\u{1733}'),
- ('\u{1752}', '\u{1753}'),
- ('\u{1772}', '\u{1773}'),
- ('\u{17b4}', '\u{17b5}'),
- ('\u{17b7}', '\u{17bd}'),
- ('\u{17c6}', '\u{17c6}'),
- ('\u{17c9}', '\u{17d3}'),
- ('\u{17dd}', '\u{17dd}'),
- ('\u{180b}', '\u{180d}'),
- ('\u{180f}', '\u{180f}'),
- ('\u{1885}', '\u{1886}'),
- ('\u{18a9}', '\u{18a9}'),
- ('\u{1920}', '\u{1922}'),
- ('\u{1927}', '\u{1928}'),
- ('\u{1932}', '\u{1932}'),
- ('\u{1939}', '\u{193b}'),
- ('\u{1a17}', '\u{1a18}'),
- ('\u{1a1b}', '\u{1a1b}'),
- ('\u{1a56}', '\u{1a56}'),
- ('\u{1a58}', '\u{1a5e}'),
- ('\u{1a60}', '\u{1a60}'),
- ('\u{1a62}', '\u{1a62}'),
- ('\u{1a65}', '\u{1a6c}'),
- ('\u{1a73}', '\u{1a7c}'),
- ('\u{1a7f}', '\u{1a7f}'),
- ('\u{1ab0}', '\u{1abd}'),
- ('\u{1abf}', '\u{1ace}'),
- ('\u{1b00}', '\u{1b03}'),
- ('\u{1b34}', '\u{1b34}'),
- ('\u{1b36}', '\u{1b3a}'),
- ('\u{1b3c}', '\u{1b3c}'),
- ('\u{1b42}', '\u{1b42}'),
- ('\u{1b6b}', '\u{1b73}'),
- ('\u{1b80}', '\u{1b81}'),
- ('\u{1ba2}', '\u{1ba5}'),
- ('\u{1ba8}', '\u{1ba9}'),
- ('\u{1bab}', '\u{1bad}'),
- ('\u{1be6}', '\u{1be6}'),
- ('\u{1be8}', '\u{1be9}'),
- ('\u{1bed}', '\u{1bed}'),
- ('\u{1bef}', '\u{1bf1}'),
- ('\u{1c2c}', '\u{1c33}'),
- ('\u{1c36}', '\u{1c37}'),
- ('\u{1cd0}', '\u{1cd2}'),
- ('\u{1cd4}', '\u{1ce0}'),
- ('\u{1ce2}', '\u{1ce8}'),
- ('\u{1ced}', '\u{1ced}'),
- ('\u{1cf4}', '\u{1cf4}'),
- ('\u{1cf8}', '\u{1cf9}'),
- ('\u{1dc0}', '\u{1dff}'),
- ('\u{20d0}', '\u{20dc}'),
- ('\u{20e1}', '\u{20e1}'),
- ('\u{20e5}', '\u{20f0}'),
- ('\u{2cef}', '\u{2cf1}'),
- ('\u{2d7f}', '\u{2d7f}'),
- ('\u{2de0}', '\u{2dff}'),
- ('\u{302a}', '\u{302d}'),
- ('\u{3099}', '\u{309a}'),
- ('\u{a66f}', '\u{a66f}'),
- ('\u{a674}', '\u{a67d}'),
- ('\u{a69e}', '\u{a69f}'),
- ('\u{a6f0}', '\u{a6f1}'),
- ('\u{a802}', '\u{a802}'),
- ('\u{a806}', '\u{a806}'),
- ('\u{a80b}', '\u{a80b}'),
- ('\u{a825}', '\u{a826}'),
- ('\u{a82c}', '\u{a82c}'),
- ('\u{a8c4}', '\u{a8c5}'),
- ('\u{a8e0}', '\u{a8f1}'),
- ('\u{a8ff}', '\u{a8ff}'),
- ('\u{a926}', '\u{a92d}'),
- ('\u{a947}', '\u{a951}'),
- ('\u{a980}', '\u{a982}'),
- ('\u{a9b3}', '\u{a9b3}'),
- ('\u{a9b6}', '\u{a9b9}'),
- ('\u{a9bc}', '\u{a9bd}'),
- ('\u{a9e5}', '\u{a9e5}'),
- ('\u{aa29}', '\u{aa2e}'),
- ('\u{aa31}', '\u{aa32}'),
- ('\u{aa35}', '\u{aa36}'),
- ('\u{aa43}', '\u{aa43}'),
- ('\u{aa4c}', '\u{aa4c}'),
- ('\u{aa7c}', '\u{aa7c}'),
- ('\u{aab0}', '\u{aab0}'),
- ('\u{aab2}', '\u{aab4}'),
- ('\u{aab7}', '\u{aab8}'),
- ('\u{aabe}', '\u{aabf}'),
- ('\u{aac1}', '\u{aac1}'),
- ('\u{aaec}', '\u{aaed}'),
- ('\u{aaf6}', '\u{aaf6}'),
- ('\u{abe5}', '\u{abe5}'),
- ('\u{abe8}', '\u{abe8}'),
- ('\u{abed}', '\u{abed}'),
- ('\u{fb1e}', '\u{fb1e}'),
- ('\u{fe00}', '\u{fe0f}'),
- ('\u{fe20}', '\u{fe2f}'),
- ('\u{101fd}', '\u{101fd}'),
- ('\u{102e0}', '\u{102e0}'),
- ('\u{10376}', '\u{1037a}'),
- ('\u{10a01}', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '\u{10a0f}'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '\u{10a3f}'),
- ('\u{10ae5}', '\u{10ae6}'),
- ('\u{10d24}', '\u{10d27}'),
- ('\u{10d69}', '\u{10d6d}'),
- ('\u{10eab}', '\u{10eac}'),
- ('\u{10efc}', '\u{10eff}'),
- ('\u{10f46}', '\u{10f50}'),
- ('\u{10f82}', '\u{10f85}'),
- ('\u{11001}', '\u{11001}'),
- ('\u{11038}', '\u{11046}'),
- ('\u{11070}', '\u{11070}'),
- ('\u{11073}', '\u{11074}'),
- ('\u{1107f}', '\u{11081}'),
- ('\u{110b3}', '\u{110b6}'),
- ('\u{110b9}', '\u{110ba}'),
- ('\u{110c2}', '\u{110c2}'),
- ('\u{11100}', '\u{11102}'),
- ('\u{11127}', '\u{1112b}'),
- ('\u{1112d}', '\u{11134}'),
- ('\u{11173}', '\u{11173}'),
- ('\u{11180}', '\u{11181}'),
- ('\u{111b6}', '\u{111be}'),
- ('\u{111c9}', '\u{111cc}'),
- ('\u{111cf}', '\u{111cf}'),
- ('\u{1122f}', '\u{11231}'),
- ('\u{11234}', '\u{11234}'),
- ('\u{11236}', '\u{11237}'),
- ('\u{1123e}', '\u{1123e}'),
- ('\u{11241}', '\u{11241}'),
- ('\u{112df}', '\u{112df}'),
- ('\u{112e3}', '\u{112ea}'),
- ('\u{11300}', '\u{11301}'),
- ('\u{1133b}', '\u{1133c}'),
- ('\u{11340}', '\u{11340}'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
- ('\u{113bb}', '\u{113c0}'),
- ('\u{113ce}', '\u{113ce}'),
- ('\u{113d0}', '\u{113d0}'),
- ('\u{113d2}', '\u{113d2}'),
- ('\u{113e1}', '\u{113e2}'),
- ('\u{11438}', '\u{1143f}'),
- ('\u{11442}', '\u{11444}'),
- ('\u{11446}', '\u{11446}'),
- ('\u{1145e}', '\u{1145e}'),
- ('\u{114b3}', '\u{114b8}'),
- ('\u{114ba}', '\u{114ba}'),
- ('\u{114bf}', '\u{114c0}'),
- ('\u{114c2}', '\u{114c3}'),
- ('\u{115b2}', '\u{115b5}'),
- ('\u{115bc}', '\u{115bd}'),
- ('\u{115bf}', '\u{115c0}'),
- ('\u{115dc}', '\u{115dd}'),
- ('\u{11633}', '\u{1163a}'),
- ('\u{1163d}', '\u{1163d}'),
- ('\u{1163f}', '\u{11640}'),
- ('\u{116ab}', '\u{116ab}'),
- ('\u{116ad}', '\u{116ad}'),
- ('\u{116b0}', '\u{116b5}'),
- ('\u{116b7}', '\u{116b7}'),
- ('\u{1171d}', '\u{1171d}'),
- ('\u{1171f}', '\u{1171f}'),
- ('\u{11722}', '\u{11725}'),
- ('\u{11727}', '\u{1172b}'),
- ('\u{1182f}', '\u{11837}'),
- ('\u{11839}', '\u{1183a}'),
- ('\u{1193b}', '\u{1193c}'),
- ('\u{1193e}', '\u{1193e}'),
- ('\u{11943}', '\u{11943}'),
- ('\u{119d4}', '\u{119d7}'),
- ('\u{119da}', '\u{119db}'),
- ('\u{119e0}', '\u{119e0}'),
- ('\u{11a01}', '\u{11a0a}'),
- ('\u{11a33}', '\u{11a38}'),
- ('\u{11a3b}', '\u{11a3e}'),
- ('\u{11a47}', '\u{11a47}'),
- ('\u{11a51}', '\u{11a56}'),
- ('\u{11a59}', '\u{11a5b}'),
- ('\u{11a8a}', '\u{11a96}'),
- ('\u{11a98}', '\u{11a99}'),
- ('\u{11c30}', '\u{11c36}'),
- ('\u{11c38}', '\u{11c3d}'),
- ('\u{11c3f}', '\u{11c3f}'),
- ('\u{11c92}', '\u{11ca7}'),
- ('\u{11caa}', '\u{11cb0}'),
- ('\u{11cb2}', '\u{11cb3}'),
- ('\u{11cb5}', '\u{11cb6}'),
- ('\u{11d31}', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d45}'),
- ('\u{11d47}', '\u{11d47}'),
- ('\u{11d90}', '\u{11d91}'),
- ('\u{11d95}', '\u{11d95}'),
- ('\u{11d97}', '\u{11d97}'),
- ('\u{11ef3}', '\u{11ef4}'),
- ('\u{11f00}', '\u{11f01}'),
- ('\u{11f36}', '\u{11f3a}'),
- ('\u{11f40}', '\u{11f40}'),
- ('\u{11f42}', '\u{11f42}'),
- ('\u{11f5a}', '\u{11f5a}'),
- ('\u{13440}', '\u{13440}'),
- ('\u{13447}', '\u{13455}'),
- ('\u{1611e}', '\u{16129}'),
- ('\u{1612d}', '\u{1612f}'),
- ('\u{16af0}', '\u{16af4}'),
- ('\u{16b30}', '\u{16b36}'),
- ('\u{16f4f}', '\u{16f4f}'),
- ('\u{16f8f}', '\u{16f92}'),
- ('\u{16fe4}', '\u{16fe4}'),
- ('\u{1bc9d}', '\u{1bc9e}'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('\u{1d167}', '\u{1d169}'),
- ('\u{1d17b}', '\u{1d182}'),
- ('\u{1d185}', '\u{1d18b}'),
- ('\u{1d1aa}', '\u{1d1ad}'),
- ('\u{1d242}', '\u{1d244}'),
- ('\u{1da00}', '\u{1da36}'),
- ('\u{1da3b}', '\u{1da6c}'),
- ('\u{1da75}', '\u{1da75}'),
- ('\u{1da84}', '\u{1da84}'),
- ('\u{1da9b}', '\u{1da9f}'),
- ('\u{1daa1}', '\u{1daaf}'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
- ('\u{1e08f}', '\u{1e08f}'),
- ('\u{1e130}', '\u{1e136}'),
- ('\u{1e2ae}', '\u{1e2ae}'),
- ('\u{1e2ec}', '\u{1e2ef}'),
- ('\u{1e4ec}', '\u{1e4ef}'),
- ('\u{1e5ee}', '\u{1e5ef}'),
- ('\u{1e8d0}', '\u{1e8d6}'),
- ('\u{1e944}', '\u{1e94a}'),
- ('\u{e0100}', '\u{e01ef}'),
-];
-
-pub const NUMBER: &'static [(char, char)] = &[
- ('0', '9'),
- ('²', '³'),
- ('¹', '¹'),
- ('¼', '¾'),
- ('٠', '٩'),
- ('۰', '۹'),
- ('߀', '߉'),
- ('०', '९'),
- ('০', '৯'),
- ('৴', '৹'),
- ('੦', '੯'),
- ('૦', '૯'),
- ('୦', '୯'),
- ('୲', '୷'),
- ('௦', '௲'),
- ('౦', '౯'),
- ('౸', '౾'),
- ('೦', '೯'),
- ('൘', '൞'),
- ('൦', '൸'),
- ('෦', '෯'),
- ('๐', '๙'),
- ('໐', '໙'),
- ('༠', '༳'),
- ('၀', '၉'),
- ('႐', '႙'),
- ('፩', '፼'),
- ('ᛮ', 'ᛰ'),
- ('០', '៩'),
- ('៰', '៹'),
- ('᠐', '᠙'),
- ('᥆', '᥏'),
- ('᧐', '᧚'),
- ('᪀', '᪉'),
- ('᪐', '᪙'),
- ('᭐', '᭙'),
- ('᮰', '᮹'),
- ('᱀', '᱉'),
- ('᱐', '᱙'),
- ('⁰', '⁰'),
- ('⁴', '⁹'),
- ('₀', '₉'),
- ('⅐', 'ↂ'),
- ('ↅ', '↉'),
- ('①', '⒛'),
- ('⓪', '⓿'),
- ('❶', '➓'),
- ('⳽', '⳽'),
- ('〇', '〇'),
- ('〡', '〩'),
- ('〸', '〺'),
- ('㆒', '㆕'),
- ('㈠', '㈩'),
- ('㉈', '㉏'),
- ('㉑', '㉟'),
- ('㊀', '㊉'),
- ('㊱', '㊿'),
- ('꘠', '꘩'),
- ('ꛦ', 'ꛯ'),
- ('꠰', '꠵'),
- ('꣐', '꣙'),
- ('꤀', '꤉'),
- ('꧐', '꧙'),
- ('꧰', '꧹'),
- ('꩐', '꩙'),
- ('꯰', '꯹'),
- ('0', '9'),
- ('𐄇', '𐄳'),
- ('𐅀', '𐅸'),
- ('𐆊', '𐆋'),
- ('𐋡', '𐋻'),
- ('𐌠', '𐌣'),
- ('𐍁', '𐍁'),
- ('𐍊', '𐍊'),
- ('𐏑', '𐏕'),
- ('𐒠', '𐒩'),
- ('𐡘', '𐡟'),
- ('𐡹', '𐡿'),
- ('𐢧', '𐢯'),
- ('𐣻', '𐣿'),
- ('𐤖', '𐤛'),
- ('𐦼', '𐦽'),
- ('𐧀', '𐧏'),
- ('𐧒', '𐧿'),
- ('𐩀', '𐩈'),
- ('𐩽', '𐩾'),
- ('𐪝', '𐪟'),
- ('𐫫', '𐫯'),
- ('𐭘', '𐭟'),
- ('𐭸', '𐭿'),
- ('𐮩', '𐮯'),
- ('𐳺', '𐳿'),
- ('𐴰', '𐴹'),
- ('𐵀', '𐵉'),
- ('𐹠', '𐹾'),
- ('𐼝', '𐼦'),
- ('𐽑', '𐽔'),
- ('𐿅', '𐿋'),
- ('𑁒', '𑁯'),
- ('𑃰', '𑃹'),
- ('𑄶', '𑄿'),
- ('𑇐', '𑇙'),
- ('𑇡', '𑇴'),
- ('𑋰', '𑋹'),
- ('𑑐', '𑑙'),
- ('𑓐', '𑓙'),
- ('𑙐', '𑙙'),
- ('𑛀', '𑛉'),
- ('𑛐', '𑛣'),
- ('𑜰', '𑜻'),
- ('𑣠', '𑣲'),
- ('𑥐', '𑥙'),
- ('𑯰', '𑯹'),
- ('𑱐', '𑱬'),
- ('𑵐', '𑵙'),
- ('𑶠', '𑶩'),
- ('𑽐', '𑽙'),
- ('𑿀', '𑿔'),
- ('𒐀', '𒑮'),
- ('𖄰', '𖄹'),
- ('𖩠', '𖩩'),
- ('𖫀', '𖫉'),
- ('𖭐', '𖭙'),
- ('𖭛', '𖭡'),
- ('𖵰', '𖵹'),
- ('𖺀', '𖺖'),
- ('𜳰', '𜳹'),
- ('𝋀', '𝋓'),
- ('𝋠', '𝋳'),
- ('𝍠', '𝍸'),
- ('𝟎', '𝟿'),
- ('𞅀', '𞅉'),
- ('𞋰', '𞋹'),
- ('𞓰', '𞓹'),
- ('𞗱', '𞗺'),
- ('𞣇', '𞣏'),
- ('𞥐', '𞥙'),
- ('𞱱', '𞲫'),
- ('𞲭', '𞲯'),
- ('𞲱', '𞲴'),
- ('𞴁', '𞴭'),
- ('𞴯', '𞴽'),
- ('🄀', '🄌'),
- ('🯰', '🯹'),
-];
-
-pub const OPEN_PUNCTUATION: &'static [(char, char)] = &[
- ('(', '('),
- ('[', '['),
- ('{', '{'),
- ('༺', '༺'),
- ('༼', '༼'),
- ('᚛', '᚛'),
- ('‚', '‚'),
- ('„', '„'),
- ('⁅', '⁅'),
- ('⁽', '⁽'),
- ('₍', '₍'),
- ('⌈', '⌈'),
- ('⌊', '⌊'),
- ('〈', '〈'),
- ('❨', '❨'),
- ('❪', '❪'),
- ('❬', '❬'),
- ('❮', '❮'),
- ('❰', '❰'),
- ('❲', '❲'),
- ('❴', '❴'),
- ('⟅', '⟅'),
- ('⟦', '⟦'),
- ('⟨', '⟨'),
- ('⟪', '⟪'),
- ('⟬', '⟬'),
- ('⟮', '⟮'),
- ('⦃', '⦃'),
- ('⦅', '⦅'),
- ('⦇', '⦇'),
- ('⦉', '⦉'),
- ('⦋', '⦋'),
- ('⦍', '⦍'),
- ('⦏', '⦏'),
- ('⦑', '⦑'),
- ('⦓', '⦓'),
- ('⦕', '⦕'),
- ('⦗', '⦗'),
- ('⧘', '⧘'),
- ('⧚', '⧚'),
- ('⧼', '⧼'),
- ('⸢', '⸢'),
- ('⸤', '⸤'),
- ('⸦', '⸦'),
- ('⸨', '⸨'),
- ('⹂', '⹂'),
- ('⹕', '⹕'),
- ('⹗', '⹗'),
- ('⹙', '⹙'),
- ('⹛', '⹛'),
- ('〈', '〈'),
- ('《', '《'),
- ('「', '「'),
- ('『', '『'),
- ('【', '【'),
- ('〔', '〔'),
- ('〖', '〖'),
- ('〘', '〘'),
- ('〚', '〚'),
- ('〝', '〝'),
- ('﴿', '﴿'),
- ('︗', '︗'),
- ('︵', '︵'),
- ('︷', '︷'),
- ('︹', '︹'),
- ('︻', '︻'),
- ('︽', '︽'),
- ('︿', '︿'),
- ('﹁', '﹁'),
- ('﹃', '﹃'),
- ('﹇', '﹇'),
- ('﹙', '﹙'),
- ('﹛', '﹛'),
- ('﹝', '﹝'),
- ('(', '('),
- ('[', '['),
- ('{', '{'),
- ('⦅', '⦅'),
- ('「', '「'),
-];
-
-pub const OTHER: &'static [(char, char)] = &[
- ('\0', '\u{1f}'),
- ('\u{7f}', '\u{9f}'),
- ('\u{ad}', '\u{ad}'),
- ('\u{378}', '\u{379}'),
- ('\u{380}', '\u{383}'),
- ('\u{38b}', '\u{38b}'),
- ('\u{38d}', '\u{38d}'),
- ('\u{3a2}', '\u{3a2}'),
- ('\u{530}', '\u{530}'),
- ('\u{557}', '\u{558}'),
- ('\u{58b}', '\u{58c}'),
- ('\u{590}', '\u{590}'),
- ('\u{5c8}', '\u{5cf}'),
- ('\u{5eb}', '\u{5ee}'),
- ('\u{5f5}', '\u{605}'),
- ('\u{61c}', '\u{61c}'),
- ('\u{6dd}', '\u{6dd}'),
- ('\u{70e}', '\u{70f}'),
- ('\u{74b}', '\u{74c}'),
- ('\u{7b2}', '\u{7bf}'),
- ('\u{7fb}', '\u{7fc}'),
- ('\u{82e}', '\u{82f}'),
- ('\u{83f}', '\u{83f}'),
- ('\u{85c}', '\u{85d}'),
- ('\u{85f}', '\u{85f}'),
- ('\u{86b}', '\u{86f}'),
- ('\u{88f}', '\u{896}'),
- ('\u{8e2}', '\u{8e2}'),
- ('\u{984}', '\u{984}'),
- ('\u{98d}', '\u{98e}'),
- ('\u{991}', '\u{992}'),
- ('\u{9a9}', '\u{9a9}'),
- ('\u{9b1}', '\u{9b1}'),
- ('\u{9b3}', '\u{9b5}'),
- ('\u{9ba}', '\u{9bb}'),
- ('\u{9c5}', '\u{9c6}'),
- ('\u{9c9}', '\u{9ca}'),
- ('\u{9cf}', '\u{9d6}'),
- ('\u{9d8}', '\u{9db}'),
- ('\u{9de}', '\u{9de}'),
- ('\u{9e4}', '\u{9e5}'),
- ('\u{9ff}', '\u{a00}'),
- ('\u{a04}', '\u{a04}'),
- ('\u{a0b}', '\u{a0e}'),
- ('\u{a11}', '\u{a12}'),
- ('\u{a29}', '\u{a29}'),
- ('\u{a31}', '\u{a31}'),
- ('\u{a34}', '\u{a34}'),
- ('\u{a37}', '\u{a37}'),
- ('\u{a3a}', '\u{a3b}'),
- ('\u{a3d}', '\u{a3d}'),
- ('\u{a43}', '\u{a46}'),
- ('\u{a49}', '\u{a4a}'),
- ('\u{a4e}', '\u{a50}'),
- ('\u{a52}', '\u{a58}'),
- ('\u{a5d}', '\u{a5d}'),
- ('\u{a5f}', '\u{a65}'),
- ('\u{a77}', '\u{a80}'),
- ('\u{a84}', '\u{a84}'),
- ('\u{a8e}', '\u{a8e}'),
- ('\u{a92}', '\u{a92}'),
- ('\u{aa9}', '\u{aa9}'),
- ('\u{ab1}', '\u{ab1}'),
- ('\u{ab4}', '\u{ab4}'),
- ('\u{aba}', '\u{abb}'),
- ('\u{ac6}', '\u{ac6}'),
- ('\u{aca}', '\u{aca}'),
- ('\u{ace}', '\u{acf}'),
- ('\u{ad1}', '\u{adf}'),
- ('\u{ae4}', '\u{ae5}'),
- ('\u{af2}', '\u{af8}'),
- ('\u{b00}', '\u{b00}'),
- ('\u{b04}', '\u{b04}'),
- ('\u{b0d}', '\u{b0e}'),
- ('\u{b11}', '\u{b12}'),
- ('\u{b29}', '\u{b29}'),
- ('\u{b31}', '\u{b31}'),
- ('\u{b34}', '\u{b34}'),
- ('\u{b3a}', '\u{b3b}'),
- ('\u{b45}', '\u{b46}'),
- ('\u{b49}', '\u{b4a}'),
- ('\u{b4e}', '\u{b54}'),
- ('\u{b58}', '\u{b5b}'),
- ('\u{b5e}', '\u{b5e}'),
- ('\u{b64}', '\u{b65}'),
- ('\u{b78}', '\u{b81}'),
- ('\u{b84}', '\u{b84}'),
- ('\u{b8b}', '\u{b8d}'),
- ('\u{b91}', '\u{b91}'),
- ('\u{b96}', '\u{b98}'),
- ('\u{b9b}', '\u{b9b}'),
- ('\u{b9d}', '\u{b9d}'),
- ('\u{ba0}', '\u{ba2}'),
- ('\u{ba5}', '\u{ba7}'),
- ('\u{bab}', '\u{bad}'),
- ('\u{bba}', '\u{bbd}'),
- ('\u{bc3}', '\u{bc5}'),
- ('\u{bc9}', '\u{bc9}'),
- ('\u{bce}', '\u{bcf}'),
- ('\u{bd1}', '\u{bd6}'),
- ('\u{bd8}', '\u{be5}'),
- ('\u{bfb}', '\u{bff}'),
- ('\u{c0d}', '\u{c0d}'),
- ('\u{c11}', '\u{c11}'),
- ('\u{c29}', '\u{c29}'),
- ('\u{c3a}', '\u{c3b}'),
- ('\u{c45}', '\u{c45}'),
- ('\u{c49}', '\u{c49}'),
- ('\u{c4e}', '\u{c54}'),
- ('\u{c57}', '\u{c57}'),
- ('\u{c5b}', '\u{c5c}'),
- ('\u{c5e}', '\u{c5f}'),
- ('\u{c64}', '\u{c65}'),
- ('\u{c70}', '\u{c76}'),
- ('\u{c8d}', '\u{c8d}'),
- ('\u{c91}', '\u{c91}'),
- ('\u{ca9}', '\u{ca9}'),
- ('\u{cb4}', '\u{cb4}'),
- ('\u{cba}', '\u{cbb}'),
- ('\u{cc5}', '\u{cc5}'),
- ('\u{cc9}', '\u{cc9}'),
- ('\u{cce}', '\u{cd4}'),
- ('\u{cd7}', '\u{cdc}'),
- ('\u{cdf}', '\u{cdf}'),
- ('\u{ce4}', '\u{ce5}'),
- ('\u{cf0}', '\u{cf0}'),
- ('\u{cf4}', '\u{cff}'),
- ('\u{d0d}', '\u{d0d}'),
- ('\u{d11}', '\u{d11}'),
- ('\u{d45}', '\u{d45}'),
- ('\u{d49}', '\u{d49}'),
- ('\u{d50}', '\u{d53}'),
- ('\u{d64}', '\u{d65}'),
- ('\u{d80}', '\u{d80}'),
- ('\u{d84}', '\u{d84}'),
- ('\u{d97}', '\u{d99}'),
- ('\u{db2}', '\u{db2}'),
- ('\u{dbc}', '\u{dbc}'),
- ('\u{dbe}', '\u{dbf}'),
- ('\u{dc7}', '\u{dc9}'),
- ('\u{dcb}', '\u{dce}'),
- ('\u{dd5}', '\u{dd5}'),
- ('\u{dd7}', '\u{dd7}'),
- ('\u{de0}', '\u{de5}'),
- ('\u{df0}', '\u{df1}'),
- ('\u{df5}', '\u{e00}'),
- ('\u{e3b}', '\u{e3e}'),
- ('\u{e5c}', '\u{e80}'),
- ('\u{e83}', '\u{e83}'),
- ('\u{e85}', '\u{e85}'),
- ('\u{e8b}', '\u{e8b}'),
- ('\u{ea4}', '\u{ea4}'),
- ('\u{ea6}', '\u{ea6}'),
- ('\u{ebe}', '\u{ebf}'),
- ('\u{ec5}', '\u{ec5}'),
- ('\u{ec7}', '\u{ec7}'),
- ('\u{ecf}', '\u{ecf}'),
- ('\u{eda}', '\u{edb}'),
- ('\u{ee0}', '\u{eff}'),
- ('\u{f48}', '\u{f48}'),
- ('\u{f6d}', '\u{f70}'),
- ('\u{f98}', '\u{f98}'),
- ('\u{fbd}', '\u{fbd}'),
- ('\u{fcd}', '\u{fcd}'),
- ('\u{fdb}', '\u{fff}'),
- ('\u{10c6}', '\u{10c6}'),
- ('\u{10c8}', '\u{10cc}'),
- ('\u{10ce}', '\u{10cf}'),
- ('\u{1249}', '\u{1249}'),
- ('\u{124e}', '\u{124f}'),
- ('\u{1257}', '\u{1257}'),
- ('\u{1259}', '\u{1259}'),
- ('\u{125e}', '\u{125f}'),
- ('\u{1289}', '\u{1289}'),
- ('\u{128e}', '\u{128f}'),
- ('\u{12b1}', '\u{12b1}'),
- ('\u{12b6}', '\u{12b7}'),
- ('\u{12bf}', '\u{12bf}'),
- ('\u{12c1}', '\u{12c1}'),
- ('\u{12c6}', '\u{12c7}'),
- ('\u{12d7}', '\u{12d7}'),
- ('\u{1311}', '\u{1311}'),
- ('\u{1316}', '\u{1317}'),
- ('\u{135b}', '\u{135c}'),
- ('\u{137d}', '\u{137f}'),
- ('\u{139a}', '\u{139f}'),
- ('\u{13f6}', '\u{13f7}'),
- ('\u{13fe}', '\u{13ff}'),
- ('\u{169d}', '\u{169f}'),
- ('\u{16f9}', '\u{16ff}'),
- ('\u{1716}', '\u{171e}'),
- ('\u{1737}', '\u{173f}'),
- ('\u{1754}', '\u{175f}'),
- ('\u{176d}', '\u{176d}'),
- ('\u{1771}', '\u{1771}'),
- ('\u{1774}', '\u{177f}'),
- ('\u{17de}', '\u{17df}'),
- ('\u{17ea}', '\u{17ef}'),
- ('\u{17fa}', '\u{17ff}'),
- ('\u{180e}', '\u{180e}'),
- ('\u{181a}', '\u{181f}'),
- ('\u{1879}', '\u{187f}'),
- ('\u{18ab}', '\u{18af}'),
- ('\u{18f6}', '\u{18ff}'),
- ('\u{191f}', '\u{191f}'),
- ('\u{192c}', '\u{192f}'),
- ('\u{193c}', '\u{193f}'),
- ('\u{1941}', '\u{1943}'),
- ('\u{196e}', '\u{196f}'),
- ('\u{1975}', '\u{197f}'),
- ('\u{19ac}', '\u{19af}'),
- ('\u{19ca}', '\u{19cf}'),
- ('\u{19db}', '\u{19dd}'),
- ('\u{1a1c}', '\u{1a1d}'),
- ('\u{1a5f}', '\u{1a5f}'),
- ('\u{1a7d}', '\u{1a7e}'),
- ('\u{1a8a}', '\u{1a8f}'),
- ('\u{1a9a}', '\u{1a9f}'),
- ('\u{1aae}', '\u{1aaf}'),
- ('\u{1acf}', '\u{1aff}'),
- ('\u{1b4d}', '\u{1b4d}'),
- ('\u{1bf4}', '\u{1bfb}'),
- ('\u{1c38}', '\u{1c3a}'),
- ('\u{1c4a}', '\u{1c4c}'),
- ('\u{1c8b}', '\u{1c8f}'),
- ('\u{1cbb}', '\u{1cbc}'),
- ('\u{1cc8}', '\u{1ccf}'),
- ('\u{1cfb}', '\u{1cff}'),
- ('\u{1f16}', '\u{1f17}'),
- ('\u{1f1e}', '\u{1f1f}'),
- ('\u{1f46}', '\u{1f47}'),
- ('\u{1f4e}', '\u{1f4f}'),
- ('\u{1f58}', '\u{1f58}'),
- ('\u{1f5a}', '\u{1f5a}'),
- ('\u{1f5c}', '\u{1f5c}'),
- ('\u{1f5e}', '\u{1f5e}'),
- ('\u{1f7e}', '\u{1f7f}'),
- ('\u{1fb5}', '\u{1fb5}'),
- ('\u{1fc5}', '\u{1fc5}'),
- ('\u{1fd4}', '\u{1fd5}'),
- ('\u{1fdc}', '\u{1fdc}'),
- ('\u{1ff0}', '\u{1ff1}'),
- ('\u{1ff5}', '\u{1ff5}'),
- ('\u{1fff}', '\u{1fff}'),
- ('\u{200b}', '\u{200f}'),
- ('\u{202a}', '\u{202e}'),
- ('\u{2060}', '\u{206f}'),
- ('\u{2072}', '\u{2073}'),
- ('\u{208f}', '\u{208f}'),
- ('\u{209d}', '\u{209f}'),
- ('\u{20c1}', '\u{20cf}'),
- ('\u{20f1}', '\u{20ff}'),
- ('\u{218c}', '\u{218f}'),
- ('\u{242a}', '\u{243f}'),
- ('\u{244b}', '\u{245f}'),
- ('\u{2b74}', '\u{2b75}'),
- ('\u{2b96}', '\u{2b96}'),
- ('\u{2cf4}', '\u{2cf8}'),
- ('\u{2d26}', '\u{2d26}'),
- ('\u{2d28}', '\u{2d2c}'),
- ('\u{2d2e}', '\u{2d2f}'),
- ('\u{2d68}', '\u{2d6e}'),
- ('\u{2d71}', '\u{2d7e}'),
- ('\u{2d97}', '\u{2d9f}'),
- ('\u{2da7}', '\u{2da7}'),
- ('\u{2daf}', '\u{2daf}'),
- ('\u{2db7}', '\u{2db7}'),
- ('\u{2dbf}', '\u{2dbf}'),
- ('\u{2dc7}', '\u{2dc7}'),
- ('\u{2dcf}', '\u{2dcf}'),
- ('\u{2dd7}', '\u{2dd7}'),
- ('\u{2ddf}', '\u{2ddf}'),
- ('\u{2e5e}', '\u{2e7f}'),
- ('\u{2e9a}', '\u{2e9a}'),
- ('\u{2ef4}', '\u{2eff}'),
- ('\u{2fd6}', '\u{2fef}'),
- ('\u{3040}', '\u{3040}'),
- ('\u{3097}', '\u{3098}'),
- ('\u{3100}', '\u{3104}'),
- ('\u{3130}', '\u{3130}'),
- ('\u{318f}', '\u{318f}'),
- ('\u{31e6}', '\u{31ee}'),
- ('\u{321f}', '\u{321f}'),
- ('\u{a48d}', '\u{a48f}'),
- ('\u{a4c7}', '\u{a4cf}'),
- ('\u{a62c}', '\u{a63f}'),
- ('\u{a6f8}', '\u{a6ff}'),
- ('\u{a7ce}', '\u{a7cf}'),
- ('\u{a7d2}', '\u{a7d2}'),
- ('\u{a7d4}', '\u{a7d4}'),
- ('\u{a7dd}', '\u{a7f1}'),
- ('\u{a82d}', '\u{a82f}'),
- ('\u{a83a}', '\u{a83f}'),
- ('\u{a878}', '\u{a87f}'),
- ('\u{a8c6}', '\u{a8cd}'),
- ('\u{a8da}', '\u{a8df}'),
- ('\u{a954}', '\u{a95e}'),
- ('\u{a97d}', '\u{a97f}'),
- ('\u{a9ce}', '\u{a9ce}'),
- ('\u{a9da}', '\u{a9dd}'),
- ('\u{a9ff}', '\u{a9ff}'),
- ('\u{aa37}', '\u{aa3f}'),
- ('\u{aa4e}', '\u{aa4f}'),
- ('\u{aa5a}', '\u{aa5b}'),
- ('\u{aac3}', '\u{aada}'),
- ('\u{aaf7}', '\u{ab00}'),
- ('\u{ab07}', '\u{ab08}'),
- ('\u{ab0f}', '\u{ab10}'),
- ('\u{ab17}', '\u{ab1f}'),
- ('\u{ab27}', '\u{ab27}'),
- ('\u{ab2f}', '\u{ab2f}'),
- ('\u{ab6c}', '\u{ab6f}'),
- ('\u{abee}', '\u{abef}'),
- ('\u{abfa}', '\u{abff}'),
- ('\u{d7a4}', '\u{d7af}'),
- ('\u{d7c7}', '\u{d7ca}'),
- ('\u{d7fc}', '\u{f8ff}'),
- ('\u{fa6e}', '\u{fa6f}'),
- ('\u{fada}', '\u{faff}'),
- ('\u{fb07}', '\u{fb12}'),
- ('\u{fb18}', '\u{fb1c}'),
- ('\u{fb37}', '\u{fb37}'),
- ('\u{fb3d}', '\u{fb3d}'),
- ('\u{fb3f}', '\u{fb3f}'),
- ('\u{fb42}', '\u{fb42}'),
- ('\u{fb45}', '\u{fb45}'),
- ('\u{fbc3}', '\u{fbd2}'),
- ('\u{fd90}', '\u{fd91}'),
- ('\u{fdc8}', '\u{fdce}'),
- ('\u{fdd0}', '\u{fdef}'),
- ('\u{fe1a}', '\u{fe1f}'),
- ('\u{fe53}', '\u{fe53}'),
- ('\u{fe67}', '\u{fe67}'),
- ('\u{fe6c}', '\u{fe6f}'),
- ('\u{fe75}', '\u{fe75}'),
- ('\u{fefd}', '\u{ff00}'),
- ('\u{ffbf}', '\u{ffc1}'),
- ('\u{ffc8}', '\u{ffc9}'),
- ('\u{ffd0}', '\u{ffd1}'),
- ('\u{ffd8}', '\u{ffd9}'),
- ('\u{ffdd}', '\u{ffdf}'),
- ('\u{ffe7}', '\u{ffe7}'),
- ('\u{ffef}', '\u{fffb}'),
- ('\u{fffe}', '\u{ffff}'),
- ('\u{1000c}', '\u{1000c}'),
- ('\u{10027}', '\u{10027}'),
- ('\u{1003b}', '\u{1003b}'),
- ('\u{1003e}', '\u{1003e}'),
- ('\u{1004e}', '\u{1004f}'),
- ('\u{1005e}', '\u{1007f}'),
- ('\u{100fb}', '\u{100ff}'),
- ('\u{10103}', '\u{10106}'),
- ('\u{10134}', '\u{10136}'),
- ('\u{1018f}', '\u{1018f}'),
- ('\u{1019d}', '\u{1019f}'),
- ('\u{101a1}', '\u{101cf}'),
- ('\u{101fe}', '\u{1027f}'),
- ('\u{1029d}', '\u{1029f}'),
- ('\u{102d1}', '\u{102df}'),
- ('\u{102fc}', '\u{102ff}'),
- ('\u{10324}', '\u{1032c}'),
- ('\u{1034b}', '\u{1034f}'),
- ('\u{1037b}', '\u{1037f}'),
- ('\u{1039e}', '\u{1039e}'),
- ('\u{103c4}', '\u{103c7}'),
- ('\u{103d6}', '\u{103ff}'),
- ('\u{1049e}', '\u{1049f}'),
- ('\u{104aa}', '\u{104af}'),
- ('\u{104d4}', '\u{104d7}'),
- ('\u{104fc}', '\u{104ff}'),
- ('\u{10528}', '\u{1052f}'),
- ('\u{10564}', '\u{1056e}'),
- ('\u{1057b}', '\u{1057b}'),
- ('\u{1058b}', '\u{1058b}'),
- ('\u{10593}', '\u{10593}'),
- ('\u{10596}', '\u{10596}'),
- ('\u{105a2}', '\u{105a2}'),
- ('\u{105b2}', '\u{105b2}'),
- ('\u{105ba}', '\u{105ba}'),
- ('\u{105bd}', '\u{105bf}'),
- ('\u{105f4}', '\u{105ff}'),
- ('\u{10737}', '\u{1073f}'),
- ('\u{10756}', '\u{1075f}'),
- ('\u{10768}', '\u{1077f}'),
- ('\u{10786}', '\u{10786}'),
- ('\u{107b1}', '\u{107b1}'),
- ('\u{107bb}', '\u{107ff}'),
- ('\u{10806}', '\u{10807}'),
- ('\u{10809}', '\u{10809}'),
- ('\u{10836}', '\u{10836}'),
- ('\u{10839}', '\u{1083b}'),
- ('\u{1083d}', '\u{1083e}'),
- ('\u{10856}', '\u{10856}'),
- ('\u{1089f}', '\u{108a6}'),
- ('\u{108b0}', '\u{108df}'),
- ('\u{108f3}', '\u{108f3}'),
- ('\u{108f6}', '\u{108fa}'),
- ('\u{1091c}', '\u{1091e}'),
- ('\u{1093a}', '\u{1093e}'),
- ('\u{10940}', '\u{1097f}'),
- ('\u{109b8}', '\u{109bb}'),
- ('\u{109d0}', '\u{109d1}'),
- ('\u{10a04}', '\u{10a04}'),
- ('\u{10a07}', '\u{10a0b}'),
- ('\u{10a14}', '\u{10a14}'),
- ('\u{10a18}', '\u{10a18}'),
- ('\u{10a36}', '\u{10a37}'),
- ('\u{10a3b}', '\u{10a3e}'),
- ('\u{10a49}', '\u{10a4f}'),
- ('\u{10a59}', '\u{10a5f}'),
- ('\u{10aa0}', '\u{10abf}'),
- ('\u{10ae7}', '\u{10aea}'),
- ('\u{10af7}', '\u{10aff}'),
- ('\u{10b36}', '\u{10b38}'),
- ('\u{10b56}', '\u{10b57}'),
- ('\u{10b73}', '\u{10b77}'),
- ('\u{10b92}', '\u{10b98}'),
- ('\u{10b9d}', '\u{10ba8}'),
- ('\u{10bb0}', '\u{10bff}'),
- ('\u{10c49}', '\u{10c7f}'),
- ('\u{10cb3}', '\u{10cbf}'),
- ('\u{10cf3}', '\u{10cf9}'),
- ('\u{10d28}', '\u{10d2f}'),
- ('\u{10d3a}', '\u{10d3f}'),
- ('\u{10d66}', '\u{10d68}'),
- ('\u{10d86}', '\u{10d8d}'),
- ('\u{10d90}', '\u{10e5f}'),
- ('\u{10e7f}', '\u{10e7f}'),
- ('\u{10eaa}', '\u{10eaa}'),
- ('\u{10eae}', '\u{10eaf}'),
- ('\u{10eb2}', '\u{10ec1}'),
- ('\u{10ec5}', '\u{10efb}'),
- ('\u{10f28}', '\u{10f2f}'),
- ('\u{10f5a}', '\u{10f6f}'),
- ('\u{10f8a}', '\u{10faf}'),
- ('\u{10fcc}', '\u{10fdf}'),
- ('\u{10ff7}', '\u{10fff}'),
- ('\u{1104e}', '\u{11051}'),
- ('\u{11076}', '\u{1107e}'),
- ('\u{110bd}', '\u{110bd}'),
- ('\u{110c3}', '\u{110cf}'),
- ('\u{110e9}', '\u{110ef}'),
- ('\u{110fa}', '\u{110ff}'),
- ('\u{11135}', '\u{11135}'),
- ('\u{11148}', '\u{1114f}'),
- ('\u{11177}', '\u{1117f}'),
- ('\u{111e0}', '\u{111e0}'),
- ('\u{111f5}', '\u{111ff}'),
- ('\u{11212}', '\u{11212}'),
- ('\u{11242}', '\u{1127f}'),
- ('\u{11287}', '\u{11287}'),
- ('\u{11289}', '\u{11289}'),
- ('\u{1128e}', '\u{1128e}'),
- ('\u{1129e}', '\u{1129e}'),
- ('\u{112aa}', '\u{112af}'),
- ('\u{112eb}', '\u{112ef}'),
- ('\u{112fa}', '\u{112ff}'),
- ('\u{11304}', '\u{11304}'),
- ('\u{1130d}', '\u{1130e}'),
- ('\u{11311}', '\u{11312}'),
- ('\u{11329}', '\u{11329}'),
- ('\u{11331}', '\u{11331}'),
- ('\u{11334}', '\u{11334}'),
- ('\u{1133a}', '\u{1133a}'),
- ('\u{11345}', '\u{11346}'),
- ('\u{11349}', '\u{1134a}'),
- ('\u{1134e}', '\u{1134f}'),
- ('\u{11351}', '\u{11356}'),
- ('\u{11358}', '\u{1135c}'),
- ('\u{11364}', '\u{11365}'),
- ('\u{1136d}', '\u{1136f}'),
- ('\u{11375}', '\u{1137f}'),
- ('\u{1138a}', '\u{1138a}'),
- ('\u{1138c}', '\u{1138d}'),
- ('\u{1138f}', '\u{1138f}'),
- ('\u{113b6}', '\u{113b6}'),
- ('\u{113c1}', '\u{113c1}'),
- ('\u{113c3}', '\u{113c4}'),
- ('\u{113c6}', '\u{113c6}'),
- ('\u{113cb}', '\u{113cb}'),
- ('\u{113d6}', '\u{113d6}'),
- ('\u{113d9}', '\u{113e0}'),
- ('\u{113e3}', '\u{113ff}'),
- ('\u{1145c}', '\u{1145c}'),
- ('\u{11462}', '\u{1147f}'),
- ('\u{114c8}', '\u{114cf}'),
- ('\u{114da}', '\u{1157f}'),
- ('\u{115b6}', '\u{115b7}'),
- ('\u{115de}', '\u{115ff}'),
- ('\u{11645}', '\u{1164f}'),
- ('\u{1165a}', '\u{1165f}'),
- ('\u{1166d}', '\u{1167f}'),
- ('\u{116ba}', '\u{116bf}'),
- ('\u{116ca}', '\u{116cf}'),
- ('\u{116e4}', '\u{116ff}'),
- ('\u{1171b}', '\u{1171c}'),
- ('\u{1172c}', '\u{1172f}'),
- ('\u{11747}', '\u{117ff}'),
- ('\u{1183c}', '\u{1189f}'),
- ('\u{118f3}', '\u{118fe}'),
- ('\u{11907}', '\u{11908}'),
- ('\u{1190a}', '\u{1190b}'),
- ('\u{11914}', '\u{11914}'),
- ('\u{11917}', '\u{11917}'),
- ('\u{11936}', '\u{11936}'),
- ('\u{11939}', '\u{1193a}'),
- ('\u{11947}', '\u{1194f}'),
- ('\u{1195a}', '\u{1199f}'),
- ('\u{119a8}', '\u{119a9}'),
- ('\u{119d8}', '\u{119d9}'),
- ('\u{119e5}', '\u{119ff}'),
- ('\u{11a48}', '\u{11a4f}'),
- ('\u{11aa3}', '\u{11aaf}'),
- ('\u{11af9}', '\u{11aff}'),
- ('\u{11b0a}', '\u{11bbf}'),
- ('\u{11be2}', '\u{11bef}'),
- ('\u{11bfa}', '\u{11bff}'),
- ('\u{11c09}', '\u{11c09}'),
- ('\u{11c37}', '\u{11c37}'),
- ('\u{11c46}', '\u{11c4f}'),
- ('\u{11c6d}', '\u{11c6f}'),
- ('\u{11c90}', '\u{11c91}'),
- ('\u{11ca8}', '\u{11ca8}'),
- ('\u{11cb7}', '\u{11cff}'),
- ('\u{11d07}', '\u{11d07}'),
- ('\u{11d0a}', '\u{11d0a}'),
- ('\u{11d37}', '\u{11d39}'),
- ('\u{11d3b}', '\u{11d3b}'),
- ('\u{11d3e}', '\u{11d3e}'),
- ('\u{11d48}', '\u{11d4f}'),
- ('\u{11d5a}', '\u{11d5f}'),
- ('\u{11d66}', '\u{11d66}'),
- ('\u{11d69}', '\u{11d69}'),
- ('\u{11d8f}', '\u{11d8f}'),
- ('\u{11d92}', '\u{11d92}'),
- ('\u{11d99}', '\u{11d9f}'),
- ('\u{11daa}', '\u{11edf}'),
- ('\u{11ef9}', '\u{11eff}'),
- ('\u{11f11}', '\u{11f11}'),
- ('\u{11f3b}', '\u{11f3d}'),
- ('\u{11f5b}', '\u{11faf}'),
- ('\u{11fb1}', '\u{11fbf}'),
- ('\u{11ff2}', '\u{11ffe}'),
- ('\u{1239a}', '\u{123ff}'),
- ('\u{1246f}', '\u{1246f}'),
- ('\u{12475}', '\u{1247f}'),
- ('\u{12544}', '\u{12f8f}'),
- ('\u{12ff3}', '\u{12fff}'),
- ('\u{13430}', '\u{1343f}'),
- ('\u{13456}', '\u{1345f}'),
- ('\u{143fb}', '\u{143ff}'),
- ('\u{14647}', '\u{160ff}'),
- ('\u{1613a}', '\u{167ff}'),
- ('\u{16a39}', '\u{16a3f}'),
- ('\u{16a5f}', '\u{16a5f}'),
- ('\u{16a6a}', '\u{16a6d}'),
- ('\u{16abf}', '\u{16abf}'),
- ('\u{16aca}', '\u{16acf}'),
- ('\u{16aee}', '\u{16aef}'),
- ('\u{16af6}', '\u{16aff}'),
- ('\u{16b46}', '\u{16b4f}'),
- ('\u{16b5a}', '\u{16b5a}'),
- ('\u{16b62}', '\u{16b62}'),
- ('\u{16b78}', '\u{16b7c}'),
- ('\u{16b90}', '\u{16d3f}'),
- ('\u{16d7a}', '\u{16e3f}'),
- ('\u{16e9b}', '\u{16eff}'),
- ('\u{16f4b}', '\u{16f4e}'),
- ('\u{16f88}', '\u{16f8e}'),
- ('\u{16fa0}', '\u{16fdf}'),
- ('\u{16fe5}', '\u{16fef}'),
- ('\u{16ff2}', '\u{16fff}'),
- ('\u{187f8}', '\u{187ff}'),
- ('\u{18cd6}', '\u{18cfe}'),
- ('\u{18d09}', '\u{1afef}'),
- ('\u{1aff4}', '\u{1aff4}'),
- ('\u{1affc}', '\u{1affc}'),
- ('\u{1afff}', '\u{1afff}'),
- ('\u{1b123}', '\u{1b131}'),
- ('\u{1b133}', '\u{1b14f}'),
- ('\u{1b153}', '\u{1b154}'),
- ('\u{1b156}', '\u{1b163}'),
- ('\u{1b168}', '\u{1b16f}'),
- ('\u{1b2fc}', '\u{1bbff}'),
- ('\u{1bc6b}', '\u{1bc6f}'),
- ('\u{1bc7d}', '\u{1bc7f}'),
- ('\u{1bc89}', '\u{1bc8f}'),
- ('\u{1bc9a}', '\u{1bc9b}'),
- ('\u{1bca0}', '\u{1cbff}'),
- ('\u{1ccfa}', '\u{1ccff}'),
- ('\u{1ceb4}', '\u{1ceff}'),
- ('\u{1cf2e}', '\u{1cf2f}'),
- ('\u{1cf47}', '\u{1cf4f}'),
- ('\u{1cfc4}', '\u{1cfff}'),
- ('\u{1d0f6}', '\u{1d0ff}'),
- ('\u{1d127}', '\u{1d128}'),
- ('\u{1d173}', '\u{1d17a}'),
- ('\u{1d1eb}', '\u{1d1ff}'),
- ('\u{1d246}', '\u{1d2bf}'),
- ('\u{1d2d4}', '\u{1d2df}'),
- ('\u{1d2f4}', '\u{1d2ff}'),
- ('\u{1d357}', '\u{1d35f}'),
- ('\u{1d379}', '\u{1d3ff}'),
- ('\u{1d455}', '\u{1d455}'),
- ('\u{1d49d}', '\u{1d49d}'),
- ('\u{1d4a0}', '\u{1d4a1}'),
- ('\u{1d4a3}', '\u{1d4a4}'),
- ('\u{1d4a7}', '\u{1d4a8}'),
- ('\u{1d4ad}', '\u{1d4ad}'),
- ('\u{1d4ba}', '\u{1d4ba}'),
- ('\u{1d4bc}', '\u{1d4bc}'),
- ('\u{1d4c4}', '\u{1d4c4}'),
- ('\u{1d506}', '\u{1d506}'),
- ('\u{1d50b}', '\u{1d50c}'),
- ('\u{1d515}', '\u{1d515}'),
- ('\u{1d51d}', '\u{1d51d}'),
- ('\u{1d53a}', '\u{1d53a}'),
- ('\u{1d53f}', '\u{1d53f}'),
- ('\u{1d545}', '\u{1d545}'),
- ('\u{1d547}', '\u{1d549}'),
- ('\u{1d551}', '\u{1d551}'),
- ('\u{1d6a6}', '\u{1d6a7}'),
- ('\u{1d7cc}', '\u{1d7cd}'),
- ('\u{1da8c}', '\u{1da9a}'),
- ('\u{1daa0}', '\u{1daa0}'),
- ('\u{1dab0}', '\u{1deff}'),
- ('\u{1df1f}', '\u{1df24}'),
- ('\u{1df2b}', '\u{1dfff}'),
- ('\u{1e007}', '\u{1e007}'),
- ('\u{1e019}', '\u{1e01a}'),
- ('\u{1e022}', '\u{1e022}'),
- ('\u{1e025}', '\u{1e025}'),
- ('\u{1e02b}', '\u{1e02f}'),
- ('\u{1e06e}', '\u{1e08e}'),
- ('\u{1e090}', '\u{1e0ff}'),
- ('\u{1e12d}', '\u{1e12f}'),
- ('\u{1e13e}', '\u{1e13f}'),
- ('\u{1e14a}', '\u{1e14d}'),
- ('\u{1e150}', '\u{1e28f}'),
- ('\u{1e2af}', '\u{1e2bf}'),
- ('\u{1e2fa}', '\u{1e2fe}'),
- ('\u{1e300}', '\u{1e4cf}'),
- ('\u{1e4fa}', '\u{1e5cf}'),
- ('\u{1e5fb}', '\u{1e5fe}'),
- ('\u{1e600}', '\u{1e7df}'),
- ('\u{1e7e7}', '\u{1e7e7}'),
- ('\u{1e7ec}', '\u{1e7ec}'),
- ('\u{1e7ef}', '\u{1e7ef}'),
- ('\u{1e7ff}', '\u{1e7ff}'),
- ('\u{1e8c5}', '\u{1e8c6}'),
- ('\u{1e8d7}', '\u{1e8ff}'),
- ('\u{1e94c}', '\u{1e94f}'),
- ('\u{1e95a}', '\u{1e95d}'),
- ('\u{1e960}', '\u{1ec70}'),
- ('\u{1ecb5}', '\u{1ed00}'),
- ('\u{1ed3e}', '\u{1edff}'),
- ('\u{1ee04}', '\u{1ee04}'),
- ('\u{1ee20}', '\u{1ee20}'),
- ('\u{1ee23}', '\u{1ee23}'),
- ('\u{1ee25}', '\u{1ee26}'),
- ('\u{1ee28}', '\u{1ee28}'),
- ('\u{1ee33}', '\u{1ee33}'),
- ('\u{1ee38}', '\u{1ee38}'),
- ('\u{1ee3a}', '\u{1ee3a}'),
- ('\u{1ee3c}', '\u{1ee41}'),
- ('\u{1ee43}', '\u{1ee46}'),
- ('\u{1ee48}', '\u{1ee48}'),
- ('\u{1ee4a}', '\u{1ee4a}'),
- ('\u{1ee4c}', '\u{1ee4c}'),
- ('\u{1ee50}', '\u{1ee50}'),
- ('\u{1ee53}', '\u{1ee53}'),
- ('\u{1ee55}', '\u{1ee56}'),
- ('\u{1ee58}', '\u{1ee58}'),
- ('\u{1ee5a}', '\u{1ee5a}'),
- ('\u{1ee5c}', '\u{1ee5c}'),
- ('\u{1ee5e}', '\u{1ee5e}'),
- ('\u{1ee60}', '\u{1ee60}'),
- ('\u{1ee63}', '\u{1ee63}'),
- ('\u{1ee65}', '\u{1ee66}'),
- ('\u{1ee6b}', '\u{1ee6b}'),
- ('\u{1ee73}', '\u{1ee73}'),
- ('\u{1ee78}', '\u{1ee78}'),
- ('\u{1ee7d}', '\u{1ee7d}'),
- ('\u{1ee7f}', '\u{1ee7f}'),
- ('\u{1ee8a}', '\u{1ee8a}'),
- ('\u{1ee9c}', '\u{1eea0}'),
- ('\u{1eea4}', '\u{1eea4}'),
- ('\u{1eeaa}', '\u{1eeaa}'),
- ('\u{1eebc}', '\u{1eeef}'),
- ('\u{1eef2}', '\u{1efff}'),
- ('\u{1f02c}', '\u{1f02f}'),
- ('\u{1f094}', '\u{1f09f}'),
- ('\u{1f0af}', '\u{1f0b0}'),
- ('\u{1f0c0}', '\u{1f0c0}'),
- ('\u{1f0d0}', '\u{1f0d0}'),
- ('\u{1f0f6}', '\u{1f0ff}'),
- ('\u{1f1ae}', '\u{1f1e5}'),
- ('\u{1f203}', '\u{1f20f}'),
- ('\u{1f23c}', '\u{1f23f}'),
- ('\u{1f249}', '\u{1f24f}'),
- ('\u{1f252}', '\u{1f25f}'),
- ('\u{1f266}', '\u{1f2ff}'),
- ('\u{1f6d8}', '\u{1f6db}'),
- ('\u{1f6ed}', '\u{1f6ef}'),
- ('\u{1f6fd}', '\u{1f6ff}'),
- ('\u{1f777}', '\u{1f77a}'),
- ('\u{1f7da}', '\u{1f7df}'),
- ('\u{1f7ec}', '\u{1f7ef}'),
- ('\u{1f7f1}', '\u{1f7ff}'),
- ('\u{1f80c}', '\u{1f80f}'),
- ('\u{1f848}', '\u{1f84f}'),
- ('\u{1f85a}', '\u{1f85f}'),
- ('\u{1f888}', '\u{1f88f}'),
- ('\u{1f8ae}', '\u{1f8af}'),
- ('\u{1f8bc}', '\u{1f8bf}'),
- ('\u{1f8c2}', '\u{1f8ff}'),
- ('\u{1fa54}', '\u{1fa5f}'),
- ('\u{1fa6e}', '\u{1fa6f}'),
- ('\u{1fa7d}', '\u{1fa7f}'),
- ('\u{1fa8a}', '\u{1fa8e}'),
- ('\u{1fac7}', '\u{1facd}'),
- ('\u{1fadd}', '\u{1fade}'),
- ('\u{1faea}', '\u{1faef}'),
- ('\u{1faf9}', '\u{1faff}'),
- ('\u{1fb93}', '\u{1fb93}'),
- ('\u{1fbfa}', '\u{1ffff}'),
- ('\u{2a6e0}', '\u{2a6ff}'),
- ('\u{2b73a}', '\u{2b73f}'),
- ('\u{2b81e}', '\u{2b81f}'),
- ('\u{2cea2}', '\u{2ceaf}'),
- ('\u{2ebe1}', '\u{2ebef}'),
- ('\u{2ee5e}', '\u{2f7ff}'),
- ('\u{2fa1e}', '\u{2ffff}'),
- ('\u{3134b}', '\u{3134f}'),
- ('\u{323b0}', '\u{e00ff}'),
- ('\u{e01f0}', '\u{10ffff}'),
-];
-
-pub const OTHER_LETTER: &'static [(char, char)] = &[
- ('ª', 'ª'),
- ('º', 'º'),
- ('ƻ', 'ƻ'),
- ('ǀ', 'ǃ'),
- ('ʔ', 'ʔ'),
- ('א', 'ת'),
- ('ׯ', 'ײ'),
- ('ؠ', 'ؿ'),
- ('ف', 'ي'),
- ('ٮ', 'ٯ'),
- ('ٱ', 'ۓ'),
- ('ە', 'ە'),
- ('ۮ', 'ۯ'),
- ('ۺ', 'ۼ'),
- ('ۿ', 'ۿ'),
- ('ܐ', 'ܐ'),
- ('ܒ', 'ܯ'),
- ('ݍ', 'ޥ'),
- ('ޱ', 'ޱ'),
- ('ߊ', 'ߪ'),
- ('ࠀ', 'ࠕ'),
- ('ࡀ', 'ࡘ'),
- ('ࡠ', 'ࡪ'),
- ('ࡰ', 'ࢇ'),
- ('ࢉ', 'ࢎ'),
- ('ࢠ', 'ࣈ'),
- ('ऄ', 'ह'),
- ('ऽ', 'ऽ'),
- ('ॐ', 'ॐ'),
- ('क़', 'ॡ'),
- ('ॲ', 'ঀ'),
- ('অ', 'ঌ'),
- ('এ', 'ঐ'),
- ('ও', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('ঽ', 'ঽ'),
- ('ৎ', 'ৎ'),
- ('ড়', 'ঢ়'),
- ('য়', 'ৡ'),
- ('ৰ', 'ৱ'),
- ('ৼ', 'ৼ'),
- ('ਅ', 'ਊ'),
- ('ਏ', 'ਐ'),
- ('ਓ', 'ਨ'),
- ('ਪ', 'ਰ'),
- ('ਲ', 'ਲ਼'),
- ('ਵ', 'ਸ਼'),
- ('ਸ', 'ਹ'),
- ('ਖ਼', 'ੜ'),
- ('ਫ਼', 'ਫ਼'),
- ('ੲ', 'ੴ'),
- ('અ', 'ઍ'),
- ('એ', 'ઑ'),
- ('ઓ', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('ઽ', 'ઽ'),
- ('ૐ', 'ૐ'),
- ('ૠ', 'ૡ'),
- ('ૹ', 'ૹ'),
- ('ଅ', 'ଌ'),
- ('ଏ', 'ଐ'),
- ('ଓ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଵ', 'ହ'),
- ('ଽ', 'ଽ'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', 'ୡ'),
- ('ୱ', 'ୱ'),
- ('ஃ', 'ஃ'),
- ('அ', 'ஊ'),
- ('எ', 'ஐ'),
- ('ஒ', 'க'),
- ('ங', 'ச'),
- ('ஜ', 'ஜ'),
- ('ஞ', 'ட'),
- ('ண', 'த'),
- ('ந', 'ப'),
- ('ம', 'ஹ'),
- ('ௐ', 'ௐ'),
- ('అ', 'ఌ'),
- ('ఎ', 'ఐ'),
- ('ఒ', 'న'),
- ('ప', 'హ'),
- ('ఽ', 'ఽ'),
- ('ౘ', 'ౚ'),
- ('ౝ', 'ౝ'),
- ('ౠ', 'ౡ'),
- ('ಀ', 'ಀ'),
- ('ಅ', 'ಌ'),
- ('ಎ', 'ಐ'),
- ('ಒ', 'ನ'),
- ('ಪ', 'ಳ'),
- ('ವ', 'ಹ'),
- ('ಽ', 'ಽ'),
- ('ೝ', 'ೞ'),
- ('ೠ', 'ೡ'),
- ('ೱ', 'ೲ'),
- ('ഄ', 'ഌ'),
- ('എ', 'ഐ'),
- ('ഒ', 'ഺ'),
- ('ഽ', 'ഽ'),
- ('ൎ', 'ൎ'),
- ('ൔ', 'ൖ'),
- ('ൟ', 'ൡ'),
- ('ൺ', 'ൿ'),
- ('අ', 'ඖ'),
- ('ක', 'න'),
- ('ඳ', 'ර'),
- ('ල', 'ල'),
- ('ව', 'ෆ'),
- ('ก', 'ะ'),
- ('า', 'ำ'),
- ('เ', 'ๅ'),
- ('ກ', 'ຂ'),
- ('ຄ', 'ຄ'),
- ('ຆ', 'ຊ'),
- ('ຌ', 'ຣ'),
- ('ລ', 'ລ'),
- ('ວ', 'ະ'),
- ('າ', 'ຳ'),
- ('ຽ', 'ຽ'),
- ('ເ', 'ໄ'),
- ('ໜ', 'ໟ'),
- ('ༀ', 'ༀ'),
- ('ཀ', 'ཇ'),
- ('ཉ', 'ཬ'),
- ('ྈ', 'ྌ'),
- ('က', 'ဪ'),
- ('ဿ', 'ဿ'),
- ('ၐ', 'ၕ'),
- ('ၚ', 'ၝ'),
- ('ၡ', 'ၡ'),
- ('ၥ', 'ၦ'),
- ('ၮ', 'ၰ'),
- ('ၵ', 'ႁ'),
- ('ႎ', 'ႎ'),
- ('ᄀ', 'ቈ'),
- ('ቊ', 'ቍ'),
- ('ቐ', 'ቖ'),
- ('ቘ', 'ቘ'),
- ('ቚ', 'ቝ'),
- ('በ', 'ኈ'),
- ('ኊ', 'ኍ'),
- ('ነ', 'ኰ'),
- ('ኲ', 'ኵ'),
- ('ኸ', 'ኾ'),
- ('ዀ', 'ዀ'),
- ('ዂ', 'ዅ'),
- ('ወ', 'ዖ'),
- ('ዘ', 'ጐ'),
- ('ጒ', 'ጕ'),
- ('ጘ', 'ፚ'),
- ('ᎀ', 'ᎏ'),
- ('ᐁ', 'ᙬ'),
- ('ᙯ', 'ᙿ'),
- ('ᚁ', 'ᚚ'),
- ('ᚠ', 'ᛪ'),
- ('ᛱ', 'ᛸ'),
- ('ᜀ', 'ᜑ'),
- ('ᜟ', 'ᜱ'),
- ('ᝀ', 'ᝑ'),
- ('ᝠ', 'ᝬ'),
- ('ᝮ', 'ᝰ'),
- ('ក', 'ឳ'),
- ('ៜ', 'ៜ'),
- ('ᠠ', 'ᡂ'),
- ('ᡄ', 'ᡸ'),
- ('ᢀ', 'ᢄ'),
- ('ᢇ', 'ᢨ'),
- ('ᢪ', 'ᢪ'),
- ('ᢰ', 'ᣵ'),
- ('ᤀ', 'ᤞ'),
- ('ᥐ', 'ᥭ'),
- ('ᥰ', 'ᥴ'),
- ('ᦀ', 'ᦫ'),
- ('ᦰ', 'ᧉ'),
- ('ᨀ', 'ᨖ'),
- ('ᨠ', 'ᩔ'),
- ('ᬅ', 'ᬳ'),
- ('ᭅ', 'ᭌ'),
- ('ᮃ', 'ᮠ'),
- ('ᮮ', 'ᮯ'),
- ('ᮺ', 'ᯥ'),
- ('ᰀ', 'ᰣ'),
- ('ᱍ', 'ᱏ'),
- ('ᱚ', 'ᱷ'),
- ('ᳩ', 'ᳬ'),
- ('ᳮ', 'ᳳ'),
- ('ᳵ', 'ᳶ'),
- ('ᳺ', 'ᳺ'),
- ('ℵ', 'ℸ'),
- ('ⴰ', 'ⵧ'),
- ('ⶀ', 'ⶖ'),
- ('ⶠ', 'ⶦ'),
- ('ⶨ', 'ⶮ'),
- ('ⶰ', 'ⶶ'),
- ('ⶸ', 'ⶾ'),
- ('ⷀ', 'ⷆ'),
- ('ⷈ', 'ⷎ'),
- ('ⷐ', 'ⷖ'),
- ('ⷘ', 'ⷞ'),
- ('〆', '〆'),
- ('〼', '〼'),
- ('ぁ', 'ゖ'),
- ('ゟ', 'ゟ'),
- ('ァ', 'ヺ'),
- ('ヿ', 'ヿ'),
- ('ㄅ', 'ㄯ'),
- ('ㄱ', 'ㆎ'),
- ('ㆠ', 'ㆿ'),
- ('ㇰ', 'ㇿ'),
- ('㐀', '䶿'),
- ('一', 'ꀔ'),
- ('ꀖ', 'ꒌ'),
- ('ꓐ', 'ꓷ'),
- ('ꔀ', 'ꘋ'),
- ('ꘐ', 'ꘟ'),
- ('ꘪ', 'ꘫ'),
- ('ꙮ', 'ꙮ'),
- ('ꚠ', 'ꛥ'),
- ('ꞏ', 'ꞏ'),
- ('ꟷ', 'ꟷ'),
- ('ꟻ', 'ꠁ'),
- ('ꠃ', 'ꠅ'),
- ('ꠇ', 'ꠊ'),
- ('ꠌ', 'ꠢ'),
- ('ꡀ', 'ꡳ'),
- ('ꢂ', 'ꢳ'),
- ('ꣲ', 'ꣷ'),
- ('ꣻ', 'ꣻ'),
- ('ꣽ', 'ꣾ'),
- ('ꤊ', 'ꤥ'),
- ('ꤰ', 'ꥆ'),
- ('ꥠ', 'ꥼ'),
- ('ꦄ', 'ꦲ'),
- ('ꧠ', 'ꧤ'),
- ('ꧧ', 'ꧯ'),
- ('ꧺ', 'ꧾ'),
- ('ꨀ', 'ꨨ'),
- ('ꩀ', 'ꩂ'),
- ('ꩄ', 'ꩋ'),
- ('ꩠ', 'ꩯ'),
- ('ꩱ', 'ꩶ'),
- ('ꩺ', 'ꩺ'),
- ('ꩾ', 'ꪯ'),
- ('ꪱ', 'ꪱ'),
- ('ꪵ', 'ꪶ'),
- ('ꪹ', 'ꪽ'),
- ('ꫀ', 'ꫀ'),
- ('ꫂ', 'ꫂ'),
- ('ꫛ', 'ꫜ'),
- ('ꫠ', 'ꫪ'),
- ('ꫲ', 'ꫲ'),
- ('ꬁ', 'ꬆ'),
- ('ꬉ', 'ꬎ'),
- ('ꬑ', 'ꬖ'),
- ('ꬠ', 'ꬦ'),
- ('ꬨ', 'ꬮ'),
- ('ꯀ', 'ꯢ'),
- ('가', '힣'),
- ('ힰ', 'ퟆ'),
- ('ퟋ', 'ퟻ'),
- ('豈', '舘'),
- ('並', '龎'),
- ('יִ', 'יִ'),
- ('ײַ', 'ﬨ'),
- ('שׁ', 'זּ'),
- ('טּ', 'לּ'),
- ('מּ', 'מּ'),
- ('נּ', 'סּ'),
- ('ףּ', 'פּ'),
- ('צּ', 'ﮱ'),
- ('ﯓ', 'ﴽ'),
- ('ﵐ', 'ﶏ'),
- ('ﶒ', 'ﷇ'),
- ('ﷰ', 'ﷻ'),
- ('ﹰ', 'ﹴ'),
- ('ﹶ', 'ﻼ'),
- ('ヲ', 'ッ'),
- ('ア', 'ン'),
- ('ᅠ', 'ᄒ'),
- ('ᅡ', 'ᅦ'),
- ('ᅧ', 'ᅬ'),
- ('ᅭ', 'ᅲ'),
- ('ᅳ', 'ᅵ'),
- ('𐀀', '𐀋'),
- ('𐀍', '𐀦'),
- ('𐀨', '𐀺'),
- ('𐀼', '𐀽'),
- ('𐀿', '𐁍'),
- ('𐁐', '𐁝'),
- ('𐂀', '𐃺'),
- ('𐊀', '𐊜'),
- ('𐊠', '𐋐'),
- ('𐌀', '𐌟'),
- ('𐌭', '𐍀'),
- ('𐍂', '𐍉'),
- ('𐍐', '𐍵'),
- ('𐎀', '𐎝'),
- ('𐎠', '𐏃'),
- ('𐏈', '𐏏'),
- ('𐑐', '𐒝'),
- ('𐔀', '𐔧'),
- ('𐔰', '𐕣'),
- ('𐗀', '𐗳'),
- ('𐘀', '𐜶'),
- ('𐝀', '𐝕'),
- ('𐝠', '𐝧'),
- ('𐠀', '𐠅'),
- ('𐠈', '𐠈'),
- ('𐠊', '𐠵'),
- ('𐠷', '𐠸'),
- ('𐠼', '𐠼'),
- ('𐠿', '𐡕'),
- ('𐡠', '𐡶'),
- ('𐢀', '𐢞'),
- ('𐣠', '𐣲'),
- ('𐣴', '𐣵'),
- ('𐤀', '𐤕'),
- ('𐤠', '𐤹'),
- ('𐦀', '𐦷'),
- ('𐦾', '𐦿'),
- ('𐨀', '𐨀'),
- ('𐨐', '𐨓'),
- ('𐨕', '𐨗'),
- ('𐨙', '𐨵'),
- ('𐩠', '𐩼'),
- ('𐪀', '𐪜'),
- ('𐫀', '𐫇'),
- ('𐫉', '𐫤'),
- ('𐬀', '𐬵'),
- ('𐭀', '𐭕'),
- ('𐭠', '𐭲'),
- ('𐮀', '𐮑'),
- ('𐰀', '𐱈'),
- ('𐴀', '𐴣'),
- ('𐵊', '𐵍'),
- ('𐵏', '𐵏'),
- ('𐺀', '𐺩'),
- ('𐺰', '𐺱'),
- ('𐻂', '𐻄'),
- ('𐼀', '𐼜'),
- ('𐼧', '𐼧'),
- ('𐼰', '𐽅'),
- ('𐽰', '𐾁'),
- ('𐾰', '𐿄'),
- ('𐿠', '𐿶'),
- ('𑀃', '𑀷'),
- ('𑁱', '𑁲'),
- ('𑁵', '𑁵'),
- ('𑂃', '𑂯'),
- ('𑃐', '𑃨'),
- ('𑄃', '𑄦'),
- ('𑅄', '𑅄'),
- ('𑅇', '𑅇'),
- ('𑅐', '𑅲'),
- ('𑅶', '𑅶'),
- ('𑆃', '𑆲'),
- ('𑇁', '𑇄'),
- ('𑇚', '𑇚'),
- ('𑇜', '𑇜'),
- ('𑈀', '𑈑'),
- ('𑈓', '𑈫'),
- ('𑈿', '𑉀'),
- ('𑊀', '𑊆'),
- ('𑊈', '𑊈'),
- ('𑊊', '𑊍'),
- ('𑊏', '𑊝'),
- ('𑊟', '𑊨'),
- ('𑊰', '𑋞'),
- ('𑌅', '𑌌'),
- ('𑌏', '𑌐'),
- ('𑌓', '𑌨'),
- ('𑌪', '𑌰'),
- ('𑌲', '𑌳'),
- ('𑌵', '𑌹'),
- ('𑌽', '𑌽'),
- ('𑍐', '𑍐'),
- ('𑍝', '𑍡'),
- ('𑎀', '𑎉'),
- ('𑎋', '𑎋'),
- ('𑎎', '𑎎'),
- ('𑎐', '𑎵'),
- ('𑎷', '𑎷'),
- ('𑏑', '𑏑'),
- ('𑏓', '𑏓'),
- ('𑐀', '𑐴'),
- ('𑑇', '𑑊'),
- ('𑑟', '𑑡'),
- ('𑒀', '𑒯'),
- ('𑓄', '𑓅'),
- ('𑓇', '𑓇'),
- ('𑖀', '𑖮'),
- ('𑗘', '𑗛'),
- ('𑘀', '𑘯'),
- ('𑙄', '𑙄'),
- ('𑚀', '𑚪'),
- ('𑚸', '𑚸'),
- ('𑜀', '𑜚'),
- ('𑝀', '𑝆'),
- ('𑠀', '𑠫'),
- ('𑣿', '𑤆'),
- ('𑤉', '𑤉'),
- ('𑤌', '𑤓'),
- ('𑤕', '𑤖'),
- ('𑤘', '𑤯'),
- ('𑤿', '𑤿'),
- ('𑥁', '𑥁'),
- ('𑦠', '𑦧'),
- ('𑦪', '𑧐'),
- ('𑧡', '𑧡'),
- ('𑧣', '𑧣'),
- ('𑨀', '𑨀'),
- ('𑨋', '𑨲'),
- ('𑨺', '𑨺'),
- ('𑩐', '𑩐'),
- ('𑩜', '𑪉'),
- ('𑪝', '𑪝'),
- ('𑪰', '𑫸'),
- ('𑯀', '𑯠'),
- ('𑰀', '𑰈'),
- ('𑰊', '𑰮'),
- ('𑱀', '𑱀'),
- ('𑱲', '𑲏'),
- ('𑴀', '𑴆'),
- ('𑴈', '𑴉'),
- ('𑴋', '𑴰'),
- ('𑵆', '𑵆'),
- ('𑵠', '𑵥'),
- ('𑵧', '𑵨'),
- ('𑵪', '𑶉'),
- ('𑶘', '𑶘'),
- ('𑻠', '𑻲'),
- ('𑼂', '𑼂'),
- ('𑼄', '𑼐'),
- ('𑼒', '𑼳'),
- ('𑾰', '𑾰'),
- ('𒀀', '𒎙'),
- ('𒒀', '𒕃'),
- ('𒾐', '𒿰'),
- ('𓀀', '𓐯'),
- ('𓑁', '𓑆'),
- ('𓑠', '𔏺'),
- ('𔐀', '𔙆'),
- ('𖄀', '𖄝'),
- ('𖠀', '𖨸'),
- ('𖩀', '𖩞'),
- ('𖩰', '𖪾'),
- ('𖫐', '𖫭'),
- ('𖬀', '𖬯'),
- ('𖭣', '𖭷'),
- ('𖭽', '𖮏'),
- ('𖵃', '𖵪'),
- ('𖼀', '𖽊'),
- ('𖽐', '𖽐'),
- ('𗀀', '𘟷'),
- ('𘠀', '𘳕'),
- ('𘳿', '𘴈'),
- ('𛀀', '𛄢'),
- ('𛄲', '𛄲'),
- ('𛅐', '𛅒'),
- ('𛅕', '𛅕'),
- ('𛅤', '𛅧'),
- ('𛅰', '𛋻'),
- ('𛰀', '𛱪'),
- ('𛱰', '𛱼'),
- ('𛲀', '𛲈'),
- ('𛲐', '𛲙'),
- ('𝼊', '𝼊'),
- ('𞄀', '𞄬'),
- ('𞅎', '𞅎'),
- ('𞊐', '𞊭'),
- ('𞋀', '𞋫'),
- ('𞓐', '𞓪'),
- ('𞗐', '𞗭'),
- ('𞗰', '𞗰'),
- ('𞟠', '𞟦'),
- ('𞟨', '𞟫'),
- ('𞟭', '𞟮'),
- ('𞟰', '𞟾'),
- ('𞠀', '𞣄'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('𠀀', '𪛟'),
- ('𪜀', '𫜹'),
- ('𫝀', '𫠝'),
- ('𫠠', '𬺡'),
- ('𬺰', '𮯠'),
- ('𮯰', '𮹝'),
- ('丽', '𪘀'),
- ('𰀀', '𱍊'),
- ('𱍐', '𲎯'),
-];
-
-pub const OTHER_NUMBER: &'static [(char, char)] = &[
- ('²', '³'),
- ('¹', '¹'),
- ('¼', '¾'),
- ('৴', '৹'),
- ('୲', '୷'),
- ('௰', '௲'),
- ('౸', '౾'),
- ('൘', '൞'),
- ('൰', '൸'),
- ('༪', '༳'),
- ('፩', '፼'),
- ('៰', '៹'),
- ('᧚', '᧚'),
- ('⁰', '⁰'),
- ('⁴', '⁹'),
- ('₀', '₉'),
- ('⅐', '⅟'),
- ('↉', '↉'),
- ('①', '⒛'),
- ('⓪', '⓿'),
- ('❶', '➓'),
- ('⳽', '⳽'),
- ('㆒', '㆕'),
- ('㈠', '㈩'),
- ('㉈', '㉏'),
- ('㉑', '㉟'),
- ('㊀', '㊉'),
- ('㊱', '㊿'),
- ('꠰', '꠵'),
- ('𐄇', '𐄳'),
- ('𐅵', '𐅸'),
- ('𐆊', '𐆋'),
- ('𐋡', '𐋻'),
- ('𐌠', '𐌣'),
- ('𐡘', '𐡟'),
- ('𐡹', '𐡿'),
- ('𐢧', '𐢯'),
- ('𐣻', '𐣿'),
- ('𐤖', '𐤛'),
- ('𐦼', '𐦽'),
- ('𐧀', '𐧏'),
- ('𐧒', '𐧿'),
- ('𐩀', '𐩈'),
- ('𐩽', '𐩾'),
- ('𐪝', '𐪟'),
- ('𐫫', '𐫯'),
- ('𐭘', '𐭟'),
- ('𐭸', '𐭿'),
- ('𐮩', '𐮯'),
- ('𐳺', '𐳿'),
- ('𐹠', '𐹾'),
- ('𐼝', '𐼦'),
- ('𐽑', '𐽔'),
- ('𐿅', '𐿋'),
- ('𑁒', '𑁥'),
- ('𑇡', '𑇴'),
- ('𑜺', '𑜻'),
- ('𑣪', '𑣲'),
- ('𑱚', '𑱬'),
- ('𑿀', '𑿔'),
- ('𖭛', '𖭡'),
- ('𖺀', '𖺖'),
- ('𝋀', '𝋓'),
- ('𝋠', '𝋳'),
- ('𝍠', '𝍸'),
- ('𞣇', '𞣏'),
- ('𞱱', '𞲫'),
- ('𞲭', '𞲯'),
- ('𞲱', '𞲴'),
- ('𞴁', '𞴭'),
- ('𞴯', '𞴽'),
- ('🄀', '🄌'),
-];
-
-pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[
- ('!', '#'),
- ('%', '\''),
- ('*', '*'),
- (',', ','),
- ('.', '/'),
- (':', ';'),
- ('?', '@'),
- ('\\', '\\'),
- ('¡', '¡'),
- ('§', '§'),
- ('¶', '·'),
- ('¿', '¿'),
- (';', ';'),
- ('·', '·'),
- ('՚', '՟'),
- ('։', '։'),
- ('׀', '׀'),
- ('׃', '׃'),
- ('׆', '׆'),
- ('׳', '״'),
- ('؉', '؊'),
- ('،', '؍'),
- ('؛', '؛'),
- ('؝', '؟'),
- ('٪', '٭'),
- ('۔', '۔'),
- ('܀', '܍'),
- ('߷', '߹'),
- ('࠰', '࠾'),
- ('࡞', '࡞'),
- ('।', '॥'),
- ('॰', '॰'),
- ('৽', '৽'),
- ('੶', '੶'),
- ('૰', '૰'),
- ('౷', '౷'),
- ('಄', '಄'),
- ('෴', '෴'),
- ('๏', '๏'),
- ('๚', '๛'),
- ('༄', '༒'),
- ('༔', '༔'),
- ('྅', '྅'),
- ('࿐', '࿔'),
- ('࿙', '࿚'),
- ('၊', '၏'),
- ('჻', '჻'),
- ('፠', '፨'),
- ('᙮', '᙮'),
- ('᛫', '᛭'),
- ('᜵', '᜶'),
- ('។', '៖'),
- ('៘', '៚'),
- ('᠀', '᠅'),
- ('᠇', '᠊'),
- ('᥄', '᥅'),
- ('᨞', '᨟'),
- ('᪠', '᪦'),
- ('᪨', '᪭'),
- ('᭎', '᭏'),
- ('᭚', '᭠'),
- ('᭽', '᭿'),
- ('᯼', '᯿'),
- ('᰻', '᰿'),
- ('᱾', '᱿'),
- ('᳀', '᳇'),
- ('᳓', '᳓'),
- ('‖', '‗'),
- ('†', '‧'),
- ('‰', '‸'),
- ('※', '‾'),
- ('⁁', '⁃'),
- ('⁇', '⁑'),
- ('⁓', '⁓'),
- ('⁕', '⁞'),
- ('⳹', '⳼'),
- ('⳾', '⳿'),
- ('⵰', '⵰'),
- ('⸀', '⸁'),
- ('⸆', '⸈'),
- ('⸋', '⸋'),
- ('⸎', '⸖'),
- ('⸘', '⸙'),
- ('⸛', '⸛'),
- ('⸞', '⸟'),
- ('⸪', '⸮'),
- ('⸰', '⸹'),
- ('⸼', '⸿'),
- ('⹁', '⹁'),
- ('⹃', '⹏'),
- ('⹒', '⹔'),
- ('、', '〃'),
- ('〽', '〽'),
- ('・', '・'),
- ('꓾', '꓿'),
- ('꘍', '꘏'),
- ('꙳', '꙳'),
- ('꙾', '꙾'),
- ('꛲', '꛷'),
- ('꡴', '꡷'),
- ('꣎', '꣏'),
- ('꣸', '꣺'),
- ('꣼', '꣼'),
- ('꤮', '꤯'),
- ('꥟', '꥟'),
- ('꧁', '꧍'),
- ('꧞', '꧟'),
- ('꩜', '꩟'),
- ('꫞', '꫟'),
- ('꫰', '꫱'),
- ('꯫', '꯫'),
- ('︐', '︖'),
- ('︙', '︙'),
- ('︰', '︰'),
- ('﹅', '﹆'),
- ('﹉', '﹌'),
- ('﹐', '﹒'),
- ('﹔', '﹗'),
- ('﹟', '﹡'),
- ('﹨', '﹨'),
- ('﹪', '﹫'),
- ('!', '#'),
- ('%', '''),
- ('*', '*'),
- (',', ','),
- ('.', '/'),
- (':', ';'),
- ('?', '@'),
- ('\', '\'),
- ('。', '。'),
- ('、', '・'),
- ('𐄀', '𐄂'),
- ('𐎟', '𐎟'),
- ('𐏐', '𐏐'),
- ('𐕯', '𐕯'),
- ('𐡗', '𐡗'),
- ('𐤟', '𐤟'),
- ('𐤿', '𐤿'),
- ('𐩐', '𐩘'),
- ('𐩿', '𐩿'),
- ('𐫰', '𐫶'),
- ('𐬹', '𐬿'),
- ('𐮙', '𐮜'),
- ('𐽕', '𐽙'),
- ('𐾆', '𐾉'),
- ('𑁇', '𑁍'),
- ('𑂻', '𑂼'),
- ('𑂾', '𑃁'),
- ('𑅀', '𑅃'),
- ('𑅴', '𑅵'),
- ('𑇅', '𑇈'),
- ('𑇍', '𑇍'),
- ('𑇛', '𑇛'),
- ('𑇝', '𑇟'),
- ('𑈸', '𑈽'),
- ('𑊩', '𑊩'),
- ('𑏔', '𑏕'),
- ('𑏗', '𑏘'),
- ('𑑋', '𑑏'),
- ('𑑚', '𑑛'),
- ('𑑝', '𑑝'),
- ('𑓆', '𑓆'),
- ('𑗁', '𑗗'),
- ('𑙁', '𑙃'),
- ('𑙠', '𑙬'),
- ('𑚹', '𑚹'),
- ('𑜼', '𑜾'),
- ('𑠻', '𑠻'),
- ('𑥄', '𑥆'),
- ('𑧢', '𑧢'),
- ('𑨿', '𑩆'),
- ('𑪚', '𑪜'),
- ('𑪞', '𑪢'),
- ('𑬀', '𑬉'),
- ('𑯡', '𑯡'),
- ('𑱁', '𑱅'),
- ('𑱰', '𑱱'),
- ('𑻷', '𑻸'),
- ('𑽃', '𑽏'),
- ('𑿿', '𑿿'),
- ('𒑰', '𒑴'),
- ('𒿱', '𒿲'),
- ('𖩮', '𖩯'),
- ('𖫵', '𖫵'),
- ('𖬷', '𖬻'),
- ('𖭄', '𖭄'),
- ('𖵭', '𖵯'),
- ('𖺗', '𖺚'),
- ('𖿢', '𖿢'),
- ('𛲟', '𛲟'),
- ('𝪇', '𝪋'),
- ('𞗿', '𞗿'),
- ('𞥞', '𞥟'),
-];
-
-pub const OTHER_SYMBOL: &'static [(char, char)] = &[
- ('¦', '¦'),
- ('©', '©'),
- ('®', '®'),
- ('°', '°'),
- ('҂', '҂'),
- ('֍', '֎'),
- ('؎', '؏'),
- ('۞', '۞'),
- ('۩', '۩'),
- ('۽', '۾'),
- ('߶', '߶'),
- ('৺', '৺'),
- ('୰', '୰'),
- ('௳', '௸'),
- ('௺', '௺'),
- ('౿', '౿'),
- ('൏', '൏'),
- ('൹', '൹'),
- ('༁', '༃'),
- ('༓', '༓'),
- ('༕', '༗'),
- ('༚', '༟'),
- ('༴', '༴'),
- ('༶', '༶'),
- ('༸', '༸'),
- ('྾', '࿅'),
- ('࿇', '࿌'),
- ('࿎', '࿏'),
- ('࿕', '࿘'),
- ('႞', '႟'),
- ('᎐', '᎙'),
- ('᙭', '᙭'),
- ('᥀', '᥀'),
- ('᧞', '᧿'),
- ('᭡', '᭪'),
- ('᭴', '᭼'),
- ('℀', '℁'),
- ('℃', '℆'),
- ('℈', '℉'),
- ('℔', '℔'),
- ('№', '℗'),
- ('℞', '℣'),
- ('℥', '℥'),
- ('℧', '℧'),
- ('℩', '℩'),
- ('℮', '℮'),
- ('℺', '℻'),
- ('⅊', '⅊'),
- ('⅌', '⅍'),
- ('⅏', '⅏'),
- ('↊', '↋'),
- ('↕', '↙'),
- ('↜', '↟'),
- ('↡', '↢'),
- ('↤', '↥'),
- ('↧', '↭'),
- ('↯', '⇍'),
- ('⇐', '⇑'),
- ('⇓', '⇓'),
- ('⇕', '⇳'),
- ('⌀', '⌇'),
- ('⌌', '⌟'),
- ('⌢', '⌨'),
- ('⌫', '⍻'),
- ('⍽', '⎚'),
- ('⎴', '⏛'),
- ('⏢', '␩'),
- ('⑀', '⑊'),
- ('⒜', 'ⓩ'),
- ('─', '▶'),
- ('▸', '◀'),
- ('◂', '◷'),
- ('☀', '♮'),
- ('♰', '❧'),
- ('➔', '➿'),
- ('⠀', '⣿'),
- ('⬀', '⬯'),
- ('⭅', '⭆'),
- ('⭍', '⭳'),
- ('⭶', '⮕'),
- ('⮗', '⯿'),
- ('⳥', '⳪'),
- ('⹐', '⹑'),
- ('⺀', '⺙'),
- ('⺛', '⻳'),
- ('⼀', '⿕'),
- ('⿰', '⿿'),
- ('〄', '〄'),
- ('〒', '〓'),
- ('〠', '〠'),
- ('〶', '〷'),
- ('〾', '〿'),
- ('㆐', '㆑'),
- ('㆖', '㆟'),
- ('㇀', '㇥'),
- ('㇯', '㇯'),
- ('㈀', '㈞'),
- ('㈪', '㉇'),
- ('㉐', '㉐'),
- ('㉠', '㉿'),
- ('㊊', '㊰'),
- ('㋀', '㏿'),
- ('䷀', '䷿'),
- ('꒐', '꓆'),
- ('꠨', '꠫'),
- ('꠶', '꠷'),
- ('꠹', '꠹'),
- ('꩷', '꩹'),
- ('﵀', '﵏'),
- ('﷏', '﷏'),
- ('﷽', '﷿'),
- ('¦', '¦'),
- ('│', '│'),
- ('■', '○'),
- ('', '�'),
- ('𐄷', '𐄿'),
- ('𐅹', '𐆉'),
- ('𐆌', '𐆎'),
- ('𐆐', '𐆜'),
- ('𐆠', '𐆠'),
- ('𐇐', '𐇼'),
- ('𐡷', '𐡸'),
- ('𐫈', '𐫈'),
- ('𑜿', '𑜿'),
- ('𑿕', '𑿜'),
- ('𑿡', '𑿱'),
- ('𖬼', '𖬿'),
- ('𖭅', '𖭅'),
- ('𛲜', '𛲜'),
- ('𜰀', '𜳯'),
- ('𜴀', '𜺳'),
- ('𜽐', '𜿃'),
- ('𝀀', '𝃵'),
- ('𝄀', '𝄦'),
- ('𝄩', '𝅘𝅥𝅲'),
- ('𝅪', '𝅬'),
- ('𝆃', '𝆄'),
- ('𝆌', '𝆩'),
- ('𝆮', '𝇪'),
- ('𝈀', '𝉁'),
- ('𝉅', '𝉅'),
- ('𝌀', '𝍖'),
- ('𝠀', '𝧿'),
- ('𝨷', '𝨺'),
- ('𝩭', '𝩴'),
- ('𝩶', '𝪃'),
- ('𝪅', '𝪆'),
- ('𞅏', '𞅏'),
- ('𞲬', '𞲬'),
- ('𞴮', '𞴮'),
- ('🀀', '🀫'),
- ('🀰', '🂓'),
- ('🂠', '🂮'),
- ('🂱', '🂿'),
- ('🃁', '🃏'),
- ('🃑', '🃵'),
- ('🄍', '🆭'),
- ('🇦', '🈂'),
- ('🈐', '🈻'),
- ('🉀', '🉈'),
- ('🉐', '🉑'),
- ('🉠', '🉥'),
- ('🌀', '🏺'),
- ('🐀', '🛗'),
- ('🛜', '🛬'),
- ('🛰', '🛼'),
- ('🜀', '🝶'),
- ('🝻', '🟙'),
- ('🟠', '🟫'),
- ('🟰', '🟰'),
- ('🠀', '🠋'),
- ('🠐', '🡇'),
- ('🡐', '🡙'),
- ('🡠', '🢇'),
- ('🢐', '🢭'),
- ('🢰', '🢻'),
- ('🣀', '🣁'),
- ('🤀', '🩓'),
- ('🩠', '🩭'),
- ('🩰', '🩼'),
- ('🪀', '🪉'),
- ('🪏', '🫆'),
- ('🫎', '🫜'),
- ('🫟', '🫩'),
- ('🫰', '🫸'),
- ('🬀', '🮒'),
- ('🮔', '🯯'),
-];
-
-pub const PARAGRAPH_SEPARATOR: &'static [(char, char)] =
- &[('\u{2029}', '\u{2029}')];
-
-pub const PRIVATE_USE: &'static [(char, char)] = &[
- ('\u{e000}', '\u{f8ff}'),
- ('\u{f0000}', '\u{ffffd}'),
- ('\u{100000}', '\u{10fffd}'),
-];
-
-pub const PUNCTUATION: &'static [(char, char)] = &[
- ('!', '#'),
- ('%', '*'),
- (',', '/'),
- (':', ';'),
- ('?', '@'),
- ('[', ']'),
- ('_', '_'),
- ('{', '{'),
- ('}', '}'),
- ('¡', '¡'),
- ('§', '§'),
- ('«', '«'),
- ('¶', '·'),
- ('»', '»'),
- ('¿', '¿'),
- (';', ';'),
- ('·', '·'),
- ('՚', '՟'),
- ('։', '֊'),
- ('־', '־'),
- ('׀', '׀'),
- ('׃', '׃'),
- ('׆', '׆'),
- ('׳', '״'),
- ('؉', '؊'),
- ('،', '؍'),
- ('؛', '؛'),
- ('؝', '؟'),
- ('٪', '٭'),
- ('۔', '۔'),
- ('܀', '܍'),
- ('߷', '߹'),
- ('࠰', '࠾'),
- ('࡞', '࡞'),
- ('।', '॥'),
- ('॰', '॰'),
- ('৽', '৽'),
- ('੶', '੶'),
- ('૰', '૰'),
- ('౷', '౷'),
- ('಄', '಄'),
- ('෴', '෴'),
- ('๏', '๏'),
- ('๚', '๛'),
- ('༄', '༒'),
- ('༔', '༔'),
- ('༺', '༽'),
- ('྅', '྅'),
- ('࿐', '࿔'),
- ('࿙', '࿚'),
- ('၊', '၏'),
- ('჻', '჻'),
- ('፠', '፨'),
- ('᐀', '᐀'),
- ('᙮', '᙮'),
- ('᚛', '᚜'),
- ('᛫', '᛭'),
- ('᜵', '᜶'),
- ('។', '៖'),
- ('៘', '៚'),
- ('᠀', '᠊'),
- ('᥄', '᥅'),
- ('᨞', '᨟'),
- ('᪠', '᪦'),
- ('᪨', '᪭'),
- ('᭎', '᭏'),
- ('᭚', '᭠'),
- ('᭽', '᭿'),
- ('᯼', '᯿'),
- ('᰻', '᰿'),
- ('᱾', '᱿'),
- ('᳀', '᳇'),
- ('᳓', '᳓'),
- ('‐', '‧'),
- ('‰', '⁃'),
- ('⁅', '⁑'),
- ('⁓', '⁞'),
- ('⁽', '⁾'),
- ('₍', '₎'),
- ('⌈', '⌋'),
- ('〈', '〉'),
- ('❨', '❵'),
- ('⟅', '⟆'),
- ('⟦', '⟯'),
- ('⦃', '⦘'),
- ('⧘', '⧛'),
- ('⧼', '⧽'),
- ('⳹', '⳼'),
- ('⳾', '⳿'),
- ('⵰', '⵰'),
- ('⸀', '⸮'),
- ('⸰', '⹏'),
- ('⹒', '⹝'),
- ('、', '〃'),
- ('〈', '】'),
- ('〔', '〟'),
- ('〰', '〰'),
- ('〽', '〽'),
- ('゠', '゠'),
- ('・', '・'),
- ('꓾', '꓿'),
- ('꘍', '꘏'),
- ('꙳', '꙳'),
- ('꙾', '꙾'),
- ('꛲', '꛷'),
- ('꡴', '꡷'),
- ('꣎', '꣏'),
- ('꣸', '꣺'),
- ('꣼', '꣼'),
- ('꤮', '꤯'),
- ('꥟', '꥟'),
- ('꧁', '꧍'),
- ('꧞', '꧟'),
- ('꩜', '꩟'),
- ('꫞', '꫟'),
- ('꫰', '꫱'),
- ('꯫', '꯫'),
- ('﴾', '﴿'),
- ('︐', '︙'),
- ('︰', '﹒'),
- ('﹔', '﹡'),
- ('﹣', '﹣'),
- ('﹨', '﹨'),
- ('﹪', '﹫'),
- ('!', '#'),
- ('%', '*'),
- (',', '/'),
- (':', ';'),
- ('?', '@'),
- ('[', ']'),
- ('_', '_'),
- ('{', '{'),
- ('}', '}'),
- ('⦅', '・'),
- ('𐄀', '𐄂'),
- ('𐎟', '𐎟'),
- ('𐏐', '𐏐'),
- ('𐕯', '𐕯'),
- ('𐡗', '𐡗'),
- ('𐤟', '𐤟'),
- ('𐤿', '𐤿'),
- ('𐩐', '𐩘'),
- ('𐩿', '𐩿'),
- ('𐫰', '𐫶'),
- ('𐬹', '𐬿'),
- ('𐮙', '𐮜'),
- ('𐵮', '𐵮'),
- ('𐺭', '𐺭'),
- ('𐽕', '𐽙'),
- ('𐾆', '𐾉'),
- ('𑁇', '𑁍'),
- ('𑂻', '𑂼'),
- ('𑂾', '𑃁'),
- ('𑅀', '𑅃'),
- ('𑅴', '𑅵'),
- ('𑇅', '𑇈'),
- ('𑇍', '𑇍'),
- ('𑇛', '𑇛'),
- ('𑇝', '𑇟'),
- ('𑈸', '𑈽'),
- ('𑊩', '𑊩'),
- ('𑏔', '𑏕'),
- ('𑏗', '𑏘'),
- ('𑑋', '𑑏'),
- ('𑑚', '𑑛'),
- ('𑑝', '𑑝'),
- ('𑓆', '𑓆'),
- ('𑗁', '𑗗'),
- ('𑙁', '𑙃'),
- ('𑙠', '𑙬'),
- ('𑚹', '𑚹'),
- ('𑜼', '𑜾'),
- ('𑠻', '𑠻'),
- ('𑥄', '𑥆'),
- ('𑧢', '𑧢'),
- ('𑨿', '𑩆'),
- ('𑪚', '𑪜'),
- ('𑪞', '𑪢'),
- ('𑬀', '𑬉'),
- ('𑯡', '𑯡'),
- ('𑱁', '𑱅'),
- ('𑱰', '𑱱'),
- ('𑻷', '𑻸'),
- ('𑽃', '𑽏'),
- ('𑿿', '𑿿'),
- ('𒑰', '𒑴'),
- ('𒿱', '𒿲'),
- ('𖩮', '𖩯'),
- ('𖫵', '𖫵'),
- ('𖬷', '𖬻'),
- ('𖭄', '𖭄'),
- ('𖵭', '𖵯'),
- ('𖺗', '𖺚'),
- ('𖿢', '𖿢'),
- ('𛲟', '𛲟'),
- ('𝪇', '𝪋'),
- ('𞗿', '𞗿'),
- ('𞥞', '𞥟'),
-];
-
-pub const SEPARATOR: &'static [(char, char)] = &[
- (' ', ' '),
- ('\u{a0}', '\u{a0}'),
- ('\u{1680}', '\u{1680}'),
- ('\u{2000}', '\u{200a}'),
- ('\u{2028}', '\u{2029}'),
- ('\u{202f}', '\u{202f}'),
- ('\u{205f}', '\u{205f}'),
- ('\u{3000}', '\u{3000}'),
-];
-
-pub const SPACE_SEPARATOR: &'static [(char, char)] = &[
- (' ', ' '),
- ('\u{a0}', '\u{a0}'),
- ('\u{1680}', '\u{1680}'),
- ('\u{2000}', '\u{200a}'),
- ('\u{202f}', '\u{202f}'),
- ('\u{205f}', '\u{205f}'),
- ('\u{3000}', '\u{3000}'),
-];
-
-pub const SPACING_MARK: &'static [(char, char)] = &[
- ('ः', 'ः'),
- ('ऻ', 'ऻ'),
- ('ा', 'ी'),
- ('ॉ', 'ौ'),
- ('ॎ', 'ॏ'),
- ('ং', 'ঃ'),
- ('\u{9be}', 'ী'),
- ('ে', 'ৈ'),
- ('ো', 'ৌ'),
- ('\u{9d7}', '\u{9d7}'),
- ('ਃ', 'ਃ'),
- ('ਾ', 'ੀ'),
- ('ઃ', 'ઃ'),
- ('ા', 'ી'),
- ('ૉ', 'ૉ'),
- ('ો', 'ૌ'),
- ('ଂ', 'ଃ'),
- ('\u{b3e}', '\u{b3e}'),
- ('ୀ', 'ୀ'),
- ('େ', 'ୈ'),
- ('ୋ', 'ୌ'),
- ('\u{b57}', '\u{b57}'),
- ('\u{bbe}', 'ி'),
- ('ு', 'ூ'),
- ('ெ', 'ை'),
- ('ொ', 'ௌ'),
- ('\u{bd7}', '\u{bd7}'),
- ('ఁ', 'ః'),
- ('ు', 'ౄ'),
- ('ಂ', 'ಃ'),
- ('ಾ', 'ಾ'),
- ('\u{cc0}', 'ೄ'),
- ('\u{cc7}', '\u{cc8}'),
- ('\u{cca}', '\u{ccb}'),
- ('\u{cd5}', '\u{cd6}'),
- ('ೳ', 'ೳ'),
- ('ം', 'ഃ'),
- ('\u{d3e}', 'ീ'),
- ('െ', 'ൈ'),
- ('ൊ', 'ൌ'),
- ('\u{d57}', '\u{d57}'),
- ('ං', 'ඃ'),
- ('\u{dcf}', 'ෑ'),
- ('ෘ', '\u{ddf}'),
- ('ෲ', 'ෳ'),
- ('༾', '༿'),
- ('ཿ', 'ཿ'),
- ('ါ', 'ာ'),
- ('ေ', 'ေ'),
- ('း', 'း'),
- ('ျ', 'ြ'),
- ('ၖ', 'ၗ'),
- ('ၢ', 'ၤ'),
- ('ၧ', 'ၭ'),
- ('ႃ', 'ႄ'),
- ('ႇ', 'ႌ'),
- ('ႏ', 'ႏ'),
- ('ႚ', 'ႜ'),
- ('\u{1715}', '\u{1715}'),
- ('\u{1734}', '\u{1734}'),
- ('ា', 'ា'),
- ('ើ', 'ៅ'),
- ('ះ', 'ៈ'),
- ('ᤣ', 'ᤦ'),
- ('ᤩ', 'ᤫ'),
- ('ᤰ', 'ᤱ'),
- ('ᤳ', 'ᤸ'),
- ('ᨙ', 'ᨚ'),
- ('ᩕ', 'ᩕ'),
- ('ᩗ', 'ᩗ'),
- ('ᩡ', 'ᩡ'),
- ('ᩣ', 'ᩤ'),
- ('ᩭ', 'ᩲ'),
- ('ᬄ', 'ᬄ'),
- ('\u{1b35}', '\u{1b35}'),
- ('\u{1b3b}', '\u{1b3b}'),
- ('\u{1b3d}', 'ᭁ'),
- ('\u{1b43}', '\u{1b44}'),
- ('ᮂ', 'ᮂ'),
- ('ᮡ', 'ᮡ'),
- ('ᮦ', 'ᮧ'),
- ('\u{1baa}', '\u{1baa}'),
- ('ᯧ', 'ᯧ'),
- ('ᯪ', 'ᯬ'),
- ('ᯮ', 'ᯮ'),
- ('\u{1bf2}', '\u{1bf3}'),
- ('ᰤ', 'ᰫ'),
- ('ᰴ', 'ᰵ'),
- ('᳡', '᳡'),
- ('᳷', '᳷'),
- ('\u{302e}', '\u{302f}'),
- ('ꠣ', 'ꠤ'),
- ('ꠧ', 'ꠧ'),
- ('ꢀ', 'ꢁ'),
- ('ꢴ', 'ꣃ'),
- ('ꥒ', '\u{a953}'),
- ('ꦃ', 'ꦃ'),
- ('ꦴ', 'ꦵ'),
- ('ꦺ', 'ꦻ'),
- ('ꦾ', '\u{a9c0}'),
- ('ꨯ', 'ꨰ'),
- ('ꨳ', 'ꨴ'),
- ('ꩍ', 'ꩍ'),
- ('ꩻ', 'ꩻ'),
- ('ꩽ', 'ꩽ'),
- ('ꫫ', 'ꫫ'),
- ('ꫮ', 'ꫯ'),
- ('ꫵ', 'ꫵ'),
- ('ꯣ', 'ꯤ'),
- ('ꯦ', 'ꯧ'),
- ('ꯩ', 'ꯪ'),
- ('꯬', '꯬'),
- ('𑀀', '𑀀'),
- ('𑀂', '𑀂'),
- ('𑂂', '𑂂'),
- ('𑂰', '𑂲'),
- ('𑂷', '𑂸'),
- ('𑄬', '𑄬'),
- ('𑅅', '𑅆'),
- ('𑆂', '𑆂'),
- ('𑆳', '𑆵'),
- ('𑆿', '\u{111c0}'),
- ('𑇎', '𑇎'),
- ('𑈬', '𑈮'),
- ('𑈲', '𑈳'),
- ('\u{11235}', '\u{11235}'),
- ('𑋠', '𑋢'),
- ('𑌂', '𑌃'),
- ('\u{1133e}', '𑌿'),
- ('𑍁', '𑍄'),
- ('𑍇', '𑍈'),
- ('𑍋', '\u{1134d}'),
- ('\u{11357}', '\u{11357}'),
- ('𑍢', '𑍣'),
- ('\u{113b8}', '𑎺'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '𑏊'),
- ('𑏌', '𑏍'),
- ('\u{113cf}', '\u{113cf}'),
- ('𑐵', '𑐷'),
- ('𑑀', '𑑁'),
- ('𑑅', '𑑅'),
- ('\u{114b0}', '𑒲'),
- ('𑒹', '𑒹'),
- ('𑒻', '𑒾'),
- ('𑓁', '𑓁'),
- ('\u{115af}', '𑖱'),
- ('𑖸', '𑖻'),
- ('𑖾', '𑖾'),
- ('𑘰', '𑘲'),
- ('𑘻', '𑘼'),
- ('𑘾', '𑘾'),
- ('𑚬', '𑚬'),
- ('𑚮', '𑚯'),
- ('\u{116b6}', '\u{116b6}'),
- ('𑜞', '𑜞'),
- ('𑜠', '𑜡'),
- ('𑜦', '𑜦'),
- ('𑠬', '𑠮'),
- ('𑠸', '𑠸'),
- ('\u{11930}', '𑤵'),
- ('𑤷', '𑤸'),
- ('\u{1193d}', '\u{1193d}'),
- ('𑥀', '𑥀'),
- ('𑥂', '𑥂'),
- ('𑧑', '𑧓'),
- ('𑧜', '𑧟'),
- ('𑧤', '𑧤'),
- ('𑨹', '𑨹'),
- ('𑩗', '𑩘'),
- ('𑪗', '𑪗'),
- ('𑰯', '𑰯'),
- ('𑰾', '𑰾'),
- ('𑲩', '𑲩'),
- ('𑲱', '𑲱'),
- ('𑲴', '𑲴'),
- ('𑶊', '𑶎'),
- ('𑶓', '𑶔'),
- ('𑶖', '𑶖'),
- ('𑻵', '𑻶'),
- ('𑼃', '𑼃'),
- ('𑼴', '𑼵'),
- ('𑼾', '𑼿'),
- ('\u{11f41}', '\u{11f41}'),
- ('𖄪', '𖄬'),
- ('𖽑', '𖾇'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('\u{1d165}', '\u{1d166}'),
- ('\u{1d16d}', '\u{1d172}'),
-];
-
-pub const SYMBOL: &'static [(char, char)] = &[
- ('$', '$'),
- ('+', '+'),
- ('<', '>'),
- ('^', '^'),
- ('`', '`'),
- ('|', '|'),
- ('~', '~'),
- ('¢', '¦'),
- ('¨', '©'),
- ('¬', '¬'),
- ('®', '±'),
- ('´', '´'),
- ('¸', '¸'),
- ('×', '×'),
- ('÷', '÷'),
- ('˂', '˅'),
- ('˒', '˟'),
- ('˥', '˫'),
- ('˭', '˭'),
- ('˯', '˿'),
- ('͵', '͵'),
- ('΄', '΅'),
- ('϶', '϶'),
- ('҂', '҂'),
- ('֍', '֏'),
- ('؆', '؈'),
- ('؋', '؋'),
- ('؎', '؏'),
- ('۞', '۞'),
- ('۩', '۩'),
- ('۽', '۾'),
- ('߶', '߶'),
- ('߾', '߿'),
- ('࢈', '࢈'),
- ('৲', '৳'),
- ('৺', '৻'),
- ('૱', '૱'),
- ('୰', '୰'),
- ('௳', '௺'),
- ('౿', '౿'),
- ('൏', '൏'),
- ('൹', '൹'),
- ('฿', '฿'),
- ('༁', '༃'),
- ('༓', '༓'),
- ('༕', '༗'),
- ('༚', '༟'),
- ('༴', '༴'),
- ('༶', '༶'),
- ('༸', '༸'),
- ('྾', '࿅'),
- ('࿇', '࿌'),
- ('࿎', '࿏'),
- ('࿕', '࿘'),
- ('႞', '႟'),
- ('᎐', '᎙'),
- ('᙭', '᙭'),
- ('៛', '៛'),
- ('᥀', '᥀'),
- ('᧞', '᧿'),
- ('᭡', '᭪'),
- ('᭴', '᭼'),
- ('᾽', '᾽'),
- ('᾿', '῁'),
- ('῍', '῏'),
- ('῝', '῟'),
- ('῭', '`'),
- ('´', '῾'),
- ('⁄', '⁄'),
- ('⁒', '⁒'),
- ('⁺', '⁼'),
- ('₊', '₌'),
- ('₠', '⃀'),
- ('℀', '℁'),
- ('℃', '℆'),
- ('℈', '℉'),
- ('℔', '℔'),
- ('№', '℘'),
- ('℞', '℣'),
- ('℥', '℥'),
- ('℧', '℧'),
- ('℩', '℩'),
- ('℮', '℮'),
- ('℺', '℻'),
- ('⅀', '⅄'),
- ('⅊', '⅍'),
- ('⅏', '⅏'),
- ('↊', '↋'),
- ('←', '⌇'),
- ('⌌', '⌨'),
- ('⌫', '␩'),
- ('⑀', '⑊'),
- ('⒜', 'ⓩ'),
- ('─', '❧'),
- ('➔', '⟄'),
- ('⟇', '⟥'),
- ('⟰', '⦂'),
- ('⦙', '⧗'),
- ('⧜', '⧻'),
- ('⧾', '⭳'),
- ('⭶', '⮕'),
- ('⮗', '⯿'),
- ('⳥', '⳪'),
- ('⹐', '⹑'),
- ('⺀', '⺙'),
- ('⺛', '⻳'),
- ('⼀', '⿕'),
- ('⿰', '⿿'),
- ('〄', '〄'),
- ('〒', '〓'),
- ('〠', '〠'),
- ('〶', '〷'),
- ('〾', '〿'),
- ('゛', '゜'),
- ('㆐', '㆑'),
- ('㆖', '㆟'),
- ('㇀', '㇥'),
- ('㇯', '㇯'),
- ('㈀', '㈞'),
- ('㈪', '㉇'),
- ('㉐', '㉐'),
- ('㉠', '㉿'),
- ('㊊', '㊰'),
- ('㋀', '㏿'),
- ('䷀', '䷿'),
- ('꒐', '꓆'),
- ('꜀', '꜖'),
- ('꜠', '꜡'),
- ('꞉', '꞊'),
- ('꠨', '꠫'),
- ('꠶', '꠹'),
- ('꩷', '꩹'),
- ('꭛', '꭛'),
- ('꭪', '꭫'),
- ('﬩', '﬩'),
- ('﮲', '﯂'),
- ('﵀', '﵏'),
- ('﷏', '﷏'),
- ('﷼', '﷿'),
- ('﹢', '﹢'),
- ('﹤', '﹦'),
- ('﹩', '﹩'),
- ('$', '$'),
- ('+', '+'),
- ('<', '>'),
- ('^', '^'),
- ('`', '`'),
- ('|', '|'),
- ('~', '~'),
- ('¢', '₩'),
- ('│', '○'),
- ('', '�'),
- ('𐄷', '𐄿'),
- ('𐅹', '𐆉'),
- ('𐆌', '𐆎'),
- ('𐆐', '𐆜'),
- ('𐆠', '𐆠'),
- ('𐇐', '𐇼'),
- ('𐡷', '𐡸'),
- ('𐫈', '𐫈'),
- ('𐶎', '𐶏'),
- ('𑜿', '𑜿'),
- ('𑿕', '𑿱'),
- ('𖬼', '𖬿'),
- ('𖭅', '𖭅'),
- ('𛲜', '𛲜'),
- ('𜰀', '𜳯'),
- ('𜴀', '𜺳'),
- ('𜽐', '𜿃'),
- ('𝀀', '𝃵'),
- ('𝄀', '𝄦'),
- ('𝄩', '𝅘𝅥𝅲'),
- ('𝅪', '𝅬'),
- ('𝆃', '𝆄'),
- ('𝆌', '𝆩'),
- ('𝆮', '𝇪'),
- ('𝈀', '𝉁'),
- ('𝉅', '𝉅'),
- ('𝌀', '𝍖'),
- ('𝛁', '𝛁'),
- ('𝛛', '𝛛'),
- ('𝛻', '𝛻'),
- ('𝜕', '𝜕'),
- ('𝜵', '𝜵'),
- ('𝝏', '𝝏'),
- ('𝝯', '𝝯'),
- ('𝞉', '𝞉'),
- ('𝞩', '𝞩'),
- ('𝟃', '𝟃'),
- ('𝠀', '𝧿'),
- ('𝨷', '𝨺'),
- ('𝩭', '𝩴'),
- ('𝩶', '𝪃'),
- ('𝪅', '𝪆'),
- ('𞅏', '𞅏'),
- ('𞋿', '𞋿'),
- ('𞲬', '𞲬'),
- ('𞲰', '𞲰'),
- ('𞴮', '𞴮'),
- ('𞻰', '𞻱'),
- ('🀀', '🀫'),
- ('🀰', '🂓'),
- ('🂠', '🂮'),
- ('🂱', '🂿'),
- ('🃁', '🃏'),
- ('🃑', '🃵'),
- ('🄍', '🆭'),
- ('🇦', '🈂'),
- ('🈐', '🈻'),
- ('🉀', '🉈'),
- ('🉐', '🉑'),
- ('🉠', '🉥'),
- ('🌀', '🛗'),
- ('🛜', '🛬'),
- ('🛰', '🛼'),
- ('🜀', '🝶'),
- ('🝻', '🟙'),
- ('🟠', '🟫'),
- ('🟰', '🟰'),
- ('🠀', '🠋'),
- ('🠐', '🡇'),
- ('🡐', '🡙'),
- ('🡠', '🢇'),
- ('🢐', '🢭'),
- ('🢰', '🢻'),
- ('🣀', '🣁'),
- ('🤀', '🩓'),
- ('🩠', '🩭'),
- ('🩰', '🩼'),
- ('🪀', '🪉'),
- ('🪏', '🫆'),
- ('🫎', '🫜'),
- ('🫟', '🫩'),
- ('🫰', '🫸'),
- ('🬀', '🮒'),
- ('🮔', '🯯'),
-];
-
-pub const TITLECASE_LETTER: &'static [(char, char)] = &[
- ('Dž', 'Dž'),
- ('Lj', 'Lj'),
- ('Nj', 'Nj'),
- ('Dz', 'Dz'),
- ('ᾈ', 'ᾏ'),
- ('ᾘ', 'ᾟ'),
- ('ᾨ', 'ᾯ'),
- ('ᾼ', 'ᾼ'),
- ('ῌ', 'ῌ'),
- ('ῼ', 'ῼ'),
-];
-
-pub const UNASSIGNED: &'static [(char, char)] = &[
- ('\u{378}', '\u{379}'),
- ('\u{380}', '\u{383}'),
- ('\u{38b}', '\u{38b}'),
- ('\u{38d}', '\u{38d}'),
- ('\u{3a2}', '\u{3a2}'),
- ('\u{530}', '\u{530}'),
- ('\u{557}', '\u{558}'),
- ('\u{58b}', '\u{58c}'),
- ('\u{590}', '\u{590}'),
- ('\u{5c8}', '\u{5cf}'),
- ('\u{5eb}', '\u{5ee}'),
- ('\u{5f5}', '\u{5ff}'),
- ('\u{70e}', '\u{70e}'),
- ('\u{74b}', '\u{74c}'),
- ('\u{7b2}', '\u{7bf}'),
- ('\u{7fb}', '\u{7fc}'),
- ('\u{82e}', '\u{82f}'),
- ('\u{83f}', '\u{83f}'),
- ('\u{85c}', '\u{85d}'),
- ('\u{85f}', '\u{85f}'),
- ('\u{86b}', '\u{86f}'),
- ('\u{88f}', '\u{88f}'),
- ('\u{892}', '\u{896}'),
- ('\u{984}', '\u{984}'),
- ('\u{98d}', '\u{98e}'),
- ('\u{991}', '\u{992}'),
- ('\u{9a9}', '\u{9a9}'),
- ('\u{9b1}', '\u{9b1}'),
- ('\u{9b3}', '\u{9b5}'),
- ('\u{9ba}', '\u{9bb}'),
- ('\u{9c5}', '\u{9c6}'),
- ('\u{9c9}', '\u{9ca}'),
- ('\u{9cf}', '\u{9d6}'),
- ('\u{9d8}', '\u{9db}'),
- ('\u{9de}', '\u{9de}'),
- ('\u{9e4}', '\u{9e5}'),
- ('\u{9ff}', '\u{a00}'),
- ('\u{a04}', '\u{a04}'),
- ('\u{a0b}', '\u{a0e}'),
- ('\u{a11}', '\u{a12}'),
- ('\u{a29}', '\u{a29}'),
- ('\u{a31}', '\u{a31}'),
- ('\u{a34}', '\u{a34}'),
- ('\u{a37}', '\u{a37}'),
- ('\u{a3a}', '\u{a3b}'),
- ('\u{a3d}', '\u{a3d}'),
- ('\u{a43}', '\u{a46}'),
- ('\u{a49}', '\u{a4a}'),
- ('\u{a4e}', '\u{a50}'),
- ('\u{a52}', '\u{a58}'),
- ('\u{a5d}', '\u{a5d}'),
- ('\u{a5f}', '\u{a65}'),
- ('\u{a77}', '\u{a80}'),
- ('\u{a84}', '\u{a84}'),
- ('\u{a8e}', '\u{a8e}'),
- ('\u{a92}', '\u{a92}'),
- ('\u{aa9}', '\u{aa9}'),
- ('\u{ab1}', '\u{ab1}'),
- ('\u{ab4}', '\u{ab4}'),
- ('\u{aba}', '\u{abb}'),
- ('\u{ac6}', '\u{ac6}'),
- ('\u{aca}', '\u{aca}'),
- ('\u{ace}', '\u{acf}'),
- ('\u{ad1}', '\u{adf}'),
- ('\u{ae4}', '\u{ae5}'),
- ('\u{af2}', '\u{af8}'),
- ('\u{b00}', '\u{b00}'),
- ('\u{b04}', '\u{b04}'),
- ('\u{b0d}', '\u{b0e}'),
- ('\u{b11}', '\u{b12}'),
- ('\u{b29}', '\u{b29}'),
- ('\u{b31}', '\u{b31}'),
- ('\u{b34}', '\u{b34}'),
- ('\u{b3a}', '\u{b3b}'),
- ('\u{b45}', '\u{b46}'),
- ('\u{b49}', '\u{b4a}'),
- ('\u{b4e}', '\u{b54}'),
- ('\u{b58}', '\u{b5b}'),
- ('\u{b5e}', '\u{b5e}'),
- ('\u{b64}', '\u{b65}'),
- ('\u{b78}', '\u{b81}'),
- ('\u{b84}', '\u{b84}'),
- ('\u{b8b}', '\u{b8d}'),
- ('\u{b91}', '\u{b91}'),
- ('\u{b96}', '\u{b98}'),
- ('\u{b9b}', '\u{b9b}'),
- ('\u{b9d}', '\u{b9d}'),
- ('\u{ba0}', '\u{ba2}'),
- ('\u{ba5}', '\u{ba7}'),
- ('\u{bab}', '\u{bad}'),
- ('\u{bba}', '\u{bbd}'),
- ('\u{bc3}', '\u{bc5}'),
- ('\u{bc9}', '\u{bc9}'),
- ('\u{bce}', '\u{bcf}'),
- ('\u{bd1}', '\u{bd6}'),
- ('\u{bd8}', '\u{be5}'),
- ('\u{bfb}', '\u{bff}'),
- ('\u{c0d}', '\u{c0d}'),
- ('\u{c11}', '\u{c11}'),
- ('\u{c29}', '\u{c29}'),
- ('\u{c3a}', '\u{c3b}'),
- ('\u{c45}', '\u{c45}'),
- ('\u{c49}', '\u{c49}'),
- ('\u{c4e}', '\u{c54}'),
- ('\u{c57}', '\u{c57}'),
- ('\u{c5b}', '\u{c5c}'),
- ('\u{c5e}', '\u{c5f}'),
- ('\u{c64}', '\u{c65}'),
- ('\u{c70}', '\u{c76}'),
- ('\u{c8d}', '\u{c8d}'),
- ('\u{c91}', '\u{c91}'),
- ('\u{ca9}', '\u{ca9}'),
- ('\u{cb4}', '\u{cb4}'),
- ('\u{cba}', '\u{cbb}'),
- ('\u{cc5}', '\u{cc5}'),
- ('\u{cc9}', '\u{cc9}'),
- ('\u{cce}', '\u{cd4}'),
- ('\u{cd7}', '\u{cdc}'),
- ('\u{cdf}', '\u{cdf}'),
- ('\u{ce4}', '\u{ce5}'),
- ('\u{cf0}', '\u{cf0}'),
- ('\u{cf4}', '\u{cff}'),
- ('\u{d0d}', '\u{d0d}'),
- ('\u{d11}', '\u{d11}'),
- ('\u{d45}', '\u{d45}'),
- ('\u{d49}', '\u{d49}'),
- ('\u{d50}', '\u{d53}'),
- ('\u{d64}', '\u{d65}'),
- ('\u{d80}', '\u{d80}'),
- ('\u{d84}', '\u{d84}'),
- ('\u{d97}', '\u{d99}'),
- ('\u{db2}', '\u{db2}'),
- ('\u{dbc}', '\u{dbc}'),
- ('\u{dbe}', '\u{dbf}'),
- ('\u{dc7}', '\u{dc9}'),
- ('\u{dcb}', '\u{dce}'),
- ('\u{dd5}', '\u{dd5}'),
- ('\u{dd7}', '\u{dd7}'),
- ('\u{de0}', '\u{de5}'),
- ('\u{df0}', '\u{df1}'),
- ('\u{df5}', '\u{e00}'),
- ('\u{e3b}', '\u{e3e}'),
- ('\u{e5c}', '\u{e80}'),
- ('\u{e83}', '\u{e83}'),
- ('\u{e85}', '\u{e85}'),
- ('\u{e8b}', '\u{e8b}'),
- ('\u{ea4}', '\u{ea4}'),
- ('\u{ea6}', '\u{ea6}'),
- ('\u{ebe}', '\u{ebf}'),
- ('\u{ec5}', '\u{ec5}'),
- ('\u{ec7}', '\u{ec7}'),
- ('\u{ecf}', '\u{ecf}'),
- ('\u{eda}', '\u{edb}'),
- ('\u{ee0}', '\u{eff}'),
- ('\u{f48}', '\u{f48}'),
- ('\u{f6d}', '\u{f70}'),
- ('\u{f98}', '\u{f98}'),
- ('\u{fbd}', '\u{fbd}'),
- ('\u{fcd}', '\u{fcd}'),
- ('\u{fdb}', '\u{fff}'),
- ('\u{10c6}', '\u{10c6}'),
- ('\u{10c8}', '\u{10cc}'),
- ('\u{10ce}', '\u{10cf}'),
- ('\u{1249}', '\u{1249}'),
- ('\u{124e}', '\u{124f}'),
- ('\u{1257}', '\u{1257}'),
- ('\u{1259}', '\u{1259}'),
- ('\u{125e}', '\u{125f}'),
- ('\u{1289}', '\u{1289}'),
- ('\u{128e}', '\u{128f}'),
- ('\u{12b1}', '\u{12b1}'),
- ('\u{12b6}', '\u{12b7}'),
- ('\u{12bf}', '\u{12bf}'),
- ('\u{12c1}', '\u{12c1}'),
- ('\u{12c6}', '\u{12c7}'),
- ('\u{12d7}', '\u{12d7}'),
- ('\u{1311}', '\u{1311}'),
- ('\u{1316}', '\u{1317}'),
- ('\u{135b}', '\u{135c}'),
- ('\u{137d}', '\u{137f}'),
- ('\u{139a}', '\u{139f}'),
- ('\u{13f6}', '\u{13f7}'),
- ('\u{13fe}', '\u{13ff}'),
- ('\u{169d}', '\u{169f}'),
- ('\u{16f9}', '\u{16ff}'),
- ('\u{1716}', '\u{171e}'),
- ('\u{1737}', '\u{173f}'),
- ('\u{1754}', '\u{175f}'),
- ('\u{176d}', '\u{176d}'),
- ('\u{1771}', '\u{1771}'),
- ('\u{1774}', '\u{177f}'),
- ('\u{17de}', '\u{17df}'),
- ('\u{17ea}', '\u{17ef}'),
- ('\u{17fa}', '\u{17ff}'),
- ('\u{181a}', '\u{181f}'),
- ('\u{1879}', '\u{187f}'),
- ('\u{18ab}', '\u{18af}'),
- ('\u{18f6}', '\u{18ff}'),
- ('\u{191f}', '\u{191f}'),
- ('\u{192c}', '\u{192f}'),
- ('\u{193c}', '\u{193f}'),
- ('\u{1941}', '\u{1943}'),
- ('\u{196e}', '\u{196f}'),
- ('\u{1975}', '\u{197f}'),
- ('\u{19ac}', '\u{19af}'),
- ('\u{19ca}', '\u{19cf}'),
- ('\u{19db}', '\u{19dd}'),
- ('\u{1a1c}', '\u{1a1d}'),
- ('\u{1a5f}', '\u{1a5f}'),
- ('\u{1a7d}', '\u{1a7e}'),
- ('\u{1a8a}', '\u{1a8f}'),
- ('\u{1a9a}', '\u{1a9f}'),
- ('\u{1aae}', '\u{1aaf}'),
- ('\u{1acf}', '\u{1aff}'),
- ('\u{1b4d}', '\u{1b4d}'),
- ('\u{1bf4}', '\u{1bfb}'),
- ('\u{1c38}', '\u{1c3a}'),
- ('\u{1c4a}', '\u{1c4c}'),
- ('\u{1c8b}', '\u{1c8f}'),
- ('\u{1cbb}', '\u{1cbc}'),
- ('\u{1cc8}', '\u{1ccf}'),
- ('\u{1cfb}', '\u{1cff}'),
- ('\u{1f16}', '\u{1f17}'),
- ('\u{1f1e}', '\u{1f1f}'),
- ('\u{1f46}', '\u{1f47}'),
- ('\u{1f4e}', '\u{1f4f}'),
- ('\u{1f58}', '\u{1f58}'),
- ('\u{1f5a}', '\u{1f5a}'),
- ('\u{1f5c}', '\u{1f5c}'),
- ('\u{1f5e}', '\u{1f5e}'),
- ('\u{1f7e}', '\u{1f7f}'),
- ('\u{1fb5}', '\u{1fb5}'),
- ('\u{1fc5}', '\u{1fc5}'),
- ('\u{1fd4}', '\u{1fd5}'),
- ('\u{1fdc}', '\u{1fdc}'),
- ('\u{1ff0}', '\u{1ff1}'),
- ('\u{1ff5}', '\u{1ff5}'),
- ('\u{1fff}', '\u{1fff}'),
- ('\u{2065}', '\u{2065}'),
- ('\u{2072}', '\u{2073}'),
- ('\u{208f}', '\u{208f}'),
- ('\u{209d}', '\u{209f}'),
- ('\u{20c1}', '\u{20cf}'),
- ('\u{20f1}', '\u{20ff}'),
- ('\u{218c}', '\u{218f}'),
- ('\u{242a}', '\u{243f}'),
- ('\u{244b}', '\u{245f}'),
- ('\u{2b74}', '\u{2b75}'),
- ('\u{2b96}', '\u{2b96}'),
- ('\u{2cf4}', '\u{2cf8}'),
- ('\u{2d26}', '\u{2d26}'),
- ('\u{2d28}', '\u{2d2c}'),
- ('\u{2d2e}', '\u{2d2f}'),
- ('\u{2d68}', '\u{2d6e}'),
- ('\u{2d71}', '\u{2d7e}'),
- ('\u{2d97}', '\u{2d9f}'),
- ('\u{2da7}', '\u{2da7}'),
- ('\u{2daf}', '\u{2daf}'),
- ('\u{2db7}', '\u{2db7}'),
- ('\u{2dbf}', '\u{2dbf}'),
- ('\u{2dc7}', '\u{2dc7}'),
- ('\u{2dcf}', '\u{2dcf}'),
- ('\u{2dd7}', '\u{2dd7}'),
- ('\u{2ddf}', '\u{2ddf}'),
- ('\u{2e5e}', '\u{2e7f}'),
- ('\u{2e9a}', '\u{2e9a}'),
- ('\u{2ef4}', '\u{2eff}'),
- ('\u{2fd6}', '\u{2fef}'),
- ('\u{3040}', '\u{3040}'),
- ('\u{3097}', '\u{3098}'),
- ('\u{3100}', '\u{3104}'),
- ('\u{3130}', '\u{3130}'),
- ('\u{318f}', '\u{318f}'),
- ('\u{31e6}', '\u{31ee}'),
- ('\u{321f}', '\u{321f}'),
- ('\u{a48d}', '\u{a48f}'),
- ('\u{a4c7}', '\u{a4cf}'),
- ('\u{a62c}', '\u{a63f}'),
- ('\u{a6f8}', '\u{a6ff}'),
- ('\u{a7ce}', '\u{a7cf}'),
- ('\u{a7d2}', '\u{a7d2}'),
- ('\u{a7d4}', '\u{a7d4}'),
- ('\u{a7dd}', '\u{a7f1}'),
- ('\u{a82d}', '\u{a82f}'),
- ('\u{a83a}', '\u{a83f}'),
- ('\u{a878}', '\u{a87f}'),
- ('\u{a8c6}', '\u{a8cd}'),
- ('\u{a8da}', '\u{a8df}'),
- ('\u{a954}', '\u{a95e}'),
- ('\u{a97d}', '\u{a97f}'),
- ('\u{a9ce}', '\u{a9ce}'),
- ('\u{a9da}', '\u{a9dd}'),
- ('\u{a9ff}', '\u{a9ff}'),
- ('\u{aa37}', '\u{aa3f}'),
- ('\u{aa4e}', '\u{aa4f}'),
- ('\u{aa5a}', '\u{aa5b}'),
- ('\u{aac3}', '\u{aada}'),
- ('\u{aaf7}', '\u{ab00}'),
- ('\u{ab07}', '\u{ab08}'),
- ('\u{ab0f}', '\u{ab10}'),
- ('\u{ab17}', '\u{ab1f}'),
- ('\u{ab27}', '\u{ab27}'),
- ('\u{ab2f}', '\u{ab2f}'),
- ('\u{ab6c}', '\u{ab6f}'),
- ('\u{abee}', '\u{abef}'),
- ('\u{abfa}', '\u{abff}'),
- ('\u{d7a4}', '\u{d7af}'),
- ('\u{d7c7}', '\u{d7ca}'),
- ('\u{d7fc}', '\u{d7ff}'),
- ('\u{fa6e}', '\u{fa6f}'),
- ('\u{fada}', '\u{faff}'),
- ('\u{fb07}', '\u{fb12}'),
- ('\u{fb18}', '\u{fb1c}'),
- ('\u{fb37}', '\u{fb37}'),
- ('\u{fb3d}', '\u{fb3d}'),
- ('\u{fb3f}', '\u{fb3f}'),
- ('\u{fb42}', '\u{fb42}'),
- ('\u{fb45}', '\u{fb45}'),
- ('\u{fbc3}', '\u{fbd2}'),
- ('\u{fd90}', '\u{fd91}'),
- ('\u{fdc8}', '\u{fdce}'),
- ('\u{fdd0}', '\u{fdef}'),
- ('\u{fe1a}', '\u{fe1f}'),
- ('\u{fe53}', '\u{fe53}'),
- ('\u{fe67}', '\u{fe67}'),
- ('\u{fe6c}', '\u{fe6f}'),
- ('\u{fe75}', '\u{fe75}'),
- ('\u{fefd}', '\u{fefe}'),
- ('\u{ff00}', '\u{ff00}'),
- ('\u{ffbf}', '\u{ffc1}'),
- ('\u{ffc8}', '\u{ffc9}'),
- ('\u{ffd0}', '\u{ffd1}'),
- ('\u{ffd8}', '\u{ffd9}'),
- ('\u{ffdd}', '\u{ffdf}'),
- ('\u{ffe7}', '\u{ffe7}'),
- ('\u{ffef}', '\u{fff8}'),
- ('\u{fffe}', '\u{ffff}'),
- ('\u{1000c}', '\u{1000c}'),
- ('\u{10027}', '\u{10027}'),
- ('\u{1003b}', '\u{1003b}'),
- ('\u{1003e}', '\u{1003e}'),
- ('\u{1004e}', '\u{1004f}'),
- ('\u{1005e}', '\u{1007f}'),
- ('\u{100fb}', '\u{100ff}'),
- ('\u{10103}', '\u{10106}'),
- ('\u{10134}', '\u{10136}'),
- ('\u{1018f}', '\u{1018f}'),
- ('\u{1019d}', '\u{1019f}'),
- ('\u{101a1}', '\u{101cf}'),
- ('\u{101fe}', '\u{1027f}'),
- ('\u{1029d}', '\u{1029f}'),
- ('\u{102d1}', '\u{102df}'),
- ('\u{102fc}', '\u{102ff}'),
- ('\u{10324}', '\u{1032c}'),
- ('\u{1034b}', '\u{1034f}'),
- ('\u{1037b}', '\u{1037f}'),
- ('\u{1039e}', '\u{1039e}'),
- ('\u{103c4}', '\u{103c7}'),
- ('\u{103d6}', '\u{103ff}'),
- ('\u{1049e}', '\u{1049f}'),
- ('\u{104aa}', '\u{104af}'),
- ('\u{104d4}', '\u{104d7}'),
- ('\u{104fc}', '\u{104ff}'),
- ('\u{10528}', '\u{1052f}'),
- ('\u{10564}', '\u{1056e}'),
- ('\u{1057b}', '\u{1057b}'),
- ('\u{1058b}', '\u{1058b}'),
- ('\u{10593}', '\u{10593}'),
- ('\u{10596}', '\u{10596}'),
- ('\u{105a2}', '\u{105a2}'),
- ('\u{105b2}', '\u{105b2}'),
- ('\u{105ba}', '\u{105ba}'),
- ('\u{105bd}', '\u{105bf}'),
- ('\u{105f4}', '\u{105ff}'),
- ('\u{10737}', '\u{1073f}'),
- ('\u{10756}', '\u{1075f}'),
- ('\u{10768}', '\u{1077f}'),
- ('\u{10786}', '\u{10786}'),
- ('\u{107b1}', '\u{107b1}'),
- ('\u{107bb}', '\u{107ff}'),
- ('\u{10806}', '\u{10807}'),
- ('\u{10809}', '\u{10809}'),
- ('\u{10836}', '\u{10836}'),
- ('\u{10839}', '\u{1083b}'),
- ('\u{1083d}', '\u{1083e}'),
- ('\u{10856}', '\u{10856}'),
- ('\u{1089f}', '\u{108a6}'),
- ('\u{108b0}', '\u{108df}'),
- ('\u{108f3}', '\u{108f3}'),
- ('\u{108f6}', '\u{108fa}'),
- ('\u{1091c}', '\u{1091e}'),
- ('\u{1093a}', '\u{1093e}'),
- ('\u{10940}', '\u{1097f}'),
- ('\u{109b8}', '\u{109bb}'),
- ('\u{109d0}', '\u{109d1}'),
- ('\u{10a04}', '\u{10a04}'),
- ('\u{10a07}', '\u{10a0b}'),
- ('\u{10a14}', '\u{10a14}'),
- ('\u{10a18}', '\u{10a18}'),
- ('\u{10a36}', '\u{10a37}'),
- ('\u{10a3b}', '\u{10a3e}'),
- ('\u{10a49}', '\u{10a4f}'),
- ('\u{10a59}', '\u{10a5f}'),
- ('\u{10aa0}', '\u{10abf}'),
- ('\u{10ae7}', '\u{10aea}'),
- ('\u{10af7}', '\u{10aff}'),
- ('\u{10b36}', '\u{10b38}'),
- ('\u{10b56}', '\u{10b57}'),
- ('\u{10b73}', '\u{10b77}'),
- ('\u{10b92}', '\u{10b98}'),
- ('\u{10b9d}', '\u{10ba8}'),
- ('\u{10bb0}', '\u{10bff}'),
- ('\u{10c49}', '\u{10c7f}'),
- ('\u{10cb3}', '\u{10cbf}'),
- ('\u{10cf3}', '\u{10cf9}'),
- ('\u{10d28}', '\u{10d2f}'),
- ('\u{10d3a}', '\u{10d3f}'),
- ('\u{10d66}', '\u{10d68}'),
- ('\u{10d86}', '\u{10d8d}'),
- ('\u{10d90}', '\u{10e5f}'),
- ('\u{10e7f}', '\u{10e7f}'),
- ('\u{10eaa}', '\u{10eaa}'),
- ('\u{10eae}', '\u{10eaf}'),
- ('\u{10eb2}', '\u{10ec1}'),
- ('\u{10ec5}', '\u{10efb}'),
- ('\u{10f28}', '\u{10f2f}'),
- ('\u{10f5a}', '\u{10f6f}'),
- ('\u{10f8a}', '\u{10faf}'),
- ('\u{10fcc}', '\u{10fdf}'),
- ('\u{10ff7}', '\u{10fff}'),
- ('\u{1104e}', '\u{11051}'),
- ('\u{11076}', '\u{1107e}'),
- ('\u{110c3}', '\u{110cc}'),
- ('\u{110ce}', '\u{110cf}'),
- ('\u{110e9}', '\u{110ef}'),
- ('\u{110fa}', '\u{110ff}'),
- ('\u{11135}', '\u{11135}'),
- ('\u{11148}', '\u{1114f}'),
- ('\u{11177}', '\u{1117f}'),
- ('\u{111e0}', '\u{111e0}'),
- ('\u{111f5}', '\u{111ff}'),
- ('\u{11212}', '\u{11212}'),
- ('\u{11242}', '\u{1127f}'),
- ('\u{11287}', '\u{11287}'),
- ('\u{11289}', '\u{11289}'),
- ('\u{1128e}', '\u{1128e}'),
- ('\u{1129e}', '\u{1129e}'),
- ('\u{112aa}', '\u{112af}'),
- ('\u{112eb}', '\u{112ef}'),
- ('\u{112fa}', '\u{112ff}'),
- ('\u{11304}', '\u{11304}'),
- ('\u{1130d}', '\u{1130e}'),
- ('\u{11311}', '\u{11312}'),
- ('\u{11329}', '\u{11329}'),
- ('\u{11331}', '\u{11331}'),
- ('\u{11334}', '\u{11334}'),
- ('\u{1133a}', '\u{1133a}'),
- ('\u{11345}', '\u{11346}'),
- ('\u{11349}', '\u{1134a}'),
- ('\u{1134e}', '\u{1134f}'),
- ('\u{11351}', '\u{11356}'),
- ('\u{11358}', '\u{1135c}'),
- ('\u{11364}', '\u{11365}'),
- ('\u{1136d}', '\u{1136f}'),
- ('\u{11375}', '\u{1137f}'),
- ('\u{1138a}', '\u{1138a}'),
- ('\u{1138c}', '\u{1138d}'),
- ('\u{1138f}', '\u{1138f}'),
- ('\u{113b6}', '\u{113b6}'),
- ('\u{113c1}', '\u{113c1}'),
- ('\u{113c3}', '\u{113c4}'),
- ('\u{113c6}', '\u{113c6}'),
- ('\u{113cb}', '\u{113cb}'),
- ('\u{113d6}', '\u{113d6}'),
- ('\u{113d9}', '\u{113e0}'),
- ('\u{113e3}', '\u{113ff}'),
- ('\u{1145c}', '\u{1145c}'),
- ('\u{11462}', '\u{1147f}'),
- ('\u{114c8}', '\u{114cf}'),
- ('\u{114da}', '\u{1157f}'),
- ('\u{115b6}', '\u{115b7}'),
- ('\u{115de}', '\u{115ff}'),
- ('\u{11645}', '\u{1164f}'),
- ('\u{1165a}', '\u{1165f}'),
- ('\u{1166d}', '\u{1167f}'),
- ('\u{116ba}', '\u{116bf}'),
- ('\u{116ca}', '\u{116cf}'),
- ('\u{116e4}', '\u{116ff}'),
- ('\u{1171b}', '\u{1171c}'),
- ('\u{1172c}', '\u{1172f}'),
- ('\u{11747}', '\u{117ff}'),
- ('\u{1183c}', '\u{1189f}'),
- ('\u{118f3}', '\u{118fe}'),
- ('\u{11907}', '\u{11908}'),
- ('\u{1190a}', '\u{1190b}'),
- ('\u{11914}', '\u{11914}'),
- ('\u{11917}', '\u{11917}'),
- ('\u{11936}', '\u{11936}'),
- ('\u{11939}', '\u{1193a}'),
- ('\u{11947}', '\u{1194f}'),
- ('\u{1195a}', '\u{1199f}'),
- ('\u{119a8}', '\u{119a9}'),
- ('\u{119d8}', '\u{119d9}'),
- ('\u{119e5}', '\u{119ff}'),
- ('\u{11a48}', '\u{11a4f}'),
- ('\u{11aa3}', '\u{11aaf}'),
- ('\u{11af9}', '\u{11aff}'),
- ('\u{11b0a}', '\u{11bbf}'),
- ('\u{11be2}', '\u{11bef}'),
- ('\u{11bfa}', '\u{11bff}'),
- ('\u{11c09}', '\u{11c09}'),
- ('\u{11c37}', '\u{11c37}'),
- ('\u{11c46}', '\u{11c4f}'),
- ('\u{11c6d}', '\u{11c6f}'),
- ('\u{11c90}', '\u{11c91}'),
- ('\u{11ca8}', '\u{11ca8}'),
- ('\u{11cb7}', '\u{11cff}'),
- ('\u{11d07}', '\u{11d07}'),
- ('\u{11d0a}', '\u{11d0a}'),
- ('\u{11d37}', '\u{11d39}'),
- ('\u{11d3b}', '\u{11d3b}'),
- ('\u{11d3e}', '\u{11d3e}'),
- ('\u{11d48}', '\u{11d4f}'),
- ('\u{11d5a}', '\u{11d5f}'),
- ('\u{11d66}', '\u{11d66}'),
- ('\u{11d69}', '\u{11d69}'),
- ('\u{11d8f}', '\u{11d8f}'),
- ('\u{11d92}', '\u{11d92}'),
- ('\u{11d99}', '\u{11d9f}'),
- ('\u{11daa}', '\u{11edf}'),
- ('\u{11ef9}', '\u{11eff}'),
- ('\u{11f11}', '\u{11f11}'),
- ('\u{11f3b}', '\u{11f3d}'),
- ('\u{11f5b}', '\u{11faf}'),
- ('\u{11fb1}', '\u{11fbf}'),
- ('\u{11ff2}', '\u{11ffe}'),
- ('\u{1239a}', '\u{123ff}'),
- ('\u{1246f}', '\u{1246f}'),
- ('\u{12475}', '\u{1247f}'),
- ('\u{12544}', '\u{12f8f}'),
- ('\u{12ff3}', '\u{12fff}'),
- ('\u{13456}', '\u{1345f}'),
- ('\u{143fb}', '\u{143ff}'),
- ('\u{14647}', '\u{160ff}'),
- ('\u{1613a}', '\u{167ff}'),
- ('\u{16a39}', '\u{16a3f}'),
- ('\u{16a5f}', '\u{16a5f}'),
- ('\u{16a6a}', '\u{16a6d}'),
- ('\u{16abf}', '\u{16abf}'),
- ('\u{16aca}', '\u{16acf}'),
- ('\u{16aee}', '\u{16aef}'),
- ('\u{16af6}', '\u{16aff}'),
- ('\u{16b46}', '\u{16b4f}'),
- ('\u{16b5a}', '\u{16b5a}'),
- ('\u{16b62}', '\u{16b62}'),
- ('\u{16b78}', '\u{16b7c}'),
- ('\u{16b90}', '\u{16d3f}'),
- ('\u{16d7a}', '\u{16e3f}'),
- ('\u{16e9b}', '\u{16eff}'),
- ('\u{16f4b}', '\u{16f4e}'),
- ('\u{16f88}', '\u{16f8e}'),
- ('\u{16fa0}', '\u{16fdf}'),
- ('\u{16fe5}', '\u{16fef}'),
- ('\u{16ff2}', '\u{16fff}'),
- ('\u{187f8}', '\u{187ff}'),
- ('\u{18cd6}', '\u{18cfe}'),
- ('\u{18d09}', '\u{1afef}'),
- ('\u{1aff4}', '\u{1aff4}'),
- ('\u{1affc}', '\u{1affc}'),
- ('\u{1afff}', '\u{1afff}'),
- ('\u{1b123}', '\u{1b131}'),
- ('\u{1b133}', '\u{1b14f}'),
- ('\u{1b153}', '\u{1b154}'),
- ('\u{1b156}', '\u{1b163}'),
- ('\u{1b168}', '\u{1b16f}'),
- ('\u{1b2fc}', '\u{1bbff}'),
- ('\u{1bc6b}', '\u{1bc6f}'),
- ('\u{1bc7d}', '\u{1bc7f}'),
- ('\u{1bc89}', '\u{1bc8f}'),
- ('\u{1bc9a}', '\u{1bc9b}'),
- ('\u{1bca4}', '\u{1cbff}'),
- ('\u{1ccfa}', '\u{1ccff}'),
- ('\u{1ceb4}', '\u{1ceff}'),
- ('\u{1cf2e}', '\u{1cf2f}'),
- ('\u{1cf47}', '\u{1cf4f}'),
- ('\u{1cfc4}', '\u{1cfff}'),
- ('\u{1d0f6}', '\u{1d0ff}'),
- ('\u{1d127}', '\u{1d128}'),
- ('\u{1d1eb}', '\u{1d1ff}'),
- ('\u{1d246}', '\u{1d2bf}'),
- ('\u{1d2d4}', '\u{1d2df}'),
- ('\u{1d2f4}', '\u{1d2ff}'),
- ('\u{1d357}', '\u{1d35f}'),
- ('\u{1d379}', '\u{1d3ff}'),
- ('\u{1d455}', '\u{1d455}'),
- ('\u{1d49d}', '\u{1d49d}'),
- ('\u{1d4a0}', '\u{1d4a1}'),
- ('\u{1d4a3}', '\u{1d4a4}'),
- ('\u{1d4a7}', '\u{1d4a8}'),
- ('\u{1d4ad}', '\u{1d4ad}'),
- ('\u{1d4ba}', '\u{1d4ba}'),
- ('\u{1d4bc}', '\u{1d4bc}'),
- ('\u{1d4c4}', '\u{1d4c4}'),
- ('\u{1d506}', '\u{1d506}'),
- ('\u{1d50b}', '\u{1d50c}'),
- ('\u{1d515}', '\u{1d515}'),
- ('\u{1d51d}', '\u{1d51d}'),
- ('\u{1d53a}', '\u{1d53a}'),
- ('\u{1d53f}', '\u{1d53f}'),
- ('\u{1d545}', '\u{1d545}'),
- ('\u{1d547}', '\u{1d549}'),
- ('\u{1d551}', '\u{1d551}'),
- ('\u{1d6a6}', '\u{1d6a7}'),
- ('\u{1d7cc}', '\u{1d7cd}'),
- ('\u{1da8c}', '\u{1da9a}'),
- ('\u{1daa0}', '\u{1daa0}'),
- ('\u{1dab0}', '\u{1deff}'),
- ('\u{1df1f}', '\u{1df24}'),
- ('\u{1df2b}', '\u{1dfff}'),
- ('\u{1e007}', '\u{1e007}'),
- ('\u{1e019}', '\u{1e01a}'),
- ('\u{1e022}', '\u{1e022}'),
- ('\u{1e025}', '\u{1e025}'),
- ('\u{1e02b}', '\u{1e02f}'),
- ('\u{1e06e}', '\u{1e08e}'),
- ('\u{1e090}', '\u{1e0ff}'),
- ('\u{1e12d}', '\u{1e12f}'),
- ('\u{1e13e}', '\u{1e13f}'),
- ('\u{1e14a}', '\u{1e14d}'),
- ('\u{1e150}', '\u{1e28f}'),
- ('\u{1e2af}', '\u{1e2bf}'),
- ('\u{1e2fa}', '\u{1e2fe}'),
- ('\u{1e300}', '\u{1e4cf}'),
- ('\u{1e4fa}', '\u{1e5cf}'),
- ('\u{1e5fb}', '\u{1e5fe}'),
- ('\u{1e600}', '\u{1e7df}'),
- ('\u{1e7e7}', '\u{1e7e7}'),
- ('\u{1e7ec}', '\u{1e7ec}'),
- ('\u{1e7ef}', '\u{1e7ef}'),
- ('\u{1e7ff}', '\u{1e7ff}'),
- ('\u{1e8c5}', '\u{1e8c6}'),
- ('\u{1e8d7}', '\u{1e8ff}'),
- ('\u{1e94c}', '\u{1e94f}'),
- ('\u{1e95a}', '\u{1e95d}'),
- ('\u{1e960}', '\u{1ec70}'),
- ('\u{1ecb5}', '\u{1ed00}'),
- ('\u{1ed3e}', '\u{1edff}'),
- ('\u{1ee04}', '\u{1ee04}'),
- ('\u{1ee20}', '\u{1ee20}'),
- ('\u{1ee23}', '\u{1ee23}'),
- ('\u{1ee25}', '\u{1ee26}'),
- ('\u{1ee28}', '\u{1ee28}'),
- ('\u{1ee33}', '\u{1ee33}'),
- ('\u{1ee38}', '\u{1ee38}'),
- ('\u{1ee3a}', '\u{1ee3a}'),
- ('\u{1ee3c}', '\u{1ee41}'),
- ('\u{1ee43}', '\u{1ee46}'),
- ('\u{1ee48}', '\u{1ee48}'),
- ('\u{1ee4a}', '\u{1ee4a}'),
- ('\u{1ee4c}', '\u{1ee4c}'),
- ('\u{1ee50}', '\u{1ee50}'),
- ('\u{1ee53}', '\u{1ee53}'),
- ('\u{1ee55}', '\u{1ee56}'),
- ('\u{1ee58}', '\u{1ee58}'),
- ('\u{1ee5a}', '\u{1ee5a}'),
- ('\u{1ee5c}', '\u{1ee5c}'),
- ('\u{1ee5e}', '\u{1ee5e}'),
- ('\u{1ee60}', '\u{1ee60}'),
- ('\u{1ee63}', '\u{1ee63}'),
- ('\u{1ee65}', '\u{1ee66}'),
- ('\u{1ee6b}', '\u{1ee6b}'),
- ('\u{1ee73}', '\u{1ee73}'),
- ('\u{1ee78}', '\u{1ee78}'),
- ('\u{1ee7d}', '\u{1ee7d}'),
- ('\u{1ee7f}', '\u{1ee7f}'),
- ('\u{1ee8a}', '\u{1ee8a}'),
- ('\u{1ee9c}', '\u{1eea0}'),
- ('\u{1eea4}', '\u{1eea4}'),
- ('\u{1eeaa}', '\u{1eeaa}'),
- ('\u{1eebc}', '\u{1eeef}'),
- ('\u{1eef2}', '\u{1efff}'),
- ('\u{1f02c}', '\u{1f02f}'),
- ('\u{1f094}', '\u{1f09f}'),
- ('\u{1f0af}', '\u{1f0b0}'),
- ('\u{1f0c0}', '\u{1f0c0}'),
- ('\u{1f0d0}', '\u{1f0d0}'),
- ('\u{1f0f6}', '\u{1f0ff}'),
- ('\u{1f1ae}', '\u{1f1e5}'),
- ('\u{1f203}', '\u{1f20f}'),
- ('\u{1f23c}', '\u{1f23f}'),
- ('\u{1f249}', '\u{1f24f}'),
- ('\u{1f252}', '\u{1f25f}'),
- ('\u{1f266}', '\u{1f2ff}'),
- ('\u{1f6d8}', '\u{1f6db}'),
- ('\u{1f6ed}', '\u{1f6ef}'),
- ('\u{1f6fd}', '\u{1f6ff}'),
- ('\u{1f777}', '\u{1f77a}'),
- ('\u{1f7da}', '\u{1f7df}'),
- ('\u{1f7ec}', '\u{1f7ef}'),
- ('\u{1f7f1}', '\u{1f7ff}'),
- ('\u{1f80c}', '\u{1f80f}'),
- ('\u{1f848}', '\u{1f84f}'),
- ('\u{1f85a}', '\u{1f85f}'),
- ('\u{1f888}', '\u{1f88f}'),
- ('\u{1f8ae}', '\u{1f8af}'),
- ('\u{1f8bc}', '\u{1f8bf}'),
- ('\u{1f8c2}', '\u{1f8ff}'),
- ('\u{1fa54}', '\u{1fa5f}'),
- ('\u{1fa6e}', '\u{1fa6f}'),
- ('\u{1fa7d}', '\u{1fa7f}'),
- ('\u{1fa8a}', '\u{1fa8e}'),
- ('\u{1fac7}', '\u{1facd}'),
- ('\u{1fadd}', '\u{1fade}'),
- ('\u{1faea}', '\u{1faef}'),
- ('\u{1faf9}', '\u{1faff}'),
- ('\u{1fb93}', '\u{1fb93}'),
- ('\u{1fbfa}', '\u{1ffff}'),
- ('\u{2a6e0}', '\u{2a6ff}'),
- ('\u{2b73a}', '\u{2b73f}'),
- ('\u{2b81e}', '\u{2b81f}'),
- ('\u{2cea2}', '\u{2ceaf}'),
- ('\u{2ebe1}', '\u{2ebef}'),
- ('\u{2ee5e}', '\u{2f7ff}'),
- ('\u{2fa1e}', '\u{2ffff}'),
- ('\u{3134b}', '\u{3134f}'),
- ('\u{323b0}', '\u{e0000}'),
- ('\u{e0002}', '\u{e001f}'),
- ('\u{e0080}', '\u{e00ff}'),
- ('\u{e01f0}', '\u{effff}'),
- ('\u{ffffe}', '\u{fffff}'),
- ('\u{10fffe}', '\u{10ffff}'),
-];
-
-pub const UPPERCASE_LETTER: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('À', 'Ö'),
- ('Ø', 'Þ'),
- ('Ā', 'Ā'),
- ('Ă', 'Ă'),
- ('Ą', 'Ą'),
- ('Ć', 'Ć'),
- ('Ĉ', 'Ĉ'),
- ('Ċ', 'Ċ'),
- ('Č', 'Č'),
- ('Ď', 'Ď'),
- ('Đ', 'Đ'),
- ('Ē', 'Ē'),
- ('Ĕ', 'Ĕ'),
- ('Ė', 'Ė'),
- ('Ę', 'Ę'),
- ('Ě', 'Ě'),
- ('Ĝ', 'Ĝ'),
- ('Ğ', 'Ğ'),
- ('Ġ', 'Ġ'),
- ('Ģ', 'Ģ'),
- ('Ĥ', 'Ĥ'),
- ('Ħ', 'Ħ'),
- ('Ĩ', 'Ĩ'),
- ('Ī', 'Ī'),
- ('Ĭ', 'Ĭ'),
- ('Į', 'Į'),
- ('İ', 'İ'),
- ('IJ', 'IJ'),
- ('Ĵ', 'Ĵ'),
- ('Ķ', 'Ķ'),
- ('Ĺ', 'Ĺ'),
- ('Ļ', 'Ļ'),
- ('Ľ', 'Ľ'),
- ('Ŀ', 'Ŀ'),
- ('Ł', 'Ł'),
- ('Ń', 'Ń'),
- ('Ņ', 'Ņ'),
- ('Ň', 'Ň'),
- ('Ŋ', 'Ŋ'),
- ('Ō', 'Ō'),
- ('Ŏ', 'Ŏ'),
- ('Ő', 'Ő'),
- ('Œ', 'Œ'),
- ('Ŕ', 'Ŕ'),
- ('Ŗ', 'Ŗ'),
- ('Ř', 'Ř'),
- ('Ś', 'Ś'),
- ('Ŝ', 'Ŝ'),
- ('Ş', 'Ş'),
- ('Š', 'Š'),
- ('Ţ', 'Ţ'),
- ('Ť', 'Ť'),
- ('Ŧ', 'Ŧ'),
- ('Ũ', 'Ũ'),
- ('Ū', 'Ū'),
- ('Ŭ', 'Ŭ'),
- ('Ů', 'Ů'),
- ('Ű', 'Ű'),
- ('Ų', 'Ų'),
- ('Ŵ', 'Ŵ'),
- ('Ŷ', 'Ŷ'),
- ('Ÿ', 'Ź'),
- ('Ż', 'Ż'),
- ('Ž', 'Ž'),
- ('Ɓ', 'Ƃ'),
- ('Ƅ', 'Ƅ'),
- ('Ɔ', 'Ƈ'),
- ('Ɖ', 'Ƌ'),
- ('Ǝ', 'Ƒ'),
- ('Ɠ', 'Ɣ'),
- ('Ɩ', 'Ƙ'),
- ('Ɯ', 'Ɲ'),
- ('Ɵ', 'Ơ'),
- ('Ƣ', 'Ƣ'),
- ('Ƥ', 'Ƥ'),
- ('Ʀ', 'Ƨ'),
- ('Ʃ', 'Ʃ'),
- ('Ƭ', 'Ƭ'),
- ('Ʈ', 'Ư'),
- ('Ʊ', 'Ƴ'),
- ('Ƶ', 'Ƶ'),
- ('Ʒ', 'Ƹ'),
- ('Ƽ', 'Ƽ'),
- ('DŽ', 'DŽ'),
- ('LJ', 'LJ'),
- ('NJ', 'NJ'),
- ('Ǎ', 'Ǎ'),
- ('Ǐ', 'Ǐ'),
- ('Ǒ', 'Ǒ'),
- ('Ǔ', 'Ǔ'),
- ('Ǖ', 'Ǖ'),
- ('Ǘ', 'Ǘ'),
- ('Ǚ', 'Ǚ'),
- ('Ǜ', 'Ǜ'),
- ('Ǟ', 'Ǟ'),
- ('Ǡ', 'Ǡ'),
- ('Ǣ', 'Ǣ'),
- ('Ǥ', 'Ǥ'),
- ('Ǧ', 'Ǧ'),
- ('Ǩ', 'Ǩ'),
- ('Ǫ', 'Ǫ'),
- ('Ǭ', 'Ǭ'),
- ('Ǯ', 'Ǯ'),
- ('DZ', 'DZ'),
- ('Ǵ', 'Ǵ'),
- ('Ƕ', 'Ǹ'),
- ('Ǻ', 'Ǻ'),
- ('Ǽ', 'Ǽ'),
- ('Ǿ', 'Ǿ'),
- ('Ȁ', 'Ȁ'),
- ('Ȃ', 'Ȃ'),
- ('Ȅ', 'Ȅ'),
- ('Ȇ', 'Ȇ'),
- ('Ȉ', 'Ȉ'),
- ('Ȋ', 'Ȋ'),
- ('Ȍ', 'Ȍ'),
- ('Ȏ', 'Ȏ'),
- ('Ȑ', 'Ȑ'),
- ('Ȓ', 'Ȓ'),
- ('Ȕ', 'Ȕ'),
- ('Ȗ', 'Ȗ'),
- ('Ș', 'Ș'),
- ('Ț', 'Ț'),
- ('Ȝ', 'Ȝ'),
- ('Ȟ', 'Ȟ'),
- ('Ƞ', 'Ƞ'),
- ('Ȣ', 'Ȣ'),
- ('Ȥ', 'Ȥ'),
- ('Ȧ', 'Ȧ'),
- ('Ȩ', 'Ȩ'),
- ('Ȫ', 'Ȫ'),
- ('Ȭ', 'Ȭ'),
- ('Ȯ', 'Ȯ'),
- ('Ȱ', 'Ȱ'),
- ('Ȳ', 'Ȳ'),
- ('Ⱥ', 'Ȼ'),
- ('Ƚ', 'Ⱦ'),
- ('Ɂ', 'Ɂ'),
- ('Ƀ', 'Ɇ'),
- ('Ɉ', 'Ɉ'),
- ('Ɋ', 'Ɋ'),
- ('Ɍ', 'Ɍ'),
- ('Ɏ', 'Ɏ'),
- ('Ͱ', 'Ͱ'),
- ('Ͳ', 'Ͳ'),
- ('Ͷ', 'Ͷ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ώ'),
- ('Α', 'Ρ'),
- ('Σ', 'Ϋ'),
- ('Ϗ', 'Ϗ'),
- ('ϒ', 'ϔ'),
- ('Ϙ', 'Ϙ'),
- ('Ϛ', 'Ϛ'),
- ('Ϝ', 'Ϝ'),
- ('Ϟ', 'Ϟ'),
- ('Ϡ', 'Ϡ'),
- ('Ϣ', 'Ϣ'),
- ('Ϥ', 'Ϥ'),
- ('Ϧ', 'Ϧ'),
- ('Ϩ', 'Ϩ'),
- ('Ϫ', 'Ϫ'),
- ('Ϭ', 'Ϭ'),
- ('Ϯ', 'Ϯ'),
- ('ϴ', 'ϴ'),
- ('Ϸ', 'Ϸ'),
- ('Ϲ', 'Ϻ'),
- ('Ͻ', 'Я'),
- ('Ѡ', 'Ѡ'),
- ('Ѣ', 'Ѣ'),
- ('Ѥ', 'Ѥ'),
- ('Ѧ', 'Ѧ'),
- ('Ѩ', 'Ѩ'),
- ('Ѫ', 'Ѫ'),
- ('Ѭ', 'Ѭ'),
- ('Ѯ', 'Ѯ'),
- ('Ѱ', 'Ѱ'),
- ('Ѳ', 'Ѳ'),
- ('Ѵ', 'Ѵ'),
- ('Ѷ', 'Ѷ'),
- ('Ѹ', 'Ѹ'),
- ('Ѻ', 'Ѻ'),
- ('Ѽ', 'Ѽ'),
- ('Ѿ', 'Ѿ'),
- ('Ҁ', 'Ҁ'),
- ('Ҋ', 'Ҋ'),
- ('Ҍ', 'Ҍ'),
- ('Ҏ', 'Ҏ'),
- ('Ґ', 'Ґ'),
- ('Ғ', 'Ғ'),
- ('Ҕ', 'Ҕ'),
- ('Җ', 'Җ'),
- ('Ҙ', 'Ҙ'),
- ('Қ', 'Қ'),
- ('Ҝ', 'Ҝ'),
- ('Ҟ', 'Ҟ'),
- ('Ҡ', 'Ҡ'),
- ('Ң', 'Ң'),
- ('Ҥ', 'Ҥ'),
- ('Ҧ', 'Ҧ'),
- ('Ҩ', 'Ҩ'),
- ('Ҫ', 'Ҫ'),
- ('Ҭ', 'Ҭ'),
- ('Ү', 'Ү'),
- ('Ұ', 'Ұ'),
- ('Ҳ', 'Ҳ'),
- ('Ҵ', 'Ҵ'),
- ('Ҷ', 'Ҷ'),
- ('Ҹ', 'Ҹ'),
- ('Һ', 'Һ'),
- ('Ҽ', 'Ҽ'),
- ('Ҿ', 'Ҿ'),
- ('Ӏ', 'Ӂ'),
- ('Ӄ', 'Ӄ'),
- ('Ӆ', 'Ӆ'),
- ('Ӈ', 'Ӈ'),
- ('Ӊ', 'Ӊ'),
- ('Ӌ', 'Ӌ'),
- ('Ӎ', 'Ӎ'),
- ('Ӑ', 'Ӑ'),
- ('Ӓ', 'Ӓ'),
- ('Ӕ', 'Ӕ'),
- ('Ӗ', 'Ӗ'),
- ('Ә', 'Ә'),
- ('Ӛ', 'Ӛ'),
- ('Ӝ', 'Ӝ'),
- ('Ӟ', 'Ӟ'),
- ('Ӡ', 'Ӡ'),
- ('Ӣ', 'Ӣ'),
- ('Ӥ', 'Ӥ'),
- ('Ӧ', 'Ӧ'),
- ('Ө', 'Ө'),
- ('Ӫ', 'Ӫ'),
- ('Ӭ', 'Ӭ'),
- ('Ӯ', 'Ӯ'),
- ('Ӱ', 'Ӱ'),
- ('Ӳ', 'Ӳ'),
- ('Ӵ', 'Ӵ'),
- ('Ӷ', 'Ӷ'),
- ('Ӹ', 'Ӹ'),
- ('Ӻ', 'Ӻ'),
- ('Ӽ', 'Ӽ'),
- ('Ӿ', 'Ӿ'),
- ('Ԁ', 'Ԁ'),
- ('Ԃ', 'Ԃ'),
- ('Ԅ', 'Ԅ'),
- ('Ԇ', 'Ԇ'),
- ('Ԉ', 'Ԉ'),
- ('Ԋ', 'Ԋ'),
- ('Ԍ', 'Ԍ'),
- ('Ԏ', 'Ԏ'),
- ('Ԑ', 'Ԑ'),
- ('Ԓ', 'Ԓ'),
- ('Ԕ', 'Ԕ'),
- ('Ԗ', 'Ԗ'),
- ('Ԙ', 'Ԙ'),
- ('Ԛ', 'Ԛ'),
- ('Ԝ', 'Ԝ'),
- ('Ԟ', 'Ԟ'),
- ('Ԡ', 'Ԡ'),
- ('Ԣ', 'Ԣ'),
- ('Ԥ', 'Ԥ'),
- ('Ԧ', 'Ԧ'),
- ('Ԩ', 'Ԩ'),
- ('Ԫ', 'Ԫ'),
- ('Ԭ', 'Ԭ'),
- ('Ԯ', 'Ԯ'),
- ('Ա', 'Ֆ'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('Ꭰ', 'Ᏽ'),
- ('Ᲊ', 'Ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('Ḁ', 'Ḁ'),
- ('Ḃ', 'Ḃ'),
- ('Ḅ', 'Ḅ'),
- ('Ḇ', 'Ḇ'),
- ('Ḉ', 'Ḉ'),
- ('Ḋ', 'Ḋ'),
- ('Ḍ', 'Ḍ'),
- ('Ḏ', 'Ḏ'),
- ('Ḑ', 'Ḑ'),
- ('Ḓ', 'Ḓ'),
- ('Ḕ', 'Ḕ'),
- ('Ḗ', 'Ḗ'),
- ('Ḙ', 'Ḙ'),
- ('Ḛ', 'Ḛ'),
- ('Ḝ', 'Ḝ'),
- ('Ḟ', 'Ḟ'),
- ('Ḡ', 'Ḡ'),
- ('Ḣ', 'Ḣ'),
- ('Ḥ', 'Ḥ'),
- ('Ḧ', 'Ḧ'),
- ('Ḩ', 'Ḩ'),
- ('Ḫ', 'Ḫ'),
- ('Ḭ', 'Ḭ'),
- ('Ḯ', 'Ḯ'),
- ('Ḱ', 'Ḱ'),
- ('Ḳ', 'Ḳ'),
- ('Ḵ', 'Ḵ'),
- ('Ḷ', 'Ḷ'),
- ('Ḹ', 'Ḹ'),
- ('Ḻ', 'Ḻ'),
- ('Ḽ', 'Ḽ'),
- ('Ḿ', 'Ḿ'),
- ('Ṁ', 'Ṁ'),
- ('Ṃ', 'Ṃ'),
- ('Ṅ', 'Ṅ'),
- ('Ṇ', 'Ṇ'),
- ('Ṉ', 'Ṉ'),
- ('Ṋ', 'Ṋ'),
- ('Ṍ', 'Ṍ'),
- ('Ṏ', 'Ṏ'),
- ('Ṑ', 'Ṑ'),
- ('Ṓ', 'Ṓ'),
- ('Ṕ', 'Ṕ'),
- ('Ṗ', 'Ṗ'),
- ('Ṙ', 'Ṙ'),
- ('Ṛ', 'Ṛ'),
- ('Ṝ', 'Ṝ'),
- ('Ṟ', 'Ṟ'),
- ('Ṡ', 'Ṡ'),
- ('Ṣ', 'Ṣ'),
- ('Ṥ', 'Ṥ'),
- ('Ṧ', 'Ṧ'),
- ('Ṩ', 'Ṩ'),
- ('Ṫ', 'Ṫ'),
- ('Ṭ', 'Ṭ'),
- ('Ṯ', 'Ṯ'),
- ('Ṱ', 'Ṱ'),
- ('Ṳ', 'Ṳ'),
- ('Ṵ', 'Ṵ'),
- ('Ṷ', 'Ṷ'),
- ('Ṹ', 'Ṹ'),
- ('Ṻ', 'Ṻ'),
- ('Ṽ', 'Ṽ'),
- ('Ṿ', 'Ṿ'),
- ('Ẁ', 'Ẁ'),
- ('Ẃ', 'Ẃ'),
- ('Ẅ', 'Ẅ'),
- ('Ẇ', 'Ẇ'),
- ('Ẉ', 'Ẉ'),
- ('Ẋ', 'Ẋ'),
- ('Ẍ', 'Ẍ'),
- ('Ẏ', 'Ẏ'),
- ('Ẑ', 'Ẑ'),
- ('Ẓ', 'Ẓ'),
- ('Ẕ', 'Ẕ'),
- ('ẞ', 'ẞ'),
- ('Ạ', 'Ạ'),
- ('Ả', 'Ả'),
- ('Ấ', 'Ấ'),
- ('Ầ', 'Ầ'),
- ('Ẩ', 'Ẩ'),
- ('Ẫ', 'Ẫ'),
- ('Ậ', 'Ậ'),
- ('Ắ', 'Ắ'),
- ('Ằ', 'Ằ'),
- ('Ẳ', 'Ẳ'),
- ('Ẵ', 'Ẵ'),
- ('Ặ', 'Ặ'),
- ('Ẹ', 'Ẹ'),
- ('Ẻ', 'Ẻ'),
- ('Ẽ', 'Ẽ'),
- ('Ế', 'Ế'),
- ('Ề', 'Ề'),
- ('Ể', 'Ể'),
- ('Ễ', 'Ễ'),
- ('Ệ', 'Ệ'),
- ('Ỉ', 'Ỉ'),
- ('Ị', 'Ị'),
- ('Ọ', 'Ọ'),
- ('Ỏ', 'Ỏ'),
- ('Ố', 'Ố'),
- ('Ồ', 'Ồ'),
- ('Ổ', 'Ổ'),
- ('Ỗ', 'Ỗ'),
- ('Ộ', 'Ộ'),
- ('Ớ', 'Ớ'),
- ('Ờ', 'Ờ'),
- ('Ở', 'Ở'),
- ('Ỡ', 'Ỡ'),
- ('Ợ', 'Ợ'),
- ('Ụ', 'Ụ'),
- ('Ủ', 'Ủ'),
- ('Ứ', 'Ứ'),
- ('Ừ', 'Ừ'),
- ('Ử', 'Ử'),
- ('Ữ', 'Ữ'),
- ('Ự', 'Ự'),
- ('Ỳ', 'Ỳ'),
- ('Ỵ', 'Ỵ'),
- ('Ỷ', 'Ỷ'),
- ('Ỹ', 'Ỹ'),
- ('Ỻ', 'Ỻ'),
- ('Ỽ', 'Ỽ'),
- ('Ỿ', 'Ỿ'),
- ('Ἀ', 'Ἇ'),
- ('Ἐ', 'Ἕ'),
- ('Ἠ', 'Ἧ'),
- ('Ἰ', 'Ἷ'),
- ('Ὀ', 'Ὅ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'Ὗ'),
- ('Ὠ', 'Ὧ'),
- ('Ᾰ', 'Ά'),
- ('Ὲ', 'Ή'),
- ('Ῐ', 'Ί'),
- ('Ῠ', 'Ῥ'),
- ('Ὸ', 'Ώ'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℋ', 'ℍ'),
- ('ℐ', 'ℒ'),
- ('ℕ', 'ℕ'),
- ('ℙ', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('Ω', 'Ω'),
- ('ℨ', 'ℨ'),
- ('K', 'ℭ'),
- ('ℰ', 'ℳ'),
- ('ℾ', 'ℿ'),
- ('ⅅ', 'ⅅ'),
- ('Ↄ', 'Ↄ'),
- ('Ⰰ', 'Ⱟ'),
- ('Ⱡ', 'Ⱡ'),
- ('Ɫ', 'Ɽ'),
- ('Ⱨ', 'Ⱨ'),
- ('Ⱪ', 'Ⱪ'),
- ('Ⱬ', 'Ⱬ'),
- ('Ɑ', 'Ɒ'),
- ('Ⱳ', 'Ⱳ'),
- ('Ⱶ', 'Ⱶ'),
- ('Ȿ', 'Ⲁ'),
- ('Ⲃ', 'Ⲃ'),
- ('Ⲅ', 'Ⲅ'),
- ('Ⲇ', 'Ⲇ'),
- ('Ⲉ', 'Ⲉ'),
- ('Ⲋ', 'Ⲋ'),
- ('Ⲍ', 'Ⲍ'),
- ('Ⲏ', 'Ⲏ'),
- ('Ⲑ', 'Ⲑ'),
- ('Ⲓ', 'Ⲓ'),
- ('Ⲕ', 'Ⲕ'),
- ('Ⲗ', 'Ⲗ'),
- ('Ⲙ', 'Ⲙ'),
- ('Ⲛ', 'Ⲛ'),
- ('Ⲝ', 'Ⲝ'),
- ('Ⲟ', 'Ⲟ'),
- ('Ⲡ', 'Ⲡ'),
- ('Ⲣ', 'Ⲣ'),
- ('Ⲥ', 'Ⲥ'),
- ('Ⲧ', 'Ⲧ'),
- ('Ⲩ', 'Ⲩ'),
- ('Ⲫ', 'Ⲫ'),
- ('Ⲭ', 'Ⲭ'),
- ('Ⲯ', 'Ⲯ'),
- ('Ⲱ', 'Ⲱ'),
- ('Ⲳ', 'Ⲳ'),
- ('Ⲵ', 'Ⲵ'),
- ('Ⲷ', 'Ⲷ'),
- ('Ⲹ', 'Ⲹ'),
- ('Ⲻ', 'Ⲻ'),
- ('Ⲽ', 'Ⲽ'),
- ('Ⲿ', 'Ⲿ'),
- ('Ⳁ', 'Ⳁ'),
- ('Ⳃ', 'Ⳃ'),
- ('Ⳅ', 'Ⳅ'),
- ('Ⳇ', 'Ⳇ'),
- ('Ⳉ', 'Ⳉ'),
- ('Ⳋ', 'Ⳋ'),
- ('Ⳍ', 'Ⳍ'),
- ('Ⳏ', 'Ⳏ'),
- ('Ⳑ', 'Ⳑ'),
- ('Ⳓ', 'Ⳓ'),
- ('Ⳕ', 'Ⳕ'),
- ('Ⳗ', 'Ⳗ'),
- ('Ⳙ', 'Ⳙ'),
- ('Ⳛ', 'Ⳛ'),
- ('Ⳝ', 'Ⳝ'),
- ('Ⳟ', 'Ⳟ'),
- ('Ⳡ', 'Ⳡ'),
- ('Ⳣ', 'Ⳣ'),
- ('Ⳬ', 'Ⳬ'),
- ('Ⳮ', 'Ⳮ'),
- ('Ⳳ', 'Ⳳ'),
- ('Ꙁ', 'Ꙁ'),
- ('Ꙃ', 'Ꙃ'),
- ('Ꙅ', 'Ꙅ'),
- ('Ꙇ', 'Ꙇ'),
- ('Ꙉ', 'Ꙉ'),
- ('Ꙋ', 'Ꙋ'),
- ('Ꙍ', 'Ꙍ'),
- ('Ꙏ', 'Ꙏ'),
- ('Ꙑ', 'Ꙑ'),
- ('Ꙓ', 'Ꙓ'),
- ('Ꙕ', 'Ꙕ'),
- ('Ꙗ', 'Ꙗ'),
- ('Ꙙ', 'Ꙙ'),
- ('Ꙛ', 'Ꙛ'),
- ('Ꙝ', 'Ꙝ'),
- ('Ꙟ', 'Ꙟ'),
- ('Ꙡ', 'Ꙡ'),
- ('Ꙣ', 'Ꙣ'),
- ('Ꙥ', 'Ꙥ'),
- ('Ꙧ', 'Ꙧ'),
- ('Ꙩ', 'Ꙩ'),
- ('Ꙫ', 'Ꙫ'),
- ('Ꙭ', 'Ꙭ'),
- ('Ꚁ', 'Ꚁ'),
- ('Ꚃ', 'Ꚃ'),
- ('Ꚅ', 'Ꚅ'),
- ('Ꚇ', 'Ꚇ'),
- ('Ꚉ', 'Ꚉ'),
- ('Ꚋ', 'Ꚋ'),
- ('Ꚍ', 'Ꚍ'),
- ('Ꚏ', 'Ꚏ'),
- ('Ꚑ', 'Ꚑ'),
- ('Ꚓ', 'Ꚓ'),
- ('Ꚕ', 'Ꚕ'),
- ('Ꚗ', 'Ꚗ'),
- ('Ꚙ', 'Ꚙ'),
- ('Ꚛ', 'Ꚛ'),
- ('Ꜣ', 'Ꜣ'),
- ('Ꜥ', 'Ꜥ'),
- ('Ꜧ', 'Ꜧ'),
- ('Ꜩ', 'Ꜩ'),
- ('Ꜫ', 'Ꜫ'),
- ('Ꜭ', 'Ꜭ'),
- ('Ꜯ', 'Ꜯ'),
- ('Ꜳ', 'Ꜳ'),
- ('Ꜵ', 'Ꜵ'),
- ('Ꜷ', 'Ꜷ'),
- ('Ꜹ', 'Ꜹ'),
- ('Ꜻ', 'Ꜻ'),
- ('Ꜽ', 'Ꜽ'),
- ('Ꜿ', 'Ꜿ'),
- ('Ꝁ', 'Ꝁ'),
- ('Ꝃ', 'Ꝃ'),
- ('Ꝅ', 'Ꝅ'),
- ('Ꝇ', 'Ꝇ'),
- ('Ꝉ', 'Ꝉ'),
- ('Ꝋ', 'Ꝋ'),
- ('Ꝍ', 'Ꝍ'),
- ('Ꝏ', 'Ꝏ'),
- ('Ꝑ', 'Ꝑ'),
- ('Ꝓ', 'Ꝓ'),
- ('Ꝕ', 'Ꝕ'),
- ('Ꝗ', 'Ꝗ'),
- ('Ꝙ', 'Ꝙ'),
- ('Ꝛ', 'Ꝛ'),
- ('Ꝝ', 'Ꝝ'),
- ('Ꝟ', 'Ꝟ'),
- ('Ꝡ', 'Ꝡ'),
- ('Ꝣ', 'Ꝣ'),
- ('Ꝥ', 'Ꝥ'),
- ('Ꝧ', 'Ꝧ'),
- ('Ꝩ', 'Ꝩ'),
- ('Ꝫ', 'Ꝫ'),
- ('Ꝭ', 'Ꝭ'),
- ('Ꝯ', 'Ꝯ'),
- ('Ꝺ', 'Ꝺ'),
- ('Ꝼ', 'Ꝼ'),
- ('Ᵹ', 'Ꝿ'),
- ('Ꞁ', 'Ꞁ'),
- ('Ꞃ', 'Ꞃ'),
- ('Ꞅ', 'Ꞅ'),
- ('Ꞇ', 'Ꞇ'),
- ('Ꞌ', 'Ꞌ'),
- ('Ɥ', 'Ɥ'),
- ('Ꞑ', 'Ꞑ'),
- ('Ꞓ', 'Ꞓ'),
- ('Ꞗ', 'Ꞗ'),
- ('Ꞙ', 'Ꞙ'),
- ('Ꞛ', 'Ꞛ'),
- ('Ꞝ', 'Ꞝ'),
- ('Ꞟ', 'Ꞟ'),
- ('Ꞡ', 'Ꞡ'),
- ('Ꞣ', 'Ꞣ'),
- ('Ꞥ', 'Ꞥ'),
- ('Ꞧ', 'Ꞧ'),
- ('Ꞩ', 'Ꞩ'),
- ('Ɦ', 'Ɪ'),
- ('Ʞ', 'Ꞵ'),
- ('Ꞷ', 'Ꞷ'),
- ('Ꞹ', 'Ꞹ'),
- ('Ꞻ', 'Ꞻ'),
- ('Ꞽ', 'Ꞽ'),
- ('Ꞿ', 'Ꞿ'),
- ('Ꟁ', 'Ꟁ'),
- ('Ꟃ', 'Ꟃ'),
- ('Ꞔ', 'Ꟈ'),
- ('Ꟊ', 'Ꟊ'),
- ('Ɤ', 'Ꟍ'),
- ('Ꟑ', 'Ꟑ'),
- ('Ꟗ', 'Ꟗ'),
- ('Ꟙ', 'Ꟙ'),
- ('Ꟛ', 'Ꟛ'),
- ('Ƛ', 'Ƛ'),
- ('Ꟶ', 'Ꟶ'),
- ('A', 'Z'),
- ('𐐀', '𐐧'),
- ('𐒰', '𐓓'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐲀', '𐲲'),
- ('𐵐', '𐵥'),
- ('𑢠', '𑢿'),
- ('𖹀', '𖹟'),
- ('𝐀', '𝐙'),
- ('𝐴', '𝑍'),
- ('𝑨', '𝒁'),
- ('𝒜', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒵'),
- ('𝓐', '𝓩'),
- ('𝔄', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔸', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕬', '𝖅'),
- ('𝖠', '𝖹'),
- ('𝗔', '𝗭'),
- ('𝘈', '𝘡'),
- ('𝘼', '𝙕'),
- ('𝙰', '𝚉'),
- ('𝚨', '𝛀'),
- ('𝛢', '𝛺'),
- ('𝜜', '𝜴'),
- ('𝝖', '𝝮'),
- ('𝞐', '𝞨'),
- ('𝟊', '𝟊'),
- ('𞤀', '𞤡'),
-];
diff --git a/vendor/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs b/vendor/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs
deleted file mode 100644
index 6a6ec2af..00000000
--- a/vendor/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs
+++ /dev/null
@@ -1,1420 +0,0 @@
-// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
-//
-// ucd-generate grapheme-cluster-break ucd-16.0.0 --chars
-//
-// Unicode version: 16.0.0.
-//
-// ucd-generate 0.3.1 is available on crates.io.
-
-pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
- ("CR", CR),
- ("Control", CONTROL),
- ("Extend", EXTEND),
- ("L", L),
- ("LF", LF),
- ("LV", LV),
- ("LVT", LVT),
- ("Prepend", PREPEND),
- ("Regional_Indicator", REGIONAL_INDICATOR),
- ("SpacingMark", SPACINGMARK),
- ("T", T),
- ("V", V),
- ("ZWJ", ZWJ),
-];
-
-pub const CR: &'static [(char, char)] = &[('\r', '\r')];
-
-pub const CONTROL: &'static [(char, char)] = &[
- ('\0', '\t'),
- ('\u{b}', '\u{c}'),
- ('\u{e}', '\u{1f}'),
- ('\u{7f}', '\u{9f}'),
- ('\u{ad}', '\u{ad}'),
- ('\u{61c}', '\u{61c}'),
- ('\u{180e}', '\u{180e}'),
- ('\u{200b}', '\u{200b}'),
- ('\u{200e}', '\u{200f}'),
- ('\u{2028}', '\u{202e}'),
- ('\u{2060}', '\u{206f}'),
- ('\u{feff}', '\u{feff}'),
- ('\u{fff0}', '\u{fffb}'),
- ('\u{13430}', '\u{1343f}'),
- ('\u{1bca0}', '\u{1bca3}'),
- ('\u{1d173}', '\u{1d17a}'),
- ('\u{e0000}', '\u{e001f}'),
- ('\u{e0080}', '\u{e00ff}'),
- ('\u{e01f0}', '\u{e0fff}'),
-];
-
-pub const EXTEND: &'static [(char, char)] = &[
- ('\u{300}', '\u{36f}'),
- ('\u{483}', '\u{489}'),
- ('\u{591}', '\u{5bd}'),
- ('\u{5bf}', '\u{5bf}'),
- ('\u{5c1}', '\u{5c2}'),
- ('\u{5c4}', '\u{5c5}'),
- ('\u{5c7}', '\u{5c7}'),
- ('\u{610}', '\u{61a}'),
- ('\u{64b}', '\u{65f}'),
- ('\u{670}', '\u{670}'),
- ('\u{6d6}', '\u{6dc}'),
- ('\u{6df}', '\u{6e4}'),
- ('\u{6e7}', '\u{6e8}'),
- ('\u{6ea}', '\u{6ed}'),
- ('\u{711}', '\u{711}'),
- ('\u{730}', '\u{74a}'),
- ('\u{7a6}', '\u{7b0}'),
- ('\u{7eb}', '\u{7f3}'),
- ('\u{7fd}', '\u{7fd}'),
- ('\u{816}', '\u{819}'),
- ('\u{81b}', '\u{823}'),
- ('\u{825}', '\u{827}'),
- ('\u{829}', '\u{82d}'),
- ('\u{859}', '\u{85b}'),
- ('\u{897}', '\u{89f}'),
- ('\u{8ca}', '\u{8e1}'),
- ('\u{8e3}', '\u{902}'),
- ('\u{93a}', '\u{93a}'),
- ('\u{93c}', '\u{93c}'),
- ('\u{941}', '\u{948}'),
- ('\u{94d}', '\u{94d}'),
- ('\u{951}', '\u{957}'),
- ('\u{962}', '\u{963}'),
- ('\u{981}', '\u{981}'),
- ('\u{9bc}', '\u{9bc}'),
- ('\u{9be}', '\u{9be}'),
- ('\u{9c1}', '\u{9c4}'),
- ('\u{9cd}', '\u{9cd}'),
- ('\u{9d7}', '\u{9d7}'),
- ('\u{9e2}', '\u{9e3}'),
- ('\u{9fe}', '\u{9fe}'),
- ('\u{a01}', '\u{a02}'),
- ('\u{a3c}', '\u{a3c}'),
- ('\u{a41}', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4d}'),
- ('\u{a51}', '\u{a51}'),
- ('\u{a70}', '\u{a71}'),
- ('\u{a75}', '\u{a75}'),
- ('\u{a81}', '\u{a82}'),
- ('\u{abc}', '\u{abc}'),
- ('\u{ac1}', '\u{ac5}'),
- ('\u{ac7}', '\u{ac8}'),
- ('\u{acd}', '\u{acd}'),
- ('\u{ae2}', '\u{ae3}'),
- ('\u{afa}', '\u{aff}'),
- ('\u{b01}', '\u{b01}'),
- ('\u{b3c}', '\u{b3c}'),
- ('\u{b3e}', '\u{b3f}'),
- ('\u{b41}', '\u{b44}'),
- ('\u{b4d}', '\u{b4d}'),
- ('\u{b55}', '\u{b57}'),
- ('\u{b62}', '\u{b63}'),
- ('\u{b82}', '\u{b82}'),
- ('\u{bbe}', '\u{bbe}'),
- ('\u{bc0}', '\u{bc0}'),
- ('\u{bcd}', '\u{bcd}'),
- ('\u{bd7}', '\u{bd7}'),
- ('\u{c00}', '\u{c00}'),
- ('\u{c04}', '\u{c04}'),
- ('\u{c3c}', '\u{c3c}'),
- ('\u{c3e}', '\u{c40}'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4d}'),
- ('\u{c55}', '\u{c56}'),
- ('\u{c62}', '\u{c63}'),
- ('\u{c81}', '\u{c81}'),
- ('\u{cbc}', '\u{cbc}'),
- ('\u{cbf}', '\u{cc0}'),
- ('\u{cc2}', '\u{cc2}'),
- ('\u{cc6}', '\u{cc8}'),
- ('\u{cca}', '\u{ccd}'),
- ('\u{cd5}', '\u{cd6}'),
- ('\u{ce2}', '\u{ce3}'),
- ('\u{d00}', '\u{d01}'),
- ('\u{d3b}', '\u{d3c}'),
- ('\u{d3e}', '\u{d3e}'),
- ('\u{d41}', '\u{d44}'),
- ('\u{d4d}', '\u{d4d}'),
- ('\u{d57}', '\u{d57}'),
- ('\u{d62}', '\u{d63}'),
- ('\u{d81}', '\u{d81}'),
- ('\u{dca}', '\u{dca}'),
- ('\u{dcf}', '\u{dcf}'),
- ('\u{dd2}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('\u{ddf}', '\u{ddf}'),
- ('\u{e31}', '\u{e31}'),
- ('\u{e34}', '\u{e3a}'),
- ('\u{e47}', '\u{e4e}'),
- ('\u{eb1}', '\u{eb1}'),
- ('\u{eb4}', '\u{ebc}'),
- ('\u{ec8}', '\u{ece}'),
- ('\u{f18}', '\u{f19}'),
- ('\u{f35}', '\u{f35}'),
- ('\u{f37}', '\u{f37}'),
- ('\u{f39}', '\u{f39}'),
- ('\u{f71}', '\u{f7e}'),
- ('\u{f80}', '\u{f84}'),
- ('\u{f86}', '\u{f87}'),
- ('\u{f8d}', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('\u{fc6}', '\u{fc6}'),
- ('\u{102d}', '\u{1030}'),
- ('\u{1032}', '\u{1037}'),
- ('\u{1039}', '\u{103a}'),
- ('\u{103d}', '\u{103e}'),
- ('\u{1058}', '\u{1059}'),
- ('\u{105e}', '\u{1060}'),
- ('\u{1071}', '\u{1074}'),
- ('\u{1082}', '\u{1082}'),
- ('\u{1085}', '\u{1086}'),
- ('\u{108d}', '\u{108d}'),
- ('\u{109d}', '\u{109d}'),
- ('\u{135d}', '\u{135f}'),
- ('\u{1712}', '\u{1715}'),
- ('\u{1732}', '\u{1734}'),
- ('\u{1752}', '\u{1753}'),
- ('\u{1772}', '\u{1773}'),
- ('\u{17b4}', '\u{17b5}'),
- ('\u{17b7}', '\u{17bd}'),
- ('\u{17c6}', '\u{17c6}'),
- ('\u{17c9}', '\u{17d3}'),
- ('\u{17dd}', '\u{17dd}'),
- ('\u{180b}', '\u{180d}'),
- ('\u{180f}', '\u{180f}'),
- ('\u{1885}', '\u{1886}'),
- ('\u{18a9}', '\u{18a9}'),
- ('\u{1920}', '\u{1922}'),
- ('\u{1927}', '\u{1928}'),
- ('\u{1932}', '\u{1932}'),
- ('\u{1939}', '\u{193b}'),
- ('\u{1a17}', '\u{1a18}'),
- ('\u{1a1b}', '\u{1a1b}'),
- ('\u{1a56}', '\u{1a56}'),
- ('\u{1a58}', '\u{1a5e}'),
- ('\u{1a60}', '\u{1a60}'),
- ('\u{1a62}', '\u{1a62}'),
- ('\u{1a65}', '\u{1a6c}'),
- ('\u{1a73}', '\u{1a7c}'),
- ('\u{1a7f}', '\u{1a7f}'),
- ('\u{1ab0}', '\u{1ace}'),
- ('\u{1b00}', '\u{1b03}'),
- ('\u{1b34}', '\u{1b3d}'),
- ('\u{1b42}', '\u{1b44}'),
- ('\u{1b6b}', '\u{1b73}'),
- ('\u{1b80}', '\u{1b81}'),
- ('\u{1ba2}', '\u{1ba5}'),
- ('\u{1ba8}', '\u{1bad}'),
- ('\u{1be6}', '\u{1be6}'),
- ('\u{1be8}', '\u{1be9}'),
- ('\u{1bed}', '\u{1bed}'),
- ('\u{1bef}', '\u{1bf3}'),
- ('\u{1c2c}', '\u{1c33}'),
- ('\u{1c36}', '\u{1c37}'),
- ('\u{1cd0}', '\u{1cd2}'),
- ('\u{1cd4}', '\u{1ce0}'),
- ('\u{1ce2}', '\u{1ce8}'),
- ('\u{1ced}', '\u{1ced}'),
- ('\u{1cf4}', '\u{1cf4}'),
- ('\u{1cf8}', '\u{1cf9}'),
- ('\u{1dc0}', '\u{1dff}'),
- ('\u{200c}', '\u{200c}'),
- ('\u{20d0}', '\u{20f0}'),
- ('\u{2cef}', '\u{2cf1}'),
- ('\u{2d7f}', '\u{2d7f}'),
- ('\u{2de0}', '\u{2dff}'),
- ('\u{302a}', '\u{302f}'),
- ('\u{3099}', '\u{309a}'),
- ('\u{a66f}', '\u{a672}'),
- ('\u{a674}', '\u{a67d}'),
- ('\u{a69e}', '\u{a69f}'),
- ('\u{a6f0}', '\u{a6f1}'),
- ('\u{a802}', '\u{a802}'),
- ('\u{a806}', '\u{a806}'),
- ('\u{a80b}', '\u{a80b}'),
- ('\u{a825}', '\u{a826}'),
- ('\u{a82c}', '\u{a82c}'),
- ('\u{a8c4}', '\u{a8c5}'),
- ('\u{a8e0}', '\u{a8f1}'),
- ('\u{a8ff}', '\u{a8ff}'),
- ('\u{a926}', '\u{a92d}'),
- ('\u{a947}', '\u{a951}'),
- ('\u{a953}', '\u{a953}'),
- ('\u{a980}', '\u{a982}'),
- ('\u{a9b3}', '\u{a9b3}'),
- ('\u{a9b6}', '\u{a9b9}'),
- ('\u{a9bc}', '\u{a9bd}'),
- ('\u{a9c0}', '\u{a9c0}'),
- ('\u{a9e5}', '\u{a9e5}'),
- ('\u{aa29}', '\u{aa2e}'),
- ('\u{aa31}', '\u{aa32}'),
- ('\u{aa35}', '\u{aa36}'),
- ('\u{aa43}', '\u{aa43}'),
- ('\u{aa4c}', '\u{aa4c}'),
- ('\u{aa7c}', '\u{aa7c}'),
- ('\u{aab0}', '\u{aab0}'),
- ('\u{aab2}', '\u{aab4}'),
- ('\u{aab7}', '\u{aab8}'),
- ('\u{aabe}', '\u{aabf}'),
- ('\u{aac1}', '\u{aac1}'),
- ('\u{aaec}', '\u{aaed}'),
- ('\u{aaf6}', '\u{aaf6}'),
- ('\u{abe5}', '\u{abe5}'),
- ('\u{abe8}', '\u{abe8}'),
- ('\u{abed}', '\u{abed}'),
- ('\u{fb1e}', '\u{fb1e}'),
- ('\u{fe00}', '\u{fe0f}'),
- ('\u{fe20}', '\u{fe2f}'),
- ('\u{ff9e}', '\u{ff9f}'),
- ('\u{101fd}', '\u{101fd}'),
- ('\u{102e0}', '\u{102e0}'),
- ('\u{10376}', '\u{1037a}'),
- ('\u{10a01}', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '\u{10a0f}'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '\u{10a3f}'),
- ('\u{10ae5}', '\u{10ae6}'),
- ('\u{10d24}', '\u{10d27}'),
- ('\u{10d69}', '\u{10d6d}'),
- ('\u{10eab}', '\u{10eac}'),
- ('\u{10efc}', '\u{10eff}'),
- ('\u{10f46}', '\u{10f50}'),
- ('\u{10f82}', '\u{10f85}'),
- ('\u{11001}', '\u{11001}'),
- ('\u{11038}', '\u{11046}'),
- ('\u{11070}', '\u{11070}'),
- ('\u{11073}', '\u{11074}'),
- ('\u{1107f}', '\u{11081}'),
- ('\u{110b3}', '\u{110b6}'),
- ('\u{110b9}', '\u{110ba}'),
- ('\u{110c2}', '\u{110c2}'),
- ('\u{11100}', '\u{11102}'),
- ('\u{11127}', '\u{1112b}'),
- ('\u{1112d}', '\u{11134}'),
- ('\u{11173}', '\u{11173}'),
- ('\u{11180}', '\u{11181}'),
- ('\u{111b6}', '\u{111be}'),
- ('\u{111c0}', '\u{111c0}'),
- ('\u{111c9}', '\u{111cc}'),
- ('\u{111cf}', '\u{111cf}'),
- ('\u{1122f}', '\u{11231}'),
- ('\u{11234}', '\u{11237}'),
- ('\u{1123e}', '\u{1123e}'),
- ('\u{11241}', '\u{11241}'),
- ('\u{112df}', '\u{112df}'),
- ('\u{112e3}', '\u{112ea}'),
- ('\u{11300}', '\u{11301}'),
- ('\u{1133b}', '\u{1133c}'),
- ('\u{1133e}', '\u{1133e}'),
- ('\u{11340}', '\u{11340}'),
- ('\u{1134d}', '\u{1134d}'),
- ('\u{11357}', '\u{11357}'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
- ('\u{113b8}', '\u{113b8}'),
- ('\u{113bb}', '\u{113c0}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '\u{113c9}'),
- ('\u{113ce}', '\u{113d0}'),
- ('\u{113d2}', '\u{113d2}'),
- ('\u{113e1}', '\u{113e2}'),
- ('\u{11438}', '\u{1143f}'),
- ('\u{11442}', '\u{11444}'),
- ('\u{11446}', '\u{11446}'),
- ('\u{1145e}', '\u{1145e}'),
- ('\u{114b0}', '\u{114b0}'),
- ('\u{114b3}', '\u{114b8}'),
- ('\u{114ba}', '\u{114ba}'),
- ('\u{114bd}', '\u{114bd}'),
- ('\u{114bf}', '\u{114c0}'),
- ('\u{114c2}', '\u{114c3}'),
- ('\u{115af}', '\u{115af}'),
- ('\u{115b2}', '\u{115b5}'),
- ('\u{115bc}', '\u{115bd}'),
- ('\u{115bf}', '\u{115c0}'),
- ('\u{115dc}', '\u{115dd}'),
- ('\u{11633}', '\u{1163a}'),
- ('\u{1163d}', '\u{1163d}'),
- ('\u{1163f}', '\u{11640}'),
- ('\u{116ab}', '\u{116ab}'),
- ('\u{116ad}', '\u{116ad}'),
- ('\u{116b0}', '\u{116b7}'),
- ('\u{1171d}', '\u{1171d}'),
- ('\u{1171f}', '\u{1171f}'),
- ('\u{11722}', '\u{11725}'),
- ('\u{11727}', '\u{1172b}'),
- ('\u{1182f}', '\u{11837}'),
- ('\u{11839}', '\u{1183a}'),
- ('\u{11930}', '\u{11930}'),
- ('\u{1193b}', '\u{1193e}'),
- ('\u{11943}', '\u{11943}'),
- ('\u{119d4}', '\u{119d7}'),
- ('\u{119da}', '\u{119db}'),
- ('\u{119e0}', '\u{119e0}'),
- ('\u{11a01}', '\u{11a0a}'),
- ('\u{11a33}', '\u{11a38}'),
- ('\u{11a3b}', '\u{11a3e}'),
- ('\u{11a47}', '\u{11a47}'),
- ('\u{11a51}', '\u{11a56}'),
- ('\u{11a59}', '\u{11a5b}'),
- ('\u{11a8a}', '\u{11a96}'),
- ('\u{11a98}', '\u{11a99}'),
- ('\u{11c30}', '\u{11c36}'),
- ('\u{11c38}', '\u{11c3d}'),
- ('\u{11c3f}', '\u{11c3f}'),
- ('\u{11c92}', '\u{11ca7}'),
- ('\u{11caa}', '\u{11cb0}'),
- ('\u{11cb2}', '\u{11cb3}'),
- ('\u{11cb5}', '\u{11cb6}'),
- ('\u{11d31}', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d45}'),
- ('\u{11d47}', '\u{11d47}'),
- ('\u{11d90}', '\u{11d91}'),
- ('\u{11d95}', '\u{11d95}'),
- ('\u{11d97}', '\u{11d97}'),
- ('\u{11ef3}', '\u{11ef4}'),
- ('\u{11f00}', '\u{11f01}'),
- ('\u{11f36}', '\u{11f3a}'),
- ('\u{11f40}', '\u{11f42}'),
- ('\u{11f5a}', '\u{11f5a}'),
- ('\u{13440}', '\u{13440}'),
- ('\u{13447}', '\u{13455}'),
- ('\u{1611e}', '\u{16129}'),
- ('\u{1612d}', '\u{1612f}'),
- ('\u{16af0}', '\u{16af4}'),
- ('\u{16b30}', '\u{16b36}'),
- ('\u{16f4f}', '\u{16f4f}'),
- ('\u{16f8f}', '\u{16f92}'),
- ('\u{16fe4}', '\u{16fe4}'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('\u{1bc9d}', '\u{1bc9e}'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('\u{1d165}', '\u{1d169}'),
- ('\u{1d16d}', '\u{1d172}'),
- ('\u{1d17b}', '\u{1d182}'),
- ('\u{1d185}', '\u{1d18b}'),
- ('\u{1d1aa}', '\u{1d1ad}'),
- ('\u{1d242}', '\u{1d244}'),
- ('\u{1da00}', '\u{1da36}'),
- ('\u{1da3b}', '\u{1da6c}'),
- ('\u{1da75}', '\u{1da75}'),
- ('\u{1da84}', '\u{1da84}'),
- ('\u{1da9b}', '\u{1da9f}'),
- ('\u{1daa1}', '\u{1daaf}'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
- ('\u{1e08f}', '\u{1e08f}'),
- ('\u{1e130}', '\u{1e136}'),
- ('\u{1e2ae}', '\u{1e2ae}'),
- ('\u{1e2ec}', '\u{1e2ef}'),
- ('\u{1e4ec}', '\u{1e4ef}'),
- ('\u{1e5ee}', '\u{1e5ef}'),
- ('\u{1e8d0}', '\u{1e8d6}'),
- ('\u{1e944}', '\u{1e94a}'),
- ('🏻', '🏿'),
- ('\u{e0020}', '\u{e007f}'),
- ('\u{e0100}', '\u{e01ef}'),
-];
-
-pub const L: &'static [(char, char)] = &[('ᄀ', 'ᅟ'), ('ꥠ', 'ꥼ')];
-
-pub const LF: &'static [(char, char)] = &[('\n', '\n')];
-
-pub const LV: &'static [(char, char)] = &[
- ('가', '가'),
- ('개', '개'),
- ('갸', '갸'),
- ('걔', '걔'),
- ('거', '거'),
- ('게', '게'),
- ('겨', '겨'),
- ('계', '계'),
- ('고', '고'),
- ('과', '과'),
- ('괘', '괘'),
- ('괴', '괴'),
- ('교', '교'),
- ('구', '구'),
- ('궈', '궈'),
- ('궤', '궤'),
- ('귀', '귀'),
- ('규', '규'),
- ('그', '그'),
- ('긔', '긔'),
- ('기', '기'),
- ('까', '까'),
- ('깨', '깨'),
- ('꺄', '꺄'),
- ('꺠', '꺠'),
- ('꺼', '꺼'),
- ('께', '께'),
- ('껴', '껴'),
- ('꼐', '꼐'),
- ('꼬', '꼬'),
- ('꽈', '꽈'),
- ('꽤', '꽤'),
- ('꾀', '꾀'),
- ('꾜', '꾜'),
- ('꾸', '꾸'),
- ('꿔', '꿔'),
- ('꿰', '꿰'),
- ('뀌', '뀌'),
- ('뀨', '뀨'),
- ('끄', '끄'),
- ('끠', '끠'),
- ('끼', '끼'),
- ('나', '나'),
- ('내', '내'),
- ('냐', '냐'),
- ('냬', '냬'),
- ('너', '너'),
- ('네', '네'),
- ('녀', '녀'),
- ('녜', '녜'),
- ('노', '노'),
- ('놔', '놔'),
- ('놰', '놰'),
- ('뇌', '뇌'),
- ('뇨', '뇨'),
- ('누', '누'),
- ('눠', '눠'),
- ('눼', '눼'),
- ('뉘', '뉘'),
- ('뉴', '뉴'),
- ('느', '느'),
- ('늬', '늬'),
- ('니', '니'),
- ('다', '다'),
- ('대', '대'),
- ('댜', '댜'),
- ('댸', '댸'),
- ('더', '더'),
- ('데', '데'),
- ('뎌', '뎌'),
- ('뎨', '뎨'),
- ('도', '도'),
- ('돠', '돠'),
- ('돼', '돼'),
- ('되', '되'),
- ('됴', '됴'),
- ('두', '두'),
- ('둬', '둬'),
- ('뒈', '뒈'),
- ('뒤', '뒤'),
- ('듀', '듀'),
- ('드', '드'),
- ('듸', '듸'),
- ('디', '디'),
- ('따', '따'),
- ('때', '때'),
- ('땨', '땨'),
- ('떄', '떄'),
- ('떠', '떠'),
- ('떼', '떼'),
- ('뗘', '뗘'),
- ('뗴', '뗴'),
- ('또', '또'),
- ('똬', '똬'),
- ('뙈', '뙈'),
- ('뙤', '뙤'),
- ('뚀', '뚀'),
- ('뚜', '뚜'),
- ('뚸', '뚸'),
- ('뛔', '뛔'),
- ('뛰', '뛰'),
- ('뜌', '뜌'),
- ('뜨', '뜨'),
- ('띄', '띄'),
- ('띠', '띠'),
- ('라', '라'),
- ('래', '래'),
- ('랴', '랴'),
- ('럐', '럐'),
- ('러', '러'),
- ('레', '레'),
- ('려', '려'),
- ('례', '례'),
- ('로', '로'),
- ('롸', '롸'),
- ('뢔', '뢔'),
- ('뢰', '뢰'),
- ('료', '료'),
- ('루', '루'),
- ('뤄', '뤄'),
- ('뤠', '뤠'),
- ('뤼', '뤼'),
- ('류', '류'),
- ('르', '르'),
- ('릐', '릐'),
- ('리', '리'),
- ('마', '마'),
- ('매', '매'),
- ('먀', '먀'),
- ('먜', '먜'),
- ('머', '머'),
- ('메', '메'),
- ('며', '며'),
- ('몌', '몌'),
- ('모', '모'),
- ('뫄', '뫄'),
- ('뫠', '뫠'),
- ('뫼', '뫼'),
- ('묘', '묘'),
- ('무', '무'),
- ('뭐', '뭐'),
- ('뭬', '뭬'),
- ('뮈', '뮈'),
- ('뮤', '뮤'),
- ('므', '므'),
- ('믜', '믜'),
- ('미', '미'),
- ('바', '바'),
- ('배', '배'),
- ('뱌', '뱌'),
- ('뱨', '뱨'),
- ('버', '버'),
- ('베', '베'),
- ('벼', '벼'),
- ('볘', '볘'),
- ('보', '보'),
- ('봐', '봐'),
- ('봬', '봬'),
- ('뵈', '뵈'),
- ('뵤', '뵤'),
- ('부', '부'),
- ('붜', '붜'),
- ('붸', '붸'),
- ('뷔', '뷔'),
- ('뷰', '뷰'),
- ('브', '브'),
- ('븨', '븨'),
- ('비', '비'),
- ('빠', '빠'),
- ('빼', '빼'),
- ('뺘', '뺘'),
- ('뺴', '뺴'),
- ('뻐', '뻐'),
- ('뻬', '뻬'),
- ('뼈', '뼈'),
- ('뼤', '뼤'),
- ('뽀', '뽀'),
- ('뽜', '뽜'),
- ('뽸', '뽸'),
- ('뾔', '뾔'),
- ('뾰', '뾰'),
- ('뿌', '뿌'),
- ('뿨', '뿨'),
- ('쀄', '쀄'),
- ('쀠', '쀠'),
- ('쀼', '쀼'),
- ('쁘', '쁘'),
- ('쁴', '쁴'),
- ('삐', '삐'),
- ('사', '사'),
- ('새', '새'),
- ('샤', '샤'),
- ('섀', '섀'),
- ('서', '서'),
- ('세', '세'),
- ('셔', '셔'),
- ('셰', '셰'),
- ('소', '소'),
- ('솨', '솨'),
- ('쇄', '쇄'),
- ('쇠', '쇠'),
- ('쇼', '쇼'),
- ('수', '수'),
- ('숴', '숴'),
- ('쉐', '쉐'),
- ('쉬', '쉬'),
- ('슈', '슈'),
- ('스', '스'),
- ('싀', '싀'),
- ('시', '시'),
- ('싸', '싸'),
- ('쌔', '쌔'),
- ('쌰', '쌰'),
- ('썌', '썌'),
- ('써', '써'),
- ('쎄', '쎄'),
- ('쎠', '쎠'),
- ('쎼', '쎼'),
- ('쏘', '쏘'),
- ('쏴', '쏴'),
- ('쐐', '쐐'),
- ('쐬', '쐬'),
- ('쑈', '쑈'),
- ('쑤', '쑤'),
- ('쒀', '쒀'),
- ('쒜', '쒜'),
- ('쒸', '쒸'),
- ('쓔', '쓔'),
- ('쓰', '쓰'),
- ('씌', '씌'),
- ('씨', '씨'),
- ('아', '아'),
- ('애', '애'),
- ('야', '야'),
- ('얘', '얘'),
- ('어', '어'),
- ('에', '에'),
- ('여', '여'),
- ('예', '예'),
- ('오', '오'),
- ('와', '와'),
- ('왜', '왜'),
- ('외', '외'),
- ('요', '요'),
- ('우', '우'),
- ('워', '워'),
- ('웨', '웨'),
- ('위', '위'),
- ('유', '유'),
- ('으', '으'),
- ('의', '의'),
- ('이', '이'),
- ('자', '자'),
- ('재', '재'),
- ('쟈', '쟈'),
- ('쟤', '쟤'),
- ('저', '저'),
- ('제', '제'),
- ('져', '져'),
- ('졔', '졔'),
- ('조', '조'),
- ('좌', '좌'),
- ('좨', '좨'),
- ('죄', '죄'),
- ('죠', '죠'),
- ('주', '주'),
- ('줘', '줘'),
- ('줴', '줴'),
- ('쥐', '쥐'),
- ('쥬', '쥬'),
- ('즈', '즈'),
- ('즤', '즤'),
- ('지', '지'),
- ('짜', '짜'),
- ('째', '째'),
- ('쨔', '쨔'),
- ('쨰', '쨰'),
- ('쩌', '쩌'),
- ('쩨', '쩨'),
- ('쪄', '쪄'),
- ('쪠', '쪠'),
- ('쪼', '쪼'),
- ('쫘', '쫘'),
- ('쫴', '쫴'),
- ('쬐', '쬐'),
- ('쬬', '쬬'),
- ('쭈', '쭈'),
- ('쭤', '쭤'),
- ('쮀', '쮀'),
- ('쮜', '쮜'),
- ('쮸', '쮸'),
- ('쯔', '쯔'),
- ('쯰', '쯰'),
- ('찌', '찌'),
- ('차', '차'),
- ('채', '채'),
- ('챠', '챠'),
- ('챼', '챼'),
- ('처', '처'),
- ('체', '체'),
- ('쳐', '쳐'),
- ('쳬', '쳬'),
- ('초', '초'),
- ('촤', '촤'),
- ('쵀', '쵀'),
- ('최', '최'),
- ('쵸', '쵸'),
- ('추', '추'),
- ('춰', '춰'),
- ('췌', '췌'),
- ('취', '취'),
- ('츄', '츄'),
- ('츠', '츠'),
- ('츼', '츼'),
- ('치', '치'),
- ('카', '카'),
- ('캐', '캐'),
- ('캬', '캬'),
- ('컈', '컈'),
- ('커', '커'),
- ('케', '케'),
- ('켜', '켜'),
- ('켸', '켸'),
- ('코', '코'),
- ('콰', '콰'),
- ('쾌', '쾌'),
- ('쾨', '쾨'),
- ('쿄', '쿄'),
- ('쿠', '쿠'),
- ('쿼', '쿼'),
- ('퀘', '퀘'),
- ('퀴', '퀴'),
- ('큐', '큐'),
- ('크', '크'),
- ('킈', '킈'),
- ('키', '키'),
- ('타', '타'),
- ('태', '태'),
- ('탸', '탸'),
- ('턔', '턔'),
- ('터', '터'),
- ('테', '테'),
- ('텨', '텨'),
- ('톄', '톄'),
- ('토', '토'),
- ('톼', '톼'),
- ('퇘', '퇘'),
- ('퇴', '퇴'),
- ('툐', '툐'),
- ('투', '투'),
- ('퉈', '퉈'),
- ('퉤', '퉤'),
- ('튀', '튀'),
- ('튜', '튜'),
- ('트', '트'),
- ('틔', '틔'),
- ('티', '티'),
- ('파', '파'),
- ('패', '패'),
- ('퍄', '퍄'),
- ('퍠', '퍠'),
- ('퍼', '퍼'),
- ('페', '페'),
- ('펴', '펴'),
- ('폐', '폐'),
- ('포', '포'),
- ('퐈', '퐈'),
- ('퐤', '퐤'),
- ('푀', '푀'),
- ('표', '표'),
- ('푸', '푸'),
- ('풔', '풔'),
- ('풰', '풰'),
- ('퓌', '퓌'),
- ('퓨', '퓨'),
- ('프', '프'),
- ('픠', '픠'),
- ('피', '피'),
- ('하', '하'),
- ('해', '해'),
- ('햐', '햐'),
- ('햬', '햬'),
- ('허', '허'),
- ('헤', '헤'),
- ('혀', '혀'),
- ('혜', '혜'),
- ('호', '호'),
- ('화', '화'),
- ('홰', '홰'),
- ('회', '회'),
- ('효', '효'),
- ('후', '후'),
- ('훠', '훠'),
- ('훼', '훼'),
- ('휘', '휘'),
- ('휴', '휴'),
- ('흐', '흐'),
- ('희', '희'),
- ('히', '히'),
-];
-
-pub const LVT: &'static [(char, char)] = &[
- ('각', '갛'),
- ('객', '갷'),
- ('갹', '걓'),
- ('걕', '걯'),
- ('걱', '겋'),
- ('겍', '겧'),
- ('격', '곃'),
- ('곅', '곟'),
- ('곡', '곻'),
- ('곽', '괗'),
- ('괙', '괳'),
- ('괵', '굏'),
- ('굑', '굫'),
- ('국', '궇'),
- ('궉', '궣'),
- ('궥', '궿'),
- ('귁', '귛'),
- ('귝', '귷'),
- ('극', '긓'),
- ('긕', '긯'),
- ('긱', '깋'),
- ('깍', '깧'),
- ('깩', '꺃'),
- ('꺅', '꺟'),
- ('꺡', '꺻'),
- ('꺽', '껗'),
- ('껙', '껳'),
- ('껵', '꼏'),
- ('꼑', '꼫'),
- ('꼭', '꽇'),
- ('꽉', '꽣'),
- ('꽥', '꽿'),
- ('꾁', '꾛'),
- ('꾝', '꾷'),
- ('꾹', '꿓'),
- ('꿕', '꿯'),
- ('꿱', '뀋'),
- ('뀍', '뀧'),
- ('뀩', '끃'),
- ('끅', '끟'),
- ('끡', '끻'),
- ('끽', '낗'),
- ('낙', '낳'),
- ('낵', '냏'),
- ('냑', '냫'),
- ('냭', '넇'),
- ('넉', '넣'),
- ('넥', '넿'),
- ('녁', '녛'),
- ('녝', '녷'),
- ('녹', '놓'),
- ('놕', '놯'),
- ('놱', '뇋'),
- ('뇍', '뇧'),
- ('뇩', '눃'),
- ('눅', '눟'),
- ('눡', '눻'),
- ('눽', '뉗'),
- ('뉙', '뉳'),
- ('뉵', '늏'),
- ('늑', '늫'),
- ('늭', '닇'),
- ('닉', '닣'),
- ('닥', '닿'),
- ('댁', '댛'),
- ('댝', '댷'),
- ('댹', '덓'),
- ('덕', '덯'),
- ('덱', '뎋'),
- ('뎍', '뎧'),
- ('뎩', '돃'),
- ('독', '돟'),
- ('돡', '돻'),
- ('돽', '됗'),
- ('됙', '됳'),
- ('됵', '둏'),
- ('둑', '둫'),
- ('둭', '뒇'),
- ('뒉', '뒣'),
- ('뒥', '뒿'),
- ('듁', '듛'),
- ('득', '듷'),
- ('듹', '딓'),
- ('딕', '딯'),
- ('딱', '땋'),
- ('땍', '땧'),
- ('땩', '떃'),
- ('떅', '떟'),
- ('떡', '떻'),
- ('떽', '뗗'),
- ('뗙', '뗳'),
- ('뗵', '똏'),
- ('똑', '똫'),
- ('똭', '뙇'),
- ('뙉', '뙣'),
- ('뙥', '뙿'),
- ('뚁', '뚛'),
- ('뚝', '뚷'),
- ('뚹', '뛓'),
- ('뛕', '뛯'),
- ('뛱', '뜋'),
- ('뜍', '뜧'),
- ('뜩', '띃'),
- ('띅', '띟'),
- ('띡', '띻'),
- ('락', '랗'),
- ('랙', '랳'),
- ('략', '럏'),
- ('럑', '럫'),
- ('럭', '렇'),
- ('렉', '렣'),
- ('력', '렿'),
- ('롁', '롛'),
- ('록', '롷'),
- ('롹', '뢓'),
- ('뢕', '뢯'),
- ('뢱', '룋'),
- ('룍', '룧'),
- ('룩', '뤃'),
- ('뤅', '뤟'),
- ('뤡', '뤻'),
- ('뤽', '륗'),
- ('륙', '륳'),
- ('륵', '릏'),
- ('릑', '릫'),
- ('릭', '맇'),
- ('막', '맣'),
- ('맥', '맿'),
- ('먁', '먛'),
- ('먝', '먷'),
- ('먹', '멓'),
- ('멕', '멯'),
- ('멱', '몋'),
- ('몍', '몧'),
- ('목', '뫃'),
- ('뫅', '뫟'),
- ('뫡', '뫻'),
- ('뫽', '묗'),
- ('묙', '묳'),
- ('묵', '뭏'),
- ('뭑', '뭫'),
- ('뭭', '뮇'),
- ('뮉', '뮣'),
- ('뮥', '뮿'),
- ('믁', '믛'),
- ('믝', '믷'),
- ('믹', '밓'),
- ('박', '밯'),
- ('백', '뱋'),
- ('뱍', '뱧'),
- ('뱩', '벃'),
- ('벅', '벟'),
- ('벡', '벻'),
- ('벽', '볗'),
- ('볙', '볳'),
- ('복', '봏'),
- ('봑', '봫'),
- ('봭', '뵇'),
- ('뵉', '뵣'),
- ('뵥', '뵿'),
- ('북', '붛'),
- ('붝', '붷'),
- ('붹', '뷓'),
- ('뷕', '뷯'),
- ('뷱', '븋'),
- ('븍', '븧'),
- ('븩', '빃'),
- ('빅', '빟'),
- ('빡', '빻'),
- ('빽', '뺗'),
- ('뺙', '뺳'),
- ('뺵', '뻏'),
- ('뻑', '뻫'),
- ('뻭', '뼇'),
- ('뼉', '뼣'),
- ('뼥', '뼿'),
- ('뽁', '뽛'),
- ('뽝', '뽷'),
- ('뽹', '뾓'),
- ('뾕', '뾯'),
- ('뾱', '뿋'),
- ('뿍', '뿧'),
- ('뿩', '쀃'),
- ('쀅', '쀟'),
- ('쀡', '쀻'),
- ('쀽', '쁗'),
- ('쁙', '쁳'),
- ('쁵', '삏'),
- ('삑', '삫'),
- ('삭', '샇'),
- ('색', '샣'),
- ('샥', '샿'),
- ('섁', '섛'),
- ('석', '섷'),
- ('섹', '셓'),
- ('셕', '셯'),
- ('셱', '솋'),
- ('속', '솧'),
- ('솩', '쇃'),
- ('쇅', '쇟'),
- ('쇡', '쇻'),
- ('쇽', '숗'),
- ('숙', '숳'),
- ('숵', '쉏'),
- ('쉑', '쉫'),
- ('쉭', '슇'),
- ('슉', '슣'),
- ('슥', '슿'),
- ('싁', '싛'),
- ('식', '싷'),
- ('싹', '쌓'),
- ('쌕', '쌯'),
- ('쌱', '썋'),
- ('썍', '썧'),
- ('썩', '쎃'),
- ('쎅', '쎟'),
- ('쎡', '쎻'),
- ('쎽', '쏗'),
- ('쏙', '쏳'),
- ('쏵', '쐏'),
- ('쐑', '쐫'),
- ('쐭', '쑇'),
- ('쑉', '쑣'),
- ('쑥', '쑿'),
- ('쒁', '쒛'),
- ('쒝', '쒷'),
- ('쒹', '쓓'),
- ('쓕', '쓯'),
- ('쓱', '씋'),
- ('씍', '씧'),
- ('씩', '앃'),
- ('악', '앟'),
- ('액', '앻'),
- ('약', '얗'),
- ('얙', '얳'),
- ('억', '엏'),
- ('엑', '엫'),
- ('역', '옇'),
- ('옉', '옣'),
- ('옥', '옿'),
- ('왁', '왛'),
- ('왝', '왷'),
- ('왹', '욓'),
- ('욕', '욯'),
- ('욱', '웋'),
- ('웍', '웧'),
- ('웩', '윃'),
- ('윅', '윟'),
- ('육', '윻'),
- ('윽', '읗'),
- ('읙', '읳'),
- ('익', '잏'),
- ('작', '잫'),
- ('잭', '쟇'),
- ('쟉', '쟣'),
- ('쟥', '쟿'),
- ('적', '젛'),
- ('젝', '젷'),
- ('젹', '졓'),
- ('졕', '졯'),
- ('족', '좋'),
- ('좍', '좧'),
- ('좩', '죃'),
- ('죅', '죟'),
- ('죡', '죻'),
- ('죽', '줗'),
- ('줙', '줳'),
- ('줵', '쥏'),
- ('쥑', '쥫'),
- ('쥭', '즇'),
- ('즉', '즣'),
- ('즥', '즿'),
- ('직', '짛'),
- ('짝', '짷'),
- ('짹', '쨓'),
- ('쨕', '쨯'),
- ('쨱', '쩋'),
- ('쩍', '쩧'),
- ('쩩', '쪃'),
- ('쪅', '쪟'),
- ('쪡', '쪻'),
- ('쪽', '쫗'),
- ('쫙', '쫳'),
- ('쫵', '쬏'),
- ('쬑', '쬫'),
- ('쬭', '쭇'),
- ('쭉', '쭣'),
- ('쭥', '쭿'),
- ('쮁', '쮛'),
- ('쮝', '쮷'),
- ('쮹', '쯓'),
- ('쯕', '쯯'),
- ('쯱', '찋'),
- ('찍', '찧'),
- ('착', '챃'),
- ('책', '챟'),
- ('챡', '챻'),
- ('챽', '첗'),
- ('척', '첳'),
- ('첵', '쳏'),
- ('쳑', '쳫'),
- ('쳭', '촇'),
- ('촉', '촣'),
- ('촥', '촿'),
- ('쵁', '쵛'),
- ('쵝', '쵷'),
- ('쵹', '춓'),
- ('축', '춯'),
- ('춱', '췋'),
- ('췍', '췧'),
- ('췩', '츃'),
- ('츅', '츟'),
- ('측', '츻'),
- ('츽', '칗'),
- ('칙', '칳'),
- ('칵', '캏'),
- ('캑', '캫'),
- ('캭', '컇'),
- ('컉', '컣'),
- ('컥', '컿'),
- ('켁', '켛'),
- ('켝', '켷'),
- ('켹', '콓'),
- ('콕', '콯'),
- ('콱', '쾋'),
- ('쾍', '쾧'),
- ('쾩', '쿃'),
- ('쿅', '쿟'),
- ('쿡', '쿻'),
- ('쿽', '퀗'),
- ('퀙', '퀳'),
- ('퀵', '큏'),
- ('큑', '큫'),
- ('큭', '킇'),
- ('킉', '킣'),
- ('킥', '킿'),
- ('탁', '탛'),
- ('택', '탷'),
- ('탹', '턓'),
- ('턕', '턯'),
- ('턱', '텋'),
- ('텍', '텧'),
- ('텩', '톃'),
- ('톅', '톟'),
- ('톡', '톻'),
- ('톽', '퇗'),
- ('퇙', '퇳'),
- ('퇵', '툏'),
- ('툑', '툫'),
- ('툭', '퉇'),
- ('퉉', '퉣'),
- ('퉥', '퉿'),
- ('튁', '튛'),
- ('튝', '튷'),
- ('특', '틓'),
- ('틕', '틯'),
- ('틱', '팋'),
- ('팍', '팧'),
- ('팩', '퍃'),
- ('퍅', '퍟'),
- ('퍡', '퍻'),
- ('퍽', '펗'),
- ('펙', '펳'),
- ('펵', '폏'),
- ('폑', '폫'),
- ('폭', '퐇'),
- ('퐉', '퐣'),
- ('퐥', '퐿'),
- ('푁', '푛'),
- ('푝', '푷'),
- ('푹', '풓'),
- ('풕', '풯'),
- ('풱', '퓋'),
- ('퓍', '퓧'),
- ('퓩', '픃'),
- ('픅', '픟'),
- ('픡', '픻'),
- ('픽', '핗'),
- ('학', '핳'),
- ('핵', '햏'),
- ('햑', '햫'),
- ('햭', '헇'),
- ('헉', '헣'),
- ('헥', '헿'),
- ('혁', '혛'),
- ('혝', '혷'),
- ('혹', '홓'),
- ('확', '홯'),
- ('홱', '횋'),
- ('획', '횧'),
- ('횩', '훃'),
- ('훅', '훟'),
- ('훡', '훻'),
- ('훽', '휗'),
- ('휙', '휳'),
- ('휵', '흏'),
- ('흑', '흫'),
- ('흭', '힇'),
- ('힉', '힣'),
-];
-
-pub const PREPEND: &'static [(char, char)] = &[
- ('\u{600}', '\u{605}'),
- ('\u{6dd}', '\u{6dd}'),
- ('\u{70f}', '\u{70f}'),
- ('\u{890}', '\u{891}'),
- ('\u{8e2}', '\u{8e2}'),
- ('ൎ', 'ൎ'),
- ('\u{110bd}', '\u{110bd}'),
- ('\u{110cd}', '\u{110cd}'),
- ('𑇂', '𑇃'),
- ('𑏑', '𑏑'),
- ('𑤿', '𑤿'),
- ('𑥁', '𑥁'),
- ('𑨺', '𑨺'),
- ('𑪄', '𑪉'),
- ('𑵆', '𑵆'),
- ('𑼂', '𑼂'),
-];
-
-pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('🇦', '🇿')];
-
-pub const SPACINGMARK: &'static [(char, char)] = &[
- ('ः', 'ः'),
- ('ऻ', 'ऻ'),
- ('ा', 'ी'),
- ('ॉ', 'ौ'),
- ('ॎ', 'ॏ'),
- ('ং', 'ঃ'),
- ('ি', 'ী'),
- ('ে', 'ৈ'),
- ('ো', 'ৌ'),
- ('ਃ', 'ਃ'),
- ('ਾ', 'ੀ'),
- ('ઃ', 'ઃ'),
- ('ા', 'ી'),
- ('ૉ', 'ૉ'),
- ('ો', 'ૌ'),
- ('ଂ', 'ଃ'),
- ('ୀ', 'ୀ'),
- ('େ', 'ୈ'),
- ('ୋ', 'ୌ'),
- ('ி', 'ி'),
- ('ு', 'ூ'),
- ('ெ', 'ை'),
- ('ொ', 'ௌ'),
- ('ఁ', 'ః'),
- ('ు', 'ౄ'),
- ('ಂ', 'ಃ'),
- ('ಾ', 'ಾ'),
- ('ು', 'ು'),
- ('ೃ', 'ೄ'),
- ('ೳ', 'ೳ'),
- ('ം', 'ഃ'),
- ('ി', 'ീ'),
- ('െ', 'ൈ'),
- ('ൊ', 'ൌ'),
- ('ං', 'ඃ'),
- ('ැ', 'ෑ'),
- ('ෘ', 'ෞ'),
- ('ෲ', 'ෳ'),
- ('ำ', 'ำ'),
- ('ຳ', 'ຳ'),
- ('༾', '༿'),
- ('ཿ', 'ཿ'),
- ('ေ', 'ေ'),
- ('ျ', 'ြ'),
- ('ၖ', 'ၗ'),
- ('ႄ', 'ႄ'),
- ('ា', 'ា'),
- ('ើ', 'ៅ'),
- ('ះ', 'ៈ'),
- ('ᤣ', 'ᤦ'),
- ('ᤩ', 'ᤫ'),
- ('ᤰ', 'ᤱ'),
- ('ᤳ', 'ᤸ'),
- ('ᨙ', 'ᨚ'),
- ('ᩕ', 'ᩕ'),
- ('ᩗ', 'ᩗ'),
- ('ᩭ', 'ᩲ'),
- ('ᬄ', 'ᬄ'),
- ('ᬾ', 'ᭁ'),
- ('ᮂ', 'ᮂ'),
- ('ᮡ', 'ᮡ'),
- ('ᮦ', 'ᮧ'),
- ('ᯧ', 'ᯧ'),
- ('ᯪ', 'ᯬ'),
- ('ᯮ', 'ᯮ'),
- ('ᰤ', 'ᰫ'),
- ('ᰴ', 'ᰵ'),
- ('᳡', '᳡'),
- ('᳷', '᳷'),
- ('ꠣ', 'ꠤ'),
- ('ꠧ', 'ꠧ'),
- ('ꢀ', 'ꢁ'),
- ('ꢴ', 'ꣃ'),
- ('ꥒ', 'ꥒ'),
- ('ꦃ', 'ꦃ'),
- ('ꦴ', 'ꦵ'),
- ('ꦺ', 'ꦻ'),
- ('ꦾ', 'ꦿ'),
- ('ꨯ', 'ꨰ'),
- ('ꨳ', 'ꨴ'),
- ('ꩍ', 'ꩍ'),
- ('ꫫ', 'ꫫ'),
- ('ꫮ', 'ꫯ'),
- ('ꫵ', 'ꫵ'),
- ('ꯣ', 'ꯤ'),
- ('ꯦ', 'ꯧ'),
- ('ꯩ', 'ꯪ'),
- ('꯬', '꯬'),
- ('𑀀', '𑀀'),
- ('𑀂', '𑀂'),
- ('𑂂', '𑂂'),
- ('𑂰', '𑂲'),
- ('𑂷', '𑂸'),
- ('𑄬', '𑄬'),
- ('𑅅', '𑅆'),
- ('𑆂', '𑆂'),
- ('𑆳', '𑆵'),
- ('𑆿', '𑆿'),
- ('𑇎', '𑇎'),
- ('𑈬', '𑈮'),
- ('𑈲', '𑈳'),
- ('𑋠', '𑋢'),
- ('𑌂', '𑌃'),
- ('𑌿', '𑌿'),
- ('𑍁', '𑍄'),
- ('𑍇', '𑍈'),
- ('𑍋', '𑍌'),
- ('𑍢', '𑍣'),
- ('𑎹', '𑎺'),
- ('𑏊', '𑏊'),
- ('𑏌', '𑏍'),
- ('𑐵', '𑐷'),
- ('𑑀', '𑑁'),
- ('𑑅', '𑑅'),
- ('𑒱', '𑒲'),
- ('𑒹', '𑒹'),
- ('𑒻', '𑒼'),
- ('𑒾', '𑒾'),
- ('𑓁', '𑓁'),
- ('𑖰', '𑖱'),
- ('𑖸', '𑖻'),
- ('𑖾', '𑖾'),
- ('𑘰', '𑘲'),
- ('𑘻', '𑘼'),
- ('𑘾', '𑘾'),
- ('𑚬', '𑚬'),
- ('𑚮', '𑚯'),
- ('𑜞', '𑜞'),
- ('𑜦', '𑜦'),
- ('𑠬', '𑠮'),
- ('𑠸', '𑠸'),
- ('𑤱', '𑤵'),
- ('𑤷', '𑤸'),
- ('𑥀', '𑥀'),
- ('𑥂', '𑥂'),
- ('𑧑', '𑧓'),
- ('𑧜', '𑧟'),
- ('𑧤', '𑧤'),
- ('𑨹', '𑨹'),
- ('𑩗', '𑩘'),
- ('𑪗', '𑪗'),
- ('𑰯', '𑰯'),
- ('𑰾', '𑰾'),
- ('𑲩', '𑲩'),
- ('𑲱', '𑲱'),
- ('𑲴', '𑲴'),
- ('𑶊', '𑶎'),
- ('𑶓', '𑶔'),
- ('𑶖', '𑶖'),
- ('𑻵', '𑻶'),
- ('𑼃', '𑼃'),
- ('𑼴', '𑼵'),
- ('𑼾', '𑼿'),
- ('𖄪', '𖄬'),
- ('𖽑', '𖾇'),
-];
-
-pub const T: &'static [(char, char)] = &[('ᆨ', 'ᇿ'), ('ퟋ', 'ퟻ')];
-
-pub const V: &'static [(char, char)] =
- &[('ᅠ', 'ᆧ'), ('ힰ', 'ퟆ'), ('𖵣', '𖵣'), ('𖵧', '𖵪')];
-
-pub const ZWJ: &'static [(char, char)] = &[('\u{200d}', '\u{200d}')];
diff --git a/vendor/regex-syntax/src/unicode_tables/mod.rs b/vendor/regex-syntax/src/unicode_tables/mod.rs
deleted file mode 100644
index 20736c7a..00000000
--- a/vendor/regex-syntax/src/unicode_tables/mod.rs
+++ /dev/null
@@ -1,57 +0,0 @@
-#[cfg(feature = "unicode-age")]
-pub mod age;
-
-#[cfg(feature = "unicode-case")]
-pub mod case_folding_simple;
-
-#[cfg(feature = "unicode-gencat")]
-pub mod general_category;
-
-#[cfg(feature = "unicode-segment")]
-pub mod grapheme_cluster_break;
-
-#[cfg(all(feature = "unicode-perl", not(feature = "unicode-gencat")))]
-#[allow(dead_code)]
-pub mod perl_decimal;
-
-#[cfg(all(feature = "unicode-perl", not(feature = "unicode-bool")))]
-#[allow(dead_code)]
-pub mod perl_space;
-
-#[cfg(feature = "unicode-perl")]
-pub mod perl_word;
-
-#[cfg(feature = "unicode-bool")]
-pub mod property_bool;
-
-#[cfg(any(
- feature = "unicode-age",
- feature = "unicode-bool",
- feature = "unicode-gencat",
- feature = "unicode-perl",
- feature = "unicode-script",
- feature = "unicode-segment",
-))]
-pub mod property_names;
-
-#[cfg(any(
- feature = "unicode-age",
- feature = "unicode-bool",
- feature = "unicode-gencat",
- feature = "unicode-perl",
- feature = "unicode-script",
- feature = "unicode-segment",
-))]
-pub mod property_values;
-
-#[cfg(feature = "unicode-script")]
-pub mod script;
-
-#[cfg(feature = "unicode-script")]
-pub mod script_extension;
-
-#[cfg(feature = "unicode-segment")]
-pub mod sentence_break;
-
-#[cfg(feature = "unicode-segment")]
-pub mod word_break;
diff --git a/vendor/regex-syntax/src/unicode_tables/perl_decimal.rs b/vendor/regex-syntax/src/unicode_tables/perl_decimal.rs
deleted file mode 100644
index 18996c2b..00000000
--- a/vendor/regex-syntax/src/unicode_tables/perl_decimal.rs
+++ /dev/null
@@ -1,84 +0,0 @@
-// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
-//
-// ucd-generate general-category ucd-16.0.0 --chars --include decimalnumber
-//
-// Unicode version: 16.0.0.
-//
-// ucd-generate 0.3.1 is available on crates.io.
-
-pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] =
- &[("Decimal_Number", DECIMAL_NUMBER)];
-
-pub const DECIMAL_NUMBER: &'static [(char, char)] = &[
- ('0', '9'),
- ('٠', '٩'),
- ('۰', '۹'),
- ('߀', '߉'),
- ('०', '९'),
- ('০', '৯'),
- ('੦', '੯'),
- ('૦', '૯'),
- ('୦', '୯'),
- ('௦', '௯'),
- ('౦', '౯'),
- ('೦', '೯'),
- ('൦', '൯'),
- ('෦', '෯'),
- ('๐', '๙'),
- ('໐', '໙'),
- ('༠', '༩'),
- ('၀', '၉'),
- ('႐', '႙'),
- ('០', '៩'),
- ('᠐', '᠙'),
- ('᥆', '᥏'),
- ('᧐', '᧙'),
- ('᪀', '᪉'),
- ('᪐', '᪙'),
- ('᭐', '᭙'),
- ('᮰', '᮹'),
- ('᱀', '᱉'),
- ('᱐', '᱙'),
- ('꘠', '꘩'),
- ('꣐', '꣙'),
- ('꤀', '꤉'),
- ('꧐', '꧙'),
- ('꧰', '꧹'),
- ('꩐', '꩙'),
- ('꯰', '꯹'),
- ('0', '9'),
- ('𐒠', '𐒩'),
- ('𐴰', '𐴹'),
- ('𐵀', '𐵉'),
- ('𑁦', '𑁯'),
- ('𑃰', '𑃹'),
- ('𑄶', '𑄿'),
- ('𑇐', '𑇙'),
- ('𑋰', '𑋹'),
- ('𑑐', '𑑙'),
- ('𑓐', '𑓙'),
- ('𑙐', '𑙙'),
- ('𑛀', '𑛉'),
- ('𑛐', '𑛣'),
- ('𑜰', '𑜹'),
- ('𑣠', '𑣩'),
- ('𑥐', '𑥙'),
- ('𑯰', '𑯹'),
- ('𑱐', '𑱙'),
- ('𑵐', '𑵙'),
- ('𑶠', '𑶩'),
- ('𑽐', '𑽙'),
- ('𖄰', '𖄹'),
- ('𖩠', '𖩩'),
- ('𖫀', '𖫉'),
- ('𖭐', '𖭙'),
- ('𖵰', '𖵹'),
- ('𜳰', '𜳹'),
- ('𝟎', '𝟿'),
- ('𞅀', '𞅉'),
- ('𞋰', '𞋹'),
- ('𞓰', '𞓹'),
- ('𞗱', '𞗺'),
- ('𞥐', '𞥙'),
- ('🯰', '🯹'),
-];
diff --git a/vendor/regex-syntax/src/unicode_tables/perl_space.rs b/vendor/regex-syntax/src/unicode_tables/perl_space.rs
deleted file mode 100644
index c969e373..00000000
--- a/vendor/regex-syntax/src/unicode_tables/perl_space.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
-//
-// ucd-generate property-bool ucd-16.0.0 --chars --include whitespace
-//
-// Unicode version: 16.0.0.
-//
-// ucd-generate 0.3.1 is available on crates.io.
-
-pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] =
- &[("White_Space", WHITE_SPACE)];
-
-pub const WHITE_SPACE: &'static [(char, char)] = &[
- ('\t', '\r'),
- (' ', ' '),
- ('\u{85}', '\u{85}'),
- ('\u{a0}', '\u{a0}'),
- ('\u{1680}', '\u{1680}'),
- ('\u{2000}', '\u{200a}'),
- ('\u{2028}', '\u{2029}'),
- ('\u{202f}', '\u{202f}'),
- ('\u{205f}', '\u{205f}'),
- ('\u{3000}', '\u{3000}'),
-];
diff --git a/vendor/regex-syntax/src/unicode_tables/perl_word.rs b/vendor/regex-syntax/src/unicode_tables/perl_word.rs
deleted file mode 100644
index 21c8c0f9..00000000
--- a/vendor/regex-syntax/src/unicode_tables/perl_word.rs
+++ /dev/null
@@ -1,806 +0,0 @@
-// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
-//
-// ucd-generate perl-word ucd-16.0.0 --chars
-//
-// Unicode version: 16.0.0.
-//
-// ucd-generate 0.3.1 is available on crates.io.
-
-pub const PERL_WORD: &'static [(char, char)] = &[
- ('0', '9'),
- ('A', 'Z'),
- ('_', '_'),
- ('a', 'z'),
- ('ª', 'ª'),
- ('µ', 'µ'),
- ('º', 'º'),
- ('À', 'Ö'),
- ('Ø', 'ö'),
- ('ø', 'ˁ'),
- ('ˆ', 'ˑ'),
- ('ˠ', 'ˤ'),
- ('ˬ', 'ˬ'),
- ('ˮ', 'ˮ'),
- ('\u{300}', 'ʹ'),
- ('Ͷ', 'ͷ'),
- ('ͺ', 'ͽ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', 'ϵ'),
- ('Ϸ', 'ҁ'),
- ('\u{483}', 'ԯ'),
- ('Ա', 'Ֆ'),
- ('ՙ', 'ՙ'),
- ('ՠ', 'ֈ'),
- ('\u{591}', '\u{5bd}'),
- ('\u{5bf}', '\u{5bf}'),
- ('\u{5c1}', '\u{5c2}'),
- ('\u{5c4}', '\u{5c5}'),
- ('\u{5c7}', '\u{5c7}'),
- ('א', 'ת'),
- ('ׯ', 'ײ'),
- ('\u{610}', '\u{61a}'),
- ('ؠ', '٩'),
- ('ٮ', 'ۓ'),
- ('ە', '\u{6dc}'),
- ('\u{6df}', '\u{6e8}'),
- ('\u{6ea}', 'ۼ'),
- ('ۿ', 'ۿ'),
- ('ܐ', '\u{74a}'),
- ('ݍ', 'ޱ'),
- ('߀', 'ߵ'),
- ('ߺ', 'ߺ'),
- ('\u{7fd}', '\u{7fd}'),
- ('ࠀ', '\u{82d}'),
- ('ࡀ', '\u{85b}'),
- ('ࡠ', 'ࡪ'),
- ('ࡰ', 'ࢇ'),
- ('ࢉ', 'ࢎ'),
- ('\u{897}', '\u{8e1}'),
- ('\u{8e3}', '\u{963}'),
- ('०', '९'),
- ('ॱ', 'ঃ'),
- ('অ', 'ঌ'),
- ('এ', 'ঐ'),
- ('ও', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('\u{9bc}', '\u{9c4}'),
- ('ে', 'ৈ'),
- ('ো', 'ৎ'),
- ('\u{9d7}', '\u{9d7}'),
- ('ড়', 'ঢ়'),
- ('য়', '\u{9e3}'),
- ('০', 'ৱ'),
- ('ৼ', 'ৼ'),
- ('\u{9fe}', '\u{9fe}'),
- ('\u{a01}', 'ਃ'),
- ('ਅ', 'ਊ'),
- ('ਏ', 'ਐ'),
- ('ਓ', 'ਨ'),
- ('ਪ', 'ਰ'),
- ('ਲ', 'ਲ਼'),
- ('ਵ', 'ਸ਼'),
- ('ਸ', 'ਹ'),
- ('\u{a3c}', '\u{a3c}'),
- ('ਾ', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4d}'),
- ('\u{a51}', '\u{a51}'),
- ('ਖ਼', 'ੜ'),
- ('ਫ਼', 'ਫ਼'),
- ('੦', '\u{a75}'),
- ('\u{a81}', 'ઃ'),
- ('અ', 'ઍ'),
- ('એ', 'ઑ'),
- ('ઓ', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('\u{abc}', '\u{ac5}'),
- ('\u{ac7}', 'ૉ'),
- ('ો', '\u{acd}'),
- ('ૐ', 'ૐ'),
- ('ૠ', '\u{ae3}'),
- ('૦', '૯'),
- ('ૹ', '\u{aff}'),
- ('\u{b01}', 'ଃ'),
- ('ଅ', 'ଌ'),
- ('ଏ', 'ଐ'),
- ('ଓ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଵ', 'ହ'),
- ('\u{b3c}', '\u{b44}'),
- ('େ', 'ୈ'),
- ('ୋ', '\u{b4d}'),
- ('\u{b55}', '\u{b57}'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', '\u{b63}'),
- ('୦', '୯'),
- ('ୱ', 'ୱ'),
- ('\u{b82}', 'ஃ'),
- ('அ', 'ஊ'),
- ('எ', 'ஐ'),
- ('ஒ', 'க'),
- ('ங', 'ச'),
- ('ஜ', 'ஜ'),
- ('ஞ', 'ட'),
- ('ண', 'த'),
- ('ந', 'ப'),
- ('ம', 'ஹ'),
- ('\u{bbe}', 'ூ'),
- ('ெ', 'ை'),
- ('ொ', '\u{bcd}'),
- ('ௐ', 'ௐ'),
- ('\u{bd7}', '\u{bd7}'),
- ('௦', '௯'),
- ('\u{c00}', 'ఌ'),
- ('ఎ', 'ఐ'),
- ('ఒ', 'న'),
- ('ప', 'హ'),
- ('\u{c3c}', 'ౄ'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4d}'),
- ('\u{c55}', '\u{c56}'),
- ('ౘ', 'ౚ'),
- ('ౝ', 'ౝ'),
- ('ౠ', '\u{c63}'),
- ('౦', '౯'),
- ('ಀ', 'ಃ'),
- ('ಅ', 'ಌ'),
- ('ಎ', 'ಐ'),
- ('ಒ', 'ನ'),
- ('ಪ', 'ಳ'),
- ('ವ', 'ಹ'),
- ('\u{cbc}', 'ೄ'),
- ('\u{cc6}', '\u{cc8}'),
- ('\u{cca}', '\u{ccd}'),
- ('\u{cd5}', '\u{cd6}'),
- ('ೝ', 'ೞ'),
- ('ೠ', '\u{ce3}'),
- ('೦', '೯'),
- ('ೱ', 'ೳ'),
- ('\u{d00}', 'ഌ'),
- ('എ', 'ഐ'),
- ('ഒ', '\u{d44}'),
- ('െ', 'ൈ'),
- ('ൊ', 'ൎ'),
- ('ൔ', '\u{d57}'),
- ('ൟ', '\u{d63}'),
- ('൦', '൯'),
- ('ൺ', 'ൿ'),
- ('\u{d81}', 'ඃ'),
- ('අ', 'ඖ'),
- ('ක', 'න'),
- ('ඳ', 'ර'),
- ('ල', 'ල'),
- ('ව', 'ෆ'),
- ('\u{dca}', '\u{dca}'),
- ('\u{dcf}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('ෘ', '\u{ddf}'),
- ('෦', '෯'),
- ('ෲ', 'ෳ'),
- ('ก', '\u{e3a}'),
- ('เ', '\u{e4e}'),
- ('๐', '๙'),
- ('ກ', 'ຂ'),
- ('ຄ', 'ຄ'),
- ('ຆ', 'ຊ'),
- ('ຌ', 'ຣ'),
- ('ລ', 'ລ'),
- ('ວ', 'ຽ'),
- ('ເ', 'ໄ'),
- ('ໆ', 'ໆ'),
- ('\u{ec8}', '\u{ece}'),
- ('໐', '໙'),
- ('ໜ', 'ໟ'),
- ('ༀ', 'ༀ'),
- ('\u{f18}', '\u{f19}'),
- ('༠', '༩'),
- ('\u{f35}', '\u{f35}'),
- ('\u{f37}', '\u{f37}'),
- ('\u{f39}', '\u{f39}'),
- ('༾', 'ཇ'),
- ('ཉ', 'ཬ'),
- ('\u{f71}', '\u{f84}'),
- ('\u{f86}', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('\u{fc6}', '\u{fc6}'),
- ('က', '၉'),
- ('ၐ', '\u{109d}'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ა', 'ჺ'),
- ('ჼ', 'ቈ'),
- ('ቊ', 'ቍ'),
- ('ቐ', 'ቖ'),
- ('ቘ', 'ቘ'),
- ('ቚ', 'ቝ'),
- ('በ', 'ኈ'),
- ('ኊ', 'ኍ'),
- ('ነ', 'ኰ'),
- ('ኲ', 'ኵ'),
- ('ኸ', 'ኾ'),
- ('ዀ', 'ዀ'),
- ('ዂ', 'ዅ'),
- ('ወ', 'ዖ'),
- ('ዘ', 'ጐ'),
- ('ጒ', 'ጕ'),
- ('ጘ', 'ፚ'),
- ('\u{135d}', '\u{135f}'),
- ('ᎀ', 'ᎏ'),
- ('Ꭰ', 'Ᏽ'),
- ('ᏸ', 'ᏽ'),
- ('ᐁ', 'ᙬ'),
- ('ᙯ', 'ᙿ'),
- ('ᚁ', 'ᚚ'),
- ('ᚠ', 'ᛪ'),
- ('ᛮ', 'ᛸ'),
- ('ᜀ', '\u{1715}'),
- ('ᜟ', '\u{1734}'),
- ('ᝀ', '\u{1753}'),
- ('ᝠ', 'ᝬ'),
- ('ᝮ', 'ᝰ'),
- ('\u{1772}', '\u{1773}'),
- ('ក', '\u{17d3}'),
- ('ៗ', 'ៗ'),
- ('ៜ', '\u{17dd}'),
- ('០', '៩'),
- ('\u{180b}', '\u{180d}'),
- ('\u{180f}', '᠙'),
- ('ᠠ', 'ᡸ'),
- ('ᢀ', 'ᢪ'),
- ('ᢰ', 'ᣵ'),
- ('ᤀ', 'ᤞ'),
- ('\u{1920}', 'ᤫ'),
- ('ᤰ', '\u{193b}'),
- ('᥆', 'ᥭ'),
- ('ᥰ', 'ᥴ'),
- ('ᦀ', 'ᦫ'),
- ('ᦰ', 'ᧉ'),
- ('᧐', '᧙'),
- ('ᨀ', '\u{1a1b}'),
- ('ᨠ', '\u{1a5e}'),
- ('\u{1a60}', '\u{1a7c}'),
- ('\u{1a7f}', '᪉'),
- ('᪐', '᪙'),
- ('ᪧ', 'ᪧ'),
- ('\u{1ab0}', '\u{1ace}'),
- ('\u{1b00}', 'ᭌ'),
- ('᭐', '᭙'),
- ('\u{1b6b}', '\u{1b73}'),
- ('\u{1b80}', '\u{1bf3}'),
- ('ᰀ', '\u{1c37}'),
- ('᱀', '᱉'),
- ('ᱍ', 'ᱽ'),
- ('ᲀ', 'ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('\u{1cd0}', '\u{1cd2}'),
- ('\u{1cd4}', 'ᳺ'),
- ('ᴀ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ᾼ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῌ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('ῠ', 'Ῥ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῼ'),
- ('\u{200c}', '\u{200d}'),
- ('‿', '⁀'),
- ('⁔', '⁔'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('\u{20d0}', '\u{20f0}'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℊ', 'ℓ'),
- ('ℕ', 'ℕ'),
- ('ℙ', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('Ω', 'Ω'),
- ('ℨ', 'ℨ'),
- ('K', 'ℭ'),
- ('ℯ', 'ℹ'),
- ('ℼ', 'ℿ'),
- ('ⅅ', 'ⅉ'),
- ('ⅎ', 'ⅎ'),
- ('Ⅰ', 'ↈ'),
- ('Ⓐ', 'ⓩ'),
- ('Ⰰ', 'ⳤ'),
- ('Ⳬ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ⴰ', 'ⵧ'),
- ('ⵯ', 'ⵯ'),
- ('\u{2d7f}', 'ⶖ'),
- ('ⶠ', 'ⶦ'),
- ('ⶨ', 'ⶮ'),
- ('ⶰ', 'ⶶ'),
- ('ⶸ', 'ⶾ'),
- ('ⷀ', 'ⷆ'),
- ('ⷈ', 'ⷎ'),
- ('ⷐ', 'ⷖ'),
- ('ⷘ', 'ⷞ'),
- ('\u{2de0}', '\u{2dff}'),
- ('ⸯ', 'ⸯ'),
- ('々', '〇'),
- ('〡', '\u{302f}'),
- ('〱', '〵'),
- ('〸', '〼'),
- ('ぁ', 'ゖ'),
- ('\u{3099}', '\u{309a}'),
- ('ゝ', 'ゟ'),
- ('ァ', 'ヺ'),
- ('ー', 'ヿ'),
- ('ㄅ', 'ㄯ'),
- ('ㄱ', 'ㆎ'),
- ('ㆠ', 'ㆿ'),
- ('ㇰ', 'ㇿ'),
- ('㐀', '䶿'),
- ('一', 'ꒌ'),
- ('ꓐ', 'ꓽ'),
- ('ꔀ', 'ꘌ'),
- ('ꘐ', 'ꘫ'),
- ('Ꙁ', '\u{a672}'),
- ('\u{a674}', '\u{a67d}'),
- ('ꙿ', '\u{a6f1}'),
- ('ꜗ', 'ꜟ'),
- ('Ꜣ', 'ꞈ'),
- ('Ꞌ', 'ꟍ'),
- ('Ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'Ƛ'),
- ('ꟲ', 'ꠧ'),
- ('\u{a82c}', '\u{a82c}'),
- ('ꡀ', 'ꡳ'),
- ('ꢀ', '\u{a8c5}'),
- ('꣐', '꣙'),
- ('\u{a8e0}', 'ꣷ'),
- ('ꣻ', 'ꣻ'),
- ('ꣽ', '\u{a92d}'),
- ('ꤰ', '\u{a953}'),
- ('ꥠ', 'ꥼ'),
- ('\u{a980}', '\u{a9c0}'),
- ('ꧏ', '꧙'),
- ('ꧠ', 'ꧾ'),
- ('ꨀ', '\u{aa36}'),
- ('ꩀ', 'ꩍ'),
- ('꩐', '꩙'),
- ('ꩠ', 'ꩶ'),
- ('ꩺ', 'ꫂ'),
- ('ꫛ', 'ꫝ'),
- ('ꫠ', 'ꫯ'),
- ('ꫲ', '\u{aaf6}'),
- ('ꬁ', 'ꬆ'),
- ('ꬉ', 'ꬎ'),
- ('ꬑ', 'ꬖ'),
- ('ꬠ', 'ꬦ'),
- ('ꬨ', 'ꬮ'),
- ('ꬰ', 'ꭚ'),
- ('ꭜ', 'ꭩ'),
- ('ꭰ', 'ꯪ'),
- ('꯬', '\u{abed}'),
- ('꯰', '꯹'),
- ('가', '힣'),
- ('ힰ', 'ퟆ'),
- ('ퟋ', 'ퟻ'),
- ('豈', '舘'),
- ('並', '龎'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('יִ', 'ﬨ'),
- ('שׁ', 'זּ'),
- ('טּ', 'לּ'),
- ('מּ', 'מּ'),
- ('נּ', 'סּ'),
- ('ףּ', 'פּ'),
- ('צּ', 'ﮱ'),
- ('ﯓ', 'ﴽ'),
- ('ﵐ', 'ﶏ'),
- ('ﶒ', 'ﷇ'),
- ('ﷰ', 'ﷻ'),
- ('\u{fe00}', '\u{fe0f}'),
- ('\u{fe20}', '\u{fe2f}'),
- ('︳', '︴'),
- ('﹍', '﹏'),
- ('ﹰ', 'ﹴ'),
- ('ﹶ', 'ﻼ'),
- ('0', '9'),
- ('A', 'Z'),
- ('_', '_'),
- ('a', 'z'),
- ('ヲ', 'ᄒ'),
- ('ᅡ', 'ᅦ'),
- ('ᅧ', 'ᅬ'),
- ('ᅭ', 'ᅲ'),
- ('ᅳ', 'ᅵ'),
- ('𐀀', '𐀋'),
- ('𐀍', '𐀦'),
- ('𐀨', '𐀺'),
- ('𐀼', '𐀽'),
- ('𐀿', '𐁍'),
- ('𐁐', '𐁝'),
- ('𐂀', '𐃺'),
- ('𐅀', '𐅴'),
- ('\u{101fd}', '\u{101fd}'),
- ('𐊀', '𐊜'),
- ('𐊠', '𐋐'),
- ('\u{102e0}', '\u{102e0}'),
- ('𐌀', '𐌟'),
- ('𐌭', '𐍊'),
- ('𐍐', '\u{1037a}'),
- ('𐎀', '𐎝'),
- ('𐎠', '𐏃'),
- ('𐏈', '𐏏'),
- ('𐏑', '𐏕'),
- ('𐐀', '𐒝'),
- ('𐒠', '𐒩'),
- ('𐒰', '𐓓'),
- ('𐓘', '𐓻'),
- ('𐔀', '𐔧'),
- ('𐔰', '𐕣'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐗀', '𐗳'),
- ('𐘀', '𐜶'),
- ('𐝀', '𐝕'),
- ('𐝠', '𐝧'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𐠀', '𐠅'),
- ('𐠈', '𐠈'),
- ('𐠊', '𐠵'),
- ('𐠷', '𐠸'),
- ('𐠼', '𐠼'),
- ('𐠿', '𐡕'),
- ('𐡠', '𐡶'),
- ('𐢀', '𐢞'),
- ('𐣠', '𐣲'),
- ('𐣴', '𐣵'),
- ('𐤀', '𐤕'),
- ('𐤠', '𐤹'),
- ('𐦀', '𐦷'),
- ('𐦾', '𐦿'),
- ('𐨀', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '𐨓'),
- ('𐨕', '𐨗'),
- ('𐨙', '𐨵'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '\u{10a3f}'),
- ('𐩠', '𐩼'),
- ('𐪀', '𐪜'),
- ('𐫀', '𐫇'),
- ('𐫉', '\u{10ae6}'),
- ('𐬀', '𐬵'),
- ('𐭀', '𐭕'),
- ('𐭠', '𐭲'),
- ('𐮀', '𐮑'),
- ('𐰀', '𐱈'),
- ('𐲀', '𐲲'),
- ('𐳀', '𐳲'),
- ('𐴀', '\u{10d27}'),
- ('𐴰', '𐴹'),
- ('𐵀', '𐵥'),
- ('\u{10d69}', '\u{10d6d}'),
- ('𐵯', '𐶅'),
- ('𐺀', '𐺩'),
- ('\u{10eab}', '\u{10eac}'),
- ('𐺰', '𐺱'),
- ('𐻂', '𐻄'),
- ('\u{10efc}', '𐼜'),
- ('𐼧', '𐼧'),
- ('𐼰', '\u{10f50}'),
- ('𐽰', '\u{10f85}'),
- ('𐾰', '𐿄'),
- ('𐿠', '𐿶'),
- ('𑀀', '\u{11046}'),
- ('𑁦', '𑁵'),
- ('\u{1107f}', '\u{110ba}'),
- ('\u{110c2}', '\u{110c2}'),
- ('𑃐', '𑃨'),
- ('𑃰', '𑃹'),
- ('\u{11100}', '\u{11134}'),
- ('𑄶', '𑄿'),
- ('𑅄', '𑅇'),
- ('𑅐', '\u{11173}'),
- ('𑅶', '𑅶'),
- ('\u{11180}', '𑇄'),
- ('\u{111c9}', '\u{111cc}'),
- ('𑇎', '𑇚'),
- ('𑇜', '𑇜'),
- ('𑈀', '𑈑'),
- ('𑈓', '\u{11237}'),
- ('\u{1123e}', '\u{11241}'),
- ('𑊀', '𑊆'),
- ('𑊈', '𑊈'),
- ('𑊊', '𑊍'),
- ('𑊏', '𑊝'),
- ('𑊟', '𑊨'),
- ('𑊰', '\u{112ea}'),
- ('𑋰', '𑋹'),
- ('\u{11300}', '𑌃'),
- ('𑌅', '𑌌'),
- ('𑌏', '𑌐'),
- ('𑌓', '𑌨'),
- ('𑌪', '𑌰'),
- ('𑌲', '𑌳'),
- ('𑌵', '𑌹'),
- ('\u{1133b}', '𑍄'),
- ('𑍇', '𑍈'),
- ('𑍋', '\u{1134d}'),
- ('𑍐', '𑍐'),
- ('\u{11357}', '\u{11357}'),
- ('𑍝', '𑍣'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
- ('𑎀', '𑎉'),
- ('𑎋', '𑎋'),
- ('𑎎', '𑎎'),
- ('𑎐', '𑎵'),
- ('𑎷', '\u{113c0}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '𑏊'),
- ('𑏌', '𑏓'),
- ('\u{113e1}', '\u{113e2}'),
- ('𑐀', '𑑊'),
- ('𑑐', '𑑙'),
- ('\u{1145e}', '𑑡'),
- ('𑒀', '𑓅'),
- ('𑓇', '𑓇'),
- ('𑓐', '𑓙'),
- ('𑖀', '\u{115b5}'),
- ('𑖸', '\u{115c0}'),
- ('𑗘', '\u{115dd}'),
- ('𑘀', '\u{11640}'),
- ('𑙄', '𑙄'),
- ('𑙐', '𑙙'),
- ('𑚀', '𑚸'),
- ('𑛀', '𑛉'),
- ('𑛐', '𑛣'),
- ('𑜀', '𑜚'),
- ('\u{1171d}', '\u{1172b}'),
- ('𑜰', '𑜹'),
- ('𑝀', '𑝆'),
- ('𑠀', '\u{1183a}'),
- ('𑢠', '𑣩'),
- ('𑣿', '𑤆'),
- ('𑤉', '𑤉'),
- ('𑤌', '𑤓'),
- ('𑤕', '𑤖'),
- ('𑤘', '𑤵'),
- ('𑤷', '𑤸'),
- ('\u{1193b}', '\u{11943}'),
- ('𑥐', '𑥙'),
- ('𑦠', '𑦧'),
- ('𑦪', '\u{119d7}'),
- ('\u{119da}', '𑧡'),
- ('𑧣', '𑧤'),
- ('𑨀', '\u{11a3e}'),
- ('\u{11a47}', '\u{11a47}'),
- ('𑩐', '\u{11a99}'),
- ('𑪝', '𑪝'),
- ('𑪰', '𑫸'),
- ('𑯀', '𑯠'),
- ('𑯰', '𑯹'),
- ('𑰀', '𑰈'),
- ('𑰊', '\u{11c36}'),
- ('\u{11c38}', '𑱀'),
- ('𑱐', '𑱙'),
- ('𑱲', '𑲏'),
- ('\u{11c92}', '\u{11ca7}'),
- ('𑲩', '\u{11cb6}'),
- ('𑴀', '𑴆'),
- ('𑴈', '𑴉'),
- ('𑴋', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d47}'),
- ('𑵐', '𑵙'),
- ('𑵠', '𑵥'),
- ('𑵧', '𑵨'),
- ('𑵪', '𑶎'),
- ('\u{11d90}', '\u{11d91}'),
- ('𑶓', '𑶘'),
- ('𑶠', '𑶩'),
- ('𑻠', '𑻶'),
- ('\u{11f00}', '𑼐'),
- ('𑼒', '\u{11f3a}'),
- ('𑼾', '\u{11f42}'),
- ('𑽐', '\u{11f5a}'),
- ('𑾰', '𑾰'),
- ('𒀀', '𒎙'),
- ('𒐀', '𒑮'),
- ('𒒀', '𒕃'),
- ('𒾐', '𒿰'),
- ('𓀀', '𓐯'),
- ('\u{13440}', '\u{13455}'),
- ('𓑠', '𔏺'),
- ('𔐀', '𔙆'),
- ('𖄀', '𖄹'),
- ('𖠀', '𖨸'),
- ('𖩀', '𖩞'),
- ('𖩠', '𖩩'),
- ('𖩰', '𖪾'),
- ('𖫀', '𖫉'),
- ('𖫐', '𖫭'),
- ('\u{16af0}', '\u{16af4}'),
- ('𖬀', '\u{16b36}'),
- ('𖭀', '𖭃'),
- ('𖭐', '𖭙'),
- ('𖭣', '𖭷'),
- ('𖭽', '𖮏'),
- ('𖵀', '𖵬'),
- ('𖵰', '𖵹'),
- ('𖹀', '𖹿'),
- ('𖼀', '𖽊'),
- ('\u{16f4f}', '𖾇'),
- ('\u{16f8f}', '𖾟'),
- ('𖿠', '𖿡'),
- ('𖿣', '\u{16fe4}'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('𗀀', '𘟷'),
- ('𘠀', '𘳕'),
- ('𘳿', '𘴈'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('𛀀', '𛄢'),
- ('𛄲', '𛄲'),
- ('𛅐', '𛅒'),
- ('𛅕', '𛅕'),
- ('𛅤', '𛅧'),
- ('𛅰', '𛋻'),
- ('𛰀', '𛱪'),
- ('𛱰', '𛱼'),
- ('𛲀', '𛲈'),
- ('𛲐', '𛲙'),
- ('\u{1bc9d}', '\u{1bc9e}'),
- ('𜳰', '𜳹'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('\u{1d165}', '\u{1d169}'),
- ('\u{1d16d}', '\u{1d172}'),
- ('\u{1d17b}', '\u{1d182}'),
- ('\u{1d185}', '\u{1d18b}'),
- ('\u{1d1aa}', '\u{1d1ad}'),
- ('\u{1d242}', '\u{1d244}'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝛀'),
- ('𝛂', '𝛚'),
- ('𝛜', '𝛺'),
- ('𝛼', '𝜔'),
- ('𝜖', '𝜴'),
- ('𝜶', '𝝎'),
- ('𝝐', '𝝮'),
- ('𝝰', '𝞈'),
- ('𝞊', '𝞨'),
- ('𝞪', '𝟂'),
- ('𝟄', '𝟋'),
- ('𝟎', '𝟿'),
- ('\u{1da00}', '\u{1da36}'),
- ('\u{1da3b}', '\u{1da6c}'),
- ('\u{1da75}', '\u{1da75}'),
- ('\u{1da84}', '\u{1da84}'),
- ('\u{1da9b}', '\u{1da9f}'),
- ('\u{1daa1}', '\u{1daaf}'),
- ('𝼀', '𝼞'),
- ('𝼥', '𝼪'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
- ('𞀰', '𞁭'),
- ('\u{1e08f}', '\u{1e08f}'),
- ('𞄀', '𞄬'),
- ('\u{1e130}', '𞄽'),
- ('𞅀', '𞅉'),
- ('𞅎', '𞅎'),
- ('𞊐', '\u{1e2ae}'),
- ('𞋀', '𞋹'),
- ('𞓐', '𞓹'),
- ('𞗐', '𞗺'),
- ('𞟠', '𞟦'),
- ('𞟨', '𞟫'),
- ('𞟭', '𞟮'),
- ('𞟰', '𞟾'),
- ('𞠀', '𞣄'),
- ('\u{1e8d0}', '\u{1e8d6}'),
- ('𞤀', '𞥋'),
- ('𞥐', '𞥙'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('🄰', '🅉'),
- ('🅐', '🅩'),
- ('🅰', '🆉'),
- ('🯰', '🯹'),
- ('𠀀', '𪛟'),
- ('𪜀', '𫜹'),
- ('𫝀', '𫠝'),
- ('𫠠', '𬺡'),
- ('𬺰', '𮯠'),
- ('𮯰', '𮹝'),
- ('丽', '𪘀'),
- ('𰀀', '𱍊'),
- ('𱍐', '𲎯'),
- ('\u{e0100}', '\u{e01ef}'),
-];
diff --git a/vendor/regex-syntax/src/unicode_tables/property_bool.rs b/vendor/regex-syntax/src/unicode_tables/property_bool.rs
deleted file mode 100644
index 3d62edc4..00000000
--- a/vendor/regex-syntax/src/unicode_tables/property_bool.rs
+++ /dev/null
@@ -1,12095 +0,0 @@
-// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
-//
-// ucd-generate property-bool ucd-16.0.0 --chars
-//
-// Unicode version: 16.0.0.
-//
-// ucd-generate 0.3.1 is available on crates.io.
-
-pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
- ("ASCII_Hex_Digit", ASCII_HEX_DIGIT),
- ("Alphabetic", ALPHABETIC),
- ("Bidi_Control", BIDI_CONTROL),
- ("Bidi_Mirrored", BIDI_MIRRORED),
- ("Case_Ignorable", CASE_IGNORABLE),
- ("Cased", CASED),
- ("Changes_When_Casefolded", CHANGES_WHEN_CASEFOLDED),
- ("Changes_When_Casemapped", CHANGES_WHEN_CASEMAPPED),
- ("Changes_When_Lowercased", CHANGES_WHEN_LOWERCASED),
- ("Changes_When_Titlecased", CHANGES_WHEN_TITLECASED),
- ("Changes_When_Uppercased", CHANGES_WHEN_UPPERCASED),
- ("Dash", DASH),
- ("Default_Ignorable_Code_Point", DEFAULT_IGNORABLE_CODE_POINT),
- ("Deprecated", DEPRECATED),
- ("Diacritic", DIACRITIC),
- ("Emoji", EMOJI),
- ("Emoji_Component", EMOJI_COMPONENT),
- ("Emoji_Modifier", EMOJI_MODIFIER),
- ("Emoji_Modifier_Base", EMOJI_MODIFIER_BASE),
- ("Emoji_Presentation", EMOJI_PRESENTATION),
- ("Extended_Pictographic", EXTENDED_PICTOGRAPHIC),
- ("Extender", EXTENDER),
- ("Grapheme_Base", GRAPHEME_BASE),
- ("Grapheme_Extend", GRAPHEME_EXTEND),
- ("Grapheme_Link", GRAPHEME_LINK),
- ("Hex_Digit", HEX_DIGIT),
- ("Hyphen", HYPHEN),
- ("IDS_Binary_Operator", IDS_BINARY_OPERATOR),
- ("IDS_Trinary_Operator", IDS_TRINARY_OPERATOR),
- ("IDS_Unary_Operator", IDS_UNARY_OPERATOR),
- ("ID_Compat_Math_Continue", ID_COMPAT_MATH_CONTINUE),
- ("ID_Compat_Math_Start", ID_COMPAT_MATH_START),
- ("ID_Continue", ID_CONTINUE),
- ("ID_Start", ID_START),
- ("Ideographic", IDEOGRAPHIC),
- ("InCB", INCB),
- ("Join_Control", JOIN_CONTROL),
- ("Logical_Order_Exception", LOGICAL_ORDER_EXCEPTION),
- ("Lowercase", LOWERCASE),
- ("Math", MATH),
- ("Modifier_Combining_Mark", MODIFIER_COMBINING_MARK),
- ("Noncharacter_Code_Point", NONCHARACTER_CODE_POINT),
- ("Other_Alphabetic", OTHER_ALPHABETIC),
- ("Other_Default_Ignorable_Code_Point", OTHER_DEFAULT_IGNORABLE_CODE_POINT),
- ("Other_Grapheme_Extend", OTHER_GRAPHEME_EXTEND),
- ("Other_ID_Continue", OTHER_ID_CONTINUE),
- ("Other_ID_Start", OTHER_ID_START),
- ("Other_Lowercase", OTHER_LOWERCASE),
- ("Other_Math", OTHER_MATH),
- ("Other_Uppercase", OTHER_UPPERCASE),
- ("Pattern_Syntax", PATTERN_SYNTAX),
- ("Pattern_White_Space", PATTERN_WHITE_SPACE),
- ("Prepended_Concatenation_Mark", PREPENDED_CONCATENATION_MARK),
- ("Quotation_Mark", QUOTATION_MARK),
- ("Radical", RADICAL),
- ("Regional_Indicator", REGIONAL_INDICATOR),
- ("Sentence_Terminal", SENTENCE_TERMINAL),
- ("Soft_Dotted", SOFT_DOTTED),
- ("Terminal_Punctuation", TERMINAL_PUNCTUATION),
- ("Unified_Ideograph", UNIFIED_IDEOGRAPH),
- ("Uppercase", UPPERCASE),
- ("Variation_Selector", VARIATION_SELECTOR),
- ("White_Space", WHITE_SPACE),
- ("XID_Continue", XID_CONTINUE),
- ("XID_Start", XID_START),
-];
-
-pub const ASCII_HEX_DIGIT: &'static [(char, char)] =
- &[('0', '9'), ('A', 'F'), ('a', 'f')];
-
-pub const ALPHABETIC: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('a', 'z'),
- ('ª', 'ª'),
- ('µ', 'µ'),
- ('º', 'º'),
- ('À', 'Ö'),
- ('Ø', 'ö'),
- ('ø', 'ˁ'),
- ('ˆ', 'ˑ'),
- ('ˠ', 'ˤ'),
- ('ˬ', 'ˬ'),
- ('ˮ', 'ˮ'),
- ('\u{345}', '\u{345}'),
- ('\u{363}', 'ʹ'),
- ('Ͷ', 'ͷ'),
- ('ͺ', 'ͽ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', 'ϵ'),
- ('Ϸ', 'ҁ'),
- ('Ҋ', 'ԯ'),
- ('Ա', 'Ֆ'),
- ('ՙ', 'ՙ'),
- ('ՠ', 'ֈ'),
- ('\u{5b0}', '\u{5bd}'),
- ('\u{5bf}', '\u{5bf}'),
- ('\u{5c1}', '\u{5c2}'),
- ('\u{5c4}', '\u{5c5}'),
- ('\u{5c7}', '\u{5c7}'),
- ('א', 'ת'),
- ('ׯ', 'ײ'),
- ('\u{610}', '\u{61a}'),
- ('ؠ', '\u{657}'),
- ('\u{659}', '\u{65f}'),
- ('ٮ', 'ۓ'),
- ('ە', '\u{6dc}'),
- ('\u{6e1}', '\u{6e8}'),
- ('\u{6ed}', 'ۯ'),
- ('ۺ', 'ۼ'),
- ('ۿ', 'ۿ'),
- ('ܐ', '\u{73f}'),
- ('ݍ', 'ޱ'),
- ('ߊ', 'ߪ'),
- ('ߴ', 'ߵ'),
- ('ߺ', 'ߺ'),
- ('ࠀ', '\u{817}'),
- ('ࠚ', '\u{82c}'),
- ('ࡀ', 'ࡘ'),
- ('ࡠ', 'ࡪ'),
- ('ࡰ', 'ࢇ'),
- ('ࢉ', 'ࢎ'),
- ('\u{897}', '\u{897}'),
- ('ࢠ', 'ࣉ'),
- ('\u{8d4}', '\u{8df}'),
- ('\u{8e3}', '\u{8e9}'),
- ('\u{8f0}', 'ऻ'),
- ('ऽ', 'ौ'),
- ('ॎ', 'ॐ'),
- ('\u{955}', '\u{963}'),
- ('ॱ', 'ঃ'),
- ('অ', 'ঌ'),
- ('এ', 'ঐ'),
- ('ও', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('ঽ', '\u{9c4}'),
- ('ে', 'ৈ'),
- ('ো', 'ৌ'),
- ('ৎ', 'ৎ'),
- ('\u{9d7}', '\u{9d7}'),
- ('ড়', 'ঢ়'),
- ('য়', '\u{9e3}'),
- ('ৰ', 'ৱ'),
- ('ৼ', 'ৼ'),
- ('\u{a01}', 'ਃ'),
- ('ਅ', 'ਊ'),
- ('ਏ', 'ਐ'),
- ('ਓ', 'ਨ'),
- ('ਪ', 'ਰ'),
- ('ਲ', 'ਲ਼'),
- ('ਵ', 'ਸ਼'),
- ('ਸ', 'ਹ'),
- ('ਾ', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4c}'),
- ('\u{a51}', '\u{a51}'),
- ('ਖ਼', 'ੜ'),
- ('ਫ਼', 'ਫ਼'),
- ('\u{a70}', '\u{a75}'),
- ('\u{a81}', 'ઃ'),
- ('અ', 'ઍ'),
- ('એ', 'ઑ'),
- ('ઓ', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('ઽ', '\u{ac5}'),
- ('\u{ac7}', 'ૉ'),
- ('ો', 'ૌ'),
- ('ૐ', 'ૐ'),
- ('ૠ', '\u{ae3}'),
- ('ૹ', '\u{afc}'),
- ('\u{b01}', 'ଃ'),
- ('ଅ', 'ଌ'),
- ('ଏ', 'ଐ'),
- ('ଓ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଵ', 'ହ'),
- ('ଽ', '\u{b44}'),
- ('େ', 'ୈ'),
- ('ୋ', 'ୌ'),
- ('\u{b56}', '\u{b57}'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', '\u{b63}'),
- ('ୱ', 'ୱ'),
- ('\u{b82}', 'ஃ'),
- ('அ', 'ஊ'),
- ('எ', 'ஐ'),
- ('ஒ', 'க'),
- ('ங', 'ச'),
- ('ஜ', 'ஜ'),
- ('ஞ', 'ட'),
- ('ண', 'த'),
- ('ந', 'ப'),
- ('ம', 'ஹ'),
- ('\u{bbe}', 'ூ'),
- ('ெ', 'ை'),
- ('ொ', 'ௌ'),
- ('ௐ', 'ௐ'),
- ('\u{bd7}', '\u{bd7}'),
- ('\u{c00}', 'ఌ'),
- ('ఎ', 'ఐ'),
- ('ఒ', 'న'),
- ('ప', 'హ'),
- ('ఽ', 'ౄ'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4c}'),
- ('\u{c55}', '\u{c56}'),
- ('ౘ', 'ౚ'),
- ('ౝ', 'ౝ'),
- ('ౠ', '\u{c63}'),
- ('ಀ', 'ಃ'),
- ('ಅ', 'ಌ'),
- ('ಎ', 'ಐ'),
- ('ಒ', 'ನ'),
- ('ಪ', 'ಳ'),
- ('ವ', 'ಹ'),
- ('ಽ', 'ೄ'),
- ('\u{cc6}', '\u{cc8}'),
- ('\u{cca}', '\u{ccc}'),
- ('\u{cd5}', '\u{cd6}'),
- ('ೝ', 'ೞ'),
- ('ೠ', '\u{ce3}'),
- ('ೱ', 'ೳ'),
- ('\u{d00}', 'ഌ'),
- ('എ', 'ഐ'),
- ('ഒ', 'ഺ'),
- ('ഽ', '\u{d44}'),
- ('െ', 'ൈ'),
- ('ൊ', 'ൌ'),
- ('ൎ', 'ൎ'),
- ('ൔ', '\u{d57}'),
- ('ൟ', '\u{d63}'),
- ('ൺ', 'ൿ'),
- ('\u{d81}', 'ඃ'),
- ('අ', 'ඖ'),
- ('ක', 'න'),
- ('ඳ', 'ර'),
- ('ල', 'ල'),
- ('ව', 'ෆ'),
- ('\u{dcf}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('ෘ', '\u{ddf}'),
- ('ෲ', 'ෳ'),
- ('ก', '\u{e3a}'),
- ('เ', 'ๆ'),
- ('\u{e4d}', '\u{e4d}'),
- ('ກ', 'ຂ'),
- ('ຄ', 'ຄ'),
- ('ຆ', 'ຊ'),
- ('ຌ', 'ຣ'),
- ('ລ', 'ລ'),
- ('ວ', '\u{eb9}'),
- ('\u{ebb}', 'ຽ'),
- ('ເ', 'ໄ'),
- ('ໆ', 'ໆ'),
- ('\u{ecd}', '\u{ecd}'),
- ('ໜ', 'ໟ'),
- ('ༀ', 'ༀ'),
- ('ཀ', 'ཇ'),
- ('ཉ', 'ཬ'),
- ('\u{f71}', '\u{f83}'),
- ('ྈ', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('က', '\u{1036}'),
- ('း', 'း'),
- ('ျ', 'ဿ'),
- ('ၐ', 'ႏ'),
- ('ႚ', '\u{109d}'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ა', 'ჺ'),
- ('ჼ', 'ቈ'),
- ('ቊ', 'ቍ'),
- ('ቐ', 'ቖ'),
- ('ቘ', 'ቘ'),
- ('ቚ', 'ቝ'),
- ('በ', 'ኈ'),
- ('ኊ', 'ኍ'),
- ('ነ', 'ኰ'),
- ('ኲ', 'ኵ'),
- ('ኸ', 'ኾ'),
- ('ዀ', 'ዀ'),
- ('ዂ', 'ዅ'),
- ('ወ', 'ዖ'),
- ('ዘ', 'ጐ'),
- ('ጒ', 'ጕ'),
- ('ጘ', 'ፚ'),
- ('ᎀ', 'ᎏ'),
- ('Ꭰ', 'Ᏽ'),
- ('ᏸ', 'ᏽ'),
- ('ᐁ', 'ᙬ'),
- ('ᙯ', 'ᙿ'),
- ('ᚁ', 'ᚚ'),
- ('ᚠ', 'ᛪ'),
- ('ᛮ', 'ᛸ'),
- ('ᜀ', '\u{1713}'),
- ('ᜟ', '\u{1733}'),
- ('ᝀ', '\u{1753}'),
- ('ᝠ', 'ᝬ'),
- ('ᝮ', 'ᝰ'),
- ('\u{1772}', '\u{1773}'),
- ('ក', 'ឳ'),
- ('ា', 'ៈ'),
- ('ៗ', 'ៗ'),
- ('ៜ', 'ៜ'),
- ('ᠠ', 'ᡸ'),
- ('ᢀ', 'ᢪ'),
- ('ᢰ', 'ᣵ'),
- ('ᤀ', 'ᤞ'),
- ('\u{1920}', 'ᤫ'),
- ('ᤰ', 'ᤸ'),
- ('ᥐ', 'ᥭ'),
- ('ᥰ', 'ᥴ'),
- ('ᦀ', 'ᦫ'),
- ('ᦰ', 'ᧉ'),
- ('ᨀ', '\u{1a1b}'),
- ('ᨠ', '\u{1a5e}'),
- ('ᩡ', '\u{1a74}'),
- ('ᪧ', 'ᪧ'),
- ('\u{1abf}', '\u{1ac0}'),
- ('\u{1acc}', '\u{1ace}'),
- ('\u{1b00}', 'ᬳ'),
- ('\u{1b35}', '\u{1b43}'),
- ('ᭅ', 'ᭌ'),
- ('\u{1b80}', '\u{1ba9}'),
- ('\u{1bac}', 'ᮯ'),
- ('ᮺ', 'ᯥ'),
- ('ᯧ', '\u{1bf1}'),
- ('ᰀ', '\u{1c36}'),
- ('ᱍ', 'ᱏ'),
- ('ᱚ', 'ᱽ'),
- ('ᲀ', 'ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('ᳩ', 'ᳬ'),
- ('ᳮ', 'ᳳ'),
- ('ᳵ', 'ᳶ'),
- ('ᳺ', 'ᳺ'),
- ('ᴀ', 'ᶿ'),
- ('\u{1dd3}', '\u{1df4}'),
- ('Ḁ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ᾼ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῌ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('ῠ', 'Ῥ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῼ'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℊ', 'ℓ'),
- ('ℕ', 'ℕ'),
- ('ℙ', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('Ω', 'Ω'),
- ('ℨ', 'ℨ'),
- ('K', 'ℭ'),
- ('ℯ', 'ℹ'),
- ('ℼ', 'ℿ'),
- ('ⅅ', 'ⅉ'),
- ('ⅎ', 'ⅎ'),
- ('Ⅰ', 'ↈ'),
- ('Ⓐ', 'ⓩ'),
- ('Ⰰ', 'ⳤ'),
- ('Ⳬ', 'ⳮ'),
- ('Ⳳ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ⴰ', 'ⵧ'),
- ('ⵯ', 'ⵯ'),
- ('ⶀ', 'ⶖ'),
- ('ⶠ', 'ⶦ'),
- ('ⶨ', 'ⶮ'),
- ('ⶰ', 'ⶶ'),
- ('ⶸ', 'ⶾ'),
- ('ⷀ', 'ⷆ'),
- ('ⷈ', 'ⷎ'),
- ('ⷐ', 'ⷖ'),
- ('ⷘ', 'ⷞ'),
- ('\u{2de0}', '\u{2dff}'),
- ('ⸯ', 'ⸯ'),
- ('々', '〇'),
- ('〡', '〩'),
- ('〱', '〵'),
- ('〸', '〼'),
- ('ぁ', 'ゖ'),
- ('ゝ', 'ゟ'),
- ('ァ', 'ヺ'),
- ('ー', 'ヿ'),
- ('ㄅ', 'ㄯ'),
- ('ㄱ', 'ㆎ'),
- ('ㆠ', 'ㆿ'),
- ('ㇰ', 'ㇿ'),
- ('㐀', '䶿'),
- ('一', 'ꒌ'),
- ('ꓐ', 'ꓽ'),
- ('ꔀ', 'ꘌ'),
- ('ꘐ', 'ꘟ'),
- ('ꘪ', 'ꘫ'),
- ('Ꙁ', 'ꙮ'),
- ('\u{a674}', '\u{a67b}'),
- ('ꙿ', 'ꛯ'),
- ('ꜗ', 'ꜟ'),
- ('Ꜣ', 'ꞈ'),
- ('Ꞌ', 'ꟍ'),
- ('Ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'Ƛ'),
- ('ꟲ', 'ꠅ'),
- ('ꠇ', 'ꠧ'),
- ('ꡀ', 'ꡳ'),
- ('ꢀ', 'ꣃ'),
- ('\u{a8c5}', '\u{a8c5}'),
- ('ꣲ', 'ꣷ'),
- ('ꣻ', 'ꣻ'),
- ('ꣽ', '\u{a8ff}'),
- ('ꤊ', '\u{a92a}'),
- ('ꤰ', 'ꥒ'),
- ('ꥠ', 'ꥼ'),
- ('\u{a980}', 'ꦲ'),
- ('ꦴ', 'ꦿ'),
- ('ꧏ', 'ꧏ'),
- ('ꧠ', 'ꧯ'),
- ('ꧺ', 'ꧾ'),
- ('ꨀ', '\u{aa36}'),
- ('ꩀ', 'ꩍ'),
- ('ꩠ', 'ꩶ'),
- ('ꩺ', '\u{aabe}'),
- ('ꫀ', 'ꫀ'),
- ('ꫂ', 'ꫂ'),
- ('ꫛ', 'ꫝ'),
- ('ꫠ', 'ꫯ'),
- ('ꫲ', 'ꫵ'),
- ('ꬁ', 'ꬆ'),
- ('ꬉ', 'ꬎ'),
- ('ꬑ', 'ꬖ'),
- ('ꬠ', 'ꬦ'),
- ('ꬨ', 'ꬮ'),
- ('ꬰ', 'ꭚ'),
- ('ꭜ', 'ꭩ'),
- ('ꭰ', 'ꯪ'),
- ('가', '힣'),
- ('ힰ', 'ퟆ'),
- ('ퟋ', 'ퟻ'),
- ('豈', '舘'),
- ('並', '龎'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('יִ', 'ﬨ'),
- ('שׁ', 'זּ'),
- ('טּ', 'לּ'),
- ('מּ', 'מּ'),
- ('נּ', 'סּ'),
- ('ףּ', 'פּ'),
- ('צּ', 'ﮱ'),
- ('ﯓ', 'ﴽ'),
- ('ﵐ', 'ﶏ'),
- ('ﶒ', 'ﷇ'),
- ('ﷰ', 'ﷻ'),
- ('ﹰ', 'ﹴ'),
- ('ﹶ', 'ﻼ'),
- ('A', 'Z'),
- ('a', 'z'),
- ('ヲ', 'ᄒ'),
- ('ᅡ', 'ᅦ'),
- ('ᅧ', 'ᅬ'),
- ('ᅭ', 'ᅲ'),
- ('ᅳ', 'ᅵ'),
- ('𐀀', '𐀋'),
- ('𐀍', '𐀦'),
- ('𐀨', '𐀺'),
- ('𐀼', '𐀽'),
- ('𐀿', '𐁍'),
- ('𐁐', '𐁝'),
- ('𐂀', '𐃺'),
- ('𐅀', '𐅴'),
- ('𐊀', '𐊜'),
- ('𐊠', '𐋐'),
- ('𐌀', '𐌟'),
- ('𐌭', '𐍊'),
- ('𐍐', '\u{1037a}'),
- ('𐎀', '𐎝'),
- ('𐎠', '𐏃'),
- ('𐏈', '𐏏'),
- ('𐏑', '𐏕'),
- ('𐐀', '𐒝'),
- ('𐒰', '𐓓'),
- ('𐓘', '𐓻'),
- ('𐔀', '𐔧'),
- ('𐔰', '𐕣'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐗀', '𐗳'),
- ('𐘀', '𐜶'),
- ('𐝀', '𐝕'),
- ('𐝠', '𐝧'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𐠀', '𐠅'),
- ('𐠈', '𐠈'),
- ('𐠊', '𐠵'),
- ('𐠷', '𐠸'),
- ('𐠼', '𐠼'),
- ('𐠿', '𐡕'),
- ('𐡠', '𐡶'),
- ('𐢀', '𐢞'),
- ('𐣠', '𐣲'),
- ('𐣴', '𐣵'),
- ('𐤀', '𐤕'),
- ('𐤠', '𐤹'),
- ('𐦀', '𐦷'),
- ('𐦾', '𐦿'),
- ('𐨀', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '𐨓'),
- ('𐨕', '𐨗'),
- ('𐨙', '𐨵'),
- ('𐩠', '𐩼'),
- ('𐪀', '𐪜'),
- ('𐫀', '𐫇'),
- ('𐫉', '𐫤'),
- ('𐬀', '𐬵'),
- ('𐭀', '𐭕'),
- ('𐭠', '𐭲'),
- ('𐮀', '𐮑'),
- ('𐰀', '𐱈'),
- ('𐲀', '𐲲'),
- ('𐳀', '𐳲'),
- ('𐴀', '\u{10d27}'),
- ('𐵊', '𐵥'),
- ('\u{10d69}', '\u{10d69}'),
- ('𐵯', '𐶅'),
- ('𐺀', '𐺩'),
- ('\u{10eab}', '\u{10eac}'),
- ('𐺰', '𐺱'),
- ('𐻂', '𐻄'),
- ('\u{10efc}', '\u{10efc}'),
- ('𐼀', '𐼜'),
- ('𐼧', '𐼧'),
- ('𐼰', '𐽅'),
- ('𐽰', '𐾁'),
- ('𐾰', '𐿄'),
- ('𐿠', '𐿶'),
- ('𑀀', '\u{11045}'),
- ('𑁱', '𑁵'),
- ('\u{11080}', '𑂸'),
- ('\u{110c2}', '\u{110c2}'),
- ('𑃐', '𑃨'),
- ('\u{11100}', '\u{11132}'),
- ('𑅄', '𑅇'),
- ('𑅐', '𑅲'),
- ('𑅶', '𑅶'),
- ('\u{11180}', '𑆿'),
- ('𑇁', '𑇄'),
- ('𑇎', '\u{111cf}'),
- ('𑇚', '𑇚'),
- ('𑇜', '𑇜'),
- ('𑈀', '𑈑'),
- ('𑈓', '\u{11234}'),
- ('\u{11237}', '\u{11237}'),
- ('\u{1123e}', '\u{11241}'),
- ('𑊀', '𑊆'),
- ('𑊈', '𑊈'),
- ('𑊊', '𑊍'),
- ('𑊏', '𑊝'),
- ('𑊟', '𑊨'),
- ('𑊰', '\u{112e8}'),
- ('\u{11300}', '𑌃'),
- ('𑌅', '𑌌'),
- ('𑌏', '𑌐'),
- ('𑌓', '𑌨'),
- ('𑌪', '𑌰'),
- ('𑌲', '𑌳'),
- ('𑌵', '𑌹'),
- ('𑌽', '𑍄'),
- ('𑍇', '𑍈'),
- ('𑍋', '𑍌'),
- ('𑍐', '𑍐'),
- ('\u{11357}', '\u{11357}'),
- ('𑍝', '𑍣'),
- ('𑎀', '𑎉'),
- ('𑎋', '𑎋'),
- ('𑎎', '𑎎'),
- ('𑎐', '𑎵'),
- ('𑎷', '\u{113c0}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '𑏊'),
- ('𑏌', '𑏍'),
- ('𑏑', '𑏑'),
- ('𑏓', '𑏓'),
- ('𑐀', '𑑁'),
- ('\u{11443}', '𑑅'),
- ('𑑇', '𑑊'),
- ('𑑟', '𑑡'),
- ('𑒀', '𑓁'),
- ('𑓄', '𑓅'),
- ('𑓇', '𑓇'),
- ('𑖀', '\u{115b5}'),
- ('𑖸', '𑖾'),
- ('𑗘', '\u{115dd}'),
- ('𑘀', '𑘾'),
- ('\u{11640}', '\u{11640}'),
- ('𑙄', '𑙄'),
- ('𑚀', '\u{116b5}'),
- ('𑚸', '𑚸'),
- ('𑜀', '𑜚'),
- ('\u{1171d}', '\u{1172a}'),
- ('𑝀', '𑝆'),
- ('𑠀', '𑠸'),
- ('𑢠', '𑣟'),
- ('𑣿', '𑤆'),
- ('𑤉', '𑤉'),
- ('𑤌', '𑤓'),
- ('𑤕', '𑤖'),
- ('𑤘', '𑤵'),
- ('𑤷', '𑤸'),
- ('\u{1193b}', '\u{1193c}'),
- ('𑤿', '𑥂'),
- ('𑦠', '𑦧'),
- ('𑦪', '\u{119d7}'),
- ('\u{119da}', '𑧟'),
- ('𑧡', '𑧡'),
- ('𑧣', '𑧤'),
- ('𑨀', '𑨲'),
- ('\u{11a35}', '\u{11a3e}'),
- ('𑩐', '𑪗'),
- ('𑪝', '𑪝'),
- ('𑪰', '𑫸'),
- ('𑯀', '𑯠'),
- ('𑰀', '𑰈'),
- ('𑰊', '\u{11c36}'),
- ('\u{11c38}', '𑰾'),
- ('𑱀', '𑱀'),
- ('𑱲', '𑲏'),
- ('\u{11c92}', '\u{11ca7}'),
- ('𑲩', '\u{11cb6}'),
- ('𑴀', '𑴆'),
- ('𑴈', '𑴉'),
- ('𑴋', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d41}'),
- ('\u{11d43}', '\u{11d43}'),
- ('𑵆', '\u{11d47}'),
- ('𑵠', '𑵥'),
- ('𑵧', '𑵨'),
- ('𑵪', '𑶎'),
- ('\u{11d90}', '\u{11d91}'),
- ('𑶓', '𑶖'),
- ('𑶘', '𑶘'),
- ('𑻠', '𑻶'),
- ('\u{11f00}', '𑼐'),
- ('𑼒', '\u{11f3a}'),
- ('𑼾', '\u{11f40}'),
- ('𑾰', '𑾰'),
- ('𒀀', '𒎙'),
- ('𒐀', '𒑮'),
- ('𒒀', '𒕃'),
- ('𒾐', '𒿰'),
- ('𓀀', '𓐯'),
- ('𓑁', '𓑆'),
- ('𓑠', '𔏺'),
- ('𔐀', '𔙆'),
- ('𖄀', '\u{1612e}'),
- ('𖠀', '𖨸'),
- ('𖩀', '𖩞'),
- ('𖩰', '𖪾'),
- ('𖫐', '𖫭'),
- ('𖬀', '𖬯'),
- ('𖭀', '𖭃'),
- ('𖭣', '𖭷'),
- ('𖭽', '𖮏'),
- ('𖵀', '𖵬'),
- ('𖹀', '𖹿'),
- ('𖼀', '𖽊'),
- ('\u{16f4f}', '𖾇'),
- ('\u{16f8f}', '𖾟'),
- ('𖿠', '𖿡'),
- ('𖿣', '𖿣'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('𗀀', '𘟷'),
- ('𘠀', '𘳕'),
- ('𘳿', '𘴈'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('𛀀', '𛄢'),
- ('𛄲', '𛄲'),
- ('𛅐', '𛅒'),
- ('𛅕', '𛅕'),
- ('𛅤', '𛅧'),
- ('𛅰', '𛋻'),
- ('𛰀', '𛱪'),
- ('𛱰', '𛱼'),
- ('𛲀', '𛲈'),
- ('𛲐', '𛲙'),
- ('\u{1bc9e}', '\u{1bc9e}'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝛀'),
- ('𝛂', '𝛚'),
- ('𝛜', '𝛺'),
- ('𝛼', '𝜔'),
- ('𝜖', '𝜴'),
- ('𝜶', '𝝎'),
- ('𝝐', '𝝮'),
- ('𝝰', '𝞈'),
- ('𝞊', '𝞨'),
- ('𝞪', '𝟂'),
- ('𝟄', '𝟋'),
- ('𝼀', '𝼞'),
- ('𝼥', '𝼪'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
- ('𞀰', '𞁭'),
- ('\u{1e08f}', '\u{1e08f}'),
- ('𞄀', '𞄬'),
- ('𞄷', '𞄽'),
- ('𞅎', '𞅎'),
- ('𞊐', '𞊭'),
- ('𞋀', '𞋫'),
- ('𞓐', '𞓫'),
- ('𞗐', '𞗭'),
- ('𞗰', '𞗰'),
- ('𞟠', '𞟦'),
- ('𞟨', '𞟫'),
- ('𞟭', '𞟮'),
- ('𞟰', '𞟾'),
- ('𞠀', '𞣄'),
- ('𞤀', '𞥃'),
- ('\u{1e947}', '\u{1e947}'),
- ('𞥋', '𞥋'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('🄰', '🅉'),
- ('🅐', '🅩'),
- ('🅰', '🆉'),
- ('𠀀', '𪛟'),
- ('𪜀', '𫜹'),
- ('𫝀', '𫠝'),
- ('𫠠', '𬺡'),
- ('𬺰', '𮯠'),
- ('𮯰', '𮹝'),
- ('丽', '𪘀'),
- ('𰀀', '𱍊'),
- ('𱍐', '𲎯'),
-];
-
-pub const BIDI_CONTROL: &'static [(char, char)] = &[
- ('\u{61c}', '\u{61c}'),
- ('\u{200e}', '\u{200f}'),
- ('\u{202a}', '\u{202e}'),
- ('\u{2066}', '\u{2069}'),
-];
-
-pub const BIDI_MIRRORED: &'static [(char, char)] = &[
- ('(', ')'),
- ('<', '<'),
- ('>', '>'),
- ('[', '['),
- (']', ']'),
- ('{', '{'),
- ('}', '}'),
- ('«', '«'),
- ('»', '»'),
- ('༺', '༽'),
- ('᚛', '᚜'),
- ('‹', '›'),
- ('⁅', '⁆'),
- ('⁽', '⁾'),
- ('₍', '₎'),
- ('⅀', '⅀'),
- ('∁', '∄'),
- ('∈', '∍'),
- ('∑', '∑'),
- ('∕', '∖'),
- ('√', '∝'),
- ('∟', '∢'),
- ('∤', '∤'),
- ('∦', '∦'),
- ('∫', '∳'),
- ('∹', '∹'),
- ('∻', '≌'),
- ('≒', '≕'),
- ('≟', '≠'),
- ('≢', '≢'),
- ('≤', '≫'),
- ('≭', '⊌'),
- ('⊏', '⊒'),
- ('⊘', '⊘'),
- ('⊢', '⊣'),
- ('⊦', '⊸'),
- ('⊾', '⊿'),
- ('⋉', '⋍'),
- ('⋐', '⋑'),
- ('⋖', '⋭'),
- ('⋰', '⋿'),
- ('⌈', '⌋'),
- ('⌠', '⌡'),
- ('〈', '〉'),
- ('❨', '❵'),
- ('⟀', '⟀'),
- ('⟃', '⟆'),
- ('⟈', '⟉'),
- ('⟋', '⟍'),
- ('⟓', '⟖'),
- ('⟜', '⟞'),
- ('⟢', '⟯'),
- ('⦃', '⦘'),
- ('⦛', '⦠'),
- ('⦢', '⦯'),
- ('⦸', '⦸'),
- ('⧀', '⧅'),
- ('⧉', '⧉'),
- ('⧎', '⧒'),
- ('⧔', '⧕'),
- ('⧘', '⧜'),
- ('⧡', '⧡'),
- ('⧣', '⧥'),
- ('⧨', '⧩'),
- ('⧴', '⧹'),
- ('⧼', '⧽'),
- ('⨊', '⨜'),
- ('⨞', '⨡'),
- ('⨤', '⨤'),
- ('⨦', '⨦'),
- ('⨩', '⨩'),
- ('⨫', '⨮'),
- ('⨴', '⨵'),
- ('⨼', '⨾'),
- ('⩗', '⩘'),
- ('⩤', '⩥'),
- ('⩪', '⩭'),
- ('⩯', '⩰'),
- ('⩳', '⩴'),
- ('⩹', '⪣'),
- ('⪦', '⪭'),
- ('⪯', '⫖'),
- ('⫝̸', '⫝̸'),
- ('⫞', '⫞'),
- ('⫢', '⫦'),
- ('⫬', '⫮'),
- ('⫳', '⫳'),
- ('⫷', '⫻'),
- ('⫽', '⫽'),
- ('⯾', '⯾'),
- ('⸂', '⸅'),
- ('⸉', '⸊'),
- ('⸌', '⸍'),
- ('⸜', '⸝'),
- ('⸠', '⸩'),
- ('⹕', '⹜'),
- ('〈', '】'),
- ('〔', '〛'),
- ('﹙', '﹞'),
- ('﹤', '﹥'),
- ('(', ')'),
- ('<', '<'),
- ('>', '>'),
- ('[', '['),
- (']', ']'),
- ('{', '{'),
- ('}', '}'),
- ('⦅', '⦆'),
- ('「', '」'),
- ('𝛛', '𝛛'),
- ('𝜕', '𝜕'),
- ('𝝏', '𝝏'),
- ('𝞉', '𝞉'),
- ('𝟃', '𝟃'),
-];
-
-pub const CASE_IGNORABLE: &'static [(char, char)] = &[
- ('\'', '\''),
- ('.', '.'),
- (':', ':'),
- ('^', '^'),
- ('`', '`'),
- ('¨', '¨'),
- ('\u{ad}', '\u{ad}'),
- ('¯', '¯'),
- ('´', '´'),
- ('·', '¸'),
- ('ʰ', '\u{36f}'),
- ('ʹ', '͵'),
- ('ͺ', 'ͺ'),
- ('΄', '΅'),
- ('·', '·'),
- ('\u{483}', '\u{489}'),
- ('ՙ', 'ՙ'),
- ('՟', '՟'),
- ('\u{591}', '\u{5bd}'),
- ('\u{5bf}', '\u{5bf}'),
- ('\u{5c1}', '\u{5c2}'),
- ('\u{5c4}', '\u{5c5}'),
- ('\u{5c7}', '\u{5c7}'),
- ('״', '״'),
- ('\u{600}', '\u{605}'),
- ('\u{610}', '\u{61a}'),
- ('\u{61c}', '\u{61c}'),
- ('ـ', 'ـ'),
- ('\u{64b}', '\u{65f}'),
- ('\u{670}', '\u{670}'),
- ('\u{6d6}', '\u{6dd}'),
- ('\u{6df}', '\u{6e8}'),
- ('\u{6ea}', '\u{6ed}'),
- ('\u{70f}', '\u{70f}'),
- ('\u{711}', '\u{711}'),
- ('\u{730}', '\u{74a}'),
- ('\u{7a6}', '\u{7b0}'),
- ('\u{7eb}', 'ߵ'),
- ('ߺ', 'ߺ'),
- ('\u{7fd}', '\u{7fd}'),
- ('\u{816}', '\u{82d}'),
- ('\u{859}', '\u{85b}'),
- ('࢈', '࢈'),
- ('\u{890}', '\u{891}'),
- ('\u{897}', '\u{89f}'),
- ('ࣉ', '\u{902}'),
- ('\u{93a}', '\u{93a}'),
- ('\u{93c}', '\u{93c}'),
- ('\u{941}', '\u{948}'),
- ('\u{94d}', '\u{94d}'),
- ('\u{951}', '\u{957}'),
- ('\u{962}', '\u{963}'),
- ('ॱ', 'ॱ'),
- ('\u{981}', '\u{981}'),
- ('\u{9bc}', '\u{9bc}'),
- ('\u{9c1}', '\u{9c4}'),
- ('\u{9cd}', '\u{9cd}'),
- ('\u{9e2}', '\u{9e3}'),
- ('\u{9fe}', '\u{9fe}'),
- ('\u{a01}', '\u{a02}'),
- ('\u{a3c}', '\u{a3c}'),
- ('\u{a41}', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4d}'),
- ('\u{a51}', '\u{a51}'),
- ('\u{a70}', '\u{a71}'),
- ('\u{a75}', '\u{a75}'),
- ('\u{a81}', '\u{a82}'),
- ('\u{abc}', '\u{abc}'),
- ('\u{ac1}', '\u{ac5}'),
- ('\u{ac7}', '\u{ac8}'),
- ('\u{acd}', '\u{acd}'),
- ('\u{ae2}', '\u{ae3}'),
- ('\u{afa}', '\u{aff}'),
- ('\u{b01}', '\u{b01}'),
- ('\u{b3c}', '\u{b3c}'),
- ('\u{b3f}', '\u{b3f}'),
- ('\u{b41}', '\u{b44}'),
- ('\u{b4d}', '\u{b4d}'),
- ('\u{b55}', '\u{b56}'),
- ('\u{b62}', '\u{b63}'),
- ('\u{b82}', '\u{b82}'),
- ('\u{bc0}', '\u{bc0}'),
- ('\u{bcd}', '\u{bcd}'),
- ('\u{c00}', '\u{c00}'),
- ('\u{c04}', '\u{c04}'),
- ('\u{c3c}', '\u{c3c}'),
- ('\u{c3e}', '\u{c40}'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4d}'),
- ('\u{c55}', '\u{c56}'),
- ('\u{c62}', '\u{c63}'),
- ('\u{c81}', '\u{c81}'),
- ('\u{cbc}', '\u{cbc}'),
- ('\u{cbf}', '\u{cbf}'),
- ('\u{cc6}', '\u{cc6}'),
- ('\u{ccc}', '\u{ccd}'),
- ('\u{ce2}', '\u{ce3}'),
- ('\u{d00}', '\u{d01}'),
- ('\u{d3b}', '\u{d3c}'),
- ('\u{d41}', '\u{d44}'),
- ('\u{d4d}', '\u{d4d}'),
- ('\u{d62}', '\u{d63}'),
- ('\u{d81}', '\u{d81}'),
- ('\u{dca}', '\u{dca}'),
- ('\u{dd2}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('\u{e31}', '\u{e31}'),
- ('\u{e34}', '\u{e3a}'),
- ('ๆ', '\u{e4e}'),
- ('\u{eb1}', '\u{eb1}'),
- ('\u{eb4}', '\u{ebc}'),
- ('ໆ', 'ໆ'),
- ('\u{ec8}', '\u{ece}'),
- ('\u{f18}', '\u{f19}'),
- ('\u{f35}', '\u{f35}'),
- ('\u{f37}', '\u{f37}'),
- ('\u{f39}', '\u{f39}'),
- ('\u{f71}', '\u{f7e}'),
- ('\u{f80}', '\u{f84}'),
- ('\u{f86}', '\u{f87}'),
- ('\u{f8d}', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('\u{fc6}', '\u{fc6}'),
- ('\u{102d}', '\u{1030}'),
- ('\u{1032}', '\u{1037}'),
- ('\u{1039}', '\u{103a}'),
- ('\u{103d}', '\u{103e}'),
- ('\u{1058}', '\u{1059}'),
- ('\u{105e}', '\u{1060}'),
- ('\u{1071}', '\u{1074}'),
- ('\u{1082}', '\u{1082}'),
- ('\u{1085}', '\u{1086}'),
- ('\u{108d}', '\u{108d}'),
- ('\u{109d}', '\u{109d}'),
- ('ჼ', 'ჼ'),
- ('\u{135d}', '\u{135f}'),
- ('\u{1712}', '\u{1714}'),
- ('\u{1732}', '\u{1733}'),
- ('\u{1752}', '\u{1753}'),
- ('\u{1772}', '\u{1773}'),
- ('\u{17b4}', '\u{17b5}'),
- ('\u{17b7}', '\u{17bd}'),
- ('\u{17c6}', '\u{17c6}'),
- ('\u{17c9}', '\u{17d3}'),
- ('ៗ', 'ៗ'),
- ('\u{17dd}', '\u{17dd}'),
- ('\u{180b}', '\u{180f}'),
- ('ᡃ', 'ᡃ'),
- ('\u{1885}', '\u{1886}'),
- ('\u{18a9}', '\u{18a9}'),
- ('\u{1920}', '\u{1922}'),
- ('\u{1927}', '\u{1928}'),
- ('\u{1932}', '\u{1932}'),
- ('\u{1939}', '\u{193b}'),
- ('\u{1a17}', '\u{1a18}'),
- ('\u{1a1b}', '\u{1a1b}'),
- ('\u{1a56}', '\u{1a56}'),
- ('\u{1a58}', '\u{1a5e}'),
- ('\u{1a60}', '\u{1a60}'),
- ('\u{1a62}', '\u{1a62}'),
- ('\u{1a65}', '\u{1a6c}'),
- ('\u{1a73}', '\u{1a7c}'),
- ('\u{1a7f}', '\u{1a7f}'),
- ('ᪧ', 'ᪧ'),
- ('\u{1ab0}', '\u{1ace}'),
- ('\u{1b00}', '\u{1b03}'),
- ('\u{1b34}', '\u{1b34}'),
- ('\u{1b36}', '\u{1b3a}'),
- ('\u{1b3c}', '\u{1b3c}'),
- ('\u{1b42}', '\u{1b42}'),
- ('\u{1b6b}', '\u{1b73}'),
- ('\u{1b80}', '\u{1b81}'),
- ('\u{1ba2}', '\u{1ba5}'),
- ('\u{1ba8}', '\u{1ba9}'),
- ('\u{1bab}', '\u{1bad}'),
- ('\u{1be6}', '\u{1be6}'),
- ('\u{1be8}', '\u{1be9}'),
- ('\u{1bed}', '\u{1bed}'),
- ('\u{1bef}', '\u{1bf1}'),
- ('\u{1c2c}', '\u{1c33}'),
- ('\u{1c36}', '\u{1c37}'),
- ('ᱸ', 'ᱽ'),
- ('\u{1cd0}', '\u{1cd2}'),
- ('\u{1cd4}', '\u{1ce0}'),
- ('\u{1ce2}', '\u{1ce8}'),
- ('\u{1ced}', '\u{1ced}'),
- ('\u{1cf4}', '\u{1cf4}'),
- ('\u{1cf8}', '\u{1cf9}'),
- ('ᴬ', 'ᵪ'),
- ('ᵸ', 'ᵸ'),
- ('ᶛ', '\u{1dff}'),
- ('᾽', '᾽'),
- ('᾿', '῁'),
- ('῍', '῏'),
- ('῝', '῟'),
- ('῭', '`'),
- ('´', '῾'),
- ('\u{200b}', '\u{200f}'),
- ('‘', '’'),
- ('․', '․'),
- ('‧', '‧'),
- ('\u{202a}', '\u{202e}'),
- ('\u{2060}', '\u{2064}'),
- ('\u{2066}', '\u{206f}'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('\u{20d0}', '\u{20f0}'),
- ('ⱼ', 'ⱽ'),
- ('\u{2cef}', '\u{2cf1}'),
- ('ⵯ', 'ⵯ'),
- ('\u{2d7f}', '\u{2d7f}'),
- ('\u{2de0}', '\u{2dff}'),
- ('ⸯ', 'ⸯ'),
- ('々', '々'),
- ('\u{302a}', '\u{302d}'),
- ('〱', '〵'),
- ('〻', '〻'),
- ('\u{3099}', 'ゞ'),
- ('ー', 'ヾ'),
- ('ꀕ', 'ꀕ'),
- ('ꓸ', 'ꓽ'),
- ('ꘌ', 'ꘌ'),
- ('\u{a66f}', '\u{a672}'),
- ('\u{a674}', '\u{a67d}'),
- ('ꙿ', 'ꙿ'),
- ('ꚜ', '\u{a69f}'),
- ('\u{a6f0}', '\u{a6f1}'),
- ('꜀', '꜡'),
- ('ꝰ', 'ꝰ'),
- ('ꞈ', '꞊'),
- ('ꟲ', 'ꟴ'),
- ('ꟸ', 'ꟹ'),
- ('\u{a802}', '\u{a802}'),
- ('\u{a806}', '\u{a806}'),
- ('\u{a80b}', '\u{a80b}'),
- ('\u{a825}', '\u{a826}'),
- ('\u{a82c}', '\u{a82c}'),
- ('\u{a8c4}', '\u{a8c5}'),
- ('\u{a8e0}', '\u{a8f1}'),
- ('\u{a8ff}', '\u{a8ff}'),
- ('\u{a926}', '\u{a92d}'),
- ('\u{a947}', '\u{a951}'),
- ('\u{a980}', '\u{a982}'),
- ('\u{a9b3}', '\u{a9b3}'),
- ('\u{a9b6}', '\u{a9b9}'),
- ('\u{a9bc}', '\u{a9bd}'),
- ('ꧏ', 'ꧏ'),
- ('\u{a9e5}', 'ꧦ'),
- ('\u{aa29}', '\u{aa2e}'),
- ('\u{aa31}', '\u{aa32}'),
- ('\u{aa35}', '\u{aa36}'),
- ('\u{aa43}', '\u{aa43}'),
- ('\u{aa4c}', '\u{aa4c}'),
- ('ꩰ', 'ꩰ'),
- ('\u{aa7c}', '\u{aa7c}'),
- ('\u{aab0}', '\u{aab0}'),
- ('\u{aab2}', '\u{aab4}'),
- ('\u{aab7}', '\u{aab8}'),
- ('\u{aabe}', '\u{aabf}'),
- ('\u{aac1}', '\u{aac1}'),
- ('ꫝ', 'ꫝ'),
- ('\u{aaec}', '\u{aaed}'),
- ('ꫳ', 'ꫴ'),
- ('\u{aaf6}', '\u{aaf6}'),
- ('꭛', 'ꭟ'),
- ('ꭩ', '꭫'),
- ('\u{abe5}', '\u{abe5}'),
- ('\u{abe8}', '\u{abe8}'),
- ('\u{abed}', '\u{abed}'),
- ('\u{fb1e}', '\u{fb1e}'),
- ('﮲', '﯂'),
- ('\u{fe00}', '\u{fe0f}'),
- ('︓', '︓'),
- ('\u{fe20}', '\u{fe2f}'),
- ('﹒', '﹒'),
- ('﹕', '﹕'),
- ('\u{feff}', '\u{feff}'),
- (''', '''),
- ('.', '.'),
- (':', ':'),
- ('^', '^'),
- ('`', '`'),
- ('ー', 'ー'),
- ('\u{ff9e}', '\u{ff9f}'),
- (' ̄', ' ̄'),
- ('\u{fff9}', '\u{fffb}'),
- ('\u{101fd}', '\u{101fd}'),
- ('\u{102e0}', '\u{102e0}'),
- ('\u{10376}', '\u{1037a}'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('\u{10a01}', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '\u{10a0f}'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '\u{10a3f}'),
- ('\u{10ae5}', '\u{10ae6}'),
- ('\u{10d24}', '\u{10d27}'),
- ('𐵎', '𐵎'),
- ('\u{10d69}', '\u{10d6d}'),
- ('𐵯', '𐵯'),
- ('\u{10eab}', '\u{10eac}'),
- ('\u{10efc}', '\u{10eff}'),
- ('\u{10f46}', '\u{10f50}'),
- ('\u{10f82}', '\u{10f85}'),
- ('\u{11001}', '\u{11001}'),
- ('\u{11038}', '\u{11046}'),
- ('\u{11070}', '\u{11070}'),
- ('\u{11073}', '\u{11074}'),
- ('\u{1107f}', '\u{11081}'),
- ('\u{110b3}', '\u{110b6}'),
- ('\u{110b9}', '\u{110ba}'),
- ('\u{110bd}', '\u{110bd}'),
- ('\u{110c2}', '\u{110c2}'),
- ('\u{110cd}', '\u{110cd}'),
- ('\u{11100}', '\u{11102}'),
- ('\u{11127}', '\u{1112b}'),
- ('\u{1112d}', '\u{11134}'),
- ('\u{11173}', '\u{11173}'),
- ('\u{11180}', '\u{11181}'),
- ('\u{111b6}', '\u{111be}'),
- ('\u{111c9}', '\u{111cc}'),
- ('\u{111cf}', '\u{111cf}'),
- ('\u{1122f}', '\u{11231}'),
- ('\u{11234}', '\u{11234}'),
- ('\u{11236}', '\u{11237}'),
- ('\u{1123e}', '\u{1123e}'),
- ('\u{11241}', '\u{11241}'),
- ('\u{112df}', '\u{112df}'),
- ('\u{112e3}', '\u{112ea}'),
- ('\u{11300}', '\u{11301}'),
- ('\u{1133b}', '\u{1133c}'),
- ('\u{11340}', '\u{11340}'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
- ('\u{113bb}', '\u{113c0}'),
- ('\u{113ce}', '\u{113ce}'),
- ('\u{113d0}', '\u{113d0}'),
- ('\u{113d2}', '\u{113d2}'),
- ('\u{113e1}', '\u{113e2}'),
- ('\u{11438}', '\u{1143f}'),
- ('\u{11442}', '\u{11444}'),
- ('\u{11446}', '\u{11446}'),
- ('\u{1145e}', '\u{1145e}'),
- ('\u{114b3}', '\u{114b8}'),
- ('\u{114ba}', '\u{114ba}'),
- ('\u{114bf}', '\u{114c0}'),
- ('\u{114c2}', '\u{114c3}'),
- ('\u{115b2}', '\u{115b5}'),
- ('\u{115bc}', '\u{115bd}'),
- ('\u{115bf}', '\u{115c0}'),
- ('\u{115dc}', '\u{115dd}'),
- ('\u{11633}', '\u{1163a}'),
- ('\u{1163d}', '\u{1163d}'),
- ('\u{1163f}', '\u{11640}'),
- ('\u{116ab}', '\u{116ab}'),
- ('\u{116ad}', '\u{116ad}'),
- ('\u{116b0}', '\u{116b5}'),
- ('\u{116b7}', '\u{116b7}'),
- ('\u{1171d}', '\u{1171d}'),
- ('\u{1171f}', '\u{1171f}'),
- ('\u{11722}', '\u{11725}'),
- ('\u{11727}', '\u{1172b}'),
- ('\u{1182f}', '\u{11837}'),
- ('\u{11839}', '\u{1183a}'),
- ('\u{1193b}', '\u{1193c}'),
- ('\u{1193e}', '\u{1193e}'),
- ('\u{11943}', '\u{11943}'),
- ('\u{119d4}', '\u{119d7}'),
- ('\u{119da}', '\u{119db}'),
- ('\u{119e0}', '\u{119e0}'),
- ('\u{11a01}', '\u{11a0a}'),
- ('\u{11a33}', '\u{11a38}'),
- ('\u{11a3b}', '\u{11a3e}'),
- ('\u{11a47}', '\u{11a47}'),
- ('\u{11a51}', '\u{11a56}'),
- ('\u{11a59}', '\u{11a5b}'),
- ('\u{11a8a}', '\u{11a96}'),
- ('\u{11a98}', '\u{11a99}'),
- ('\u{11c30}', '\u{11c36}'),
- ('\u{11c38}', '\u{11c3d}'),
- ('\u{11c3f}', '\u{11c3f}'),
- ('\u{11c92}', '\u{11ca7}'),
- ('\u{11caa}', '\u{11cb0}'),
- ('\u{11cb2}', '\u{11cb3}'),
- ('\u{11cb5}', '\u{11cb6}'),
- ('\u{11d31}', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d45}'),
- ('\u{11d47}', '\u{11d47}'),
- ('\u{11d90}', '\u{11d91}'),
- ('\u{11d95}', '\u{11d95}'),
- ('\u{11d97}', '\u{11d97}'),
- ('\u{11ef3}', '\u{11ef4}'),
- ('\u{11f00}', '\u{11f01}'),
- ('\u{11f36}', '\u{11f3a}'),
- ('\u{11f40}', '\u{11f40}'),
- ('\u{11f42}', '\u{11f42}'),
- ('\u{11f5a}', '\u{11f5a}'),
- ('\u{13430}', '\u{13440}'),
- ('\u{13447}', '\u{13455}'),
- ('\u{1611e}', '\u{16129}'),
- ('\u{1612d}', '\u{1612f}'),
- ('\u{16af0}', '\u{16af4}'),
- ('\u{16b30}', '\u{16b36}'),
- ('𖭀', '𖭃'),
- ('𖵀', '𖵂'),
- ('𖵫', '𖵬'),
- ('\u{16f4f}', '\u{16f4f}'),
- ('\u{16f8f}', '𖾟'),
- ('𖿠', '𖿡'),
- ('𖿣', '\u{16fe4}'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('\u{1bc9d}', '\u{1bc9e}'),
- ('\u{1bca0}', '\u{1bca3}'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('\u{1d167}', '\u{1d169}'),
- ('\u{1d173}', '\u{1d182}'),
- ('\u{1d185}', '\u{1d18b}'),
- ('\u{1d1aa}', '\u{1d1ad}'),
- ('\u{1d242}', '\u{1d244}'),
- ('\u{1da00}', '\u{1da36}'),
- ('\u{1da3b}', '\u{1da6c}'),
- ('\u{1da75}', '\u{1da75}'),
- ('\u{1da84}', '\u{1da84}'),
- ('\u{1da9b}', '\u{1da9f}'),
- ('\u{1daa1}', '\u{1daaf}'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
- ('𞀰', '𞁭'),
- ('\u{1e08f}', '\u{1e08f}'),
- ('\u{1e130}', '𞄽'),
- ('\u{1e2ae}', '\u{1e2ae}'),
- ('\u{1e2ec}', '\u{1e2ef}'),
- ('𞓫', '\u{1e4ef}'),
- ('\u{1e5ee}', '\u{1e5ef}'),
- ('\u{1e8d0}', '\u{1e8d6}'),
- ('\u{1e944}', '𞥋'),
- ('🏻', '🏿'),
- ('\u{e0001}', '\u{e0001}'),
- ('\u{e0020}', '\u{e007f}'),
- ('\u{e0100}', '\u{e01ef}'),
-];
-
-pub const CASED: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('a', 'z'),
- ('ª', 'ª'),
- ('µ', 'µ'),
- ('º', 'º'),
- ('À', 'Ö'),
- ('Ø', 'ö'),
- ('ø', 'ƺ'),
- ('Ƽ', 'ƿ'),
- ('DŽ', 'ʓ'),
- ('ʕ', 'ʸ'),
- ('ˀ', 'ˁ'),
- ('ˠ', 'ˤ'),
- ('\u{345}', '\u{345}'),
- ('Ͱ', 'ͳ'),
- ('Ͷ', 'ͷ'),
- ('ͺ', 'ͽ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', 'ϵ'),
- ('Ϸ', 'ҁ'),
- ('Ҋ', 'ԯ'),
- ('Ա', 'Ֆ'),
- ('ՠ', 'ֈ'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ა', 'ჺ'),
- ('ჼ', 'ჿ'),
- ('Ꭰ', 'Ᏽ'),
- ('ᏸ', 'ᏽ'),
- ('ᲀ', 'ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('ᴀ', 'ᶿ'),
- ('Ḁ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ᾼ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῌ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('ῠ', 'Ῥ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῼ'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℊ', 'ℓ'),
- ('ℕ', 'ℕ'),
- ('ℙ', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('Ω', 'Ω'),
- ('ℨ', 'ℨ'),
- ('K', 'ℭ'),
- ('ℯ', 'ℴ'),
- ('ℹ', 'ℹ'),
- ('ℼ', 'ℿ'),
- ('ⅅ', 'ⅉ'),
- ('ⅎ', 'ⅎ'),
- ('Ⅰ', 'ⅿ'),
- ('Ↄ', 'ↄ'),
- ('Ⓐ', 'ⓩ'),
- ('Ⰰ', 'ⳤ'),
- ('Ⳬ', 'ⳮ'),
- ('Ⳳ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('Ꙁ', 'ꙭ'),
- ('Ꚁ', 'ꚝ'),
- ('Ꜣ', 'ꞇ'),
- ('Ꞌ', 'ꞎ'),
- ('Ꞑ', 'ꟍ'),
- ('Ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'Ƛ'),
- ('ꟲ', 'ꟶ'),
- ('ꟸ', 'ꟺ'),
- ('ꬰ', 'ꭚ'),
- ('ꭜ', 'ꭩ'),
- ('ꭰ', 'ꮿ'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('A', 'Z'),
- ('a', 'z'),
- ('𐐀', '𐑏'),
- ('𐒰', '𐓓'),
- ('𐓘', '𐓻'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐞀', '𐞀'),
- ('𐞃', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𐲀', '𐲲'),
- ('𐳀', '𐳲'),
- ('𐵐', '𐵥'),
- ('𐵰', '𐶅'),
- ('𑢠', '𑣟'),
- ('𖹀', '𖹿'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝛀'),
- ('𝛂', '𝛚'),
- ('𝛜', '𝛺'),
- ('𝛼', '𝜔'),
- ('𝜖', '𝜴'),
- ('𝜶', '𝝎'),
- ('𝝐', '𝝮'),
- ('𝝰', '𝞈'),
- ('𝞊', '𝞨'),
- ('𝞪', '𝟂'),
- ('𝟄', '𝟋'),
- ('𝼀', '𝼉'),
- ('𝼋', '𝼞'),
- ('𝼥', '𝼪'),
- ('𞀰', '𞁭'),
- ('𞤀', '𞥃'),
- ('🄰', '🅉'),
- ('🅐', '🅩'),
- ('🅰', '🆉'),
-];
-
-pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('µ', 'µ'),
- ('À', 'Ö'),
- ('Ø', 'ß'),
- ('Ā', 'Ā'),
- ('Ă', 'Ă'),
- ('Ą', 'Ą'),
- ('Ć', 'Ć'),
- ('Ĉ', 'Ĉ'),
- ('Ċ', 'Ċ'),
- ('Č', 'Č'),
- ('Ď', 'Ď'),
- ('Đ', 'Đ'),
- ('Ē', 'Ē'),
- ('Ĕ', 'Ĕ'),
- ('Ė', 'Ė'),
- ('Ę', 'Ę'),
- ('Ě', 'Ě'),
- ('Ĝ', 'Ĝ'),
- ('Ğ', 'Ğ'),
- ('Ġ', 'Ġ'),
- ('Ģ', 'Ģ'),
- ('Ĥ', 'Ĥ'),
- ('Ħ', 'Ħ'),
- ('Ĩ', 'Ĩ'),
- ('Ī', 'Ī'),
- ('Ĭ', 'Ĭ'),
- ('Į', 'Į'),
- ('İ', 'İ'),
- ('IJ', 'IJ'),
- ('Ĵ', 'Ĵ'),
- ('Ķ', 'Ķ'),
- ('Ĺ', 'Ĺ'),
- ('Ļ', 'Ļ'),
- ('Ľ', 'Ľ'),
- ('Ŀ', 'Ŀ'),
- ('Ł', 'Ł'),
- ('Ń', 'Ń'),
- ('Ņ', 'Ņ'),
- ('Ň', 'Ň'),
- ('ʼn', 'Ŋ'),
- ('Ō', 'Ō'),
- ('Ŏ', 'Ŏ'),
- ('Ő', 'Ő'),
- ('Œ', 'Œ'),
- ('Ŕ', 'Ŕ'),
- ('Ŗ', 'Ŗ'),
- ('Ř', 'Ř'),
- ('Ś', 'Ś'),
- ('Ŝ', 'Ŝ'),
- ('Ş', 'Ş'),
- ('Š', 'Š'),
- ('Ţ', 'Ţ'),
- ('Ť', 'Ť'),
- ('Ŧ', 'Ŧ'),
- ('Ũ', 'Ũ'),
- ('Ū', 'Ū'),
- ('Ŭ', 'Ŭ'),
- ('Ů', 'Ů'),
- ('Ű', 'Ű'),
- ('Ų', 'Ų'),
- ('Ŵ', 'Ŵ'),
- ('Ŷ', 'Ŷ'),
- ('Ÿ', 'Ź'),
- ('Ż', 'Ż'),
- ('Ž', 'Ž'),
- ('ſ', 'ſ'),
- ('Ɓ', 'Ƃ'),
- ('Ƅ', 'Ƅ'),
- ('Ɔ', 'Ƈ'),
- ('Ɖ', 'Ƌ'),
- ('Ǝ', 'Ƒ'),
- ('Ɠ', 'Ɣ'),
- ('Ɩ', 'Ƙ'),
- ('Ɯ', 'Ɲ'),
- ('Ɵ', 'Ơ'),
- ('Ƣ', 'Ƣ'),
- ('Ƥ', 'Ƥ'),
- ('Ʀ', 'Ƨ'),
- ('Ʃ', 'Ʃ'),
- ('Ƭ', 'Ƭ'),
- ('Ʈ', 'Ư'),
- ('Ʊ', 'Ƴ'),
- ('Ƶ', 'Ƶ'),
- ('Ʒ', 'Ƹ'),
- ('Ƽ', 'Ƽ'),
- ('DŽ', 'Dž'),
- ('LJ', 'Lj'),
- ('NJ', 'Nj'),
- ('Ǎ', 'Ǎ'),
- ('Ǐ', 'Ǐ'),
- ('Ǒ', 'Ǒ'),
- ('Ǔ', 'Ǔ'),
- ('Ǖ', 'Ǖ'),
- ('Ǘ', 'Ǘ'),
- ('Ǚ', 'Ǚ'),
- ('Ǜ', 'Ǜ'),
- ('Ǟ', 'Ǟ'),
- ('Ǡ', 'Ǡ'),
- ('Ǣ', 'Ǣ'),
- ('Ǥ', 'Ǥ'),
- ('Ǧ', 'Ǧ'),
- ('Ǩ', 'Ǩ'),
- ('Ǫ', 'Ǫ'),
- ('Ǭ', 'Ǭ'),
- ('Ǯ', 'Ǯ'),
- ('DZ', 'Dz'),
- ('Ǵ', 'Ǵ'),
- ('Ƕ', 'Ǹ'),
- ('Ǻ', 'Ǻ'),
- ('Ǽ', 'Ǽ'),
- ('Ǿ', 'Ǿ'),
- ('Ȁ', 'Ȁ'),
- ('Ȃ', 'Ȃ'),
- ('Ȅ', 'Ȅ'),
- ('Ȇ', 'Ȇ'),
- ('Ȉ', 'Ȉ'),
- ('Ȋ', 'Ȋ'),
- ('Ȍ', 'Ȍ'),
- ('Ȏ', 'Ȏ'),
- ('Ȑ', 'Ȑ'),
- ('Ȓ', 'Ȓ'),
- ('Ȕ', 'Ȕ'),
- ('Ȗ', 'Ȗ'),
- ('Ș', 'Ș'),
- ('Ț', 'Ț'),
- ('Ȝ', 'Ȝ'),
- ('Ȟ', 'Ȟ'),
- ('Ƞ', 'Ƞ'),
- ('Ȣ', 'Ȣ'),
- ('Ȥ', 'Ȥ'),
- ('Ȧ', 'Ȧ'),
- ('Ȩ', 'Ȩ'),
- ('Ȫ', 'Ȫ'),
- ('Ȭ', 'Ȭ'),
- ('Ȯ', 'Ȯ'),
- ('Ȱ', 'Ȱ'),
- ('Ȳ', 'Ȳ'),
- ('Ⱥ', 'Ȼ'),
- ('Ƚ', 'Ⱦ'),
- ('Ɂ', 'Ɂ'),
- ('Ƀ', 'Ɇ'),
- ('Ɉ', 'Ɉ'),
- ('Ɋ', 'Ɋ'),
- ('Ɍ', 'Ɍ'),
- ('Ɏ', 'Ɏ'),
- ('\u{345}', '\u{345}'),
- ('Ͱ', 'Ͱ'),
- ('Ͳ', 'Ͳ'),
- ('Ͷ', 'Ͷ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ώ'),
- ('Α', 'Ρ'),
- ('Σ', 'Ϋ'),
- ('ς', 'ς'),
- ('Ϗ', 'ϑ'),
- ('ϕ', 'ϖ'),
- ('Ϙ', 'Ϙ'),
- ('Ϛ', 'Ϛ'),
- ('Ϝ', 'Ϝ'),
- ('Ϟ', 'Ϟ'),
- ('Ϡ', 'Ϡ'),
- ('Ϣ', 'Ϣ'),
- ('Ϥ', 'Ϥ'),
- ('Ϧ', 'Ϧ'),
- ('Ϩ', 'Ϩ'),
- ('Ϫ', 'Ϫ'),
- ('Ϭ', 'Ϭ'),
- ('Ϯ', 'Ϯ'),
- ('ϰ', 'ϱ'),
- ('ϴ', 'ϵ'),
- ('Ϸ', 'Ϸ'),
- ('Ϲ', 'Ϻ'),
- ('Ͻ', 'Я'),
- ('Ѡ', 'Ѡ'),
- ('Ѣ', 'Ѣ'),
- ('Ѥ', 'Ѥ'),
- ('Ѧ', 'Ѧ'),
- ('Ѩ', 'Ѩ'),
- ('Ѫ', 'Ѫ'),
- ('Ѭ', 'Ѭ'),
- ('Ѯ', 'Ѯ'),
- ('Ѱ', 'Ѱ'),
- ('Ѳ', 'Ѳ'),
- ('Ѵ', 'Ѵ'),
- ('Ѷ', 'Ѷ'),
- ('Ѹ', 'Ѹ'),
- ('Ѻ', 'Ѻ'),
- ('Ѽ', 'Ѽ'),
- ('Ѿ', 'Ѿ'),
- ('Ҁ', 'Ҁ'),
- ('Ҋ', 'Ҋ'),
- ('Ҍ', 'Ҍ'),
- ('Ҏ', 'Ҏ'),
- ('Ґ', 'Ґ'),
- ('Ғ', 'Ғ'),
- ('Ҕ', 'Ҕ'),
- ('Җ', 'Җ'),
- ('Ҙ', 'Ҙ'),
- ('Қ', 'Қ'),
- ('Ҝ', 'Ҝ'),
- ('Ҟ', 'Ҟ'),
- ('Ҡ', 'Ҡ'),
- ('Ң', 'Ң'),
- ('Ҥ', 'Ҥ'),
- ('Ҧ', 'Ҧ'),
- ('Ҩ', 'Ҩ'),
- ('Ҫ', 'Ҫ'),
- ('Ҭ', 'Ҭ'),
- ('Ү', 'Ү'),
- ('Ұ', 'Ұ'),
- ('Ҳ', 'Ҳ'),
- ('Ҵ', 'Ҵ'),
- ('Ҷ', 'Ҷ'),
- ('Ҹ', 'Ҹ'),
- ('Һ', 'Һ'),
- ('Ҽ', 'Ҽ'),
- ('Ҿ', 'Ҿ'),
- ('Ӏ', 'Ӂ'),
- ('Ӄ', 'Ӄ'),
- ('Ӆ', 'Ӆ'),
- ('Ӈ', 'Ӈ'),
- ('Ӊ', 'Ӊ'),
- ('Ӌ', 'Ӌ'),
- ('Ӎ', 'Ӎ'),
- ('Ӑ', 'Ӑ'),
- ('Ӓ', 'Ӓ'),
- ('Ӕ', 'Ӕ'),
- ('Ӗ', 'Ӗ'),
- ('Ә', 'Ә'),
- ('Ӛ', 'Ӛ'),
- ('Ӝ', 'Ӝ'),
- ('Ӟ', 'Ӟ'),
- ('Ӡ', 'Ӡ'),
- ('Ӣ', 'Ӣ'),
- ('Ӥ', 'Ӥ'),
- ('Ӧ', 'Ӧ'),
- ('Ө', 'Ө'),
- ('Ӫ', 'Ӫ'),
- ('Ӭ', 'Ӭ'),
- ('Ӯ', 'Ӯ'),
- ('Ӱ', 'Ӱ'),
- ('Ӳ', 'Ӳ'),
- ('Ӵ', 'Ӵ'),
- ('Ӷ', 'Ӷ'),
- ('Ӹ', 'Ӹ'),
- ('Ӻ', 'Ӻ'),
- ('Ӽ', 'Ӽ'),
- ('Ӿ', 'Ӿ'),
- ('Ԁ', 'Ԁ'),
- ('Ԃ', 'Ԃ'),
- ('Ԅ', 'Ԅ'),
- ('Ԇ', 'Ԇ'),
- ('Ԉ', 'Ԉ'),
- ('Ԋ', 'Ԋ'),
- ('Ԍ', 'Ԍ'),
- ('Ԏ', 'Ԏ'),
- ('Ԑ', 'Ԑ'),
- ('Ԓ', 'Ԓ'),
- ('Ԕ', 'Ԕ'),
- ('Ԗ', 'Ԗ'),
- ('Ԙ', 'Ԙ'),
- ('Ԛ', 'Ԛ'),
- ('Ԝ', 'Ԝ'),
- ('Ԟ', 'Ԟ'),
- ('Ԡ', 'Ԡ'),
- ('Ԣ', 'Ԣ'),
- ('Ԥ', 'Ԥ'),
- ('Ԧ', 'Ԧ'),
- ('Ԩ', 'Ԩ'),
- ('Ԫ', 'Ԫ'),
- ('Ԭ', 'Ԭ'),
- ('Ԯ', 'Ԯ'),
- ('Ա', 'Ֆ'),
- ('և', 'և'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ᏸ', 'ᏽ'),
- ('ᲀ', 'Ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('Ḁ', 'Ḁ'),
- ('Ḃ', 'Ḃ'),
- ('Ḅ', 'Ḅ'),
- ('Ḇ', 'Ḇ'),
- ('Ḉ', 'Ḉ'),
- ('Ḋ', 'Ḋ'),
- ('Ḍ', 'Ḍ'),
- ('Ḏ', 'Ḏ'),
- ('Ḑ', 'Ḑ'),
- ('Ḓ', 'Ḓ'),
- ('Ḕ', 'Ḕ'),
- ('Ḗ', 'Ḗ'),
- ('Ḙ', 'Ḙ'),
- ('Ḛ', 'Ḛ'),
- ('Ḝ', 'Ḝ'),
- ('Ḟ', 'Ḟ'),
- ('Ḡ', 'Ḡ'),
- ('Ḣ', 'Ḣ'),
- ('Ḥ', 'Ḥ'),
- ('Ḧ', 'Ḧ'),
- ('Ḩ', 'Ḩ'),
- ('Ḫ', 'Ḫ'),
- ('Ḭ', 'Ḭ'),
- ('Ḯ', 'Ḯ'),
- ('Ḱ', 'Ḱ'),
- ('Ḳ', 'Ḳ'),
- ('Ḵ', 'Ḵ'),
- ('Ḷ', 'Ḷ'),
- ('Ḹ', 'Ḹ'),
- ('Ḻ', 'Ḻ'),
- ('Ḽ', 'Ḽ'),
- ('Ḿ', 'Ḿ'),
- ('Ṁ', 'Ṁ'),
- ('Ṃ', 'Ṃ'),
- ('Ṅ', 'Ṅ'),
- ('Ṇ', 'Ṇ'),
- ('Ṉ', 'Ṉ'),
- ('Ṋ', 'Ṋ'),
- ('Ṍ', 'Ṍ'),
- ('Ṏ', 'Ṏ'),
- ('Ṑ', 'Ṑ'),
- ('Ṓ', 'Ṓ'),
- ('Ṕ', 'Ṕ'),
- ('Ṗ', 'Ṗ'),
- ('Ṙ', 'Ṙ'),
- ('Ṛ', 'Ṛ'),
- ('Ṝ', 'Ṝ'),
- ('Ṟ', 'Ṟ'),
- ('Ṡ', 'Ṡ'),
- ('Ṣ', 'Ṣ'),
- ('Ṥ', 'Ṥ'),
- ('Ṧ', 'Ṧ'),
- ('Ṩ', 'Ṩ'),
- ('Ṫ', 'Ṫ'),
- ('Ṭ', 'Ṭ'),
- ('Ṯ', 'Ṯ'),
- ('Ṱ', 'Ṱ'),
- ('Ṳ', 'Ṳ'),
- ('Ṵ', 'Ṵ'),
- ('Ṷ', 'Ṷ'),
- ('Ṹ', 'Ṹ'),
- ('Ṻ', 'Ṻ'),
- ('Ṽ', 'Ṽ'),
- ('Ṿ', 'Ṿ'),
- ('Ẁ', 'Ẁ'),
- ('Ẃ', 'Ẃ'),
- ('Ẅ', 'Ẅ'),
- ('Ẇ', 'Ẇ'),
- ('Ẉ', 'Ẉ'),
- ('Ẋ', 'Ẋ'),
- ('Ẍ', 'Ẍ'),
- ('Ẏ', 'Ẏ'),
- ('Ẑ', 'Ẑ'),
- ('Ẓ', 'Ẓ'),
- ('Ẕ', 'Ẕ'),
- ('ẚ', 'ẛ'),
- ('ẞ', 'ẞ'),
- ('Ạ', 'Ạ'),
- ('Ả', 'Ả'),
- ('Ấ', 'Ấ'),
- ('Ầ', 'Ầ'),
- ('Ẩ', 'Ẩ'),
- ('Ẫ', 'Ẫ'),
- ('Ậ', 'Ậ'),
- ('Ắ', 'Ắ'),
- ('Ằ', 'Ằ'),
- ('Ẳ', 'Ẳ'),
- ('Ẵ', 'Ẵ'),
- ('Ặ', 'Ặ'),
- ('Ẹ', 'Ẹ'),
- ('Ẻ', 'Ẻ'),
- ('Ẽ', 'Ẽ'),
- ('Ế', 'Ế'),
- ('Ề', 'Ề'),
- ('Ể', 'Ể'),
- ('Ễ', 'Ễ'),
- ('Ệ', 'Ệ'),
- ('Ỉ', 'Ỉ'),
- ('Ị', 'Ị'),
- ('Ọ', 'Ọ'),
- ('Ỏ', 'Ỏ'),
- ('Ố', 'Ố'),
- ('Ồ', 'Ồ'),
- ('Ổ', 'Ổ'),
- ('Ỗ', 'Ỗ'),
- ('Ộ', 'Ộ'),
- ('Ớ', 'Ớ'),
- ('Ờ', 'Ờ'),
- ('Ở', 'Ở'),
- ('Ỡ', 'Ỡ'),
- ('Ợ', 'Ợ'),
- ('Ụ', 'Ụ'),
- ('Ủ', 'Ủ'),
- ('Ứ', 'Ứ'),
- ('Ừ', 'Ừ'),
- ('Ử', 'Ử'),
- ('Ữ', 'Ữ'),
- ('Ự', 'Ự'),
- ('Ỳ', 'Ỳ'),
- ('Ỵ', 'Ỵ'),
- ('Ỷ', 'Ỷ'),
- ('Ỹ', 'Ỹ'),
- ('Ỻ', 'Ỻ'),
- ('Ỽ', 'Ỽ'),
- ('Ỿ', 'Ỿ'),
- ('Ἀ', 'Ἇ'),
- ('Ἐ', 'Ἕ'),
- ('Ἠ', 'Ἧ'),
- ('Ἰ', 'Ἷ'),
- ('Ὀ', 'Ὅ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'Ὗ'),
- ('Ὠ', 'Ὧ'),
- ('ᾀ', 'ᾯ'),
- ('ᾲ', 'ᾴ'),
- ('ᾷ', 'ᾼ'),
- ('ῂ', 'ῄ'),
- ('ῇ', 'ῌ'),
- ('Ῐ', 'Ί'),
- ('Ῠ', 'Ῥ'),
- ('ῲ', 'ῴ'),
- ('ῷ', 'ῼ'),
- ('Ω', 'Ω'),
- ('K', 'Å'),
- ('Ⅎ', 'Ⅎ'),
- ('Ⅰ', 'Ⅿ'),
- ('Ↄ', 'Ↄ'),
- ('Ⓐ', 'Ⓩ'),
- ('Ⰰ', 'Ⱟ'),
- ('Ⱡ', 'Ⱡ'),
- ('Ɫ', 'Ɽ'),
- ('Ⱨ', 'Ⱨ'),
- ('Ⱪ', 'Ⱪ'),
- ('Ⱬ', 'Ⱬ'),
- ('Ɑ', 'Ɒ'),
- ('Ⱳ', 'Ⱳ'),
- ('Ⱶ', 'Ⱶ'),
- ('Ȿ', 'Ⲁ'),
- ('Ⲃ', 'Ⲃ'),
- ('Ⲅ', 'Ⲅ'),
- ('Ⲇ', 'Ⲇ'),
- ('Ⲉ', 'Ⲉ'),
- ('Ⲋ', 'Ⲋ'),
- ('Ⲍ', 'Ⲍ'),
- ('Ⲏ', 'Ⲏ'),
- ('Ⲑ', 'Ⲑ'),
- ('Ⲓ', 'Ⲓ'),
- ('Ⲕ', 'Ⲕ'),
- ('Ⲗ', 'Ⲗ'),
- ('Ⲙ', 'Ⲙ'),
- ('Ⲛ', 'Ⲛ'),
- ('Ⲝ', 'Ⲝ'),
- ('Ⲟ', 'Ⲟ'),
- ('Ⲡ', 'Ⲡ'),
- ('Ⲣ', 'Ⲣ'),
- ('Ⲥ', 'Ⲥ'),
- ('Ⲧ', 'Ⲧ'),
- ('Ⲩ', 'Ⲩ'),
- ('Ⲫ', 'Ⲫ'),
- ('Ⲭ', 'Ⲭ'),
- ('Ⲯ', 'Ⲯ'),
- ('Ⲱ', 'Ⲱ'),
- ('Ⲳ', 'Ⲳ'),
- ('Ⲵ', 'Ⲵ'),
- ('Ⲷ', 'Ⲷ'),
- ('Ⲹ', 'Ⲹ'),
- ('Ⲻ', 'Ⲻ'),
- ('Ⲽ', 'Ⲽ'),
- ('Ⲿ', 'Ⲿ'),
- ('Ⳁ', 'Ⳁ'),
- ('Ⳃ', 'Ⳃ'),
- ('Ⳅ', 'Ⳅ'),
- ('Ⳇ', 'Ⳇ'),
- ('Ⳉ', 'Ⳉ'),
- ('Ⳋ', 'Ⳋ'),
- ('Ⳍ', 'Ⳍ'),
- ('Ⳏ', 'Ⳏ'),
- ('Ⳑ', 'Ⳑ'),
- ('Ⳓ', 'Ⳓ'),
- ('Ⳕ', 'Ⳕ'),
- ('Ⳗ', 'Ⳗ'),
- ('Ⳙ', 'Ⳙ'),
- ('Ⳛ', 'Ⳛ'),
- ('Ⳝ', 'Ⳝ'),
- ('Ⳟ', 'Ⳟ'),
- ('Ⳡ', 'Ⳡ'),
- ('Ⳣ', 'Ⳣ'),
- ('Ⳬ', 'Ⳬ'),
- ('Ⳮ', 'Ⳮ'),
- ('Ⳳ', 'Ⳳ'),
- ('Ꙁ', 'Ꙁ'),
- ('Ꙃ', 'Ꙃ'),
- ('Ꙅ', 'Ꙅ'),
- ('Ꙇ', 'Ꙇ'),
- ('Ꙉ', 'Ꙉ'),
- ('Ꙋ', 'Ꙋ'),
- ('Ꙍ', 'Ꙍ'),
- ('Ꙏ', 'Ꙏ'),
- ('Ꙑ', 'Ꙑ'),
- ('Ꙓ', 'Ꙓ'),
- ('Ꙕ', 'Ꙕ'),
- ('Ꙗ', 'Ꙗ'),
- ('Ꙙ', 'Ꙙ'),
- ('Ꙛ', 'Ꙛ'),
- ('Ꙝ', 'Ꙝ'),
- ('Ꙟ', 'Ꙟ'),
- ('Ꙡ', 'Ꙡ'),
- ('Ꙣ', 'Ꙣ'),
- ('Ꙥ', 'Ꙥ'),
- ('Ꙧ', 'Ꙧ'),
- ('Ꙩ', 'Ꙩ'),
- ('Ꙫ', 'Ꙫ'),
- ('Ꙭ', 'Ꙭ'),
- ('Ꚁ', 'Ꚁ'),
- ('Ꚃ', 'Ꚃ'),
- ('Ꚅ', 'Ꚅ'),
- ('Ꚇ', 'Ꚇ'),
- ('Ꚉ', 'Ꚉ'),
- ('Ꚋ', 'Ꚋ'),
- ('Ꚍ', 'Ꚍ'),
- ('Ꚏ', 'Ꚏ'),
- ('Ꚑ', 'Ꚑ'),
- ('Ꚓ', 'Ꚓ'),
- ('Ꚕ', 'Ꚕ'),
- ('Ꚗ', 'Ꚗ'),
- ('Ꚙ', 'Ꚙ'),
- ('Ꚛ', 'Ꚛ'),
- ('Ꜣ', 'Ꜣ'),
- ('Ꜥ', 'Ꜥ'),
- ('Ꜧ', 'Ꜧ'),
- ('Ꜩ', 'Ꜩ'),
- ('Ꜫ', 'Ꜫ'),
- ('Ꜭ', 'Ꜭ'),
- ('Ꜯ', 'Ꜯ'),
- ('Ꜳ', 'Ꜳ'),
- ('Ꜵ', 'Ꜵ'),
- ('Ꜷ', 'Ꜷ'),
- ('Ꜹ', 'Ꜹ'),
- ('Ꜻ', 'Ꜻ'),
- ('Ꜽ', 'Ꜽ'),
- ('Ꜿ', 'Ꜿ'),
- ('Ꝁ', 'Ꝁ'),
- ('Ꝃ', 'Ꝃ'),
- ('Ꝅ', 'Ꝅ'),
- ('Ꝇ', 'Ꝇ'),
- ('Ꝉ', 'Ꝉ'),
- ('Ꝋ', 'Ꝋ'),
- ('Ꝍ', 'Ꝍ'),
- ('Ꝏ', 'Ꝏ'),
- ('Ꝑ', 'Ꝑ'),
- ('Ꝓ', 'Ꝓ'),
- ('Ꝕ', 'Ꝕ'),
- ('Ꝗ', 'Ꝗ'),
- ('Ꝙ', 'Ꝙ'),
- ('Ꝛ', 'Ꝛ'),
- ('Ꝝ', 'Ꝝ'),
- ('Ꝟ', 'Ꝟ'),
- ('Ꝡ', 'Ꝡ'),
- ('Ꝣ', 'Ꝣ'),
- ('Ꝥ', 'Ꝥ'),
- ('Ꝧ', 'Ꝧ'),
- ('Ꝩ', 'Ꝩ'),
- ('Ꝫ', 'Ꝫ'),
- ('Ꝭ', 'Ꝭ'),
- ('Ꝯ', 'Ꝯ'),
- ('Ꝺ', 'Ꝺ'),
- ('Ꝼ', 'Ꝼ'),
- ('Ᵹ', 'Ꝿ'),
- ('Ꞁ', 'Ꞁ'),
- ('Ꞃ', 'Ꞃ'),
- ('Ꞅ', 'Ꞅ'),
- ('Ꞇ', 'Ꞇ'),
- ('Ꞌ', 'Ꞌ'),
- ('Ɥ', 'Ɥ'),
- ('Ꞑ', 'Ꞑ'),
- ('Ꞓ', 'Ꞓ'),
- ('Ꞗ', 'Ꞗ'),
- ('Ꞙ', 'Ꞙ'),
- ('Ꞛ', 'Ꞛ'),
- ('Ꞝ', 'Ꞝ'),
- ('Ꞟ', 'Ꞟ'),
- ('Ꞡ', 'Ꞡ'),
- ('Ꞣ', 'Ꞣ'),
- ('Ꞥ', 'Ꞥ'),
- ('Ꞧ', 'Ꞧ'),
- ('Ꞩ', 'Ꞩ'),
- ('Ɦ', 'Ɪ'),
- ('Ʞ', 'Ꞵ'),
- ('Ꞷ', 'Ꞷ'),
- ('Ꞹ', 'Ꞹ'),
- ('Ꞻ', 'Ꞻ'),
- ('Ꞽ', 'Ꞽ'),
- ('Ꞿ', 'Ꞿ'),
- ('Ꟁ', 'Ꟁ'),
- ('Ꟃ', 'Ꟃ'),
- ('Ꞔ', 'Ꟈ'),
- ('Ꟊ', 'Ꟊ'),
- ('Ɤ', 'Ꟍ'),
- ('Ꟑ', 'Ꟑ'),
- ('Ꟗ', 'Ꟗ'),
- ('Ꟙ', 'Ꟙ'),
- ('Ꟛ', 'Ꟛ'),
- ('Ƛ', 'Ƛ'),
- ('Ꟶ', 'Ꟶ'),
- ('ꭰ', 'ꮿ'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('A', 'Z'),
- ('𐐀', '𐐧'),
- ('𐒰', '𐓓'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐲀', '𐲲'),
- ('𐵐', '𐵥'),
- ('𑢠', '𑢿'),
- ('𖹀', '𖹟'),
- ('𞤀', '𞤡'),
-];
-
-pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('a', 'z'),
- ('µ', 'µ'),
- ('À', 'Ö'),
- ('Ø', 'ö'),
- ('ø', 'ķ'),
- ('Ĺ', 'ƌ'),
- ('Ǝ', 'Ʃ'),
- ('Ƭ', 'ƹ'),
- ('Ƽ', 'ƽ'),
- ('ƿ', 'ƿ'),
- ('DŽ', 'Ƞ'),
- ('Ȣ', 'ȳ'),
- ('Ⱥ', 'ɔ'),
- ('ɖ', 'ɗ'),
- ('ə', 'ə'),
- ('ɛ', 'ɜ'),
- ('ɠ', 'ɡ'),
- ('ɣ', 'ɦ'),
- ('ɨ', 'ɬ'),
- ('ɯ', 'ɯ'),
- ('ɱ', 'ɲ'),
- ('ɵ', 'ɵ'),
- ('ɽ', 'ɽ'),
- ('ʀ', 'ʀ'),
- ('ʂ', 'ʃ'),
- ('ʇ', 'ʌ'),
- ('ʒ', 'ʒ'),
- ('ʝ', 'ʞ'),
- ('\u{345}', '\u{345}'),
- ('Ͱ', 'ͳ'),
- ('Ͷ', 'ͷ'),
- ('ͻ', 'ͽ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', 'ϑ'),
- ('ϕ', 'ϵ'),
- ('Ϸ', 'ϻ'),
- ('Ͻ', 'ҁ'),
- ('Ҋ', 'ԯ'),
- ('Ա', 'Ֆ'),
- ('ա', 'և'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ა', 'ჺ'),
- ('ჽ', 'ჿ'),
- ('Ꭰ', 'Ᏽ'),
- ('ᏸ', 'ᏽ'),
- ('ᲀ', 'ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('ᵹ', 'ᵹ'),
- ('ᵽ', 'ᵽ'),
- ('ᶎ', 'ᶎ'),
- ('Ḁ', 'ẛ'),
- ('ẞ', 'ẞ'),
- ('Ạ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ᾼ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῌ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('ῠ', 'Ῥ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῼ'),
- ('Ω', 'Ω'),
- ('K', 'Å'),
- ('Ⅎ', 'Ⅎ'),
- ('ⅎ', 'ⅎ'),
- ('Ⅰ', 'ⅿ'),
- ('Ↄ', 'ↄ'),
- ('Ⓐ', 'ⓩ'),
- ('Ⰰ', 'Ɒ'),
- ('Ⱳ', 'ⱳ'),
- ('Ⱶ', 'ⱶ'),
- ('Ȿ', 'ⳣ'),
- ('Ⳬ', 'ⳮ'),
- ('Ⳳ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('Ꙁ', 'ꙭ'),
- ('Ꚁ', 'ꚛ'),
- ('Ꜣ', 'ꜯ'),
- ('Ꜳ', 'ꝯ'),
- ('Ꝺ', 'ꞇ'),
- ('Ꞌ', 'Ɥ'),
- ('Ꞑ', 'ꞔ'),
- ('Ꞗ', 'Ɪ'),
- ('Ʞ', 'ꟍ'),
- ('Ꟑ', 'ꟑ'),
- ('Ꟗ', 'Ƛ'),
- ('Ꟶ', 'ꟶ'),
- ('ꭓ', 'ꭓ'),
- ('ꭰ', 'ꮿ'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('A', 'Z'),
- ('a', 'z'),
- ('𐐀', '𐑏'),
- ('𐒰', '𐓓'),
- ('𐓘', '𐓻'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐲀', '𐲲'),
- ('𐳀', '𐳲'),
- ('𐵐', '𐵥'),
- ('𐵰', '𐶅'),
- ('𑢠', '𑣟'),
- ('𖹀', '𖹿'),
- ('𞤀', '𞥃'),
-];
-
-pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('À', 'Ö'),
- ('Ø', 'Þ'),
- ('Ā', 'Ā'),
- ('Ă', 'Ă'),
- ('Ą', 'Ą'),
- ('Ć', 'Ć'),
- ('Ĉ', 'Ĉ'),
- ('Ċ', 'Ċ'),
- ('Č', 'Č'),
- ('Ď', 'Ď'),
- ('Đ', 'Đ'),
- ('Ē', 'Ē'),
- ('Ĕ', 'Ĕ'),
- ('Ė', 'Ė'),
- ('Ę', 'Ę'),
- ('Ě', 'Ě'),
- ('Ĝ', 'Ĝ'),
- ('Ğ', 'Ğ'),
- ('Ġ', 'Ġ'),
- ('Ģ', 'Ģ'),
- ('Ĥ', 'Ĥ'),
- ('Ħ', 'Ħ'),
- ('Ĩ', 'Ĩ'),
- ('Ī', 'Ī'),
- ('Ĭ', 'Ĭ'),
- ('Į', 'Į'),
- ('İ', 'İ'),
- ('IJ', 'IJ'),
- ('Ĵ', 'Ĵ'),
- ('Ķ', 'Ķ'),
- ('Ĺ', 'Ĺ'),
- ('Ļ', 'Ļ'),
- ('Ľ', 'Ľ'),
- ('Ŀ', 'Ŀ'),
- ('Ł', 'Ł'),
- ('Ń', 'Ń'),
- ('Ņ', 'Ņ'),
- ('Ň', 'Ň'),
- ('Ŋ', 'Ŋ'),
- ('Ō', 'Ō'),
- ('Ŏ', 'Ŏ'),
- ('Ő', 'Ő'),
- ('Œ', 'Œ'),
- ('Ŕ', 'Ŕ'),
- ('Ŗ', 'Ŗ'),
- ('Ř', 'Ř'),
- ('Ś', 'Ś'),
- ('Ŝ', 'Ŝ'),
- ('Ş', 'Ş'),
- ('Š', 'Š'),
- ('Ţ', 'Ţ'),
- ('Ť', 'Ť'),
- ('Ŧ', 'Ŧ'),
- ('Ũ', 'Ũ'),
- ('Ū', 'Ū'),
- ('Ŭ', 'Ŭ'),
- ('Ů', 'Ů'),
- ('Ű', 'Ű'),
- ('Ų', 'Ų'),
- ('Ŵ', 'Ŵ'),
- ('Ŷ', 'Ŷ'),
- ('Ÿ', 'Ź'),
- ('Ż', 'Ż'),
- ('Ž', 'Ž'),
- ('Ɓ', 'Ƃ'),
- ('Ƅ', 'Ƅ'),
- ('Ɔ', 'Ƈ'),
- ('Ɖ', 'Ƌ'),
- ('Ǝ', 'Ƒ'),
- ('Ɠ', 'Ɣ'),
- ('Ɩ', 'Ƙ'),
- ('Ɯ', 'Ɲ'),
- ('Ɵ', 'Ơ'),
- ('Ƣ', 'Ƣ'),
- ('Ƥ', 'Ƥ'),
- ('Ʀ', 'Ƨ'),
- ('Ʃ', 'Ʃ'),
- ('Ƭ', 'Ƭ'),
- ('Ʈ', 'Ư'),
- ('Ʊ', 'Ƴ'),
- ('Ƶ', 'Ƶ'),
- ('Ʒ', 'Ƹ'),
- ('Ƽ', 'Ƽ'),
- ('DŽ', 'Dž'),
- ('LJ', 'Lj'),
- ('NJ', 'Nj'),
- ('Ǎ', 'Ǎ'),
- ('Ǐ', 'Ǐ'),
- ('Ǒ', 'Ǒ'),
- ('Ǔ', 'Ǔ'),
- ('Ǖ', 'Ǖ'),
- ('Ǘ', 'Ǘ'),
- ('Ǚ', 'Ǚ'),
- ('Ǜ', 'Ǜ'),
- ('Ǟ', 'Ǟ'),
- ('Ǡ', 'Ǡ'),
- ('Ǣ', 'Ǣ'),
- ('Ǥ', 'Ǥ'),
- ('Ǧ', 'Ǧ'),
- ('Ǩ', 'Ǩ'),
- ('Ǫ', 'Ǫ'),
- ('Ǭ', 'Ǭ'),
- ('Ǯ', 'Ǯ'),
- ('DZ', 'Dz'),
- ('Ǵ', 'Ǵ'),
- ('Ƕ', 'Ǹ'),
- ('Ǻ', 'Ǻ'),
- ('Ǽ', 'Ǽ'),
- ('Ǿ', 'Ǿ'),
- ('Ȁ', 'Ȁ'),
- ('Ȃ', 'Ȃ'),
- ('Ȅ', 'Ȅ'),
- ('Ȇ', 'Ȇ'),
- ('Ȉ', 'Ȉ'),
- ('Ȋ', 'Ȋ'),
- ('Ȍ', 'Ȍ'),
- ('Ȏ', 'Ȏ'),
- ('Ȑ', 'Ȑ'),
- ('Ȓ', 'Ȓ'),
- ('Ȕ', 'Ȕ'),
- ('Ȗ', 'Ȗ'),
- ('Ș', 'Ș'),
- ('Ț', 'Ț'),
- ('Ȝ', 'Ȝ'),
- ('Ȟ', 'Ȟ'),
- ('Ƞ', 'Ƞ'),
- ('Ȣ', 'Ȣ'),
- ('Ȥ', 'Ȥ'),
- ('Ȧ', 'Ȧ'),
- ('Ȩ', 'Ȩ'),
- ('Ȫ', 'Ȫ'),
- ('Ȭ', 'Ȭ'),
- ('Ȯ', 'Ȯ'),
- ('Ȱ', 'Ȱ'),
- ('Ȳ', 'Ȳ'),
- ('Ⱥ', 'Ȼ'),
- ('Ƚ', 'Ⱦ'),
- ('Ɂ', 'Ɂ'),
- ('Ƀ', 'Ɇ'),
- ('Ɉ', 'Ɉ'),
- ('Ɋ', 'Ɋ'),
- ('Ɍ', 'Ɍ'),
- ('Ɏ', 'Ɏ'),
- ('Ͱ', 'Ͱ'),
- ('Ͳ', 'Ͳ'),
- ('Ͷ', 'Ͷ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ώ'),
- ('Α', 'Ρ'),
- ('Σ', 'Ϋ'),
- ('Ϗ', 'Ϗ'),
- ('Ϙ', 'Ϙ'),
- ('Ϛ', 'Ϛ'),
- ('Ϝ', 'Ϝ'),
- ('Ϟ', 'Ϟ'),
- ('Ϡ', 'Ϡ'),
- ('Ϣ', 'Ϣ'),
- ('Ϥ', 'Ϥ'),
- ('Ϧ', 'Ϧ'),
- ('Ϩ', 'Ϩ'),
- ('Ϫ', 'Ϫ'),
- ('Ϭ', 'Ϭ'),
- ('Ϯ', 'Ϯ'),
- ('ϴ', 'ϴ'),
- ('Ϸ', 'Ϸ'),
- ('Ϲ', 'Ϻ'),
- ('Ͻ', 'Я'),
- ('Ѡ', 'Ѡ'),
- ('Ѣ', 'Ѣ'),
- ('Ѥ', 'Ѥ'),
- ('Ѧ', 'Ѧ'),
- ('Ѩ', 'Ѩ'),
- ('Ѫ', 'Ѫ'),
- ('Ѭ', 'Ѭ'),
- ('Ѯ', 'Ѯ'),
- ('Ѱ', 'Ѱ'),
- ('Ѳ', 'Ѳ'),
- ('Ѵ', 'Ѵ'),
- ('Ѷ', 'Ѷ'),
- ('Ѹ', 'Ѹ'),
- ('Ѻ', 'Ѻ'),
- ('Ѽ', 'Ѽ'),
- ('Ѿ', 'Ѿ'),
- ('Ҁ', 'Ҁ'),
- ('Ҋ', 'Ҋ'),
- ('Ҍ', 'Ҍ'),
- ('Ҏ', 'Ҏ'),
- ('Ґ', 'Ґ'),
- ('Ғ', 'Ғ'),
- ('Ҕ', 'Ҕ'),
- ('Җ', 'Җ'),
- ('Ҙ', 'Ҙ'),
- ('Қ', 'Қ'),
- ('Ҝ', 'Ҝ'),
- ('Ҟ', 'Ҟ'),
- ('Ҡ', 'Ҡ'),
- ('Ң', 'Ң'),
- ('Ҥ', 'Ҥ'),
- ('Ҧ', 'Ҧ'),
- ('Ҩ', 'Ҩ'),
- ('Ҫ', 'Ҫ'),
- ('Ҭ', 'Ҭ'),
- ('Ү', 'Ү'),
- ('Ұ', 'Ұ'),
- ('Ҳ', 'Ҳ'),
- ('Ҵ', 'Ҵ'),
- ('Ҷ', 'Ҷ'),
- ('Ҹ', 'Ҹ'),
- ('Һ', 'Һ'),
- ('Ҽ', 'Ҽ'),
- ('Ҿ', 'Ҿ'),
- ('Ӏ', 'Ӂ'),
- ('Ӄ', 'Ӄ'),
- ('Ӆ', 'Ӆ'),
- ('Ӈ', 'Ӈ'),
- ('Ӊ', 'Ӊ'),
- ('Ӌ', 'Ӌ'),
- ('Ӎ', 'Ӎ'),
- ('Ӑ', 'Ӑ'),
- ('Ӓ', 'Ӓ'),
- ('Ӕ', 'Ӕ'),
- ('Ӗ', 'Ӗ'),
- ('Ә', 'Ә'),
- ('Ӛ', 'Ӛ'),
- ('Ӝ', 'Ӝ'),
- ('Ӟ', 'Ӟ'),
- ('Ӡ', 'Ӡ'),
- ('Ӣ', 'Ӣ'),
- ('Ӥ', 'Ӥ'),
- ('Ӧ', 'Ӧ'),
- ('Ө', 'Ө'),
- ('Ӫ', 'Ӫ'),
- ('Ӭ', 'Ӭ'),
- ('Ӯ', 'Ӯ'),
- ('Ӱ', 'Ӱ'),
- ('Ӳ', 'Ӳ'),
- ('Ӵ', 'Ӵ'),
- ('Ӷ', 'Ӷ'),
- ('Ӹ', 'Ӹ'),
- ('Ӻ', 'Ӻ'),
- ('Ӽ', 'Ӽ'),
- ('Ӿ', 'Ӿ'),
- ('Ԁ', 'Ԁ'),
- ('Ԃ', 'Ԃ'),
- ('Ԅ', 'Ԅ'),
- ('Ԇ', 'Ԇ'),
- ('Ԉ', 'Ԉ'),
- ('Ԋ', 'Ԋ'),
- ('Ԍ', 'Ԍ'),
- ('Ԏ', 'Ԏ'),
- ('Ԑ', 'Ԑ'),
- ('Ԓ', 'Ԓ'),
- ('Ԕ', 'Ԕ'),
- ('Ԗ', 'Ԗ'),
- ('Ԙ', 'Ԙ'),
- ('Ԛ', 'Ԛ'),
- ('Ԝ', 'Ԝ'),
- ('Ԟ', 'Ԟ'),
- ('Ԡ', 'Ԡ'),
- ('Ԣ', 'Ԣ'),
- ('Ԥ', 'Ԥ'),
- ('Ԧ', 'Ԧ'),
- ('Ԩ', 'Ԩ'),
- ('Ԫ', 'Ԫ'),
- ('Ԭ', 'Ԭ'),
- ('Ԯ', 'Ԯ'),
- ('Ա', 'Ֆ'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('Ꭰ', 'Ᏽ'),
- ('Ᲊ', 'Ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('Ḁ', 'Ḁ'),
- ('Ḃ', 'Ḃ'),
- ('Ḅ', 'Ḅ'),
- ('Ḇ', 'Ḇ'),
- ('Ḉ', 'Ḉ'),
- ('Ḋ', 'Ḋ'),
- ('Ḍ', 'Ḍ'),
- ('Ḏ', 'Ḏ'),
- ('Ḑ', 'Ḑ'),
- ('Ḓ', 'Ḓ'),
- ('Ḕ', 'Ḕ'),
- ('Ḗ', 'Ḗ'),
- ('Ḙ', 'Ḙ'),
- ('Ḛ', 'Ḛ'),
- ('Ḝ', 'Ḝ'),
- ('Ḟ', 'Ḟ'),
- ('Ḡ', 'Ḡ'),
- ('Ḣ', 'Ḣ'),
- ('Ḥ', 'Ḥ'),
- ('Ḧ', 'Ḧ'),
- ('Ḩ', 'Ḩ'),
- ('Ḫ', 'Ḫ'),
- ('Ḭ', 'Ḭ'),
- ('Ḯ', 'Ḯ'),
- ('Ḱ', 'Ḱ'),
- ('Ḳ', 'Ḳ'),
- ('Ḵ', 'Ḵ'),
- ('Ḷ', 'Ḷ'),
- ('Ḹ', 'Ḹ'),
- ('Ḻ', 'Ḻ'),
- ('Ḽ', 'Ḽ'),
- ('Ḿ', 'Ḿ'),
- ('Ṁ', 'Ṁ'),
- ('Ṃ', 'Ṃ'),
- ('Ṅ', 'Ṅ'),
- ('Ṇ', 'Ṇ'),
- ('Ṉ', 'Ṉ'),
- ('Ṋ', 'Ṋ'),
- ('Ṍ', 'Ṍ'),
- ('Ṏ', 'Ṏ'),
- ('Ṑ', 'Ṑ'),
- ('Ṓ', 'Ṓ'),
- ('Ṕ', 'Ṕ'),
- ('Ṗ', 'Ṗ'),
- ('Ṙ', 'Ṙ'),
- ('Ṛ', 'Ṛ'),
- ('Ṝ', 'Ṝ'),
- ('Ṟ', 'Ṟ'),
- ('Ṡ', 'Ṡ'),
- ('Ṣ', 'Ṣ'),
- ('Ṥ', 'Ṥ'),
- ('Ṧ', 'Ṧ'),
- ('Ṩ', 'Ṩ'),
- ('Ṫ', 'Ṫ'),
- ('Ṭ', 'Ṭ'),
- ('Ṯ', 'Ṯ'),
- ('Ṱ', 'Ṱ'),
- ('Ṳ', 'Ṳ'),
- ('Ṵ', 'Ṵ'),
- ('Ṷ', 'Ṷ'),
- ('Ṹ', 'Ṹ'),
- ('Ṻ', 'Ṻ'),
- ('Ṽ', 'Ṽ'),
- ('Ṿ', 'Ṿ'),
- ('Ẁ', 'Ẁ'),
- ('Ẃ', 'Ẃ'),
- ('Ẅ', 'Ẅ'),
- ('Ẇ', 'Ẇ'),
- ('Ẉ', 'Ẉ'),
- ('Ẋ', 'Ẋ'),
- ('Ẍ', 'Ẍ'),
- ('Ẏ', 'Ẏ'),
- ('Ẑ', 'Ẑ'),
- ('Ẓ', 'Ẓ'),
- ('Ẕ', 'Ẕ'),
- ('ẞ', 'ẞ'),
- ('Ạ', 'Ạ'),
- ('Ả', 'Ả'),
- ('Ấ', 'Ấ'),
- ('Ầ', 'Ầ'),
- ('Ẩ', 'Ẩ'),
- ('Ẫ', 'Ẫ'),
- ('Ậ', 'Ậ'),
- ('Ắ', 'Ắ'),
- ('Ằ', 'Ằ'),
- ('Ẳ', 'Ẳ'),
- ('Ẵ', 'Ẵ'),
- ('Ặ', 'Ặ'),
- ('Ẹ', 'Ẹ'),
- ('Ẻ', 'Ẻ'),
- ('Ẽ', 'Ẽ'),
- ('Ế', 'Ế'),
- ('Ề', 'Ề'),
- ('Ể', 'Ể'),
- ('Ễ', 'Ễ'),
- ('Ệ', 'Ệ'),
- ('Ỉ', 'Ỉ'),
- ('Ị', 'Ị'),
- ('Ọ', 'Ọ'),
- ('Ỏ', 'Ỏ'),
- ('Ố', 'Ố'),
- ('Ồ', 'Ồ'),
- ('Ổ', 'Ổ'),
- ('Ỗ', 'Ỗ'),
- ('Ộ', 'Ộ'),
- ('Ớ', 'Ớ'),
- ('Ờ', 'Ờ'),
- ('Ở', 'Ở'),
- ('Ỡ', 'Ỡ'),
- ('Ợ', 'Ợ'),
- ('Ụ', 'Ụ'),
- ('Ủ', 'Ủ'),
- ('Ứ', 'Ứ'),
- ('Ừ', 'Ừ'),
- ('Ử', 'Ử'),
- ('Ữ', 'Ữ'),
- ('Ự', 'Ự'),
- ('Ỳ', 'Ỳ'),
- ('Ỵ', 'Ỵ'),
- ('Ỷ', 'Ỷ'),
- ('Ỹ', 'Ỹ'),
- ('Ỻ', 'Ỻ'),
- ('Ỽ', 'Ỽ'),
- ('Ỿ', 'Ỿ'),
- ('Ἀ', 'Ἇ'),
- ('Ἐ', 'Ἕ'),
- ('Ἠ', 'Ἧ'),
- ('Ἰ', 'Ἷ'),
- ('Ὀ', 'Ὅ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'Ὗ'),
- ('Ὠ', 'Ὧ'),
- ('ᾈ', 'ᾏ'),
- ('ᾘ', 'ᾟ'),
- ('ᾨ', 'ᾯ'),
- ('Ᾰ', 'ᾼ'),
- ('Ὲ', 'ῌ'),
- ('Ῐ', 'Ί'),
- ('Ῠ', 'Ῥ'),
- ('Ὸ', 'ῼ'),
- ('Ω', 'Ω'),
- ('K', 'Å'),
- ('Ⅎ', 'Ⅎ'),
- ('Ⅰ', 'Ⅿ'),
- ('Ↄ', 'Ↄ'),
- ('Ⓐ', 'Ⓩ'),
- ('Ⰰ', 'Ⱟ'),
- ('Ⱡ', 'Ⱡ'),
- ('Ɫ', 'Ɽ'),
- ('Ⱨ', 'Ⱨ'),
- ('Ⱪ', 'Ⱪ'),
- ('Ⱬ', 'Ⱬ'),
- ('Ɑ', 'Ɒ'),
- ('Ⱳ', 'Ⱳ'),
- ('Ⱶ', 'Ⱶ'),
- ('Ȿ', 'Ⲁ'),
- ('Ⲃ', 'Ⲃ'),
- ('Ⲅ', 'Ⲅ'),
- ('Ⲇ', 'Ⲇ'),
- ('Ⲉ', 'Ⲉ'),
- ('Ⲋ', 'Ⲋ'),
- ('Ⲍ', 'Ⲍ'),
- ('Ⲏ', 'Ⲏ'),
- ('Ⲑ', 'Ⲑ'),
- ('Ⲓ', 'Ⲓ'),
- ('Ⲕ', 'Ⲕ'),
- ('Ⲗ', 'Ⲗ'),
- ('Ⲙ', 'Ⲙ'),
- ('Ⲛ', 'Ⲛ'),
- ('Ⲝ', 'Ⲝ'),
- ('Ⲟ', 'Ⲟ'),
- ('Ⲡ', 'Ⲡ'),
- ('Ⲣ', 'Ⲣ'),
- ('Ⲥ', 'Ⲥ'),
- ('Ⲧ', 'Ⲧ'),
- ('Ⲩ', 'Ⲩ'),
- ('Ⲫ', 'Ⲫ'),
- ('Ⲭ', 'Ⲭ'),
- ('Ⲯ', 'Ⲯ'),
- ('Ⲱ', 'Ⲱ'),
- ('Ⲳ', 'Ⲳ'),
- ('Ⲵ', 'Ⲵ'),
- ('Ⲷ', 'Ⲷ'),
- ('Ⲹ', 'Ⲹ'),
- ('Ⲻ', 'Ⲻ'),
- ('Ⲽ', 'Ⲽ'),
- ('Ⲿ', 'Ⲿ'),
- ('Ⳁ', 'Ⳁ'),
- ('Ⳃ', 'Ⳃ'),
- ('Ⳅ', 'Ⳅ'),
- ('Ⳇ', 'Ⳇ'),
- ('Ⳉ', 'Ⳉ'),
- ('Ⳋ', 'Ⳋ'),
- ('Ⳍ', 'Ⳍ'),
- ('Ⳏ', 'Ⳏ'),
- ('Ⳑ', 'Ⳑ'),
- ('Ⳓ', 'Ⳓ'),
- ('Ⳕ', 'Ⳕ'),
- ('Ⳗ', 'Ⳗ'),
- ('Ⳙ', 'Ⳙ'),
- ('Ⳛ', 'Ⳛ'),
- ('Ⳝ', 'Ⳝ'),
- ('Ⳟ', 'Ⳟ'),
- ('Ⳡ', 'Ⳡ'),
- ('Ⳣ', 'Ⳣ'),
- ('Ⳬ', 'Ⳬ'),
- ('Ⳮ', 'Ⳮ'),
- ('Ⳳ', 'Ⳳ'),
- ('Ꙁ', 'Ꙁ'),
- ('Ꙃ', 'Ꙃ'),
- ('Ꙅ', 'Ꙅ'),
- ('Ꙇ', 'Ꙇ'),
- ('Ꙉ', 'Ꙉ'),
- ('Ꙋ', 'Ꙋ'),
- ('Ꙍ', 'Ꙍ'),
- ('Ꙏ', 'Ꙏ'),
- ('Ꙑ', 'Ꙑ'),
- ('Ꙓ', 'Ꙓ'),
- ('Ꙕ', 'Ꙕ'),
- ('Ꙗ', 'Ꙗ'),
- ('Ꙙ', 'Ꙙ'),
- ('Ꙛ', 'Ꙛ'),
- ('Ꙝ', 'Ꙝ'),
- ('Ꙟ', 'Ꙟ'),
- ('Ꙡ', 'Ꙡ'),
- ('Ꙣ', 'Ꙣ'),
- ('Ꙥ', 'Ꙥ'),
- ('Ꙧ', 'Ꙧ'),
- ('Ꙩ', 'Ꙩ'),
- ('Ꙫ', 'Ꙫ'),
- ('Ꙭ', 'Ꙭ'),
- ('Ꚁ', 'Ꚁ'),
- ('Ꚃ', 'Ꚃ'),
- ('Ꚅ', 'Ꚅ'),
- ('Ꚇ', 'Ꚇ'),
- ('Ꚉ', 'Ꚉ'),
- ('Ꚋ', 'Ꚋ'),
- ('Ꚍ', 'Ꚍ'),
- ('Ꚏ', 'Ꚏ'),
- ('Ꚑ', 'Ꚑ'),
- ('Ꚓ', 'Ꚓ'),
- ('Ꚕ', 'Ꚕ'),
- ('Ꚗ', 'Ꚗ'),
- ('Ꚙ', 'Ꚙ'),
- ('Ꚛ', 'Ꚛ'),
- ('Ꜣ', 'Ꜣ'),
- ('Ꜥ', 'Ꜥ'),
- ('Ꜧ', 'Ꜧ'),
- ('Ꜩ', 'Ꜩ'),
- ('Ꜫ', 'Ꜫ'),
- ('Ꜭ', 'Ꜭ'),
- ('Ꜯ', 'Ꜯ'),
- ('Ꜳ', 'Ꜳ'),
- ('Ꜵ', 'Ꜵ'),
- ('Ꜷ', 'Ꜷ'),
- ('Ꜹ', 'Ꜹ'),
- ('Ꜻ', 'Ꜻ'),
- ('Ꜽ', 'Ꜽ'),
- ('Ꜿ', 'Ꜿ'),
- ('Ꝁ', 'Ꝁ'),
- ('Ꝃ', 'Ꝃ'),
- ('Ꝅ', 'Ꝅ'),
- ('Ꝇ', 'Ꝇ'),
- ('Ꝉ', 'Ꝉ'),
- ('Ꝋ', 'Ꝋ'),
- ('Ꝍ', 'Ꝍ'),
- ('Ꝏ', 'Ꝏ'),
- ('Ꝑ', 'Ꝑ'),
- ('Ꝓ', 'Ꝓ'),
- ('Ꝕ', 'Ꝕ'),
- ('Ꝗ', 'Ꝗ'),
- ('Ꝙ', 'Ꝙ'),
- ('Ꝛ', 'Ꝛ'),
- ('Ꝝ', 'Ꝝ'),
- ('Ꝟ', 'Ꝟ'),
- ('Ꝡ', 'Ꝡ'),
- ('Ꝣ', 'Ꝣ'),
- ('Ꝥ', 'Ꝥ'),
- ('Ꝧ', 'Ꝧ'),
- ('Ꝩ', 'Ꝩ'),
- ('Ꝫ', 'Ꝫ'),
- ('Ꝭ', 'Ꝭ'),
- ('Ꝯ', 'Ꝯ'),
- ('Ꝺ', 'Ꝺ'),
- ('Ꝼ', 'Ꝼ'),
- ('Ᵹ', 'Ꝿ'),
- ('Ꞁ', 'Ꞁ'),
- ('Ꞃ', 'Ꞃ'),
- ('Ꞅ', 'Ꞅ'),
- ('Ꞇ', 'Ꞇ'),
- ('Ꞌ', 'Ꞌ'),
- ('Ɥ', 'Ɥ'),
- ('Ꞑ', 'Ꞑ'),
- ('Ꞓ', 'Ꞓ'),
- ('Ꞗ', 'Ꞗ'),
- ('Ꞙ', 'Ꞙ'),
- ('Ꞛ', 'Ꞛ'),
- ('Ꞝ', 'Ꞝ'),
- ('Ꞟ', 'Ꞟ'),
- ('Ꞡ', 'Ꞡ'),
- ('Ꞣ', 'Ꞣ'),
- ('Ꞥ', 'Ꞥ'),
- ('Ꞧ', 'Ꞧ'),
- ('Ꞩ', 'Ꞩ'),
- ('Ɦ', 'Ɪ'),
- ('Ʞ', 'Ꞵ'),
- ('Ꞷ', 'Ꞷ'),
- ('Ꞹ', 'Ꞹ'),
- ('Ꞻ', 'Ꞻ'),
- ('Ꞽ', 'Ꞽ'),
- ('Ꞿ', 'Ꞿ'),
- ('Ꟁ', 'Ꟁ'),
- ('Ꟃ', 'Ꟃ'),
- ('Ꞔ', 'Ꟈ'),
- ('Ꟊ', 'Ꟊ'),
- ('Ɤ', 'Ꟍ'),
- ('Ꟑ', 'Ꟑ'),
- ('Ꟗ', 'Ꟗ'),
- ('Ꟙ', 'Ꟙ'),
- ('Ꟛ', 'Ꟛ'),
- ('Ƛ', 'Ƛ'),
- ('Ꟶ', 'Ꟶ'),
- ('A', 'Z'),
- ('𐐀', '𐐧'),
- ('𐒰', '𐓓'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐲀', '𐲲'),
- ('𐵐', '𐵥'),
- ('𑢠', '𑢿'),
- ('𖹀', '𖹟'),
- ('𞤀', '𞤡'),
-];
-
-pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[
- ('a', 'z'),
- ('µ', 'µ'),
- ('ß', 'ö'),
- ('ø', 'ÿ'),
- ('ā', 'ā'),
- ('ă', 'ă'),
- ('ą', 'ą'),
- ('ć', 'ć'),
- ('ĉ', 'ĉ'),
- ('ċ', 'ċ'),
- ('č', 'č'),
- ('ď', 'ď'),
- ('đ', 'đ'),
- ('ē', 'ē'),
- ('ĕ', 'ĕ'),
- ('ė', 'ė'),
- ('ę', 'ę'),
- ('ě', 'ě'),
- ('ĝ', 'ĝ'),
- ('ğ', 'ğ'),
- ('ġ', 'ġ'),
- ('ģ', 'ģ'),
- ('ĥ', 'ĥ'),
- ('ħ', 'ħ'),
- ('ĩ', 'ĩ'),
- ('ī', 'ī'),
- ('ĭ', 'ĭ'),
- ('į', 'į'),
- ('ı', 'ı'),
- ('ij', 'ij'),
- ('ĵ', 'ĵ'),
- ('ķ', 'ķ'),
- ('ĺ', 'ĺ'),
- ('ļ', 'ļ'),
- ('ľ', 'ľ'),
- ('ŀ', 'ŀ'),
- ('ł', 'ł'),
- ('ń', 'ń'),
- ('ņ', 'ņ'),
- ('ň', 'ʼn'),
- ('ŋ', 'ŋ'),
- ('ō', 'ō'),
- ('ŏ', 'ŏ'),
- ('ő', 'ő'),
- ('œ', 'œ'),
- ('ŕ', 'ŕ'),
- ('ŗ', 'ŗ'),
- ('ř', 'ř'),
- ('ś', 'ś'),
- ('ŝ', 'ŝ'),
- ('ş', 'ş'),
- ('š', 'š'),
- ('ţ', 'ţ'),
- ('ť', 'ť'),
- ('ŧ', 'ŧ'),
- ('ũ', 'ũ'),
- ('ū', 'ū'),
- ('ŭ', 'ŭ'),
- ('ů', 'ů'),
- ('ű', 'ű'),
- ('ų', 'ų'),
- ('ŵ', 'ŵ'),
- ('ŷ', 'ŷ'),
- ('ź', 'ź'),
- ('ż', 'ż'),
- ('ž', 'ƀ'),
- ('ƃ', 'ƃ'),
- ('ƅ', 'ƅ'),
- ('ƈ', 'ƈ'),
- ('ƌ', 'ƌ'),
- ('ƒ', 'ƒ'),
- ('ƕ', 'ƕ'),
- ('ƙ', 'ƛ'),
- ('ƞ', 'ƞ'),
- ('ơ', 'ơ'),
- ('ƣ', 'ƣ'),
- ('ƥ', 'ƥ'),
- ('ƨ', 'ƨ'),
- ('ƭ', 'ƭ'),
- ('ư', 'ư'),
- ('ƴ', 'ƴ'),
- ('ƶ', 'ƶ'),
- ('ƹ', 'ƹ'),
- ('ƽ', 'ƽ'),
- ('ƿ', 'ƿ'),
- ('DŽ', 'DŽ'),
- ('dž', 'LJ'),
- ('lj', 'NJ'),
- ('nj', 'nj'),
- ('ǎ', 'ǎ'),
- ('ǐ', 'ǐ'),
- ('ǒ', 'ǒ'),
- ('ǔ', 'ǔ'),
- ('ǖ', 'ǖ'),
- ('ǘ', 'ǘ'),
- ('ǚ', 'ǚ'),
- ('ǜ', 'ǝ'),
- ('ǟ', 'ǟ'),
- ('ǡ', 'ǡ'),
- ('ǣ', 'ǣ'),
- ('ǥ', 'ǥ'),
- ('ǧ', 'ǧ'),
- ('ǩ', 'ǩ'),
- ('ǫ', 'ǫ'),
- ('ǭ', 'ǭ'),
- ('ǯ', 'DZ'),
- ('dz', 'dz'),
- ('ǵ', 'ǵ'),
- ('ǹ', 'ǹ'),
- ('ǻ', 'ǻ'),
- ('ǽ', 'ǽ'),
- ('ǿ', 'ǿ'),
- ('ȁ', 'ȁ'),
- ('ȃ', 'ȃ'),
- ('ȅ', 'ȅ'),
- ('ȇ', 'ȇ'),
- ('ȉ', 'ȉ'),
- ('ȋ', 'ȋ'),
- ('ȍ', 'ȍ'),
- ('ȏ', 'ȏ'),
- ('ȑ', 'ȑ'),
- ('ȓ', 'ȓ'),
- ('ȕ', 'ȕ'),
- ('ȗ', 'ȗ'),
- ('ș', 'ș'),
- ('ț', 'ț'),
- ('ȝ', 'ȝ'),
- ('ȟ', 'ȟ'),
- ('ȣ', 'ȣ'),
- ('ȥ', 'ȥ'),
- ('ȧ', 'ȧ'),
- ('ȩ', 'ȩ'),
- ('ȫ', 'ȫ'),
- ('ȭ', 'ȭ'),
- ('ȯ', 'ȯ'),
- ('ȱ', 'ȱ'),
- ('ȳ', 'ȳ'),
- ('ȼ', 'ȼ'),
- ('ȿ', 'ɀ'),
- ('ɂ', 'ɂ'),
- ('ɇ', 'ɇ'),
- ('ɉ', 'ɉ'),
- ('ɋ', 'ɋ'),
- ('ɍ', 'ɍ'),
- ('ɏ', 'ɔ'),
- ('ɖ', 'ɗ'),
- ('ə', 'ə'),
- ('ɛ', 'ɜ'),
- ('ɠ', 'ɡ'),
- ('ɣ', 'ɦ'),
- ('ɨ', 'ɬ'),
- ('ɯ', 'ɯ'),
- ('ɱ', 'ɲ'),
- ('ɵ', 'ɵ'),
- ('ɽ', 'ɽ'),
- ('ʀ', 'ʀ'),
- ('ʂ', 'ʃ'),
- ('ʇ', 'ʌ'),
- ('ʒ', 'ʒ'),
- ('ʝ', 'ʞ'),
- ('\u{345}', '\u{345}'),
- ('ͱ', 'ͱ'),
- ('ͳ', 'ͳ'),
- ('ͷ', 'ͷ'),
- ('ͻ', 'ͽ'),
- ('ΐ', 'ΐ'),
- ('ά', 'ώ'),
- ('ϐ', 'ϑ'),
- ('ϕ', 'ϗ'),
- ('ϙ', 'ϙ'),
- ('ϛ', 'ϛ'),
- ('ϝ', 'ϝ'),
- ('ϟ', 'ϟ'),
- ('ϡ', 'ϡ'),
- ('ϣ', 'ϣ'),
- ('ϥ', 'ϥ'),
- ('ϧ', 'ϧ'),
- ('ϩ', 'ϩ'),
- ('ϫ', 'ϫ'),
- ('ϭ', 'ϭ'),
- ('ϯ', 'ϳ'),
- ('ϵ', 'ϵ'),
- ('ϸ', 'ϸ'),
- ('ϻ', 'ϻ'),
- ('а', 'џ'),
- ('ѡ', 'ѡ'),
- ('ѣ', 'ѣ'),
- ('ѥ', 'ѥ'),
- ('ѧ', 'ѧ'),
- ('ѩ', 'ѩ'),
- ('ѫ', 'ѫ'),
- ('ѭ', 'ѭ'),
- ('ѯ', 'ѯ'),
- ('ѱ', 'ѱ'),
- ('ѳ', 'ѳ'),
- ('ѵ', 'ѵ'),
- ('ѷ', 'ѷ'),
- ('ѹ', 'ѹ'),
- ('ѻ', 'ѻ'),
- ('ѽ', 'ѽ'),
- ('ѿ', 'ѿ'),
- ('ҁ', 'ҁ'),
- ('ҋ', 'ҋ'),
- ('ҍ', 'ҍ'),
- ('ҏ', 'ҏ'),
- ('ґ', 'ґ'),
- ('ғ', 'ғ'),
- ('ҕ', 'ҕ'),
- ('җ', 'җ'),
- ('ҙ', 'ҙ'),
- ('қ', 'қ'),
- ('ҝ', 'ҝ'),
- ('ҟ', 'ҟ'),
- ('ҡ', 'ҡ'),
- ('ң', 'ң'),
- ('ҥ', 'ҥ'),
- ('ҧ', 'ҧ'),
- ('ҩ', 'ҩ'),
- ('ҫ', 'ҫ'),
- ('ҭ', 'ҭ'),
- ('ү', 'ү'),
- ('ұ', 'ұ'),
- ('ҳ', 'ҳ'),
- ('ҵ', 'ҵ'),
- ('ҷ', 'ҷ'),
- ('ҹ', 'ҹ'),
- ('һ', 'һ'),
- ('ҽ', 'ҽ'),
- ('ҿ', 'ҿ'),
- ('ӂ', 'ӂ'),
- ('ӄ', 'ӄ'),
- ('ӆ', 'ӆ'),
- ('ӈ', 'ӈ'),
- ('ӊ', 'ӊ'),
- ('ӌ', 'ӌ'),
- ('ӎ', 'ӏ'),
- ('ӑ', 'ӑ'),
- ('ӓ', 'ӓ'),
- ('ӕ', 'ӕ'),
- ('ӗ', 'ӗ'),
- ('ә', 'ә'),
- ('ӛ', 'ӛ'),
- ('ӝ', 'ӝ'),
- ('ӟ', 'ӟ'),
- ('ӡ', 'ӡ'),
- ('ӣ', 'ӣ'),
- ('ӥ', 'ӥ'),
- ('ӧ', 'ӧ'),
- ('ө', 'ө'),
- ('ӫ', 'ӫ'),
- ('ӭ', 'ӭ'),
- ('ӯ', 'ӯ'),
- ('ӱ', 'ӱ'),
- ('ӳ', 'ӳ'),
- ('ӵ', 'ӵ'),
- ('ӷ', 'ӷ'),
- ('ӹ', 'ӹ'),
- ('ӻ', 'ӻ'),
- ('ӽ', 'ӽ'),
- ('ӿ', 'ӿ'),
- ('ԁ', 'ԁ'),
- ('ԃ', 'ԃ'),
- ('ԅ', 'ԅ'),
- ('ԇ', 'ԇ'),
- ('ԉ', 'ԉ'),
- ('ԋ', 'ԋ'),
- ('ԍ', 'ԍ'),
- ('ԏ', 'ԏ'),
- ('ԑ', 'ԑ'),
- ('ԓ', 'ԓ'),
- ('ԕ', 'ԕ'),
- ('ԗ', 'ԗ'),
- ('ԙ', 'ԙ'),
- ('ԛ', 'ԛ'),
- ('ԝ', 'ԝ'),
- ('ԟ', 'ԟ'),
- ('ԡ', 'ԡ'),
- ('ԣ', 'ԣ'),
- ('ԥ', 'ԥ'),
- ('ԧ', 'ԧ'),
- ('ԩ', 'ԩ'),
- ('ԫ', 'ԫ'),
- ('ԭ', 'ԭ'),
- ('ԯ', 'ԯ'),
- ('ա', 'և'),
- ('ᏸ', 'ᏽ'),
- ('ᲀ', 'ᲈ'),
- ('ᲊ', 'ᲊ'),
- ('ᵹ', 'ᵹ'),
- ('ᵽ', 'ᵽ'),
- ('ᶎ', 'ᶎ'),
- ('ḁ', 'ḁ'),
- ('ḃ', 'ḃ'),
- ('ḅ', 'ḅ'),
- ('ḇ', 'ḇ'),
- ('ḉ', 'ḉ'),
- ('ḋ', 'ḋ'),
- ('ḍ', 'ḍ'),
- ('ḏ', 'ḏ'),
- ('ḑ', 'ḑ'),
- ('ḓ', 'ḓ'),
- ('ḕ', 'ḕ'),
- ('ḗ', 'ḗ'),
- ('ḙ', 'ḙ'),
- ('ḛ', 'ḛ'),
- ('ḝ', 'ḝ'),
- ('ḟ', 'ḟ'),
- ('ḡ', 'ḡ'),
- ('ḣ', 'ḣ'),
- ('ḥ', 'ḥ'),
- ('ḧ', 'ḧ'),
- ('ḩ', 'ḩ'),
- ('ḫ', 'ḫ'),
- ('ḭ', 'ḭ'),
- ('ḯ', 'ḯ'),
- ('ḱ', 'ḱ'),
- ('ḳ', 'ḳ'),
- ('ḵ', 'ḵ'),
- ('ḷ', 'ḷ'),
- ('ḹ', 'ḹ'),
- ('ḻ', 'ḻ'),
- ('ḽ', 'ḽ'),
- ('ḿ', 'ḿ'),
- ('ṁ', 'ṁ'),
- ('ṃ', 'ṃ'),
- ('ṅ', 'ṅ'),
- ('ṇ', 'ṇ'),
- ('ṉ', 'ṉ'),
- ('ṋ', 'ṋ'),
- ('ṍ', 'ṍ'),
- ('ṏ', 'ṏ'),
- ('ṑ', 'ṑ'),
- ('ṓ', 'ṓ'),
- ('ṕ', 'ṕ'),
- ('ṗ', 'ṗ'),
- ('ṙ', 'ṙ'),
- ('ṛ', 'ṛ'),
- ('ṝ', 'ṝ'),
- ('ṟ', 'ṟ'),
- ('ṡ', 'ṡ'),
- ('ṣ', 'ṣ'),
- ('ṥ', 'ṥ'),
- ('ṧ', 'ṧ'),
- ('ṩ', 'ṩ'),
- ('ṫ', 'ṫ'),
- ('ṭ', 'ṭ'),
- ('ṯ', 'ṯ'),
- ('ṱ', 'ṱ'),
- ('ṳ', 'ṳ'),
- ('ṵ', 'ṵ'),
- ('ṷ', 'ṷ'),
- ('ṹ', 'ṹ'),
- ('ṻ', 'ṻ'),
- ('ṽ', 'ṽ'),
- ('ṿ', 'ṿ'),
- ('ẁ', 'ẁ'),
- ('ẃ', 'ẃ'),
- ('ẅ', 'ẅ'),
- ('ẇ', 'ẇ'),
- ('ẉ', 'ẉ'),
- ('ẋ', 'ẋ'),
- ('ẍ', 'ẍ'),
- ('ẏ', 'ẏ'),
- ('ẑ', 'ẑ'),
- ('ẓ', 'ẓ'),
- ('ẕ', 'ẛ'),
- ('ạ', 'ạ'),
- ('ả', 'ả'),
- ('ấ', 'ấ'),
- ('ầ', 'ầ'),
- ('ẩ', 'ẩ'),
- ('ẫ', 'ẫ'),
- ('ậ', 'ậ'),
- ('ắ', 'ắ'),
- ('ằ', 'ằ'),
- ('ẳ', 'ẳ'),
- ('ẵ', 'ẵ'),
- ('ặ', 'ặ'),
- ('ẹ', 'ẹ'),
- ('ẻ', 'ẻ'),
- ('ẽ', 'ẽ'),
- ('ế', 'ế'),
- ('ề', 'ề'),
- ('ể', 'ể'),
- ('ễ', 'ễ'),
- ('ệ', 'ệ'),
- ('ỉ', 'ỉ'),
- ('ị', 'ị'),
- ('ọ', 'ọ'),
- ('ỏ', 'ỏ'),
- ('ố', 'ố'),
- ('ồ', 'ồ'),
- ('ổ', 'ổ'),
- ('ỗ', 'ỗ'),
- ('ộ', 'ộ'),
- ('ớ', 'ớ'),
- ('ờ', 'ờ'),
- ('ở', 'ở'),
- ('ỡ', 'ỡ'),
- ('ợ', 'ợ'),
- ('ụ', 'ụ'),
- ('ủ', 'ủ'),
- ('ứ', 'ứ'),
- ('ừ', 'ừ'),
- ('ử', 'ử'),
- ('ữ', 'ữ'),
- ('ự', 'ự'),
- ('ỳ', 'ỳ'),
- ('ỵ', 'ỵ'),
- ('ỷ', 'ỷ'),
- ('ỹ', 'ỹ'),
- ('ỻ', 'ỻ'),
- ('ỽ', 'ỽ'),
- ('ỿ', 'ἇ'),
- ('ἐ', 'ἕ'),
- ('ἠ', 'ἧ'),
- ('ἰ', 'ἷ'),
- ('ὀ', 'ὅ'),
- ('ὐ', 'ὗ'),
- ('ὠ', 'ὧ'),
- ('ὰ', 'ώ'),
- ('ᾀ', 'ᾇ'),
- ('ᾐ', 'ᾗ'),
- ('ᾠ', 'ᾧ'),
- ('ᾰ', 'ᾴ'),
- ('ᾶ', 'ᾷ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῇ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'ῗ'),
- ('ῠ', 'ῧ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῷ'),
- ('ⅎ', 'ⅎ'),
- ('ⅰ', 'ⅿ'),
- ('ↄ', 'ↄ'),
- ('ⓐ', 'ⓩ'),
- ('ⰰ', 'ⱟ'),
- ('ⱡ', 'ⱡ'),
- ('ⱥ', 'ⱦ'),
- ('ⱨ', 'ⱨ'),
- ('ⱪ', 'ⱪ'),
- ('ⱬ', 'ⱬ'),
- ('ⱳ', 'ⱳ'),
- ('ⱶ', 'ⱶ'),
- ('ⲁ', 'ⲁ'),
- ('ⲃ', 'ⲃ'),
- ('ⲅ', 'ⲅ'),
- ('ⲇ', 'ⲇ'),
- ('ⲉ', 'ⲉ'),
- ('ⲋ', 'ⲋ'),
- ('ⲍ', 'ⲍ'),
- ('ⲏ', 'ⲏ'),
- ('ⲑ', 'ⲑ'),
- ('ⲓ', 'ⲓ'),
- ('ⲕ', 'ⲕ'),
- ('ⲗ', 'ⲗ'),
- ('ⲙ', 'ⲙ'),
- ('ⲛ', 'ⲛ'),
- ('ⲝ', 'ⲝ'),
- ('ⲟ', 'ⲟ'),
- ('ⲡ', 'ⲡ'),
- ('ⲣ', 'ⲣ'),
- ('ⲥ', 'ⲥ'),
- ('ⲧ', 'ⲧ'),
- ('ⲩ', 'ⲩ'),
- ('ⲫ', 'ⲫ'),
- ('ⲭ', 'ⲭ'),
- ('ⲯ', 'ⲯ'),
- ('ⲱ', 'ⲱ'),
- ('ⲳ', 'ⲳ'),
- ('ⲵ', 'ⲵ'),
- ('ⲷ', 'ⲷ'),
- ('ⲹ', 'ⲹ'),
- ('ⲻ', 'ⲻ'),
- ('ⲽ', 'ⲽ'),
- ('ⲿ', 'ⲿ'),
- ('ⳁ', 'ⳁ'),
- ('ⳃ', 'ⳃ'),
- ('ⳅ', 'ⳅ'),
- ('ⳇ', 'ⳇ'),
- ('ⳉ', 'ⳉ'),
- ('ⳋ', 'ⳋ'),
- ('ⳍ', 'ⳍ'),
- ('ⳏ', 'ⳏ'),
- ('ⳑ', 'ⳑ'),
- ('ⳓ', 'ⳓ'),
- ('ⳕ', 'ⳕ'),
- ('ⳗ', 'ⳗ'),
- ('ⳙ', 'ⳙ'),
- ('ⳛ', 'ⳛ'),
- ('ⳝ', 'ⳝ'),
- ('ⳟ', 'ⳟ'),
- ('ⳡ', 'ⳡ'),
- ('ⳣ', 'ⳣ'),
- ('ⳬ', 'ⳬ'),
- ('ⳮ', 'ⳮ'),
- ('ⳳ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ꙁ', 'ꙁ'),
- ('ꙃ', 'ꙃ'),
- ('ꙅ', 'ꙅ'),
- ('ꙇ', 'ꙇ'),
- ('ꙉ', 'ꙉ'),
- ('ꙋ', 'ꙋ'),
- ('ꙍ', 'ꙍ'),
- ('ꙏ', 'ꙏ'),
- ('ꙑ', 'ꙑ'),
- ('ꙓ', 'ꙓ'),
- ('ꙕ', 'ꙕ'),
- ('ꙗ', 'ꙗ'),
- ('ꙙ', 'ꙙ'),
- ('ꙛ', 'ꙛ'),
- ('ꙝ', 'ꙝ'),
- ('ꙟ', 'ꙟ'),
- ('ꙡ', 'ꙡ'),
- ('ꙣ', 'ꙣ'),
- ('ꙥ', 'ꙥ'),
- ('ꙧ', 'ꙧ'),
- ('ꙩ', 'ꙩ'),
- ('ꙫ', 'ꙫ'),
- ('ꙭ', 'ꙭ'),
- ('ꚁ', 'ꚁ'),
- ('ꚃ', 'ꚃ'),
- ('ꚅ', 'ꚅ'),
- ('ꚇ', 'ꚇ'),
- ('ꚉ', 'ꚉ'),
- ('ꚋ', 'ꚋ'),
- ('ꚍ', 'ꚍ'),
- ('ꚏ', 'ꚏ'),
- ('ꚑ', 'ꚑ'),
- ('ꚓ', 'ꚓ'),
- ('ꚕ', 'ꚕ'),
- ('ꚗ', 'ꚗ'),
- ('ꚙ', 'ꚙ'),
- ('ꚛ', 'ꚛ'),
- ('ꜣ', 'ꜣ'),
- ('ꜥ', 'ꜥ'),
- ('ꜧ', 'ꜧ'),
- ('ꜩ', 'ꜩ'),
- ('ꜫ', 'ꜫ'),
- ('ꜭ', 'ꜭ'),
- ('ꜯ', 'ꜯ'),
- ('ꜳ', 'ꜳ'),
- ('ꜵ', 'ꜵ'),
- ('ꜷ', 'ꜷ'),
- ('ꜹ', 'ꜹ'),
- ('ꜻ', 'ꜻ'),
- ('ꜽ', 'ꜽ'),
- ('ꜿ', 'ꜿ'),
- ('ꝁ', 'ꝁ'),
- ('ꝃ', 'ꝃ'),
- ('ꝅ', 'ꝅ'),
- ('ꝇ', 'ꝇ'),
- ('ꝉ', 'ꝉ'),
- ('ꝋ', 'ꝋ'),
- ('ꝍ', 'ꝍ'),
- ('ꝏ', 'ꝏ'),
- ('ꝑ', 'ꝑ'),
- ('ꝓ', 'ꝓ'),
- ('ꝕ', 'ꝕ'),
- ('ꝗ', 'ꝗ'),
- ('ꝙ', 'ꝙ'),
- ('ꝛ', 'ꝛ'),
- ('ꝝ', 'ꝝ'),
- ('ꝟ', 'ꝟ'),
- ('ꝡ', 'ꝡ'),
- ('ꝣ', 'ꝣ'),
- ('ꝥ', 'ꝥ'),
- ('ꝧ', 'ꝧ'),
- ('ꝩ', 'ꝩ'),
- ('ꝫ', 'ꝫ'),
- ('ꝭ', 'ꝭ'),
- ('ꝯ', 'ꝯ'),
- ('ꝺ', 'ꝺ'),
- ('ꝼ', 'ꝼ'),
- ('ꝿ', 'ꝿ'),
- ('ꞁ', 'ꞁ'),
- ('ꞃ', 'ꞃ'),
- ('ꞅ', 'ꞅ'),
- ('ꞇ', 'ꞇ'),
- ('ꞌ', 'ꞌ'),
- ('ꞑ', 'ꞑ'),
- ('ꞓ', 'ꞔ'),
- ('ꞗ', 'ꞗ'),
- ('ꞙ', 'ꞙ'),
- ('ꞛ', 'ꞛ'),
- ('ꞝ', 'ꞝ'),
- ('ꞟ', 'ꞟ'),
- ('ꞡ', 'ꞡ'),
- ('ꞣ', 'ꞣ'),
- ('ꞥ', 'ꞥ'),
- ('ꞧ', 'ꞧ'),
- ('ꞩ', 'ꞩ'),
- ('ꞵ', 'ꞵ'),
- ('ꞷ', 'ꞷ'),
- ('ꞹ', 'ꞹ'),
- ('ꞻ', 'ꞻ'),
- ('ꞽ', 'ꞽ'),
- ('ꞿ', 'ꞿ'),
- ('ꟁ', 'ꟁ'),
- ('ꟃ', 'ꟃ'),
- ('ꟈ', 'ꟈ'),
- ('ꟊ', 'ꟊ'),
- ('ꟍ', 'ꟍ'),
- ('ꟑ', 'ꟑ'),
- ('ꟗ', 'ꟗ'),
- ('ꟙ', 'ꟙ'),
- ('ꟛ', 'ꟛ'),
- ('ꟶ', 'ꟶ'),
- ('ꭓ', 'ꭓ'),
- ('ꭰ', 'ꮿ'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('a', 'z'),
- ('𐐨', '𐑏'),
- ('𐓘', '𐓻'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐳀', '𐳲'),
- ('𐵰', '𐶅'),
- ('𑣀', '𑣟'),
- ('𖹠', '𖹿'),
- ('𞤢', '𞥃'),
-];
-
-pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[
- ('a', 'z'),
- ('µ', 'µ'),
- ('ß', 'ö'),
- ('ø', 'ÿ'),
- ('ā', 'ā'),
- ('ă', 'ă'),
- ('ą', 'ą'),
- ('ć', 'ć'),
- ('ĉ', 'ĉ'),
- ('ċ', 'ċ'),
- ('č', 'č'),
- ('ď', 'ď'),
- ('đ', 'đ'),
- ('ē', 'ē'),
- ('ĕ', 'ĕ'),
- ('ė', 'ė'),
- ('ę', 'ę'),
- ('ě', 'ě'),
- ('ĝ', 'ĝ'),
- ('ğ', 'ğ'),
- ('ġ', 'ġ'),
- ('ģ', 'ģ'),
- ('ĥ', 'ĥ'),
- ('ħ', 'ħ'),
- ('ĩ', 'ĩ'),
- ('ī', 'ī'),
- ('ĭ', 'ĭ'),
- ('į', 'į'),
- ('ı', 'ı'),
- ('ij', 'ij'),
- ('ĵ', 'ĵ'),
- ('ķ', 'ķ'),
- ('ĺ', 'ĺ'),
- ('ļ', 'ļ'),
- ('ľ', 'ľ'),
- ('ŀ', 'ŀ'),
- ('ł', 'ł'),
- ('ń', 'ń'),
- ('ņ', 'ņ'),
- ('ň', 'ʼn'),
- ('ŋ', 'ŋ'),
- ('ō', 'ō'),
- ('ŏ', 'ŏ'),
- ('ő', 'ő'),
- ('œ', 'œ'),
- ('ŕ', 'ŕ'),
- ('ŗ', 'ŗ'),
- ('ř', 'ř'),
- ('ś', 'ś'),
- ('ŝ', 'ŝ'),
- ('ş', 'ş'),
- ('š', 'š'),
- ('ţ', 'ţ'),
- ('ť', 'ť'),
- ('ŧ', 'ŧ'),
- ('ũ', 'ũ'),
- ('ū', 'ū'),
- ('ŭ', 'ŭ'),
- ('ů', 'ů'),
- ('ű', 'ű'),
- ('ų', 'ų'),
- ('ŵ', 'ŵ'),
- ('ŷ', 'ŷ'),
- ('ź', 'ź'),
- ('ż', 'ż'),
- ('ž', 'ƀ'),
- ('ƃ', 'ƃ'),
- ('ƅ', 'ƅ'),
- ('ƈ', 'ƈ'),
- ('ƌ', 'ƌ'),
- ('ƒ', 'ƒ'),
- ('ƕ', 'ƕ'),
- ('ƙ', 'ƛ'),
- ('ƞ', 'ƞ'),
- ('ơ', 'ơ'),
- ('ƣ', 'ƣ'),
- ('ƥ', 'ƥ'),
- ('ƨ', 'ƨ'),
- ('ƭ', 'ƭ'),
- ('ư', 'ư'),
- ('ƴ', 'ƴ'),
- ('ƶ', 'ƶ'),
- ('ƹ', 'ƹ'),
- ('ƽ', 'ƽ'),
- ('ƿ', 'ƿ'),
- ('Dž', 'dž'),
- ('Lj', 'lj'),
- ('Nj', 'nj'),
- ('ǎ', 'ǎ'),
- ('ǐ', 'ǐ'),
- ('ǒ', 'ǒ'),
- ('ǔ', 'ǔ'),
- ('ǖ', 'ǖ'),
- ('ǘ', 'ǘ'),
- ('ǚ', 'ǚ'),
- ('ǜ', 'ǝ'),
- ('ǟ', 'ǟ'),
- ('ǡ', 'ǡ'),
- ('ǣ', 'ǣ'),
- ('ǥ', 'ǥ'),
- ('ǧ', 'ǧ'),
- ('ǩ', 'ǩ'),
- ('ǫ', 'ǫ'),
- ('ǭ', 'ǭ'),
- ('ǯ', 'ǰ'),
- ('Dz', 'dz'),
- ('ǵ', 'ǵ'),
- ('ǹ', 'ǹ'),
- ('ǻ', 'ǻ'),
- ('ǽ', 'ǽ'),
- ('ǿ', 'ǿ'),
- ('ȁ', 'ȁ'),
- ('ȃ', 'ȃ'),
- ('ȅ', 'ȅ'),
- ('ȇ', 'ȇ'),
- ('ȉ', 'ȉ'),
- ('ȋ', 'ȋ'),
- ('ȍ', 'ȍ'),
- ('ȏ', 'ȏ'),
- ('ȑ', 'ȑ'),
- ('ȓ', 'ȓ'),
- ('ȕ', 'ȕ'),
- ('ȗ', 'ȗ'),
- ('ș', 'ș'),
- ('ț', 'ț'),
- ('ȝ', 'ȝ'),
- ('ȟ', 'ȟ'),
- ('ȣ', 'ȣ'),
- ('ȥ', 'ȥ'),
- ('ȧ', 'ȧ'),
- ('ȩ', 'ȩ'),
- ('ȫ', 'ȫ'),
- ('ȭ', 'ȭ'),
- ('ȯ', 'ȯ'),
- ('ȱ', 'ȱ'),
- ('ȳ', 'ȳ'),
- ('ȼ', 'ȼ'),
- ('ȿ', 'ɀ'),
- ('ɂ', 'ɂ'),
- ('ɇ', 'ɇ'),
- ('ɉ', 'ɉ'),
- ('ɋ', 'ɋ'),
- ('ɍ', 'ɍ'),
- ('ɏ', 'ɔ'),
- ('ɖ', 'ɗ'),
- ('ə', 'ə'),
- ('ɛ', 'ɜ'),
- ('ɠ', 'ɡ'),
- ('ɣ', 'ɦ'),
- ('ɨ', 'ɬ'),
- ('ɯ', 'ɯ'),
- ('ɱ', 'ɲ'),
- ('ɵ', 'ɵ'),
- ('ɽ', 'ɽ'),
- ('ʀ', 'ʀ'),
- ('ʂ', 'ʃ'),
- ('ʇ', 'ʌ'),
- ('ʒ', 'ʒ'),
- ('ʝ', 'ʞ'),
- ('\u{345}', '\u{345}'),
- ('ͱ', 'ͱ'),
- ('ͳ', 'ͳ'),
- ('ͷ', 'ͷ'),
- ('ͻ', 'ͽ'),
- ('ΐ', 'ΐ'),
- ('ά', 'ώ'),
- ('ϐ', 'ϑ'),
- ('ϕ', 'ϗ'),
- ('ϙ', 'ϙ'),
- ('ϛ', 'ϛ'),
- ('ϝ', 'ϝ'),
- ('ϟ', 'ϟ'),
- ('ϡ', 'ϡ'),
- ('ϣ', 'ϣ'),
- ('ϥ', 'ϥ'),
- ('ϧ', 'ϧ'),
- ('ϩ', 'ϩ'),
- ('ϫ', 'ϫ'),
- ('ϭ', 'ϭ'),
- ('ϯ', 'ϳ'),
- ('ϵ', 'ϵ'),
- ('ϸ', 'ϸ'),
- ('ϻ', 'ϻ'),
- ('а', 'џ'),
- ('ѡ', 'ѡ'),
- ('ѣ', 'ѣ'),
- ('ѥ', 'ѥ'),
- ('ѧ', 'ѧ'),
- ('ѩ', 'ѩ'),
- ('ѫ', 'ѫ'),
- ('ѭ', 'ѭ'),
- ('ѯ', 'ѯ'),
- ('ѱ', 'ѱ'),
- ('ѳ', 'ѳ'),
- ('ѵ', 'ѵ'),
- ('ѷ', 'ѷ'),
- ('ѹ', 'ѹ'),
- ('ѻ', 'ѻ'),
- ('ѽ', 'ѽ'),
- ('ѿ', 'ѿ'),
- ('ҁ', 'ҁ'),
- ('ҋ', 'ҋ'),
- ('ҍ', 'ҍ'),
- ('ҏ', 'ҏ'),
- ('ґ', 'ґ'),
- ('ғ', 'ғ'),
- ('ҕ', 'ҕ'),
- ('җ', 'җ'),
- ('ҙ', 'ҙ'),
- ('қ', 'қ'),
- ('ҝ', 'ҝ'),
- ('ҟ', 'ҟ'),
- ('ҡ', 'ҡ'),
- ('ң', 'ң'),
- ('ҥ', 'ҥ'),
- ('ҧ', 'ҧ'),
- ('ҩ', 'ҩ'),
- ('ҫ', 'ҫ'),
- ('ҭ', 'ҭ'),
- ('ү', 'ү'),
- ('ұ', 'ұ'),
- ('ҳ', 'ҳ'),
- ('ҵ', 'ҵ'),
- ('ҷ', 'ҷ'),
- ('ҹ', 'ҹ'),
- ('һ', 'һ'),
- ('ҽ', 'ҽ'),
- ('ҿ', 'ҿ'),
- ('ӂ', 'ӂ'),
- ('ӄ', 'ӄ'),
- ('ӆ', 'ӆ'),
- ('ӈ', 'ӈ'),
- ('ӊ', 'ӊ'),
- ('ӌ', 'ӌ'),
- ('ӎ', 'ӏ'),
- ('ӑ', 'ӑ'),
- ('ӓ', 'ӓ'),
- ('ӕ', 'ӕ'),
- ('ӗ', 'ӗ'),
- ('ә', 'ә'),
- ('ӛ', 'ӛ'),
- ('ӝ', 'ӝ'),
- ('ӟ', 'ӟ'),
- ('ӡ', 'ӡ'),
- ('ӣ', 'ӣ'),
- ('ӥ', 'ӥ'),
- ('ӧ', 'ӧ'),
- ('ө', 'ө'),
- ('ӫ', 'ӫ'),
- ('ӭ', 'ӭ'),
- ('ӯ', 'ӯ'),
- ('ӱ', 'ӱ'),
- ('ӳ', 'ӳ'),
- ('ӵ', 'ӵ'),
- ('ӷ', 'ӷ'),
- ('ӹ', 'ӹ'),
- ('ӻ', 'ӻ'),
- ('ӽ', 'ӽ'),
- ('ӿ', 'ӿ'),
- ('ԁ', 'ԁ'),
- ('ԃ', 'ԃ'),
- ('ԅ', 'ԅ'),
- ('ԇ', 'ԇ'),
- ('ԉ', 'ԉ'),
- ('ԋ', 'ԋ'),
- ('ԍ', 'ԍ'),
- ('ԏ', 'ԏ'),
- ('ԑ', 'ԑ'),
- ('ԓ', 'ԓ'),
- ('ԕ', 'ԕ'),
- ('ԗ', 'ԗ'),
- ('ԙ', 'ԙ'),
- ('ԛ', 'ԛ'),
- ('ԝ', 'ԝ'),
- ('ԟ', 'ԟ'),
- ('ԡ', 'ԡ'),
- ('ԣ', 'ԣ'),
- ('ԥ', 'ԥ'),
- ('ԧ', 'ԧ'),
- ('ԩ', 'ԩ'),
- ('ԫ', 'ԫ'),
- ('ԭ', 'ԭ'),
- ('ԯ', 'ԯ'),
- ('ա', 'և'),
- ('ა', 'ჺ'),
- ('ჽ', 'ჿ'),
- ('ᏸ', 'ᏽ'),
- ('ᲀ', 'ᲈ'),
- ('ᲊ', 'ᲊ'),
- ('ᵹ', 'ᵹ'),
- ('ᵽ', 'ᵽ'),
- ('ᶎ', 'ᶎ'),
- ('ḁ', 'ḁ'),
- ('ḃ', 'ḃ'),
- ('ḅ', 'ḅ'),
- ('ḇ', 'ḇ'),
- ('ḉ', 'ḉ'),
- ('ḋ', 'ḋ'),
- ('ḍ', 'ḍ'),
- ('ḏ', 'ḏ'),
- ('ḑ', 'ḑ'),
- ('ḓ', 'ḓ'),
- ('ḕ', 'ḕ'),
- ('ḗ', 'ḗ'),
- ('ḙ', 'ḙ'),
- ('ḛ', 'ḛ'),
- ('ḝ', 'ḝ'),
- ('ḟ', 'ḟ'),
- ('ḡ', 'ḡ'),
- ('ḣ', 'ḣ'),
- ('ḥ', 'ḥ'),
- ('ḧ', 'ḧ'),
- ('ḩ', 'ḩ'),
- ('ḫ', 'ḫ'),
- ('ḭ', 'ḭ'),
- ('ḯ', 'ḯ'),
- ('ḱ', 'ḱ'),
- ('ḳ', 'ḳ'),
- ('ḵ', 'ḵ'),
- ('ḷ', 'ḷ'),
- ('ḹ', 'ḹ'),
- ('ḻ', 'ḻ'),
- ('ḽ', 'ḽ'),
- ('ḿ', 'ḿ'),
- ('ṁ', 'ṁ'),
- ('ṃ', 'ṃ'),
- ('ṅ', 'ṅ'),
- ('ṇ', 'ṇ'),
- ('ṉ', 'ṉ'),
- ('ṋ', 'ṋ'),
- ('ṍ', 'ṍ'),
- ('ṏ', 'ṏ'),
- ('ṑ', 'ṑ'),
- ('ṓ', 'ṓ'),
- ('ṕ', 'ṕ'),
- ('ṗ', 'ṗ'),
- ('ṙ', 'ṙ'),
- ('ṛ', 'ṛ'),
- ('ṝ', 'ṝ'),
- ('ṟ', 'ṟ'),
- ('ṡ', 'ṡ'),
- ('ṣ', 'ṣ'),
- ('ṥ', 'ṥ'),
- ('ṧ', 'ṧ'),
- ('ṩ', 'ṩ'),
- ('ṫ', 'ṫ'),
- ('ṭ', 'ṭ'),
- ('ṯ', 'ṯ'),
- ('ṱ', 'ṱ'),
- ('ṳ', 'ṳ'),
- ('ṵ', 'ṵ'),
- ('ṷ', 'ṷ'),
- ('ṹ', 'ṹ'),
- ('ṻ', 'ṻ'),
- ('ṽ', 'ṽ'),
- ('ṿ', 'ṿ'),
- ('ẁ', 'ẁ'),
- ('ẃ', 'ẃ'),
- ('ẅ', 'ẅ'),
- ('ẇ', 'ẇ'),
- ('ẉ', 'ẉ'),
- ('ẋ', 'ẋ'),
- ('ẍ', 'ẍ'),
- ('ẏ', 'ẏ'),
- ('ẑ', 'ẑ'),
- ('ẓ', 'ẓ'),
- ('ẕ', 'ẛ'),
- ('ạ', 'ạ'),
- ('ả', 'ả'),
- ('ấ', 'ấ'),
- ('ầ', 'ầ'),
- ('ẩ', 'ẩ'),
- ('ẫ', 'ẫ'),
- ('ậ', 'ậ'),
- ('ắ', 'ắ'),
- ('ằ', 'ằ'),
- ('ẳ', 'ẳ'),
- ('ẵ', 'ẵ'),
- ('ặ', 'ặ'),
- ('ẹ', 'ẹ'),
- ('ẻ', 'ẻ'),
- ('ẽ', 'ẽ'),
- ('ế', 'ế'),
- ('ề', 'ề'),
- ('ể', 'ể'),
- ('ễ', 'ễ'),
- ('ệ', 'ệ'),
- ('ỉ', 'ỉ'),
- ('ị', 'ị'),
- ('ọ', 'ọ'),
- ('ỏ', 'ỏ'),
- ('ố', 'ố'),
- ('ồ', 'ồ'),
- ('ổ', 'ổ'),
- ('ỗ', 'ỗ'),
- ('ộ', 'ộ'),
- ('ớ', 'ớ'),
- ('ờ', 'ờ'),
- ('ở', 'ở'),
- ('ỡ', 'ỡ'),
- ('ợ', 'ợ'),
- ('ụ', 'ụ'),
- ('ủ', 'ủ'),
- ('ứ', 'ứ'),
- ('ừ', 'ừ'),
- ('ử', 'ử'),
- ('ữ', 'ữ'),
- ('ự', 'ự'),
- ('ỳ', 'ỳ'),
- ('ỵ', 'ỵ'),
- ('ỷ', 'ỷ'),
- ('ỹ', 'ỹ'),
- ('ỻ', 'ỻ'),
- ('ỽ', 'ỽ'),
- ('ỿ', 'ἇ'),
- ('ἐ', 'ἕ'),
- ('ἠ', 'ἧ'),
- ('ἰ', 'ἷ'),
- ('ὀ', 'ὅ'),
- ('ὐ', 'ὗ'),
- ('ὠ', 'ὧ'),
- ('ὰ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ᾷ'),
- ('ᾼ', 'ᾼ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῇ'),
- ('ῌ', 'ῌ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'ῗ'),
- ('ῠ', 'ῧ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῷ'),
- ('ῼ', 'ῼ'),
- ('ⅎ', 'ⅎ'),
- ('ⅰ', 'ⅿ'),
- ('ↄ', 'ↄ'),
- ('ⓐ', 'ⓩ'),
- ('ⰰ', 'ⱟ'),
- ('ⱡ', 'ⱡ'),
- ('ⱥ', 'ⱦ'),
- ('ⱨ', 'ⱨ'),
- ('ⱪ', 'ⱪ'),
- ('ⱬ', 'ⱬ'),
- ('ⱳ', 'ⱳ'),
- ('ⱶ', 'ⱶ'),
- ('ⲁ', 'ⲁ'),
- ('ⲃ', 'ⲃ'),
- ('ⲅ', 'ⲅ'),
- ('ⲇ', 'ⲇ'),
- ('ⲉ', 'ⲉ'),
- ('ⲋ', 'ⲋ'),
- ('ⲍ', 'ⲍ'),
- ('ⲏ', 'ⲏ'),
- ('ⲑ', 'ⲑ'),
- ('ⲓ', 'ⲓ'),
- ('ⲕ', 'ⲕ'),
- ('ⲗ', 'ⲗ'),
- ('ⲙ', 'ⲙ'),
- ('ⲛ', 'ⲛ'),
- ('ⲝ', 'ⲝ'),
- ('ⲟ', 'ⲟ'),
- ('ⲡ', 'ⲡ'),
- ('ⲣ', 'ⲣ'),
- ('ⲥ', 'ⲥ'),
- ('ⲧ', 'ⲧ'),
- ('ⲩ', 'ⲩ'),
- ('ⲫ', 'ⲫ'),
- ('ⲭ', 'ⲭ'),
- ('ⲯ', 'ⲯ'),
- ('ⲱ', 'ⲱ'),
- ('ⲳ', 'ⲳ'),
- ('ⲵ', 'ⲵ'),
- ('ⲷ', 'ⲷ'),
- ('ⲹ', 'ⲹ'),
- ('ⲻ', 'ⲻ'),
- ('ⲽ', 'ⲽ'),
- ('ⲿ', 'ⲿ'),
- ('ⳁ', 'ⳁ'),
- ('ⳃ', 'ⳃ'),
- ('ⳅ', 'ⳅ'),
- ('ⳇ', 'ⳇ'),
- ('ⳉ', 'ⳉ'),
- ('ⳋ', 'ⳋ'),
- ('ⳍ', 'ⳍ'),
- ('ⳏ', 'ⳏ'),
- ('ⳑ', 'ⳑ'),
- ('ⳓ', 'ⳓ'),
- ('ⳕ', 'ⳕ'),
- ('ⳗ', 'ⳗ'),
- ('ⳙ', 'ⳙ'),
- ('ⳛ', 'ⳛ'),
- ('ⳝ', 'ⳝ'),
- ('ⳟ', 'ⳟ'),
- ('ⳡ', 'ⳡ'),
- ('ⳣ', 'ⳣ'),
- ('ⳬ', 'ⳬ'),
- ('ⳮ', 'ⳮ'),
- ('ⳳ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ꙁ', 'ꙁ'),
- ('ꙃ', 'ꙃ'),
- ('ꙅ', 'ꙅ'),
- ('ꙇ', 'ꙇ'),
- ('ꙉ', 'ꙉ'),
- ('ꙋ', 'ꙋ'),
- ('ꙍ', 'ꙍ'),
- ('ꙏ', 'ꙏ'),
- ('ꙑ', 'ꙑ'),
- ('ꙓ', 'ꙓ'),
- ('ꙕ', 'ꙕ'),
- ('ꙗ', 'ꙗ'),
- ('ꙙ', 'ꙙ'),
- ('ꙛ', 'ꙛ'),
- ('ꙝ', 'ꙝ'),
- ('ꙟ', 'ꙟ'),
- ('ꙡ', 'ꙡ'),
- ('ꙣ', 'ꙣ'),
- ('ꙥ', 'ꙥ'),
- ('ꙧ', 'ꙧ'),
- ('ꙩ', 'ꙩ'),
- ('ꙫ', 'ꙫ'),
- ('ꙭ', 'ꙭ'),
- ('ꚁ', 'ꚁ'),
- ('ꚃ', 'ꚃ'),
- ('ꚅ', 'ꚅ'),
- ('ꚇ', 'ꚇ'),
- ('ꚉ', 'ꚉ'),
- ('ꚋ', 'ꚋ'),
- ('ꚍ', 'ꚍ'),
- ('ꚏ', 'ꚏ'),
- ('ꚑ', 'ꚑ'),
- ('ꚓ', 'ꚓ'),
- ('ꚕ', 'ꚕ'),
- ('ꚗ', 'ꚗ'),
- ('ꚙ', 'ꚙ'),
- ('ꚛ', 'ꚛ'),
- ('ꜣ', 'ꜣ'),
- ('ꜥ', 'ꜥ'),
- ('ꜧ', 'ꜧ'),
- ('ꜩ', 'ꜩ'),
- ('ꜫ', 'ꜫ'),
- ('ꜭ', 'ꜭ'),
- ('ꜯ', 'ꜯ'),
- ('ꜳ', 'ꜳ'),
- ('ꜵ', 'ꜵ'),
- ('ꜷ', 'ꜷ'),
- ('ꜹ', 'ꜹ'),
- ('ꜻ', 'ꜻ'),
- ('ꜽ', 'ꜽ'),
- ('ꜿ', 'ꜿ'),
- ('ꝁ', 'ꝁ'),
- ('ꝃ', 'ꝃ'),
- ('ꝅ', 'ꝅ'),
- ('ꝇ', 'ꝇ'),
- ('ꝉ', 'ꝉ'),
- ('ꝋ', 'ꝋ'),
- ('ꝍ', 'ꝍ'),
- ('ꝏ', 'ꝏ'),
- ('ꝑ', 'ꝑ'),
- ('ꝓ', 'ꝓ'),
- ('ꝕ', 'ꝕ'),
- ('ꝗ', 'ꝗ'),
- ('ꝙ', 'ꝙ'),
- ('ꝛ', 'ꝛ'),
- ('ꝝ', 'ꝝ'),
- ('ꝟ', 'ꝟ'),
- ('ꝡ', 'ꝡ'),
- ('ꝣ', 'ꝣ'),
- ('ꝥ', 'ꝥ'),
- ('ꝧ', 'ꝧ'),
- ('ꝩ', 'ꝩ'),
- ('ꝫ', 'ꝫ'),
- ('ꝭ', 'ꝭ'),
- ('ꝯ', 'ꝯ'),
- ('ꝺ', 'ꝺ'),
- ('ꝼ', 'ꝼ'),
- ('ꝿ', 'ꝿ'),
- ('ꞁ', 'ꞁ'),
- ('ꞃ', 'ꞃ'),
- ('ꞅ', 'ꞅ'),
- ('ꞇ', 'ꞇ'),
- ('ꞌ', 'ꞌ'),
- ('ꞑ', 'ꞑ'),
- ('ꞓ', 'ꞔ'),
- ('ꞗ', 'ꞗ'),
- ('ꞙ', 'ꞙ'),
- ('ꞛ', 'ꞛ'),
- ('ꞝ', 'ꞝ'),
- ('ꞟ', 'ꞟ'),
- ('ꞡ', 'ꞡ'),
- ('ꞣ', 'ꞣ'),
- ('ꞥ', 'ꞥ'),
- ('ꞧ', 'ꞧ'),
- ('ꞩ', 'ꞩ'),
- ('ꞵ', 'ꞵ'),
- ('ꞷ', 'ꞷ'),
- ('ꞹ', 'ꞹ'),
- ('ꞻ', 'ꞻ'),
- ('ꞽ', 'ꞽ'),
- ('ꞿ', 'ꞿ'),
- ('ꟁ', 'ꟁ'),
- ('ꟃ', 'ꟃ'),
- ('ꟈ', 'ꟈ'),
- ('ꟊ', 'ꟊ'),
- ('ꟍ', 'ꟍ'),
- ('ꟑ', 'ꟑ'),
- ('ꟗ', 'ꟗ'),
- ('ꟙ', 'ꟙ'),
- ('ꟛ', 'ꟛ'),
- ('ꟶ', 'ꟶ'),
- ('ꭓ', 'ꭓ'),
- ('ꭰ', 'ꮿ'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('a', 'z'),
- ('𐐨', '𐑏'),
- ('𐓘', '𐓻'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐳀', '𐳲'),
- ('𐵰', '𐶅'),
- ('𑣀', '𑣟'),
- ('𖹠', '𖹿'),
- ('𞤢', '𞥃'),
-];
-
-pub const DASH: &'static [(char, char)] = &[
- ('-', '-'),
- ('֊', '֊'),
- ('־', '־'),
- ('᐀', '᐀'),
- ('᠆', '᠆'),
- ('‐', '―'),
- ('⁓', '⁓'),
- ('⁻', '⁻'),
- ('₋', '₋'),
- ('−', '−'),
- ('⸗', '⸗'),
- ('⸚', '⸚'),
- ('⸺', '⸻'),
- ('⹀', '⹀'),
- ('⹝', '⹝'),
- ('〜', '〜'),
- ('〰', '〰'),
- ('゠', '゠'),
- ('︱', '︲'),
- ('﹘', '﹘'),
- ('﹣', '﹣'),
- ('-', '-'),
- ('𐵮', '𐵮'),
- ('𐺭', '𐺭'),
-];
-
-pub const DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[
- ('\u{ad}', '\u{ad}'),
- ('\u{34f}', '\u{34f}'),
- ('\u{61c}', '\u{61c}'),
- ('ᅟ', 'ᅠ'),
- ('\u{17b4}', '\u{17b5}'),
- ('\u{180b}', '\u{180f}'),
- ('\u{200b}', '\u{200f}'),
- ('\u{202a}', '\u{202e}'),
- ('\u{2060}', '\u{206f}'),
- ('ㅤ', 'ㅤ'),
- ('\u{fe00}', '\u{fe0f}'),
- ('\u{feff}', '\u{feff}'),
- ('ᅠ', 'ᅠ'),
- ('\u{fff0}', '\u{fff8}'),
- ('\u{1bca0}', '\u{1bca3}'),
- ('\u{1d173}', '\u{1d17a}'),
- ('\u{e0000}', '\u{e0fff}'),
-];
-
-pub const DEPRECATED: &'static [(char, char)] = &[
- ('ʼn', 'ʼn'),
- ('ٳ', 'ٳ'),
- ('\u{f77}', '\u{f77}'),
- ('\u{f79}', '\u{f79}'),
- ('ឣ', 'ឤ'),
- ('\u{206a}', '\u{206f}'),
- ('〈', '〉'),
- ('\u{e0001}', '\u{e0001}'),
-];
-
-pub const DIACRITIC: &'static [(char, char)] = &[
- ('^', '^'),
- ('`', '`'),
- ('¨', '¨'),
- ('¯', '¯'),
- ('´', '´'),
- ('·', '¸'),
- ('ʰ', '\u{34e}'),
- ('\u{350}', '\u{357}'),
- ('\u{35d}', '\u{362}'),
- ('ʹ', '͵'),
- ('ͺ', 'ͺ'),
- ('΄', '΅'),
- ('\u{483}', '\u{487}'),
- ('ՙ', 'ՙ'),
- ('\u{591}', '\u{5a1}'),
- ('\u{5a3}', '\u{5bd}'),
- ('\u{5bf}', '\u{5bf}'),
- ('\u{5c1}', '\u{5c2}'),
- ('\u{5c4}', '\u{5c4}'),
- ('\u{64b}', '\u{652}'),
- ('\u{657}', '\u{658}'),
- ('\u{6df}', '\u{6e0}'),
- ('ۥ', 'ۦ'),
- ('\u{6ea}', '\u{6ec}'),
- ('\u{730}', '\u{74a}'),
- ('\u{7a6}', '\u{7b0}'),
- ('\u{7eb}', 'ߵ'),
- ('\u{818}', '\u{819}'),
- ('\u{898}', '\u{89f}'),
- ('ࣉ', '\u{8d2}'),
- ('\u{8e3}', '\u{8fe}'),
- ('\u{93c}', '\u{93c}'),
- ('\u{94d}', '\u{94d}'),
- ('\u{951}', '\u{954}'),
- ('ॱ', 'ॱ'),
- ('\u{9bc}', '\u{9bc}'),
- ('\u{9cd}', '\u{9cd}'),
- ('\u{a3c}', '\u{a3c}'),
- ('\u{a4d}', '\u{a4d}'),
- ('\u{abc}', '\u{abc}'),
- ('\u{acd}', '\u{acd}'),
- ('\u{afd}', '\u{aff}'),
- ('\u{b3c}', '\u{b3c}'),
- ('\u{b4d}', '\u{b4d}'),
- ('\u{b55}', '\u{b55}'),
- ('\u{bcd}', '\u{bcd}'),
- ('\u{c3c}', '\u{c3c}'),
- ('\u{c4d}', '\u{c4d}'),
- ('\u{cbc}', '\u{cbc}'),
- ('\u{ccd}', '\u{ccd}'),
- ('\u{d3b}', '\u{d3c}'),
- ('\u{d4d}', '\u{d4d}'),
- ('\u{dca}', '\u{dca}'),
- ('\u{e3a}', '\u{e3a}'),
- ('\u{e47}', '\u{e4c}'),
- ('\u{e4e}', '\u{e4e}'),
- ('\u{eba}', '\u{eba}'),
- ('\u{ec8}', '\u{ecc}'),
- ('\u{f18}', '\u{f19}'),
- ('\u{f35}', '\u{f35}'),
- ('\u{f37}', '\u{f37}'),
- ('\u{f39}', '\u{f39}'),
- ('༾', '༿'),
- ('\u{f82}', '\u{f84}'),
- ('\u{f86}', '\u{f87}'),
- ('\u{fc6}', '\u{fc6}'),
- ('\u{1037}', '\u{1037}'),
- ('\u{1039}', '\u{103a}'),
- ('ၣ', 'ၤ'),
- ('ၩ', 'ၭ'),
- ('ႇ', '\u{108d}'),
- ('ႏ', 'ႏ'),
- ('ႚ', 'ႛ'),
- ('\u{135d}', '\u{135f}'),
- ('\u{1714}', '\u{1715}'),
- ('\u{1734}', '\u{1734}'),
- ('\u{17c9}', '\u{17d3}'),
- ('\u{17dd}', '\u{17dd}'),
- ('\u{1939}', '\u{193b}'),
- ('\u{1a60}', '\u{1a60}'),
- ('\u{1a75}', '\u{1a7c}'),
- ('\u{1a7f}', '\u{1a7f}'),
- ('\u{1ab0}', '\u{1abe}'),
- ('\u{1ac1}', '\u{1acb}'),
- ('\u{1b34}', '\u{1b34}'),
- ('\u{1b44}', '\u{1b44}'),
- ('\u{1b6b}', '\u{1b73}'),
- ('\u{1baa}', '\u{1bab}'),
- ('\u{1be6}', '\u{1be6}'),
- ('\u{1bf2}', '\u{1bf3}'),
- ('\u{1c36}', '\u{1c37}'),
- ('ᱸ', 'ᱽ'),
- ('\u{1cd0}', '\u{1ce8}'),
- ('\u{1ced}', '\u{1ced}'),
- ('\u{1cf4}', '\u{1cf4}'),
- ('᳷', '\u{1cf9}'),
- ('ᴬ', 'ᵪ'),
- ('\u{1dc4}', '\u{1dcf}'),
- ('\u{1df5}', '\u{1dff}'),
- ('᾽', '᾽'),
- ('᾿', '῁'),
- ('῍', '῏'),
- ('῝', '῟'),
- ('῭', '`'),
- ('´', '῾'),
- ('\u{2cef}', '\u{2cf1}'),
- ('ⸯ', 'ⸯ'),
- ('\u{302a}', '\u{302f}'),
- ('\u{3099}', '゜'),
- ('ー', 'ー'),
- ('\u{a66f}', '\u{a66f}'),
- ('\u{a67c}', '\u{a67d}'),
- ('ꙿ', 'ꙿ'),
- ('ꚜ', 'ꚝ'),
- ('\u{a6f0}', '\u{a6f1}'),
- ('꜀', '꜡'),
- ('ꞈ', '꞊'),
- ('ꟸ', 'ꟹ'),
- ('\u{a806}', '\u{a806}'),
- ('\u{a82c}', '\u{a82c}'),
- ('\u{a8c4}', '\u{a8c4}'),
- ('\u{a8e0}', '\u{a8f1}'),
- ('\u{a92b}', '꤮'),
- ('\u{a953}', '\u{a953}'),
- ('\u{a9b3}', '\u{a9b3}'),
- ('\u{a9c0}', '\u{a9c0}'),
- ('\u{a9e5}', '\u{a9e5}'),
- ('ꩻ', 'ꩽ'),
- ('\u{aabf}', 'ꫂ'),
- ('\u{aaf6}', '\u{aaf6}'),
- ('꭛', 'ꭟ'),
- ('ꭩ', '꭫'),
- ('꯬', '\u{abed}'),
- ('\u{fb1e}', '\u{fb1e}'),
- ('\u{fe20}', '\u{fe2f}'),
- ('^', '^'),
- ('`', '`'),
- ('ー', 'ー'),
- ('\u{ff9e}', '\u{ff9f}'),
- (' ̄', ' ̄'),
- ('\u{102e0}', '\u{102e0}'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '\u{10a3f}'),
- ('\u{10ae5}', '\u{10ae6}'),
- ('𐴢', '\u{10d27}'),
- ('𐵎', '𐵎'),
- ('\u{10d69}', '\u{10d6d}'),
- ('\u{10efd}', '\u{10eff}'),
- ('\u{10f46}', '\u{10f50}'),
- ('\u{10f82}', '\u{10f85}'),
- ('\u{11046}', '\u{11046}'),
- ('\u{11070}', '\u{11070}'),
- ('\u{110b9}', '\u{110ba}'),
- ('\u{11133}', '\u{11134}'),
- ('\u{11173}', '\u{11173}'),
- ('\u{111c0}', '\u{111c0}'),
- ('\u{111ca}', '\u{111cc}'),
- ('\u{11235}', '\u{11236}'),
- ('\u{112e9}', '\u{112ea}'),
- ('\u{1133b}', '\u{1133c}'),
- ('\u{1134d}', '\u{1134d}'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
- ('\u{113ce}', '\u{113d0}'),
- ('\u{113d2}', '𑏓'),
- ('\u{113e1}', '\u{113e2}'),
- ('\u{11442}', '\u{11442}'),
- ('\u{11446}', '\u{11446}'),
- ('\u{114c2}', '\u{114c3}'),
- ('\u{115bf}', '\u{115c0}'),
- ('\u{1163f}', '\u{1163f}'),
- ('\u{116b6}', '\u{116b7}'),
- ('\u{1172b}', '\u{1172b}'),
- ('\u{11839}', '\u{1183a}'),
- ('\u{1193d}', '\u{1193e}'),
- ('\u{11943}', '\u{11943}'),
- ('\u{119e0}', '\u{119e0}'),
- ('\u{11a34}', '\u{11a34}'),
- ('\u{11a47}', '\u{11a47}'),
- ('\u{11a99}', '\u{11a99}'),
- ('\u{11c3f}', '\u{11c3f}'),
- ('\u{11d42}', '\u{11d42}'),
- ('\u{11d44}', '\u{11d45}'),
- ('\u{11d97}', '\u{11d97}'),
- ('\u{11f41}', '\u{11f42}'),
- ('\u{11f5a}', '\u{11f5a}'),
- ('\u{13447}', '\u{13455}'),
- ('\u{1612f}', '\u{1612f}'),
- ('\u{16af0}', '\u{16af4}'),
- ('\u{16b30}', '\u{16b36}'),
- ('𖵫', '𖵬'),
- ('\u{16f8f}', '𖾟'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('\u{1d167}', '\u{1d169}'),
- ('\u{1d16d}', '\u{1d172}'),
- ('\u{1d17b}', '\u{1d182}'),
- ('\u{1d185}', '\u{1d18b}'),
- ('\u{1d1aa}', '\u{1d1ad}'),
- ('𞀰', '𞁭'),
- ('\u{1e130}', '\u{1e136}'),
- ('\u{1e2ae}', '\u{1e2ae}'),
- ('\u{1e2ec}', '\u{1e2ef}'),
- ('\u{1e5ee}', '\u{1e5ef}'),
- ('\u{1e8d0}', '\u{1e8d6}'),
- ('\u{1e944}', '\u{1e946}'),
- ('\u{1e948}', '\u{1e94a}'),
-];
-
-pub const EMOJI: &'static [(char, char)] = &[
- ('#', '#'),
- ('*', '*'),
- ('0', '9'),
- ('©', '©'),
- ('®', '®'),
- ('‼', '‼'),
- ('⁉', '⁉'),
- ('™', '™'),
- ('ℹ', 'ℹ'),
- ('↔', '↙'),
- ('↩', '↪'),
- ('⌚', '⌛'),
- ('⌨', '⌨'),
- ('⏏', '⏏'),
- ('⏩', '⏳'),
- ('⏸', '⏺'),
- ('Ⓜ', 'Ⓜ'),
- ('▪', '▫'),
- ('▶', '▶'),
- ('◀', '◀'),
- ('◻', '◾'),
- ('☀', '☄'),
- ('☎', '☎'),
- ('☑', '☑'),
- ('☔', '☕'),
- ('☘', '☘'),
- ('☝', '☝'),
- ('☠', '☠'),
- ('☢', '☣'),
- ('☦', '☦'),
- ('☪', '☪'),
- ('☮', '☯'),
- ('☸', '☺'),
- ('♀', '♀'),
- ('♂', '♂'),
- ('♈', '♓'),
- ('♟', '♠'),
- ('♣', '♣'),
- ('♥', '♦'),
- ('♨', '♨'),
- ('♻', '♻'),
- ('♾', '♿'),
- ('⚒', '⚗'),
- ('⚙', '⚙'),
- ('⚛', '⚜'),
- ('⚠', '⚡'),
- ('⚧', '⚧'),
- ('⚪', '⚫'),
- ('⚰', '⚱'),
- ('⚽', '⚾'),
- ('⛄', '⛅'),
- ('⛈', '⛈'),
- ('⛎', '⛏'),
- ('⛑', '⛑'),
- ('⛓', '⛔'),
- ('⛩', '⛪'),
- ('⛰', '⛵'),
- ('⛷', '⛺'),
- ('⛽', '⛽'),
- ('✂', '✂'),
- ('✅', '✅'),
- ('✈', '✍'),
- ('✏', '✏'),
- ('✒', '✒'),
- ('✔', '✔'),
- ('✖', '✖'),
- ('✝', '✝'),
- ('✡', '✡'),
- ('✨', '✨'),
- ('✳', '✴'),
- ('❄', '❄'),
- ('❇', '❇'),
- ('❌', '❌'),
- ('❎', '❎'),
- ('❓', '❕'),
- ('❗', '❗'),
- ('❣', '❤'),
- ('➕', '➗'),
- ('➡', '➡'),
- ('➰', '➰'),
- ('➿', '➿'),
- ('⤴', '⤵'),
- ('⬅', '⬇'),
- ('⬛', '⬜'),
- ('⭐', '⭐'),
- ('⭕', '⭕'),
- ('〰', '〰'),
- ('〽', '〽'),
- ('㊗', '㊗'),
- ('㊙', '㊙'),
- ('🀄', '🀄'),
- ('🃏', '🃏'),
- ('🅰', '🅱'),
- ('🅾', '🅿'),
- ('🆎', '🆎'),
- ('🆑', '🆚'),
- ('🇦', '🇿'),
- ('🈁', '🈂'),
- ('🈚', '🈚'),
- ('🈯', '🈯'),
- ('🈲', '🈺'),
- ('🉐', '🉑'),
- ('🌀', '🌡'),
- ('🌤', '🎓'),
- ('🎖', '🎗'),
- ('🎙', '🎛'),
- ('🎞', '🏰'),
- ('🏳', '🏵'),
- ('🏷', '📽'),
- ('📿', '🔽'),
- ('🕉', '🕎'),
- ('🕐', '🕧'),
- ('🕯', '🕰'),
- ('🕳', '🕺'),
- ('🖇', '🖇'),
- ('🖊', '🖍'),
- ('🖐', '🖐'),
- ('🖕', '🖖'),
- ('🖤', '🖥'),
- ('🖨', '🖨'),
- ('🖱', '🖲'),
- ('🖼', '🖼'),
- ('🗂', '🗄'),
- ('🗑', '🗓'),
- ('🗜', '🗞'),
- ('🗡', '🗡'),
- ('🗣', '🗣'),
- ('🗨', '🗨'),
- ('🗯', '🗯'),
- ('🗳', '🗳'),
- ('🗺', '🙏'),
- ('🚀', '🛅'),
- ('🛋', '🛒'),
- ('🛕', '🛗'),
- ('🛜', '🛥'),
- ('🛩', '🛩'),
- ('🛫', '🛬'),
- ('🛰', '🛰'),
- ('🛳', '🛼'),
- ('🟠', '🟫'),
- ('🟰', '🟰'),
- ('🤌', '🤺'),
- ('🤼', '🥅'),
- ('🥇', '🧿'),
- ('🩰', '🩼'),
- ('🪀', '🪉'),
- ('🪏', '🫆'),
- ('🫎', '🫜'),
- ('🫟', '🫩'),
- ('🫰', '🫸'),
-];
-
-pub const EMOJI_COMPONENT: &'static [(char, char)] = &[
- ('#', '#'),
- ('*', '*'),
- ('0', '9'),
- ('\u{200d}', '\u{200d}'),
- ('\u{20e3}', '\u{20e3}'),
- ('\u{fe0f}', '\u{fe0f}'),
- ('🇦', '🇿'),
- ('🏻', '🏿'),
- ('🦰', '🦳'),
- ('\u{e0020}', '\u{e007f}'),
-];
-
-pub const EMOJI_MODIFIER: &'static [(char, char)] = &[('🏻', '🏿')];
-
-pub const EMOJI_MODIFIER_BASE: &'static [(char, char)] = &[
- ('☝', '☝'),
- ('⛹', '⛹'),
- ('✊', '✍'),
- ('🎅', '🎅'),
- ('🏂', '🏄'),
- ('🏇', '🏇'),
- ('🏊', '🏌'),
- ('👂', '👃'),
- ('👆', '👐'),
- ('👦', '👸'),
- ('👼', '👼'),
- ('💁', '💃'),
- ('💅', '💇'),
- ('💏', '💏'),
- ('💑', '💑'),
- ('💪', '💪'),
- ('🕴', '🕵'),
- ('🕺', '🕺'),
- ('🖐', '🖐'),
- ('🖕', '🖖'),
- ('🙅', '🙇'),
- ('🙋', '🙏'),
- ('🚣', '🚣'),
- ('🚴', '🚶'),
- ('🛀', '🛀'),
- ('🛌', '🛌'),
- ('🤌', '🤌'),
- ('🤏', '🤏'),
- ('🤘', '🤟'),
- ('🤦', '🤦'),
- ('🤰', '🤹'),
- ('🤼', '🤾'),
- ('🥷', '🥷'),
- ('🦵', '🦶'),
- ('🦸', '🦹'),
- ('🦻', '🦻'),
- ('🧍', '🧏'),
- ('🧑', '🧝'),
- ('🫃', '🫅'),
- ('🫰', '🫸'),
-];
-
-pub const EMOJI_PRESENTATION: &'static [(char, char)] = &[
- ('⌚', '⌛'),
- ('⏩', '⏬'),
- ('⏰', '⏰'),
- ('⏳', '⏳'),
- ('◽', '◾'),
- ('☔', '☕'),
- ('♈', '♓'),
- ('♿', '♿'),
- ('⚓', '⚓'),
- ('⚡', '⚡'),
- ('⚪', '⚫'),
- ('⚽', '⚾'),
- ('⛄', '⛅'),
- ('⛎', '⛎'),
- ('⛔', '⛔'),
- ('⛪', '⛪'),
- ('⛲', '⛳'),
- ('⛵', '⛵'),
- ('⛺', '⛺'),
- ('⛽', '⛽'),
- ('✅', '✅'),
- ('✊', '✋'),
- ('✨', '✨'),
- ('❌', '❌'),
- ('❎', '❎'),
- ('❓', '❕'),
- ('❗', '❗'),
- ('➕', '➗'),
- ('➰', '➰'),
- ('➿', '➿'),
- ('⬛', '⬜'),
- ('⭐', '⭐'),
- ('⭕', '⭕'),
- ('🀄', '🀄'),
- ('🃏', '🃏'),
- ('🆎', '🆎'),
- ('🆑', '🆚'),
- ('🇦', '🇿'),
- ('🈁', '🈁'),
- ('🈚', '🈚'),
- ('🈯', '🈯'),
- ('🈲', '🈶'),
- ('🈸', '🈺'),
- ('🉐', '🉑'),
- ('🌀', '🌠'),
- ('🌭', '🌵'),
- ('🌷', '🍼'),
- ('🍾', '🎓'),
- ('🎠', '🏊'),
- ('🏏', '🏓'),
- ('🏠', '🏰'),
- ('🏴', '🏴'),
- ('🏸', '🐾'),
- ('👀', '👀'),
- ('👂', '📼'),
- ('📿', '🔽'),
- ('🕋', '🕎'),
- ('🕐', '🕧'),
- ('🕺', '🕺'),
- ('🖕', '🖖'),
- ('🖤', '🖤'),
- ('🗻', '🙏'),
- ('🚀', '🛅'),
- ('🛌', '🛌'),
- ('🛐', '🛒'),
- ('🛕', '🛗'),
- ('🛜', '🛟'),
- ('🛫', '🛬'),
- ('🛴', '🛼'),
- ('🟠', '🟫'),
- ('🟰', '🟰'),
- ('🤌', '🤺'),
- ('🤼', '🥅'),
- ('🥇', '🧿'),
- ('🩰', '🩼'),
- ('🪀', '🪉'),
- ('🪏', '🫆'),
- ('🫎', '🫜'),
- ('🫟', '🫩'),
- ('🫰', '🫸'),
-];
-
-pub const EXTENDED_PICTOGRAPHIC: &'static [(char, char)] = &[
- ('©', '©'),
- ('®', '®'),
- ('‼', '‼'),
- ('⁉', '⁉'),
- ('™', '™'),
- ('ℹ', 'ℹ'),
- ('↔', '↙'),
- ('↩', '↪'),
- ('⌚', '⌛'),
- ('⌨', '⌨'),
- ('⎈', '⎈'),
- ('⏏', '⏏'),
- ('⏩', '⏳'),
- ('⏸', '⏺'),
- ('Ⓜ', 'Ⓜ'),
- ('▪', '▫'),
- ('▶', '▶'),
- ('◀', '◀'),
- ('◻', '◾'),
- ('☀', '★'),
- ('☇', '☒'),
- ('☔', '⚅'),
- ('⚐', '✅'),
- ('✈', '✒'),
- ('✔', '✔'),
- ('✖', '✖'),
- ('✝', '✝'),
- ('✡', '✡'),
- ('✨', '✨'),
- ('✳', '✴'),
- ('❄', '❄'),
- ('❇', '❇'),
- ('❌', '❌'),
- ('❎', '❎'),
- ('❓', '❕'),
- ('❗', '❗'),
- ('❣', '❧'),
- ('➕', '➗'),
- ('➡', '➡'),
- ('➰', '➰'),
- ('➿', '➿'),
- ('⤴', '⤵'),
- ('⬅', '⬇'),
- ('⬛', '⬜'),
- ('⭐', '⭐'),
- ('⭕', '⭕'),
- ('〰', '〰'),
- ('〽', '〽'),
- ('㊗', '㊗'),
- ('㊙', '㊙'),
- ('🀀', '\u{1f0ff}'),
- ('🄍', '🄏'),
- ('🄯', '🄯'),
- ('🅬', '🅱'),
- ('🅾', '🅿'),
- ('🆎', '🆎'),
- ('🆑', '🆚'),
- ('🆭', '\u{1f1e5}'),
- ('🈁', '\u{1f20f}'),
- ('🈚', '🈚'),
- ('🈯', '🈯'),
- ('🈲', '🈺'),
- ('\u{1f23c}', '\u{1f23f}'),
- ('\u{1f249}', '🏺'),
- ('🐀', '🔽'),
- ('🕆', '🙏'),
- ('🚀', '\u{1f6ff}'),
- ('🝴', '🝿'),
- ('🟕', '\u{1f7ff}'),
- ('\u{1f80c}', '\u{1f80f}'),
- ('\u{1f848}', '\u{1f84f}'),
- ('\u{1f85a}', '\u{1f85f}'),
- ('\u{1f888}', '\u{1f88f}'),
- ('\u{1f8ae}', '\u{1f8ff}'),
- ('🤌', '🤺'),
- ('🤼', '🥅'),
- ('🥇', '\u{1faff}'),
- ('\u{1fc00}', '\u{1fffd}'),
-];
-
-pub const EXTENDER: &'static [(char, char)] = &[
- ('·', '·'),
- ('ː', 'ˑ'),
- ('ـ', 'ـ'),
- ('ߺ', 'ߺ'),
- ('\u{a71}', '\u{a71}'),
- ('\u{afb}', '\u{afb}'),
- ('\u{b55}', '\u{b55}'),
- ('ๆ', 'ๆ'),
- ('ໆ', 'ໆ'),
- ('᠊', '᠊'),
- ('ᡃ', 'ᡃ'),
- ('ᪧ', 'ᪧ'),
- ('\u{1c36}', '\u{1c36}'),
- ('ᱻ', 'ᱻ'),
- ('々', '々'),
- ('〱', '〵'),
- ('ゝ', 'ゞ'),
- ('ー', 'ヾ'),
- ('ꀕ', 'ꀕ'),
- ('ꘌ', 'ꘌ'),
- ('ꧏ', 'ꧏ'),
- ('ꧦ', 'ꧦ'),
- ('ꩰ', 'ꩰ'),
- ('ꫝ', 'ꫝ'),
- ('ꫳ', 'ꫴ'),
- ('ー', 'ー'),
- ('𐞁', '𐞂'),
- ('𐵎', '𐵎'),
- ('\u{10d6a}', '\u{10d6a}'),
- ('𐵯', '𐵯'),
- ('\u{11237}', '\u{11237}'),
- ('𑍝', '𑍝'),
- ('\u{113d2}', '𑏓'),
- ('𑗆', '𑗈'),
- ('\u{11a98}', '\u{11a98}'),
- ('𖭂', '𖭃'),
- ('𖿠', '𖿡'),
- ('𖿣', '𖿣'),
- ('𞄼', '𞄽'),
- ('\u{1e5ef}', '\u{1e5ef}'),
- ('\u{1e944}', '\u{1e946}'),
-];
-
-pub const GRAPHEME_BASE: &'static [(char, char)] = &[
- (' ', '~'),
- ('\u{a0}', '¬'),
- ('®', '˿'),
- ('Ͱ', 'ͷ'),
- ('ͺ', 'Ϳ'),
- ('΄', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', '҂'),
- ('Ҋ', 'ԯ'),
- ('Ա', 'Ֆ'),
- ('ՙ', '֊'),
- ('֍', '֏'),
- ('־', '־'),
- ('׀', '׀'),
- ('׃', '׃'),
- ('׆', '׆'),
- ('א', 'ת'),
- ('ׯ', '״'),
- ('؆', '؏'),
- ('؛', '؛'),
- ('؝', 'ي'),
- ('٠', 'ٯ'),
- ('ٱ', 'ە'),
- ('۞', '۞'),
- ('ۥ', 'ۦ'),
- ('۩', '۩'),
- ('ۮ', '܍'),
- ('ܐ', 'ܐ'),
- ('ܒ', 'ܯ'),
- ('ݍ', 'ޥ'),
- ('ޱ', 'ޱ'),
- ('߀', 'ߪ'),
- ('ߴ', 'ߺ'),
- ('߾', 'ࠕ'),
- ('ࠚ', 'ࠚ'),
- ('ࠤ', 'ࠤ'),
- ('ࠨ', 'ࠨ'),
- ('࠰', '࠾'),
- ('ࡀ', 'ࡘ'),
- ('࡞', '࡞'),
- ('ࡠ', 'ࡪ'),
- ('ࡰ', 'ࢎ'),
- ('ࢠ', 'ࣉ'),
- ('ः', 'ह'),
- ('ऻ', 'ऻ'),
- ('ऽ', 'ी'),
- ('ॉ', 'ौ'),
- ('ॎ', 'ॐ'),
- ('क़', 'ॡ'),
- ('।', 'ঀ'),
- ('ং', 'ঃ'),
- ('অ', 'ঌ'),
- ('এ', 'ঐ'),
- ('ও', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('ঽ', 'ঽ'),
- ('ি', 'ী'),
- ('ে', 'ৈ'),
- ('ো', 'ৌ'),
- ('ৎ', 'ৎ'),
- ('ড়', 'ঢ়'),
- ('য়', 'ৡ'),
- ('০', '৽'),
- ('ਃ', 'ਃ'),
- ('ਅ', 'ਊ'),
- ('ਏ', 'ਐ'),
- ('ਓ', 'ਨ'),
- ('ਪ', 'ਰ'),
- ('ਲ', 'ਲ਼'),
- ('ਵ', 'ਸ਼'),
- ('ਸ', 'ਹ'),
- ('ਾ', 'ੀ'),
- ('ਖ਼', 'ੜ'),
- ('ਫ਼', 'ਫ਼'),
- ('੦', '੯'),
- ('ੲ', 'ੴ'),
- ('੶', '੶'),
- ('ઃ', 'ઃ'),
- ('અ', 'ઍ'),
- ('એ', 'ઑ'),
- ('ઓ', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('ઽ', 'ી'),
- ('ૉ', 'ૉ'),
- ('ો', 'ૌ'),
- ('ૐ', 'ૐ'),
- ('ૠ', 'ૡ'),
- ('૦', '૱'),
- ('ૹ', 'ૹ'),
- ('ଂ', 'ଃ'),
- ('ଅ', 'ଌ'),
- ('ଏ', 'ଐ'),
- ('ଓ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଵ', 'ହ'),
- ('ଽ', 'ଽ'),
- ('ୀ', 'ୀ'),
- ('େ', 'ୈ'),
- ('ୋ', 'ୌ'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', 'ୡ'),
- ('୦', '୷'),
- ('ஃ', 'ஃ'),
- ('அ', 'ஊ'),
- ('எ', 'ஐ'),
- ('ஒ', 'க'),
- ('ங', 'ச'),
- ('ஜ', 'ஜ'),
- ('ஞ', 'ட'),
- ('ண', 'த'),
- ('ந', 'ப'),
- ('ம', 'ஹ'),
- ('ி', 'ி'),
- ('ு', 'ூ'),
- ('ெ', 'ை'),
- ('ொ', 'ௌ'),
- ('ௐ', 'ௐ'),
- ('௦', '௺'),
- ('ఁ', 'ః'),
- ('అ', 'ఌ'),
- ('ఎ', 'ఐ'),
- ('ఒ', 'న'),
- ('ప', 'హ'),
- ('ఽ', 'ఽ'),
- ('ు', 'ౄ'),
- ('ౘ', 'ౚ'),
- ('ౝ', 'ౝ'),
- ('ౠ', 'ౡ'),
- ('౦', '౯'),
- ('౷', 'ಀ'),
- ('ಂ', 'ಌ'),
- ('ಎ', 'ಐ'),
- ('ಒ', 'ನ'),
- ('ಪ', 'ಳ'),
- ('ವ', 'ಹ'),
- ('ಽ', 'ಾ'),
- ('ು', 'ು'),
- ('ೃ', 'ೄ'),
- ('ೝ', 'ೞ'),
- ('ೠ', 'ೡ'),
- ('೦', '೯'),
- ('ೱ', 'ೳ'),
- ('ം', 'ഌ'),
- ('എ', 'ഐ'),
- ('ഒ', 'ഺ'),
- ('ഽ', 'ഽ'),
- ('ി', 'ീ'),
- ('െ', 'ൈ'),
- ('ൊ', 'ൌ'),
- ('ൎ', '൏'),
- ('ൔ', 'ൖ'),
- ('൘', 'ൡ'),
- ('൦', 'ൿ'),
- ('ං', 'ඃ'),
- ('අ', 'ඖ'),
- ('ක', 'න'),
- ('ඳ', 'ර'),
- ('ල', 'ල'),
- ('ව', 'ෆ'),
- ('ැ', 'ෑ'),
- ('ෘ', 'ෞ'),
- ('෦', '෯'),
- ('ෲ', '෴'),
- ('ก', 'ะ'),
- ('า', 'ำ'),
- ('฿', 'ๆ'),
- ('๏', '๛'),
- ('ກ', 'ຂ'),
- ('ຄ', 'ຄ'),
- ('ຆ', 'ຊ'),
- ('ຌ', 'ຣ'),
- ('ລ', 'ລ'),
- ('ວ', 'ະ'),
- ('າ', 'ຳ'),
- ('ຽ', 'ຽ'),
- ('ເ', 'ໄ'),
- ('ໆ', 'ໆ'),
- ('໐', '໙'),
- ('ໜ', 'ໟ'),
- ('ༀ', '༗'),
- ('༚', '༴'),
- ('༶', '༶'),
- ('༸', '༸'),
- ('༺', 'ཇ'),
- ('ཉ', 'ཬ'),
- ('ཿ', 'ཿ'),
- ('྅', '྅'),
- ('ྈ', 'ྌ'),
- ('྾', '࿅'),
- ('࿇', '࿌'),
- ('࿎', '࿚'),
- ('က', 'ာ'),
- ('ေ', 'ေ'),
- ('း', 'း'),
- ('ျ', 'ြ'),
- ('ဿ', 'ၗ'),
- ('ၚ', 'ၝ'),
- ('ၡ', 'ၰ'),
- ('ၵ', 'ႁ'),
- ('ႃ', 'ႄ'),
- ('ႇ', 'ႌ'),
- ('ႎ', 'ႜ'),
- ('႞', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ა', 'ቈ'),
- ('ቊ', 'ቍ'),
- ('ቐ', 'ቖ'),
- ('ቘ', 'ቘ'),
- ('ቚ', 'ቝ'),
- ('በ', 'ኈ'),
- ('ኊ', 'ኍ'),
- ('ነ', 'ኰ'),
- ('ኲ', 'ኵ'),
- ('ኸ', 'ኾ'),
- ('ዀ', 'ዀ'),
- ('ዂ', 'ዅ'),
- ('ወ', 'ዖ'),
- ('ዘ', 'ጐ'),
- ('ጒ', 'ጕ'),
- ('ጘ', 'ፚ'),
- ('፠', '፼'),
- ('ᎀ', '᎙'),
- ('Ꭰ', 'Ᏽ'),
- ('ᏸ', 'ᏽ'),
- ('᐀', '᚜'),
- ('ᚠ', 'ᛸ'),
- ('ᜀ', 'ᜑ'),
- ('ᜟ', 'ᜱ'),
- ('᜵', '᜶'),
- ('ᝀ', 'ᝑ'),
- ('ᝠ', 'ᝬ'),
- ('ᝮ', 'ᝰ'),
- ('ក', 'ឳ'),
- ('ា', 'ា'),
- ('ើ', 'ៅ'),
- ('ះ', 'ៈ'),
- ('។', 'ៜ'),
- ('០', '៩'),
- ('៰', '៹'),
- ('᠀', '᠊'),
- ('᠐', '᠙'),
- ('ᠠ', 'ᡸ'),
- ('ᢀ', 'ᢄ'),
- ('ᢇ', 'ᢨ'),
- ('ᢪ', 'ᢪ'),
- ('ᢰ', 'ᣵ'),
- ('ᤀ', 'ᤞ'),
- ('ᤣ', 'ᤦ'),
- ('ᤩ', 'ᤫ'),
- ('ᤰ', 'ᤱ'),
- ('ᤳ', 'ᤸ'),
- ('᥀', '᥀'),
- ('᥄', 'ᥭ'),
- ('ᥰ', 'ᥴ'),
- ('ᦀ', 'ᦫ'),
- ('ᦰ', 'ᧉ'),
- ('᧐', '᧚'),
- ('᧞', 'ᨖ'),
- ('ᨙ', 'ᨚ'),
- ('᨞', 'ᩕ'),
- ('ᩗ', 'ᩗ'),
- ('ᩡ', 'ᩡ'),
- ('ᩣ', 'ᩤ'),
- ('ᩭ', 'ᩲ'),
- ('᪀', '᪉'),
- ('᪐', '᪙'),
- ('᪠', '᪭'),
- ('ᬄ', 'ᬳ'),
- ('ᬾ', 'ᭁ'),
- ('ᭅ', 'ᭌ'),
- ('᭎', '᭪'),
- ('᭴', '᭿'),
- ('ᮂ', 'ᮡ'),
- ('ᮦ', 'ᮧ'),
- ('ᮮ', 'ᯥ'),
- ('ᯧ', 'ᯧ'),
- ('ᯪ', 'ᯬ'),
- ('ᯮ', 'ᯮ'),
- ('᯼', 'ᰫ'),
- ('ᰴ', 'ᰵ'),
- ('᰻', '᱉'),
- ('ᱍ', 'ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', '᳇'),
- ('᳓', '᳓'),
- ('᳡', '᳡'),
- ('ᳩ', 'ᳬ'),
- ('ᳮ', 'ᳳ'),
- ('ᳵ', '᳷'),
- ('ᳺ', 'ᳺ'),
- ('ᴀ', 'ᶿ'),
- ('Ḁ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ῄ'),
- ('ῆ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('῝', '`'),
- ('ῲ', 'ῴ'),
- ('ῶ', '῾'),
- ('\u{2000}', '\u{200a}'),
- ('‐', '‧'),
- ('\u{202f}', '\u{205f}'),
- ('⁰', 'ⁱ'),
- ('⁴', '₎'),
- ('ₐ', 'ₜ'),
- ('₠', '⃀'),
- ('℀', '↋'),
- ('←', '␩'),
- ('⑀', '⑊'),
- ('①', '⭳'),
- ('⭶', '⮕'),
- ('⮗', 'ⳮ'),
- ('Ⳳ', 'ⳳ'),
- ('⳹', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ⴰ', 'ⵧ'),
- ('ⵯ', '⵰'),
- ('ⶀ', 'ⶖ'),
- ('ⶠ', 'ⶦ'),
- ('ⶨ', 'ⶮ'),
- ('ⶰ', 'ⶶ'),
- ('ⶸ', 'ⶾ'),
- ('ⷀ', 'ⷆ'),
- ('ⷈ', 'ⷎ'),
- ('ⷐ', 'ⷖ'),
- ('ⷘ', 'ⷞ'),
- ('⸀', '⹝'),
- ('⺀', '⺙'),
- ('⺛', '⻳'),
- ('⼀', '⿕'),
- ('⿰', '〩'),
- ('〰', '〿'),
- ('ぁ', 'ゖ'),
- ('゛', 'ヿ'),
- ('ㄅ', 'ㄯ'),
- ('ㄱ', 'ㆎ'),
- ('㆐', '㇥'),
- ('㇯', '㈞'),
- ('㈠', 'ꒌ'),
- ('꒐', '꓆'),
- ('ꓐ', 'ꘫ'),
- ('Ꙁ', 'ꙮ'),
- ('꙳', '꙳'),
- ('꙾', 'ꚝ'),
- ('ꚠ', 'ꛯ'),
- ('꛲', '꛷'),
- ('꜀', 'ꟍ'),
- ('Ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'Ƛ'),
- ('ꟲ', 'ꠁ'),
- ('ꠃ', 'ꠅ'),
- ('ꠇ', 'ꠊ'),
- ('ꠌ', 'ꠤ'),
- ('ꠧ', '꠫'),
- ('꠰', '꠹'),
- ('ꡀ', '꡷'),
- ('ꢀ', 'ꣃ'),
- ('꣎', '꣙'),
- ('ꣲ', 'ꣾ'),
- ('꤀', 'ꤥ'),
- ('꤮', 'ꥆ'),
- ('ꥒ', 'ꥒ'),
- ('꥟', 'ꥼ'),
- ('ꦃ', 'ꦲ'),
- ('ꦴ', 'ꦵ'),
- ('ꦺ', 'ꦻ'),
- ('ꦾ', 'ꦿ'),
- ('꧁', '꧍'),
- ('ꧏ', '꧙'),
- ('꧞', 'ꧤ'),
- ('ꧦ', 'ꧾ'),
- ('ꨀ', 'ꨨ'),
- ('ꨯ', 'ꨰ'),
- ('ꨳ', 'ꨴ'),
- ('ꩀ', 'ꩂ'),
- ('ꩄ', 'ꩋ'),
- ('ꩍ', 'ꩍ'),
- ('꩐', '꩙'),
- ('꩜', 'ꩻ'),
- ('ꩽ', 'ꪯ'),
- ('ꪱ', 'ꪱ'),
- ('ꪵ', 'ꪶ'),
- ('ꪹ', 'ꪽ'),
- ('ꫀ', 'ꫀ'),
- ('ꫂ', 'ꫂ'),
- ('ꫛ', 'ꫫ'),
- ('ꫮ', 'ꫵ'),
- ('ꬁ', 'ꬆ'),
- ('ꬉ', 'ꬎ'),
- ('ꬑ', 'ꬖ'),
- ('ꬠ', 'ꬦ'),
- ('ꬨ', 'ꬮ'),
- ('ꬰ', '꭫'),
- ('ꭰ', 'ꯤ'),
- ('ꯦ', 'ꯧ'),
- ('ꯩ', '꯬'),
- ('꯰', '꯹'),
- ('가', '힣'),
- ('ힰ', 'ퟆ'),
- ('ퟋ', 'ퟻ'),
- ('豈', '舘'),
- ('並', '龎'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('יִ', 'יִ'),
- ('ײַ', 'זּ'),
- ('טּ', 'לּ'),
- ('מּ', 'מּ'),
- ('נּ', 'סּ'),
- ('ףּ', 'פּ'),
- ('צּ', '﯂'),
- ('ﯓ', 'ﶏ'),
- ('ﶒ', 'ﷇ'),
- ('﷏', '﷏'),
- ('ﷰ', '﷿'),
- ('︐', '︙'),
- ('︰', '﹒'),
- ('﹔', '﹦'),
- ('﹨', '﹫'),
- ('ﹰ', 'ﹴ'),
- ('ﹶ', 'ﻼ'),
- ('!', 'ン'),
- ('ᅠ', 'ᄒ'),
- ('ᅡ', 'ᅦ'),
- ('ᅧ', 'ᅬ'),
- ('ᅭ', 'ᅲ'),
- ('ᅳ', 'ᅵ'),
- ('¢', '₩'),
- ('│', '○'),
- ('', '�'),
- ('𐀀', '𐀋'),
- ('𐀍', '𐀦'),
- ('𐀨', '𐀺'),
- ('𐀼', '𐀽'),
- ('𐀿', '𐁍'),
- ('𐁐', '𐁝'),
- ('𐂀', '𐃺'),
- ('𐄀', '𐄂'),
- ('𐄇', '𐄳'),
- ('𐄷', '𐆎'),
- ('𐆐', '𐆜'),
- ('𐆠', '𐆠'),
- ('𐇐', '𐇼'),
- ('𐊀', '𐊜'),
- ('𐊠', '𐋐'),
- ('𐋡', '𐋻'),
- ('𐌀', '𐌣'),
- ('𐌭', '𐍊'),
- ('𐍐', '𐍵'),
- ('𐎀', '𐎝'),
- ('𐎟', '𐏃'),
- ('𐏈', '𐏕'),
- ('𐐀', '𐒝'),
- ('𐒠', '𐒩'),
- ('𐒰', '𐓓'),
- ('𐓘', '𐓻'),
- ('𐔀', '𐔧'),
- ('𐔰', '𐕣'),
- ('𐕯', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐗀', '𐗳'),
- ('𐘀', '𐜶'),
- ('𐝀', '𐝕'),
- ('𐝠', '𐝧'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𐠀', '𐠅'),
- ('𐠈', '𐠈'),
- ('𐠊', '𐠵'),
- ('𐠷', '𐠸'),
- ('𐠼', '𐠼'),
- ('𐠿', '𐡕'),
- ('𐡗', '𐢞'),
- ('𐢧', '𐢯'),
- ('𐣠', '𐣲'),
- ('𐣴', '𐣵'),
- ('𐣻', '𐤛'),
- ('𐤟', '𐤹'),
- ('𐤿', '𐤿'),
- ('𐦀', '𐦷'),
- ('𐦼', '𐧏'),
- ('𐧒', '𐨀'),
- ('𐨐', '𐨓'),
- ('𐨕', '𐨗'),
- ('𐨙', '𐨵'),
- ('𐩀', '𐩈'),
- ('𐩐', '𐩘'),
- ('𐩠', '𐪟'),
- ('𐫀', '𐫤'),
- ('𐫫', '𐫶'),
- ('𐬀', '𐬵'),
- ('𐬹', '𐭕'),
- ('𐭘', '𐭲'),
- ('𐭸', '𐮑'),
- ('𐮙', '𐮜'),
- ('𐮩', '𐮯'),
- ('𐰀', '𐱈'),
- ('𐲀', '𐲲'),
- ('𐳀', '𐳲'),
- ('𐳺', '𐴣'),
- ('𐴰', '𐴹'),
- ('𐵀', '𐵥'),
- ('𐵮', '𐶅'),
- ('𐶎', '𐶏'),
- ('𐹠', '𐹾'),
- ('𐺀', '𐺩'),
- ('𐺭', '𐺭'),
- ('𐺰', '𐺱'),
- ('𐻂', '𐻄'),
- ('𐼀', '𐼧'),
- ('𐼰', '𐽅'),
- ('𐽑', '𐽙'),
- ('𐽰', '𐾁'),
- ('𐾆', '𐾉'),
- ('𐾰', '𐿋'),
- ('𐿠', '𐿶'),
- ('𑀀', '𑀀'),
- ('𑀂', '𑀷'),
- ('𑁇', '𑁍'),
- ('𑁒', '𑁯'),
- ('𑁱', '𑁲'),
- ('𑁵', '𑁵'),
- ('𑂂', '𑂲'),
- ('𑂷', '𑂸'),
- ('𑂻', '𑂼'),
- ('𑂾', '𑃁'),
- ('𑃐', '𑃨'),
- ('𑃰', '𑃹'),
- ('𑄃', '𑄦'),
- ('𑄬', '𑄬'),
- ('𑄶', '𑅇'),
- ('𑅐', '𑅲'),
- ('𑅴', '𑅶'),
- ('𑆂', '𑆵'),
- ('𑆿', '𑆿'),
- ('𑇁', '𑇈'),
- ('𑇍', '𑇎'),
- ('𑇐', '𑇟'),
- ('𑇡', '𑇴'),
- ('𑈀', '𑈑'),
- ('𑈓', '𑈮'),
- ('𑈲', '𑈳'),
- ('𑈸', '𑈽'),
- ('𑈿', '𑉀'),
- ('𑊀', '𑊆'),
- ('𑊈', '𑊈'),
- ('𑊊', '𑊍'),
- ('𑊏', '𑊝'),
- ('𑊟', '𑊩'),
- ('𑊰', '𑋞'),
- ('𑋠', '𑋢'),
- ('𑋰', '𑋹'),
- ('𑌂', '𑌃'),
- ('𑌅', '𑌌'),
- ('𑌏', '𑌐'),
- ('𑌓', '𑌨'),
- ('𑌪', '𑌰'),
- ('𑌲', '𑌳'),
- ('𑌵', '𑌹'),
- ('𑌽', '𑌽'),
- ('𑌿', '𑌿'),
- ('𑍁', '𑍄'),
- ('𑍇', '𑍈'),
- ('𑍋', '𑍌'),
- ('𑍐', '𑍐'),
- ('𑍝', '𑍣'),
- ('𑎀', '𑎉'),
- ('𑎋', '𑎋'),
- ('𑎎', '𑎎'),
- ('𑎐', '𑎵'),
- ('𑎷', '𑎷'),
- ('𑎹', '𑎺'),
- ('𑏊', '𑏊'),
- ('𑏌', '𑏍'),
- ('𑏑', '𑏑'),
- ('𑏓', '𑏕'),
- ('𑏗', '𑏘'),
- ('𑐀', '𑐷'),
- ('𑑀', '𑑁'),
- ('𑑅', '𑑅'),
- ('𑑇', '𑑛'),
- ('𑑝', '𑑝'),
- ('𑑟', '𑑡'),
- ('𑒀', '𑒯'),
- ('𑒱', '𑒲'),
- ('𑒹', '𑒹'),
- ('𑒻', '𑒼'),
- ('𑒾', '𑒾'),
- ('𑓁', '𑓁'),
- ('𑓄', '𑓇'),
- ('𑓐', '𑓙'),
- ('𑖀', '𑖮'),
- ('𑖰', '𑖱'),
- ('𑖸', '𑖻'),
- ('𑖾', '𑖾'),
- ('𑗁', '𑗛'),
- ('𑘀', '𑘲'),
- ('𑘻', '𑘼'),
- ('𑘾', '𑘾'),
- ('𑙁', '𑙄'),
- ('𑙐', '𑙙'),
- ('𑙠', '𑙬'),
- ('𑚀', '𑚪'),
- ('𑚬', '𑚬'),
- ('𑚮', '𑚯'),
- ('𑚸', '𑚹'),
- ('𑛀', '𑛉'),
- ('𑛐', '𑛣'),
- ('𑜀', '𑜚'),
- ('𑜞', '𑜞'),
- ('𑜠', '𑜡'),
- ('𑜦', '𑜦'),
- ('𑜰', '𑝆'),
- ('𑠀', '𑠮'),
- ('𑠸', '𑠸'),
- ('𑠻', '𑠻'),
- ('𑢠', '𑣲'),
- ('𑣿', '𑤆'),
- ('𑤉', '𑤉'),
- ('𑤌', '𑤓'),
- ('𑤕', '𑤖'),
- ('𑤘', '𑤯'),
- ('𑤱', '𑤵'),
- ('𑤷', '𑤸'),
- ('𑤿', '𑥂'),
- ('𑥄', '𑥆'),
- ('𑥐', '𑥙'),
- ('𑦠', '𑦧'),
- ('𑦪', '𑧓'),
- ('𑧜', '𑧟'),
- ('𑧡', '𑧤'),
- ('𑨀', '𑨀'),
- ('𑨋', '𑨲'),
- ('𑨹', '𑨺'),
- ('𑨿', '𑩆'),
- ('𑩐', '𑩐'),
- ('𑩗', '𑩘'),
- ('𑩜', '𑪉'),
- ('𑪗', '𑪗'),
- ('𑪚', '𑪢'),
- ('𑪰', '𑫸'),
- ('𑬀', '𑬉'),
- ('𑯀', '𑯡'),
- ('𑯰', '𑯹'),
- ('𑰀', '𑰈'),
- ('𑰊', '𑰯'),
- ('𑰾', '𑰾'),
- ('𑱀', '𑱅'),
- ('𑱐', '𑱬'),
- ('𑱰', '𑲏'),
- ('𑲩', '𑲩'),
- ('𑲱', '𑲱'),
- ('𑲴', '𑲴'),
- ('𑴀', '𑴆'),
- ('𑴈', '𑴉'),
- ('𑴋', '𑴰'),
- ('𑵆', '𑵆'),
- ('𑵐', '𑵙'),
- ('𑵠', '𑵥'),
- ('𑵧', '𑵨'),
- ('𑵪', '𑶎'),
- ('𑶓', '𑶔'),
- ('𑶖', '𑶖'),
- ('𑶘', '𑶘'),
- ('𑶠', '𑶩'),
- ('𑻠', '𑻲'),
- ('𑻵', '𑻸'),
- ('𑼂', '𑼐'),
- ('𑼒', '𑼵'),
- ('𑼾', '𑼿'),
- ('𑽃', '𑽙'),
- ('𑾰', '𑾰'),
- ('𑿀', '𑿱'),
- ('𑿿', '𒎙'),
- ('𒐀', '𒑮'),
- ('𒑰', '𒑴'),
- ('𒒀', '𒕃'),
- ('𒾐', '𒿲'),
- ('𓀀', '𓐯'),
- ('𓑁', '𓑆'),
- ('𓑠', '𔏺'),
- ('𔐀', '𔙆'),
- ('𖄀', '𖄝'),
- ('𖄪', '𖄬'),
- ('𖄰', '𖄹'),
- ('𖠀', '𖨸'),
- ('𖩀', '𖩞'),
- ('𖩠', '𖩩'),
- ('𖩮', '𖪾'),
- ('𖫀', '𖫉'),
- ('𖫐', '𖫭'),
- ('𖫵', '𖫵'),
- ('𖬀', '𖬯'),
- ('𖬷', '𖭅'),
- ('𖭐', '𖭙'),
- ('𖭛', '𖭡'),
- ('𖭣', '𖭷'),
- ('𖭽', '𖮏'),
- ('𖵀', '𖵹'),
- ('𖹀', '𖺚'),
- ('𖼀', '𖽊'),
- ('𖽐', '𖾇'),
- ('𖾓', '𖾟'),
- ('𖿠', '𖿣'),
- ('𗀀', '𘟷'),
- ('𘠀', '𘳕'),
- ('𘳿', '𘴈'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('𛀀', '𛄢'),
- ('𛄲', '𛄲'),
- ('𛅐', '𛅒'),
- ('𛅕', '𛅕'),
- ('𛅤', '𛅧'),
- ('𛅰', '𛋻'),
- ('𛰀', '𛱪'),
- ('𛱰', '𛱼'),
- ('𛲀', '𛲈'),
- ('𛲐', '𛲙'),
- ('𛲜', '𛲜'),
- ('𛲟', '𛲟'),
- ('𜰀', '𜳹'),
- ('𜴀', '𜺳'),
- ('𜽐', '𜿃'),
- ('𝀀', '𝃵'),
- ('𝄀', '𝄦'),
- ('𝄩', '𝅘𝅥𝅲'),
- ('𝅪', '𝅬'),
- ('𝆃', '𝆄'),
- ('𝆌', '𝆩'),
- ('𝆮', '𝇪'),
- ('𝈀', '𝉁'),
- ('𝉅', '𝉅'),
- ('𝋀', '𝋓'),
- ('𝋠', '𝋳'),
- ('𝌀', '𝍖'),
- ('𝍠', '𝍸'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝟋'),
- ('𝟎', '𝧿'),
- ('𝨷', '𝨺'),
- ('𝩭', '𝩴'),
- ('𝩶', '𝪃'),
- ('𝪅', '𝪋'),
- ('𝼀', '𝼞'),
- ('𝼥', '𝼪'),
- ('𞀰', '𞁭'),
- ('𞄀', '𞄬'),
- ('𞄷', '𞄽'),
- ('𞅀', '𞅉'),
- ('𞅎', '𞅏'),
- ('𞊐', '𞊭'),
- ('𞋀', '𞋫'),
- ('𞋰', '𞋹'),
- ('𞋿', '𞋿'),
- ('𞓐', '𞓫'),
- ('𞓰', '𞓹'),
- ('𞗐', '𞗭'),
- ('𞗰', '𞗺'),
- ('𞗿', '𞗿'),
- ('𞟠', '𞟦'),
- ('𞟨', '𞟫'),
- ('𞟭', '𞟮'),
- ('𞟰', '𞟾'),
- ('𞠀', '𞣄'),
- ('𞣇', '𞣏'),
- ('𞤀', '𞥃'),
- ('𞥋', '𞥋'),
- ('𞥐', '𞥙'),
- ('𞥞', '𞥟'),
- ('𞱱', '𞲴'),
- ('𞴁', '𞴽'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('𞻰', '𞻱'),
- ('🀀', '🀫'),
- ('🀰', '🂓'),
- ('🂠', '🂮'),
- ('🂱', '🂿'),
- ('🃁', '🃏'),
- ('🃑', '🃵'),
- ('🄀', '🆭'),
- ('🇦', '🈂'),
- ('🈐', '🈻'),
- ('🉀', '🉈'),
- ('🉐', '🉑'),
- ('🉠', '🉥'),
- ('🌀', '🛗'),
- ('🛜', '🛬'),
- ('🛰', '🛼'),
- ('🜀', '🝶'),
- ('🝻', '🟙'),
- ('🟠', '🟫'),
- ('🟰', '🟰'),
- ('🠀', '🠋'),
- ('🠐', '🡇'),
- ('🡐', '🡙'),
- ('🡠', '🢇'),
- ('🢐', '🢭'),
- ('🢰', '🢻'),
- ('🣀', '🣁'),
- ('🤀', '🩓'),
- ('🩠', '🩭'),
- ('🩰', '🩼'),
- ('🪀', '🪉'),
- ('🪏', '🫆'),
- ('🫎', '🫜'),
- ('🫟', '🫩'),
- ('🫰', '🫸'),
- ('🬀', '🮒'),
- ('🮔', '🯹'),
- ('𠀀', '𪛟'),
- ('𪜀', '𫜹'),
- ('𫝀', '𫠝'),
- ('𫠠', '𬺡'),
- ('𬺰', '𮯠'),
- ('𮯰', '𮹝'),
- ('丽', '𪘀'),
- ('𰀀', '𱍊'),
- ('𱍐', '𲎯'),
-];
-
-pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[
- ('\u{300}', '\u{36f}'),
- ('\u{483}', '\u{489}'),
- ('\u{591}', '\u{5bd}'),
- ('\u{5bf}', '\u{5bf}'),
- ('\u{5c1}', '\u{5c2}'),
- ('\u{5c4}', '\u{5c5}'),
- ('\u{5c7}', '\u{5c7}'),
- ('\u{610}', '\u{61a}'),
- ('\u{64b}', '\u{65f}'),
- ('\u{670}', '\u{670}'),
- ('\u{6d6}', '\u{6dc}'),
- ('\u{6df}', '\u{6e4}'),
- ('\u{6e7}', '\u{6e8}'),
- ('\u{6ea}', '\u{6ed}'),
- ('\u{711}', '\u{711}'),
- ('\u{730}', '\u{74a}'),
- ('\u{7a6}', '\u{7b0}'),
- ('\u{7eb}', '\u{7f3}'),
- ('\u{7fd}', '\u{7fd}'),
- ('\u{816}', '\u{819}'),
- ('\u{81b}', '\u{823}'),
- ('\u{825}', '\u{827}'),
- ('\u{829}', '\u{82d}'),
- ('\u{859}', '\u{85b}'),
- ('\u{897}', '\u{89f}'),
- ('\u{8ca}', '\u{8e1}'),
- ('\u{8e3}', '\u{902}'),
- ('\u{93a}', '\u{93a}'),
- ('\u{93c}', '\u{93c}'),
- ('\u{941}', '\u{948}'),
- ('\u{94d}', '\u{94d}'),
- ('\u{951}', '\u{957}'),
- ('\u{962}', '\u{963}'),
- ('\u{981}', '\u{981}'),
- ('\u{9bc}', '\u{9bc}'),
- ('\u{9be}', '\u{9be}'),
- ('\u{9c1}', '\u{9c4}'),
- ('\u{9cd}', '\u{9cd}'),
- ('\u{9d7}', '\u{9d7}'),
- ('\u{9e2}', '\u{9e3}'),
- ('\u{9fe}', '\u{9fe}'),
- ('\u{a01}', '\u{a02}'),
- ('\u{a3c}', '\u{a3c}'),
- ('\u{a41}', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4d}'),
- ('\u{a51}', '\u{a51}'),
- ('\u{a70}', '\u{a71}'),
- ('\u{a75}', '\u{a75}'),
- ('\u{a81}', '\u{a82}'),
- ('\u{abc}', '\u{abc}'),
- ('\u{ac1}', '\u{ac5}'),
- ('\u{ac7}', '\u{ac8}'),
- ('\u{acd}', '\u{acd}'),
- ('\u{ae2}', '\u{ae3}'),
- ('\u{afa}', '\u{aff}'),
- ('\u{b01}', '\u{b01}'),
- ('\u{b3c}', '\u{b3c}'),
- ('\u{b3e}', '\u{b3f}'),
- ('\u{b41}', '\u{b44}'),
- ('\u{b4d}', '\u{b4d}'),
- ('\u{b55}', '\u{b57}'),
- ('\u{b62}', '\u{b63}'),
- ('\u{b82}', '\u{b82}'),
- ('\u{bbe}', '\u{bbe}'),
- ('\u{bc0}', '\u{bc0}'),
- ('\u{bcd}', '\u{bcd}'),
- ('\u{bd7}', '\u{bd7}'),
- ('\u{c00}', '\u{c00}'),
- ('\u{c04}', '\u{c04}'),
- ('\u{c3c}', '\u{c3c}'),
- ('\u{c3e}', '\u{c40}'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4d}'),
- ('\u{c55}', '\u{c56}'),
- ('\u{c62}', '\u{c63}'),
- ('\u{c81}', '\u{c81}'),
- ('\u{cbc}', '\u{cbc}'),
- ('\u{cbf}', '\u{cc0}'),
- ('\u{cc2}', '\u{cc2}'),
- ('\u{cc6}', '\u{cc8}'),
- ('\u{cca}', '\u{ccd}'),
- ('\u{cd5}', '\u{cd6}'),
- ('\u{ce2}', '\u{ce3}'),
- ('\u{d00}', '\u{d01}'),
- ('\u{d3b}', '\u{d3c}'),
- ('\u{d3e}', '\u{d3e}'),
- ('\u{d41}', '\u{d44}'),
- ('\u{d4d}', '\u{d4d}'),
- ('\u{d57}', '\u{d57}'),
- ('\u{d62}', '\u{d63}'),
- ('\u{d81}', '\u{d81}'),
- ('\u{dca}', '\u{dca}'),
- ('\u{dcf}', '\u{dcf}'),
- ('\u{dd2}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('\u{ddf}', '\u{ddf}'),
- ('\u{e31}', '\u{e31}'),
- ('\u{e34}', '\u{e3a}'),
- ('\u{e47}', '\u{e4e}'),
- ('\u{eb1}', '\u{eb1}'),
- ('\u{eb4}', '\u{ebc}'),
- ('\u{ec8}', '\u{ece}'),
- ('\u{f18}', '\u{f19}'),
- ('\u{f35}', '\u{f35}'),
- ('\u{f37}', '\u{f37}'),
- ('\u{f39}', '\u{f39}'),
- ('\u{f71}', '\u{f7e}'),
- ('\u{f80}', '\u{f84}'),
- ('\u{f86}', '\u{f87}'),
- ('\u{f8d}', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('\u{fc6}', '\u{fc6}'),
- ('\u{102d}', '\u{1030}'),
- ('\u{1032}', '\u{1037}'),
- ('\u{1039}', '\u{103a}'),
- ('\u{103d}', '\u{103e}'),
- ('\u{1058}', '\u{1059}'),
- ('\u{105e}', '\u{1060}'),
- ('\u{1071}', '\u{1074}'),
- ('\u{1082}', '\u{1082}'),
- ('\u{1085}', '\u{1086}'),
- ('\u{108d}', '\u{108d}'),
- ('\u{109d}', '\u{109d}'),
- ('\u{135d}', '\u{135f}'),
- ('\u{1712}', '\u{1715}'),
- ('\u{1732}', '\u{1734}'),
- ('\u{1752}', '\u{1753}'),
- ('\u{1772}', '\u{1773}'),
- ('\u{17b4}', '\u{17b5}'),
- ('\u{17b7}', '\u{17bd}'),
- ('\u{17c6}', '\u{17c6}'),
- ('\u{17c9}', '\u{17d3}'),
- ('\u{17dd}', '\u{17dd}'),
- ('\u{180b}', '\u{180d}'),
- ('\u{180f}', '\u{180f}'),
- ('\u{1885}', '\u{1886}'),
- ('\u{18a9}', '\u{18a9}'),
- ('\u{1920}', '\u{1922}'),
- ('\u{1927}', '\u{1928}'),
- ('\u{1932}', '\u{1932}'),
- ('\u{1939}', '\u{193b}'),
- ('\u{1a17}', '\u{1a18}'),
- ('\u{1a1b}', '\u{1a1b}'),
- ('\u{1a56}', '\u{1a56}'),
- ('\u{1a58}', '\u{1a5e}'),
- ('\u{1a60}', '\u{1a60}'),
- ('\u{1a62}', '\u{1a62}'),
- ('\u{1a65}', '\u{1a6c}'),
- ('\u{1a73}', '\u{1a7c}'),
- ('\u{1a7f}', '\u{1a7f}'),
- ('\u{1ab0}', '\u{1ace}'),
- ('\u{1b00}', '\u{1b03}'),
- ('\u{1b34}', '\u{1b3d}'),
- ('\u{1b42}', '\u{1b44}'),
- ('\u{1b6b}', '\u{1b73}'),
- ('\u{1b80}', '\u{1b81}'),
- ('\u{1ba2}', '\u{1ba5}'),
- ('\u{1ba8}', '\u{1bad}'),
- ('\u{1be6}', '\u{1be6}'),
- ('\u{1be8}', '\u{1be9}'),
- ('\u{1bed}', '\u{1bed}'),
- ('\u{1bef}', '\u{1bf3}'),
- ('\u{1c2c}', '\u{1c33}'),
- ('\u{1c36}', '\u{1c37}'),
- ('\u{1cd0}', '\u{1cd2}'),
- ('\u{1cd4}', '\u{1ce0}'),
- ('\u{1ce2}', '\u{1ce8}'),
- ('\u{1ced}', '\u{1ced}'),
- ('\u{1cf4}', '\u{1cf4}'),
- ('\u{1cf8}', '\u{1cf9}'),
- ('\u{1dc0}', '\u{1dff}'),
- ('\u{200c}', '\u{200c}'),
- ('\u{20d0}', '\u{20f0}'),
- ('\u{2cef}', '\u{2cf1}'),
- ('\u{2d7f}', '\u{2d7f}'),
- ('\u{2de0}', '\u{2dff}'),
- ('\u{302a}', '\u{302f}'),
- ('\u{3099}', '\u{309a}'),
- ('\u{a66f}', '\u{a672}'),
- ('\u{a674}', '\u{a67d}'),
- ('\u{a69e}', '\u{a69f}'),
- ('\u{a6f0}', '\u{a6f1}'),
- ('\u{a802}', '\u{a802}'),
- ('\u{a806}', '\u{a806}'),
- ('\u{a80b}', '\u{a80b}'),
- ('\u{a825}', '\u{a826}'),
- ('\u{a82c}', '\u{a82c}'),
- ('\u{a8c4}', '\u{a8c5}'),
- ('\u{a8e0}', '\u{a8f1}'),
- ('\u{a8ff}', '\u{a8ff}'),
- ('\u{a926}', '\u{a92d}'),
- ('\u{a947}', '\u{a951}'),
- ('\u{a953}', '\u{a953}'),
- ('\u{a980}', '\u{a982}'),
- ('\u{a9b3}', '\u{a9b3}'),
- ('\u{a9b6}', '\u{a9b9}'),
- ('\u{a9bc}', '\u{a9bd}'),
- ('\u{a9c0}', '\u{a9c0}'),
- ('\u{a9e5}', '\u{a9e5}'),
- ('\u{aa29}', '\u{aa2e}'),
- ('\u{aa31}', '\u{aa32}'),
- ('\u{aa35}', '\u{aa36}'),
- ('\u{aa43}', '\u{aa43}'),
- ('\u{aa4c}', '\u{aa4c}'),
- ('\u{aa7c}', '\u{aa7c}'),
- ('\u{aab0}', '\u{aab0}'),
- ('\u{aab2}', '\u{aab4}'),
- ('\u{aab7}', '\u{aab8}'),
- ('\u{aabe}', '\u{aabf}'),
- ('\u{aac1}', '\u{aac1}'),
- ('\u{aaec}', '\u{aaed}'),
- ('\u{aaf6}', '\u{aaf6}'),
- ('\u{abe5}', '\u{abe5}'),
- ('\u{abe8}', '\u{abe8}'),
- ('\u{abed}', '\u{abed}'),
- ('\u{fb1e}', '\u{fb1e}'),
- ('\u{fe00}', '\u{fe0f}'),
- ('\u{fe20}', '\u{fe2f}'),
- ('\u{ff9e}', '\u{ff9f}'),
- ('\u{101fd}', '\u{101fd}'),
- ('\u{102e0}', '\u{102e0}'),
- ('\u{10376}', '\u{1037a}'),
- ('\u{10a01}', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '\u{10a0f}'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '\u{10a3f}'),
- ('\u{10ae5}', '\u{10ae6}'),
- ('\u{10d24}', '\u{10d27}'),
- ('\u{10d69}', '\u{10d6d}'),
- ('\u{10eab}', '\u{10eac}'),
- ('\u{10efc}', '\u{10eff}'),
- ('\u{10f46}', '\u{10f50}'),
- ('\u{10f82}', '\u{10f85}'),
- ('\u{11001}', '\u{11001}'),
- ('\u{11038}', '\u{11046}'),
- ('\u{11070}', '\u{11070}'),
- ('\u{11073}', '\u{11074}'),
- ('\u{1107f}', '\u{11081}'),
- ('\u{110b3}', '\u{110b6}'),
- ('\u{110b9}', '\u{110ba}'),
- ('\u{110c2}', '\u{110c2}'),
- ('\u{11100}', '\u{11102}'),
- ('\u{11127}', '\u{1112b}'),
- ('\u{1112d}', '\u{11134}'),
- ('\u{11173}', '\u{11173}'),
- ('\u{11180}', '\u{11181}'),
- ('\u{111b6}', '\u{111be}'),
- ('\u{111c0}', '\u{111c0}'),
- ('\u{111c9}', '\u{111cc}'),
- ('\u{111cf}', '\u{111cf}'),
- ('\u{1122f}', '\u{11231}'),
- ('\u{11234}', '\u{11237}'),
- ('\u{1123e}', '\u{1123e}'),
- ('\u{11241}', '\u{11241}'),
- ('\u{112df}', '\u{112df}'),
- ('\u{112e3}', '\u{112ea}'),
- ('\u{11300}', '\u{11301}'),
- ('\u{1133b}', '\u{1133c}'),
- ('\u{1133e}', '\u{1133e}'),
- ('\u{11340}', '\u{11340}'),
- ('\u{1134d}', '\u{1134d}'),
- ('\u{11357}', '\u{11357}'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
- ('\u{113b8}', '\u{113b8}'),
- ('\u{113bb}', '\u{113c0}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '\u{113c9}'),
- ('\u{113ce}', '\u{113d0}'),
- ('\u{113d2}', '\u{113d2}'),
- ('\u{113e1}', '\u{113e2}'),
- ('\u{11438}', '\u{1143f}'),
- ('\u{11442}', '\u{11444}'),
- ('\u{11446}', '\u{11446}'),
- ('\u{1145e}', '\u{1145e}'),
- ('\u{114b0}', '\u{114b0}'),
- ('\u{114b3}', '\u{114b8}'),
- ('\u{114ba}', '\u{114ba}'),
- ('\u{114bd}', '\u{114bd}'),
- ('\u{114bf}', '\u{114c0}'),
- ('\u{114c2}', '\u{114c3}'),
- ('\u{115af}', '\u{115af}'),
- ('\u{115b2}', '\u{115b5}'),
- ('\u{115bc}', '\u{115bd}'),
- ('\u{115bf}', '\u{115c0}'),
- ('\u{115dc}', '\u{115dd}'),
- ('\u{11633}', '\u{1163a}'),
- ('\u{1163d}', '\u{1163d}'),
- ('\u{1163f}', '\u{11640}'),
- ('\u{116ab}', '\u{116ab}'),
- ('\u{116ad}', '\u{116ad}'),
- ('\u{116b0}', '\u{116b7}'),
- ('\u{1171d}', '\u{1171d}'),
- ('\u{1171f}', '\u{1171f}'),
- ('\u{11722}', '\u{11725}'),
- ('\u{11727}', '\u{1172b}'),
- ('\u{1182f}', '\u{11837}'),
- ('\u{11839}', '\u{1183a}'),
- ('\u{11930}', '\u{11930}'),
- ('\u{1193b}', '\u{1193e}'),
- ('\u{11943}', '\u{11943}'),
- ('\u{119d4}', '\u{119d7}'),
- ('\u{119da}', '\u{119db}'),
- ('\u{119e0}', '\u{119e0}'),
- ('\u{11a01}', '\u{11a0a}'),
- ('\u{11a33}', '\u{11a38}'),
- ('\u{11a3b}', '\u{11a3e}'),
- ('\u{11a47}', '\u{11a47}'),
- ('\u{11a51}', '\u{11a56}'),
- ('\u{11a59}', '\u{11a5b}'),
- ('\u{11a8a}', '\u{11a96}'),
- ('\u{11a98}', '\u{11a99}'),
- ('\u{11c30}', '\u{11c36}'),
- ('\u{11c38}', '\u{11c3d}'),
- ('\u{11c3f}', '\u{11c3f}'),
- ('\u{11c92}', '\u{11ca7}'),
- ('\u{11caa}', '\u{11cb0}'),
- ('\u{11cb2}', '\u{11cb3}'),
- ('\u{11cb5}', '\u{11cb6}'),
- ('\u{11d31}', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d45}'),
- ('\u{11d47}', '\u{11d47}'),
- ('\u{11d90}', '\u{11d91}'),
- ('\u{11d95}', '\u{11d95}'),
- ('\u{11d97}', '\u{11d97}'),
- ('\u{11ef3}', '\u{11ef4}'),
- ('\u{11f00}', '\u{11f01}'),
- ('\u{11f36}', '\u{11f3a}'),
- ('\u{11f40}', '\u{11f42}'),
- ('\u{11f5a}', '\u{11f5a}'),
- ('\u{13440}', '\u{13440}'),
- ('\u{13447}', '\u{13455}'),
- ('\u{1611e}', '\u{16129}'),
- ('\u{1612d}', '\u{1612f}'),
- ('\u{16af0}', '\u{16af4}'),
- ('\u{16b30}', '\u{16b36}'),
- ('\u{16f4f}', '\u{16f4f}'),
- ('\u{16f8f}', '\u{16f92}'),
- ('\u{16fe4}', '\u{16fe4}'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('\u{1bc9d}', '\u{1bc9e}'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('\u{1d165}', '\u{1d169}'),
- ('\u{1d16d}', '\u{1d172}'),
- ('\u{1d17b}', '\u{1d182}'),
- ('\u{1d185}', '\u{1d18b}'),
- ('\u{1d1aa}', '\u{1d1ad}'),
- ('\u{1d242}', '\u{1d244}'),
- ('\u{1da00}', '\u{1da36}'),
- ('\u{1da3b}', '\u{1da6c}'),
- ('\u{1da75}', '\u{1da75}'),
- ('\u{1da84}', '\u{1da84}'),
- ('\u{1da9b}', '\u{1da9f}'),
- ('\u{1daa1}', '\u{1daaf}'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
- ('\u{1e08f}', '\u{1e08f}'),
- ('\u{1e130}', '\u{1e136}'),
- ('\u{1e2ae}', '\u{1e2ae}'),
- ('\u{1e2ec}', '\u{1e2ef}'),
- ('\u{1e4ec}', '\u{1e4ef}'),
- ('\u{1e5ee}', '\u{1e5ef}'),
- ('\u{1e8d0}', '\u{1e8d6}'),
- ('\u{1e944}', '\u{1e94a}'),
- ('\u{e0020}', '\u{e007f}'),
- ('\u{e0100}', '\u{e01ef}'),
-];
-
-pub const GRAPHEME_LINK: &'static [(char, char)] = &[
- ('\u{94d}', '\u{94d}'),
- ('\u{9cd}', '\u{9cd}'),
- ('\u{a4d}', '\u{a4d}'),
- ('\u{acd}', '\u{acd}'),
- ('\u{b4d}', '\u{b4d}'),
- ('\u{bcd}', '\u{bcd}'),
- ('\u{c4d}', '\u{c4d}'),
- ('\u{ccd}', '\u{ccd}'),
- ('\u{d3b}', '\u{d3c}'),
- ('\u{d4d}', '\u{d4d}'),
- ('\u{dca}', '\u{dca}'),
- ('\u{e3a}', '\u{e3a}'),
- ('\u{eba}', '\u{eba}'),
- ('\u{f84}', '\u{f84}'),
- ('\u{1039}', '\u{103a}'),
- ('\u{1714}', '\u{1715}'),
- ('\u{1734}', '\u{1734}'),
- ('\u{17d2}', '\u{17d2}'),
- ('\u{1a60}', '\u{1a60}'),
- ('\u{1b44}', '\u{1b44}'),
- ('\u{1baa}', '\u{1bab}'),
- ('\u{1bf2}', '\u{1bf3}'),
- ('\u{2d7f}', '\u{2d7f}'),
- ('\u{a806}', '\u{a806}'),
- ('\u{a82c}', '\u{a82c}'),
- ('\u{a8c4}', '\u{a8c4}'),
- ('\u{a953}', '\u{a953}'),
- ('\u{a9c0}', '\u{a9c0}'),
- ('\u{aaf6}', '\u{aaf6}'),
- ('\u{abed}', '\u{abed}'),
- ('\u{10a3f}', '\u{10a3f}'),
- ('\u{11046}', '\u{11046}'),
- ('\u{11070}', '\u{11070}'),
- ('\u{1107f}', '\u{1107f}'),
- ('\u{110b9}', '\u{110b9}'),
- ('\u{11133}', '\u{11134}'),
- ('\u{111c0}', '\u{111c0}'),
- ('\u{11235}', '\u{11235}'),
- ('\u{112ea}', '\u{112ea}'),
- ('\u{1134d}', '\u{1134d}'),
- ('\u{113ce}', '\u{113d0}'),
- ('\u{11442}', '\u{11442}'),
- ('\u{114c2}', '\u{114c2}'),
- ('\u{115bf}', '\u{115bf}'),
- ('\u{1163f}', '\u{1163f}'),
- ('\u{116b6}', '\u{116b6}'),
- ('\u{1172b}', '\u{1172b}'),
- ('\u{11839}', '\u{11839}'),
- ('\u{1193d}', '\u{1193e}'),
- ('\u{119e0}', '\u{119e0}'),
- ('\u{11a34}', '\u{11a34}'),
- ('\u{11a47}', '\u{11a47}'),
- ('\u{11a99}', '\u{11a99}'),
- ('\u{11c3f}', '\u{11c3f}'),
- ('\u{11d44}', '\u{11d45}'),
- ('\u{11d97}', '\u{11d97}'),
- ('\u{11f41}', '\u{11f42}'),
- ('\u{1612f}', '\u{1612f}'),
-];
-
-pub const HEX_DIGIT: &'static [(char, char)] = &[
- ('0', '9'),
- ('A', 'F'),
- ('a', 'f'),
- ('0', '9'),
- ('A', 'F'),
- ('a', 'f'),
-];
-
-pub const HYPHEN: &'static [(char, char)] = &[
- ('-', '-'),
- ('\u{ad}', '\u{ad}'),
- ('֊', '֊'),
- ('᠆', '᠆'),
- ('‐', '‑'),
- ('⸗', '⸗'),
- ('・', '・'),
- ('﹣', '﹣'),
- ('-', '-'),
- ('・', '・'),
-];
-
-pub const IDS_BINARY_OPERATOR: &'static [(char, char)] =
- &[('⿰', '⿱'), ('⿴', '⿽'), ('㇯', '㇯')];
-
-pub const IDS_TRINARY_OPERATOR: &'static [(char, char)] = &[('⿲', '⿳')];
-
-pub const IDS_UNARY_OPERATOR: &'static [(char, char)] = &[('⿾', '⿿')];
-
-pub const ID_COMPAT_MATH_CONTINUE: &'static [(char, char)] = &[
- ('²', '³'),
- ('¹', '¹'),
- ('⁰', '⁰'),
- ('⁴', '⁾'),
- ('₀', '₎'),
- ('∂', '∂'),
- ('∇', '∇'),
- ('∞', '∞'),
- ('𝛁', '𝛁'),
- ('𝛛', '𝛛'),
- ('𝛻', '𝛻'),
- ('𝜕', '𝜕'),
- ('𝜵', '𝜵'),
- ('𝝏', '𝝏'),
- ('𝝯', '𝝯'),
- ('𝞉', '𝞉'),
- ('𝞩', '𝞩'),
- ('𝟃', '𝟃'),
-];
-
-pub const ID_COMPAT_MATH_START: &'static [(char, char)] = &[
- ('∂', '∂'),
- ('∇', '∇'),
- ('∞', '∞'),
- ('𝛁', '𝛁'),
- ('𝛛', '𝛛'),
- ('𝛻', '𝛻'),
- ('𝜕', '𝜕'),
- ('𝜵', '𝜵'),
- ('𝝏', '𝝏'),
- ('𝝯', '𝝯'),
- ('𝞉', '𝞉'),
- ('𝞩', '𝞩'),
- ('𝟃', '𝟃'),
-];
-
-pub const ID_CONTINUE: &'static [(char, char)] = &[
- ('0', '9'),
- ('A', 'Z'),
- ('_', '_'),
- ('a', 'z'),
- ('ª', 'ª'),
- ('µ', 'µ'),
- ('·', '·'),
- ('º', 'º'),
- ('À', 'Ö'),
- ('Ø', 'ö'),
- ('ø', 'ˁ'),
- ('ˆ', 'ˑ'),
- ('ˠ', 'ˤ'),
- ('ˬ', 'ˬ'),
- ('ˮ', 'ˮ'),
- ('\u{300}', 'ʹ'),
- ('Ͷ', 'ͷ'),
- ('ͺ', 'ͽ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', 'ϵ'),
- ('Ϸ', 'ҁ'),
- ('\u{483}', '\u{487}'),
- ('Ҋ', 'ԯ'),
- ('Ա', 'Ֆ'),
- ('ՙ', 'ՙ'),
- ('ՠ', 'ֈ'),
- ('\u{591}', '\u{5bd}'),
- ('\u{5bf}', '\u{5bf}'),
- ('\u{5c1}', '\u{5c2}'),
- ('\u{5c4}', '\u{5c5}'),
- ('\u{5c7}', '\u{5c7}'),
- ('א', 'ת'),
- ('ׯ', 'ײ'),
- ('\u{610}', '\u{61a}'),
- ('ؠ', '٩'),
- ('ٮ', 'ۓ'),
- ('ە', '\u{6dc}'),
- ('\u{6df}', '\u{6e8}'),
- ('\u{6ea}', 'ۼ'),
- ('ۿ', 'ۿ'),
- ('ܐ', '\u{74a}'),
- ('ݍ', 'ޱ'),
- ('߀', 'ߵ'),
- ('ߺ', 'ߺ'),
- ('\u{7fd}', '\u{7fd}'),
- ('ࠀ', '\u{82d}'),
- ('ࡀ', '\u{85b}'),
- ('ࡠ', 'ࡪ'),
- ('ࡰ', 'ࢇ'),
- ('ࢉ', 'ࢎ'),
- ('\u{897}', '\u{8e1}'),
- ('\u{8e3}', '\u{963}'),
- ('०', '९'),
- ('ॱ', 'ঃ'),
- ('অ', 'ঌ'),
- ('এ', 'ঐ'),
- ('ও', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('\u{9bc}', '\u{9c4}'),
- ('ে', 'ৈ'),
- ('ো', 'ৎ'),
- ('\u{9d7}', '\u{9d7}'),
- ('ড়', 'ঢ়'),
- ('য়', '\u{9e3}'),
- ('০', 'ৱ'),
- ('ৼ', 'ৼ'),
- ('\u{9fe}', '\u{9fe}'),
- ('\u{a01}', 'ਃ'),
- ('ਅ', 'ਊ'),
- ('ਏ', 'ਐ'),
- ('ਓ', 'ਨ'),
- ('ਪ', 'ਰ'),
- ('ਲ', 'ਲ਼'),
- ('ਵ', 'ਸ਼'),
- ('ਸ', 'ਹ'),
- ('\u{a3c}', '\u{a3c}'),
- ('ਾ', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4d}'),
- ('\u{a51}', '\u{a51}'),
- ('ਖ਼', 'ੜ'),
- ('ਫ਼', 'ਫ਼'),
- ('੦', '\u{a75}'),
- ('\u{a81}', 'ઃ'),
- ('અ', 'ઍ'),
- ('એ', 'ઑ'),
- ('ઓ', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('\u{abc}', '\u{ac5}'),
- ('\u{ac7}', 'ૉ'),
- ('ો', '\u{acd}'),
- ('ૐ', 'ૐ'),
- ('ૠ', '\u{ae3}'),
- ('૦', '૯'),
- ('ૹ', '\u{aff}'),
- ('\u{b01}', 'ଃ'),
- ('ଅ', 'ଌ'),
- ('ଏ', 'ଐ'),
- ('ଓ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଵ', 'ହ'),
- ('\u{b3c}', '\u{b44}'),
- ('େ', 'ୈ'),
- ('ୋ', '\u{b4d}'),
- ('\u{b55}', '\u{b57}'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', '\u{b63}'),
- ('୦', '୯'),
- ('ୱ', 'ୱ'),
- ('\u{b82}', 'ஃ'),
- ('அ', 'ஊ'),
- ('எ', 'ஐ'),
- ('ஒ', 'க'),
- ('ங', 'ச'),
- ('ஜ', 'ஜ'),
- ('ஞ', 'ட'),
- ('ண', 'த'),
- ('ந', 'ப'),
- ('ம', 'ஹ'),
- ('\u{bbe}', 'ூ'),
- ('ெ', 'ை'),
- ('ொ', '\u{bcd}'),
- ('ௐ', 'ௐ'),
- ('\u{bd7}', '\u{bd7}'),
- ('௦', '௯'),
- ('\u{c00}', 'ఌ'),
- ('ఎ', 'ఐ'),
- ('ఒ', 'న'),
- ('ప', 'హ'),
- ('\u{c3c}', 'ౄ'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4d}'),
- ('\u{c55}', '\u{c56}'),
- ('ౘ', 'ౚ'),
- ('ౝ', 'ౝ'),
- ('ౠ', '\u{c63}'),
- ('౦', '౯'),
- ('ಀ', 'ಃ'),
- ('ಅ', 'ಌ'),
- ('ಎ', 'ಐ'),
- ('ಒ', 'ನ'),
- ('ಪ', 'ಳ'),
- ('ವ', 'ಹ'),
- ('\u{cbc}', 'ೄ'),
- ('\u{cc6}', '\u{cc8}'),
- ('\u{cca}', '\u{ccd}'),
- ('\u{cd5}', '\u{cd6}'),
- ('ೝ', 'ೞ'),
- ('ೠ', '\u{ce3}'),
- ('೦', '೯'),
- ('ೱ', 'ೳ'),
- ('\u{d00}', 'ഌ'),
- ('എ', 'ഐ'),
- ('ഒ', '\u{d44}'),
- ('െ', 'ൈ'),
- ('ൊ', 'ൎ'),
- ('ൔ', '\u{d57}'),
- ('ൟ', '\u{d63}'),
- ('൦', '൯'),
- ('ൺ', 'ൿ'),
- ('\u{d81}', 'ඃ'),
- ('අ', 'ඖ'),
- ('ක', 'න'),
- ('ඳ', 'ර'),
- ('ල', 'ල'),
- ('ව', 'ෆ'),
- ('\u{dca}', '\u{dca}'),
- ('\u{dcf}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('ෘ', '\u{ddf}'),
- ('෦', '෯'),
- ('ෲ', 'ෳ'),
- ('ก', '\u{e3a}'),
- ('เ', '\u{e4e}'),
- ('๐', '๙'),
- ('ກ', 'ຂ'),
- ('ຄ', 'ຄ'),
- ('ຆ', 'ຊ'),
- ('ຌ', 'ຣ'),
- ('ລ', 'ລ'),
- ('ວ', 'ຽ'),
- ('ເ', 'ໄ'),
- ('ໆ', 'ໆ'),
- ('\u{ec8}', '\u{ece}'),
- ('໐', '໙'),
- ('ໜ', 'ໟ'),
- ('ༀ', 'ༀ'),
- ('\u{f18}', '\u{f19}'),
- ('༠', '༩'),
- ('\u{f35}', '\u{f35}'),
- ('\u{f37}', '\u{f37}'),
- ('\u{f39}', '\u{f39}'),
- ('༾', 'ཇ'),
- ('ཉ', 'ཬ'),
- ('\u{f71}', '\u{f84}'),
- ('\u{f86}', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('\u{fc6}', '\u{fc6}'),
- ('က', '၉'),
- ('ၐ', '\u{109d}'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ა', 'ჺ'),
- ('ჼ', 'ቈ'),
- ('ቊ', 'ቍ'),
- ('ቐ', 'ቖ'),
- ('ቘ', 'ቘ'),
- ('ቚ', 'ቝ'),
- ('በ', 'ኈ'),
- ('ኊ', 'ኍ'),
- ('ነ', 'ኰ'),
- ('ኲ', 'ኵ'),
- ('ኸ', 'ኾ'),
- ('ዀ', 'ዀ'),
- ('ዂ', 'ዅ'),
- ('ወ', 'ዖ'),
- ('ዘ', 'ጐ'),
- ('ጒ', 'ጕ'),
- ('ጘ', 'ፚ'),
- ('\u{135d}', '\u{135f}'),
- ('፩', '፱'),
- ('ᎀ', 'ᎏ'),
- ('Ꭰ', 'Ᏽ'),
- ('ᏸ', 'ᏽ'),
- ('ᐁ', 'ᙬ'),
- ('ᙯ', 'ᙿ'),
- ('ᚁ', 'ᚚ'),
- ('ᚠ', 'ᛪ'),
- ('ᛮ', 'ᛸ'),
- ('ᜀ', '\u{1715}'),
- ('ᜟ', '\u{1734}'),
- ('ᝀ', '\u{1753}'),
- ('ᝠ', 'ᝬ'),
- ('ᝮ', 'ᝰ'),
- ('\u{1772}', '\u{1773}'),
- ('ក', '\u{17d3}'),
- ('ៗ', 'ៗ'),
- ('ៜ', '\u{17dd}'),
- ('០', '៩'),
- ('\u{180b}', '\u{180d}'),
- ('\u{180f}', '᠙'),
- ('ᠠ', 'ᡸ'),
- ('ᢀ', 'ᢪ'),
- ('ᢰ', 'ᣵ'),
- ('ᤀ', 'ᤞ'),
- ('\u{1920}', 'ᤫ'),
- ('ᤰ', '\u{193b}'),
- ('᥆', 'ᥭ'),
- ('ᥰ', 'ᥴ'),
- ('ᦀ', 'ᦫ'),
- ('ᦰ', 'ᧉ'),
- ('᧐', '᧚'),
- ('ᨀ', '\u{1a1b}'),
- ('ᨠ', '\u{1a5e}'),
- ('\u{1a60}', '\u{1a7c}'),
- ('\u{1a7f}', '᪉'),
- ('᪐', '᪙'),
- ('ᪧ', 'ᪧ'),
- ('\u{1ab0}', '\u{1abd}'),
- ('\u{1abf}', '\u{1ace}'),
- ('\u{1b00}', 'ᭌ'),
- ('᭐', '᭙'),
- ('\u{1b6b}', '\u{1b73}'),
- ('\u{1b80}', '\u{1bf3}'),
- ('ᰀ', '\u{1c37}'),
- ('᱀', '᱉'),
- ('ᱍ', 'ᱽ'),
- ('ᲀ', 'ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('\u{1cd0}', '\u{1cd2}'),
- ('\u{1cd4}', 'ᳺ'),
- ('ᴀ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ᾼ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῌ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('ῠ', 'Ῥ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῼ'),
- ('\u{200c}', '\u{200d}'),
- ('‿', '⁀'),
- ('⁔', '⁔'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('\u{20d0}', '\u{20dc}'),
- ('\u{20e1}', '\u{20e1}'),
- ('\u{20e5}', '\u{20f0}'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℊ', 'ℓ'),
- ('ℕ', 'ℕ'),
- ('℘', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('Ω', 'Ω'),
- ('ℨ', 'ℨ'),
- ('K', 'ℹ'),
- ('ℼ', 'ℿ'),
- ('ⅅ', 'ⅉ'),
- ('ⅎ', 'ⅎ'),
- ('Ⅰ', 'ↈ'),
- ('Ⰰ', 'ⳤ'),
- ('Ⳬ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ⴰ', 'ⵧ'),
- ('ⵯ', 'ⵯ'),
- ('\u{2d7f}', 'ⶖ'),
- ('ⶠ', 'ⶦ'),
- ('ⶨ', 'ⶮ'),
- ('ⶰ', 'ⶶ'),
- ('ⶸ', 'ⶾ'),
- ('ⷀ', 'ⷆ'),
- ('ⷈ', 'ⷎ'),
- ('ⷐ', 'ⷖ'),
- ('ⷘ', 'ⷞ'),
- ('\u{2de0}', '\u{2dff}'),
- ('々', '〇'),
- ('〡', '\u{302f}'),
- ('〱', '〵'),
- ('〸', '〼'),
- ('ぁ', 'ゖ'),
- ('\u{3099}', 'ゟ'),
- ('ァ', 'ヿ'),
- ('ㄅ', 'ㄯ'),
- ('ㄱ', 'ㆎ'),
- ('ㆠ', 'ㆿ'),
- ('ㇰ', 'ㇿ'),
- ('㐀', '䶿'),
- ('一', 'ꒌ'),
- ('ꓐ', 'ꓽ'),
- ('ꔀ', 'ꘌ'),
- ('ꘐ', 'ꘫ'),
- ('Ꙁ', '\u{a66f}'),
- ('\u{a674}', '\u{a67d}'),
- ('ꙿ', '\u{a6f1}'),
- ('ꜗ', 'ꜟ'),
- ('Ꜣ', 'ꞈ'),
- ('Ꞌ', 'ꟍ'),
- ('Ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'Ƛ'),
- ('ꟲ', 'ꠧ'),
- ('\u{a82c}', '\u{a82c}'),
- ('ꡀ', 'ꡳ'),
- ('ꢀ', '\u{a8c5}'),
- ('꣐', '꣙'),
- ('\u{a8e0}', 'ꣷ'),
- ('ꣻ', 'ꣻ'),
- ('ꣽ', '\u{a92d}'),
- ('ꤰ', '\u{a953}'),
- ('ꥠ', 'ꥼ'),
- ('\u{a980}', '\u{a9c0}'),
- ('ꧏ', '꧙'),
- ('ꧠ', 'ꧾ'),
- ('ꨀ', '\u{aa36}'),
- ('ꩀ', 'ꩍ'),
- ('꩐', '꩙'),
- ('ꩠ', 'ꩶ'),
- ('ꩺ', 'ꫂ'),
- ('ꫛ', 'ꫝ'),
- ('ꫠ', 'ꫯ'),
- ('ꫲ', '\u{aaf6}'),
- ('ꬁ', 'ꬆ'),
- ('ꬉ', 'ꬎ'),
- ('ꬑ', 'ꬖ'),
- ('ꬠ', 'ꬦ'),
- ('ꬨ', 'ꬮ'),
- ('ꬰ', 'ꭚ'),
- ('ꭜ', 'ꭩ'),
- ('ꭰ', 'ꯪ'),
- ('꯬', '\u{abed}'),
- ('꯰', '꯹'),
- ('가', '힣'),
- ('ힰ', 'ퟆ'),
- ('ퟋ', 'ퟻ'),
- ('豈', '舘'),
- ('並', '龎'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('יִ', 'ﬨ'),
- ('שׁ', 'זּ'),
- ('טּ', 'לּ'),
- ('מּ', 'מּ'),
- ('נּ', 'סּ'),
- ('ףּ', 'פּ'),
- ('צּ', 'ﮱ'),
- ('ﯓ', 'ﴽ'),
- ('ﵐ', 'ﶏ'),
- ('ﶒ', 'ﷇ'),
- ('ﷰ', 'ﷻ'),
- ('\u{fe00}', '\u{fe0f}'),
- ('\u{fe20}', '\u{fe2f}'),
- ('︳', '︴'),
- ('﹍', '﹏'),
- ('ﹰ', 'ﹴ'),
- ('ﹶ', 'ﻼ'),
- ('0', '9'),
- ('A', 'Z'),
- ('_', '_'),
- ('a', 'z'),
- ('・', 'ᄒ'),
- ('ᅡ', 'ᅦ'),
- ('ᅧ', 'ᅬ'),
- ('ᅭ', 'ᅲ'),
- ('ᅳ', 'ᅵ'),
- ('𐀀', '𐀋'),
- ('𐀍', '𐀦'),
- ('𐀨', '𐀺'),
- ('𐀼', '𐀽'),
- ('𐀿', '𐁍'),
- ('𐁐', '𐁝'),
- ('𐂀', '𐃺'),
- ('𐅀', '𐅴'),
- ('\u{101fd}', '\u{101fd}'),
- ('𐊀', '𐊜'),
- ('𐊠', '𐋐'),
- ('\u{102e0}', '\u{102e0}'),
- ('𐌀', '𐌟'),
- ('𐌭', '𐍊'),
- ('𐍐', '\u{1037a}'),
- ('𐎀', '𐎝'),
- ('𐎠', '𐏃'),
- ('𐏈', '𐏏'),
- ('𐏑', '𐏕'),
- ('𐐀', '𐒝'),
- ('𐒠', '𐒩'),
- ('𐒰', '𐓓'),
- ('𐓘', '𐓻'),
- ('𐔀', '𐔧'),
- ('𐔰', '𐕣'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐗀', '𐗳'),
- ('𐘀', '𐜶'),
- ('𐝀', '𐝕'),
- ('𐝠', '𐝧'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𐠀', '𐠅'),
- ('𐠈', '𐠈'),
- ('𐠊', '𐠵'),
- ('𐠷', '𐠸'),
- ('𐠼', '𐠼'),
- ('𐠿', '𐡕'),
- ('𐡠', '𐡶'),
- ('𐢀', '𐢞'),
- ('𐣠', '𐣲'),
- ('𐣴', '𐣵'),
- ('𐤀', '𐤕'),
- ('𐤠', '𐤹'),
- ('𐦀', '𐦷'),
- ('𐦾', '𐦿'),
- ('𐨀', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '𐨓'),
- ('𐨕', '𐨗'),
- ('𐨙', '𐨵'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '\u{10a3f}'),
- ('𐩠', '𐩼'),
- ('𐪀', '𐪜'),
- ('𐫀', '𐫇'),
- ('𐫉', '\u{10ae6}'),
- ('𐬀', '𐬵'),
- ('𐭀', '𐭕'),
- ('𐭠', '𐭲'),
- ('𐮀', '𐮑'),
- ('𐰀', '𐱈'),
- ('𐲀', '𐲲'),
- ('𐳀', '𐳲'),
- ('𐴀', '\u{10d27}'),
- ('𐴰', '𐴹'),
- ('𐵀', '𐵥'),
- ('\u{10d69}', '\u{10d6d}'),
- ('𐵯', '𐶅'),
- ('𐺀', '𐺩'),
- ('\u{10eab}', '\u{10eac}'),
- ('𐺰', '𐺱'),
- ('𐻂', '𐻄'),
- ('\u{10efc}', '𐼜'),
- ('𐼧', '𐼧'),
- ('𐼰', '\u{10f50}'),
- ('𐽰', '\u{10f85}'),
- ('𐾰', '𐿄'),
- ('𐿠', '𐿶'),
- ('𑀀', '\u{11046}'),
- ('𑁦', '𑁵'),
- ('\u{1107f}', '\u{110ba}'),
- ('\u{110c2}', '\u{110c2}'),
- ('𑃐', '𑃨'),
- ('𑃰', '𑃹'),
- ('\u{11100}', '\u{11134}'),
- ('𑄶', '𑄿'),
- ('𑅄', '𑅇'),
- ('𑅐', '\u{11173}'),
- ('𑅶', '𑅶'),
- ('\u{11180}', '𑇄'),
- ('\u{111c9}', '\u{111cc}'),
- ('𑇎', '𑇚'),
- ('𑇜', '𑇜'),
- ('𑈀', '𑈑'),
- ('𑈓', '\u{11237}'),
- ('\u{1123e}', '\u{11241}'),
- ('𑊀', '𑊆'),
- ('𑊈', '𑊈'),
- ('𑊊', '𑊍'),
- ('𑊏', '𑊝'),
- ('𑊟', '𑊨'),
- ('𑊰', '\u{112ea}'),
- ('𑋰', '𑋹'),
- ('\u{11300}', '𑌃'),
- ('𑌅', '𑌌'),
- ('𑌏', '𑌐'),
- ('𑌓', '𑌨'),
- ('𑌪', '𑌰'),
- ('𑌲', '𑌳'),
- ('𑌵', '𑌹'),
- ('\u{1133b}', '𑍄'),
- ('𑍇', '𑍈'),
- ('𑍋', '\u{1134d}'),
- ('𑍐', '𑍐'),
- ('\u{11357}', '\u{11357}'),
- ('𑍝', '𑍣'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
- ('𑎀', '𑎉'),
- ('𑎋', '𑎋'),
- ('𑎎', '𑎎'),
- ('𑎐', '𑎵'),
- ('𑎷', '\u{113c0}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '𑏊'),
- ('𑏌', '𑏓'),
- ('\u{113e1}', '\u{113e2}'),
- ('𑐀', '𑑊'),
- ('𑑐', '𑑙'),
- ('\u{1145e}', '𑑡'),
- ('𑒀', '𑓅'),
- ('𑓇', '𑓇'),
- ('𑓐', '𑓙'),
- ('𑖀', '\u{115b5}'),
- ('𑖸', '\u{115c0}'),
- ('𑗘', '\u{115dd}'),
- ('𑘀', '\u{11640}'),
- ('𑙄', '𑙄'),
- ('𑙐', '𑙙'),
- ('𑚀', '𑚸'),
- ('𑛀', '𑛉'),
- ('𑛐', '𑛣'),
- ('𑜀', '𑜚'),
- ('\u{1171d}', '\u{1172b}'),
- ('𑜰', '𑜹'),
- ('𑝀', '𑝆'),
- ('𑠀', '\u{1183a}'),
- ('𑢠', '𑣩'),
- ('𑣿', '𑤆'),
- ('𑤉', '𑤉'),
- ('𑤌', '𑤓'),
- ('𑤕', '𑤖'),
- ('𑤘', '𑤵'),
- ('𑤷', '𑤸'),
- ('\u{1193b}', '\u{11943}'),
- ('𑥐', '𑥙'),
- ('𑦠', '𑦧'),
- ('𑦪', '\u{119d7}'),
- ('\u{119da}', '𑧡'),
- ('𑧣', '𑧤'),
- ('𑨀', '\u{11a3e}'),
- ('\u{11a47}', '\u{11a47}'),
- ('𑩐', '\u{11a99}'),
- ('𑪝', '𑪝'),
- ('𑪰', '𑫸'),
- ('𑯀', '𑯠'),
- ('𑯰', '𑯹'),
- ('𑰀', '𑰈'),
- ('𑰊', '\u{11c36}'),
- ('\u{11c38}', '𑱀'),
- ('𑱐', '𑱙'),
- ('𑱲', '𑲏'),
- ('\u{11c92}', '\u{11ca7}'),
- ('𑲩', '\u{11cb6}'),
- ('𑴀', '𑴆'),
- ('𑴈', '𑴉'),
- ('𑴋', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d47}'),
- ('𑵐', '𑵙'),
- ('𑵠', '𑵥'),
- ('𑵧', '𑵨'),
- ('𑵪', '𑶎'),
- ('\u{11d90}', '\u{11d91}'),
- ('𑶓', '𑶘'),
- ('𑶠', '𑶩'),
- ('𑻠', '𑻶'),
- ('\u{11f00}', '𑼐'),
- ('𑼒', '\u{11f3a}'),
- ('𑼾', '\u{11f42}'),
- ('𑽐', '\u{11f5a}'),
- ('𑾰', '𑾰'),
- ('𒀀', '𒎙'),
- ('𒐀', '𒑮'),
- ('𒒀', '𒕃'),
- ('𒾐', '𒿰'),
- ('𓀀', '𓐯'),
- ('\u{13440}', '\u{13455}'),
- ('𓑠', '𔏺'),
- ('𔐀', '𔙆'),
- ('𖄀', '𖄹'),
- ('𖠀', '𖨸'),
- ('𖩀', '𖩞'),
- ('𖩠', '𖩩'),
- ('𖩰', '𖪾'),
- ('𖫀', '𖫉'),
- ('𖫐', '𖫭'),
- ('\u{16af0}', '\u{16af4}'),
- ('𖬀', '\u{16b36}'),
- ('𖭀', '𖭃'),
- ('𖭐', '𖭙'),
- ('𖭣', '𖭷'),
- ('𖭽', '𖮏'),
- ('𖵀', '𖵬'),
- ('𖵰', '𖵹'),
- ('𖹀', '𖹿'),
- ('𖼀', '𖽊'),
- ('\u{16f4f}', '𖾇'),
- ('\u{16f8f}', '𖾟'),
- ('𖿠', '𖿡'),
- ('𖿣', '\u{16fe4}'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('𗀀', '𘟷'),
- ('𘠀', '𘳕'),
- ('𘳿', '𘴈'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('𛀀', '𛄢'),
- ('𛄲', '𛄲'),
- ('𛅐', '𛅒'),
- ('𛅕', '𛅕'),
- ('𛅤', '𛅧'),
- ('𛅰', '𛋻'),
- ('𛰀', '𛱪'),
- ('𛱰', '𛱼'),
- ('𛲀', '𛲈'),
- ('𛲐', '𛲙'),
- ('\u{1bc9d}', '\u{1bc9e}'),
- ('𜳰', '𜳹'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('\u{1d165}', '\u{1d169}'),
- ('\u{1d16d}', '\u{1d172}'),
- ('\u{1d17b}', '\u{1d182}'),
- ('\u{1d185}', '\u{1d18b}'),
- ('\u{1d1aa}', '\u{1d1ad}'),
- ('\u{1d242}', '\u{1d244}'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝛀'),
- ('𝛂', '𝛚'),
- ('𝛜', '𝛺'),
- ('𝛼', '𝜔'),
- ('𝜖', '𝜴'),
- ('𝜶', '𝝎'),
- ('𝝐', '𝝮'),
- ('𝝰', '𝞈'),
- ('𝞊', '𝞨'),
- ('𝞪', '𝟂'),
- ('𝟄', '𝟋'),
- ('𝟎', '𝟿'),
- ('\u{1da00}', '\u{1da36}'),
- ('\u{1da3b}', '\u{1da6c}'),
- ('\u{1da75}', '\u{1da75}'),
- ('\u{1da84}', '\u{1da84}'),
- ('\u{1da9b}', '\u{1da9f}'),
- ('\u{1daa1}', '\u{1daaf}'),
- ('𝼀', '𝼞'),
- ('𝼥', '𝼪'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
- ('𞀰', '𞁭'),
- ('\u{1e08f}', '\u{1e08f}'),
- ('𞄀', '𞄬'),
- ('\u{1e130}', '𞄽'),
- ('𞅀', '𞅉'),
- ('𞅎', '𞅎'),
- ('𞊐', '\u{1e2ae}'),
- ('𞋀', '𞋹'),
- ('𞓐', '𞓹'),
- ('𞗐', '𞗺'),
- ('𞟠', '𞟦'),
- ('𞟨', '𞟫'),
- ('𞟭', '𞟮'),
- ('𞟰', '𞟾'),
- ('𞠀', '𞣄'),
- ('\u{1e8d0}', '\u{1e8d6}'),
- ('𞤀', '𞥋'),
- ('𞥐', '𞥙'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('🯰', '🯹'),
- ('𠀀', '𪛟'),
- ('𪜀', '𫜹'),
- ('𫝀', '𫠝'),
- ('𫠠', '𬺡'),
- ('𬺰', '𮯠'),
- ('𮯰', '𮹝'),
- ('丽', '𪘀'),
- ('𰀀', '𱍊'),
- ('𱍐', '𲎯'),
- ('\u{e0100}', '\u{e01ef}'),
-];
-
-pub const ID_START: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('a', 'z'),
- ('ª', 'ª'),
- ('µ', 'µ'),
- ('º', 'º'),
- ('À', 'Ö'),
- ('Ø', 'ö'),
- ('ø', 'ˁ'),
- ('ˆ', 'ˑ'),
- ('ˠ', 'ˤ'),
- ('ˬ', 'ˬ'),
- ('ˮ', 'ˮ'),
- ('Ͱ', 'ʹ'),
- ('Ͷ', 'ͷ'),
- ('ͺ', 'ͽ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', 'ϵ'),
- ('Ϸ', 'ҁ'),
- ('Ҋ', 'ԯ'),
- ('Ա', 'Ֆ'),
- ('ՙ', 'ՙ'),
- ('ՠ', 'ֈ'),
- ('א', 'ת'),
- ('ׯ', 'ײ'),
- ('ؠ', 'ي'),
- ('ٮ', 'ٯ'),
- ('ٱ', 'ۓ'),
- ('ە', 'ە'),
- ('ۥ', 'ۦ'),
- ('ۮ', 'ۯ'),
- ('ۺ', 'ۼ'),
- ('ۿ', 'ۿ'),
- ('ܐ', 'ܐ'),
- ('ܒ', 'ܯ'),
- ('ݍ', 'ޥ'),
- ('ޱ', 'ޱ'),
- ('ߊ', 'ߪ'),
- ('ߴ', 'ߵ'),
- ('ߺ', 'ߺ'),
- ('ࠀ', 'ࠕ'),
- ('ࠚ', 'ࠚ'),
- ('ࠤ', 'ࠤ'),
- ('ࠨ', 'ࠨ'),
- ('ࡀ', 'ࡘ'),
- ('ࡠ', 'ࡪ'),
- ('ࡰ', 'ࢇ'),
- ('ࢉ', 'ࢎ'),
- ('ࢠ', 'ࣉ'),
- ('ऄ', 'ह'),
- ('ऽ', 'ऽ'),
- ('ॐ', 'ॐ'),
- ('क़', 'ॡ'),
- ('ॱ', 'ঀ'),
- ('অ', 'ঌ'),
- ('এ', 'ঐ'),
- ('ও', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('ঽ', 'ঽ'),
- ('ৎ', 'ৎ'),
- ('ড়', 'ঢ়'),
- ('য়', 'ৡ'),
- ('ৰ', 'ৱ'),
- ('ৼ', 'ৼ'),
- ('ਅ', 'ਊ'),
- ('ਏ', 'ਐ'),
- ('ਓ', 'ਨ'),
- ('ਪ', 'ਰ'),
- ('ਲ', 'ਲ਼'),
- ('ਵ', 'ਸ਼'),
- ('ਸ', 'ਹ'),
- ('ਖ਼', 'ੜ'),
- ('ਫ਼', 'ਫ਼'),
- ('ੲ', 'ੴ'),
- ('અ', 'ઍ'),
- ('એ', 'ઑ'),
- ('ઓ', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('ઽ', 'ઽ'),
- ('ૐ', 'ૐ'),
- ('ૠ', 'ૡ'),
- ('ૹ', 'ૹ'),
- ('ଅ', 'ଌ'),
- ('ଏ', 'ଐ'),
- ('ଓ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଵ', 'ହ'),
- ('ଽ', 'ଽ'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', 'ୡ'),
- ('ୱ', 'ୱ'),
- ('ஃ', 'ஃ'),
- ('அ', 'ஊ'),
- ('எ', 'ஐ'),
- ('ஒ', 'க'),
- ('ங', 'ச'),
- ('ஜ', 'ஜ'),
- ('ஞ', 'ட'),
- ('ண', 'த'),
- ('ந', 'ப'),
- ('ம', 'ஹ'),
- ('ௐ', 'ௐ'),
- ('అ', 'ఌ'),
- ('ఎ', 'ఐ'),
- ('ఒ', 'న'),
- ('ప', 'హ'),
- ('ఽ', 'ఽ'),
- ('ౘ', 'ౚ'),
- ('ౝ', 'ౝ'),
- ('ౠ', 'ౡ'),
- ('ಀ', 'ಀ'),
- ('ಅ', 'ಌ'),
- ('ಎ', 'ಐ'),
- ('ಒ', 'ನ'),
- ('ಪ', 'ಳ'),
- ('ವ', 'ಹ'),
- ('ಽ', 'ಽ'),
- ('ೝ', 'ೞ'),
- ('ೠ', 'ೡ'),
- ('ೱ', 'ೲ'),
- ('ഄ', 'ഌ'),
- ('എ', 'ഐ'),
- ('ഒ', 'ഺ'),
- ('ഽ', 'ഽ'),
- ('ൎ', 'ൎ'),
- ('ൔ', 'ൖ'),
- ('ൟ', 'ൡ'),
- ('ൺ', 'ൿ'),
- ('අ', 'ඖ'),
- ('ක', 'න'),
- ('ඳ', 'ර'),
- ('ල', 'ල'),
- ('ව', 'ෆ'),
- ('ก', 'ะ'),
- ('า', 'ำ'),
- ('เ', 'ๆ'),
- ('ກ', 'ຂ'),
- ('ຄ', 'ຄ'),
- ('ຆ', 'ຊ'),
- ('ຌ', 'ຣ'),
- ('ລ', 'ລ'),
- ('ວ', 'ະ'),
- ('າ', 'ຳ'),
- ('ຽ', 'ຽ'),
- ('ເ', 'ໄ'),
- ('ໆ', 'ໆ'),
- ('ໜ', 'ໟ'),
- ('ༀ', 'ༀ'),
- ('ཀ', 'ཇ'),
- ('ཉ', 'ཬ'),
- ('ྈ', 'ྌ'),
- ('က', 'ဪ'),
- ('ဿ', 'ဿ'),
- ('ၐ', 'ၕ'),
- ('ၚ', 'ၝ'),
- ('ၡ', 'ၡ'),
- ('ၥ', 'ၦ'),
- ('ၮ', 'ၰ'),
- ('ၵ', 'ႁ'),
- ('ႎ', 'ႎ'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ა', 'ჺ'),
- ('ჼ', 'ቈ'),
- ('ቊ', 'ቍ'),
- ('ቐ', 'ቖ'),
- ('ቘ', 'ቘ'),
- ('ቚ', 'ቝ'),
- ('በ', 'ኈ'),
- ('ኊ', 'ኍ'),
- ('ነ', 'ኰ'),
- ('ኲ', 'ኵ'),
- ('ኸ', 'ኾ'),
- ('ዀ', 'ዀ'),
- ('ዂ', 'ዅ'),
- ('ወ', 'ዖ'),
- ('ዘ', 'ጐ'),
- ('ጒ', 'ጕ'),
- ('ጘ', 'ፚ'),
- ('ᎀ', 'ᎏ'),
- ('Ꭰ', 'Ᏽ'),
- ('ᏸ', 'ᏽ'),
- ('ᐁ', 'ᙬ'),
- ('ᙯ', 'ᙿ'),
- ('ᚁ', 'ᚚ'),
- ('ᚠ', 'ᛪ'),
- ('ᛮ', 'ᛸ'),
- ('ᜀ', 'ᜑ'),
- ('ᜟ', 'ᜱ'),
- ('ᝀ', 'ᝑ'),
- ('ᝠ', 'ᝬ'),
- ('ᝮ', 'ᝰ'),
- ('ក', 'ឳ'),
- ('ៗ', 'ៗ'),
- ('ៜ', 'ៜ'),
- ('ᠠ', 'ᡸ'),
- ('ᢀ', 'ᢨ'),
- ('ᢪ', 'ᢪ'),
- ('ᢰ', 'ᣵ'),
- ('ᤀ', 'ᤞ'),
- ('ᥐ', 'ᥭ'),
- ('ᥰ', 'ᥴ'),
- ('ᦀ', 'ᦫ'),
- ('ᦰ', 'ᧉ'),
- ('ᨀ', 'ᨖ'),
- ('ᨠ', 'ᩔ'),
- ('ᪧ', 'ᪧ'),
- ('ᬅ', 'ᬳ'),
- ('ᭅ', 'ᭌ'),
- ('ᮃ', 'ᮠ'),
- ('ᮮ', 'ᮯ'),
- ('ᮺ', 'ᯥ'),
- ('ᰀ', 'ᰣ'),
- ('ᱍ', 'ᱏ'),
- ('ᱚ', 'ᱽ'),
- ('ᲀ', 'ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('ᳩ', 'ᳬ'),
- ('ᳮ', 'ᳳ'),
- ('ᳵ', 'ᳶ'),
- ('ᳺ', 'ᳺ'),
- ('ᴀ', 'ᶿ'),
- ('Ḁ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ᾼ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῌ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('ῠ', 'Ῥ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῼ'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℊ', 'ℓ'),
- ('ℕ', 'ℕ'),
- ('℘', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('Ω', 'Ω'),
- ('ℨ', 'ℨ'),
- ('K', 'ℹ'),
- ('ℼ', 'ℿ'),
- ('ⅅ', 'ⅉ'),
- ('ⅎ', 'ⅎ'),
- ('Ⅰ', 'ↈ'),
- ('Ⰰ', 'ⳤ'),
- ('Ⳬ', 'ⳮ'),
- ('Ⳳ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ⴰ', 'ⵧ'),
- ('ⵯ', 'ⵯ'),
- ('ⶀ', 'ⶖ'),
- ('ⶠ', 'ⶦ'),
- ('ⶨ', 'ⶮ'),
- ('ⶰ', 'ⶶ'),
- ('ⶸ', 'ⶾ'),
- ('ⷀ', 'ⷆ'),
- ('ⷈ', 'ⷎ'),
- ('ⷐ', 'ⷖ'),
- ('ⷘ', 'ⷞ'),
- ('々', '〇'),
- ('〡', '〩'),
- ('〱', '〵'),
- ('〸', '〼'),
- ('ぁ', 'ゖ'),
- ('゛', 'ゟ'),
- ('ァ', 'ヺ'),
- ('ー', 'ヿ'),
- ('ㄅ', 'ㄯ'),
- ('ㄱ', 'ㆎ'),
- ('ㆠ', 'ㆿ'),
- ('ㇰ', 'ㇿ'),
- ('㐀', '䶿'),
- ('一', 'ꒌ'),
- ('ꓐ', 'ꓽ'),
- ('ꔀ', 'ꘌ'),
- ('ꘐ', 'ꘟ'),
- ('ꘪ', 'ꘫ'),
- ('Ꙁ', 'ꙮ'),
- ('ꙿ', 'ꚝ'),
- ('ꚠ', 'ꛯ'),
- ('ꜗ', 'ꜟ'),
- ('Ꜣ', 'ꞈ'),
- ('Ꞌ', 'ꟍ'),
- ('Ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'Ƛ'),
- ('ꟲ', 'ꠁ'),
- ('ꠃ', 'ꠅ'),
- ('ꠇ', 'ꠊ'),
- ('ꠌ', 'ꠢ'),
- ('ꡀ', 'ꡳ'),
- ('ꢂ', 'ꢳ'),
- ('ꣲ', 'ꣷ'),
- ('ꣻ', 'ꣻ'),
- ('ꣽ', 'ꣾ'),
- ('ꤊ', 'ꤥ'),
- ('ꤰ', 'ꥆ'),
- ('ꥠ', 'ꥼ'),
- ('ꦄ', 'ꦲ'),
- ('ꧏ', 'ꧏ'),
- ('ꧠ', 'ꧤ'),
- ('ꧦ', 'ꧯ'),
- ('ꧺ', 'ꧾ'),
- ('ꨀ', 'ꨨ'),
- ('ꩀ', 'ꩂ'),
- ('ꩄ', 'ꩋ'),
- ('ꩠ', 'ꩶ'),
- ('ꩺ', 'ꩺ'),
- ('ꩾ', 'ꪯ'),
- ('ꪱ', 'ꪱ'),
- ('ꪵ', 'ꪶ'),
- ('ꪹ', 'ꪽ'),
- ('ꫀ', 'ꫀ'),
- ('ꫂ', 'ꫂ'),
- ('ꫛ', 'ꫝ'),
- ('ꫠ', 'ꫪ'),
- ('ꫲ', 'ꫴ'),
- ('ꬁ', 'ꬆ'),
- ('ꬉ', 'ꬎ'),
- ('ꬑ', 'ꬖ'),
- ('ꬠ', 'ꬦ'),
- ('ꬨ', 'ꬮ'),
- ('ꬰ', 'ꭚ'),
- ('ꭜ', 'ꭩ'),
- ('ꭰ', 'ꯢ'),
- ('가', '힣'),
- ('ힰ', 'ퟆ'),
- ('ퟋ', 'ퟻ'),
- ('豈', '舘'),
- ('並', '龎'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('יִ', 'יִ'),
- ('ײַ', 'ﬨ'),
- ('שׁ', 'זּ'),
- ('טּ', 'לּ'),
- ('מּ', 'מּ'),
- ('נּ', 'סּ'),
- ('ףּ', 'פּ'),
- ('צּ', 'ﮱ'),
- ('ﯓ', 'ﴽ'),
- ('ﵐ', 'ﶏ'),
- ('ﶒ', 'ﷇ'),
- ('ﷰ', 'ﷻ'),
- ('ﹰ', 'ﹴ'),
- ('ﹶ', 'ﻼ'),
- ('A', 'Z'),
- ('a', 'z'),
- ('ヲ', 'ᄒ'),
- ('ᅡ', 'ᅦ'),
- ('ᅧ', 'ᅬ'),
- ('ᅭ', 'ᅲ'),
- ('ᅳ', 'ᅵ'),
- ('𐀀', '𐀋'),
- ('𐀍', '𐀦'),
- ('𐀨', '𐀺'),
- ('𐀼', '𐀽'),
- ('𐀿', '𐁍'),
- ('𐁐', '𐁝'),
- ('𐂀', '𐃺'),
- ('𐅀', '𐅴'),
- ('𐊀', '𐊜'),
- ('𐊠', '𐋐'),
- ('𐌀', '𐌟'),
- ('𐌭', '𐍊'),
- ('𐍐', '𐍵'),
- ('𐎀', '𐎝'),
- ('𐎠', '𐏃'),
- ('𐏈', '𐏏'),
- ('𐏑', '𐏕'),
- ('𐐀', '𐒝'),
- ('𐒰', '𐓓'),
- ('𐓘', '𐓻'),
- ('𐔀', '𐔧'),
- ('𐔰', '𐕣'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐗀', '𐗳'),
- ('𐘀', '𐜶'),
- ('𐝀', '𐝕'),
- ('𐝠', '𐝧'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𐠀', '𐠅'),
- ('𐠈', '𐠈'),
- ('𐠊', '𐠵'),
- ('𐠷', '𐠸'),
- ('𐠼', '𐠼'),
- ('𐠿', '𐡕'),
- ('𐡠', '𐡶'),
- ('𐢀', '𐢞'),
- ('𐣠', '𐣲'),
- ('𐣴', '𐣵'),
- ('𐤀', '𐤕'),
- ('𐤠', '𐤹'),
- ('𐦀', '𐦷'),
- ('𐦾', '𐦿'),
- ('𐨀', '𐨀'),
- ('𐨐', '𐨓'),
- ('𐨕', '𐨗'),
- ('𐨙', '𐨵'),
- ('𐩠', '𐩼'),
- ('𐪀', '𐪜'),
- ('𐫀', '𐫇'),
- ('𐫉', '𐫤'),
- ('𐬀', '𐬵'),
- ('𐭀', '𐭕'),
- ('𐭠', '𐭲'),
- ('𐮀', '𐮑'),
- ('𐰀', '𐱈'),
- ('𐲀', '𐲲'),
- ('𐳀', '𐳲'),
- ('𐴀', '𐴣'),
- ('𐵊', '𐵥'),
- ('𐵯', '𐶅'),
- ('𐺀', '𐺩'),
- ('𐺰', '𐺱'),
- ('𐻂', '𐻄'),
- ('𐼀', '𐼜'),
- ('𐼧', '𐼧'),
- ('𐼰', '𐽅'),
- ('𐽰', '𐾁'),
- ('𐾰', '𐿄'),
- ('𐿠', '𐿶'),
- ('𑀃', '𑀷'),
- ('𑁱', '𑁲'),
- ('𑁵', '𑁵'),
- ('𑂃', '𑂯'),
- ('𑃐', '𑃨'),
- ('𑄃', '𑄦'),
- ('𑅄', '𑅄'),
- ('𑅇', '𑅇'),
- ('𑅐', '𑅲'),
- ('𑅶', '𑅶'),
- ('𑆃', '𑆲'),
- ('𑇁', '𑇄'),
- ('𑇚', '𑇚'),
- ('𑇜', '𑇜'),
- ('𑈀', '𑈑'),
- ('𑈓', '𑈫'),
- ('𑈿', '𑉀'),
- ('𑊀', '𑊆'),
- ('𑊈', '𑊈'),
- ('𑊊', '𑊍'),
- ('𑊏', '𑊝'),
- ('𑊟', '𑊨'),
- ('𑊰', '𑋞'),
- ('𑌅', '𑌌'),
- ('𑌏', '𑌐'),
- ('𑌓', '𑌨'),
- ('𑌪', '𑌰'),
- ('𑌲', '𑌳'),
- ('𑌵', '𑌹'),
- ('𑌽', '𑌽'),
- ('𑍐', '𑍐'),
- ('𑍝', '𑍡'),
- ('𑎀', '𑎉'),
- ('𑎋', '𑎋'),
- ('𑎎', '𑎎'),
- ('𑎐', '𑎵'),
- ('𑎷', '𑎷'),
- ('𑏑', '𑏑'),
- ('𑏓', '𑏓'),
- ('𑐀', '𑐴'),
- ('𑑇', '𑑊'),
- ('𑑟', '𑑡'),
- ('𑒀', '𑒯'),
- ('𑓄', '𑓅'),
- ('𑓇', '𑓇'),
- ('𑖀', '𑖮'),
- ('𑗘', '𑗛'),
- ('𑘀', '𑘯'),
- ('𑙄', '𑙄'),
- ('𑚀', '𑚪'),
- ('𑚸', '𑚸'),
- ('𑜀', '𑜚'),
- ('𑝀', '𑝆'),
- ('𑠀', '𑠫'),
- ('𑢠', '𑣟'),
- ('𑣿', '𑤆'),
- ('𑤉', '𑤉'),
- ('𑤌', '𑤓'),
- ('𑤕', '𑤖'),
- ('𑤘', '𑤯'),
- ('𑤿', '𑤿'),
- ('𑥁', '𑥁'),
- ('𑦠', '𑦧'),
- ('𑦪', '𑧐'),
- ('𑧡', '𑧡'),
- ('𑧣', '𑧣'),
- ('𑨀', '𑨀'),
- ('𑨋', '𑨲'),
- ('𑨺', '𑨺'),
- ('𑩐', '𑩐'),
- ('𑩜', '𑪉'),
- ('𑪝', '𑪝'),
- ('𑪰', '𑫸'),
- ('𑯀', '𑯠'),
- ('𑰀', '𑰈'),
- ('𑰊', '𑰮'),
- ('𑱀', '𑱀'),
- ('𑱲', '𑲏'),
- ('𑴀', '𑴆'),
- ('𑴈', '𑴉'),
- ('𑴋', '𑴰'),
- ('𑵆', '𑵆'),
- ('𑵠', '𑵥'),
- ('𑵧', '𑵨'),
- ('𑵪', '𑶉'),
- ('𑶘', '𑶘'),
- ('𑻠', '𑻲'),
- ('𑼂', '𑼂'),
- ('𑼄', '𑼐'),
- ('𑼒', '𑼳'),
- ('𑾰', '𑾰'),
- ('𒀀', '𒎙'),
- ('𒐀', '𒑮'),
- ('𒒀', '𒕃'),
- ('𒾐', '𒿰'),
- ('𓀀', '𓐯'),
- ('𓑁', '𓑆'),
- ('𓑠', '𔏺'),
- ('𔐀', '𔙆'),
- ('𖄀', '𖄝'),
- ('𖠀', '𖨸'),
- ('𖩀', '𖩞'),
- ('𖩰', '𖪾'),
- ('𖫐', '𖫭'),
- ('𖬀', '𖬯'),
- ('𖭀', '𖭃'),
- ('𖭣', '𖭷'),
- ('𖭽', '𖮏'),
- ('𖵀', '𖵬'),
- ('𖹀', '𖹿'),
- ('𖼀', '𖽊'),
- ('𖽐', '𖽐'),
- ('𖾓', '𖾟'),
- ('𖿠', '𖿡'),
- ('𖿣', '𖿣'),
- ('𗀀', '𘟷'),
- ('𘠀', '𘳕'),
- ('𘳿', '𘴈'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('𛀀', '𛄢'),
- ('𛄲', '𛄲'),
- ('𛅐', '𛅒'),
- ('𛅕', '𛅕'),
- ('𛅤', '𛅧'),
- ('𛅰', '𛋻'),
- ('𛰀', '𛱪'),
- ('𛱰', '𛱼'),
- ('𛲀', '𛲈'),
- ('𛲐', '𛲙'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝛀'),
- ('𝛂', '𝛚'),
- ('𝛜', '𝛺'),
- ('𝛼', '𝜔'),
- ('𝜖', '𝜴'),
- ('𝜶', '𝝎'),
- ('𝝐', '𝝮'),
- ('𝝰', '𝞈'),
- ('𝞊', '𝞨'),
- ('𝞪', '𝟂'),
- ('𝟄', '𝟋'),
- ('𝼀', '𝼞'),
- ('𝼥', '𝼪'),
- ('𞀰', '𞁭'),
- ('𞄀', '𞄬'),
- ('𞄷', '𞄽'),
- ('𞅎', '𞅎'),
- ('𞊐', '𞊭'),
- ('𞋀', '𞋫'),
- ('𞓐', '𞓫'),
- ('𞗐', '𞗭'),
- ('𞗰', '𞗰'),
- ('𞟠', '𞟦'),
- ('𞟨', '𞟫'),
- ('𞟭', '𞟮'),
- ('𞟰', '𞟾'),
- ('𞠀', '𞣄'),
- ('𞤀', '𞥃'),
- ('𞥋', '𞥋'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('𠀀', '𪛟'),
- ('𪜀', '𫜹'),
- ('𫝀', '𫠝'),
- ('𫠠', '𬺡'),
- ('𬺰', '𮯠'),
- ('𮯰', '𮹝'),
- ('丽', '𪘀'),
- ('𰀀', '𱍊'),
- ('𱍐', '𲎯'),
-];
-
-pub const IDEOGRAPHIC: &'static [(char, char)] = &[
- ('〆', '〇'),
- ('〡', '〩'),
- ('〸', '〺'),
- ('㐀', '䶿'),
- ('一', '鿿'),
- ('豈', '舘'),
- ('並', '龎'),
- ('\u{16fe4}', '\u{16fe4}'),
- ('𗀀', '𘟷'),
- ('𘠀', '𘳕'),
- ('𘳿', '𘴈'),
- ('𛅰', '𛋻'),
- ('𠀀', '𪛟'),
- ('𪜀', '𫜹'),
- ('𫝀', '𫠝'),
- ('𫠠', '𬺡'),
- ('𬺰', '𮯠'),
- ('𮯰', '𮹝'),
- ('丽', '𪘀'),
- ('𰀀', '𱍊'),
- ('𱍐', '𲎯'),
-];
-
-pub const INCB: &'static [(char, char)] = &[
- ('\u{300}', '\u{36f}'),
- ('\u{483}', '\u{489}'),
- ('\u{591}', '\u{5bd}'),
- ('\u{5bf}', '\u{5bf}'),
- ('\u{5c1}', '\u{5c2}'),
- ('\u{5c4}', '\u{5c5}'),
- ('\u{5c7}', '\u{5c7}'),
- ('\u{610}', '\u{61a}'),
- ('\u{64b}', '\u{65f}'),
- ('\u{670}', '\u{670}'),
- ('\u{6d6}', '\u{6dc}'),
- ('\u{6df}', '\u{6e4}'),
- ('\u{6e7}', '\u{6e8}'),
- ('\u{6ea}', '\u{6ed}'),
- ('\u{711}', '\u{711}'),
- ('\u{730}', '\u{74a}'),
- ('\u{7a6}', '\u{7b0}'),
- ('\u{7eb}', '\u{7f3}'),
- ('\u{7fd}', '\u{7fd}'),
- ('\u{816}', '\u{819}'),
- ('\u{81b}', '\u{823}'),
- ('\u{825}', '\u{827}'),
- ('\u{829}', '\u{82d}'),
- ('\u{859}', '\u{85b}'),
- ('\u{897}', '\u{89f}'),
- ('\u{8ca}', '\u{8e1}'),
- ('\u{8e3}', '\u{902}'),
- ('क', '\u{93a}'),
- ('\u{93c}', '\u{93c}'),
- ('\u{941}', '\u{948}'),
- ('\u{94d}', '\u{94d}'),
- ('\u{951}', 'य़'),
- ('\u{962}', '\u{963}'),
- ('ॸ', 'ॿ'),
- ('\u{981}', '\u{981}'),
- ('ক', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('\u{9bc}', '\u{9bc}'),
- ('\u{9be}', '\u{9be}'),
- ('\u{9c1}', '\u{9c4}'),
- ('\u{9cd}', '\u{9cd}'),
- ('\u{9d7}', '\u{9d7}'),
- ('ড়', 'ঢ়'),
- ('য়', 'য়'),
- ('\u{9e2}', '\u{9e3}'),
- ('ৰ', 'ৱ'),
- ('\u{9fe}', '\u{9fe}'),
- ('\u{a01}', '\u{a02}'),
- ('\u{a3c}', '\u{a3c}'),
- ('\u{a41}', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4d}'),
- ('\u{a51}', '\u{a51}'),
- ('\u{a70}', '\u{a71}'),
- ('\u{a75}', '\u{a75}'),
- ('\u{a81}', '\u{a82}'),
- ('ક', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('\u{abc}', '\u{abc}'),
- ('\u{ac1}', '\u{ac5}'),
- ('\u{ac7}', '\u{ac8}'),
- ('\u{acd}', '\u{acd}'),
- ('\u{ae2}', '\u{ae3}'),
- ('ૹ', '\u{aff}'),
- ('\u{b01}', '\u{b01}'),
- ('କ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଵ', 'ହ'),
- ('\u{b3c}', '\u{b3c}'),
- ('\u{b3e}', '\u{b3f}'),
- ('\u{b41}', '\u{b44}'),
- ('\u{b4d}', '\u{b4d}'),
- ('\u{b55}', '\u{b57}'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', 'ୟ'),
- ('\u{b62}', '\u{b63}'),
- ('ୱ', 'ୱ'),
- ('\u{b82}', '\u{b82}'),
- ('\u{bbe}', '\u{bbe}'),
- ('\u{bc0}', '\u{bc0}'),
- ('\u{bcd}', '\u{bcd}'),
- ('\u{bd7}', '\u{bd7}'),
- ('\u{c00}', '\u{c00}'),
- ('\u{c04}', '\u{c04}'),
- ('క', 'న'),
- ('ప', 'హ'),
- ('\u{c3c}', '\u{c3c}'),
- ('\u{c3e}', '\u{c40}'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4d}'),
- ('\u{c55}', '\u{c56}'),
- ('ౘ', 'ౚ'),
- ('\u{c62}', '\u{c63}'),
- ('\u{c81}', '\u{c81}'),
- ('\u{cbc}', '\u{cbc}'),
- ('\u{cbf}', '\u{cc0}'),
- ('\u{cc2}', '\u{cc2}'),
- ('\u{cc6}', '\u{cc8}'),
- ('\u{cca}', '\u{ccd}'),
- ('\u{cd5}', '\u{cd6}'),
- ('\u{ce2}', '\u{ce3}'),
- ('\u{d00}', '\u{d01}'),
- ('ക', '\u{d3c}'),
- ('\u{d3e}', '\u{d3e}'),
- ('\u{d41}', '\u{d44}'),
- ('\u{d4d}', '\u{d4d}'),
- ('\u{d57}', '\u{d57}'),
- ('\u{d62}', '\u{d63}'),
- ('\u{d81}', '\u{d81}'),
- ('\u{dca}', '\u{dca}'),
- ('\u{dcf}', '\u{dcf}'),
- ('\u{dd2}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('\u{ddf}', '\u{ddf}'),
- ('\u{e31}', '\u{e31}'),
- ('\u{e34}', '\u{e3a}'),
- ('\u{e47}', '\u{e4e}'),
- ('\u{eb1}', '\u{eb1}'),
- ('\u{eb4}', '\u{ebc}'),
- ('\u{ec8}', '\u{ece}'),
- ('\u{f18}', '\u{f19}'),
- ('\u{f35}', '\u{f35}'),
- ('\u{f37}', '\u{f37}'),
- ('\u{f39}', '\u{f39}'),
- ('\u{f71}', '\u{f7e}'),
- ('\u{f80}', '\u{f84}'),
- ('\u{f86}', '\u{f87}'),
- ('\u{f8d}', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('\u{fc6}', '\u{fc6}'),
- ('\u{102d}', '\u{1030}'),
- ('\u{1032}', '\u{1037}'),
- ('\u{1039}', '\u{103a}'),
- ('\u{103d}', '\u{103e}'),
- ('\u{1058}', '\u{1059}'),
- ('\u{105e}', '\u{1060}'),
- ('\u{1071}', '\u{1074}'),
- ('\u{1082}', '\u{1082}'),
- ('\u{1085}', '\u{1086}'),
- ('\u{108d}', '\u{108d}'),
- ('\u{109d}', '\u{109d}'),
- ('\u{135d}', '\u{135f}'),
- ('\u{1712}', '\u{1715}'),
- ('\u{1732}', '\u{1734}'),
- ('\u{1752}', '\u{1753}'),
- ('\u{1772}', '\u{1773}'),
- ('\u{17b4}', '\u{17b5}'),
- ('\u{17b7}', '\u{17bd}'),
- ('\u{17c6}', '\u{17c6}'),
- ('\u{17c9}', '\u{17d3}'),
- ('\u{17dd}', '\u{17dd}'),
- ('\u{180b}', '\u{180d}'),
- ('\u{180f}', '\u{180f}'),
- ('\u{1885}', '\u{1886}'),
- ('\u{18a9}', '\u{18a9}'),
- ('\u{1920}', '\u{1922}'),
- ('\u{1927}', '\u{1928}'),
- ('\u{1932}', '\u{1932}'),
- ('\u{1939}', '\u{193b}'),
- ('\u{1a17}', '\u{1a18}'),
- ('\u{1a1b}', '\u{1a1b}'),
- ('\u{1a56}', '\u{1a56}'),
- ('\u{1a58}', '\u{1a5e}'),
- ('\u{1a60}', '\u{1a60}'),
- ('\u{1a62}', '\u{1a62}'),
- ('\u{1a65}', '\u{1a6c}'),
- ('\u{1a73}', '\u{1a7c}'),
- ('\u{1a7f}', '\u{1a7f}'),
- ('\u{1ab0}', '\u{1ace}'),
- ('\u{1b00}', '\u{1b03}'),
- ('\u{1b34}', '\u{1b3d}'),
- ('\u{1b42}', '\u{1b44}'),
- ('\u{1b6b}', '\u{1b73}'),
- ('\u{1b80}', '\u{1b81}'),
- ('\u{1ba2}', '\u{1ba5}'),
- ('\u{1ba8}', '\u{1bad}'),
- ('\u{1be6}', '\u{1be6}'),
- ('\u{1be8}', '\u{1be9}'),
- ('\u{1bed}', '\u{1bed}'),
- ('\u{1bef}', '\u{1bf3}'),
- ('\u{1c2c}', '\u{1c33}'),
- ('\u{1c36}', '\u{1c37}'),
- ('\u{1cd0}', '\u{1cd2}'),
- ('\u{1cd4}', '\u{1ce0}'),
- ('\u{1ce2}', '\u{1ce8}'),
- ('\u{1ced}', '\u{1ced}'),
- ('\u{1cf4}', '\u{1cf4}'),
- ('\u{1cf8}', '\u{1cf9}'),
- ('\u{1dc0}', '\u{1dff}'),
- ('\u{200d}', '\u{200d}'),
- ('\u{20d0}', '\u{20f0}'),
- ('\u{2cef}', '\u{2cf1}'),
- ('\u{2d7f}', '\u{2d7f}'),
- ('\u{2de0}', '\u{2dff}'),
- ('\u{302a}', '\u{302f}'),
- ('\u{3099}', '\u{309a}'),
- ('\u{a66f}', '\u{a672}'),
- ('\u{a674}', '\u{a67d}'),
- ('\u{a69e}', '\u{a69f}'),
- ('\u{a6f0}', '\u{a6f1}'),
- ('\u{a802}', '\u{a802}'),
- ('\u{a806}', '\u{a806}'),
- ('\u{a80b}', '\u{a80b}'),
- ('\u{a825}', '\u{a826}'),
- ('\u{a82c}', '\u{a82c}'),
- ('\u{a8c4}', '\u{a8c5}'),
- ('\u{a8e0}', '\u{a8f1}'),
- ('\u{a8ff}', '\u{a8ff}'),
- ('\u{a926}', '\u{a92d}'),
- ('\u{a947}', '\u{a951}'),
- ('\u{a953}', '\u{a953}'),
- ('\u{a980}', '\u{a982}'),
- ('\u{a9b3}', '\u{a9b3}'),
- ('\u{a9b6}', '\u{a9b9}'),
- ('\u{a9bc}', '\u{a9bd}'),
- ('\u{a9c0}', '\u{a9c0}'),
- ('\u{a9e5}', '\u{a9e5}'),
- ('\u{aa29}', '\u{aa2e}'),
- ('\u{aa31}', '\u{aa32}'),
- ('\u{aa35}', '\u{aa36}'),
- ('\u{aa43}', '\u{aa43}'),
- ('\u{aa4c}', '\u{aa4c}'),
- ('\u{aa7c}', '\u{aa7c}'),
- ('\u{aab0}', '\u{aab0}'),
- ('\u{aab2}', '\u{aab4}'),
- ('\u{aab7}', '\u{aab8}'),
- ('\u{aabe}', '\u{aabf}'),
- ('\u{aac1}', '\u{aac1}'),
- ('\u{aaec}', '\u{aaed}'),
- ('\u{aaf6}', '\u{aaf6}'),
- ('\u{abe5}', '\u{abe5}'),
- ('\u{abe8}', '\u{abe8}'),
- ('\u{abed}', '\u{abed}'),
- ('\u{fb1e}', '\u{fb1e}'),
- ('\u{fe00}', '\u{fe0f}'),
- ('\u{fe20}', '\u{fe2f}'),
- ('\u{ff9e}', '\u{ff9f}'),
- ('\u{101fd}', '\u{101fd}'),
- ('\u{102e0}', '\u{102e0}'),
- ('\u{10376}', '\u{1037a}'),
- ('\u{10a01}', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '\u{10a0f}'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '\u{10a3f}'),
- ('\u{10ae5}', '\u{10ae6}'),
- ('\u{10d24}', '\u{10d27}'),
- ('\u{10d69}', '\u{10d6d}'),
- ('\u{10eab}', '\u{10eac}'),
- ('\u{10efc}', '\u{10eff}'),
- ('\u{10f46}', '\u{10f50}'),
- ('\u{10f82}', '\u{10f85}'),
- ('\u{11001}', '\u{11001}'),
- ('\u{11038}', '\u{11046}'),
- ('\u{11070}', '\u{11070}'),
- ('\u{11073}', '\u{11074}'),
- ('\u{1107f}', '\u{11081}'),
- ('\u{110b3}', '\u{110b6}'),
- ('\u{110b9}', '\u{110ba}'),
- ('\u{110c2}', '\u{110c2}'),
- ('\u{11100}', '\u{11102}'),
- ('\u{11127}', '\u{1112b}'),
- ('\u{1112d}', '\u{11134}'),
- ('\u{11173}', '\u{11173}'),
- ('\u{11180}', '\u{11181}'),
- ('\u{111b6}', '\u{111be}'),
- ('\u{111c0}', '\u{111c0}'),
- ('\u{111c9}', '\u{111cc}'),
- ('\u{111cf}', '\u{111cf}'),
- ('\u{1122f}', '\u{11231}'),
- ('\u{11234}', '\u{11237}'),
- ('\u{1123e}', '\u{1123e}'),
- ('\u{11241}', '\u{11241}'),
- ('\u{112df}', '\u{112df}'),
- ('\u{112e3}', '\u{112ea}'),
- ('\u{11300}', '\u{11301}'),
- ('\u{1133b}', '\u{1133c}'),
- ('\u{1133e}', '\u{1133e}'),
- ('\u{11340}', '\u{11340}'),
- ('\u{1134d}', '\u{1134d}'),
- ('\u{11357}', '\u{11357}'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
- ('\u{113b8}', '\u{113b8}'),
- ('\u{113bb}', '\u{113c0}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '\u{113c9}'),
- ('\u{113ce}', '\u{113d0}'),
- ('\u{113d2}', '\u{113d2}'),
- ('\u{113e1}', '\u{113e2}'),
- ('\u{11438}', '\u{1143f}'),
- ('\u{11442}', '\u{11444}'),
- ('\u{11446}', '\u{11446}'),
- ('\u{1145e}', '\u{1145e}'),
- ('\u{114b0}', '\u{114b0}'),
- ('\u{114b3}', '\u{114b8}'),
- ('\u{114ba}', '\u{114ba}'),
- ('\u{114bd}', '\u{114bd}'),
- ('\u{114bf}', '\u{114c0}'),
- ('\u{114c2}', '\u{114c3}'),
- ('\u{115af}', '\u{115af}'),
- ('\u{115b2}', '\u{115b5}'),
- ('\u{115bc}', '\u{115bd}'),
- ('\u{115bf}', '\u{115c0}'),
- ('\u{115dc}', '\u{115dd}'),
- ('\u{11633}', '\u{1163a}'),
- ('\u{1163d}', '\u{1163d}'),
- ('\u{1163f}', '\u{11640}'),
- ('\u{116ab}', '\u{116ab}'),
- ('\u{116ad}', '\u{116ad}'),
- ('\u{116b0}', '\u{116b7}'),
- ('\u{1171d}', '\u{1171d}'),
- ('\u{1171f}', '\u{1171f}'),
- ('\u{11722}', '\u{11725}'),
- ('\u{11727}', '\u{1172b}'),
- ('\u{1182f}', '\u{11837}'),
- ('\u{11839}', '\u{1183a}'),
- ('\u{11930}', '\u{11930}'),
- ('\u{1193b}', '\u{1193e}'),
- ('\u{11943}', '\u{11943}'),
- ('\u{119d4}', '\u{119d7}'),
- ('\u{119da}', '\u{119db}'),
- ('\u{119e0}', '\u{119e0}'),
- ('\u{11a01}', '\u{11a0a}'),
- ('\u{11a33}', '\u{11a38}'),
- ('\u{11a3b}', '\u{11a3e}'),
- ('\u{11a47}', '\u{11a47}'),
- ('\u{11a51}', '\u{11a56}'),
- ('\u{11a59}', '\u{11a5b}'),
- ('\u{11a8a}', '\u{11a96}'),
- ('\u{11a98}', '\u{11a99}'),
- ('\u{11c30}', '\u{11c36}'),
- ('\u{11c38}', '\u{11c3d}'),
- ('\u{11c3f}', '\u{11c3f}'),
- ('\u{11c92}', '\u{11ca7}'),
- ('\u{11caa}', '\u{11cb0}'),
- ('\u{11cb2}', '\u{11cb3}'),
- ('\u{11cb5}', '\u{11cb6}'),
- ('\u{11d31}', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d45}'),
- ('\u{11d47}', '\u{11d47}'),
- ('\u{11d90}', '\u{11d91}'),
- ('\u{11d95}', '\u{11d95}'),
- ('\u{11d97}', '\u{11d97}'),
- ('\u{11ef3}', '\u{11ef4}'),
- ('\u{11f00}', '\u{11f01}'),
- ('\u{11f36}', '\u{11f3a}'),
- ('\u{11f40}', '\u{11f42}'),
- ('\u{11f5a}', '\u{11f5a}'),
- ('\u{13440}', '\u{13440}'),
- ('\u{13447}', '\u{13455}'),
- ('\u{1611e}', '\u{16129}'),
- ('\u{1612d}', '\u{1612f}'),
- ('\u{16af0}', '\u{16af4}'),
- ('\u{16b30}', '\u{16b36}'),
- ('\u{16f4f}', '\u{16f4f}'),
- ('\u{16f8f}', '\u{16f92}'),
- ('\u{16fe4}', '\u{16fe4}'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('\u{1bc9d}', '\u{1bc9e}'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('\u{1d165}', '\u{1d169}'),
- ('\u{1d16d}', '\u{1d172}'),
- ('\u{1d17b}', '\u{1d182}'),
- ('\u{1d185}', '\u{1d18b}'),
- ('\u{1d1aa}', '\u{1d1ad}'),
- ('\u{1d242}', '\u{1d244}'),
- ('\u{1da00}', '\u{1da36}'),
- ('\u{1da3b}', '\u{1da6c}'),
- ('\u{1da75}', '\u{1da75}'),
- ('\u{1da84}', '\u{1da84}'),
- ('\u{1da9b}', '\u{1da9f}'),
- ('\u{1daa1}', '\u{1daaf}'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
- ('\u{1e08f}', '\u{1e08f}'),
- ('\u{1e130}', '\u{1e136}'),
- ('\u{1e2ae}', '\u{1e2ae}'),
- ('\u{1e2ec}', '\u{1e2ef}'),
- ('\u{1e4ec}', '\u{1e4ef}'),
- ('\u{1e5ee}', '\u{1e5ef}'),
- ('\u{1e8d0}', '\u{1e8d6}'),
- ('\u{1e944}', '\u{1e94a}'),
- ('🏻', '🏿'),
- ('\u{e0020}', '\u{e007f}'),
- ('\u{e0100}', '\u{e01ef}'),
-];
-
-pub const JOIN_CONTROL: &'static [(char, char)] = &[('\u{200c}', '\u{200d}')];
-
-pub const LOGICAL_ORDER_EXCEPTION: &'static [(char, char)] = &[
- ('เ', 'ไ'),
- ('ເ', 'ໄ'),
- ('ᦵ', 'ᦷ'),
- ('ᦺ', 'ᦺ'),
- ('ꪵ', 'ꪶ'),
- ('ꪹ', 'ꪹ'),
- ('ꪻ', 'ꪼ'),
-];
-
-pub const LOWERCASE: &'static [(char, char)] = &[
- ('a', 'z'),
- ('ª', 'ª'),
- ('µ', 'µ'),
- ('º', 'º'),
- ('ß', 'ö'),
- ('ø', 'ÿ'),
- ('ā', 'ā'),
- ('ă', 'ă'),
- ('ą', 'ą'),
- ('ć', 'ć'),
- ('ĉ', 'ĉ'),
- ('ċ', 'ċ'),
- ('č', 'č'),
- ('ď', 'ď'),
- ('đ', 'đ'),
- ('ē', 'ē'),
- ('ĕ', 'ĕ'),
- ('ė', 'ė'),
- ('ę', 'ę'),
- ('ě', 'ě'),
- ('ĝ', 'ĝ'),
- ('ğ', 'ğ'),
- ('ġ', 'ġ'),
- ('ģ', 'ģ'),
- ('ĥ', 'ĥ'),
- ('ħ', 'ħ'),
- ('ĩ', 'ĩ'),
- ('ī', 'ī'),
- ('ĭ', 'ĭ'),
- ('į', 'į'),
- ('ı', 'ı'),
- ('ij', 'ij'),
- ('ĵ', 'ĵ'),
- ('ķ', 'ĸ'),
- ('ĺ', 'ĺ'),
- ('ļ', 'ļ'),
- ('ľ', 'ľ'),
- ('ŀ', 'ŀ'),
- ('ł', 'ł'),
- ('ń', 'ń'),
- ('ņ', 'ņ'),
- ('ň', 'ʼn'),
- ('ŋ', 'ŋ'),
- ('ō', 'ō'),
- ('ŏ', 'ŏ'),
- ('ő', 'ő'),
- ('œ', 'œ'),
- ('ŕ', 'ŕ'),
- ('ŗ', 'ŗ'),
- ('ř', 'ř'),
- ('ś', 'ś'),
- ('ŝ', 'ŝ'),
- ('ş', 'ş'),
- ('š', 'š'),
- ('ţ', 'ţ'),
- ('ť', 'ť'),
- ('ŧ', 'ŧ'),
- ('ũ', 'ũ'),
- ('ū', 'ū'),
- ('ŭ', 'ŭ'),
- ('ů', 'ů'),
- ('ű', 'ű'),
- ('ų', 'ų'),
- ('ŵ', 'ŵ'),
- ('ŷ', 'ŷ'),
- ('ź', 'ź'),
- ('ż', 'ż'),
- ('ž', 'ƀ'),
- ('ƃ', 'ƃ'),
- ('ƅ', 'ƅ'),
- ('ƈ', 'ƈ'),
- ('ƌ', 'ƍ'),
- ('ƒ', 'ƒ'),
- ('ƕ', 'ƕ'),
- ('ƙ', 'ƛ'),
- ('ƞ', 'ƞ'),
- ('ơ', 'ơ'),
- ('ƣ', 'ƣ'),
- ('ƥ', 'ƥ'),
- ('ƨ', 'ƨ'),
- ('ƪ', 'ƫ'),
- ('ƭ', 'ƭ'),
- ('ư', 'ư'),
- ('ƴ', 'ƴ'),
- ('ƶ', 'ƶ'),
- ('ƹ', 'ƺ'),
- ('ƽ', 'ƿ'),
- ('dž', 'dž'),
- ('lj', 'lj'),
- ('nj', 'nj'),
- ('ǎ', 'ǎ'),
- ('ǐ', 'ǐ'),
- ('ǒ', 'ǒ'),
- ('ǔ', 'ǔ'),
- ('ǖ', 'ǖ'),
- ('ǘ', 'ǘ'),
- ('ǚ', 'ǚ'),
- ('ǜ', 'ǝ'),
- ('ǟ', 'ǟ'),
- ('ǡ', 'ǡ'),
- ('ǣ', 'ǣ'),
- ('ǥ', 'ǥ'),
- ('ǧ', 'ǧ'),
- ('ǩ', 'ǩ'),
- ('ǫ', 'ǫ'),
- ('ǭ', 'ǭ'),
- ('ǯ', 'ǰ'),
- ('dz', 'dz'),
- ('ǵ', 'ǵ'),
- ('ǹ', 'ǹ'),
- ('ǻ', 'ǻ'),
- ('ǽ', 'ǽ'),
- ('ǿ', 'ǿ'),
- ('ȁ', 'ȁ'),
- ('ȃ', 'ȃ'),
- ('ȅ', 'ȅ'),
- ('ȇ', 'ȇ'),
- ('ȉ', 'ȉ'),
- ('ȋ', 'ȋ'),
- ('ȍ', 'ȍ'),
- ('ȏ', 'ȏ'),
- ('ȑ', 'ȑ'),
- ('ȓ', 'ȓ'),
- ('ȕ', 'ȕ'),
- ('ȗ', 'ȗ'),
- ('ș', 'ș'),
- ('ț', 'ț'),
- ('ȝ', 'ȝ'),
- ('ȟ', 'ȟ'),
- ('ȡ', 'ȡ'),
- ('ȣ', 'ȣ'),
- ('ȥ', 'ȥ'),
- ('ȧ', 'ȧ'),
- ('ȩ', 'ȩ'),
- ('ȫ', 'ȫ'),
- ('ȭ', 'ȭ'),
- ('ȯ', 'ȯ'),
- ('ȱ', 'ȱ'),
- ('ȳ', 'ȹ'),
- ('ȼ', 'ȼ'),
- ('ȿ', 'ɀ'),
- ('ɂ', 'ɂ'),
- ('ɇ', 'ɇ'),
- ('ɉ', 'ɉ'),
- ('ɋ', 'ɋ'),
- ('ɍ', 'ɍ'),
- ('ɏ', 'ʓ'),
- ('ʕ', 'ʸ'),
- ('ˀ', 'ˁ'),
- ('ˠ', 'ˤ'),
- ('\u{345}', '\u{345}'),
- ('ͱ', 'ͱ'),
- ('ͳ', 'ͳ'),
- ('ͷ', 'ͷ'),
- ('ͺ', 'ͽ'),
- ('ΐ', 'ΐ'),
- ('ά', 'ώ'),
- ('ϐ', 'ϑ'),
- ('ϕ', 'ϗ'),
- ('ϙ', 'ϙ'),
- ('ϛ', 'ϛ'),
- ('ϝ', 'ϝ'),
- ('ϟ', 'ϟ'),
- ('ϡ', 'ϡ'),
- ('ϣ', 'ϣ'),
- ('ϥ', 'ϥ'),
- ('ϧ', 'ϧ'),
- ('ϩ', 'ϩ'),
- ('ϫ', 'ϫ'),
- ('ϭ', 'ϭ'),
- ('ϯ', 'ϳ'),
- ('ϵ', 'ϵ'),
- ('ϸ', 'ϸ'),
- ('ϻ', 'ϼ'),
- ('а', 'џ'),
- ('ѡ', 'ѡ'),
- ('ѣ', 'ѣ'),
- ('ѥ', 'ѥ'),
- ('ѧ', 'ѧ'),
- ('ѩ', 'ѩ'),
- ('ѫ', 'ѫ'),
- ('ѭ', 'ѭ'),
- ('ѯ', 'ѯ'),
- ('ѱ', 'ѱ'),
- ('ѳ', 'ѳ'),
- ('ѵ', 'ѵ'),
- ('ѷ', 'ѷ'),
- ('ѹ', 'ѹ'),
- ('ѻ', 'ѻ'),
- ('ѽ', 'ѽ'),
- ('ѿ', 'ѿ'),
- ('ҁ', 'ҁ'),
- ('ҋ', 'ҋ'),
- ('ҍ', 'ҍ'),
- ('ҏ', 'ҏ'),
- ('ґ', 'ґ'),
- ('ғ', 'ғ'),
- ('ҕ', 'ҕ'),
- ('җ', 'җ'),
- ('ҙ', 'ҙ'),
- ('қ', 'қ'),
- ('ҝ', 'ҝ'),
- ('ҟ', 'ҟ'),
- ('ҡ', 'ҡ'),
- ('ң', 'ң'),
- ('ҥ', 'ҥ'),
- ('ҧ', 'ҧ'),
- ('ҩ', 'ҩ'),
- ('ҫ', 'ҫ'),
- ('ҭ', 'ҭ'),
- ('ү', 'ү'),
- ('ұ', 'ұ'),
- ('ҳ', 'ҳ'),
- ('ҵ', 'ҵ'),
- ('ҷ', 'ҷ'),
- ('ҹ', 'ҹ'),
- ('һ', 'һ'),
- ('ҽ', 'ҽ'),
- ('ҿ', 'ҿ'),
- ('ӂ', 'ӂ'),
- ('ӄ', 'ӄ'),
- ('ӆ', 'ӆ'),
- ('ӈ', 'ӈ'),
- ('ӊ', 'ӊ'),
- ('ӌ', 'ӌ'),
- ('ӎ', 'ӏ'),
- ('ӑ', 'ӑ'),
- ('ӓ', 'ӓ'),
- ('ӕ', 'ӕ'),
- ('ӗ', 'ӗ'),
- ('ә', 'ә'),
- ('ӛ', 'ӛ'),
- ('ӝ', 'ӝ'),
- ('ӟ', 'ӟ'),
- ('ӡ', 'ӡ'),
- ('ӣ', 'ӣ'),
- ('ӥ', 'ӥ'),
- ('ӧ', 'ӧ'),
- ('ө', 'ө'),
- ('ӫ', 'ӫ'),
- ('ӭ', 'ӭ'),
- ('ӯ', 'ӯ'),
- ('ӱ', 'ӱ'),
- ('ӳ', 'ӳ'),
- ('ӵ', 'ӵ'),
- ('ӷ', 'ӷ'),
- ('ӹ', 'ӹ'),
- ('ӻ', 'ӻ'),
- ('ӽ', 'ӽ'),
- ('ӿ', 'ӿ'),
- ('ԁ', 'ԁ'),
- ('ԃ', 'ԃ'),
- ('ԅ', 'ԅ'),
- ('ԇ', 'ԇ'),
- ('ԉ', 'ԉ'),
- ('ԋ', 'ԋ'),
- ('ԍ', 'ԍ'),
- ('ԏ', 'ԏ'),
- ('ԑ', 'ԑ'),
- ('ԓ', 'ԓ'),
- ('ԕ', 'ԕ'),
- ('ԗ', 'ԗ'),
- ('ԙ', 'ԙ'),
- ('ԛ', 'ԛ'),
- ('ԝ', 'ԝ'),
- ('ԟ', 'ԟ'),
- ('ԡ', 'ԡ'),
- ('ԣ', 'ԣ'),
- ('ԥ', 'ԥ'),
- ('ԧ', 'ԧ'),
- ('ԩ', 'ԩ'),
- ('ԫ', 'ԫ'),
- ('ԭ', 'ԭ'),
- ('ԯ', 'ԯ'),
- ('ՠ', 'ֈ'),
- ('ა', 'ჺ'),
- ('ჼ', 'ჿ'),
- ('ᏸ', 'ᏽ'),
- ('ᲀ', 'ᲈ'),
- ('ᲊ', 'ᲊ'),
- ('ᴀ', 'ᶿ'),
- ('ḁ', 'ḁ'),
- ('ḃ', 'ḃ'),
- ('ḅ', 'ḅ'),
- ('ḇ', 'ḇ'),
- ('ḉ', 'ḉ'),
- ('ḋ', 'ḋ'),
- ('ḍ', 'ḍ'),
- ('ḏ', 'ḏ'),
- ('ḑ', 'ḑ'),
- ('ḓ', 'ḓ'),
- ('ḕ', 'ḕ'),
- ('ḗ', 'ḗ'),
- ('ḙ', 'ḙ'),
- ('ḛ', 'ḛ'),
- ('ḝ', 'ḝ'),
- ('ḟ', 'ḟ'),
- ('ḡ', 'ḡ'),
- ('ḣ', 'ḣ'),
- ('ḥ', 'ḥ'),
- ('ḧ', 'ḧ'),
- ('ḩ', 'ḩ'),
- ('ḫ', 'ḫ'),
- ('ḭ', 'ḭ'),
- ('ḯ', 'ḯ'),
- ('ḱ', 'ḱ'),
- ('ḳ', 'ḳ'),
- ('ḵ', 'ḵ'),
- ('ḷ', 'ḷ'),
- ('ḹ', 'ḹ'),
- ('ḻ', 'ḻ'),
- ('ḽ', 'ḽ'),
- ('ḿ', 'ḿ'),
- ('ṁ', 'ṁ'),
- ('ṃ', 'ṃ'),
- ('ṅ', 'ṅ'),
- ('ṇ', 'ṇ'),
- ('ṉ', 'ṉ'),
- ('ṋ', 'ṋ'),
- ('ṍ', 'ṍ'),
- ('ṏ', 'ṏ'),
- ('ṑ', 'ṑ'),
- ('ṓ', 'ṓ'),
- ('ṕ', 'ṕ'),
- ('ṗ', 'ṗ'),
- ('ṙ', 'ṙ'),
- ('ṛ', 'ṛ'),
- ('ṝ', 'ṝ'),
- ('ṟ', 'ṟ'),
- ('ṡ', 'ṡ'),
- ('ṣ', 'ṣ'),
- ('ṥ', 'ṥ'),
- ('ṧ', 'ṧ'),
- ('ṩ', 'ṩ'),
- ('ṫ', 'ṫ'),
- ('ṭ', 'ṭ'),
- ('ṯ', 'ṯ'),
- ('ṱ', 'ṱ'),
- ('ṳ', 'ṳ'),
- ('ṵ', 'ṵ'),
- ('ṷ', 'ṷ'),
- ('ṹ', 'ṹ'),
- ('ṻ', 'ṻ'),
- ('ṽ', 'ṽ'),
- ('ṿ', 'ṿ'),
- ('ẁ', 'ẁ'),
- ('ẃ', 'ẃ'),
- ('ẅ', 'ẅ'),
- ('ẇ', 'ẇ'),
- ('ẉ', 'ẉ'),
- ('ẋ', 'ẋ'),
- ('ẍ', 'ẍ'),
- ('ẏ', 'ẏ'),
- ('ẑ', 'ẑ'),
- ('ẓ', 'ẓ'),
- ('ẕ', 'ẝ'),
- ('ẟ', 'ẟ'),
- ('ạ', 'ạ'),
- ('ả', 'ả'),
- ('ấ', 'ấ'),
- ('ầ', 'ầ'),
- ('ẩ', 'ẩ'),
- ('ẫ', 'ẫ'),
- ('ậ', 'ậ'),
- ('ắ', 'ắ'),
- ('ằ', 'ằ'),
- ('ẳ', 'ẳ'),
- ('ẵ', 'ẵ'),
- ('ặ', 'ặ'),
- ('ẹ', 'ẹ'),
- ('ẻ', 'ẻ'),
- ('ẽ', 'ẽ'),
- ('ế', 'ế'),
- ('ề', 'ề'),
- ('ể', 'ể'),
- ('ễ', 'ễ'),
- ('ệ', 'ệ'),
- ('ỉ', 'ỉ'),
- ('ị', 'ị'),
- ('ọ', 'ọ'),
- ('ỏ', 'ỏ'),
- ('ố', 'ố'),
- ('ồ', 'ồ'),
- ('ổ', 'ổ'),
- ('ỗ', 'ỗ'),
- ('ộ', 'ộ'),
- ('ớ', 'ớ'),
- ('ờ', 'ờ'),
- ('ở', 'ở'),
- ('ỡ', 'ỡ'),
- ('ợ', 'ợ'),
- ('ụ', 'ụ'),
- ('ủ', 'ủ'),
- ('ứ', 'ứ'),
- ('ừ', 'ừ'),
- ('ử', 'ử'),
- ('ữ', 'ữ'),
- ('ự', 'ự'),
- ('ỳ', 'ỳ'),
- ('ỵ', 'ỵ'),
- ('ỷ', 'ỷ'),
- ('ỹ', 'ỹ'),
- ('ỻ', 'ỻ'),
- ('ỽ', 'ỽ'),
- ('ỿ', 'ἇ'),
- ('ἐ', 'ἕ'),
- ('ἠ', 'ἧ'),
- ('ἰ', 'ἷ'),
- ('ὀ', 'ὅ'),
- ('ὐ', 'ὗ'),
- ('ὠ', 'ὧ'),
- ('ὰ', 'ώ'),
- ('ᾀ', 'ᾇ'),
- ('ᾐ', 'ᾗ'),
- ('ᾠ', 'ᾧ'),
- ('ᾰ', 'ᾴ'),
- ('ᾶ', 'ᾷ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῇ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'ῗ'),
- ('ῠ', 'ῧ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῷ'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('ℊ', 'ℊ'),
- ('ℎ', 'ℏ'),
- ('ℓ', 'ℓ'),
- ('ℯ', 'ℯ'),
- ('ℴ', 'ℴ'),
- ('ℹ', 'ℹ'),
- ('ℼ', 'ℽ'),
- ('ⅆ', 'ⅉ'),
- ('ⅎ', 'ⅎ'),
- ('ⅰ', 'ⅿ'),
- ('ↄ', 'ↄ'),
- ('ⓐ', 'ⓩ'),
- ('ⰰ', 'ⱟ'),
- ('ⱡ', 'ⱡ'),
- ('ⱥ', 'ⱦ'),
- ('ⱨ', 'ⱨ'),
- ('ⱪ', 'ⱪ'),
- ('ⱬ', 'ⱬ'),
- ('ⱱ', 'ⱱ'),
- ('ⱳ', 'ⱴ'),
- ('ⱶ', 'ⱽ'),
- ('ⲁ', 'ⲁ'),
- ('ⲃ', 'ⲃ'),
- ('ⲅ', 'ⲅ'),
- ('ⲇ', 'ⲇ'),
- ('ⲉ', 'ⲉ'),
- ('ⲋ', 'ⲋ'),
- ('ⲍ', 'ⲍ'),
- ('ⲏ', 'ⲏ'),
- ('ⲑ', 'ⲑ'),
- ('ⲓ', 'ⲓ'),
- ('ⲕ', 'ⲕ'),
- ('ⲗ', 'ⲗ'),
- ('ⲙ', 'ⲙ'),
- ('ⲛ', 'ⲛ'),
- ('ⲝ', 'ⲝ'),
- ('ⲟ', 'ⲟ'),
- ('ⲡ', 'ⲡ'),
- ('ⲣ', 'ⲣ'),
- ('ⲥ', 'ⲥ'),
- ('ⲧ', 'ⲧ'),
- ('ⲩ', 'ⲩ'),
- ('ⲫ', 'ⲫ'),
- ('ⲭ', 'ⲭ'),
- ('ⲯ', 'ⲯ'),
- ('ⲱ', 'ⲱ'),
- ('ⲳ', 'ⲳ'),
- ('ⲵ', 'ⲵ'),
- ('ⲷ', 'ⲷ'),
- ('ⲹ', 'ⲹ'),
- ('ⲻ', 'ⲻ'),
- ('ⲽ', 'ⲽ'),
- ('ⲿ', 'ⲿ'),
- ('ⳁ', 'ⳁ'),
- ('ⳃ', 'ⳃ'),
- ('ⳅ', 'ⳅ'),
- ('ⳇ', 'ⳇ'),
- ('ⳉ', 'ⳉ'),
- ('ⳋ', 'ⳋ'),
- ('ⳍ', 'ⳍ'),
- ('ⳏ', 'ⳏ'),
- ('ⳑ', 'ⳑ'),
- ('ⳓ', 'ⳓ'),
- ('ⳕ', 'ⳕ'),
- ('ⳗ', 'ⳗ'),
- ('ⳙ', 'ⳙ'),
- ('ⳛ', 'ⳛ'),
- ('ⳝ', 'ⳝ'),
- ('ⳟ', 'ⳟ'),
- ('ⳡ', 'ⳡ'),
- ('ⳣ', 'ⳤ'),
- ('ⳬ', 'ⳬ'),
- ('ⳮ', 'ⳮ'),
- ('ⳳ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ꙁ', 'ꙁ'),
- ('ꙃ', 'ꙃ'),
- ('ꙅ', 'ꙅ'),
- ('ꙇ', 'ꙇ'),
- ('ꙉ', 'ꙉ'),
- ('ꙋ', 'ꙋ'),
- ('ꙍ', 'ꙍ'),
- ('ꙏ', 'ꙏ'),
- ('ꙑ', 'ꙑ'),
- ('ꙓ', 'ꙓ'),
- ('ꙕ', 'ꙕ'),
- ('ꙗ', 'ꙗ'),
- ('ꙙ', 'ꙙ'),
- ('ꙛ', 'ꙛ'),
- ('ꙝ', 'ꙝ'),
- ('ꙟ', 'ꙟ'),
- ('ꙡ', 'ꙡ'),
- ('ꙣ', 'ꙣ'),
- ('ꙥ', 'ꙥ'),
- ('ꙧ', 'ꙧ'),
- ('ꙩ', 'ꙩ'),
- ('ꙫ', 'ꙫ'),
- ('ꙭ', 'ꙭ'),
- ('ꚁ', 'ꚁ'),
- ('ꚃ', 'ꚃ'),
- ('ꚅ', 'ꚅ'),
- ('ꚇ', 'ꚇ'),
- ('ꚉ', 'ꚉ'),
- ('ꚋ', 'ꚋ'),
- ('ꚍ', 'ꚍ'),
- ('ꚏ', 'ꚏ'),
- ('ꚑ', 'ꚑ'),
- ('ꚓ', 'ꚓ'),
- ('ꚕ', 'ꚕ'),
- ('ꚗ', 'ꚗ'),
- ('ꚙ', 'ꚙ'),
- ('ꚛ', 'ꚝ'),
- ('ꜣ', 'ꜣ'),
- ('ꜥ', 'ꜥ'),
- ('ꜧ', 'ꜧ'),
- ('ꜩ', 'ꜩ'),
- ('ꜫ', 'ꜫ'),
- ('ꜭ', 'ꜭ'),
- ('ꜯ', 'ꜱ'),
- ('ꜳ', 'ꜳ'),
- ('ꜵ', 'ꜵ'),
- ('ꜷ', 'ꜷ'),
- ('ꜹ', 'ꜹ'),
- ('ꜻ', 'ꜻ'),
- ('ꜽ', 'ꜽ'),
- ('ꜿ', 'ꜿ'),
- ('ꝁ', 'ꝁ'),
- ('ꝃ', 'ꝃ'),
- ('ꝅ', 'ꝅ'),
- ('ꝇ', 'ꝇ'),
- ('ꝉ', 'ꝉ'),
- ('ꝋ', 'ꝋ'),
- ('ꝍ', 'ꝍ'),
- ('ꝏ', 'ꝏ'),
- ('ꝑ', 'ꝑ'),
- ('ꝓ', 'ꝓ'),
- ('ꝕ', 'ꝕ'),
- ('ꝗ', 'ꝗ'),
- ('ꝙ', 'ꝙ'),
- ('ꝛ', 'ꝛ'),
- ('ꝝ', 'ꝝ'),
- ('ꝟ', 'ꝟ'),
- ('ꝡ', 'ꝡ'),
- ('ꝣ', 'ꝣ'),
- ('ꝥ', 'ꝥ'),
- ('ꝧ', 'ꝧ'),
- ('ꝩ', 'ꝩ'),
- ('ꝫ', 'ꝫ'),
- ('ꝭ', 'ꝭ'),
- ('ꝯ', 'ꝸ'),
- ('ꝺ', 'ꝺ'),
- ('ꝼ', 'ꝼ'),
- ('ꝿ', 'ꝿ'),
- ('ꞁ', 'ꞁ'),
- ('ꞃ', 'ꞃ'),
- ('ꞅ', 'ꞅ'),
- ('ꞇ', 'ꞇ'),
- ('ꞌ', 'ꞌ'),
- ('ꞎ', 'ꞎ'),
- ('ꞑ', 'ꞑ'),
- ('ꞓ', 'ꞕ'),
- ('ꞗ', 'ꞗ'),
- ('ꞙ', 'ꞙ'),
- ('ꞛ', 'ꞛ'),
- ('ꞝ', 'ꞝ'),
- ('ꞟ', 'ꞟ'),
- ('ꞡ', 'ꞡ'),
- ('ꞣ', 'ꞣ'),
- ('ꞥ', 'ꞥ'),
- ('ꞧ', 'ꞧ'),
- ('ꞩ', 'ꞩ'),
- ('ꞯ', 'ꞯ'),
- ('ꞵ', 'ꞵ'),
- ('ꞷ', 'ꞷ'),
- ('ꞹ', 'ꞹ'),
- ('ꞻ', 'ꞻ'),
- ('ꞽ', 'ꞽ'),
- ('ꞿ', 'ꞿ'),
- ('ꟁ', 'ꟁ'),
- ('ꟃ', 'ꟃ'),
- ('ꟈ', 'ꟈ'),
- ('ꟊ', 'ꟊ'),
- ('ꟍ', 'ꟍ'),
- ('ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'ꟕ'),
- ('ꟗ', 'ꟗ'),
- ('ꟙ', 'ꟙ'),
- ('ꟛ', 'ꟛ'),
- ('ꟲ', 'ꟴ'),
- ('ꟶ', 'ꟶ'),
- ('ꟸ', 'ꟺ'),
- ('ꬰ', 'ꭚ'),
- ('ꭜ', 'ꭩ'),
- ('ꭰ', 'ꮿ'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('a', 'z'),
- ('𐐨', '𐑏'),
- ('𐓘', '𐓻'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐞀', '𐞀'),
- ('𐞃', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𐳀', '𐳲'),
- ('𐵰', '𐶅'),
- ('𑣀', '𑣟'),
- ('𖹠', '𖹿'),
- ('𝐚', '𝐳'),
- ('𝑎', '𝑔'),
- ('𝑖', '𝑧'),
- ('𝒂', '𝒛'),
- ('𝒶', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝓏'),
- ('𝓪', '𝔃'),
- ('𝔞', '𝔷'),
- ('𝕒', '𝕫'),
- ('𝖆', '𝖟'),
- ('𝖺', '𝗓'),
- ('𝗮', '𝘇'),
- ('𝘢', '𝘻'),
- ('𝙖', '𝙯'),
- ('𝚊', '𝚥'),
- ('𝛂', '𝛚'),
- ('𝛜', '𝛡'),
- ('𝛼', '𝜔'),
- ('𝜖', '𝜛'),
- ('𝜶', '𝝎'),
- ('𝝐', '𝝕'),
- ('𝝰', '𝞈'),
- ('𝞊', '𝞏'),
- ('𝞪', '𝟂'),
- ('𝟄', '𝟉'),
- ('𝟋', '𝟋'),
- ('𝼀', '𝼉'),
- ('𝼋', '𝼞'),
- ('𝼥', '𝼪'),
- ('𞀰', '𞁭'),
- ('𞤢', '𞥃'),
-];
-
-pub const MATH: &'static [(char, char)] = &[
- ('+', '+'),
- ('<', '>'),
- ('^', '^'),
- ('|', '|'),
- ('~', '~'),
- ('¬', '¬'),
- ('±', '±'),
- ('×', '×'),
- ('÷', '÷'),
- ('ϐ', 'ϒ'),
- ('ϕ', 'ϕ'),
- ('ϰ', 'ϱ'),
- ('ϴ', '϶'),
- ('؆', '؈'),
- ('‖', '‖'),
- ('′', '‴'),
- ('⁀', '⁀'),
- ('⁄', '⁄'),
- ('⁒', '⁒'),
- ('\u{2061}', '\u{2064}'),
- ('⁺', '⁾'),
- ('₊', '₎'),
- ('\u{20d0}', '\u{20dc}'),
- ('\u{20e1}', '\u{20e1}'),
- ('\u{20e5}', '\u{20e6}'),
- ('\u{20eb}', '\u{20ef}'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℊ', 'ℓ'),
- ('ℕ', 'ℕ'),
- ('℘', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('ℨ', '℩'),
- ('ℬ', 'ℭ'),
- ('ℯ', 'ℱ'),
- ('ℳ', 'ℸ'),
- ('ℼ', 'ⅉ'),
- ('⅋', '⅋'),
- ('←', '↧'),
- ('↩', '↮'),
- ('↰', '↱'),
- ('↶', '↷'),
- ('↼', '⇛'),
- ('⇝', '⇝'),
- ('⇤', '⇥'),
- ('⇴', '⋿'),
- ('⌈', '⌋'),
- ('⌠', '⌡'),
- ('⍼', '⍼'),
- ('⎛', '⎵'),
- ('⎷', '⎷'),
- ('⏐', '⏐'),
- ('⏜', '⏢'),
- ('■', '□'),
- ('▮', '▷'),
- ('▼', '◁'),
- ('◆', '◇'),
- ('◊', '○'),
- ('●', '◓'),
- ('◢', '◢'),
- ('◤', '◤'),
- ('◧', '◬'),
- ('◸', '◿'),
- ('★', '☆'),
- ('♀', '♀'),
- ('♂', '♂'),
- ('♠', '♣'),
- ('♭', '♯'),
- ('⟀', '⟿'),
- ('⤀', '⫿'),
- ('⬰', '⭄'),
- ('⭇', '⭌'),
- ('﬩', '﬩'),
- ('﹡', '﹦'),
- ('﹨', '﹨'),
- ('+', '+'),
- ('<', '>'),
- ('\', '\'),
- ('^', '^'),
- ('|', '|'),
- ('~', '~'),
- ('¬', '¬'),
- ('←', '↓'),
- ('𐶎', '𐶏'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝟋'),
- ('𝟎', '𝟿'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('𞻰', '𞻱'),
-];
-
-pub const MODIFIER_COMBINING_MARK: &'static [(char, char)] = &[
- ('\u{654}', '\u{655}'),
- ('\u{658}', '\u{658}'),
- ('\u{6dc}', '\u{6dc}'),
- ('\u{6e3}', '\u{6e3}'),
- ('\u{6e7}', '\u{6e8}'),
- ('\u{8ca}', '\u{8cb}'),
- ('\u{8cd}', '\u{8cf}'),
- ('\u{8d3}', '\u{8d3}'),
- ('\u{8f3}', '\u{8f3}'),
-];
-
-pub const NONCHARACTER_CODE_POINT: &'static [(char, char)] = &[
- ('\u{fdd0}', '\u{fdef}'),
- ('\u{fffe}', '\u{ffff}'),
- ('\u{1fffe}', '\u{1ffff}'),
- ('\u{2fffe}', '\u{2ffff}'),
- ('\u{3fffe}', '\u{3ffff}'),
- ('\u{4fffe}', '\u{4ffff}'),
- ('\u{5fffe}', '\u{5ffff}'),
- ('\u{6fffe}', '\u{6ffff}'),
- ('\u{7fffe}', '\u{7ffff}'),
- ('\u{8fffe}', '\u{8ffff}'),
- ('\u{9fffe}', '\u{9ffff}'),
- ('\u{afffe}', '\u{affff}'),
- ('\u{bfffe}', '\u{bffff}'),
- ('\u{cfffe}', '\u{cffff}'),
- ('\u{dfffe}', '\u{dffff}'),
- ('\u{efffe}', '\u{effff}'),
- ('\u{ffffe}', '\u{fffff}'),
- ('\u{10fffe}', '\u{10ffff}'),
-];
-
-pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[
- ('\u{345}', '\u{345}'),
- ('\u{363}', '\u{36f}'),
- ('\u{5b0}', '\u{5bd}'),
- ('\u{5bf}', '\u{5bf}'),
- ('\u{5c1}', '\u{5c2}'),
- ('\u{5c4}', '\u{5c5}'),
- ('\u{5c7}', '\u{5c7}'),
- ('\u{610}', '\u{61a}'),
- ('\u{64b}', '\u{657}'),
- ('\u{659}', '\u{65f}'),
- ('\u{670}', '\u{670}'),
- ('\u{6d6}', '\u{6dc}'),
- ('\u{6e1}', '\u{6e4}'),
- ('\u{6e7}', '\u{6e8}'),
- ('\u{6ed}', '\u{6ed}'),
- ('\u{711}', '\u{711}'),
- ('\u{730}', '\u{73f}'),
- ('\u{7a6}', '\u{7b0}'),
- ('\u{816}', '\u{817}'),
- ('\u{81b}', '\u{823}'),
- ('\u{825}', '\u{827}'),
- ('\u{829}', '\u{82c}'),
- ('\u{897}', '\u{897}'),
- ('\u{8d4}', '\u{8df}'),
- ('\u{8e3}', '\u{8e9}'),
- ('\u{8f0}', 'ः'),
- ('\u{93a}', 'ऻ'),
- ('ा', 'ौ'),
- ('ॎ', 'ॏ'),
- ('\u{955}', '\u{957}'),
- ('\u{962}', '\u{963}'),
- ('\u{981}', 'ঃ'),
- ('\u{9be}', '\u{9c4}'),
- ('ে', 'ৈ'),
- ('ো', 'ৌ'),
- ('\u{9d7}', '\u{9d7}'),
- ('\u{9e2}', '\u{9e3}'),
- ('\u{a01}', 'ਃ'),
- ('ਾ', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4c}'),
- ('\u{a51}', '\u{a51}'),
- ('\u{a70}', '\u{a71}'),
- ('\u{a75}', '\u{a75}'),
- ('\u{a81}', 'ઃ'),
- ('ા', '\u{ac5}'),
- ('\u{ac7}', 'ૉ'),
- ('ો', 'ૌ'),
- ('\u{ae2}', '\u{ae3}'),
- ('\u{afa}', '\u{afc}'),
- ('\u{b01}', 'ଃ'),
- ('\u{b3e}', '\u{b44}'),
- ('େ', 'ୈ'),
- ('ୋ', 'ୌ'),
- ('\u{b56}', '\u{b57}'),
- ('\u{b62}', '\u{b63}'),
- ('\u{b82}', '\u{b82}'),
- ('\u{bbe}', 'ூ'),
- ('ெ', 'ை'),
- ('ொ', 'ௌ'),
- ('\u{bd7}', '\u{bd7}'),
- ('\u{c00}', '\u{c04}'),
- ('\u{c3e}', 'ౄ'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4c}'),
- ('\u{c55}', '\u{c56}'),
- ('\u{c62}', '\u{c63}'),
- ('\u{c81}', 'ಃ'),
- ('ಾ', 'ೄ'),
- ('\u{cc6}', '\u{cc8}'),
- ('\u{cca}', '\u{ccc}'),
- ('\u{cd5}', '\u{cd6}'),
- ('\u{ce2}', '\u{ce3}'),
- ('ೳ', 'ೳ'),
- ('\u{d00}', 'ഃ'),
- ('\u{d3e}', '\u{d44}'),
- ('െ', 'ൈ'),
- ('ൊ', 'ൌ'),
- ('\u{d57}', '\u{d57}'),
- ('\u{d62}', '\u{d63}'),
- ('\u{d81}', 'ඃ'),
- ('\u{dcf}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('ෘ', '\u{ddf}'),
- ('ෲ', 'ෳ'),
- ('\u{e31}', '\u{e31}'),
- ('\u{e34}', '\u{e3a}'),
- ('\u{e4d}', '\u{e4d}'),
- ('\u{eb1}', '\u{eb1}'),
- ('\u{eb4}', '\u{eb9}'),
- ('\u{ebb}', '\u{ebc}'),
- ('\u{ecd}', '\u{ecd}'),
- ('\u{f71}', '\u{f83}'),
- ('\u{f8d}', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('ါ', '\u{1036}'),
- ('း', 'း'),
- ('ျ', '\u{103e}'),
- ('ၖ', '\u{1059}'),
- ('\u{105e}', '\u{1060}'),
- ('ၢ', 'ၤ'),
- ('ၧ', 'ၭ'),
- ('\u{1071}', '\u{1074}'),
- ('\u{1082}', '\u{108d}'),
- ('ႏ', 'ႏ'),
- ('ႚ', '\u{109d}'),
- ('\u{1712}', '\u{1713}'),
- ('\u{1732}', '\u{1733}'),
- ('\u{1752}', '\u{1753}'),
- ('\u{1772}', '\u{1773}'),
- ('ា', 'ៈ'),
- ('\u{1885}', '\u{1886}'),
- ('\u{18a9}', '\u{18a9}'),
- ('\u{1920}', 'ᤫ'),
- ('ᤰ', 'ᤸ'),
- ('\u{1a17}', '\u{1a1b}'),
- ('ᩕ', '\u{1a5e}'),
- ('ᩡ', '\u{1a74}'),
- ('\u{1abf}', '\u{1ac0}'),
- ('\u{1acc}', '\u{1ace}'),
- ('\u{1b00}', 'ᬄ'),
- ('\u{1b35}', '\u{1b43}'),
- ('\u{1b80}', 'ᮂ'),
- ('ᮡ', '\u{1ba9}'),
- ('\u{1bac}', '\u{1bad}'),
- ('ᯧ', '\u{1bf1}'),
- ('ᰤ', '\u{1c36}'),
- ('\u{1dd3}', '\u{1df4}'),
- ('Ⓐ', 'ⓩ'),
- ('\u{2de0}', '\u{2dff}'),
- ('\u{a674}', '\u{a67b}'),
- ('\u{a69e}', '\u{a69f}'),
- ('\u{a802}', '\u{a802}'),
- ('\u{a80b}', '\u{a80b}'),
- ('ꠣ', 'ꠧ'),
- ('ꢀ', 'ꢁ'),
- ('ꢴ', 'ꣃ'),
- ('\u{a8c5}', '\u{a8c5}'),
- ('\u{a8ff}', '\u{a8ff}'),
- ('\u{a926}', '\u{a92a}'),
- ('\u{a947}', 'ꥒ'),
- ('\u{a980}', 'ꦃ'),
- ('ꦴ', 'ꦿ'),
- ('\u{a9e5}', '\u{a9e5}'),
- ('\u{aa29}', '\u{aa36}'),
- ('\u{aa43}', '\u{aa43}'),
- ('\u{aa4c}', 'ꩍ'),
- ('ꩻ', 'ꩽ'),
- ('\u{aab0}', '\u{aab0}'),
- ('\u{aab2}', '\u{aab4}'),
- ('\u{aab7}', '\u{aab8}'),
- ('\u{aabe}', '\u{aabe}'),
- ('ꫫ', 'ꫯ'),
- ('ꫵ', 'ꫵ'),
- ('ꯣ', 'ꯪ'),
- ('\u{fb1e}', '\u{fb1e}'),
- ('\u{10376}', '\u{1037a}'),
- ('\u{10a01}', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '\u{10a0f}'),
- ('\u{10d24}', '\u{10d27}'),
- ('\u{10d69}', '\u{10d69}'),
- ('\u{10eab}', '\u{10eac}'),
- ('\u{10efc}', '\u{10efc}'),
- ('𑀀', '𑀂'),
- ('\u{11038}', '\u{11045}'),
- ('\u{11073}', '\u{11074}'),
- ('\u{11080}', '𑂂'),
- ('𑂰', '𑂸'),
- ('\u{110c2}', '\u{110c2}'),
- ('\u{11100}', '\u{11102}'),
- ('\u{11127}', '\u{11132}'),
- ('𑅅', '𑅆'),
- ('\u{11180}', '𑆂'),
- ('𑆳', '𑆿'),
- ('𑇎', '\u{111cf}'),
- ('𑈬', '\u{11234}'),
- ('\u{11237}', '\u{11237}'),
- ('\u{1123e}', '\u{1123e}'),
- ('\u{11241}', '\u{11241}'),
- ('\u{112df}', '\u{112e8}'),
- ('\u{11300}', '𑌃'),
- ('\u{1133e}', '𑍄'),
- ('𑍇', '𑍈'),
- ('𑍋', '𑍌'),
- ('\u{11357}', '\u{11357}'),
- ('𑍢', '𑍣'),
- ('\u{113b8}', '\u{113c0}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '𑏊'),
- ('𑏌', '𑏍'),
- ('𑐵', '𑑁'),
- ('\u{11443}', '𑑅'),
- ('\u{114b0}', '𑓁'),
- ('\u{115af}', '\u{115b5}'),
- ('𑖸', '𑖾'),
- ('\u{115dc}', '\u{115dd}'),
- ('𑘰', '𑘾'),
- ('\u{11640}', '\u{11640}'),
- ('\u{116ab}', '\u{116b5}'),
- ('\u{1171d}', '\u{1172a}'),
- ('𑠬', '𑠸'),
- ('\u{11930}', '𑤵'),
- ('𑤷', '𑤸'),
- ('\u{1193b}', '\u{1193c}'),
- ('𑥀', '𑥀'),
- ('𑥂', '𑥂'),
- ('𑧑', '\u{119d7}'),
- ('\u{119da}', '𑧟'),
- ('𑧤', '𑧤'),
- ('\u{11a01}', '\u{11a0a}'),
- ('\u{11a35}', '𑨹'),
- ('\u{11a3b}', '\u{11a3e}'),
- ('\u{11a51}', '\u{11a5b}'),
- ('\u{11a8a}', '𑪗'),
- ('𑰯', '\u{11c36}'),
- ('\u{11c38}', '𑰾'),
- ('\u{11c92}', '\u{11ca7}'),
- ('𑲩', '\u{11cb6}'),
- ('\u{11d31}', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d41}'),
- ('\u{11d43}', '\u{11d43}'),
- ('\u{11d47}', '\u{11d47}'),
- ('𑶊', '𑶎'),
- ('\u{11d90}', '\u{11d91}'),
- ('𑶓', '𑶖'),
- ('\u{11ef3}', '𑻶'),
- ('\u{11f00}', '\u{11f01}'),
- ('𑼃', '𑼃'),
- ('𑼴', '\u{11f3a}'),
- ('𑼾', '\u{11f40}'),
- ('\u{1611e}', '\u{1612e}'),
- ('\u{16f4f}', '\u{16f4f}'),
- ('𖽑', '𖾇'),
- ('\u{16f8f}', '\u{16f92}'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('\u{1bc9e}', '\u{1bc9e}'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
- ('\u{1e08f}', '\u{1e08f}'),
- ('\u{1e947}', '\u{1e947}'),
- ('🄰', '🅉'),
- ('🅐', '🅩'),
- ('🅰', '🆉'),
-];
-
-pub const OTHER_DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[
- ('\u{34f}', '\u{34f}'),
- ('ᅟ', 'ᅠ'),
- ('\u{17b4}', '\u{17b5}'),
- ('\u{2065}', '\u{2065}'),
- ('ㅤ', 'ㅤ'),
- ('ᅠ', 'ᅠ'),
- ('\u{fff0}', '\u{fff8}'),
- ('\u{e0000}', '\u{e0000}'),
- ('\u{e0002}', '\u{e001f}'),
- ('\u{e0080}', '\u{e00ff}'),
- ('\u{e01f0}', '\u{e0fff}'),
-];
-
-pub const OTHER_GRAPHEME_EXTEND: &'static [(char, char)] = &[
- ('\u{9be}', '\u{9be}'),
- ('\u{9d7}', '\u{9d7}'),
- ('\u{b3e}', '\u{b3e}'),
- ('\u{b57}', '\u{b57}'),
- ('\u{bbe}', '\u{bbe}'),
- ('\u{bd7}', '\u{bd7}'),
- ('\u{cc0}', '\u{cc0}'),
- ('\u{cc2}', '\u{cc2}'),
- ('\u{cc7}', '\u{cc8}'),
- ('\u{cca}', '\u{ccb}'),
- ('\u{cd5}', '\u{cd6}'),
- ('\u{d3e}', '\u{d3e}'),
- ('\u{d57}', '\u{d57}'),
- ('\u{dcf}', '\u{dcf}'),
- ('\u{ddf}', '\u{ddf}'),
- ('\u{1715}', '\u{1715}'),
- ('\u{1734}', '\u{1734}'),
- ('\u{1b35}', '\u{1b35}'),
- ('\u{1b3b}', '\u{1b3b}'),
- ('\u{1b3d}', '\u{1b3d}'),
- ('\u{1b43}', '\u{1b44}'),
- ('\u{1baa}', '\u{1baa}'),
- ('\u{1bf2}', '\u{1bf3}'),
- ('\u{200c}', '\u{200c}'),
- ('\u{302e}', '\u{302f}'),
- ('\u{a953}', '\u{a953}'),
- ('\u{a9c0}', '\u{a9c0}'),
- ('\u{ff9e}', '\u{ff9f}'),
- ('\u{111c0}', '\u{111c0}'),
- ('\u{11235}', '\u{11235}'),
- ('\u{1133e}', '\u{1133e}'),
- ('\u{1134d}', '\u{1134d}'),
- ('\u{11357}', '\u{11357}'),
- ('\u{113b8}', '\u{113b8}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '\u{113c9}'),
- ('\u{113cf}', '\u{113cf}'),
- ('\u{114b0}', '\u{114b0}'),
- ('\u{114bd}', '\u{114bd}'),
- ('\u{115af}', '\u{115af}'),
- ('\u{116b6}', '\u{116b6}'),
- ('\u{11930}', '\u{11930}'),
- ('\u{1193d}', '\u{1193d}'),
- ('\u{11f41}', '\u{11f41}'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('\u{1d165}', '\u{1d166}'),
- ('\u{1d16d}', '\u{1d172}'),
- ('\u{e0020}', '\u{e007f}'),
-];
-
-pub const OTHER_ID_CONTINUE: &'static [(char, char)] = &[
- ('·', '·'),
- ('·', '·'),
- ('፩', '፱'),
- ('᧚', '᧚'),
- ('\u{200c}', '\u{200d}'),
- ('・', '・'),
- ('・', '・'),
-];
-
-pub const OTHER_ID_START: &'static [(char, char)] =
- &[('\u{1885}', '\u{1886}'), ('℘', '℘'), ('℮', '℮'), ('゛', '゜')];
-
-pub const OTHER_LOWERCASE: &'static [(char, char)] = &[
- ('ª', 'ª'),
- ('º', 'º'),
- ('ʰ', 'ʸ'),
- ('ˀ', 'ˁ'),
- ('ˠ', 'ˤ'),
- ('\u{345}', '\u{345}'),
- ('ͺ', 'ͺ'),
- ('ჼ', 'ჼ'),
- ('ᴬ', 'ᵪ'),
- ('ᵸ', 'ᵸ'),
- ('ᶛ', 'ᶿ'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('ⅰ', 'ⅿ'),
- ('ⓐ', 'ⓩ'),
- ('ⱼ', 'ⱽ'),
- ('ꚜ', 'ꚝ'),
- ('ꝰ', 'ꝰ'),
- ('ꟲ', 'ꟴ'),
- ('ꟸ', 'ꟹ'),
- ('ꭜ', 'ꭟ'),
- ('ꭩ', 'ꭩ'),
- ('𐞀', '𐞀'),
- ('𐞃', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𞀰', '𞁭'),
-];
-
-pub const OTHER_MATH: &'static [(char, char)] = &[
- ('^', '^'),
- ('ϐ', 'ϒ'),
- ('ϕ', 'ϕ'),
- ('ϰ', 'ϱ'),
- ('ϴ', 'ϵ'),
- ('‖', '‖'),
- ('′', '‴'),
- ('⁀', '⁀'),
- ('\u{2061}', '\u{2064}'),
- ('⁽', '⁾'),
- ('₍', '₎'),
- ('\u{20d0}', '\u{20dc}'),
- ('\u{20e1}', '\u{20e1}'),
- ('\u{20e5}', '\u{20e6}'),
- ('\u{20eb}', '\u{20ef}'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℊ', 'ℓ'),
- ('ℕ', 'ℕ'),
- ('ℙ', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('ℨ', '℩'),
- ('ℬ', 'ℭ'),
- ('ℯ', 'ℱ'),
- ('ℳ', 'ℸ'),
- ('ℼ', 'ℿ'),
- ('ⅅ', 'ⅉ'),
- ('↕', '↙'),
- ('↜', '↟'),
- ('↡', '↢'),
- ('↤', '↥'),
- ('↧', '↧'),
- ('↩', '↭'),
- ('↰', '↱'),
- ('↶', '↷'),
- ('↼', '⇍'),
- ('⇐', '⇑'),
- ('⇓', '⇓'),
- ('⇕', '⇛'),
- ('⇝', '⇝'),
- ('⇤', '⇥'),
- ('⌈', '⌋'),
- ('⎴', '⎵'),
- ('⎷', '⎷'),
- ('⏐', '⏐'),
- ('⏢', '⏢'),
- ('■', '□'),
- ('▮', '▶'),
- ('▼', '◀'),
- ('◆', '◇'),
- ('◊', '○'),
- ('●', '◓'),
- ('◢', '◢'),
- ('◤', '◤'),
- ('◧', '◬'),
- ('★', '☆'),
- ('♀', '♀'),
- ('♂', '♂'),
- ('♠', '♣'),
- ('♭', '♮'),
- ('⟅', '⟆'),
- ('⟦', '⟯'),
- ('⦃', '⦘'),
- ('⧘', '⧛'),
- ('⧼', '⧽'),
- ('﹡', '﹡'),
- ('﹣', '﹣'),
- ('﹨', '﹨'),
- ('\', '\'),
- ('^', '^'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝛀'),
- ('𝛂', '𝛚'),
- ('𝛜', '𝛺'),
- ('𝛼', '𝜔'),
- ('𝜖', '𝜴'),
- ('𝜶', '𝝎'),
- ('𝝐', '𝝮'),
- ('𝝰', '𝞈'),
- ('𝞊', '𝞨'),
- ('𝞪', '𝟂'),
- ('𝟄', '𝟋'),
- ('𝟎', '𝟿'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
-];
-
-pub const OTHER_UPPERCASE: &'static [(char, char)] =
- &[('Ⅰ', 'Ⅿ'), ('Ⓐ', 'Ⓩ'), ('🄰', '🅉'), ('🅐', '🅩'), ('🅰', '🆉')];
-
-pub const PATTERN_SYNTAX: &'static [(char, char)] = &[
- ('!', '/'),
- (':', '@'),
- ('[', '^'),
- ('`', '`'),
- ('{', '~'),
- ('¡', '§'),
- ('©', '©'),
- ('«', '¬'),
- ('®', '®'),
- ('°', '±'),
- ('¶', '¶'),
- ('»', '»'),
- ('¿', '¿'),
- ('×', '×'),
- ('÷', '÷'),
- ('‐', '‧'),
- ('‰', '‾'),
- ('⁁', '⁓'),
- ('⁕', '⁞'),
- ('←', '\u{245f}'),
- ('─', '❵'),
- ('➔', '⯿'),
- ('⸀', '\u{2e7f}'),
- ('、', '〃'),
- ('〈', '〠'),
- ('〰', '〰'),
- ('﴾', '﴿'),
- ('﹅', '﹆'),
-];
-
-pub const PATTERN_WHITE_SPACE: &'static [(char, char)] = &[
- ('\t', '\r'),
- (' ', ' '),
- ('\u{85}', '\u{85}'),
- ('\u{200e}', '\u{200f}'),
- ('\u{2028}', '\u{2029}'),
-];
-
-pub const PREPENDED_CONCATENATION_MARK: &'static [(char, char)] = &[
- ('\u{600}', '\u{605}'),
- ('\u{6dd}', '\u{6dd}'),
- ('\u{70f}', '\u{70f}'),
- ('\u{890}', '\u{891}'),
- ('\u{8e2}', '\u{8e2}'),
- ('\u{110bd}', '\u{110bd}'),
- ('\u{110cd}', '\u{110cd}'),
-];
-
-pub const QUOTATION_MARK: &'static [(char, char)] = &[
- ('"', '"'),
- ('\'', '\''),
- ('«', '«'),
- ('»', '»'),
- ('‘', '‟'),
- ('‹', '›'),
- ('⹂', '⹂'),
- ('「', '』'),
- ('〝', '〟'),
- ('﹁', '﹄'),
- ('"', '"'),
- (''', '''),
- ('「', '」'),
-];
-
-pub const RADICAL: &'static [(char, char)] =
- &[('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕')];
-
-pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('🇦', '🇿')];
-
-pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[
- ('!', '!'),
- ('.', '.'),
- ('?', '?'),
- ('։', '։'),
- ('؝', '؟'),
- ('۔', '۔'),
- ('܀', '܂'),
- ('߹', '߹'),
- ('࠷', '࠷'),
- ('࠹', '࠹'),
- ('࠽', '࠾'),
- ('।', '॥'),
- ('၊', '။'),
- ('።', '።'),
- ('፧', '፨'),
- ('᙮', '᙮'),
- ('᜵', '᜶'),
- ('។', '៕'),
- ('᠃', '᠃'),
- ('᠉', '᠉'),
- ('᥄', '᥅'),
- ('᪨', '᪫'),
- ('᭎', '᭏'),
- ('᭚', '᭛'),
- ('᭞', '᭟'),
- ('᭽', '᭿'),
- ('᰻', '᰼'),
- ('᱾', '᱿'),
- ('․', '․'),
- ('‼', '‽'),
- ('⁇', '⁉'),
- ('⳹', '⳻'),
- ('⸮', '⸮'),
- ('⸼', '⸼'),
- ('⹓', '⹔'),
- ('。', '。'),
- ('꓿', '꓿'),
- ('꘎', '꘏'),
- ('꛳', '꛳'),
- ('꛷', '꛷'),
- ('꡶', '꡷'),
- ('꣎', '꣏'),
- ('꤯', '꤯'),
- ('꧈', '꧉'),
- ('꩝', '꩟'),
- ('꫰', '꫱'),
- ('꯫', '꯫'),
- ('︒', '︒'),
- ('︕', '︖'),
- ('﹒', '﹒'),
- ('﹖', '﹗'),
- ('!', '!'),
- ('.', '.'),
- ('?', '?'),
- ('。', '。'),
- ('𐩖', '𐩗'),
- ('𐽕', '𐽙'),
- ('𐾆', '𐾉'),
- ('𑁇', '𑁈'),
- ('𑂾', '𑃁'),
- ('𑅁', '𑅃'),
- ('𑇅', '𑇆'),
- ('𑇍', '𑇍'),
- ('𑇞', '𑇟'),
- ('𑈸', '𑈹'),
- ('𑈻', '𑈼'),
- ('𑊩', '𑊩'),
- ('𑏔', '𑏕'),
- ('𑑋', '𑑌'),
- ('𑗂', '𑗃'),
- ('𑗉', '𑗗'),
- ('𑙁', '𑙂'),
- ('𑜼', '𑜾'),
- ('𑥄', '𑥄'),
- ('𑥆', '𑥆'),
- ('𑩂', '𑩃'),
- ('𑪛', '𑪜'),
- ('𑱁', '𑱂'),
- ('𑻷', '𑻸'),
- ('𑽃', '𑽄'),
- ('𖩮', '𖩯'),
- ('𖫵', '𖫵'),
- ('𖬷', '𖬸'),
- ('𖭄', '𖭄'),
- ('𖵮', '𖵯'),
- ('𖺘', '𖺘'),
- ('𛲟', '𛲟'),
- ('𝪈', '𝪈'),
-];
-
-pub const SOFT_DOTTED: &'static [(char, char)] = &[
- ('i', 'j'),
- ('į', 'į'),
- ('ɉ', 'ɉ'),
- ('ɨ', 'ɨ'),
- ('ʝ', 'ʝ'),
- ('ʲ', 'ʲ'),
- ('ϳ', 'ϳ'),
- ('і', 'і'),
- ('ј', 'ј'),
- ('ᵢ', 'ᵢ'),
- ('ᶖ', 'ᶖ'),
- ('ᶤ', 'ᶤ'),
- ('ᶨ', 'ᶨ'),
- ('ḭ', 'ḭ'),
- ('ị', 'ị'),
- ('ⁱ', 'ⁱ'),
- ('ⅈ', 'ⅉ'),
- ('ⱼ', 'ⱼ'),
- ('𝐢', '𝐣'),
- ('𝑖', '𝑗'),
- ('𝒊', '𝒋'),
- ('𝒾', '𝒿'),
- ('𝓲', '𝓳'),
- ('𝔦', '𝔧'),
- ('𝕚', '𝕛'),
- ('𝖎', '𝖏'),
- ('𝗂', '𝗃'),
- ('𝗶', '𝗷'),
- ('𝘪', '𝘫'),
- ('𝙞', '𝙟'),
- ('𝚒', '𝚓'),
- ('𝼚', '𝼚'),
- ('𞁌', '𞁍'),
- ('𞁨', '𞁨'),
-];
-
-pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[
- ('!', '!'),
- (',', ','),
- ('.', '.'),
- (':', ';'),
- ('?', '?'),
- (';', ';'),
- ('·', '·'),
- ('։', '։'),
- ('׃', '׃'),
- ('،', '،'),
- ('؛', '؛'),
- ('؝', '؟'),
- ('۔', '۔'),
- ('܀', '܊'),
- ('܌', '܌'),
- ('߸', '߹'),
- ('࠰', '࠵'),
- ('࠷', '࠾'),
- ('࡞', '࡞'),
- ('।', '॥'),
- ('๚', '๛'),
- ('༈', '༈'),
- ('།', '༒'),
- ('၊', '။'),
- ('፡', '፨'),
- ('᙮', '᙮'),
- ('᛫', '᛭'),
- ('᜵', '᜶'),
- ('។', '៖'),
- ('៚', '៚'),
- ('᠂', '᠅'),
- ('᠈', '᠉'),
- ('᥄', '᥅'),
- ('᪨', '᪫'),
- ('᭎', '᭏'),
- ('᭚', '᭛'),
- ('᭝', '᭟'),
- ('᭽', '᭿'),
- ('᰻', '᰿'),
- ('᱾', '᱿'),
- ('․', '․'),
- ('‼', '‽'),
- ('⁇', '⁉'),
- ('⳹', '⳻'),
- ('⸮', '⸮'),
- ('⸼', '⸼'),
- ('⹁', '⹁'),
- ('⹌', '⹌'),
- ('⹎', '⹏'),
- ('⹓', '⹔'),
- ('、', '。'),
- ('꓾', '꓿'),
- ('꘍', '꘏'),
- ('꛳', '꛷'),
- ('꡶', '꡷'),
- ('꣎', '꣏'),
- ('꤯', '꤯'),
- ('꧇', '꧉'),
- ('꩝', '꩟'),
- ('꫟', '꫟'),
- ('꫰', '꫱'),
- ('꯫', '꯫'),
- ('︒', '︒'),
- ('︕', '︖'),
- ('﹐', '﹒'),
- ('﹔', '﹗'),
- ('!', '!'),
- (',', ','),
- ('.', '.'),
- (':', ';'),
- ('?', '?'),
- ('。', '。'),
- ('、', '、'),
- ('𐎟', '𐎟'),
- ('𐏐', '𐏐'),
- ('𐡗', '𐡗'),
- ('𐤟', '𐤟'),
- ('𐩖', '𐩗'),
- ('𐫰', '𐫵'),
- ('𐬺', '𐬿'),
- ('𐮙', '𐮜'),
- ('𐽕', '𐽙'),
- ('𐾆', '𐾉'),
- ('𑁇', '𑁍'),
- ('𑂾', '𑃁'),
- ('𑅁', '𑅃'),
- ('𑇅', '𑇆'),
- ('𑇍', '𑇍'),
- ('𑇞', '𑇟'),
- ('𑈸', '𑈼'),
- ('𑊩', '𑊩'),
- ('𑏔', '𑏕'),
- ('𑑋', '𑑍'),
- ('𑑚', '𑑛'),
- ('𑗂', '𑗅'),
- ('𑗉', '𑗗'),
- ('𑙁', '𑙂'),
- ('𑜼', '𑜾'),
- ('𑥄', '𑥄'),
- ('𑥆', '𑥆'),
- ('𑩂', '𑩃'),
- ('𑪛', '𑪜'),
- ('𑪡', '𑪢'),
- ('𑱁', '𑱃'),
- ('𑱱', '𑱱'),
- ('𑻷', '𑻸'),
- ('𑽃', '𑽄'),
- ('𒑰', '𒑴'),
- ('𖩮', '𖩯'),
- ('𖫵', '𖫵'),
- ('𖬷', '𖬹'),
- ('𖭄', '𖭄'),
- ('𖵮', '𖵯'),
- ('𖺗', '𖺘'),
- ('𛲟', '𛲟'),
- ('𝪇', '𝪊'),
-];
-
-pub const UNIFIED_IDEOGRAPH: &'static [(char, char)] = &[
- ('㐀', '䶿'),
- ('一', '鿿'),
- ('﨎', '﨏'),
- ('﨑', '﨑'),
- ('﨓', '﨔'),
- ('﨟', '﨟'),
- ('﨡', '﨡'),
- ('﨣', '﨤'),
- ('﨧', '﨩'),
- ('𠀀', '𪛟'),
- ('𪜀', '𫜹'),
- ('𫝀', '𫠝'),
- ('𫠠', '𬺡'),
- ('𬺰', '𮯠'),
- ('𮯰', '𮹝'),
- ('𰀀', '𱍊'),
- ('𱍐', '𲎯'),
-];
-
-pub const UPPERCASE: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('À', 'Ö'),
- ('Ø', 'Þ'),
- ('Ā', 'Ā'),
- ('Ă', 'Ă'),
- ('Ą', 'Ą'),
- ('Ć', 'Ć'),
- ('Ĉ', 'Ĉ'),
- ('Ċ', 'Ċ'),
- ('Č', 'Č'),
- ('Ď', 'Ď'),
- ('Đ', 'Đ'),
- ('Ē', 'Ē'),
- ('Ĕ', 'Ĕ'),
- ('Ė', 'Ė'),
- ('Ę', 'Ę'),
- ('Ě', 'Ě'),
- ('Ĝ', 'Ĝ'),
- ('Ğ', 'Ğ'),
- ('Ġ', 'Ġ'),
- ('Ģ', 'Ģ'),
- ('Ĥ', 'Ĥ'),
- ('Ħ', 'Ħ'),
- ('Ĩ', 'Ĩ'),
- ('Ī', 'Ī'),
- ('Ĭ', 'Ĭ'),
- ('Į', 'Į'),
- ('İ', 'İ'),
- ('IJ', 'IJ'),
- ('Ĵ', 'Ĵ'),
- ('Ķ', 'Ķ'),
- ('Ĺ', 'Ĺ'),
- ('Ļ', 'Ļ'),
- ('Ľ', 'Ľ'),
- ('Ŀ', 'Ŀ'),
- ('Ł', 'Ł'),
- ('Ń', 'Ń'),
- ('Ņ', 'Ņ'),
- ('Ň', 'Ň'),
- ('Ŋ', 'Ŋ'),
- ('Ō', 'Ō'),
- ('Ŏ', 'Ŏ'),
- ('Ő', 'Ő'),
- ('Œ', 'Œ'),
- ('Ŕ', 'Ŕ'),
- ('Ŗ', 'Ŗ'),
- ('Ř', 'Ř'),
- ('Ś', 'Ś'),
- ('Ŝ', 'Ŝ'),
- ('Ş', 'Ş'),
- ('Š', 'Š'),
- ('Ţ', 'Ţ'),
- ('Ť', 'Ť'),
- ('Ŧ', 'Ŧ'),
- ('Ũ', 'Ũ'),
- ('Ū', 'Ū'),
- ('Ŭ', 'Ŭ'),
- ('Ů', 'Ů'),
- ('Ű', 'Ű'),
- ('Ų', 'Ų'),
- ('Ŵ', 'Ŵ'),
- ('Ŷ', 'Ŷ'),
- ('Ÿ', 'Ź'),
- ('Ż', 'Ż'),
- ('Ž', 'Ž'),
- ('Ɓ', 'Ƃ'),
- ('Ƅ', 'Ƅ'),
- ('Ɔ', 'Ƈ'),
- ('Ɖ', 'Ƌ'),
- ('Ǝ', 'Ƒ'),
- ('Ɠ', 'Ɣ'),
- ('Ɩ', 'Ƙ'),
- ('Ɯ', 'Ɲ'),
- ('Ɵ', 'Ơ'),
- ('Ƣ', 'Ƣ'),
- ('Ƥ', 'Ƥ'),
- ('Ʀ', 'Ƨ'),
- ('Ʃ', 'Ʃ'),
- ('Ƭ', 'Ƭ'),
- ('Ʈ', 'Ư'),
- ('Ʊ', 'Ƴ'),
- ('Ƶ', 'Ƶ'),
- ('Ʒ', 'Ƹ'),
- ('Ƽ', 'Ƽ'),
- ('DŽ', 'DŽ'),
- ('LJ', 'LJ'),
- ('NJ', 'NJ'),
- ('Ǎ', 'Ǎ'),
- ('Ǐ', 'Ǐ'),
- ('Ǒ', 'Ǒ'),
- ('Ǔ', 'Ǔ'),
- ('Ǖ', 'Ǖ'),
- ('Ǘ', 'Ǘ'),
- ('Ǚ', 'Ǚ'),
- ('Ǜ', 'Ǜ'),
- ('Ǟ', 'Ǟ'),
- ('Ǡ', 'Ǡ'),
- ('Ǣ', 'Ǣ'),
- ('Ǥ', 'Ǥ'),
- ('Ǧ', 'Ǧ'),
- ('Ǩ', 'Ǩ'),
- ('Ǫ', 'Ǫ'),
- ('Ǭ', 'Ǭ'),
- ('Ǯ', 'Ǯ'),
- ('DZ', 'DZ'),
- ('Ǵ', 'Ǵ'),
- ('Ƕ', 'Ǹ'),
- ('Ǻ', 'Ǻ'),
- ('Ǽ', 'Ǽ'),
- ('Ǿ', 'Ǿ'),
- ('Ȁ', 'Ȁ'),
- ('Ȃ', 'Ȃ'),
- ('Ȅ', 'Ȅ'),
- ('Ȇ', 'Ȇ'),
- ('Ȉ', 'Ȉ'),
- ('Ȋ', 'Ȋ'),
- ('Ȍ', 'Ȍ'),
- ('Ȏ', 'Ȏ'),
- ('Ȑ', 'Ȑ'),
- ('Ȓ', 'Ȓ'),
- ('Ȕ', 'Ȕ'),
- ('Ȗ', 'Ȗ'),
- ('Ș', 'Ș'),
- ('Ț', 'Ț'),
- ('Ȝ', 'Ȝ'),
- ('Ȟ', 'Ȟ'),
- ('Ƞ', 'Ƞ'),
- ('Ȣ', 'Ȣ'),
- ('Ȥ', 'Ȥ'),
- ('Ȧ', 'Ȧ'),
- ('Ȩ', 'Ȩ'),
- ('Ȫ', 'Ȫ'),
- ('Ȭ', 'Ȭ'),
- ('Ȯ', 'Ȯ'),
- ('Ȱ', 'Ȱ'),
- ('Ȳ', 'Ȳ'),
- ('Ⱥ', 'Ȼ'),
- ('Ƚ', 'Ⱦ'),
- ('Ɂ', 'Ɂ'),
- ('Ƀ', 'Ɇ'),
- ('Ɉ', 'Ɉ'),
- ('Ɋ', 'Ɋ'),
- ('Ɍ', 'Ɍ'),
- ('Ɏ', 'Ɏ'),
- ('Ͱ', 'Ͱ'),
- ('Ͳ', 'Ͳ'),
- ('Ͷ', 'Ͷ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ώ'),
- ('Α', 'Ρ'),
- ('Σ', 'Ϋ'),
- ('Ϗ', 'Ϗ'),
- ('ϒ', 'ϔ'),
- ('Ϙ', 'Ϙ'),
- ('Ϛ', 'Ϛ'),
- ('Ϝ', 'Ϝ'),
- ('Ϟ', 'Ϟ'),
- ('Ϡ', 'Ϡ'),
- ('Ϣ', 'Ϣ'),
- ('Ϥ', 'Ϥ'),
- ('Ϧ', 'Ϧ'),
- ('Ϩ', 'Ϩ'),
- ('Ϫ', 'Ϫ'),
- ('Ϭ', 'Ϭ'),
- ('Ϯ', 'Ϯ'),
- ('ϴ', 'ϴ'),
- ('Ϸ', 'Ϸ'),
- ('Ϲ', 'Ϻ'),
- ('Ͻ', 'Я'),
- ('Ѡ', 'Ѡ'),
- ('Ѣ', 'Ѣ'),
- ('Ѥ', 'Ѥ'),
- ('Ѧ', 'Ѧ'),
- ('Ѩ', 'Ѩ'),
- ('Ѫ', 'Ѫ'),
- ('Ѭ', 'Ѭ'),
- ('Ѯ', 'Ѯ'),
- ('Ѱ', 'Ѱ'),
- ('Ѳ', 'Ѳ'),
- ('Ѵ', 'Ѵ'),
- ('Ѷ', 'Ѷ'),
- ('Ѹ', 'Ѹ'),
- ('Ѻ', 'Ѻ'),
- ('Ѽ', 'Ѽ'),
- ('Ѿ', 'Ѿ'),
- ('Ҁ', 'Ҁ'),
- ('Ҋ', 'Ҋ'),
- ('Ҍ', 'Ҍ'),
- ('Ҏ', 'Ҏ'),
- ('Ґ', 'Ґ'),
- ('Ғ', 'Ғ'),
- ('Ҕ', 'Ҕ'),
- ('Җ', 'Җ'),
- ('Ҙ', 'Ҙ'),
- ('Қ', 'Қ'),
- ('Ҝ', 'Ҝ'),
- ('Ҟ', 'Ҟ'),
- ('Ҡ', 'Ҡ'),
- ('Ң', 'Ң'),
- ('Ҥ', 'Ҥ'),
- ('Ҧ', 'Ҧ'),
- ('Ҩ', 'Ҩ'),
- ('Ҫ', 'Ҫ'),
- ('Ҭ', 'Ҭ'),
- ('Ү', 'Ү'),
- ('Ұ', 'Ұ'),
- ('Ҳ', 'Ҳ'),
- ('Ҵ', 'Ҵ'),
- ('Ҷ', 'Ҷ'),
- ('Ҹ', 'Ҹ'),
- ('Һ', 'Һ'),
- ('Ҽ', 'Ҽ'),
- ('Ҿ', 'Ҿ'),
- ('Ӏ', 'Ӂ'),
- ('Ӄ', 'Ӄ'),
- ('Ӆ', 'Ӆ'),
- ('Ӈ', 'Ӈ'),
- ('Ӊ', 'Ӊ'),
- ('Ӌ', 'Ӌ'),
- ('Ӎ', 'Ӎ'),
- ('Ӑ', 'Ӑ'),
- ('Ӓ', 'Ӓ'),
- ('Ӕ', 'Ӕ'),
- ('Ӗ', 'Ӗ'),
- ('Ә', 'Ә'),
- ('Ӛ', 'Ӛ'),
- ('Ӝ', 'Ӝ'),
- ('Ӟ', 'Ӟ'),
- ('Ӡ', 'Ӡ'),
- ('Ӣ', 'Ӣ'),
- ('Ӥ', 'Ӥ'),
- ('Ӧ', 'Ӧ'),
- ('Ө', 'Ө'),
- ('Ӫ', 'Ӫ'),
- ('Ӭ', 'Ӭ'),
- ('Ӯ', 'Ӯ'),
- ('Ӱ', 'Ӱ'),
- ('Ӳ', 'Ӳ'),
- ('Ӵ', 'Ӵ'),
- ('Ӷ', 'Ӷ'),
- ('Ӹ', 'Ӹ'),
- ('Ӻ', 'Ӻ'),
- ('Ӽ', 'Ӽ'),
- ('Ӿ', 'Ӿ'),
- ('Ԁ', 'Ԁ'),
- ('Ԃ', 'Ԃ'),
- ('Ԅ', 'Ԅ'),
- ('Ԇ', 'Ԇ'),
- ('Ԉ', 'Ԉ'),
- ('Ԋ', 'Ԋ'),
- ('Ԍ', 'Ԍ'),
- ('Ԏ', 'Ԏ'),
- ('Ԑ', 'Ԑ'),
- ('Ԓ', 'Ԓ'),
- ('Ԕ', 'Ԕ'),
- ('Ԗ', 'Ԗ'),
- ('Ԙ', 'Ԙ'),
- ('Ԛ', 'Ԛ'),
- ('Ԝ', 'Ԝ'),
- ('Ԟ', 'Ԟ'),
- ('Ԡ', 'Ԡ'),
- ('Ԣ', 'Ԣ'),
- ('Ԥ', 'Ԥ'),
- ('Ԧ', 'Ԧ'),
- ('Ԩ', 'Ԩ'),
- ('Ԫ', 'Ԫ'),
- ('Ԭ', 'Ԭ'),
- ('Ԯ', 'Ԯ'),
- ('Ա', 'Ֆ'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('Ꭰ', 'Ᏽ'),
- ('Ᲊ', 'Ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('Ḁ', 'Ḁ'),
- ('Ḃ', 'Ḃ'),
- ('Ḅ', 'Ḅ'),
- ('Ḇ', 'Ḇ'),
- ('Ḉ', 'Ḉ'),
- ('Ḋ', 'Ḋ'),
- ('Ḍ', 'Ḍ'),
- ('Ḏ', 'Ḏ'),
- ('Ḑ', 'Ḑ'),
- ('Ḓ', 'Ḓ'),
- ('Ḕ', 'Ḕ'),
- ('Ḗ', 'Ḗ'),
- ('Ḙ', 'Ḙ'),
- ('Ḛ', 'Ḛ'),
- ('Ḝ', 'Ḝ'),
- ('Ḟ', 'Ḟ'),
- ('Ḡ', 'Ḡ'),
- ('Ḣ', 'Ḣ'),
- ('Ḥ', 'Ḥ'),
- ('Ḧ', 'Ḧ'),
- ('Ḩ', 'Ḩ'),
- ('Ḫ', 'Ḫ'),
- ('Ḭ', 'Ḭ'),
- ('Ḯ', 'Ḯ'),
- ('Ḱ', 'Ḱ'),
- ('Ḳ', 'Ḳ'),
- ('Ḵ', 'Ḵ'),
- ('Ḷ', 'Ḷ'),
- ('Ḹ', 'Ḹ'),
- ('Ḻ', 'Ḻ'),
- ('Ḽ', 'Ḽ'),
- ('Ḿ', 'Ḿ'),
- ('Ṁ', 'Ṁ'),
- ('Ṃ', 'Ṃ'),
- ('Ṅ', 'Ṅ'),
- ('Ṇ', 'Ṇ'),
- ('Ṉ', 'Ṉ'),
- ('Ṋ', 'Ṋ'),
- ('Ṍ', 'Ṍ'),
- ('Ṏ', 'Ṏ'),
- ('Ṑ', 'Ṑ'),
- ('Ṓ', 'Ṓ'),
- ('Ṕ', 'Ṕ'),
- ('Ṗ', 'Ṗ'),
- ('Ṙ', 'Ṙ'),
- ('Ṛ', 'Ṛ'),
- ('Ṝ', 'Ṝ'),
- ('Ṟ', 'Ṟ'),
- ('Ṡ', 'Ṡ'),
- ('Ṣ', 'Ṣ'),
- ('Ṥ', 'Ṥ'),
- ('Ṧ', 'Ṧ'),
- ('Ṩ', 'Ṩ'),
- ('Ṫ', 'Ṫ'),
- ('Ṭ', 'Ṭ'),
- ('Ṯ', 'Ṯ'),
- ('Ṱ', 'Ṱ'),
- ('Ṳ', 'Ṳ'),
- ('Ṵ', 'Ṵ'),
- ('Ṷ', 'Ṷ'),
- ('Ṹ', 'Ṹ'),
- ('Ṻ', 'Ṻ'),
- ('Ṽ', 'Ṽ'),
- ('Ṿ', 'Ṿ'),
- ('Ẁ', 'Ẁ'),
- ('Ẃ', 'Ẃ'),
- ('Ẅ', 'Ẅ'),
- ('Ẇ', 'Ẇ'),
- ('Ẉ', 'Ẉ'),
- ('Ẋ', 'Ẋ'),
- ('Ẍ', 'Ẍ'),
- ('Ẏ', 'Ẏ'),
- ('Ẑ', 'Ẑ'),
- ('Ẓ', 'Ẓ'),
- ('Ẕ', 'Ẕ'),
- ('ẞ', 'ẞ'),
- ('Ạ', 'Ạ'),
- ('Ả', 'Ả'),
- ('Ấ', 'Ấ'),
- ('Ầ', 'Ầ'),
- ('Ẩ', 'Ẩ'),
- ('Ẫ', 'Ẫ'),
- ('Ậ', 'Ậ'),
- ('Ắ', 'Ắ'),
- ('Ằ', 'Ằ'),
- ('Ẳ', 'Ẳ'),
- ('Ẵ', 'Ẵ'),
- ('Ặ', 'Ặ'),
- ('Ẹ', 'Ẹ'),
- ('Ẻ', 'Ẻ'),
- ('Ẽ', 'Ẽ'),
- ('Ế', 'Ế'),
- ('Ề', 'Ề'),
- ('Ể', 'Ể'),
- ('Ễ', 'Ễ'),
- ('Ệ', 'Ệ'),
- ('Ỉ', 'Ỉ'),
- ('Ị', 'Ị'),
- ('Ọ', 'Ọ'),
- ('Ỏ', 'Ỏ'),
- ('Ố', 'Ố'),
- ('Ồ', 'Ồ'),
- ('Ổ', 'Ổ'),
- ('Ỗ', 'Ỗ'),
- ('Ộ', 'Ộ'),
- ('Ớ', 'Ớ'),
- ('Ờ', 'Ờ'),
- ('Ở', 'Ở'),
- ('Ỡ', 'Ỡ'),
- ('Ợ', 'Ợ'),
- ('Ụ', 'Ụ'),
- ('Ủ', 'Ủ'),
- ('Ứ', 'Ứ'),
- ('Ừ', 'Ừ'),
- ('Ử', 'Ử'),
- ('Ữ', 'Ữ'),
- ('Ự', 'Ự'),
- ('Ỳ', 'Ỳ'),
- ('Ỵ', 'Ỵ'),
- ('Ỷ', 'Ỷ'),
- ('Ỹ', 'Ỹ'),
- ('Ỻ', 'Ỻ'),
- ('Ỽ', 'Ỽ'),
- ('Ỿ', 'Ỿ'),
- ('Ἀ', 'Ἇ'),
- ('Ἐ', 'Ἕ'),
- ('Ἠ', 'Ἧ'),
- ('Ἰ', 'Ἷ'),
- ('Ὀ', 'Ὅ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'Ὗ'),
- ('Ὠ', 'Ὧ'),
- ('Ᾰ', 'Ά'),
- ('Ὲ', 'Ή'),
- ('Ῐ', 'Ί'),
- ('Ῠ', 'Ῥ'),
- ('Ὸ', 'Ώ'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℋ', 'ℍ'),
- ('ℐ', 'ℒ'),
- ('ℕ', 'ℕ'),
- ('ℙ', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('Ω', 'Ω'),
- ('ℨ', 'ℨ'),
- ('K', 'ℭ'),
- ('ℰ', 'ℳ'),
- ('ℾ', 'ℿ'),
- ('ⅅ', 'ⅅ'),
- ('Ⅰ', 'Ⅿ'),
- ('Ↄ', 'Ↄ'),
- ('Ⓐ', 'Ⓩ'),
- ('Ⰰ', 'Ⱟ'),
- ('Ⱡ', 'Ⱡ'),
- ('Ɫ', 'Ɽ'),
- ('Ⱨ', 'Ⱨ'),
- ('Ⱪ', 'Ⱪ'),
- ('Ⱬ', 'Ⱬ'),
- ('Ɑ', 'Ɒ'),
- ('Ⱳ', 'Ⱳ'),
- ('Ⱶ', 'Ⱶ'),
- ('Ȿ', 'Ⲁ'),
- ('Ⲃ', 'Ⲃ'),
- ('Ⲅ', 'Ⲅ'),
- ('Ⲇ', 'Ⲇ'),
- ('Ⲉ', 'Ⲉ'),
- ('Ⲋ', 'Ⲋ'),
- ('Ⲍ', 'Ⲍ'),
- ('Ⲏ', 'Ⲏ'),
- ('Ⲑ', 'Ⲑ'),
- ('Ⲓ', 'Ⲓ'),
- ('Ⲕ', 'Ⲕ'),
- ('Ⲗ', 'Ⲗ'),
- ('Ⲙ', 'Ⲙ'),
- ('Ⲛ', 'Ⲛ'),
- ('Ⲝ', 'Ⲝ'),
- ('Ⲟ', 'Ⲟ'),
- ('Ⲡ', 'Ⲡ'),
- ('Ⲣ', 'Ⲣ'),
- ('Ⲥ', 'Ⲥ'),
- ('Ⲧ', 'Ⲧ'),
- ('Ⲩ', 'Ⲩ'),
- ('Ⲫ', 'Ⲫ'),
- ('Ⲭ', 'Ⲭ'),
- ('Ⲯ', 'Ⲯ'),
- ('Ⲱ', 'Ⲱ'),
- ('Ⲳ', 'Ⲳ'),
- ('Ⲵ', 'Ⲵ'),
- ('Ⲷ', 'Ⲷ'),
- ('Ⲹ', 'Ⲹ'),
- ('Ⲻ', 'Ⲻ'),
- ('Ⲽ', 'Ⲽ'),
- ('Ⲿ', 'Ⲿ'),
- ('Ⳁ', 'Ⳁ'),
- ('Ⳃ', 'Ⳃ'),
- ('Ⳅ', 'Ⳅ'),
- ('Ⳇ', 'Ⳇ'),
- ('Ⳉ', 'Ⳉ'),
- ('Ⳋ', 'Ⳋ'),
- ('Ⳍ', 'Ⳍ'),
- ('Ⳏ', 'Ⳏ'),
- ('Ⳑ', 'Ⳑ'),
- ('Ⳓ', 'Ⳓ'),
- ('Ⳕ', 'Ⳕ'),
- ('Ⳗ', 'Ⳗ'),
- ('Ⳙ', 'Ⳙ'),
- ('Ⳛ', 'Ⳛ'),
- ('Ⳝ', 'Ⳝ'),
- ('Ⳟ', 'Ⳟ'),
- ('Ⳡ', 'Ⳡ'),
- ('Ⳣ', 'Ⳣ'),
- ('Ⳬ', 'Ⳬ'),
- ('Ⳮ', 'Ⳮ'),
- ('Ⳳ', 'Ⳳ'),
- ('Ꙁ', 'Ꙁ'),
- ('Ꙃ', 'Ꙃ'),
- ('Ꙅ', 'Ꙅ'),
- ('Ꙇ', 'Ꙇ'),
- ('Ꙉ', 'Ꙉ'),
- ('Ꙋ', 'Ꙋ'),
- ('Ꙍ', 'Ꙍ'),
- ('Ꙏ', 'Ꙏ'),
- ('Ꙑ', 'Ꙑ'),
- ('Ꙓ', 'Ꙓ'),
- ('Ꙕ', 'Ꙕ'),
- ('Ꙗ', 'Ꙗ'),
- ('Ꙙ', 'Ꙙ'),
- ('Ꙛ', 'Ꙛ'),
- ('Ꙝ', 'Ꙝ'),
- ('Ꙟ', 'Ꙟ'),
- ('Ꙡ', 'Ꙡ'),
- ('Ꙣ', 'Ꙣ'),
- ('Ꙥ', 'Ꙥ'),
- ('Ꙧ', 'Ꙧ'),
- ('Ꙩ', 'Ꙩ'),
- ('Ꙫ', 'Ꙫ'),
- ('Ꙭ', 'Ꙭ'),
- ('Ꚁ', 'Ꚁ'),
- ('Ꚃ', 'Ꚃ'),
- ('Ꚅ', 'Ꚅ'),
- ('Ꚇ', 'Ꚇ'),
- ('Ꚉ', 'Ꚉ'),
- ('Ꚋ', 'Ꚋ'),
- ('Ꚍ', 'Ꚍ'),
- ('Ꚏ', 'Ꚏ'),
- ('Ꚑ', 'Ꚑ'),
- ('Ꚓ', 'Ꚓ'),
- ('Ꚕ', 'Ꚕ'),
- ('Ꚗ', 'Ꚗ'),
- ('Ꚙ', 'Ꚙ'),
- ('Ꚛ', 'Ꚛ'),
- ('Ꜣ', 'Ꜣ'),
- ('Ꜥ', 'Ꜥ'),
- ('Ꜧ', 'Ꜧ'),
- ('Ꜩ', 'Ꜩ'),
- ('Ꜫ', 'Ꜫ'),
- ('Ꜭ', 'Ꜭ'),
- ('Ꜯ', 'Ꜯ'),
- ('Ꜳ', 'Ꜳ'),
- ('Ꜵ', 'Ꜵ'),
- ('Ꜷ', 'Ꜷ'),
- ('Ꜹ', 'Ꜹ'),
- ('Ꜻ', 'Ꜻ'),
- ('Ꜽ', 'Ꜽ'),
- ('Ꜿ', 'Ꜿ'),
- ('Ꝁ', 'Ꝁ'),
- ('Ꝃ', 'Ꝃ'),
- ('Ꝅ', 'Ꝅ'),
- ('Ꝇ', 'Ꝇ'),
- ('Ꝉ', 'Ꝉ'),
- ('Ꝋ', 'Ꝋ'),
- ('Ꝍ', 'Ꝍ'),
- ('Ꝏ', 'Ꝏ'),
- ('Ꝑ', 'Ꝑ'),
- ('Ꝓ', 'Ꝓ'),
- ('Ꝕ', 'Ꝕ'),
- ('Ꝗ', 'Ꝗ'),
- ('Ꝙ', 'Ꝙ'),
- ('Ꝛ', 'Ꝛ'),
- ('Ꝝ', 'Ꝝ'),
- ('Ꝟ', 'Ꝟ'),
- ('Ꝡ', 'Ꝡ'),
- ('Ꝣ', 'Ꝣ'),
- ('Ꝥ', 'Ꝥ'),
- ('Ꝧ', 'Ꝧ'),
- ('Ꝩ', 'Ꝩ'),
- ('Ꝫ', 'Ꝫ'),
- ('Ꝭ', 'Ꝭ'),
- ('Ꝯ', 'Ꝯ'),
- ('Ꝺ', 'Ꝺ'),
- ('Ꝼ', 'Ꝼ'),
- ('Ᵹ', 'Ꝿ'),
- ('Ꞁ', 'Ꞁ'),
- ('Ꞃ', 'Ꞃ'),
- ('Ꞅ', 'Ꞅ'),
- ('Ꞇ', 'Ꞇ'),
- ('Ꞌ', 'Ꞌ'),
- ('Ɥ', 'Ɥ'),
- ('Ꞑ', 'Ꞑ'),
- ('Ꞓ', 'Ꞓ'),
- ('Ꞗ', 'Ꞗ'),
- ('Ꞙ', 'Ꞙ'),
- ('Ꞛ', 'Ꞛ'),
- ('Ꞝ', 'Ꞝ'),
- ('Ꞟ', 'Ꞟ'),
- ('Ꞡ', 'Ꞡ'),
- ('Ꞣ', 'Ꞣ'),
- ('Ꞥ', 'Ꞥ'),
- ('Ꞧ', 'Ꞧ'),
- ('Ꞩ', 'Ꞩ'),
- ('Ɦ', 'Ɪ'),
- ('Ʞ', 'Ꞵ'),
- ('Ꞷ', 'Ꞷ'),
- ('Ꞹ', 'Ꞹ'),
- ('Ꞻ', 'Ꞻ'),
- ('Ꞽ', 'Ꞽ'),
- ('Ꞿ', 'Ꞿ'),
- ('Ꟁ', 'Ꟁ'),
- ('Ꟃ', 'Ꟃ'),
- ('Ꞔ', 'Ꟈ'),
- ('Ꟊ', 'Ꟊ'),
- ('Ɤ', 'Ꟍ'),
- ('Ꟑ', 'Ꟑ'),
- ('Ꟗ', 'Ꟗ'),
- ('Ꟙ', 'Ꟙ'),
- ('Ꟛ', 'Ꟛ'),
- ('Ƛ', 'Ƛ'),
- ('Ꟶ', 'Ꟶ'),
- ('A', 'Z'),
- ('𐐀', '𐐧'),
- ('𐒰', '𐓓'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐲀', '𐲲'),
- ('𐵐', '𐵥'),
- ('𑢠', '𑢿'),
- ('𖹀', '𖹟'),
- ('𝐀', '𝐙'),
- ('𝐴', '𝑍'),
- ('𝑨', '𝒁'),
- ('𝒜', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒵'),
- ('𝓐', '𝓩'),
- ('𝔄', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔸', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕬', '𝖅'),
- ('𝖠', '𝖹'),
- ('𝗔', '𝗭'),
- ('𝘈', '𝘡'),
- ('𝘼', '𝙕'),
- ('𝙰', '𝚉'),
- ('𝚨', '𝛀'),
- ('𝛢', '𝛺'),
- ('𝜜', '𝜴'),
- ('𝝖', '𝝮'),
- ('𝞐', '𝞨'),
- ('𝟊', '𝟊'),
- ('𞤀', '𞤡'),
- ('🄰', '🅉'),
- ('🅐', '🅩'),
- ('🅰', '🆉'),
-];
-
-pub const VARIATION_SELECTOR: &'static [(char, char)] = &[
- ('\u{180b}', '\u{180d}'),
- ('\u{180f}', '\u{180f}'),
- ('\u{fe00}', '\u{fe0f}'),
- ('\u{e0100}', '\u{e01ef}'),
-];
-
-pub const WHITE_SPACE: &'static [(char, char)] = &[
- ('\t', '\r'),
- (' ', ' '),
- ('\u{85}', '\u{85}'),
- ('\u{a0}', '\u{a0}'),
- ('\u{1680}', '\u{1680}'),
- ('\u{2000}', '\u{200a}'),
- ('\u{2028}', '\u{2029}'),
- ('\u{202f}', '\u{202f}'),
- ('\u{205f}', '\u{205f}'),
- ('\u{3000}', '\u{3000}'),
-];
-
-pub const XID_CONTINUE: &'static [(char, char)] = &[
- ('0', '9'),
- ('A', 'Z'),
- ('_', '_'),
- ('a', 'z'),
- ('ª', 'ª'),
- ('µ', 'µ'),
- ('·', '·'),
- ('º', 'º'),
- ('À', 'Ö'),
- ('Ø', 'ö'),
- ('ø', 'ˁ'),
- ('ˆ', 'ˑ'),
- ('ˠ', 'ˤ'),
- ('ˬ', 'ˬ'),
- ('ˮ', 'ˮ'),
- ('\u{300}', 'ʹ'),
- ('Ͷ', 'ͷ'),
- ('ͻ', 'ͽ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', 'ϵ'),
- ('Ϸ', 'ҁ'),
- ('\u{483}', '\u{487}'),
- ('Ҋ', 'ԯ'),
- ('Ա', 'Ֆ'),
- ('ՙ', 'ՙ'),
- ('ՠ', 'ֈ'),
- ('\u{591}', '\u{5bd}'),
- ('\u{5bf}', '\u{5bf}'),
- ('\u{5c1}', '\u{5c2}'),
- ('\u{5c4}', '\u{5c5}'),
- ('\u{5c7}', '\u{5c7}'),
- ('א', 'ת'),
- ('ׯ', 'ײ'),
- ('\u{610}', '\u{61a}'),
- ('ؠ', '٩'),
- ('ٮ', 'ۓ'),
- ('ە', '\u{6dc}'),
- ('\u{6df}', '\u{6e8}'),
- ('\u{6ea}', 'ۼ'),
- ('ۿ', 'ۿ'),
- ('ܐ', '\u{74a}'),
- ('ݍ', 'ޱ'),
- ('߀', 'ߵ'),
- ('ߺ', 'ߺ'),
- ('\u{7fd}', '\u{7fd}'),
- ('ࠀ', '\u{82d}'),
- ('ࡀ', '\u{85b}'),
- ('ࡠ', 'ࡪ'),
- ('ࡰ', 'ࢇ'),
- ('ࢉ', 'ࢎ'),
- ('\u{897}', '\u{8e1}'),
- ('\u{8e3}', '\u{963}'),
- ('०', '९'),
- ('ॱ', 'ঃ'),
- ('অ', 'ঌ'),
- ('এ', 'ঐ'),
- ('ও', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('\u{9bc}', '\u{9c4}'),
- ('ে', 'ৈ'),
- ('ো', 'ৎ'),
- ('\u{9d7}', '\u{9d7}'),
- ('ড়', 'ঢ়'),
- ('য়', '\u{9e3}'),
- ('০', 'ৱ'),
- ('ৼ', 'ৼ'),
- ('\u{9fe}', '\u{9fe}'),
- ('\u{a01}', 'ਃ'),
- ('ਅ', 'ਊ'),
- ('ਏ', 'ਐ'),
- ('ਓ', 'ਨ'),
- ('ਪ', 'ਰ'),
- ('ਲ', 'ਲ਼'),
- ('ਵ', 'ਸ਼'),
- ('ਸ', 'ਹ'),
- ('\u{a3c}', '\u{a3c}'),
- ('ਾ', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4d}'),
- ('\u{a51}', '\u{a51}'),
- ('ਖ਼', 'ੜ'),
- ('ਫ਼', 'ਫ਼'),
- ('੦', '\u{a75}'),
- ('\u{a81}', 'ઃ'),
- ('અ', 'ઍ'),
- ('એ', 'ઑ'),
- ('ઓ', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('\u{abc}', '\u{ac5}'),
- ('\u{ac7}', 'ૉ'),
- ('ો', '\u{acd}'),
- ('ૐ', 'ૐ'),
- ('ૠ', '\u{ae3}'),
- ('૦', '૯'),
- ('ૹ', '\u{aff}'),
- ('\u{b01}', 'ଃ'),
- ('ଅ', 'ଌ'),
- ('ଏ', 'ଐ'),
- ('ଓ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଵ', 'ହ'),
- ('\u{b3c}', '\u{b44}'),
- ('େ', 'ୈ'),
- ('ୋ', '\u{b4d}'),
- ('\u{b55}', '\u{b57}'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', '\u{b63}'),
- ('୦', '୯'),
- ('ୱ', 'ୱ'),
- ('\u{b82}', 'ஃ'),
- ('அ', 'ஊ'),
- ('எ', 'ஐ'),
- ('ஒ', 'க'),
- ('ங', 'ச'),
- ('ஜ', 'ஜ'),
- ('ஞ', 'ட'),
- ('ண', 'த'),
- ('ந', 'ப'),
- ('ம', 'ஹ'),
- ('\u{bbe}', 'ூ'),
- ('ெ', 'ை'),
- ('ொ', '\u{bcd}'),
- ('ௐ', 'ௐ'),
- ('\u{bd7}', '\u{bd7}'),
- ('௦', '௯'),
- ('\u{c00}', 'ఌ'),
- ('ఎ', 'ఐ'),
- ('ఒ', 'న'),
- ('ప', 'హ'),
- ('\u{c3c}', 'ౄ'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4d}'),
- ('\u{c55}', '\u{c56}'),
- ('ౘ', 'ౚ'),
- ('ౝ', 'ౝ'),
- ('ౠ', '\u{c63}'),
- ('౦', '౯'),
- ('ಀ', 'ಃ'),
- ('ಅ', 'ಌ'),
- ('ಎ', 'ಐ'),
- ('ಒ', 'ನ'),
- ('ಪ', 'ಳ'),
- ('ವ', 'ಹ'),
- ('\u{cbc}', 'ೄ'),
- ('\u{cc6}', '\u{cc8}'),
- ('\u{cca}', '\u{ccd}'),
- ('\u{cd5}', '\u{cd6}'),
- ('ೝ', 'ೞ'),
- ('ೠ', '\u{ce3}'),
- ('೦', '೯'),
- ('ೱ', 'ೳ'),
- ('\u{d00}', 'ഌ'),
- ('എ', 'ഐ'),
- ('ഒ', '\u{d44}'),
- ('െ', 'ൈ'),
- ('ൊ', 'ൎ'),
- ('ൔ', '\u{d57}'),
- ('ൟ', '\u{d63}'),
- ('൦', '൯'),
- ('ൺ', 'ൿ'),
- ('\u{d81}', 'ඃ'),
- ('අ', 'ඖ'),
- ('ක', 'න'),
- ('ඳ', 'ර'),
- ('ල', 'ල'),
- ('ව', 'ෆ'),
- ('\u{dca}', '\u{dca}'),
- ('\u{dcf}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('ෘ', '\u{ddf}'),
- ('෦', '෯'),
- ('ෲ', 'ෳ'),
- ('ก', '\u{e3a}'),
- ('เ', '\u{e4e}'),
- ('๐', '๙'),
- ('ກ', 'ຂ'),
- ('ຄ', 'ຄ'),
- ('ຆ', 'ຊ'),
- ('ຌ', 'ຣ'),
- ('ລ', 'ລ'),
- ('ວ', 'ຽ'),
- ('ເ', 'ໄ'),
- ('ໆ', 'ໆ'),
- ('\u{ec8}', '\u{ece}'),
- ('໐', '໙'),
- ('ໜ', 'ໟ'),
- ('ༀ', 'ༀ'),
- ('\u{f18}', '\u{f19}'),
- ('༠', '༩'),
- ('\u{f35}', '\u{f35}'),
- ('\u{f37}', '\u{f37}'),
- ('\u{f39}', '\u{f39}'),
- ('༾', 'ཇ'),
- ('ཉ', 'ཬ'),
- ('\u{f71}', '\u{f84}'),
- ('\u{f86}', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('\u{fc6}', '\u{fc6}'),
- ('က', '၉'),
- ('ၐ', '\u{109d}'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ა', 'ჺ'),
- ('ჼ', 'ቈ'),
- ('ቊ', 'ቍ'),
- ('ቐ', 'ቖ'),
- ('ቘ', 'ቘ'),
- ('ቚ', 'ቝ'),
- ('በ', 'ኈ'),
- ('ኊ', 'ኍ'),
- ('ነ', 'ኰ'),
- ('ኲ', 'ኵ'),
- ('ኸ', 'ኾ'),
- ('ዀ', 'ዀ'),
- ('ዂ', 'ዅ'),
- ('ወ', 'ዖ'),
- ('ዘ', 'ጐ'),
- ('ጒ', 'ጕ'),
- ('ጘ', 'ፚ'),
- ('\u{135d}', '\u{135f}'),
- ('፩', '፱'),
- ('ᎀ', 'ᎏ'),
- ('Ꭰ', 'Ᏽ'),
- ('ᏸ', 'ᏽ'),
- ('ᐁ', 'ᙬ'),
- ('ᙯ', 'ᙿ'),
- ('ᚁ', 'ᚚ'),
- ('ᚠ', 'ᛪ'),
- ('ᛮ', 'ᛸ'),
- ('ᜀ', '\u{1715}'),
- ('ᜟ', '\u{1734}'),
- ('ᝀ', '\u{1753}'),
- ('ᝠ', 'ᝬ'),
- ('ᝮ', 'ᝰ'),
- ('\u{1772}', '\u{1773}'),
- ('ក', '\u{17d3}'),
- ('ៗ', 'ៗ'),
- ('ៜ', '\u{17dd}'),
- ('០', '៩'),
- ('\u{180b}', '\u{180d}'),
- ('\u{180f}', '᠙'),
- ('ᠠ', 'ᡸ'),
- ('ᢀ', 'ᢪ'),
- ('ᢰ', 'ᣵ'),
- ('ᤀ', 'ᤞ'),
- ('\u{1920}', 'ᤫ'),
- ('ᤰ', '\u{193b}'),
- ('᥆', 'ᥭ'),
- ('ᥰ', 'ᥴ'),
- ('ᦀ', 'ᦫ'),
- ('ᦰ', 'ᧉ'),
- ('᧐', '᧚'),
- ('ᨀ', '\u{1a1b}'),
- ('ᨠ', '\u{1a5e}'),
- ('\u{1a60}', '\u{1a7c}'),
- ('\u{1a7f}', '᪉'),
- ('᪐', '᪙'),
- ('ᪧ', 'ᪧ'),
- ('\u{1ab0}', '\u{1abd}'),
- ('\u{1abf}', '\u{1ace}'),
- ('\u{1b00}', 'ᭌ'),
- ('᭐', '᭙'),
- ('\u{1b6b}', '\u{1b73}'),
- ('\u{1b80}', '\u{1bf3}'),
- ('ᰀ', '\u{1c37}'),
- ('᱀', '᱉'),
- ('ᱍ', 'ᱽ'),
- ('ᲀ', 'ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('\u{1cd0}', '\u{1cd2}'),
- ('\u{1cd4}', 'ᳺ'),
- ('ᴀ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ᾼ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῌ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('ῠ', 'Ῥ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῼ'),
- ('\u{200c}', '\u{200d}'),
- ('‿', '⁀'),
- ('⁔', '⁔'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('\u{20d0}', '\u{20dc}'),
- ('\u{20e1}', '\u{20e1}'),
- ('\u{20e5}', '\u{20f0}'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℊ', 'ℓ'),
- ('ℕ', 'ℕ'),
- ('℘', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('Ω', 'Ω'),
- ('ℨ', 'ℨ'),
- ('K', 'ℹ'),
- ('ℼ', 'ℿ'),
- ('ⅅ', 'ⅉ'),
- ('ⅎ', 'ⅎ'),
- ('Ⅰ', 'ↈ'),
- ('Ⰰ', 'ⳤ'),
- ('Ⳬ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ⴰ', 'ⵧ'),
- ('ⵯ', 'ⵯ'),
- ('\u{2d7f}', 'ⶖ'),
- ('ⶠ', 'ⶦ'),
- ('ⶨ', 'ⶮ'),
- ('ⶰ', 'ⶶ'),
- ('ⶸ', 'ⶾ'),
- ('ⷀ', 'ⷆ'),
- ('ⷈ', 'ⷎ'),
- ('ⷐ', 'ⷖ'),
- ('ⷘ', 'ⷞ'),
- ('\u{2de0}', '\u{2dff}'),
- ('々', '〇'),
- ('〡', '\u{302f}'),
- ('〱', '〵'),
- ('〸', '〼'),
- ('ぁ', 'ゖ'),
- ('\u{3099}', '\u{309a}'),
- ('ゝ', 'ゟ'),
- ('ァ', 'ヿ'),
- ('ㄅ', 'ㄯ'),
- ('ㄱ', 'ㆎ'),
- ('ㆠ', 'ㆿ'),
- ('ㇰ', 'ㇿ'),
- ('㐀', '䶿'),
- ('一', 'ꒌ'),
- ('ꓐ', 'ꓽ'),
- ('ꔀ', 'ꘌ'),
- ('ꘐ', 'ꘫ'),
- ('Ꙁ', '\u{a66f}'),
- ('\u{a674}', '\u{a67d}'),
- ('ꙿ', '\u{a6f1}'),
- ('ꜗ', 'ꜟ'),
- ('Ꜣ', 'ꞈ'),
- ('Ꞌ', 'ꟍ'),
- ('Ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'Ƛ'),
- ('ꟲ', 'ꠧ'),
- ('\u{a82c}', '\u{a82c}'),
- ('ꡀ', 'ꡳ'),
- ('ꢀ', '\u{a8c5}'),
- ('꣐', '꣙'),
- ('\u{a8e0}', 'ꣷ'),
- ('ꣻ', 'ꣻ'),
- ('ꣽ', '\u{a92d}'),
- ('ꤰ', '\u{a953}'),
- ('ꥠ', 'ꥼ'),
- ('\u{a980}', '\u{a9c0}'),
- ('ꧏ', '꧙'),
- ('ꧠ', 'ꧾ'),
- ('ꨀ', '\u{aa36}'),
- ('ꩀ', 'ꩍ'),
- ('꩐', '꩙'),
- ('ꩠ', 'ꩶ'),
- ('ꩺ', 'ꫂ'),
- ('ꫛ', 'ꫝ'),
- ('ꫠ', 'ꫯ'),
- ('ꫲ', '\u{aaf6}'),
- ('ꬁ', 'ꬆ'),
- ('ꬉ', 'ꬎ'),
- ('ꬑ', 'ꬖ'),
- ('ꬠ', 'ꬦ'),
- ('ꬨ', 'ꬮ'),
- ('ꬰ', 'ꭚ'),
- ('ꭜ', 'ꭩ'),
- ('ꭰ', 'ꯪ'),
- ('꯬', '\u{abed}'),
- ('꯰', '꯹'),
- ('가', '힣'),
- ('ힰ', 'ퟆ'),
- ('ퟋ', 'ퟻ'),
- ('豈', '舘'),
- ('並', '龎'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('יִ', 'ﬨ'),
- ('שׁ', 'זּ'),
- ('טּ', 'לּ'),
- ('מּ', 'מּ'),
- ('נּ', 'סּ'),
- ('ףּ', 'פּ'),
- ('צּ', 'ﮱ'),
- ('ﯓ', 'ﱝ'),
- ('ﱤ', 'ﴽ'),
- ('ﵐ', 'ﶏ'),
- ('ﶒ', 'ﷇ'),
- ('ﷰ', 'ﷹ'),
- ('\u{fe00}', '\u{fe0f}'),
- ('\u{fe20}', '\u{fe2f}'),
- ('︳', '︴'),
- ('﹍', '﹏'),
- ('ﹱ', 'ﹱ'),
- ('ﹳ', 'ﹳ'),
- ('ﹷ', 'ﹷ'),
- ('ﹹ', 'ﹹ'),
- ('ﹻ', 'ﹻ'),
- ('ﹽ', 'ﹽ'),
- ('ﹿ', 'ﻼ'),
- ('0', '9'),
- ('A', 'Z'),
- ('_', '_'),
- ('a', 'z'),
- ('・', 'ᄒ'),
- ('ᅡ', 'ᅦ'),
- ('ᅧ', 'ᅬ'),
- ('ᅭ', 'ᅲ'),
- ('ᅳ', 'ᅵ'),
- ('𐀀', '𐀋'),
- ('𐀍', '𐀦'),
- ('𐀨', '𐀺'),
- ('𐀼', '𐀽'),
- ('𐀿', '𐁍'),
- ('𐁐', '𐁝'),
- ('𐂀', '𐃺'),
- ('𐅀', '𐅴'),
- ('\u{101fd}', '\u{101fd}'),
- ('𐊀', '𐊜'),
- ('𐊠', '𐋐'),
- ('\u{102e0}', '\u{102e0}'),
- ('𐌀', '𐌟'),
- ('𐌭', '𐍊'),
- ('𐍐', '\u{1037a}'),
- ('𐎀', '𐎝'),
- ('𐎠', '𐏃'),
- ('𐏈', '𐏏'),
- ('𐏑', '𐏕'),
- ('𐐀', '𐒝'),
- ('𐒠', '𐒩'),
- ('𐒰', '𐓓'),
- ('𐓘', '𐓻'),
- ('𐔀', '𐔧'),
- ('𐔰', '𐕣'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐗀', '𐗳'),
- ('𐘀', '𐜶'),
- ('𐝀', '𐝕'),
- ('𐝠', '𐝧'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𐠀', '𐠅'),
- ('𐠈', '𐠈'),
- ('𐠊', '𐠵'),
- ('𐠷', '𐠸'),
- ('𐠼', '𐠼'),
- ('𐠿', '𐡕'),
- ('𐡠', '𐡶'),
- ('𐢀', '𐢞'),
- ('𐣠', '𐣲'),
- ('𐣴', '𐣵'),
- ('𐤀', '𐤕'),
- ('𐤠', '𐤹'),
- ('𐦀', '𐦷'),
- ('𐦾', '𐦿'),
- ('𐨀', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '𐨓'),
- ('𐨕', '𐨗'),
- ('𐨙', '𐨵'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '\u{10a3f}'),
- ('𐩠', '𐩼'),
- ('𐪀', '𐪜'),
- ('𐫀', '𐫇'),
- ('𐫉', '\u{10ae6}'),
- ('𐬀', '𐬵'),
- ('𐭀', '𐭕'),
- ('𐭠', '𐭲'),
- ('𐮀', '𐮑'),
- ('𐰀', '𐱈'),
- ('𐲀', '𐲲'),
- ('𐳀', '𐳲'),
- ('𐴀', '\u{10d27}'),
- ('𐴰', '𐴹'),
- ('𐵀', '𐵥'),
- ('\u{10d69}', '\u{10d6d}'),
- ('𐵯', '𐶅'),
- ('𐺀', '𐺩'),
- ('\u{10eab}', '\u{10eac}'),
- ('𐺰', '𐺱'),
- ('𐻂', '𐻄'),
- ('\u{10efc}', '𐼜'),
- ('𐼧', '𐼧'),
- ('𐼰', '\u{10f50}'),
- ('𐽰', '\u{10f85}'),
- ('𐾰', '𐿄'),
- ('𐿠', '𐿶'),
- ('𑀀', '\u{11046}'),
- ('𑁦', '𑁵'),
- ('\u{1107f}', '\u{110ba}'),
- ('\u{110c2}', '\u{110c2}'),
- ('𑃐', '𑃨'),
- ('𑃰', '𑃹'),
- ('\u{11100}', '\u{11134}'),
- ('𑄶', '𑄿'),
- ('𑅄', '𑅇'),
- ('𑅐', '\u{11173}'),
- ('𑅶', '𑅶'),
- ('\u{11180}', '𑇄'),
- ('\u{111c9}', '\u{111cc}'),
- ('𑇎', '𑇚'),
- ('𑇜', '𑇜'),
- ('𑈀', '𑈑'),
- ('𑈓', '\u{11237}'),
- ('\u{1123e}', '\u{11241}'),
- ('𑊀', '𑊆'),
- ('𑊈', '𑊈'),
- ('𑊊', '𑊍'),
- ('𑊏', '𑊝'),
- ('𑊟', '𑊨'),
- ('𑊰', '\u{112ea}'),
- ('𑋰', '𑋹'),
- ('\u{11300}', '𑌃'),
- ('𑌅', '𑌌'),
- ('𑌏', '𑌐'),
- ('𑌓', '𑌨'),
- ('𑌪', '𑌰'),
- ('𑌲', '𑌳'),
- ('𑌵', '𑌹'),
- ('\u{1133b}', '𑍄'),
- ('𑍇', '𑍈'),
- ('𑍋', '\u{1134d}'),
- ('𑍐', '𑍐'),
- ('\u{11357}', '\u{11357}'),
- ('𑍝', '𑍣'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
- ('𑎀', '𑎉'),
- ('𑎋', '𑎋'),
- ('𑎎', '𑎎'),
- ('𑎐', '𑎵'),
- ('𑎷', '\u{113c0}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '𑏊'),
- ('𑏌', '𑏓'),
- ('\u{113e1}', '\u{113e2}'),
- ('𑐀', '𑑊'),
- ('𑑐', '𑑙'),
- ('\u{1145e}', '𑑡'),
- ('𑒀', '𑓅'),
- ('𑓇', '𑓇'),
- ('𑓐', '𑓙'),
- ('𑖀', '\u{115b5}'),
- ('𑖸', '\u{115c0}'),
- ('𑗘', '\u{115dd}'),
- ('𑘀', '\u{11640}'),
- ('𑙄', '𑙄'),
- ('𑙐', '𑙙'),
- ('𑚀', '𑚸'),
- ('𑛀', '𑛉'),
- ('𑛐', '𑛣'),
- ('𑜀', '𑜚'),
- ('\u{1171d}', '\u{1172b}'),
- ('𑜰', '𑜹'),
- ('𑝀', '𑝆'),
- ('𑠀', '\u{1183a}'),
- ('𑢠', '𑣩'),
- ('𑣿', '𑤆'),
- ('𑤉', '𑤉'),
- ('𑤌', '𑤓'),
- ('𑤕', '𑤖'),
- ('𑤘', '𑤵'),
- ('𑤷', '𑤸'),
- ('\u{1193b}', '\u{11943}'),
- ('𑥐', '𑥙'),
- ('𑦠', '𑦧'),
- ('𑦪', '\u{119d7}'),
- ('\u{119da}', '𑧡'),
- ('𑧣', '𑧤'),
- ('𑨀', '\u{11a3e}'),
- ('\u{11a47}', '\u{11a47}'),
- ('𑩐', '\u{11a99}'),
- ('𑪝', '𑪝'),
- ('𑪰', '𑫸'),
- ('𑯀', '𑯠'),
- ('𑯰', '𑯹'),
- ('𑰀', '𑰈'),
- ('𑰊', '\u{11c36}'),
- ('\u{11c38}', '𑱀'),
- ('𑱐', '𑱙'),
- ('𑱲', '𑲏'),
- ('\u{11c92}', '\u{11ca7}'),
- ('𑲩', '\u{11cb6}'),
- ('𑴀', '𑴆'),
- ('𑴈', '𑴉'),
- ('𑴋', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d47}'),
- ('𑵐', '𑵙'),
- ('𑵠', '𑵥'),
- ('𑵧', '𑵨'),
- ('𑵪', '𑶎'),
- ('\u{11d90}', '\u{11d91}'),
- ('𑶓', '𑶘'),
- ('𑶠', '𑶩'),
- ('𑻠', '𑻶'),
- ('\u{11f00}', '𑼐'),
- ('𑼒', '\u{11f3a}'),
- ('𑼾', '\u{11f42}'),
- ('𑽐', '\u{11f5a}'),
- ('𑾰', '𑾰'),
- ('𒀀', '𒎙'),
- ('𒐀', '𒑮'),
- ('𒒀', '𒕃'),
- ('𒾐', '𒿰'),
- ('𓀀', '𓐯'),
- ('\u{13440}', '\u{13455}'),
- ('𓑠', '𔏺'),
- ('𔐀', '𔙆'),
- ('𖄀', '𖄹'),
- ('𖠀', '𖨸'),
- ('𖩀', '𖩞'),
- ('𖩠', '𖩩'),
- ('𖩰', '𖪾'),
- ('𖫀', '𖫉'),
- ('𖫐', '𖫭'),
- ('\u{16af0}', '\u{16af4}'),
- ('𖬀', '\u{16b36}'),
- ('𖭀', '𖭃'),
- ('𖭐', '𖭙'),
- ('𖭣', '𖭷'),
- ('𖭽', '𖮏'),
- ('𖵀', '𖵬'),
- ('𖵰', '𖵹'),
- ('𖹀', '𖹿'),
- ('𖼀', '𖽊'),
- ('\u{16f4f}', '𖾇'),
- ('\u{16f8f}', '𖾟'),
- ('𖿠', '𖿡'),
- ('𖿣', '\u{16fe4}'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('𗀀', '𘟷'),
- ('𘠀', '𘳕'),
- ('𘳿', '𘴈'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('𛀀', '𛄢'),
- ('𛄲', '𛄲'),
- ('𛅐', '𛅒'),
- ('𛅕', '𛅕'),
- ('𛅤', '𛅧'),
- ('𛅰', '𛋻'),
- ('𛰀', '𛱪'),
- ('𛱰', '𛱼'),
- ('𛲀', '𛲈'),
- ('𛲐', '𛲙'),
- ('\u{1bc9d}', '\u{1bc9e}'),
- ('𜳰', '𜳹'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('\u{1d165}', '\u{1d169}'),
- ('\u{1d16d}', '\u{1d172}'),
- ('\u{1d17b}', '\u{1d182}'),
- ('\u{1d185}', '\u{1d18b}'),
- ('\u{1d1aa}', '\u{1d1ad}'),
- ('\u{1d242}', '\u{1d244}'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝛀'),
- ('𝛂', '𝛚'),
- ('𝛜', '𝛺'),
- ('𝛼', '𝜔'),
- ('𝜖', '𝜴'),
- ('𝜶', '𝝎'),
- ('𝝐', '𝝮'),
- ('𝝰', '𝞈'),
- ('𝞊', '𝞨'),
- ('𝞪', '𝟂'),
- ('𝟄', '𝟋'),
- ('𝟎', '𝟿'),
- ('\u{1da00}', '\u{1da36}'),
- ('\u{1da3b}', '\u{1da6c}'),
- ('\u{1da75}', '\u{1da75}'),
- ('\u{1da84}', '\u{1da84}'),
- ('\u{1da9b}', '\u{1da9f}'),
- ('\u{1daa1}', '\u{1daaf}'),
- ('𝼀', '𝼞'),
- ('𝼥', '𝼪'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
- ('𞀰', '𞁭'),
- ('\u{1e08f}', '\u{1e08f}'),
- ('𞄀', '𞄬'),
- ('\u{1e130}', '𞄽'),
- ('𞅀', '𞅉'),
- ('𞅎', '𞅎'),
- ('𞊐', '\u{1e2ae}'),
- ('𞋀', '𞋹'),
- ('𞓐', '𞓹'),
- ('𞗐', '𞗺'),
- ('𞟠', '𞟦'),
- ('𞟨', '𞟫'),
- ('𞟭', '𞟮'),
- ('𞟰', '𞟾'),
- ('𞠀', '𞣄'),
- ('\u{1e8d0}', '\u{1e8d6}'),
- ('𞤀', '𞥋'),
- ('𞥐', '𞥙'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('🯰', '🯹'),
- ('𠀀', '𪛟'),
- ('𪜀', '𫜹'),
- ('𫝀', '𫠝'),
- ('𫠠', '𬺡'),
- ('𬺰', '𮯠'),
- ('𮯰', '𮹝'),
- ('丽', '𪘀'),
- ('𰀀', '𱍊'),
- ('𱍐', '𲎯'),
- ('\u{e0100}', '\u{e01ef}'),
-];
-
-pub const XID_START: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('a', 'z'),
- ('ª', 'ª'),
- ('µ', 'µ'),
- ('º', 'º'),
- ('À', 'Ö'),
- ('Ø', 'ö'),
- ('ø', 'ˁ'),
- ('ˆ', 'ˑ'),
- ('ˠ', 'ˤ'),
- ('ˬ', 'ˬ'),
- ('ˮ', 'ˮ'),
- ('Ͱ', 'ʹ'),
- ('Ͷ', 'ͷ'),
- ('ͻ', 'ͽ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', 'ϵ'),
- ('Ϸ', 'ҁ'),
- ('Ҋ', 'ԯ'),
- ('Ա', 'Ֆ'),
- ('ՙ', 'ՙ'),
- ('ՠ', 'ֈ'),
- ('א', 'ת'),
- ('ׯ', 'ײ'),
- ('ؠ', 'ي'),
- ('ٮ', 'ٯ'),
- ('ٱ', 'ۓ'),
- ('ە', 'ە'),
- ('ۥ', 'ۦ'),
- ('ۮ', 'ۯ'),
- ('ۺ', 'ۼ'),
- ('ۿ', 'ۿ'),
- ('ܐ', 'ܐ'),
- ('ܒ', 'ܯ'),
- ('ݍ', 'ޥ'),
- ('ޱ', 'ޱ'),
- ('ߊ', 'ߪ'),
- ('ߴ', 'ߵ'),
- ('ߺ', 'ߺ'),
- ('ࠀ', 'ࠕ'),
- ('ࠚ', 'ࠚ'),
- ('ࠤ', 'ࠤ'),
- ('ࠨ', 'ࠨ'),
- ('ࡀ', 'ࡘ'),
- ('ࡠ', 'ࡪ'),
- ('ࡰ', 'ࢇ'),
- ('ࢉ', 'ࢎ'),
- ('ࢠ', 'ࣉ'),
- ('ऄ', 'ह'),
- ('ऽ', 'ऽ'),
- ('ॐ', 'ॐ'),
- ('क़', 'ॡ'),
- ('ॱ', 'ঀ'),
- ('অ', 'ঌ'),
- ('এ', 'ঐ'),
- ('ও', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('ঽ', 'ঽ'),
- ('ৎ', 'ৎ'),
- ('ড়', 'ঢ়'),
- ('য়', 'ৡ'),
- ('ৰ', 'ৱ'),
- ('ৼ', 'ৼ'),
- ('ਅ', 'ਊ'),
- ('ਏ', 'ਐ'),
- ('ਓ', 'ਨ'),
- ('ਪ', 'ਰ'),
- ('ਲ', 'ਲ਼'),
- ('ਵ', 'ਸ਼'),
- ('ਸ', 'ਹ'),
- ('ਖ਼', 'ੜ'),
- ('ਫ਼', 'ਫ਼'),
- ('ੲ', 'ੴ'),
- ('અ', 'ઍ'),
- ('એ', 'ઑ'),
- ('ઓ', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('ઽ', 'ઽ'),
- ('ૐ', 'ૐ'),
- ('ૠ', 'ૡ'),
- ('ૹ', 'ૹ'),
- ('ଅ', 'ଌ'),
- ('ଏ', 'ଐ'),
- ('ଓ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଵ', 'ହ'),
- ('ଽ', 'ଽ'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', 'ୡ'),
- ('ୱ', 'ୱ'),
- ('ஃ', 'ஃ'),
- ('அ', 'ஊ'),
- ('எ', 'ஐ'),
- ('ஒ', 'க'),
- ('ங', 'ச'),
- ('ஜ', 'ஜ'),
- ('ஞ', 'ட'),
- ('ண', 'த'),
- ('ந', 'ப'),
- ('ம', 'ஹ'),
- ('ௐ', 'ௐ'),
- ('అ', 'ఌ'),
- ('ఎ', 'ఐ'),
- ('ఒ', 'న'),
- ('ప', 'హ'),
- ('ఽ', 'ఽ'),
- ('ౘ', 'ౚ'),
- ('ౝ', 'ౝ'),
- ('ౠ', 'ౡ'),
- ('ಀ', 'ಀ'),
- ('ಅ', 'ಌ'),
- ('ಎ', 'ಐ'),
- ('ಒ', 'ನ'),
- ('ಪ', 'ಳ'),
- ('ವ', 'ಹ'),
- ('ಽ', 'ಽ'),
- ('ೝ', 'ೞ'),
- ('ೠ', 'ೡ'),
- ('ೱ', 'ೲ'),
- ('ഄ', 'ഌ'),
- ('എ', 'ഐ'),
- ('ഒ', 'ഺ'),
- ('ഽ', 'ഽ'),
- ('ൎ', 'ൎ'),
- ('ൔ', 'ൖ'),
- ('ൟ', 'ൡ'),
- ('ൺ', 'ൿ'),
- ('අ', 'ඖ'),
- ('ක', 'න'),
- ('ඳ', 'ර'),
- ('ල', 'ල'),
- ('ව', 'ෆ'),
- ('ก', 'ะ'),
- ('า', 'า'),
- ('เ', 'ๆ'),
- ('ກ', 'ຂ'),
- ('ຄ', 'ຄ'),
- ('ຆ', 'ຊ'),
- ('ຌ', 'ຣ'),
- ('ລ', 'ລ'),
- ('ວ', 'ະ'),
- ('າ', 'າ'),
- ('ຽ', 'ຽ'),
- ('ເ', 'ໄ'),
- ('ໆ', 'ໆ'),
- ('ໜ', 'ໟ'),
- ('ༀ', 'ༀ'),
- ('ཀ', 'ཇ'),
- ('ཉ', 'ཬ'),
- ('ྈ', 'ྌ'),
- ('က', 'ဪ'),
- ('ဿ', 'ဿ'),
- ('ၐ', 'ၕ'),
- ('ၚ', 'ၝ'),
- ('ၡ', 'ၡ'),
- ('ၥ', 'ၦ'),
- ('ၮ', 'ၰ'),
- ('ၵ', 'ႁ'),
- ('ႎ', 'ႎ'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ა', 'ჺ'),
- ('ჼ', 'ቈ'),
- ('ቊ', 'ቍ'),
- ('ቐ', 'ቖ'),
- ('ቘ', 'ቘ'),
- ('ቚ', 'ቝ'),
- ('በ', 'ኈ'),
- ('ኊ', 'ኍ'),
- ('ነ', 'ኰ'),
- ('ኲ', 'ኵ'),
- ('ኸ', 'ኾ'),
- ('ዀ', 'ዀ'),
- ('ዂ', 'ዅ'),
- ('ወ', 'ዖ'),
- ('ዘ', 'ጐ'),
- ('ጒ', 'ጕ'),
- ('ጘ', 'ፚ'),
- ('ᎀ', 'ᎏ'),
- ('Ꭰ', 'Ᏽ'),
- ('ᏸ', 'ᏽ'),
- ('ᐁ', 'ᙬ'),
- ('ᙯ', 'ᙿ'),
- ('ᚁ', 'ᚚ'),
- ('ᚠ', 'ᛪ'),
- ('ᛮ', 'ᛸ'),
- ('ᜀ', 'ᜑ'),
- ('ᜟ', 'ᜱ'),
- ('ᝀ', 'ᝑ'),
- ('ᝠ', 'ᝬ'),
- ('ᝮ', 'ᝰ'),
- ('ក', 'ឳ'),
- ('ៗ', 'ៗ'),
- ('ៜ', 'ៜ'),
- ('ᠠ', 'ᡸ'),
- ('ᢀ', 'ᢨ'),
- ('ᢪ', 'ᢪ'),
- ('ᢰ', 'ᣵ'),
- ('ᤀ', 'ᤞ'),
- ('ᥐ', 'ᥭ'),
- ('ᥰ', 'ᥴ'),
- ('ᦀ', 'ᦫ'),
- ('ᦰ', 'ᧉ'),
- ('ᨀ', 'ᨖ'),
- ('ᨠ', 'ᩔ'),
- ('ᪧ', 'ᪧ'),
- ('ᬅ', 'ᬳ'),
- ('ᭅ', 'ᭌ'),
- ('ᮃ', 'ᮠ'),
- ('ᮮ', 'ᮯ'),
- ('ᮺ', 'ᯥ'),
- ('ᰀ', 'ᰣ'),
- ('ᱍ', 'ᱏ'),
- ('ᱚ', 'ᱽ'),
- ('ᲀ', 'ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('ᳩ', 'ᳬ'),
- ('ᳮ', 'ᳳ'),
- ('ᳵ', 'ᳶ'),
- ('ᳺ', 'ᳺ'),
- ('ᴀ', 'ᶿ'),
- ('Ḁ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ᾼ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῌ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('ῠ', 'Ῥ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῼ'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℊ', 'ℓ'),
- ('ℕ', 'ℕ'),
- ('℘', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('Ω', 'Ω'),
- ('ℨ', 'ℨ'),
- ('K', 'ℹ'),
- ('ℼ', 'ℿ'),
- ('ⅅ', 'ⅉ'),
- ('ⅎ', 'ⅎ'),
- ('Ⅰ', 'ↈ'),
- ('Ⰰ', 'ⳤ'),
- ('Ⳬ', 'ⳮ'),
- ('Ⳳ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ⴰ', 'ⵧ'),
- ('ⵯ', 'ⵯ'),
- ('ⶀ', 'ⶖ'),
- ('ⶠ', 'ⶦ'),
- ('ⶨ', 'ⶮ'),
- ('ⶰ', 'ⶶ'),
- ('ⶸ', 'ⶾ'),
- ('ⷀ', 'ⷆ'),
- ('ⷈ', 'ⷎ'),
- ('ⷐ', 'ⷖ'),
- ('ⷘ', 'ⷞ'),
- ('々', '〇'),
- ('〡', '〩'),
- ('〱', '〵'),
- ('〸', '〼'),
- ('ぁ', 'ゖ'),
- ('ゝ', 'ゟ'),
- ('ァ', 'ヺ'),
- ('ー', 'ヿ'),
- ('ㄅ', 'ㄯ'),
- ('ㄱ', 'ㆎ'),
- ('ㆠ', 'ㆿ'),
- ('ㇰ', 'ㇿ'),
- ('㐀', '䶿'),
- ('一', 'ꒌ'),
- ('ꓐ', 'ꓽ'),
- ('ꔀ', 'ꘌ'),
- ('ꘐ', 'ꘟ'),
- ('ꘪ', 'ꘫ'),
- ('Ꙁ', 'ꙮ'),
- ('ꙿ', 'ꚝ'),
- ('ꚠ', 'ꛯ'),
- ('ꜗ', 'ꜟ'),
- ('Ꜣ', 'ꞈ'),
- ('Ꞌ', 'ꟍ'),
- ('Ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'Ƛ'),
- ('ꟲ', 'ꠁ'),
- ('ꠃ', 'ꠅ'),
- ('ꠇ', 'ꠊ'),
- ('ꠌ', 'ꠢ'),
- ('ꡀ', 'ꡳ'),
- ('ꢂ', 'ꢳ'),
- ('ꣲ', 'ꣷ'),
- ('ꣻ', 'ꣻ'),
- ('ꣽ', 'ꣾ'),
- ('ꤊ', 'ꤥ'),
- ('ꤰ', 'ꥆ'),
- ('ꥠ', 'ꥼ'),
- ('ꦄ', 'ꦲ'),
- ('ꧏ', 'ꧏ'),
- ('ꧠ', 'ꧤ'),
- ('ꧦ', 'ꧯ'),
- ('ꧺ', 'ꧾ'),
- ('ꨀ', 'ꨨ'),
- ('ꩀ', 'ꩂ'),
- ('ꩄ', 'ꩋ'),
- ('ꩠ', 'ꩶ'),
- ('ꩺ', 'ꩺ'),
- ('ꩾ', 'ꪯ'),
- ('ꪱ', 'ꪱ'),
- ('ꪵ', 'ꪶ'),
- ('ꪹ', 'ꪽ'),
- ('ꫀ', 'ꫀ'),
- ('ꫂ', 'ꫂ'),
- ('ꫛ', 'ꫝ'),
- ('ꫠ', 'ꫪ'),
- ('ꫲ', 'ꫴ'),
- ('ꬁ', 'ꬆ'),
- ('ꬉ', 'ꬎ'),
- ('ꬑ', 'ꬖ'),
- ('ꬠ', 'ꬦ'),
- ('ꬨ', 'ꬮ'),
- ('ꬰ', 'ꭚ'),
- ('ꭜ', 'ꭩ'),
- ('ꭰ', 'ꯢ'),
- ('가', '힣'),
- ('ힰ', 'ퟆ'),
- ('ퟋ', 'ퟻ'),
- ('豈', '舘'),
- ('並', '龎'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('יִ', 'יִ'),
- ('ײַ', 'ﬨ'),
- ('שׁ', 'זּ'),
- ('טּ', 'לּ'),
- ('מּ', 'מּ'),
- ('נּ', 'סּ'),
- ('ףּ', 'פּ'),
- ('צּ', 'ﮱ'),
- ('ﯓ', 'ﱝ'),
- ('ﱤ', 'ﴽ'),
- ('ﵐ', 'ﶏ'),
- ('ﶒ', 'ﷇ'),
- ('ﷰ', 'ﷹ'),
- ('ﹱ', 'ﹱ'),
- ('ﹳ', 'ﹳ'),
- ('ﹷ', 'ﹷ'),
- ('ﹹ', 'ﹹ'),
- ('ﹻ', 'ﹻ'),
- ('ﹽ', 'ﹽ'),
- ('ﹿ', 'ﻼ'),
- ('A', 'Z'),
- ('a', 'z'),
- ('ヲ', 'ン'),
- ('ᅠ', 'ᄒ'),
- ('ᅡ', 'ᅦ'),
- ('ᅧ', 'ᅬ'),
- ('ᅭ', 'ᅲ'),
- ('ᅳ', 'ᅵ'),
- ('𐀀', '𐀋'),
- ('𐀍', '𐀦'),
- ('𐀨', '𐀺'),
- ('𐀼', '𐀽'),
- ('𐀿', '𐁍'),
- ('𐁐', '𐁝'),
- ('𐂀', '𐃺'),
- ('𐅀', '𐅴'),
- ('𐊀', '𐊜'),
- ('𐊠', '𐋐'),
- ('𐌀', '𐌟'),
- ('𐌭', '𐍊'),
- ('𐍐', '𐍵'),
- ('𐎀', '𐎝'),
- ('𐎠', '𐏃'),
- ('𐏈', '𐏏'),
- ('𐏑', '𐏕'),
- ('𐐀', '𐒝'),
- ('𐒰', '𐓓'),
- ('𐓘', '𐓻'),
- ('𐔀', '𐔧'),
- ('𐔰', '𐕣'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐗀', '𐗳'),
- ('𐘀', '𐜶'),
- ('𐝀', '𐝕'),
- ('𐝠', '𐝧'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𐠀', '𐠅'),
- ('𐠈', '𐠈'),
- ('𐠊', '𐠵'),
- ('𐠷', '𐠸'),
- ('𐠼', '𐠼'),
- ('𐠿', '𐡕'),
- ('𐡠', '𐡶'),
- ('𐢀', '𐢞'),
- ('𐣠', '𐣲'),
- ('𐣴', '𐣵'),
- ('𐤀', '𐤕'),
- ('𐤠', '𐤹'),
- ('𐦀', '𐦷'),
- ('𐦾', '𐦿'),
- ('𐨀', '𐨀'),
- ('𐨐', '𐨓'),
- ('𐨕', '𐨗'),
- ('𐨙', '𐨵'),
- ('𐩠', '𐩼'),
- ('𐪀', '𐪜'),
- ('𐫀', '𐫇'),
- ('𐫉', '𐫤'),
- ('𐬀', '𐬵'),
- ('𐭀', '𐭕'),
- ('𐭠', '𐭲'),
- ('𐮀', '𐮑'),
- ('𐰀', '𐱈'),
- ('𐲀', '𐲲'),
- ('𐳀', '𐳲'),
- ('𐴀', '𐴣'),
- ('𐵊', '𐵥'),
- ('𐵯', '𐶅'),
- ('𐺀', '𐺩'),
- ('𐺰', '𐺱'),
- ('𐻂', '𐻄'),
- ('𐼀', '𐼜'),
- ('𐼧', '𐼧'),
- ('𐼰', '𐽅'),
- ('𐽰', '𐾁'),
- ('𐾰', '𐿄'),
- ('𐿠', '𐿶'),
- ('𑀃', '𑀷'),
- ('𑁱', '𑁲'),
- ('𑁵', '𑁵'),
- ('𑂃', '𑂯'),
- ('𑃐', '𑃨'),
- ('𑄃', '𑄦'),
- ('𑅄', '𑅄'),
- ('𑅇', '𑅇'),
- ('𑅐', '𑅲'),
- ('𑅶', '𑅶'),
- ('𑆃', '𑆲'),
- ('𑇁', '𑇄'),
- ('𑇚', '𑇚'),
- ('𑇜', '𑇜'),
- ('𑈀', '𑈑'),
- ('𑈓', '𑈫'),
- ('𑈿', '𑉀'),
- ('𑊀', '𑊆'),
- ('𑊈', '𑊈'),
- ('𑊊', '𑊍'),
- ('𑊏', '𑊝'),
- ('𑊟', '𑊨'),
- ('𑊰', '𑋞'),
- ('𑌅', '𑌌'),
- ('𑌏', '𑌐'),
- ('𑌓', '𑌨'),
- ('𑌪', '𑌰'),
- ('𑌲', '𑌳'),
- ('𑌵', '𑌹'),
- ('𑌽', '𑌽'),
- ('𑍐', '𑍐'),
- ('𑍝', '𑍡'),
- ('𑎀', '𑎉'),
- ('𑎋', '𑎋'),
- ('𑎎', '𑎎'),
- ('𑎐', '𑎵'),
- ('𑎷', '𑎷'),
- ('𑏑', '𑏑'),
- ('𑏓', '𑏓'),
- ('𑐀', '𑐴'),
- ('𑑇', '𑑊'),
- ('𑑟', '𑑡'),
- ('𑒀', '𑒯'),
- ('𑓄', '𑓅'),
- ('𑓇', '𑓇'),
- ('𑖀', '𑖮'),
- ('𑗘', '𑗛'),
- ('𑘀', '𑘯'),
- ('𑙄', '𑙄'),
- ('𑚀', '𑚪'),
- ('𑚸', '𑚸'),
- ('𑜀', '𑜚'),
- ('𑝀', '𑝆'),
- ('𑠀', '𑠫'),
- ('𑢠', '𑣟'),
- ('𑣿', '𑤆'),
- ('𑤉', '𑤉'),
- ('𑤌', '𑤓'),
- ('𑤕', '𑤖'),
- ('𑤘', '𑤯'),
- ('𑤿', '𑤿'),
- ('𑥁', '𑥁'),
- ('𑦠', '𑦧'),
- ('𑦪', '𑧐'),
- ('𑧡', '𑧡'),
- ('𑧣', '𑧣'),
- ('𑨀', '𑨀'),
- ('𑨋', '𑨲'),
- ('𑨺', '𑨺'),
- ('𑩐', '𑩐'),
- ('𑩜', '𑪉'),
- ('𑪝', '𑪝'),
- ('𑪰', '𑫸'),
- ('𑯀', '𑯠'),
- ('𑰀', '𑰈'),
- ('𑰊', '𑰮'),
- ('𑱀', '𑱀'),
- ('𑱲', '𑲏'),
- ('𑴀', '𑴆'),
- ('𑴈', '𑴉'),
- ('𑴋', '𑴰'),
- ('𑵆', '𑵆'),
- ('𑵠', '𑵥'),
- ('𑵧', '𑵨'),
- ('𑵪', '𑶉'),
- ('𑶘', '𑶘'),
- ('𑻠', '𑻲'),
- ('𑼂', '𑼂'),
- ('𑼄', '𑼐'),
- ('𑼒', '𑼳'),
- ('𑾰', '𑾰'),
- ('𒀀', '𒎙'),
- ('𒐀', '𒑮'),
- ('𒒀', '𒕃'),
- ('𒾐', '𒿰'),
- ('𓀀', '𓐯'),
- ('𓑁', '𓑆'),
- ('𓑠', '𔏺'),
- ('𔐀', '𔙆'),
- ('𖄀', '𖄝'),
- ('𖠀', '𖨸'),
- ('𖩀', '𖩞'),
- ('𖩰', '𖪾'),
- ('𖫐', '𖫭'),
- ('𖬀', '𖬯'),
- ('𖭀', '𖭃'),
- ('𖭣', '𖭷'),
- ('𖭽', '𖮏'),
- ('𖵀', '𖵬'),
- ('𖹀', '𖹿'),
- ('𖼀', '𖽊'),
- ('𖽐', '𖽐'),
- ('𖾓', '𖾟'),
- ('𖿠', '𖿡'),
- ('𖿣', '𖿣'),
- ('𗀀', '𘟷'),
- ('𘠀', '𘳕'),
- ('𘳿', '𘴈'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('𛀀', '𛄢'),
- ('𛄲', '𛄲'),
- ('𛅐', '𛅒'),
- ('𛅕', '𛅕'),
- ('𛅤', '𛅧'),
- ('𛅰', '𛋻'),
- ('𛰀', '𛱪'),
- ('𛱰', '𛱼'),
- ('𛲀', '𛲈'),
- ('𛲐', '𛲙'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝛀'),
- ('𝛂', '𝛚'),
- ('𝛜', '𝛺'),
- ('𝛼', '𝜔'),
- ('𝜖', '𝜴'),
- ('𝜶', '𝝎'),
- ('𝝐', '𝝮'),
- ('𝝰', '𝞈'),
- ('𝞊', '𝞨'),
- ('𝞪', '𝟂'),
- ('𝟄', '𝟋'),
- ('𝼀', '𝼞'),
- ('𝼥', '𝼪'),
- ('𞀰', '𞁭'),
- ('𞄀', '𞄬'),
- ('𞄷', '𞄽'),
- ('𞅎', '𞅎'),
- ('𞊐', '𞊭'),
- ('𞋀', '𞋫'),
- ('𞓐', '𞓫'),
- ('𞗐', '𞗭'),
- ('𞗰', '𞗰'),
- ('𞟠', '𞟦'),
- ('𞟨', '𞟫'),
- ('𞟭', '𞟮'),
- ('𞟰', '𞟾'),
- ('𞠀', '𞣄'),
- ('𞤀', '𞥃'),
- ('𞥋', '𞥋'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('𠀀', '𪛟'),
- ('𪜀', '𫜹'),
- ('𫝀', '𫠝'),
- ('𫠠', '𬺡'),
- ('𬺰', '𮯠'),
- ('𮯰', '𮹝'),
- ('丽', '𪘀'),
- ('𰀀', '𱍊'),
- ('𱍐', '𲎯'),
-];
diff --git a/vendor/regex-syntax/src/unicode_tables/property_names.rs b/vendor/regex-syntax/src/unicode_tables/property_names.rs
deleted file mode 100644
index a27b4913..00000000
--- a/vendor/regex-syntax/src/unicode_tables/property_names.rs
+++ /dev/null
@@ -1,281 +0,0 @@
-// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
-//
-// ucd-generate property-names ucd-16.0.0
-//
-// Unicode version: 16.0.0.
-//
-// ucd-generate 0.3.1 is available on crates.io.
-
-pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
- ("age", "Age"),
- ("ahex", "ASCII_Hex_Digit"),
- ("alpha", "Alphabetic"),
- ("alphabetic", "Alphabetic"),
- ("asciihexdigit", "ASCII_Hex_Digit"),
- ("bc", "Bidi_Class"),
- ("bidic", "Bidi_Control"),
- ("bidiclass", "Bidi_Class"),
- ("bidicontrol", "Bidi_Control"),
- ("bidim", "Bidi_Mirrored"),
- ("bidimirrored", "Bidi_Mirrored"),
- ("bidimirroringglyph", "Bidi_Mirroring_Glyph"),
- ("bidipairedbracket", "Bidi_Paired_Bracket"),
- ("bidipairedbrackettype", "Bidi_Paired_Bracket_Type"),
- ("blk", "Block"),
- ("block", "Block"),
- ("bmg", "Bidi_Mirroring_Glyph"),
- ("bpb", "Bidi_Paired_Bracket"),
- ("bpt", "Bidi_Paired_Bracket_Type"),
- ("canonicalcombiningclass", "Canonical_Combining_Class"),
- ("cased", "Cased"),
- ("casefolding", "Case_Folding"),
- ("caseignorable", "Case_Ignorable"),
- ("ccc", "Canonical_Combining_Class"),
- ("ce", "Composition_Exclusion"),
- ("cf", "Case_Folding"),
- ("changeswhencasefolded", "Changes_When_Casefolded"),
- ("changeswhencasemapped", "Changes_When_Casemapped"),
- ("changeswhenlowercased", "Changes_When_Lowercased"),
- ("changeswhennfkccasefolded", "Changes_When_NFKC_Casefolded"),
- ("changeswhentitlecased", "Changes_When_Titlecased"),
- ("changeswhenuppercased", "Changes_When_Uppercased"),
- ("ci", "Case_Ignorable"),
- ("cjkaccountingnumeric", "kAccountingNumeric"),
- ("cjkcompatibilityvariant", "kCompatibilityVariant"),
- ("cjkiicore", "kIICore"),
- ("cjkirggsource", "kIRG_GSource"),
- ("cjkirghsource", "kIRG_HSource"),
- ("cjkirgjsource", "kIRG_JSource"),
- ("cjkirgkpsource", "kIRG_KPSource"),
- ("cjkirgksource", "kIRG_KSource"),
- ("cjkirgmsource", "kIRG_MSource"),
- ("cjkirgssource", "kIRG_SSource"),
- ("cjkirgtsource", "kIRG_TSource"),
- ("cjkirguksource", "kIRG_UKSource"),
- ("cjkirgusource", "kIRG_USource"),
- ("cjkirgvsource", "kIRG_VSource"),
- ("cjkothernumeric", "kOtherNumeric"),
- ("cjkprimarynumeric", "kPrimaryNumeric"),
- ("cjkrsunicode", "kRSUnicode"),
- ("compex", "Full_Composition_Exclusion"),
- ("compositionexclusion", "Composition_Exclusion"),
- ("cwcf", "Changes_When_Casefolded"),
- ("cwcm", "Changes_When_Casemapped"),
- ("cwkcf", "Changes_When_NFKC_Casefolded"),
- ("cwl", "Changes_When_Lowercased"),
- ("cwt", "Changes_When_Titlecased"),
- ("cwu", "Changes_When_Uppercased"),
- ("dash", "Dash"),
- ("decompositionmapping", "Decomposition_Mapping"),
- ("decompositiontype", "Decomposition_Type"),
- ("defaultignorablecodepoint", "Default_Ignorable_Code_Point"),
- ("dep", "Deprecated"),
- ("deprecated", "Deprecated"),
- ("di", "Default_Ignorable_Code_Point"),
- ("dia", "Diacritic"),
- ("diacritic", "Diacritic"),
- ("dm", "Decomposition_Mapping"),
- ("dt", "Decomposition_Type"),
- ("ea", "East_Asian_Width"),
- ("eastasianwidth", "East_Asian_Width"),
- ("ebase", "Emoji_Modifier_Base"),
- ("ecomp", "Emoji_Component"),
- ("emod", "Emoji_Modifier"),
- ("emoji", "Emoji"),
- ("emojicomponent", "Emoji_Component"),
- ("emojimodifier", "Emoji_Modifier"),
- ("emojimodifierbase", "Emoji_Modifier_Base"),
- ("emojipresentation", "Emoji_Presentation"),
- ("epres", "Emoji_Presentation"),
- ("equideo", "Equivalent_Unified_Ideograph"),
- ("equivalentunifiedideograph", "Equivalent_Unified_Ideograph"),
- ("expandsonnfc", "Expands_On_NFC"),
- ("expandsonnfd", "Expands_On_NFD"),
- ("expandsonnfkc", "Expands_On_NFKC"),
- ("expandsonnfkd", "Expands_On_NFKD"),
- ("ext", "Extender"),
- ("extendedpictographic", "Extended_Pictographic"),
- ("extender", "Extender"),
- ("extpict", "Extended_Pictographic"),
- ("fcnfkc", "FC_NFKC_Closure"),
- ("fcnfkcclosure", "FC_NFKC_Closure"),
- ("fullcompositionexclusion", "Full_Composition_Exclusion"),
- ("gc", "General_Category"),
- ("gcb", "Grapheme_Cluster_Break"),
- ("generalcategory", "General_Category"),
- ("graphemebase", "Grapheme_Base"),
- ("graphemeclusterbreak", "Grapheme_Cluster_Break"),
- ("graphemeextend", "Grapheme_Extend"),
- ("graphemelink", "Grapheme_Link"),
- ("grbase", "Grapheme_Base"),
- ("grext", "Grapheme_Extend"),
- ("grlink", "Grapheme_Link"),
- ("hangulsyllabletype", "Hangul_Syllable_Type"),
- ("hex", "Hex_Digit"),
- ("hexdigit", "Hex_Digit"),
- ("hst", "Hangul_Syllable_Type"),
- ("hyphen", "Hyphen"),
- ("idc", "ID_Continue"),
- ("idcompatmathcontinue", "ID_Compat_Math_Continue"),
- ("idcompatmathstart", "ID_Compat_Math_Start"),
- ("idcontinue", "ID_Continue"),
- ("ideo", "Ideographic"),
- ("ideographic", "Ideographic"),
- ("ids", "ID_Start"),
- ("idsb", "IDS_Binary_Operator"),
- ("idsbinaryoperator", "IDS_Binary_Operator"),
- ("idst", "IDS_Trinary_Operator"),
- ("idstart", "ID_Start"),
- ("idstrinaryoperator", "IDS_Trinary_Operator"),
- ("idsu", "IDS_Unary_Operator"),
- ("idsunaryoperator", "IDS_Unary_Operator"),
- ("incb", "Indic_Conjunct_Break"),
- ("indicconjunctbreak", "Indic_Conjunct_Break"),
- ("indicpositionalcategory", "Indic_Positional_Category"),
- ("indicsyllabiccategory", "Indic_Syllabic_Category"),
- ("inpc", "Indic_Positional_Category"),
- ("insc", "Indic_Syllabic_Category"),
- ("isc", "ISO_Comment"),
- ("jamoshortname", "Jamo_Short_Name"),
- ("jg", "Joining_Group"),
- ("joinc", "Join_Control"),
- ("joincontrol", "Join_Control"),
- ("joininggroup", "Joining_Group"),
- ("joiningtype", "Joining_Type"),
- ("jsn", "Jamo_Short_Name"),
- ("jt", "Joining_Type"),
- ("kaccountingnumeric", "kAccountingNumeric"),
- ("kcompatibilityvariant", "kCompatibilityVariant"),
- ("kehcat", "kEH_Cat"),
- ("kehdesc", "kEH_Desc"),
- ("kehhg", "kEH_HG"),
- ("kehifao", "kEH_IFAO"),
- ("kehjsesh", "kEH_JSesh"),
- ("kehnomirror", "kEH_NoMirror"),
- ("kehnorotate", "kEH_NoRotate"),
- ("kiicore", "kIICore"),
- ("kirggsource", "kIRG_GSource"),
- ("kirghsource", "kIRG_HSource"),
- ("kirgjsource", "kIRG_JSource"),
- ("kirgkpsource", "kIRG_KPSource"),
- ("kirgksource", "kIRG_KSource"),
- ("kirgmsource", "kIRG_MSource"),
- ("kirgssource", "kIRG_SSource"),
- ("kirgtsource", "kIRG_TSource"),
- ("kirguksource", "kIRG_UKSource"),
- ("kirgusource", "kIRG_USource"),
- ("kirgvsource", "kIRG_VSource"),
- ("kothernumeric", "kOtherNumeric"),
- ("kprimarynumeric", "kPrimaryNumeric"),
- ("krsunicode", "kRSUnicode"),
- ("lb", "Line_Break"),
- ("lc", "Lowercase_Mapping"),
- ("linebreak", "Line_Break"),
- ("loe", "Logical_Order_Exception"),
- ("logicalorderexception", "Logical_Order_Exception"),
- ("lower", "Lowercase"),
- ("lowercase", "Lowercase"),
- ("lowercasemapping", "Lowercase_Mapping"),
- ("math", "Math"),
- ("mcm", "Modifier_Combining_Mark"),
- ("modifiercombiningmark", "Modifier_Combining_Mark"),
- ("na", "Name"),
- ("na1", "Unicode_1_Name"),
- ("name", "Name"),
- ("namealias", "Name_Alias"),
- ("nchar", "Noncharacter_Code_Point"),
- ("nfcqc", "NFC_Quick_Check"),
- ("nfcquickcheck", "NFC_Quick_Check"),
- ("nfdqc", "NFD_Quick_Check"),
- ("nfdquickcheck", "NFD_Quick_Check"),
- ("nfkccasefold", "NFKC_Casefold"),
- ("nfkccf", "NFKC_Casefold"),
- ("nfkcqc", "NFKC_Quick_Check"),
- ("nfkcquickcheck", "NFKC_Quick_Check"),
- ("nfkcscf", "NFKC_Simple_Casefold"),
- ("nfkcsimplecasefold", "NFKC_Simple_Casefold"),
- ("nfkdqc", "NFKD_Quick_Check"),
- ("nfkdquickcheck", "NFKD_Quick_Check"),
- ("noncharactercodepoint", "Noncharacter_Code_Point"),
- ("nt", "Numeric_Type"),
- ("numerictype", "Numeric_Type"),
- ("numericvalue", "Numeric_Value"),
- ("nv", "Numeric_Value"),
- ("oalpha", "Other_Alphabetic"),
- ("ocomment", "ISO_Comment"),
- ("odi", "Other_Default_Ignorable_Code_Point"),
- ("ogrext", "Other_Grapheme_Extend"),
- ("oidc", "Other_ID_Continue"),
- ("oids", "Other_ID_Start"),
- ("olower", "Other_Lowercase"),
- ("omath", "Other_Math"),
- ("otheralphabetic", "Other_Alphabetic"),
- ("otherdefaultignorablecodepoint", "Other_Default_Ignorable_Code_Point"),
- ("othergraphemeextend", "Other_Grapheme_Extend"),
- ("otheridcontinue", "Other_ID_Continue"),
- ("otheridstart", "Other_ID_Start"),
- ("otherlowercase", "Other_Lowercase"),
- ("othermath", "Other_Math"),
- ("otheruppercase", "Other_Uppercase"),
- ("oupper", "Other_Uppercase"),
- ("patsyn", "Pattern_Syntax"),
- ("patternsyntax", "Pattern_Syntax"),
- ("patternwhitespace", "Pattern_White_Space"),
- ("patws", "Pattern_White_Space"),
- ("pcm", "Prepended_Concatenation_Mark"),
- ("prependedconcatenationmark", "Prepended_Concatenation_Mark"),
- ("qmark", "Quotation_Mark"),
- ("quotationmark", "Quotation_Mark"),
- ("radical", "Radical"),
- ("regionalindicator", "Regional_Indicator"),
- ("ri", "Regional_Indicator"),
- ("sb", "Sentence_Break"),
- ("sc", "Script"),
- ("scf", "Simple_Case_Folding"),
- ("script", "Script"),
- ("scriptextensions", "Script_Extensions"),
- ("scx", "Script_Extensions"),
- ("sd", "Soft_Dotted"),
- ("sentencebreak", "Sentence_Break"),
- ("sentenceterminal", "Sentence_Terminal"),
- ("sfc", "Simple_Case_Folding"),
- ("simplecasefolding", "Simple_Case_Folding"),
- ("simplelowercasemapping", "Simple_Lowercase_Mapping"),
- ("simpletitlecasemapping", "Simple_Titlecase_Mapping"),
- ("simpleuppercasemapping", "Simple_Uppercase_Mapping"),
- ("slc", "Simple_Lowercase_Mapping"),
- ("softdotted", "Soft_Dotted"),
- ("space", "White_Space"),
- ("stc", "Simple_Titlecase_Mapping"),
- ("sterm", "Sentence_Terminal"),
- ("suc", "Simple_Uppercase_Mapping"),
- ("tc", "Titlecase_Mapping"),
- ("term", "Terminal_Punctuation"),
- ("terminalpunctuation", "Terminal_Punctuation"),
- ("titlecasemapping", "Titlecase_Mapping"),
- ("uc", "Uppercase_Mapping"),
- ("uideo", "Unified_Ideograph"),
- ("unicode1name", "Unicode_1_Name"),
- ("unicoderadicalstroke", "kRSUnicode"),
- ("unifiedideograph", "Unified_Ideograph"),
- ("upper", "Uppercase"),
- ("uppercase", "Uppercase"),
- ("uppercasemapping", "Uppercase_Mapping"),
- ("urs", "kRSUnicode"),
- ("variationselector", "Variation_Selector"),
- ("verticalorientation", "Vertical_Orientation"),
- ("vo", "Vertical_Orientation"),
- ("vs", "Variation_Selector"),
- ("wb", "Word_Break"),
- ("whitespace", "White_Space"),
- ("wordbreak", "Word_Break"),
- ("wspace", "White_Space"),
- ("xidc", "XID_Continue"),
- ("xidcontinue", "XID_Continue"),
- ("xids", "XID_Start"),
- ("xidstart", "XID_Start"),
- ("xonfc", "Expands_On_NFC"),
- ("xonfd", "Expands_On_NFD"),
- ("xonfkc", "Expands_On_NFKC"),
- ("xonfkd", "Expands_On_NFKD"),
-];
diff --git a/vendor/regex-syntax/src/unicode_tables/property_values.rs b/vendor/regex-syntax/src/unicode_tables/property_values.rs
deleted file mode 100644
index 2270d663..00000000
--- a/vendor/regex-syntax/src/unicode_tables/property_values.rs
+++ /dev/null
@@ -1,956 +0,0 @@
-// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
-//
-// ucd-generate property-values ucd-16.0.0 --include gc,script,scx,age,gcb,wb,sb
-//
-// Unicode version: 16.0.0.
-//
-// ucd-generate 0.3.1 is available on crates.io.
-
-pub const PROPERTY_VALUES: &'static [(
- &'static str,
- &'static [(&'static str, &'static str)],
-)] = &[
- (
- "Age",
- &[
- ("1.1", "V1_1"),
- ("10.0", "V10_0"),
- ("11.0", "V11_0"),
- ("12.0", "V12_0"),
- ("12.1", "V12_1"),
- ("13.0", "V13_0"),
- ("14.0", "V14_0"),
- ("15.0", "V15_0"),
- ("15.1", "V15_1"),
- ("16.0", "V16_0"),
- ("2.0", "V2_0"),
- ("2.1", "V2_1"),
- ("3.0", "V3_0"),
- ("3.1", "V3_1"),
- ("3.2", "V3_2"),
- ("4.0", "V4_0"),
- ("4.1", "V4_1"),
- ("5.0", "V5_0"),
- ("5.1", "V5_1"),
- ("5.2", "V5_2"),
- ("6.0", "V6_0"),
- ("6.1", "V6_1"),
- ("6.2", "V6_2"),
- ("6.3", "V6_3"),
- ("7.0", "V7_0"),
- ("8.0", "V8_0"),
- ("9.0", "V9_0"),
- ("na", "Unassigned"),
- ("unassigned", "Unassigned"),
- ("v100", "V10_0"),
- ("v11", "V1_1"),
- ("v110", "V11_0"),
- ("v120", "V12_0"),
- ("v121", "V12_1"),
- ("v130", "V13_0"),
- ("v140", "V14_0"),
- ("v150", "V15_0"),
- ("v151", "V15_1"),
- ("v160", "V16_0"),
- ("v20", "V2_0"),
- ("v21", "V2_1"),
- ("v30", "V3_0"),
- ("v31", "V3_1"),
- ("v32", "V3_2"),
- ("v40", "V4_0"),
- ("v41", "V4_1"),
- ("v50", "V5_0"),
- ("v51", "V5_1"),
- ("v52", "V5_2"),
- ("v60", "V6_0"),
- ("v61", "V6_1"),
- ("v62", "V6_2"),
- ("v63", "V6_3"),
- ("v70", "V7_0"),
- ("v80", "V8_0"),
- ("v90", "V9_0"),
- ],
- ),
- (
- "General_Category",
- &[
- ("c", "Other"),
- ("casedletter", "Cased_Letter"),
- ("cc", "Control"),
- ("cf", "Format"),
- ("closepunctuation", "Close_Punctuation"),
- ("cn", "Unassigned"),
- ("cntrl", "Control"),
- ("co", "Private_Use"),
- ("combiningmark", "Mark"),
- ("connectorpunctuation", "Connector_Punctuation"),
- ("control", "Control"),
- ("cs", "Surrogate"),
- ("currencysymbol", "Currency_Symbol"),
- ("dashpunctuation", "Dash_Punctuation"),
- ("decimalnumber", "Decimal_Number"),
- ("digit", "Decimal_Number"),
- ("enclosingmark", "Enclosing_Mark"),
- ("finalpunctuation", "Final_Punctuation"),
- ("format", "Format"),
- ("initialpunctuation", "Initial_Punctuation"),
- ("l", "Letter"),
- ("lc", "Cased_Letter"),
- ("letter", "Letter"),
- ("letternumber", "Letter_Number"),
- ("lineseparator", "Line_Separator"),
- ("ll", "Lowercase_Letter"),
- ("lm", "Modifier_Letter"),
- ("lo", "Other_Letter"),
- ("lowercaseletter", "Lowercase_Letter"),
- ("lt", "Titlecase_Letter"),
- ("lu", "Uppercase_Letter"),
- ("m", "Mark"),
- ("mark", "Mark"),
- ("mathsymbol", "Math_Symbol"),
- ("mc", "Spacing_Mark"),
- ("me", "Enclosing_Mark"),
- ("mn", "Nonspacing_Mark"),
- ("modifierletter", "Modifier_Letter"),
- ("modifiersymbol", "Modifier_Symbol"),
- ("n", "Number"),
- ("nd", "Decimal_Number"),
- ("nl", "Letter_Number"),
- ("no", "Other_Number"),
- ("nonspacingmark", "Nonspacing_Mark"),
- ("number", "Number"),
- ("openpunctuation", "Open_Punctuation"),
- ("other", "Other"),
- ("otherletter", "Other_Letter"),
- ("othernumber", "Other_Number"),
- ("otherpunctuation", "Other_Punctuation"),
- ("othersymbol", "Other_Symbol"),
- ("p", "Punctuation"),
- ("paragraphseparator", "Paragraph_Separator"),
- ("pc", "Connector_Punctuation"),
- ("pd", "Dash_Punctuation"),
- ("pe", "Close_Punctuation"),
- ("pf", "Final_Punctuation"),
- ("pi", "Initial_Punctuation"),
- ("po", "Other_Punctuation"),
- ("privateuse", "Private_Use"),
- ("ps", "Open_Punctuation"),
- ("punct", "Punctuation"),
- ("punctuation", "Punctuation"),
- ("s", "Symbol"),
- ("sc", "Currency_Symbol"),
- ("separator", "Separator"),
- ("sk", "Modifier_Symbol"),
- ("sm", "Math_Symbol"),
- ("so", "Other_Symbol"),
- ("spaceseparator", "Space_Separator"),
- ("spacingmark", "Spacing_Mark"),
- ("surrogate", "Surrogate"),
- ("symbol", "Symbol"),
- ("titlecaseletter", "Titlecase_Letter"),
- ("unassigned", "Unassigned"),
- ("uppercaseletter", "Uppercase_Letter"),
- ("z", "Separator"),
- ("zl", "Line_Separator"),
- ("zp", "Paragraph_Separator"),
- ("zs", "Space_Separator"),
- ],
- ),
- (
- "Grapheme_Cluster_Break",
- &[
- ("cn", "Control"),
- ("control", "Control"),
- ("cr", "CR"),
- ("eb", "E_Base"),
- ("ebase", "E_Base"),
- ("ebasegaz", "E_Base_GAZ"),
- ("ebg", "E_Base_GAZ"),
- ("em", "E_Modifier"),
- ("emodifier", "E_Modifier"),
- ("ex", "Extend"),
- ("extend", "Extend"),
- ("gaz", "Glue_After_Zwj"),
- ("glueafterzwj", "Glue_After_Zwj"),
- ("l", "L"),
- ("lf", "LF"),
- ("lv", "LV"),
- ("lvt", "LVT"),
- ("other", "Other"),
- ("pp", "Prepend"),
- ("prepend", "Prepend"),
- ("regionalindicator", "Regional_Indicator"),
- ("ri", "Regional_Indicator"),
- ("sm", "SpacingMark"),
- ("spacingmark", "SpacingMark"),
- ("t", "T"),
- ("v", "V"),
- ("xx", "Other"),
- ("zwj", "ZWJ"),
- ],
- ),
- (
- "Script",
- &[
- ("adlam", "Adlam"),
- ("adlm", "Adlam"),
- ("aghb", "Caucasian_Albanian"),
- ("ahom", "Ahom"),
- ("anatolianhieroglyphs", "Anatolian_Hieroglyphs"),
- ("arab", "Arabic"),
- ("arabic", "Arabic"),
- ("armenian", "Armenian"),
- ("armi", "Imperial_Aramaic"),
- ("armn", "Armenian"),
- ("avestan", "Avestan"),
- ("avst", "Avestan"),
- ("bali", "Balinese"),
- ("balinese", "Balinese"),
- ("bamu", "Bamum"),
- ("bamum", "Bamum"),
- ("bass", "Bassa_Vah"),
- ("bassavah", "Bassa_Vah"),
- ("batak", "Batak"),
- ("batk", "Batak"),
- ("beng", "Bengali"),
- ("bengali", "Bengali"),
- ("bhaiksuki", "Bhaiksuki"),
- ("bhks", "Bhaiksuki"),
- ("bopo", "Bopomofo"),
- ("bopomofo", "Bopomofo"),
- ("brah", "Brahmi"),
- ("brahmi", "Brahmi"),
- ("brai", "Braille"),
- ("braille", "Braille"),
- ("bugi", "Buginese"),
- ("buginese", "Buginese"),
- ("buhd", "Buhid"),
- ("buhid", "Buhid"),
- ("cakm", "Chakma"),
- ("canadianaboriginal", "Canadian_Aboriginal"),
- ("cans", "Canadian_Aboriginal"),
- ("cari", "Carian"),
- ("carian", "Carian"),
- ("caucasianalbanian", "Caucasian_Albanian"),
- ("chakma", "Chakma"),
- ("cham", "Cham"),
- ("cher", "Cherokee"),
- ("cherokee", "Cherokee"),
- ("chorasmian", "Chorasmian"),
- ("chrs", "Chorasmian"),
- ("common", "Common"),
- ("copt", "Coptic"),
- ("coptic", "Coptic"),
- ("cpmn", "Cypro_Minoan"),
- ("cprt", "Cypriot"),
- ("cuneiform", "Cuneiform"),
- ("cypriot", "Cypriot"),
- ("cyprominoan", "Cypro_Minoan"),
- ("cyrillic", "Cyrillic"),
- ("cyrl", "Cyrillic"),
- ("deseret", "Deseret"),
- ("deva", "Devanagari"),
- ("devanagari", "Devanagari"),
- ("diak", "Dives_Akuru"),
- ("divesakuru", "Dives_Akuru"),
- ("dogr", "Dogra"),
- ("dogra", "Dogra"),
- ("dsrt", "Deseret"),
- ("dupl", "Duployan"),
- ("duployan", "Duployan"),
- ("egyp", "Egyptian_Hieroglyphs"),
- ("egyptianhieroglyphs", "Egyptian_Hieroglyphs"),
- ("elba", "Elbasan"),
- ("elbasan", "Elbasan"),
- ("elym", "Elymaic"),
- ("elymaic", "Elymaic"),
- ("ethi", "Ethiopic"),
- ("ethiopic", "Ethiopic"),
- ("gara", "Garay"),
- ("garay", "Garay"),
- ("geor", "Georgian"),
- ("georgian", "Georgian"),
- ("glag", "Glagolitic"),
- ("glagolitic", "Glagolitic"),
- ("gong", "Gunjala_Gondi"),
- ("gonm", "Masaram_Gondi"),
- ("goth", "Gothic"),
- ("gothic", "Gothic"),
- ("gran", "Grantha"),
- ("grantha", "Grantha"),
- ("greek", "Greek"),
- ("grek", "Greek"),
- ("gujarati", "Gujarati"),
- ("gujr", "Gujarati"),
- ("gukh", "Gurung_Khema"),
- ("gunjalagondi", "Gunjala_Gondi"),
- ("gurmukhi", "Gurmukhi"),
- ("guru", "Gurmukhi"),
- ("gurungkhema", "Gurung_Khema"),
- ("han", "Han"),
- ("hang", "Hangul"),
- ("hangul", "Hangul"),
- ("hani", "Han"),
- ("hanifirohingya", "Hanifi_Rohingya"),
- ("hano", "Hanunoo"),
- ("hanunoo", "Hanunoo"),
- ("hatr", "Hatran"),
- ("hatran", "Hatran"),
- ("hebr", "Hebrew"),
- ("hebrew", "Hebrew"),
- ("hira", "Hiragana"),
- ("hiragana", "Hiragana"),
- ("hluw", "Anatolian_Hieroglyphs"),
- ("hmng", "Pahawh_Hmong"),
- ("hmnp", "Nyiakeng_Puachue_Hmong"),
- ("hrkt", "Katakana_Or_Hiragana"),
- ("hung", "Old_Hungarian"),
- ("imperialaramaic", "Imperial_Aramaic"),
- ("inherited", "Inherited"),
- ("inscriptionalpahlavi", "Inscriptional_Pahlavi"),
- ("inscriptionalparthian", "Inscriptional_Parthian"),
- ("ital", "Old_Italic"),
- ("java", "Javanese"),
- ("javanese", "Javanese"),
- ("kaithi", "Kaithi"),
- ("kali", "Kayah_Li"),
- ("kana", "Katakana"),
- ("kannada", "Kannada"),
- ("katakana", "Katakana"),
- ("katakanaorhiragana", "Katakana_Or_Hiragana"),
- ("kawi", "Kawi"),
- ("kayahli", "Kayah_Li"),
- ("khar", "Kharoshthi"),
- ("kharoshthi", "Kharoshthi"),
- ("khitansmallscript", "Khitan_Small_Script"),
- ("khmer", "Khmer"),
- ("khmr", "Khmer"),
- ("khoj", "Khojki"),
- ("khojki", "Khojki"),
- ("khudawadi", "Khudawadi"),
- ("kiratrai", "Kirat_Rai"),
- ("kits", "Khitan_Small_Script"),
- ("knda", "Kannada"),
- ("krai", "Kirat_Rai"),
- ("kthi", "Kaithi"),
- ("lana", "Tai_Tham"),
- ("lao", "Lao"),
- ("laoo", "Lao"),
- ("latin", "Latin"),
- ("latn", "Latin"),
- ("lepc", "Lepcha"),
- ("lepcha", "Lepcha"),
- ("limb", "Limbu"),
- ("limbu", "Limbu"),
- ("lina", "Linear_A"),
- ("linb", "Linear_B"),
- ("lineara", "Linear_A"),
- ("linearb", "Linear_B"),
- ("lisu", "Lisu"),
- ("lyci", "Lycian"),
- ("lycian", "Lycian"),
- ("lydi", "Lydian"),
- ("lydian", "Lydian"),
- ("mahajani", "Mahajani"),
- ("mahj", "Mahajani"),
- ("maka", "Makasar"),
- ("makasar", "Makasar"),
- ("malayalam", "Malayalam"),
- ("mand", "Mandaic"),
- ("mandaic", "Mandaic"),
- ("mani", "Manichaean"),
- ("manichaean", "Manichaean"),
- ("marc", "Marchen"),
- ("marchen", "Marchen"),
- ("masaramgondi", "Masaram_Gondi"),
- ("medefaidrin", "Medefaidrin"),
- ("medf", "Medefaidrin"),
- ("meeteimayek", "Meetei_Mayek"),
- ("mend", "Mende_Kikakui"),
- ("mendekikakui", "Mende_Kikakui"),
- ("merc", "Meroitic_Cursive"),
- ("mero", "Meroitic_Hieroglyphs"),
- ("meroiticcursive", "Meroitic_Cursive"),
- ("meroitichieroglyphs", "Meroitic_Hieroglyphs"),
- ("miao", "Miao"),
- ("mlym", "Malayalam"),
- ("modi", "Modi"),
- ("mong", "Mongolian"),
- ("mongolian", "Mongolian"),
- ("mro", "Mro"),
- ("mroo", "Mro"),
- ("mtei", "Meetei_Mayek"),
- ("mult", "Multani"),
- ("multani", "Multani"),
- ("myanmar", "Myanmar"),
- ("mymr", "Myanmar"),
- ("nabataean", "Nabataean"),
- ("nagm", "Nag_Mundari"),
- ("nagmundari", "Nag_Mundari"),
- ("nand", "Nandinagari"),
- ("nandinagari", "Nandinagari"),
- ("narb", "Old_North_Arabian"),
- ("nbat", "Nabataean"),
- ("newa", "Newa"),
- ("newtailue", "New_Tai_Lue"),
- ("nko", "Nko"),
- ("nkoo", "Nko"),
- ("nshu", "Nushu"),
- ("nushu", "Nushu"),
- ("nyiakengpuachuehmong", "Nyiakeng_Puachue_Hmong"),
- ("ogam", "Ogham"),
- ("ogham", "Ogham"),
- ("olchiki", "Ol_Chiki"),
- ("olck", "Ol_Chiki"),
- ("oldhungarian", "Old_Hungarian"),
- ("olditalic", "Old_Italic"),
- ("oldnortharabian", "Old_North_Arabian"),
- ("oldpermic", "Old_Permic"),
- ("oldpersian", "Old_Persian"),
- ("oldsogdian", "Old_Sogdian"),
- ("oldsoutharabian", "Old_South_Arabian"),
- ("oldturkic", "Old_Turkic"),
- ("olduyghur", "Old_Uyghur"),
- ("olonal", "Ol_Onal"),
- ("onao", "Ol_Onal"),
- ("oriya", "Oriya"),
- ("orkh", "Old_Turkic"),
- ("orya", "Oriya"),
- ("osage", "Osage"),
- ("osge", "Osage"),
- ("osma", "Osmanya"),
- ("osmanya", "Osmanya"),
- ("ougr", "Old_Uyghur"),
- ("pahawhhmong", "Pahawh_Hmong"),
- ("palm", "Palmyrene"),
- ("palmyrene", "Palmyrene"),
- ("pauc", "Pau_Cin_Hau"),
- ("paucinhau", "Pau_Cin_Hau"),
- ("perm", "Old_Permic"),
- ("phag", "Phags_Pa"),
- ("phagspa", "Phags_Pa"),
- ("phli", "Inscriptional_Pahlavi"),
- ("phlp", "Psalter_Pahlavi"),
- ("phnx", "Phoenician"),
- ("phoenician", "Phoenician"),
- ("plrd", "Miao"),
- ("prti", "Inscriptional_Parthian"),
- ("psalterpahlavi", "Psalter_Pahlavi"),
- ("qaac", "Coptic"),
- ("qaai", "Inherited"),
- ("rejang", "Rejang"),
- ("rjng", "Rejang"),
- ("rohg", "Hanifi_Rohingya"),
- ("runic", "Runic"),
- ("runr", "Runic"),
- ("samaritan", "Samaritan"),
- ("samr", "Samaritan"),
- ("sarb", "Old_South_Arabian"),
- ("saur", "Saurashtra"),
- ("saurashtra", "Saurashtra"),
- ("sgnw", "SignWriting"),
- ("sharada", "Sharada"),
- ("shavian", "Shavian"),
- ("shaw", "Shavian"),
- ("shrd", "Sharada"),
- ("sidd", "Siddham"),
- ("siddham", "Siddham"),
- ("signwriting", "SignWriting"),
- ("sind", "Khudawadi"),
- ("sinh", "Sinhala"),
- ("sinhala", "Sinhala"),
- ("sogd", "Sogdian"),
- ("sogdian", "Sogdian"),
- ("sogo", "Old_Sogdian"),
- ("sora", "Sora_Sompeng"),
- ("sorasompeng", "Sora_Sompeng"),
- ("soyo", "Soyombo"),
- ("soyombo", "Soyombo"),
- ("sund", "Sundanese"),
- ("sundanese", "Sundanese"),
- ("sunu", "Sunuwar"),
- ("sunuwar", "Sunuwar"),
- ("sylo", "Syloti_Nagri"),
- ("sylotinagri", "Syloti_Nagri"),
- ("syrc", "Syriac"),
- ("syriac", "Syriac"),
- ("tagalog", "Tagalog"),
- ("tagb", "Tagbanwa"),
- ("tagbanwa", "Tagbanwa"),
- ("taile", "Tai_Le"),
- ("taitham", "Tai_Tham"),
- ("taiviet", "Tai_Viet"),
- ("takr", "Takri"),
- ("takri", "Takri"),
- ("tale", "Tai_Le"),
- ("talu", "New_Tai_Lue"),
- ("tamil", "Tamil"),
- ("taml", "Tamil"),
- ("tang", "Tangut"),
- ("tangsa", "Tangsa"),
- ("tangut", "Tangut"),
- ("tavt", "Tai_Viet"),
- ("telu", "Telugu"),
- ("telugu", "Telugu"),
- ("tfng", "Tifinagh"),
- ("tglg", "Tagalog"),
- ("thaa", "Thaana"),
- ("thaana", "Thaana"),
- ("thai", "Thai"),
- ("tibetan", "Tibetan"),
- ("tibt", "Tibetan"),
- ("tifinagh", "Tifinagh"),
- ("tirh", "Tirhuta"),
- ("tirhuta", "Tirhuta"),
- ("tnsa", "Tangsa"),
- ("todhri", "Todhri"),
- ("todr", "Todhri"),
- ("toto", "Toto"),
- ("tulutigalari", "Tulu_Tigalari"),
- ("tutg", "Tulu_Tigalari"),
- ("ugar", "Ugaritic"),
- ("ugaritic", "Ugaritic"),
- ("unknown", "Unknown"),
- ("vai", "Vai"),
- ("vaii", "Vai"),
- ("vith", "Vithkuqi"),
- ("vithkuqi", "Vithkuqi"),
- ("wancho", "Wancho"),
- ("wara", "Warang_Citi"),
- ("warangciti", "Warang_Citi"),
- ("wcho", "Wancho"),
- ("xpeo", "Old_Persian"),
- ("xsux", "Cuneiform"),
- ("yezi", "Yezidi"),
- ("yezidi", "Yezidi"),
- ("yi", "Yi"),
- ("yiii", "Yi"),
- ("zanabazarsquare", "Zanabazar_Square"),
- ("zanb", "Zanabazar_Square"),
- ("zinh", "Inherited"),
- ("zyyy", "Common"),
- ("zzzz", "Unknown"),
- ],
- ),
- (
- "Script_Extensions",
- &[
- ("adlam", "Adlam"),
- ("adlm", "Adlam"),
- ("aghb", "Caucasian_Albanian"),
- ("ahom", "Ahom"),
- ("anatolianhieroglyphs", "Anatolian_Hieroglyphs"),
- ("arab", "Arabic"),
- ("arabic", "Arabic"),
- ("armenian", "Armenian"),
- ("armi", "Imperial_Aramaic"),
- ("armn", "Armenian"),
- ("avestan", "Avestan"),
- ("avst", "Avestan"),
- ("bali", "Balinese"),
- ("balinese", "Balinese"),
- ("bamu", "Bamum"),
- ("bamum", "Bamum"),
- ("bass", "Bassa_Vah"),
- ("bassavah", "Bassa_Vah"),
- ("batak", "Batak"),
- ("batk", "Batak"),
- ("beng", "Bengali"),
- ("bengali", "Bengali"),
- ("bhaiksuki", "Bhaiksuki"),
- ("bhks", "Bhaiksuki"),
- ("bopo", "Bopomofo"),
- ("bopomofo", "Bopomofo"),
- ("brah", "Brahmi"),
- ("brahmi", "Brahmi"),
- ("brai", "Braille"),
- ("braille", "Braille"),
- ("bugi", "Buginese"),
- ("buginese", "Buginese"),
- ("buhd", "Buhid"),
- ("buhid", "Buhid"),
- ("cakm", "Chakma"),
- ("canadianaboriginal", "Canadian_Aboriginal"),
- ("cans", "Canadian_Aboriginal"),
- ("cari", "Carian"),
- ("carian", "Carian"),
- ("caucasianalbanian", "Caucasian_Albanian"),
- ("chakma", "Chakma"),
- ("cham", "Cham"),
- ("cher", "Cherokee"),
- ("cherokee", "Cherokee"),
- ("chorasmian", "Chorasmian"),
- ("chrs", "Chorasmian"),
- ("common", "Common"),
- ("copt", "Coptic"),
- ("coptic", "Coptic"),
- ("cpmn", "Cypro_Minoan"),
- ("cprt", "Cypriot"),
- ("cuneiform", "Cuneiform"),
- ("cypriot", "Cypriot"),
- ("cyprominoan", "Cypro_Minoan"),
- ("cyrillic", "Cyrillic"),
- ("cyrl", "Cyrillic"),
- ("deseret", "Deseret"),
- ("deva", "Devanagari"),
- ("devanagari", "Devanagari"),
- ("diak", "Dives_Akuru"),
- ("divesakuru", "Dives_Akuru"),
- ("dogr", "Dogra"),
- ("dogra", "Dogra"),
- ("dsrt", "Deseret"),
- ("dupl", "Duployan"),
- ("duployan", "Duployan"),
- ("egyp", "Egyptian_Hieroglyphs"),
- ("egyptianhieroglyphs", "Egyptian_Hieroglyphs"),
- ("elba", "Elbasan"),
- ("elbasan", "Elbasan"),
- ("elym", "Elymaic"),
- ("elymaic", "Elymaic"),
- ("ethi", "Ethiopic"),
- ("ethiopic", "Ethiopic"),
- ("gara", "Garay"),
- ("garay", "Garay"),
- ("geor", "Georgian"),
- ("georgian", "Georgian"),
- ("glag", "Glagolitic"),
- ("glagolitic", "Glagolitic"),
- ("gong", "Gunjala_Gondi"),
- ("gonm", "Masaram_Gondi"),
- ("goth", "Gothic"),
- ("gothic", "Gothic"),
- ("gran", "Grantha"),
- ("grantha", "Grantha"),
- ("greek", "Greek"),
- ("grek", "Greek"),
- ("gujarati", "Gujarati"),
- ("gujr", "Gujarati"),
- ("gukh", "Gurung_Khema"),
- ("gunjalagondi", "Gunjala_Gondi"),
- ("gurmukhi", "Gurmukhi"),
- ("guru", "Gurmukhi"),
- ("gurungkhema", "Gurung_Khema"),
- ("han", "Han"),
- ("hang", "Hangul"),
- ("hangul", "Hangul"),
- ("hani", "Han"),
- ("hanifirohingya", "Hanifi_Rohingya"),
- ("hano", "Hanunoo"),
- ("hanunoo", "Hanunoo"),
- ("hatr", "Hatran"),
- ("hatran", "Hatran"),
- ("hebr", "Hebrew"),
- ("hebrew", "Hebrew"),
- ("hira", "Hiragana"),
- ("hiragana", "Hiragana"),
- ("hluw", "Anatolian_Hieroglyphs"),
- ("hmng", "Pahawh_Hmong"),
- ("hmnp", "Nyiakeng_Puachue_Hmong"),
- ("hrkt", "Katakana_Or_Hiragana"),
- ("hung", "Old_Hungarian"),
- ("imperialaramaic", "Imperial_Aramaic"),
- ("inherited", "Inherited"),
- ("inscriptionalpahlavi", "Inscriptional_Pahlavi"),
- ("inscriptionalparthian", "Inscriptional_Parthian"),
- ("ital", "Old_Italic"),
- ("java", "Javanese"),
- ("javanese", "Javanese"),
- ("kaithi", "Kaithi"),
- ("kali", "Kayah_Li"),
- ("kana", "Katakana"),
- ("kannada", "Kannada"),
- ("katakana", "Katakana"),
- ("katakanaorhiragana", "Katakana_Or_Hiragana"),
- ("kawi", "Kawi"),
- ("kayahli", "Kayah_Li"),
- ("khar", "Kharoshthi"),
- ("kharoshthi", "Kharoshthi"),
- ("khitansmallscript", "Khitan_Small_Script"),
- ("khmer", "Khmer"),
- ("khmr", "Khmer"),
- ("khoj", "Khojki"),
- ("khojki", "Khojki"),
- ("khudawadi", "Khudawadi"),
- ("kiratrai", "Kirat_Rai"),
- ("kits", "Khitan_Small_Script"),
- ("knda", "Kannada"),
- ("krai", "Kirat_Rai"),
- ("kthi", "Kaithi"),
- ("lana", "Tai_Tham"),
- ("lao", "Lao"),
- ("laoo", "Lao"),
- ("latin", "Latin"),
- ("latn", "Latin"),
- ("lepc", "Lepcha"),
- ("lepcha", "Lepcha"),
- ("limb", "Limbu"),
- ("limbu", "Limbu"),
- ("lina", "Linear_A"),
- ("linb", "Linear_B"),
- ("lineara", "Linear_A"),
- ("linearb", "Linear_B"),
- ("lisu", "Lisu"),
- ("lyci", "Lycian"),
- ("lycian", "Lycian"),
- ("lydi", "Lydian"),
- ("lydian", "Lydian"),
- ("mahajani", "Mahajani"),
- ("mahj", "Mahajani"),
- ("maka", "Makasar"),
- ("makasar", "Makasar"),
- ("malayalam", "Malayalam"),
- ("mand", "Mandaic"),
- ("mandaic", "Mandaic"),
- ("mani", "Manichaean"),
- ("manichaean", "Manichaean"),
- ("marc", "Marchen"),
- ("marchen", "Marchen"),
- ("masaramgondi", "Masaram_Gondi"),
- ("medefaidrin", "Medefaidrin"),
- ("medf", "Medefaidrin"),
- ("meeteimayek", "Meetei_Mayek"),
- ("mend", "Mende_Kikakui"),
- ("mendekikakui", "Mende_Kikakui"),
- ("merc", "Meroitic_Cursive"),
- ("mero", "Meroitic_Hieroglyphs"),
- ("meroiticcursive", "Meroitic_Cursive"),
- ("meroitichieroglyphs", "Meroitic_Hieroglyphs"),
- ("miao", "Miao"),
- ("mlym", "Malayalam"),
- ("modi", "Modi"),
- ("mong", "Mongolian"),
- ("mongolian", "Mongolian"),
- ("mro", "Mro"),
- ("mroo", "Mro"),
- ("mtei", "Meetei_Mayek"),
- ("mult", "Multani"),
- ("multani", "Multani"),
- ("myanmar", "Myanmar"),
- ("mymr", "Myanmar"),
- ("nabataean", "Nabataean"),
- ("nagm", "Nag_Mundari"),
- ("nagmundari", "Nag_Mundari"),
- ("nand", "Nandinagari"),
- ("nandinagari", "Nandinagari"),
- ("narb", "Old_North_Arabian"),
- ("nbat", "Nabataean"),
- ("newa", "Newa"),
- ("newtailue", "New_Tai_Lue"),
- ("nko", "Nko"),
- ("nkoo", "Nko"),
- ("nshu", "Nushu"),
- ("nushu", "Nushu"),
- ("nyiakengpuachuehmong", "Nyiakeng_Puachue_Hmong"),
- ("ogam", "Ogham"),
- ("ogham", "Ogham"),
- ("olchiki", "Ol_Chiki"),
- ("olck", "Ol_Chiki"),
- ("oldhungarian", "Old_Hungarian"),
- ("olditalic", "Old_Italic"),
- ("oldnortharabian", "Old_North_Arabian"),
- ("oldpermic", "Old_Permic"),
- ("oldpersian", "Old_Persian"),
- ("oldsogdian", "Old_Sogdian"),
- ("oldsoutharabian", "Old_South_Arabian"),
- ("oldturkic", "Old_Turkic"),
- ("olduyghur", "Old_Uyghur"),
- ("olonal", "Ol_Onal"),
- ("onao", "Ol_Onal"),
- ("oriya", "Oriya"),
- ("orkh", "Old_Turkic"),
- ("orya", "Oriya"),
- ("osage", "Osage"),
- ("osge", "Osage"),
- ("osma", "Osmanya"),
- ("osmanya", "Osmanya"),
- ("ougr", "Old_Uyghur"),
- ("pahawhhmong", "Pahawh_Hmong"),
- ("palm", "Palmyrene"),
- ("palmyrene", "Palmyrene"),
- ("pauc", "Pau_Cin_Hau"),
- ("paucinhau", "Pau_Cin_Hau"),
- ("perm", "Old_Permic"),
- ("phag", "Phags_Pa"),
- ("phagspa", "Phags_Pa"),
- ("phli", "Inscriptional_Pahlavi"),
- ("phlp", "Psalter_Pahlavi"),
- ("phnx", "Phoenician"),
- ("phoenician", "Phoenician"),
- ("plrd", "Miao"),
- ("prti", "Inscriptional_Parthian"),
- ("psalterpahlavi", "Psalter_Pahlavi"),
- ("qaac", "Coptic"),
- ("qaai", "Inherited"),
- ("rejang", "Rejang"),
- ("rjng", "Rejang"),
- ("rohg", "Hanifi_Rohingya"),
- ("runic", "Runic"),
- ("runr", "Runic"),
- ("samaritan", "Samaritan"),
- ("samr", "Samaritan"),
- ("sarb", "Old_South_Arabian"),
- ("saur", "Saurashtra"),
- ("saurashtra", "Saurashtra"),
- ("sgnw", "SignWriting"),
- ("sharada", "Sharada"),
- ("shavian", "Shavian"),
- ("shaw", "Shavian"),
- ("shrd", "Sharada"),
- ("sidd", "Siddham"),
- ("siddham", "Siddham"),
- ("signwriting", "SignWriting"),
- ("sind", "Khudawadi"),
- ("sinh", "Sinhala"),
- ("sinhala", "Sinhala"),
- ("sogd", "Sogdian"),
- ("sogdian", "Sogdian"),
- ("sogo", "Old_Sogdian"),
- ("sora", "Sora_Sompeng"),
- ("sorasompeng", "Sora_Sompeng"),
- ("soyo", "Soyombo"),
- ("soyombo", "Soyombo"),
- ("sund", "Sundanese"),
- ("sundanese", "Sundanese"),
- ("sunu", "Sunuwar"),
- ("sunuwar", "Sunuwar"),
- ("sylo", "Syloti_Nagri"),
- ("sylotinagri", "Syloti_Nagri"),
- ("syrc", "Syriac"),
- ("syriac", "Syriac"),
- ("tagalog", "Tagalog"),
- ("tagb", "Tagbanwa"),
- ("tagbanwa", "Tagbanwa"),
- ("taile", "Tai_Le"),
- ("taitham", "Tai_Tham"),
- ("taiviet", "Tai_Viet"),
- ("takr", "Takri"),
- ("takri", "Takri"),
- ("tale", "Tai_Le"),
- ("talu", "New_Tai_Lue"),
- ("tamil", "Tamil"),
- ("taml", "Tamil"),
- ("tang", "Tangut"),
- ("tangsa", "Tangsa"),
- ("tangut", "Tangut"),
- ("tavt", "Tai_Viet"),
- ("telu", "Telugu"),
- ("telugu", "Telugu"),
- ("tfng", "Tifinagh"),
- ("tglg", "Tagalog"),
- ("thaa", "Thaana"),
- ("thaana", "Thaana"),
- ("thai", "Thai"),
- ("tibetan", "Tibetan"),
- ("tibt", "Tibetan"),
- ("tifinagh", "Tifinagh"),
- ("tirh", "Tirhuta"),
- ("tirhuta", "Tirhuta"),
- ("tnsa", "Tangsa"),
- ("todhri", "Todhri"),
- ("todr", "Todhri"),
- ("toto", "Toto"),
- ("tulutigalari", "Tulu_Tigalari"),
- ("tutg", "Tulu_Tigalari"),
- ("ugar", "Ugaritic"),
- ("ugaritic", "Ugaritic"),
- ("unknown", "Unknown"),
- ("vai", "Vai"),
- ("vaii", "Vai"),
- ("vith", "Vithkuqi"),
- ("vithkuqi", "Vithkuqi"),
- ("wancho", "Wancho"),
- ("wara", "Warang_Citi"),
- ("warangciti", "Warang_Citi"),
- ("wcho", "Wancho"),
- ("xpeo", "Old_Persian"),
- ("xsux", "Cuneiform"),
- ("yezi", "Yezidi"),
- ("yezidi", "Yezidi"),
- ("yi", "Yi"),
- ("yiii", "Yi"),
- ("zanabazarsquare", "Zanabazar_Square"),
- ("zanb", "Zanabazar_Square"),
- ("zinh", "Inherited"),
- ("zyyy", "Common"),
- ("zzzz", "Unknown"),
- ],
- ),
- (
- "Sentence_Break",
- &[
- ("at", "ATerm"),
- ("aterm", "ATerm"),
- ("cl", "Close"),
- ("close", "Close"),
- ("cr", "CR"),
- ("ex", "Extend"),
- ("extend", "Extend"),
- ("fo", "Format"),
- ("format", "Format"),
- ("le", "OLetter"),
- ("lf", "LF"),
- ("lo", "Lower"),
- ("lower", "Lower"),
- ("nu", "Numeric"),
- ("numeric", "Numeric"),
- ("oletter", "OLetter"),
- ("other", "Other"),
- ("sc", "SContinue"),
- ("scontinue", "SContinue"),
- ("se", "Sep"),
- ("sep", "Sep"),
- ("sp", "Sp"),
- ("st", "STerm"),
- ("sterm", "STerm"),
- ("up", "Upper"),
- ("upper", "Upper"),
- ("xx", "Other"),
- ],
- ),
- (
- "Word_Break",
- &[
- ("aletter", "ALetter"),
- ("cr", "CR"),
- ("doublequote", "Double_Quote"),
- ("dq", "Double_Quote"),
- ("eb", "E_Base"),
- ("ebase", "E_Base"),
- ("ebasegaz", "E_Base_GAZ"),
- ("ebg", "E_Base_GAZ"),
- ("em", "E_Modifier"),
- ("emodifier", "E_Modifier"),
- ("ex", "ExtendNumLet"),
- ("extend", "Extend"),
- ("extendnumlet", "ExtendNumLet"),
- ("fo", "Format"),
- ("format", "Format"),
- ("gaz", "Glue_After_Zwj"),
- ("glueafterzwj", "Glue_After_Zwj"),
- ("hebrewletter", "Hebrew_Letter"),
- ("hl", "Hebrew_Letter"),
- ("ka", "Katakana"),
- ("katakana", "Katakana"),
- ("le", "ALetter"),
- ("lf", "LF"),
- ("mb", "MidNumLet"),
- ("midletter", "MidLetter"),
- ("midnum", "MidNum"),
- ("midnumlet", "MidNumLet"),
- ("ml", "MidLetter"),
- ("mn", "MidNum"),
- ("newline", "Newline"),
- ("nl", "Newline"),
- ("nu", "Numeric"),
- ("numeric", "Numeric"),
- ("other", "Other"),
- ("regionalindicator", "Regional_Indicator"),
- ("ri", "Regional_Indicator"),
- ("singlequote", "Single_Quote"),
- ("sq", "Single_Quote"),
- ("wsegspace", "WSegSpace"),
- ("xx", "Other"),
- ("zwj", "ZWJ"),
- ],
- ),
-];
diff --git a/vendor/regex-syntax/src/unicode_tables/script.rs b/vendor/regex-syntax/src/unicode_tables/script.rs
deleted file mode 100644
index 3e437ca9..00000000
--- a/vendor/regex-syntax/src/unicode_tables/script.rs
+++ /dev/null
@@ -1,1300 +0,0 @@
-// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
-//
-// ucd-generate script ucd-16.0.0 --chars
-//
-// Unicode version: 16.0.0.
-//
-// ucd-generate 0.3.1 is available on crates.io.
-
-pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
- ("Adlam", ADLAM),
- ("Ahom", AHOM),
- ("Anatolian_Hieroglyphs", ANATOLIAN_HIEROGLYPHS),
- ("Arabic", ARABIC),
- ("Armenian", ARMENIAN),
- ("Avestan", AVESTAN),
- ("Balinese", BALINESE),
- ("Bamum", BAMUM),
- ("Bassa_Vah", BASSA_VAH),
- ("Batak", BATAK),
- ("Bengali", BENGALI),
- ("Bhaiksuki", BHAIKSUKI),
- ("Bopomofo", BOPOMOFO),
- ("Brahmi", BRAHMI),
- ("Braille", BRAILLE),
- ("Buginese", BUGINESE),
- ("Buhid", BUHID),
- ("Canadian_Aboriginal", CANADIAN_ABORIGINAL),
- ("Carian", CARIAN),
- ("Caucasian_Albanian", CAUCASIAN_ALBANIAN),
- ("Chakma", CHAKMA),
- ("Cham", CHAM),
- ("Cherokee", CHEROKEE),
- ("Chorasmian", CHORASMIAN),
- ("Common", COMMON),
- ("Coptic", COPTIC),
- ("Cuneiform", CUNEIFORM),
- ("Cypriot", CYPRIOT),
- ("Cypro_Minoan", CYPRO_MINOAN),
- ("Cyrillic", CYRILLIC),
- ("Deseret", DESERET),
- ("Devanagari", DEVANAGARI),
- ("Dives_Akuru", DIVES_AKURU),
- ("Dogra", DOGRA),
- ("Duployan", DUPLOYAN),
- ("Egyptian_Hieroglyphs", EGYPTIAN_HIEROGLYPHS),
- ("Elbasan", ELBASAN),
- ("Elymaic", ELYMAIC),
- ("Ethiopic", ETHIOPIC),
- ("Garay", GARAY),
- ("Georgian", GEORGIAN),
- ("Glagolitic", GLAGOLITIC),
- ("Gothic", GOTHIC),
- ("Grantha", GRANTHA),
- ("Greek", GREEK),
- ("Gujarati", GUJARATI),
- ("Gunjala_Gondi", GUNJALA_GONDI),
- ("Gurmukhi", GURMUKHI),
- ("Gurung_Khema", GURUNG_KHEMA),
- ("Han", HAN),
- ("Hangul", HANGUL),
- ("Hanifi_Rohingya", HANIFI_ROHINGYA),
- ("Hanunoo", HANUNOO),
- ("Hatran", HATRAN),
- ("Hebrew", HEBREW),
- ("Hiragana", HIRAGANA),
- ("Imperial_Aramaic", IMPERIAL_ARAMAIC),
- ("Inherited", INHERITED),
- ("Inscriptional_Pahlavi", INSCRIPTIONAL_PAHLAVI),
- ("Inscriptional_Parthian", INSCRIPTIONAL_PARTHIAN),
- ("Javanese", JAVANESE),
- ("Kaithi", KAITHI),
- ("Kannada", KANNADA),
- ("Katakana", KATAKANA),
- ("Kawi", KAWI),
- ("Kayah_Li", KAYAH_LI),
- ("Kharoshthi", KHAROSHTHI),
- ("Khitan_Small_Script", KHITAN_SMALL_SCRIPT),
- ("Khmer", KHMER),
- ("Khojki", KHOJKI),
- ("Khudawadi", KHUDAWADI),
- ("Kirat_Rai", KIRAT_RAI),
- ("Lao", LAO),
- ("Latin", LATIN),
- ("Lepcha", LEPCHA),
- ("Limbu", LIMBU),
- ("Linear_A", LINEAR_A),
- ("Linear_B", LINEAR_B),
- ("Lisu", LISU),
- ("Lycian", LYCIAN),
- ("Lydian", LYDIAN),
- ("Mahajani", MAHAJANI),
- ("Makasar", MAKASAR),
- ("Malayalam", MALAYALAM),
- ("Mandaic", MANDAIC),
- ("Manichaean", MANICHAEAN),
- ("Marchen", MARCHEN),
- ("Masaram_Gondi", MASARAM_GONDI),
- ("Medefaidrin", MEDEFAIDRIN),
- ("Meetei_Mayek", MEETEI_MAYEK),
- ("Mende_Kikakui", MENDE_KIKAKUI),
- ("Meroitic_Cursive", MEROITIC_CURSIVE),
- ("Meroitic_Hieroglyphs", MEROITIC_HIEROGLYPHS),
- ("Miao", MIAO),
- ("Modi", MODI),
- ("Mongolian", MONGOLIAN),
- ("Mro", MRO),
- ("Multani", MULTANI),
- ("Myanmar", MYANMAR),
- ("Nabataean", NABATAEAN),
- ("Nag_Mundari", NAG_MUNDARI),
- ("Nandinagari", NANDINAGARI),
- ("New_Tai_Lue", NEW_TAI_LUE),
- ("Newa", NEWA),
- ("Nko", NKO),
- ("Nushu", NUSHU),
- ("Nyiakeng_Puachue_Hmong", NYIAKENG_PUACHUE_HMONG),
- ("Ogham", OGHAM),
- ("Ol_Chiki", OL_CHIKI),
- ("Ol_Onal", OL_ONAL),
- ("Old_Hungarian", OLD_HUNGARIAN),
- ("Old_Italic", OLD_ITALIC),
- ("Old_North_Arabian", OLD_NORTH_ARABIAN),
- ("Old_Permic", OLD_PERMIC),
- ("Old_Persian", OLD_PERSIAN),
- ("Old_Sogdian", OLD_SOGDIAN),
- ("Old_South_Arabian", OLD_SOUTH_ARABIAN),
- ("Old_Turkic", OLD_TURKIC),
- ("Old_Uyghur", OLD_UYGHUR),
- ("Oriya", ORIYA),
- ("Osage", OSAGE),
- ("Osmanya", OSMANYA),
- ("Pahawh_Hmong", PAHAWH_HMONG),
- ("Palmyrene", PALMYRENE),
- ("Pau_Cin_Hau", PAU_CIN_HAU),
- ("Phags_Pa", PHAGS_PA),
- ("Phoenician", PHOENICIAN),
- ("Psalter_Pahlavi", PSALTER_PAHLAVI),
- ("Rejang", REJANG),
- ("Runic", RUNIC),
- ("Samaritan", SAMARITAN),
- ("Saurashtra", SAURASHTRA),
- ("Sharada", SHARADA),
- ("Shavian", SHAVIAN),
- ("Siddham", SIDDHAM),
- ("SignWriting", SIGNWRITING),
- ("Sinhala", SINHALA),
- ("Sogdian", SOGDIAN),
- ("Sora_Sompeng", SORA_SOMPENG),
- ("Soyombo", SOYOMBO),
- ("Sundanese", SUNDANESE),
- ("Sunuwar", SUNUWAR),
- ("Syloti_Nagri", SYLOTI_NAGRI),
- ("Syriac", SYRIAC),
- ("Tagalog", TAGALOG),
- ("Tagbanwa", TAGBANWA),
- ("Tai_Le", TAI_LE),
- ("Tai_Tham", TAI_THAM),
- ("Tai_Viet", TAI_VIET),
- ("Takri", TAKRI),
- ("Tamil", TAMIL),
- ("Tangsa", TANGSA),
- ("Tangut", TANGUT),
- ("Telugu", TELUGU),
- ("Thaana", THAANA),
- ("Thai", THAI),
- ("Tibetan", TIBETAN),
- ("Tifinagh", TIFINAGH),
- ("Tirhuta", TIRHUTA),
- ("Todhri", TODHRI),
- ("Toto", TOTO),
- ("Tulu_Tigalari", TULU_TIGALARI),
- ("Ugaritic", UGARITIC),
- ("Vai", VAI),
- ("Vithkuqi", VITHKUQI),
- ("Wancho", WANCHO),
- ("Warang_Citi", WARANG_CITI),
- ("Yezidi", YEZIDI),
- ("Yi", YI),
- ("Zanabazar_Square", ZANABAZAR_SQUARE),
-];
-
-pub const ADLAM: &'static [(char, char)] =
- &[('𞤀', '𞥋'), ('𞥐', '𞥙'), ('𞥞', '𞥟')];
-
-pub const AHOM: &'static [(char, char)] =
- &[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑝆')];
-
-pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[('𔐀', '𔙆')];
-
-pub const ARABIC: &'static [(char, char)] = &[
- ('\u{600}', '\u{604}'),
- ('؆', '؋'),
- ('؍', '\u{61a}'),
- ('\u{61c}', '؞'),
- ('ؠ', 'ؿ'),
- ('ف', 'ي'),
- ('\u{656}', 'ٯ'),
- ('ٱ', '\u{6dc}'),
- ('۞', 'ۿ'),
- ('ݐ', 'ݿ'),
- ('ࡰ', 'ࢎ'),
- ('\u{890}', '\u{891}'),
- ('\u{897}', '\u{8e1}'),
- ('\u{8e3}', '\u{8ff}'),
- ('ﭐ', '﯂'),
- ('ﯓ', 'ﴽ'),
- ('﵀', 'ﶏ'),
- ('ﶒ', 'ﷇ'),
- ('﷏', '﷏'),
- ('ﷰ', '﷿'),
- ('ﹰ', 'ﹴ'),
- ('ﹶ', 'ﻼ'),
- ('𐹠', '𐹾'),
- ('𐻂', '𐻄'),
- ('\u{10efc}', '\u{10eff}'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('𞻰', '𞻱'),
-];
-
-pub const ARMENIAN: &'static [(char, char)] =
- &[('Ա', 'Ֆ'), ('ՙ', '֊'), ('֍', '֏'), ('ﬓ', 'ﬗ')];
-
-pub const AVESTAN: &'static [(char, char)] = &[('𐬀', '𐬵'), ('𐬹', '𐬿')];
-
-pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭌ'), ('᭎', '᭿')];
-
-pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')];
-
-pub const BASSA_VAH: &'static [(char, char)] =
- &[('𖫐', '𖫭'), ('\u{16af0}', '𖫵')];
-
-pub const BATAK: &'static [(char, char)] = &[('ᯀ', '\u{1bf3}'), ('᯼', '᯿')];
-
-pub const BENGALI: &'static [(char, char)] = &[
- ('ঀ', 'ঃ'),
- ('অ', 'ঌ'),
- ('এ', 'ঐ'),
- ('ও', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('\u{9bc}', '\u{9c4}'),
- ('ে', 'ৈ'),
- ('ো', 'ৎ'),
- ('\u{9d7}', '\u{9d7}'),
- ('ড়', 'ঢ়'),
- ('য়', '\u{9e3}'),
- ('০', '\u{9fe}'),
-];
-
-pub const BHAIKSUKI: &'static [(char, char)] =
- &[('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱅'), ('𑱐', '𑱬')];
-
-pub const BOPOMOFO: &'static [(char, char)] =
- &[('˪', '˫'), ('ㄅ', 'ㄯ'), ('ㆠ', 'ㆿ')];
-
-pub const BRAHMI: &'static [(char, char)] =
- &[('𑀀', '𑁍'), ('𑁒', '𑁵'), ('\u{1107f}', '\u{1107f}')];
-
-pub const BRAILLE: &'static [(char, char)] = &[('⠀', '⣿')];
-
-pub const BUGINESE: &'static [(char, char)] = &[('ᨀ', '\u{1a1b}'), ('᨞', '᨟')];
-
-pub const BUHID: &'static [(char, char)] = &[('ᝀ', '\u{1753}')];
-
-pub const CANADIAN_ABORIGINAL: &'static [(char, char)] =
- &[('᐀', 'ᙿ'), ('ᢰ', 'ᣵ'), ('𑪰', '𑪿')];
-
-pub const CARIAN: &'static [(char, char)] = &[('𐊠', '𐋐')];
-
-pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] =
- &[('𐔰', '𐕣'), ('𐕯', '𐕯')];
-
-pub const CHAKMA: &'static [(char, char)] =
- &[('\u{11100}', '\u{11134}'), ('𑄶', '𑅇')];
-
-pub const CHAM: &'static [(char, char)] =
- &[('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟')];
-
-pub const CHEROKEE: &'static [(char, char)] =
- &[('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ꭰ', 'ꮿ')];
-
-pub const CHORASMIAN: &'static [(char, char)] = &[('𐾰', '𐿋')];
-
-pub const COMMON: &'static [(char, char)] = &[
- ('\0', '@'),
- ('[', '`'),
- ('{', '©'),
- ('«', '¹'),
- ('»', '¿'),
- ('×', '×'),
- ('÷', '÷'),
- ('ʹ', '˟'),
- ('˥', '˩'),
- ('ˬ', '˿'),
- ('ʹ', 'ʹ'),
- (';', ';'),
- ('΅', '΅'),
- ('·', '·'),
- ('\u{605}', '\u{605}'),
- ('،', '،'),
- ('؛', '؛'),
- ('؟', '؟'),
- ('ـ', 'ـ'),
- ('\u{6dd}', '\u{6dd}'),
- ('\u{8e2}', '\u{8e2}'),
- ('।', '॥'),
- ('฿', '฿'),
- ('࿕', '࿘'),
- ('჻', '჻'),
- ('᛫', '᛭'),
- ('᜵', '᜶'),
- ('᠂', '᠃'),
- ('᠅', '᠅'),
- ('᳓', '᳓'),
- ('᳡', '᳡'),
- ('ᳩ', 'ᳬ'),
- ('ᳮ', 'ᳳ'),
- ('ᳵ', '᳷'),
- ('ᳺ', 'ᳺ'),
- ('\u{2000}', '\u{200b}'),
- ('\u{200e}', '\u{2064}'),
- ('\u{2066}', '⁰'),
- ('⁴', '⁾'),
- ('₀', '₎'),
- ('₠', '⃀'),
- ('℀', '℥'),
- ('℧', '℩'),
- ('ℬ', 'ℱ'),
- ('ℳ', '⅍'),
- ('⅏', '⅟'),
- ('↉', '↋'),
- ('←', '␩'),
- ('⑀', '⑊'),
- ('①', '⟿'),
- ('⤀', '⭳'),
- ('⭶', '⮕'),
- ('⮗', '⯿'),
- ('⸀', '⹝'),
- ('⿰', '〄'),
- ('〆', '〆'),
- ('〈', '〠'),
- ('〰', '〷'),
- ('〼', '〿'),
- ('゛', '゜'),
- ('゠', '゠'),
- ('・', 'ー'),
- ('㆐', '㆟'),
- ('㇀', '㇥'),
- ('㇯', '㇯'),
- ('㈠', '㉟'),
- ('㉿', '㋏'),
- ('㋿', '㋿'),
- ('㍘', '㏿'),
- ('䷀', '䷿'),
- ('꜀', '꜡'),
- ('ꞈ', '꞊'),
- ('꠰', '꠹'),
- ('꤮', '꤮'),
- ('ꧏ', 'ꧏ'),
- ('꭛', '꭛'),
- ('꭪', '꭫'),
- ('﴾', '﴿'),
- ('︐', '︙'),
- ('︰', '﹒'),
- ('﹔', '﹦'),
- ('﹨', '﹫'),
- ('\u{feff}', '\u{feff}'),
- ('!', '@'),
- ('[', '`'),
- ('{', '・'),
- ('ー', 'ー'),
- ('\u{ff9e}', '\u{ff9f}'),
- ('¢', '₩'),
- ('│', '○'),
- ('\u{fff9}', '�'),
- ('𐄀', '𐄂'),
- ('𐄇', '𐄳'),
- ('𐄷', '𐄿'),
- ('𐆐', '𐆜'),
- ('𐇐', '𐇼'),
- ('𐋡', '𐋻'),
- ('\u{1bca0}', '\u{1bca3}'),
- ('𜰀', '𜳹'),
- ('𜴀', '𜺳'),
- ('𜽐', '𜿃'),
- ('𝀀', '𝃵'),
- ('𝄀', '𝄦'),
- ('𝄩', '\u{1d166}'),
- ('𝅪', '\u{1d17a}'),
- ('𝆃', '𝆄'),
- ('𝆌', '𝆩'),
- ('𝆮', '𝇪'),
- ('𝋀', '𝋓'),
- ('𝋠', '𝋳'),
- ('𝌀', '𝍖'),
- ('𝍠', '𝍸'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝟋'),
- ('𝟎', '𝟿'),
- ('𞱱', '𞲴'),
- ('𞴁', '𞴽'),
- ('🀀', '🀫'),
- ('🀰', '🂓'),
- ('🂠', '🂮'),
- ('🂱', '🂿'),
- ('🃁', '🃏'),
- ('🃑', '🃵'),
- ('🄀', '🆭'),
- ('🇦', '🇿'),
- ('🈁', '🈂'),
- ('🈐', '🈻'),
- ('🉀', '🉈'),
- ('🉐', '🉑'),
- ('🉠', '🉥'),
- ('🌀', '🛗'),
- ('🛜', '🛬'),
- ('🛰', '🛼'),
- ('🜀', '🝶'),
- ('🝻', '🟙'),
- ('🟠', '🟫'),
- ('🟰', '🟰'),
- ('🠀', '🠋'),
- ('🠐', '🡇'),
- ('🡐', '🡙'),
- ('🡠', '🢇'),
- ('🢐', '🢭'),
- ('🢰', '🢻'),
- ('🣀', '🣁'),
- ('🤀', '🩓'),
- ('🩠', '🩭'),
- ('🩰', '🩼'),
- ('🪀', '🪉'),
- ('🪏', '🫆'),
- ('🫎', '🫜'),
- ('🫟', '🫩'),
- ('🫰', '🫸'),
- ('🬀', '🮒'),
- ('🮔', '🯹'),
- ('\u{e0001}', '\u{e0001}'),
- ('\u{e0020}', '\u{e007f}'),
-];
-
-pub const COPTIC: &'static [(char, char)] =
- &[('Ϣ', 'ϯ'), ('Ⲁ', 'ⳳ'), ('⳹', '⳿')];
-
-pub const CUNEIFORM: &'static [(char, char)] =
- &[('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃')];
-
-pub const CYPRIOT: &'static [(char, char)] =
- &[('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐠿')];
-
-pub const CYPRO_MINOAN: &'static [(char, char)] = &[('𒾐', '𒿲')];
-
-pub const CYRILLIC: &'static [(char, char)] = &[
- ('Ѐ', '\u{484}'),
- ('\u{487}', 'ԯ'),
- ('ᲀ', 'ᲊ'),
- ('ᴫ', 'ᴫ'),
- ('ᵸ', 'ᵸ'),
- ('\u{2de0}', '\u{2dff}'),
- ('Ꙁ', '\u{a69f}'),
- ('\u{fe2e}', '\u{fe2f}'),
- ('𞀰', '𞁭'),
- ('\u{1e08f}', '\u{1e08f}'),
-];
-
-pub const DESERET: &'static [(char, char)] = &[('𐐀', '𐑏')];
-
-pub const DEVANAGARI: &'static [(char, char)] = &[
- ('\u{900}', 'ॐ'),
- ('\u{955}', '\u{963}'),
- ('०', 'ॿ'),
- ('\u{a8e0}', '\u{a8ff}'),
- ('𑬀', '𑬉'),
-];
-
-pub const DIVES_AKURU: &'static [(char, char)] = &[
- ('𑤀', '𑤆'),
- ('𑤉', '𑤉'),
- ('𑤌', '𑤓'),
- ('𑤕', '𑤖'),
- ('𑤘', '𑤵'),
- ('𑤷', '𑤸'),
- ('\u{1193b}', '𑥆'),
- ('𑥐', '𑥙'),
-];
-
-pub const DOGRA: &'static [(char, char)] = &[('𑠀', '𑠻')];
-
-pub const DUPLOYAN: &'static [(char, char)] =
- &[('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '𛲟')];
-
-pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] =
- &[('𓀀', '\u{13455}'), ('𓑠', '𔏺')];
-
-pub const ELBASAN: &'static [(char, char)] = &[('𐔀', '𐔧')];
-
-pub const ELYMAIC: &'static [(char, char)] = &[('𐿠', '𐿶')];
-
-pub const ETHIOPIC: &'static [(char, char)] = &[
- ('ሀ', 'ቈ'),
- ('ቊ', 'ቍ'),
- ('ቐ', 'ቖ'),
- ('ቘ', 'ቘ'),
- ('ቚ', 'ቝ'),
- ('በ', 'ኈ'),
- ('ኊ', 'ኍ'),
- ('ነ', 'ኰ'),
- ('ኲ', 'ኵ'),
- ('ኸ', 'ኾ'),
- ('ዀ', 'ዀ'),
- ('ዂ', 'ዅ'),
- ('ወ', 'ዖ'),
- ('ዘ', 'ጐ'),
- ('ጒ', 'ጕ'),
- ('ጘ', 'ፚ'),
- ('\u{135d}', '፼'),
- ('ᎀ', '᎙'),
- ('ⶀ', 'ⶖ'),
- ('ⶠ', 'ⶦ'),
- ('ⶨ', 'ⶮ'),
- ('ⶰ', 'ⶶ'),
- ('ⶸ', 'ⶾ'),
- ('ⷀ', 'ⷆ'),
- ('ⷈ', 'ⷎ'),
- ('ⷐ', 'ⷖ'),
- ('ⷘ', 'ⷞ'),
- ('ꬁ', 'ꬆ'),
- ('ꬉ', 'ꬎ'),
- ('ꬑ', 'ꬖ'),
- ('ꬠ', 'ꬦ'),
- ('ꬨ', 'ꬮ'),
- ('𞟠', '𞟦'),
- ('𞟨', '𞟫'),
- ('𞟭', '𞟮'),
- ('𞟰', '𞟾'),
-];
-
-pub const GARAY: &'static [(char, char)] =
- &[('𐵀', '𐵥'), ('\u{10d69}', '𐶅'), ('𐶎', '𐶏')];
-
-pub const GEORGIAN: &'static [(char, char)] = &[
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ა', 'ჺ'),
- ('ჼ', 'ჿ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
-];
-
-pub const GLAGOLITIC: &'static [(char, char)] = &[
- ('Ⰰ', 'ⱟ'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
-];
-
-pub const GOTHIC: &'static [(char, char)] = &[('𐌰', '𐍊')];
-
-pub const GRANTHA: &'static [(char, char)] = &[
- ('\u{11300}', '𑌃'),
- ('𑌅', '𑌌'),
- ('𑌏', '𑌐'),
- ('𑌓', '𑌨'),
- ('𑌪', '𑌰'),
- ('𑌲', '𑌳'),
- ('𑌵', '𑌹'),
- ('\u{1133c}', '𑍄'),
- ('𑍇', '𑍈'),
- ('𑍋', '\u{1134d}'),
- ('𑍐', '𑍐'),
- ('\u{11357}', '\u{11357}'),
- ('𑍝', '𑍣'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
-];
-
-pub const GREEK: &'static [(char, char)] = &[
- ('Ͱ', 'ͳ'),
- ('͵', 'ͷ'),
- ('ͺ', 'ͽ'),
- ('Ϳ', 'Ϳ'),
- ('΄', '΄'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', 'ϡ'),
- ('ϰ', 'Ͽ'),
- ('ᴦ', 'ᴪ'),
- ('ᵝ', 'ᵡ'),
- ('ᵦ', 'ᵪ'),
- ('ᶿ', 'ᶿ'),
- ('ἀ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ῄ'),
- ('ῆ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('῝', '`'),
- ('ῲ', 'ῴ'),
- ('ῶ', '῾'),
- ('Ω', 'Ω'),
- ('ꭥ', 'ꭥ'),
- ('𐅀', '𐆎'),
- ('𐆠', '𐆠'),
- ('𝈀', '𝉅'),
-];
-
-pub const GUJARATI: &'static [(char, char)] = &[
- ('\u{a81}', 'ઃ'),
- ('અ', 'ઍ'),
- ('એ', 'ઑ'),
- ('ઓ', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('\u{abc}', '\u{ac5}'),
- ('\u{ac7}', 'ૉ'),
- ('ો', '\u{acd}'),
- ('ૐ', 'ૐ'),
- ('ૠ', '\u{ae3}'),
- ('૦', '૱'),
- ('ૹ', '\u{aff}'),
-];
-
-pub const GUNJALA_GONDI: &'static [(char, char)] = &[
- ('𑵠', '𑵥'),
- ('𑵧', '𑵨'),
- ('𑵪', '𑶎'),
- ('\u{11d90}', '\u{11d91}'),
- ('𑶓', '𑶘'),
- ('𑶠', '𑶩'),
-];
-
-pub const GURMUKHI: &'static [(char, char)] = &[
- ('\u{a01}', 'ਃ'),
- ('ਅ', 'ਊ'),
- ('ਏ', 'ਐ'),
- ('ਓ', 'ਨ'),
- ('ਪ', 'ਰ'),
- ('ਲ', 'ਲ਼'),
- ('ਵ', 'ਸ਼'),
- ('ਸ', 'ਹ'),
- ('\u{a3c}', '\u{a3c}'),
- ('ਾ', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4d}'),
- ('\u{a51}', '\u{a51}'),
- ('ਖ਼', 'ੜ'),
- ('ਫ਼', 'ਫ਼'),
- ('੦', '੶'),
-];
-
-pub const GURUNG_KHEMA: &'static [(char, char)] = &[('𖄀', '𖄹')];
-
-pub const HAN: &'static [(char, char)] = &[
- ('⺀', '⺙'),
- ('⺛', '⻳'),
- ('⼀', '⿕'),
- ('々', '々'),
- ('〇', '〇'),
- ('〡', '〩'),
- ('〸', '〻'),
- ('㐀', '䶿'),
- ('一', '鿿'),
- ('豈', '舘'),
- ('並', '龎'),
- ('𖿢', '𖿣'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('𠀀', '𪛟'),
- ('𪜀', '𫜹'),
- ('𫝀', '𫠝'),
- ('𫠠', '𬺡'),
- ('𬺰', '𮯠'),
- ('𮯰', '𮹝'),
- ('丽', '𪘀'),
- ('𰀀', '𱍊'),
- ('𱍐', '𲎯'),
-];
-
-pub const HANGUL: &'static [(char, char)] = &[
- ('ᄀ', 'ᇿ'),
- ('\u{302e}', '\u{302f}'),
- ('ㄱ', 'ㆎ'),
- ('㈀', '㈞'),
- ('㉠', '㉾'),
- ('ꥠ', 'ꥼ'),
- ('가', '힣'),
- ('ힰ', 'ퟆ'),
- ('ퟋ', 'ퟻ'),
- ('ᅠ', 'ᄒ'),
- ('ᅡ', 'ᅦ'),
- ('ᅧ', 'ᅬ'),
- ('ᅭ', 'ᅲ'),
- ('ᅳ', 'ᅵ'),
-];
-
-pub const HANIFI_ROHINGYA: &'static [(char, char)] =
- &[('𐴀', '\u{10d27}'), ('𐴰', '𐴹')];
-
-pub const HANUNOO: &'static [(char, char)] = &[('ᜠ', '\u{1734}')];
-
-pub const HATRAN: &'static [(char, char)] =
- &[('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿')];
-
-pub const HEBREW: &'static [(char, char)] = &[
- ('\u{591}', '\u{5c7}'),
- ('א', 'ת'),
- ('ׯ', '״'),
- ('יִ', 'זּ'),
- ('טּ', 'לּ'),
- ('מּ', 'מּ'),
- ('נּ', 'סּ'),
- ('ףּ', 'פּ'),
- ('צּ', 'ﭏ'),
-];
-
-pub const HIRAGANA: &'static [(char, char)] = &[
- ('ぁ', 'ゖ'),
- ('ゝ', 'ゟ'),
- ('𛀁', '𛄟'),
- ('𛄲', '𛄲'),
- ('𛅐', '𛅒'),
- ('🈀', '🈀'),
-];
-
-pub const IMPERIAL_ARAMAIC: &'static [(char, char)] =
- &[('𐡀', '𐡕'), ('𐡗', '𐡟')];
-
-pub const INHERITED: &'static [(char, char)] = &[
- ('\u{300}', '\u{36f}'),
- ('\u{485}', '\u{486}'),
- ('\u{64b}', '\u{655}'),
- ('\u{670}', '\u{670}'),
- ('\u{951}', '\u{954}'),
- ('\u{1ab0}', '\u{1ace}'),
- ('\u{1cd0}', '\u{1cd2}'),
- ('\u{1cd4}', '\u{1ce0}'),
- ('\u{1ce2}', '\u{1ce8}'),
- ('\u{1ced}', '\u{1ced}'),
- ('\u{1cf4}', '\u{1cf4}'),
- ('\u{1cf8}', '\u{1cf9}'),
- ('\u{1dc0}', '\u{1dff}'),
- ('\u{200c}', '\u{200d}'),
- ('\u{20d0}', '\u{20f0}'),
- ('\u{302a}', '\u{302d}'),
- ('\u{3099}', '\u{309a}'),
- ('\u{fe00}', '\u{fe0f}'),
- ('\u{fe20}', '\u{fe2d}'),
- ('\u{101fd}', '\u{101fd}'),
- ('\u{102e0}', '\u{102e0}'),
- ('\u{1133b}', '\u{1133b}'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('\u{1d167}', '\u{1d169}'),
- ('\u{1d17b}', '\u{1d182}'),
- ('\u{1d185}', '\u{1d18b}'),
- ('\u{1d1aa}', '\u{1d1ad}'),
- ('\u{e0100}', '\u{e01ef}'),
-];
-
-pub const INSCRIPTIONAL_PAHLAVI: &'static [(char, char)] =
- &[('𐭠', '𐭲'), ('𐭸', '𐭿')];
-
-pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] =
- &[('𐭀', '𐭕'), ('𐭘', '𐭟')];
-
-pub const JAVANESE: &'static [(char, char)] =
- &[('\u{a980}', '꧍'), ('꧐', '꧙'), ('꧞', '꧟')];
-
-pub const KAITHI: &'static [(char, char)] =
- &[('\u{11080}', '\u{110c2}'), ('\u{110cd}', '\u{110cd}')];
-
-pub const KANNADA: &'static [(char, char)] = &[
- ('ಀ', 'ಌ'),
- ('ಎ', 'ಐ'),
- ('ಒ', 'ನ'),
- ('ಪ', 'ಳ'),
- ('ವ', 'ಹ'),
- ('\u{cbc}', 'ೄ'),
- ('\u{cc6}', '\u{cc8}'),
- ('\u{cca}', '\u{ccd}'),
- ('\u{cd5}', '\u{cd6}'),
- ('ೝ', 'ೞ'),
- ('ೠ', '\u{ce3}'),
- ('೦', '೯'),
- ('ೱ', 'ೳ'),
-];
-
-pub const KATAKANA: &'static [(char, char)] = &[
- ('ァ', 'ヺ'),
- ('ヽ', 'ヿ'),
- ('ㇰ', 'ㇿ'),
- ('㋐', '㋾'),
- ('㌀', '㍗'),
- ('ヲ', 'ッ'),
- ('ア', 'ン'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('𛀀', '𛀀'),
- ('𛄠', '𛄢'),
- ('𛅕', '𛅕'),
- ('𛅤', '𛅧'),
-];
-
-pub const KAWI: &'static [(char, char)] =
- &[('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f5a}')];
-
-pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '\u{a92d}'), ('꤯', '꤯')];
-
-pub const KHAROSHTHI: &'static [(char, char)] = &[
- ('𐨀', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '𐨓'),
- ('𐨕', '𐨗'),
- ('𐨙', '𐨵'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '𐩈'),
- ('𐩐', '𐩘'),
-];
-
-pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] =
- &[('\u{16fe4}', '\u{16fe4}'), ('𘬀', '𘳕'), ('𘳿', '𘳿')];
-
-pub const KHMER: &'static [(char, char)] =
- &[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿')];
-
-pub const KHOJKI: &'static [(char, char)] = &[('𑈀', '𑈑'), ('𑈓', '\u{11241}')];
-
-pub const KHUDAWADI: &'static [(char, char)] =
- &[('𑊰', '\u{112ea}'), ('𑋰', '𑋹')];
-
-pub const KIRAT_RAI: &'static [(char, char)] = &[('𖵀', '𖵹')];
-
-pub const LAO: &'static [(char, char)] = &[
- ('ກ', 'ຂ'),
- ('ຄ', 'ຄ'),
- ('ຆ', 'ຊ'),
- ('ຌ', 'ຣ'),
- ('ລ', 'ລ'),
- ('ວ', 'ຽ'),
- ('ເ', 'ໄ'),
- ('ໆ', 'ໆ'),
- ('\u{ec8}', '\u{ece}'),
- ('໐', '໙'),
- ('ໜ', 'ໟ'),
-];
-
-pub const LATIN: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('a', 'z'),
- ('ª', 'ª'),
- ('º', 'º'),
- ('À', 'Ö'),
- ('Ø', 'ö'),
- ('ø', 'ʸ'),
- ('ˠ', 'ˤ'),
- ('ᴀ', 'ᴥ'),
- ('ᴬ', 'ᵜ'),
- ('ᵢ', 'ᵥ'),
- ('ᵫ', 'ᵷ'),
- ('ᵹ', 'ᶾ'),
- ('Ḁ', 'ỿ'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('K', 'Å'),
- ('Ⅎ', 'Ⅎ'),
- ('ⅎ', 'ⅎ'),
- ('Ⅰ', 'ↈ'),
- ('Ⱡ', 'Ɀ'),
- ('Ꜣ', 'ꞇ'),
- ('Ꞌ', 'ꟍ'),
- ('Ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'Ƛ'),
- ('ꟲ', 'ꟿ'),
- ('ꬰ', 'ꭚ'),
- ('ꭜ', 'ꭤ'),
- ('ꭦ', 'ꭩ'),
- ('ff', 'st'),
- ('A', 'Z'),
- ('a', 'z'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𝼀', '𝼞'),
- ('𝼥', '𝼪'),
-];
-
-pub const LEPCHA: &'static [(char, char)] =
- &[('ᰀ', '\u{1c37}'), ('᰻', '᱉'), ('ᱍ', 'ᱏ')];
-
-pub const LIMBU: &'static [(char, char)] = &[
- ('ᤀ', 'ᤞ'),
- ('\u{1920}', 'ᤫ'),
- ('ᤰ', '\u{193b}'),
- ('᥀', '᥀'),
- ('᥄', '᥏'),
-];
-
-pub const LINEAR_A: &'static [(char, char)] =
- &[('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧')];
-
-pub const LINEAR_B: &'static [(char, char)] = &[
- ('𐀀', '𐀋'),
- ('𐀍', '𐀦'),
- ('𐀨', '𐀺'),
- ('𐀼', '𐀽'),
- ('𐀿', '𐁍'),
- ('𐁐', '𐁝'),
- ('𐂀', '𐃺'),
-];
-
-pub const LISU: &'static [(char, char)] = &[('ꓐ', '꓿'), ('𑾰', '𑾰')];
-
-pub const LYCIAN: &'static [(char, char)] = &[('𐊀', '𐊜')];
-
-pub const LYDIAN: &'static [(char, char)] = &[('𐤠', '𐤹'), ('𐤿', '𐤿')];
-
-pub const MAHAJANI: &'static [(char, char)] = &[('𑅐', '𑅶')];
-
-pub const MAKASAR: &'static [(char, char)] = &[('𑻠', '𑻸')];
-
-pub const MALAYALAM: &'static [(char, char)] = &[
- ('\u{d00}', 'ഌ'),
- ('എ', 'ഐ'),
- ('ഒ', '\u{d44}'),
- ('െ', 'ൈ'),
- ('ൊ', '൏'),
- ('ൔ', '\u{d63}'),
- ('൦', 'ൿ'),
-];
-
-pub const MANDAIC: &'static [(char, char)] = &[('ࡀ', '\u{85b}'), ('࡞', '࡞')];
-
-pub const MANICHAEAN: &'static [(char, char)] =
- &[('𐫀', '\u{10ae6}'), ('𐫫', '𐫶')];
-
-pub const MARCHEN: &'static [(char, char)] =
- &[('𑱰', '𑲏'), ('\u{11c92}', '\u{11ca7}'), ('𑲩', '\u{11cb6}')];
-
-pub const MASARAM_GONDI: &'static [(char, char)] = &[
- ('𑴀', '𑴆'),
- ('𑴈', '𑴉'),
- ('𑴋', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d47}'),
- ('𑵐', '𑵙'),
-];
-
-pub const MEDEFAIDRIN: &'static [(char, char)] = &[('𖹀', '𖺚')];
-
-pub const MEETEI_MAYEK: &'static [(char, char)] =
- &[('ꫠ', '\u{aaf6}'), ('ꯀ', '\u{abed}'), ('꯰', '꯹')];
-
-pub const MENDE_KIKAKUI: &'static [(char, char)] =
- &[('𞠀', '𞣄'), ('𞣇', '\u{1e8d6}')];
-
-pub const MEROITIC_CURSIVE: &'static [(char, char)] =
- &[('𐦠', '𐦷'), ('𐦼', '𐧏'), ('𐧒', '𐧿')];
-
-pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = &[('𐦀', '𐦟')];
-
-pub const MIAO: &'static [(char, char)] =
- &[('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟')];
-
-pub const MODI: &'static [(char, char)] = &[('𑘀', '𑙄'), ('𑙐', '𑙙')];
-
-pub const MONGOLIAN: &'static [(char, char)] =
- &[('᠀', '᠁'), ('᠄', '᠄'), ('᠆', '᠙'), ('ᠠ', 'ᡸ'), ('ᢀ', 'ᢪ'), ('𑙠', '𑙬')];
-
-pub const MRO: &'static [(char, char)] = &[('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯')];
-
-pub const MULTANI: &'static [(char, char)] =
- &[('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩')];
-
-pub const MYANMAR: &'static [(char, char)] =
- &[('က', '႟'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ'), ('𑛐', '𑛣')];
-
-pub const NABATAEAN: &'static [(char, char)] = &[('𐢀', '𐢞'), ('𐢧', '𐢯')];
-
-pub const NAG_MUNDARI: &'static [(char, char)] = &[('𞓐', '𞓹')];
-
-pub const NANDINAGARI: &'static [(char, char)] =
- &[('𑦠', '𑦧'), ('𑦪', '\u{119d7}'), ('\u{119da}', '𑧤')];
-
-pub const NEW_TAI_LUE: &'static [(char, char)] =
- &[('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟')];
-
-pub const NEWA: &'static [(char, char)] = &[('𑐀', '𑑛'), ('𑑝', '𑑡')];
-
-pub const NKO: &'static [(char, char)] = &[('߀', 'ߺ'), ('\u{7fd}', '߿')];
-
-pub const NUSHU: &'static [(char, char)] = &[('𖿡', '𖿡'), ('𛅰', '𛋻')];
-
-pub const NYIAKENG_PUACHUE_HMONG: &'static [(char, char)] =
- &[('𞄀', '𞄬'), ('\u{1e130}', '𞄽'), ('𞅀', '𞅉'), ('𞅎', '𞅏')];
-
-pub const OGHAM: &'static [(char, char)] = &[('\u{1680}', '᚜')];
-
-pub const OL_CHIKI: &'static [(char, char)] = &[('᱐', '᱿')];
-
-pub const OL_ONAL: &'static [(char, char)] = &[('𞗐', '𞗺'), ('𞗿', '𞗿')];
-
-pub const OLD_HUNGARIAN: &'static [(char, char)] =
- &[('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿')];
-
-pub const OLD_ITALIC: &'static [(char, char)] = &[('𐌀', '𐌣'), ('𐌭', '𐌯')];
-
-pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[('𐪀', '𐪟')];
-
-pub const OLD_PERMIC: &'static [(char, char)] = &[('𐍐', '\u{1037a}')];
-
-pub const OLD_PERSIAN: &'static [(char, char)] = &[('𐎠', '𐏃'), ('𐏈', '𐏕')];
-
-pub const OLD_SOGDIAN: &'static [(char, char)] = &[('𐼀', '𐼧')];
-
-pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[('𐩠', '𐩿')];
-
-pub const OLD_TURKIC: &'static [(char, char)] = &[('𐰀', '𐱈')];
-
-pub const OLD_UYGHUR: &'static [(char, char)] = &[('𐽰', '𐾉')];
-
-pub const ORIYA: &'static [(char, char)] = &[
- ('\u{b01}', 'ଃ'),
- ('ଅ', 'ଌ'),
- ('ଏ', 'ଐ'),
- ('ଓ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଵ', 'ହ'),
- ('\u{b3c}', '\u{b44}'),
- ('େ', 'ୈ'),
- ('ୋ', '\u{b4d}'),
- ('\u{b55}', '\u{b57}'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', '\u{b63}'),
- ('୦', '୷'),
-];
-
-pub const OSAGE: &'static [(char, char)] = &[('𐒰', '𐓓'), ('𐓘', '𐓻')];
-
-pub const OSMANYA: &'static [(char, char)] = &[('𐒀', '𐒝'), ('𐒠', '𐒩')];
-
-pub const PAHAWH_HMONG: &'static [(char, char)] =
- &[('𖬀', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖭣', '𖭷'), ('𖭽', '𖮏')];
-
-pub const PALMYRENE: &'static [(char, char)] = &[('𐡠', '𐡿')];
-
-pub const PAU_CIN_HAU: &'static [(char, char)] = &[('𑫀', '𑫸')];
-
-pub const PHAGS_PA: &'static [(char, char)] = &[('ꡀ', '꡷')];
-
-pub const PHOENICIAN: &'static [(char, char)] = &[('𐤀', '𐤛'), ('𐤟', '𐤟')];
-
-pub const PSALTER_PAHLAVI: &'static [(char, char)] =
- &[('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯')];
-
-pub const REJANG: &'static [(char, char)] = &[('ꤰ', '\u{a953}'), ('꥟', '꥟')];
-
-pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ')];
-
-pub const SAMARITAN: &'static [(char, char)] = &[('ࠀ', '\u{82d}'), ('࠰', '࠾')];
-
-pub const SAURASHTRA: &'static [(char, char)] =
- &[('ꢀ', '\u{a8c5}'), ('꣎', '꣙')];
-
-pub const SHARADA: &'static [(char, char)] = &[('\u{11180}', '𑇟')];
-
-pub const SHAVIAN: &'static [(char, char)] = &[('𐑐', '𐑿')];
-
-pub const SIDDHAM: &'static [(char, char)] =
- &[('𑖀', '\u{115b5}'), ('𑖸', '\u{115dd}')];
-
-pub const SIGNWRITING: &'static [(char, char)] =
- &[('𝠀', '𝪋'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}')];
-
-pub const SINHALA: &'static [(char, char)] = &[
- ('\u{d81}', 'ඃ'),
- ('අ', 'ඖ'),
- ('ක', 'න'),
- ('ඳ', 'ර'),
- ('ල', 'ල'),
- ('ව', 'ෆ'),
- ('\u{dca}', '\u{dca}'),
- ('\u{dcf}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('ෘ', '\u{ddf}'),
- ('෦', '෯'),
- ('ෲ', '෴'),
- ('𑇡', '𑇴'),
-];
-
-pub const SOGDIAN: &'static [(char, char)] = &[('𐼰', '𐽙')];
-
-pub const SORA_SOMPENG: &'static [(char, char)] = &[('𑃐', '𑃨'), ('𑃰', '𑃹')];
-
-pub const SOYOMBO: &'static [(char, char)] = &[('𑩐', '𑪢')];
-
-pub const SUNDANESE: &'static [(char, char)] =
- &[('\u{1b80}', 'ᮿ'), ('᳀', '᳇')];
-
-pub const SUNUWAR: &'static [(char, char)] = &[('𑯀', '𑯡'), ('𑯰', '𑯹')];
-
-pub const SYLOTI_NAGRI: &'static [(char, char)] = &[('ꠀ', '\u{a82c}')];
-
-pub const SYRIAC: &'static [(char, char)] =
- &[('܀', '܍'), ('\u{70f}', '\u{74a}'), ('ݍ', 'ݏ'), ('ࡠ', 'ࡪ')];
-
-pub const TAGALOG: &'static [(char, char)] = &[('ᜀ', '\u{1715}'), ('ᜟ', 'ᜟ')];
-
-pub const TAGBANWA: &'static [(char, char)] =
- &[('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}')];
-
-pub const TAI_LE: &'static [(char, char)] = &[('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ')];
-
-pub const TAI_THAM: &'static [(char, char)] = &[
- ('ᨠ', '\u{1a5e}'),
- ('\u{1a60}', '\u{1a7c}'),
- ('\u{1a7f}', '᪉'),
- ('᪐', '᪙'),
- ('᪠', '᪭'),
-];
-
-pub const TAI_VIET: &'static [(char, char)] = &[('ꪀ', 'ꫂ'), ('ꫛ', '꫟')];
-
-pub const TAKRI: &'static [(char, char)] = &[('𑚀', '𑚹'), ('𑛀', '𑛉')];
-
-pub const TAMIL: &'static [(char, char)] = &[
- ('\u{b82}', 'ஃ'),
- ('அ', 'ஊ'),
- ('எ', 'ஐ'),
- ('ஒ', 'க'),
- ('ங', 'ச'),
- ('ஜ', 'ஜ'),
- ('ஞ', 'ட'),
- ('ண', 'த'),
- ('ந', 'ப'),
- ('ம', 'ஹ'),
- ('\u{bbe}', 'ூ'),
- ('ெ', 'ை'),
- ('ொ', '\u{bcd}'),
- ('ௐ', 'ௐ'),
- ('\u{bd7}', '\u{bd7}'),
- ('௦', '௺'),
- ('𑿀', '𑿱'),
- ('𑿿', '𑿿'),
-];
-
-pub const TANGSA: &'static [(char, char)] = &[('𖩰', '𖪾'), ('𖫀', '𖫉')];
-
-pub const TANGUT: &'static [(char, char)] =
- &[('𖿠', '𖿠'), ('𗀀', '𘟷'), ('𘠀', '𘫿'), ('𘴀', '𘴈')];
-
-pub const TELUGU: &'static [(char, char)] = &[
- ('\u{c00}', 'ఌ'),
- ('ఎ', 'ఐ'),
- ('ఒ', 'న'),
- ('ప', 'హ'),
- ('\u{c3c}', 'ౄ'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4d}'),
- ('\u{c55}', '\u{c56}'),
- ('ౘ', 'ౚ'),
- ('ౝ', 'ౝ'),
- ('ౠ', '\u{c63}'),
- ('౦', '౯'),
- ('౷', '౿'),
-];
-
-pub const THAANA: &'static [(char, char)] = &[('ހ', 'ޱ')];
-
-pub const THAI: &'static [(char, char)] = &[('ก', '\u{e3a}'), ('เ', '๛')];
-
-pub const TIBETAN: &'static [(char, char)] = &[
- ('ༀ', 'ཇ'),
- ('ཉ', 'ཬ'),
- ('\u{f71}', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('྾', '࿌'),
- ('࿎', '࿔'),
- ('࿙', '࿚'),
-];
-
-pub const TIFINAGH: &'static [(char, char)] =
- &[('ⴰ', 'ⵧ'), ('ⵯ', '⵰'), ('\u{2d7f}', '\u{2d7f}')];
-
-pub const TIRHUTA: &'static [(char, char)] = &[('𑒀', '𑓇'), ('𑓐', '𑓙')];
-
-pub const TODHRI: &'static [(char, char)] = &[('𐗀', '𐗳')];
-
-pub const TOTO: &'static [(char, char)] = &[('𞊐', '\u{1e2ae}')];
-
-pub const TULU_TIGALARI: &'static [(char, char)] = &[
- ('𑎀', '𑎉'),
- ('𑎋', '𑎋'),
- ('𑎎', '𑎎'),
- ('𑎐', '𑎵'),
- ('𑎷', '\u{113c0}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '𑏊'),
- ('𑏌', '𑏕'),
- ('𑏗', '𑏘'),
- ('\u{113e1}', '\u{113e2}'),
-];
-
-pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')];
-
-pub const VAI: &'static [(char, char)] = &[('ꔀ', 'ꘫ')];
-
-pub const VITHKUQI: &'static [(char, char)] = &[
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
-];
-
-pub const WANCHO: &'static [(char, char)] = &[('𞋀', '𞋹'), ('𞋿', '𞋿')];
-
-pub const WARANG_CITI: &'static [(char, char)] = &[('𑢠', '𑣲'), ('𑣿', '𑣿')];
-
-pub const YEZIDI: &'static [(char, char)] =
- &[('𐺀', '𐺩'), ('\u{10eab}', '𐺭'), ('𐺰', '𐺱')];
-
-pub const YI: &'static [(char, char)] = &[('ꀀ', 'ꒌ'), ('꒐', '꓆')];
-
-pub const ZANABAZAR_SQUARE: &'static [(char, char)] = &[('𑨀', '\u{11a47}')];
diff --git a/vendor/regex-syntax/src/unicode_tables/script_extension.rs b/vendor/regex-syntax/src/unicode_tables/script_extension.rs
deleted file mode 100644
index e3f492e2..00000000
--- a/vendor/regex-syntax/src/unicode_tables/script_extension.rs
+++ /dev/null
@@ -1,1718 +0,0 @@
-// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
-//
-// ucd-generate script-extension ucd-16.0.0 --chars
-//
-// Unicode version: 16.0.0.
-//
-// ucd-generate 0.3.1 is available on crates.io.
-
-pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
- ("Adlam", ADLAM),
- ("Ahom", AHOM),
- ("Anatolian_Hieroglyphs", ANATOLIAN_HIEROGLYPHS),
- ("Arabic", ARABIC),
- ("Armenian", ARMENIAN),
- ("Avestan", AVESTAN),
- ("Balinese", BALINESE),
- ("Bamum", BAMUM),
- ("Bassa_Vah", BASSA_VAH),
- ("Batak", BATAK),
- ("Bengali", BENGALI),
- ("Bhaiksuki", BHAIKSUKI),
- ("Bopomofo", BOPOMOFO),
- ("Brahmi", BRAHMI),
- ("Braille", BRAILLE),
- ("Buginese", BUGINESE),
- ("Buhid", BUHID),
- ("Canadian_Aboriginal", CANADIAN_ABORIGINAL),
- ("Carian", CARIAN),
- ("Caucasian_Albanian", CAUCASIAN_ALBANIAN),
- ("Chakma", CHAKMA),
- ("Cham", CHAM),
- ("Cherokee", CHEROKEE),
- ("Chorasmian", CHORASMIAN),
- ("Common", COMMON),
- ("Coptic", COPTIC),
- ("Cuneiform", CUNEIFORM),
- ("Cypriot", CYPRIOT),
- ("Cypro_Minoan", CYPRO_MINOAN),
- ("Cyrillic", CYRILLIC),
- ("Deseret", DESERET),
- ("Devanagari", DEVANAGARI),
- ("Dives_Akuru", DIVES_AKURU),
- ("Dogra", DOGRA),
- ("Duployan", DUPLOYAN),
- ("Egyptian_Hieroglyphs", EGYPTIAN_HIEROGLYPHS),
- ("Elbasan", ELBASAN),
- ("Elymaic", ELYMAIC),
- ("Ethiopic", ETHIOPIC),
- ("Garay", GARAY),
- ("Georgian", GEORGIAN),
- ("Glagolitic", GLAGOLITIC),
- ("Gothic", GOTHIC),
- ("Grantha", GRANTHA),
- ("Greek", GREEK),
- ("Gujarati", GUJARATI),
- ("Gunjala_Gondi", GUNJALA_GONDI),
- ("Gurmukhi", GURMUKHI),
- ("Gurung_Khema", GURUNG_KHEMA),
- ("Han", HAN),
- ("Hangul", HANGUL),
- ("Hanifi_Rohingya", HANIFI_ROHINGYA),
- ("Hanunoo", HANUNOO),
- ("Hatran", HATRAN),
- ("Hebrew", HEBREW),
- ("Hiragana", HIRAGANA),
- ("Imperial_Aramaic", IMPERIAL_ARAMAIC),
- ("Inherited", INHERITED),
- ("Inscriptional_Pahlavi", INSCRIPTIONAL_PAHLAVI),
- ("Inscriptional_Parthian", INSCRIPTIONAL_PARTHIAN),
- ("Javanese", JAVANESE),
- ("Kaithi", KAITHI),
- ("Kannada", KANNADA),
- ("Katakana", KATAKANA),
- ("Kawi", KAWI),
- ("Kayah_Li", KAYAH_LI),
- ("Kharoshthi", KHAROSHTHI),
- ("Khitan_Small_Script", KHITAN_SMALL_SCRIPT),
- ("Khmer", KHMER),
- ("Khojki", KHOJKI),
- ("Khudawadi", KHUDAWADI),
- ("Kirat_Rai", KIRAT_RAI),
- ("Lao", LAO),
- ("Latin", LATIN),
- ("Lepcha", LEPCHA),
- ("Limbu", LIMBU),
- ("Linear_A", LINEAR_A),
- ("Linear_B", LINEAR_B),
- ("Lisu", LISU),
- ("Lycian", LYCIAN),
- ("Lydian", LYDIAN),
- ("Mahajani", MAHAJANI),
- ("Makasar", MAKASAR),
- ("Malayalam", MALAYALAM),
- ("Mandaic", MANDAIC),
- ("Manichaean", MANICHAEAN),
- ("Marchen", MARCHEN),
- ("Masaram_Gondi", MASARAM_GONDI),
- ("Medefaidrin", MEDEFAIDRIN),
- ("Meetei_Mayek", MEETEI_MAYEK),
- ("Mende_Kikakui", MENDE_KIKAKUI),
- ("Meroitic_Cursive", MEROITIC_CURSIVE),
- ("Meroitic_Hieroglyphs", MEROITIC_HIEROGLYPHS),
- ("Miao", MIAO),
- ("Modi", MODI),
- ("Mongolian", MONGOLIAN),
- ("Mro", MRO),
- ("Multani", MULTANI),
- ("Myanmar", MYANMAR),
- ("Nabataean", NABATAEAN),
- ("Nag_Mundari", NAG_MUNDARI),
- ("Nandinagari", NANDINAGARI),
- ("New_Tai_Lue", NEW_TAI_LUE),
- ("Newa", NEWA),
- ("Nko", NKO),
- ("Nushu", NUSHU),
- ("Nyiakeng_Puachue_Hmong", NYIAKENG_PUACHUE_HMONG),
- ("Ogham", OGHAM),
- ("Ol_Chiki", OL_CHIKI),
- ("Ol_Onal", OL_ONAL),
- ("Old_Hungarian", OLD_HUNGARIAN),
- ("Old_Italic", OLD_ITALIC),
- ("Old_North_Arabian", OLD_NORTH_ARABIAN),
- ("Old_Permic", OLD_PERMIC),
- ("Old_Persian", OLD_PERSIAN),
- ("Old_Sogdian", OLD_SOGDIAN),
- ("Old_South_Arabian", OLD_SOUTH_ARABIAN),
- ("Old_Turkic", OLD_TURKIC),
- ("Old_Uyghur", OLD_UYGHUR),
- ("Oriya", ORIYA),
- ("Osage", OSAGE),
- ("Osmanya", OSMANYA),
- ("Pahawh_Hmong", PAHAWH_HMONG),
- ("Palmyrene", PALMYRENE),
- ("Pau_Cin_Hau", PAU_CIN_HAU),
- ("Phags_Pa", PHAGS_PA),
- ("Phoenician", PHOENICIAN),
- ("Psalter_Pahlavi", PSALTER_PAHLAVI),
- ("Rejang", REJANG),
- ("Runic", RUNIC),
- ("Samaritan", SAMARITAN),
- ("Saurashtra", SAURASHTRA),
- ("Sharada", SHARADA),
- ("Shavian", SHAVIAN),
- ("Siddham", SIDDHAM),
- ("SignWriting", SIGNWRITING),
- ("Sinhala", SINHALA),
- ("Sogdian", SOGDIAN),
- ("Sora_Sompeng", SORA_SOMPENG),
- ("Soyombo", SOYOMBO),
- ("Sundanese", SUNDANESE),
- ("Sunuwar", SUNUWAR),
- ("Syloti_Nagri", SYLOTI_NAGRI),
- ("Syriac", SYRIAC),
- ("Tagalog", TAGALOG),
- ("Tagbanwa", TAGBANWA),
- ("Tai_Le", TAI_LE),
- ("Tai_Tham", TAI_THAM),
- ("Tai_Viet", TAI_VIET),
- ("Takri", TAKRI),
- ("Tamil", TAMIL),
- ("Tangsa", TANGSA),
- ("Tangut", TANGUT),
- ("Telugu", TELUGU),
- ("Thaana", THAANA),
- ("Thai", THAI),
- ("Tibetan", TIBETAN),
- ("Tifinagh", TIFINAGH),
- ("Tirhuta", TIRHUTA),
- ("Todhri", TODHRI),
- ("Toto", TOTO),
- ("Tulu_Tigalari", TULU_TIGALARI),
- ("Ugaritic", UGARITIC),
- ("Vai", VAI),
- ("Vithkuqi", VITHKUQI),
- ("Wancho", WANCHO),
- ("Warang_Citi", WARANG_CITI),
- ("Yezidi", YEZIDI),
- ("Yi", YI),
- ("Zanabazar_Square", ZANABAZAR_SQUARE),
-];
-
-pub const ADLAM: &'static [(char, char)] = &[
- ('؟', '؟'),
- ('ـ', 'ـ'),
- ('⁏', '⁏'),
- ('⹁', '⹁'),
- ('𞤀', '𞥋'),
- ('𞥐', '𞥙'),
- ('𞥞', '𞥟'),
-];
-
-pub const AHOM: &'static [(char, char)] =
- &[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑝆')];
-
-pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[('𔐀', '𔙆')];
-
-pub const ARABIC: &'static [(char, char)] = &[
- ('\u{600}', '\u{604}'),
- ('؆', '\u{6dc}'),
- ('۞', 'ۿ'),
- ('ݐ', 'ݿ'),
- ('ࡰ', 'ࢎ'),
- ('\u{890}', '\u{891}'),
- ('\u{897}', '\u{8e1}'),
- ('\u{8e3}', '\u{8ff}'),
- ('⁏', '⁏'),
- ('⹁', '⹁'),
- ('ﭐ', '﯂'),
- ('ﯓ', 'ﶏ'),
- ('ﶒ', 'ﷇ'),
- ('﷏', '﷏'),
- ('ﷰ', '﷿'),
- ('ﹰ', 'ﹴ'),
- ('ﹶ', 'ﻼ'),
- ('\u{102e0}', '𐋻'),
- ('𐹠', '𐹾'),
- ('𐻂', '𐻄'),
- ('\u{10efc}', '\u{10eff}'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('𞻰', '𞻱'),
-];
-
-pub const ARMENIAN: &'static [(char, char)] =
- &[('\u{308}', '\u{308}'), ('Ա', 'Ֆ'), ('ՙ', '֊'), ('֍', '֏'), ('ﬓ', 'ﬗ')];
-
-pub const AVESTAN: &'static [(char, char)] =
- &[('·', '·'), ('⸰', '⸱'), ('𐬀', '𐬵'), ('𐬹', '𐬿')];
-
-pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭌ'), ('᭎', '᭿')];
-
-pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')];
-
-pub const BASSA_VAH: &'static [(char, char)] =
- &[('𖫐', '𖫭'), ('\u{16af0}', '𖫵')];
-
-pub const BATAK: &'static [(char, char)] = &[('ᯀ', '\u{1bf3}'), ('᯼', '᯿')];
-
-pub const BENGALI: &'static [(char, char)] = &[
- ('ʼ', 'ʼ'),
- ('\u{951}', '\u{952}'),
- ('।', '॥'),
- ('ঀ', 'ঃ'),
- ('অ', 'ঌ'),
- ('এ', 'ঐ'),
- ('ও', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('\u{9bc}', '\u{9c4}'),
- ('ে', 'ৈ'),
- ('ো', 'ৎ'),
- ('\u{9d7}', '\u{9d7}'),
- ('ড়', 'ঢ়'),
- ('য়', '\u{9e3}'),
- ('০', '\u{9fe}'),
- ('\u{1cd0}', '\u{1cd0}'),
- ('\u{1cd2}', '\u{1cd2}'),
- ('\u{1cd5}', '\u{1cd6}'),
- ('\u{1cd8}', '\u{1cd8}'),
- ('᳡', '᳡'),
- ('ᳪ', 'ᳪ'),
- ('\u{1ced}', '\u{1ced}'),
- ('ᳲ', 'ᳲ'),
- ('ᳵ', '᳷'),
- ('\u{a8f1}', '\u{a8f1}'),
-];
-
-pub const BHAIKSUKI: &'static [(char, char)] =
- &[('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱅'), ('𑱐', '𑱬')];
-
-pub const BOPOMOFO: &'static [(char, char)] = &[
- ('ˇ', 'ˇ'),
- ('ˉ', 'ˋ'),
- ('˙', '˙'),
- ('˪', '˫'),
- ('、', '〃'),
- ('〈', '】'),
- ('〓', '〟'),
- ('\u{302a}', '\u{302d}'),
- ('〰', '〰'),
- ('〷', '〷'),
- ('・', '・'),
- ('ㄅ', 'ㄯ'),
- ('ㆠ', 'ㆿ'),
- ('﹅', '﹆'),
- ('。', '・'),
-];
-
-pub const BRAHMI: &'static [(char, char)] =
- &[('𑀀', '𑁍'), ('𑁒', '𑁵'), ('\u{1107f}', '\u{1107f}')];
-
-pub const BRAILLE: &'static [(char, char)] = &[('⠀', '⣿')];
-
-pub const BUGINESE: &'static [(char, char)] =
- &[('ᨀ', '\u{1a1b}'), ('᨞', '᨟'), ('ꧏ', 'ꧏ')];
-
-pub const BUHID: &'static [(char, char)] = &[('᜵', '᜶'), ('ᝀ', '\u{1753}')];
-
-pub const CANADIAN_ABORIGINAL: &'static [(char, char)] =
- &[('᐀', 'ᙿ'), ('ᢰ', 'ᣵ'), ('𑪰', '𑪿')];
-
-pub const CARIAN: &'static [(char, char)] =
- &[('·', '·'), ('⁚', '⁚'), ('⁝', '⁝'), ('⸱', '⸱'), ('𐊠', '𐋐')];
-
-pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = &[
- ('\u{304}', '\u{304}'),
- ('\u{331}', '\u{331}'),
- ('\u{35e}', '\u{35e}'),
- ('𐔰', '𐕣'),
- ('𐕯', '𐕯'),
-];
-
-pub const CHAKMA: &'static [(char, char)] =
- &[('০', '৯'), ('၀', '၉'), ('\u{11100}', '\u{11134}'), ('𑄶', '𑅇')];
-
-pub const CHAM: &'static [(char, char)] =
- &[('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟')];
-
-pub const CHEROKEE: &'static [(char, char)] = &[
- ('\u{300}', '\u{302}'),
- ('\u{304}', '\u{304}'),
- ('\u{30b}', '\u{30c}'),
- ('\u{323}', '\u{324}'),
- ('\u{330}', '\u{331}'),
- ('Ꭰ', 'Ᏽ'),
- ('ᏸ', 'ᏽ'),
- ('ꭰ', 'ꮿ'),
-];
-
-pub const CHORASMIAN: &'static [(char, char)] = &[('𐾰', '𐿋')];
-
-pub const COMMON: &'static [(char, char)] = &[
- ('\0', '@'),
- ('[', '`'),
- ('{', '©'),
- ('«', '¶'),
- ('¸', '¹'),
- ('»', '¿'),
- ('×', '×'),
- ('÷', '÷'),
- ('ʹ', 'ʻ'),
- ('ʽ', 'ˆ'),
- ('ˈ', 'ˈ'),
- ('ˌ', 'ˌ'),
- ('ˎ', '˖'),
- ('˘', '˘'),
- ('˚', '˟'),
- ('˥', '˩'),
- ('ˬ', '˿'),
- (';', ';'),
- ('΅', '΅'),
- ('·', '·'),
- ('\u{605}', '\u{605}'),
- ('\u{6dd}', '\u{6dd}'),
- ('\u{8e2}', '\u{8e2}'),
- ('฿', '฿'),
- ('࿕', '࿘'),
- ('\u{2000}', '\u{200b}'),
- ('\u{200e}', '\u{202e}'),
- ('‰', '⁎'),
- ('⁐', '⁙'),
- ('⁛', '⁜'),
- ('⁞', '\u{2064}'),
- ('\u{2066}', '⁰'),
- ('⁴', '⁾'),
- ('₀', '₎'),
- ('₠', '⃀'),
- ('℀', '℥'),
- ('℧', '℩'),
- ('ℬ', 'ℱ'),
- ('ℳ', '⅍'),
- ('⅏', '⅟'),
- ('↉', '↋'),
- ('←', '␩'),
- ('⑀', '⑊'),
- ('①', '⟿'),
- ('⤀', '⭳'),
- ('⭶', '⮕'),
- ('⮗', '⯿'),
- ('⸀', '⸖'),
- ('⸘', 'ⸯ'),
- ('⸲', '⸻'),
- ('⸽', '⹀'),
- ('⹂', '⹂'),
- ('⹄', '⹝'),
- ('\u{3000}', '\u{3000}'),
- ('〄', '〄'),
- ('〒', '〒'),
- ('〠', '〠'),
- ('〶', '〶'),
- ('㉈', '㉟'),
- ('㉿', '㉿'),
- ('㊱', '㊿'),
- ('㋌', '㋏'),
- ('㍱', '㍺'),
- ('㎀', '㏟'),
- ('㏿', '㏿'),
- ('䷀', '䷿'),
- ('꜈', '꜡'),
- ('ꞈ', '꞊'),
- ('꭛', '꭛'),
- ('꭪', '꭫'),
- ('︐', '︙'),
- ('︰', '﹄'),
- ('﹇', '﹒'),
- ('﹔', '﹦'),
- ('﹨', '﹫'),
- ('\u{feff}', '\u{feff}'),
- ('!', '@'),
- ('[', '`'),
- ('{', '⦆'),
- ('¢', '₩'),
- ('│', '○'),
- ('\u{fff9}', '�'),
- ('𐆐', '𐆜'),
- ('𐇐', '𐇼'),
- ('𜰀', '𜳹'),
- ('𜴀', '𜺳'),
- ('𜽐', '𜿃'),
- ('𝀀', '𝃵'),
- ('𝄀', '𝄦'),
- ('𝄩', '\u{1d166}'),
- ('𝅪', '\u{1d17a}'),
- ('𝆃', '𝆄'),
- ('𝆌', '𝆩'),
- ('𝆮', '𝇪'),
- ('𝋀', '𝋓'),
- ('𝋠', '𝋳'),
- ('𝌀', '𝍖'),
- ('𝍲', '𝍸'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝟋'),
- ('𝟎', '𝟿'),
- ('𞱱', '𞲴'),
- ('𞴁', '𞴽'),
- ('🀀', '🀫'),
- ('🀰', '🂓'),
- ('🂠', '🂮'),
- ('🂱', '🂿'),
- ('🃁', '🃏'),
- ('🃑', '🃵'),
- ('🄀', '🆭'),
- ('🇦', '🇿'),
- ('🈁', '🈂'),
- ('🈐', '🈻'),
- ('🉀', '🉈'),
- ('🉠', '🉥'),
- ('🌀', '🛗'),
- ('🛜', '🛬'),
- ('🛰', '🛼'),
- ('🜀', '🝶'),
- ('🝻', '🟙'),
- ('🟠', '🟫'),
- ('🟰', '🟰'),
- ('🠀', '🠋'),
- ('🠐', '🡇'),
- ('🡐', '🡙'),
- ('🡠', '🢇'),
- ('🢐', '🢭'),
- ('🢰', '🢻'),
- ('🣀', '🣁'),
- ('🤀', '🩓'),
- ('🩠', '🩭'),
- ('🩰', '🩼'),
- ('🪀', '🪉'),
- ('🪏', '🫆'),
- ('🫎', '🫜'),
- ('🫟', '🫩'),
- ('🫰', '🫸'),
- ('🬀', '🮒'),
- ('🮔', '🯹'),
- ('\u{e0001}', '\u{e0001}'),
- ('\u{e0020}', '\u{e007f}'),
-];
-
-pub const COPTIC: &'static [(char, char)] = &[
- ('·', '·'),
- ('\u{300}', '\u{300}'),
- ('\u{304}', '\u{305}'),
- ('\u{307}', '\u{307}'),
- ('ʹ', '͵'),
- ('Ϣ', 'ϯ'),
- ('Ⲁ', 'ⳳ'),
- ('⳹', '⳿'),
- ('⸗', '⸗'),
- ('\u{102e0}', '𐋻'),
-];
-
-pub const CUNEIFORM: &'static [(char, char)] =
- &[('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃')];
-
-pub const CYPRIOT: &'static [(char, char)] = &[
- ('𐄀', '𐄂'),
- ('𐄇', '𐄳'),
- ('𐄷', '𐄿'),
- ('𐠀', '𐠅'),
- ('𐠈', '𐠈'),
- ('𐠊', '𐠵'),
- ('𐠷', '𐠸'),
- ('𐠼', '𐠼'),
- ('𐠿', '𐠿'),
-];
-
-pub const CYPRO_MINOAN: &'static [(char, char)] = &[('𐄀', '𐄁'), ('𒾐', '𒿲')];
-
-pub const CYRILLIC: &'static [(char, char)] = &[
- ('ʼ', 'ʼ'),
- ('\u{300}', '\u{302}'),
- ('\u{304}', '\u{304}'),
- ('\u{306}', '\u{306}'),
- ('\u{308}', '\u{308}'),
- ('\u{30b}', '\u{30b}'),
- ('\u{311}', '\u{311}'),
- ('Ѐ', 'ԯ'),
- ('ᲀ', 'ᲊ'),
- ('ᴫ', 'ᴫ'),
- ('ᵸ', 'ᵸ'),
- ('\u{1df8}', '\u{1df8}'),
- ('\u{2de0}', '\u{2dff}'),
- ('⹃', '⹃'),
- ('Ꙁ', '\u{a69f}'),
- ('\u{fe2e}', '\u{fe2f}'),
- ('𞀰', '𞁭'),
- ('\u{1e08f}', '\u{1e08f}'),
-];
-
-pub const DESERET: &'static [(char, char)] = &[('𐐀', '𐑏')];
-
-pub const DEVANAGARI: &'static [(char, char)] = &[
- ('ʼ', 'ʼ'),
- ('\u{900}', '\u{952}'),
- ('\u{955}', 'ॿ'),
- ('\u{1cd0}', 'ᳶ'),
- ('\u{1cf8}', '\u{1cf9}'),
- ('\u{20f0}', '\u{20f0}'),
- ('꠰', '꠹'),
- ('\u{a8e0}', '\u{a8ff}'),
- ('𑬀', '𑬉'),
-];
-
-pub const DIVES_AKURU: &'static [(char, char)] = &[
- ('𑤀', '𑤆'),
- ('𑤉', '𑤉'),
- ('𑤌', '𑤓'),
- ('𑤕', '𑤖'),
- ('𑤘', '𑤵'),
- ('𑤷', '𑤸'),
- ('\u{1193b}', '𑥆'),
- ('𑥐', '𑥙'),
-];
-
-pub const DOGRA: &'static [(char, char)] =
- &[('।', '९'), ('꠰', '꠹'), ('𑠀', '𑠻')];
-
-pub const DUPLOYAN: &'static [(char, char)] = &[
- ('·', '·'),
- ('\u{307}', '\u{308}'),
- ('\u{30a}', '\u{30a}'),
- ('\u{323}', '\u{324}'),
- ('⸼', '⸼'),
- ('𛰀', '𛱪'),
- ('𛱰', '𛱼'),
- ('𛲀', '𛲈'),
- ('𛲐', '𛲙'),
- ('𛲜', '\u{1bca3}'),
-];
-
-pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] =
- &[('𓀀', '\u{13455}'), ('𓑠', '𔏺')];
-
-pub const ELBASAN: &'static [(char, char)] =
- &[('·', '·'), ('\u{305}', '\u{305}'), ('𐔀', '𐔧')];
-
-pub const ELYMAIC: &'static [(char, char)] = &[('𐿠', '𐿶')];
-
-pub const ETHIOPIC: &'static [(char, char)] = &[
- ('\u{30e}', '\u{30e}'),
- ('ሀ', 'ቈ'),
- ('ቊ', 'ቍ'),
- ('ቐ', 'ቖ'),
- ('ቘ', 'ቘ'),
- ('ቚ', 'ቝ'),
- ('በ', 'ኈ'),
- ('ኊ', 'ኍ'),
- ('ነ', 'ኰ'),
- ('ኲ', 'ኵ'),
- ('ኸ', 'ኾ'),
- ('ዀ', 'ዀ'),
- ('ዂ', 'ዅ'),
- ('ወ', 'ዖ'),
- ('ዘ', 'ጐ'),
- ('ጒ', 'ጕ'),
- ('ጘ', 'ፚ'),
- ('\u{135d}', '፼'),
- ('ᎀ', '᎙'),
- ('ⶀ', 'ⶖ'),
- ('ⶠ', 'ⶦ'),
- ('ⶨ', 'ⶮ'),
- ('ⶰ', 'ⶶ'),
- ('ⶸ', 'ⶾ'),
- ('ⷀ', 'ⷆ'),
- ('ⷈ', 'ⷎ'),
- ('ⷐ', 'ⷖ'),
- ('ⷘ', 'ⷞ'),
- ('ꬁ', 'ꬆ'),
- ('ꬉ', 'ꬎ'),
- ('ꬑ', 'ꬖ'),
- ('ꬠ', 'ꬦ'),
- ('ꬨ', 'ꬮ'),
- ('𞟠', '𞟦'),
- ('𞟨', '𞟫'),
- ('𞟭', '𞟮'),
- ('𞟰', '𞟾'),
-];
-
-pub const GARAY: &'static [(char, char)] = &[
- ('،', '،'),
- ('؛', '؛'),
- ('؟', '؟'),
- ('𐵀', '𐵥'),
- ('\u{10d69}', '𐶅'),
- ('𐶎', '𐶏'),
-];
-
-pub const GEORGIAN: &'static [(char, char)] = &[
- ('·', '·'),
- ('։', '։'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ა', 'ჿ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('⁚', '⁚'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('⸱', '⸱'),
-];
-
-pub const GLAGOLITIC: &'static [(char, char)] = &[
- ('·', '·'),
- ('\u{303}', '\u{303}'),
- ('\u{305}', '\u{305}'),
- ('\u{484}', '\u{484}'),
- ('\u{487}', '\u{487}'),
- ('։', '։'),
- ('჻', '჻'),
- ('⁚', '⁚'),
- ('Ⰰ', 'ⱟ'),
- ('⹃', '⹃'),
- ('\u{a66f}', '\u{a66f}'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
-];
-
-pub const GOTHIC: &'static [(char, char)] = &[
- ('·', '·'),
- ('\u{304}', '\u{305}'),
- ('\u{308}', '\u{308}'),
- ('\u{331}', '\u{331}'),
- ('𐌰', '𐍊'),
-];
-
-pub const GRANTHA: &'static [(char, char)] = &[
- ('\u{951}', '\u{952}'),
- ('।', '॥'),
- ('௦', '௳'),
- ('\u{1cd0}', '\u{1cd0}'),
- ('\u{1cd2}', '᳓'),
- ('ᳲ', '\u{1cf4}'),
- ('\u{1cf8}', '\u{1cf9}'),
- ('\u{20f0}', '\u{20f0}'),
- ('\u{11300}', '𑌃'),
- ('𑌅', '𑌌'),
- ('𑌏', '𑌐'),
- ('𑌓', '𑌨'),
- ('𑌪', '𑌰'),
- ('𑌲', '𑌳'),
- ('𑌵', '𑌹'),
- ('\u{1133b}', '𑍄'),
- ('𑍇', '𑍈'),
- ('𑍋', '\u{1134d}'),
- ('𑍐', '𑍐'),
- ('\u{11357}', '\u{11357}'),
- ('𑍝', '𑍣'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
- ('𑿐', '𑿑'),
- ('𑿓', '𑿓'),
-];
-
-pub const GREEK: &'static [(char, char)] = &[
- ('·', '·'),
- ('\u{300}', '\u{301}'),
- ('\u{304}', '\u{304}'),
- ('\u{306}', '\u{306}'),
- ('\u{308}', '\u{308}'),
- ('\u{313}', '\u{313}'),
- ('\u{342}', '\u{342}'),
- ('\u{345}', '\u{345}'),
- ('Ͱ', 'ͷ'),
- ('ͺ', 'ͽ'),
- ('Ϳ', 'Ϳ'),
- ('΄', '΄'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', 'ϡ'),
- ('ϰ', 'Ͽ'),
- ('ᴦ', 'ᴪ'),
- ('ᵝ', 'ᵡ'),
- ('ᵦ', 'ᵪ'),
- ('ᶿ', '\u{1dc1}'),
- ('ἀ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ῄ'),
- ('ῆ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('῝', '`'),
- ('ῲ', 'ῴ'),
- ('ῶ', '῾'),
- ('⁝', '⁝'),
- ('Ω', 'Ω'),
- ('ꭥ', 'ꭥ'),
- ('𐅀', '𐆎'),
- ('𐆠', '𐆠'),
- ('𝈀', '𝉅'),
-];
-
-pub const GUJARATI: &'static [(char, char)] = &[
- ('\u{951}', '\u{952}'),
- ('।', '॥'),
- ('\u{a81}', 'ઃ'),
- ('અ', 'ઍ'),
- ('એ', 'ઑ'),
- ('ઓ', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('\u{abc}', '\u{ac5}'),
- ('\u{ac7}', 'ૉ'),
- ('ો', '\u{acd}'),
- ('ૐ', 'ૐ'),
- ('ૠ', '\u{ae3}'),
- ('૦', '૱'),
- ('ૹ', '\u{aff}'),
- ('꠰', '꠹'),
-];
-
-pub const GUNJALA_GONDI: &'static [(char, char)] = &[
- ('·', '·'),
- ('।', '॥'),
- ('𑵠', '𑵥'),
- ('𑵧', '𑵨'),
- ('𑵪', '𑶎'),
- ('\u{11d90}', '\u{11d91}'),
- ('𑶓', '𑶘'),
- ('𑶠', '𑶩'),
-];
-
-pub const GURMUKHI: &'static [(char, char)] = &[
- ('\u{951}', '\u{952}'),
- ('।', '॥'),
- ('\u{a01}', 'ਃ'),
- ('ਅ', 'ਊ'),
- ('ਏ', 'ਐ'),
- ('ਓ', 'ਨ'),
- ('ਪ', 'ਰ'),
- ('ਲ', 'ਲ਼'),
- ('ਵ', 'ਸ਼'),
- ('ਸ', 'ਹ'),
- ('\u{a3c}', '\u{a3c}'),
- ('ਾ', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4d}'),
- ('\u{a51}', '\u{a51}'),
- ('ਖ਼', 'ੜ'),
- ('ਫ਼', 'ਫ਼'),
- ('੦', '੶'),
- ('꠰', '꠹'),
-];
-
-pub const GURUNG_KHEMA: &'static [(char, char)] = &[('॥', '॥'), ('𖄀', '𖄹')];
-
-pub const HAN: &'static [(char, char)] = &[
- ('·', '·'),
- ('⺀', '⺙'),
- ('⺛', '⻳'),
- ('⼀', '⿕'),
- ('⿰', '⿿'),
- ('、', '〃'),
- ('々', '】'),
- ('〓', '〟'),
- ('〡', '\u{302d}'),
- ('〰', '〰'),
- ('〷', '〿'),
- ('・', '・'),
- ('㆐', '㆟'),
- ('㇀', '㇥'),
- ('㇯', '㇯'),
- ('㈠', '㉇'),
- ('㊀', '㊰'),
- ('㋀', '㋋'),
- ('㋿', '㋿'),
- ('㍘', '㍰'),
- ('㍻', '㍿'),
- ('㏠', '㏾'),
- ('㐀', '䶿'),
- ('一', '鿿'),
- ('꜀', '꜇'),
- ('豈', '舘'),
- ('並', '龎'),
- ('﹅', '﹆'),
- ('。', '・'),
- ('𖿢', '𖿣'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('𝍠', '𝍱'),
- ('🉐', '🉑'),
- ('𠀀', '𪛟'),
- ('𪜀', '𫜹'),
- ('𫝀', '𫠝'),
- ('𫠠', '𬺡'),
- ('𬺰', '𮯠'),
- ('𮯰', '𮹝'),
- ('丽', '𪘀'),
- ('𰀀', '𱍊'),
- ('𱍐', '𲎯'),
-];
-
-pub const HANGUL: &'static [(char, char)] = &[
- ('ᄀ', 'ᇿ'),
- ('、', '〃'),
- ('〈', '】'),
- ('〓', '〟'),
- ('\u{302e}', '〰'),
- ('〷', '〷'),
- ('・', '・'),
- ('ㄱ', 'ㆎ'),
- ('㈀', '㈞'),
- ('㉠', '㉾'),
- ('ꥠ', 'ꥼ'),
- ('가', '힣'),
- ('ힰ', 'ퟆ'),
- ('ퟋ', 'ퟻ'),
- ('﹅', '﹆'),
- ('。', '・'),
- ('ᅠ', 'ᄒ'),
- ('ᅡ', 'ᅦ'),
- ('ᅧ', 'ᅬ'),
- ('ᅭ', 'ᅲ'),
- ('ᅳ', 'ᅵ'),
-];
-
-pub const HANIFI_ROHINGYA: &'static [(char, char)] = &[
- ('،', '،'),
- ('؛', '؛'),
- ('؟', '؟'),
- ('ـ', 'ـ'),
- ('۔', '۔'),
- ('𐴀', '\u{10d27}'),
- ('𐴰', '𐴹'),
-];
-
-pub const HANUNOO: &'static [(char, char)] = &[('ᜠ', '᜶')];
-
-pub const HATRAN: &'static [(char, char)] =
- &[('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿')];
-
-pub const HEBREW: &'static [(char, char)] = &[
- ('\u{307}', '\u{308}'),
- ('\u{591}', '\u{5c7}'),
- ('א', 'ת'),
- ('ׯ', '״'),
- ('יִ', 'זּ'),
- ('טּ', 'לּ'),
- ('מּ', 'מּ'),
- ('נּ', 'סּ'),
- ('ףּ', 'פּ'),
- ('צּ', 'ﭏ'),
-];
-
-pub const HIRAGANA: &'static [(char, char)] = &[
- ('、', '〃'),
- ('〈', '】'),
- ('〓', '〟'),
- ('〰', '〵'),
- ('〷', '〷'),
- ('〼', '〽'),
- ('ぁ', 'ゖ'),
- ('\u{3099}', '゠'),
- ('・', 'ー'),
- ('﹅', '﹆'),
- ('。', '・'),
- ('ー', 'ー'),
- ('\u{ff9e}', '\u{ff9f}'),
- ('𛀁', '𛄟'),
- ('𛄲', '𛄲'),
- ('𛅐', '𛅒'),
- ('🈀', '🈀'),
-];
-
-pub const IMPERIAL_ARAMAIC: &'static [(char, char)] =
- &[('𐡀', '𐡕'), ('𐡗', '𐡟')];
-
-pub const INHERITED: &'static [(char, char)] = &[
- ('\u{30f}', '\u{30f}'),
- ('\u{312}', '\u{312}'),
- ('\u{314}', '\u{31f}'),
- ('\u{321}', '\u{322}'),
- ('\u{326}', '\u{32c}'),
- ('\u{32f}', '\u{32f}'),
- ('\u{332}', '\u{341}'),
- ('\u{343}', '\u{344}'),
- ('\u{346}', '\u{357}'),
- ('\u{359}', '\u{35d}'),
- ('\u{35f}', '\u{362}'),
- ('\u{953}', '\u{954}'),
- ('\u{1ab0}', '\u{1ace}'),
- ('\u{1dc2}', '\u{1df7}'),
- ('\u{1df9}', '\u{1df9}'),
- ('\u{1dfb}', '\u{1dff}'),
- ('\u{200c}', '\u{200d}'),
- ('\u{20d0}', '\u{20ef}'),
- ('\u{fe00}', '\u{fe0f}'),
- ('\u{fe20}', '\u{fe2d}'),
- ('\u{101fd}', '\u{101fd}'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('\u{1d167}', '\u{1d169}'),
- ('\u{1d17b}', '\u{1d182}'),
- ('\u{1d185}', '\u{1d18b}'),
- ('\u{1d1aa}', '\u{1d1ad}'),
- ('\u{e0100}', '\u{e01ef}'),
-];
-
-pub const INSCRIPTIONAL_PAHLAVI: &'static [(char, char)] =
- &[('𐭠', '𐭲'), ('𐭸', '𐭿')];
-
-pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] =
- &[('𐭀', '𐭕'), ('𐭘', '𐭟')];
-
-pub const JAVANESE: &'static [(char, char)] =
- &[('\u{a980}', '꧍'), ('ꧏ', '꧙'), ('꧞', '꧟')];
-
-pub const KAITHI: &'static [(char, char)] = &[
- ('०', '९'),
- ('⸱', '⸱'),
- ('꠰', '꠹'),
- ('\u{11080}', '\u{110c2}'),
- ('\u{110cd}', '\u{110cd}'),
-];
-
-pub const KANNADA: &'static [(char, char)] = &[
- ('\u{951}', '\u{952}'),
- ('।', '॥'),
- ('ಀ', 'ಌ'),
- ('ಎ', 'ಐ'),
- ('ಒ', 'ನ'),
- ('ಪ', 'ಳ'),
- ('ವ', 'ಹ'),
- ('\u{cbc}', 'ೄ'),
- ('\u{cc6}', '\u{cc8}'),
- ('\u{cca}', '\u{ccd}'),
- ('\u{cd5}', '\u{cd6}'),
- ('ೝ', 'ೞ'),
- ('ೠ', '\u{ce3}'),
- ('೦', '೯'),
- ('ೱ', 'ೳ'),
- ('\u{1cd0}', '\u{1cd0}'),
- ('\u{1cd2}', '᳓'),
- ('\u{1cda}', '\u{1cda}'),
- ('ᳲ', 'ᳲ'),
- ('\u{1cf4}', '\u{1cf4}'),
- ('꠰', '꠵'),
-];
-
-pub const KATAKANA: &'static [(char, char)] = &[
- ('\u{305}', '\u{305}'),
- ('\u{323}', '\u{323}'),
- ('、', '〃'),
- ('〈', '】'),
- ('〓', '〟'),
- ('〰', '〵'),
- ('〷', '〷'),
- ('〼', '〽'),
- ('\u{3099}', '゜'),
- ('゠', 'ヿ'),
- ('ㇰ', 'ㇿ'),
- ('㋐', '㋾'),
- ('㌀', '㍗'),
- ('﹅', '﹆'),
- ('。', '\u{ff9f}'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('𛀀', '𛀀'),
- ('𛄠', '𛄢'),
- ('𛅕', '𛅕'),
- ('𛅤', '𛅧'),
-];
-
-pub const KAWI: &'static [(char, char)] =
- &[('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f5a}')];
-
-pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '꤯')];
-
-pub const KHAROSHTHI: &'static [(char, char)] = &[
- ('𐨀', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '𐨓'),
- ('𐨕', '𐨗'),
- ('𐨙', '𐨵'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '𐩈'),
- ('𐩐', '𐩘'),
-];
-
-pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] =
- &[('\u{16fe4}', '\u{16fe4}'), ('𘬀', '𘳕'), ('𘳿', '𘳿')];
-
-pub const KHMER: &'static [(char, char)] =
- &[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿')];
-
-pub const KHOJKI: &'static [(char, char)] =
- &[('૦', '૯'), ('꠰', '꠹'), ('𑈀', '𑈑'), ('𑈓', '\u{11241}')];
-
-pub const KHUDAWADI: &'static [(char, char)] =
- &[('।', '॥'), ('꠰', '꠹'), ('𑊰', '\u{112ea}'), ('𑋰', '𑋹')];
-
-pub const KIRAT_RAI: &'static [(char, char)] = &[('𖵀', '𖵹')];
-
-pub const LAO: &'static [(char, char)] = &[
- ('ກ', 'ຂ'),
- ('ຄ', 'ຄ'),
- ('ຆ', 'ຊ'),
- ('ຌ', 'ຣ'),
- ('ລ', 'ລ'),
- ('ວ', 'ຽ'),
- ('ເ', 'ໄ'),
- ('ໆ', 'ໆ'),
- ('\u{ec8}', '\u{ece}'),
- ('໐', '໙'),
- ('ໜ', 'ໟ'),
-];
-
-pub const LATIN: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('a', 'z'),
- ('ª', 'ª'),
- ('·', '·'),
- ('º', 'º'),
- ('À', 'Ö'),
- ('Ø', 'ö'),
- ('ø', 'ʸ'),
- ('ʼ', 'ʼ'),
- ('ˇ', 'ˇ'),
- ('ˉ', 'ˋ'),
- ('ˍ', 'ˍ'),
- ('˗', '˗'),
- ('˙', '˙'),
- ('ˠ', 'ˤ'),
- ('\u{300}', '\u{30e}'),
- ('\u{310}', '\u{311}'),
- ('\u{313}', '\u{313}'),
- ('\u{320}', '\u{320}'),
- ('\u{323}', '\u{325}'),
- ('\u{32d}', '\u{32e}'),
- ('\u{330}', '\u{331}'),
- ('\u{358}', '\u{358}'),
- ('\u{35e}', '\u{35e}'),
- ('\u{363}', '\u{36f}'),
- ('\u{485}', '\u{486}'),
- ('\u{951}', '\u{952}'),
- ('჻', '჻'),
- ('ᴀ', 'ᴥ'),
- ('ᴬ', 'ᵜ'),
- ('ᵢ', 'ᵥ'),
- ('ᵫ', 'ᵷ'),
- ('ᵹ', 'ᶾ'),
- ('\u{1df8}', '\u{1df8}'),
- ('Ḁ', 'ỿ'),
- ('\u{202f}', '\u{202f}'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('\u{20f0}', '\u{20f0}'),
- ('K', 'Å'),
- ('Ⅎ', 'Ⅎ'),
- ('ⅎ', 'ⅎ'),
- ('Ⅰ', 'ↈ'),
- ('Ⱡ', 'Ɀ'),
- ('⸗', '⸗'),
- ('꜀', '꜇'),
- ('Ꜣ', 'ꞇ'),
- ('Ꞌ', 'ꟍ'),
- ('Ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'Ƛ'),
- ('ꟲ', 'ꟿ'),
- ('꤮', '꤮'),
- ('ꬰ', 'ꭚ'),
- ('ꭜ', 'ꭤ'),
- ('ꭦ', 'ꭩ'),
- ('ff', 'st'),
- ('A', 'Z'),
- ('a', 'z'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𝼀', '𝼞'),
- ('𝼥', '𝼪'),
-];
-
-pub const LEPCHA: &'static [(char, char)] =
- &[('ᰀ', '\u{1c37}'), ('᰻', '᱉'), ('ᱍ', 'ᱏ')];
-
-pub const LIMBU: &'static [(char, char)] = &[
- ('॥', '॥'),
- ('ᤀ', 'ᤞ'),
- ('\u{1920}', 'ᤫ'),
- ('ᤰ', '\u{193b}'),
- ('᥀', '᥀'),
- ('᥄', '᥏'),
-];
-
-pub const LINEAR_A: &'static [(char, char)] =
- &[('𐄇', '𐄳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧')];
-
-pub const LINEAR_B: &'static [(char, char)] = &[
- ('𐀀', '𐀋'),
- ('𐀍', '𐀦'),
- ('𐀨', '𐀺'),
- ('𐀼', '𐀽'),
- ('𐀿', '𐁍'),
- ('𐁐', '𐁝'),
- ('𐂀', '𐃺'),
- ('𐄀', '𐄂'),
- ('𐄇', '𐄳'),
- ('𐄷', '𐄿'),
-];
-
-pub const LISU: &'static [(char, char)] =
- &[('ʼ', 'ʼ'), ('ˍ', 'ˍ'), ('《', '》'), ('ꓐ', '꓿'), ('𑾰', '𑾰')];
-
-pub const LYCIAN: &'static [(char, char)] = &[('⁚', '⁚'), ('𐊀', '𐊜')];
-
-pub const LYDIAN: &'static [(char, char)] =
- &[('·', '·'), ('⸱', '⸱'), ('𐤠', '𐤹'), ('𐤿', '𐤿')];
-
-pub const MAHAJANI: &'static [(char, char)] =
- &[('·', '·'), ('।', '९'), ('꠰', '꠹'), ('𑅐', '𑅶')];
-
-pub const MAKASAR: &'static [(char, char)] = &[('𑻠', '𑻸')];
-
-pub const MALAYALAM: &'static [(char, char)] = &[
- ('\u{951}', '\u{952}'),
- ('।', '॥'),
- ('\u{d00}', 'ഌ'),
- ('എ', 'ഐ'),
- ('ഒ', '\u{d44}'),
- ('െ', 'ൈ'),
- ('ൊ', '൏'),
- ('ൔ', '\u{d63}'),
- ('൦', 'ൿ'),
- ('\u{1cda}', '\u{1cda}'),
- ('ᳲ', 'ᳲ'),
- ('꠰', '꠲'),
-];
-
-pub const MANDAIC: &'static [(char, char)] =
- &[('ـ', 'ـ'), ('ࡀ', '\u{85b}'), ('࡞', '࡞')];
-
-pub const MANICHAEAN: &'static [(char, char)] =
- &[('ـ', 'ـ'), ('𐫀', '\u{10ae6}'), ('𐫫', '𐫶')];
-
-pub const MARCHEN: &'static [(char, char)] =
- &[('𑱰', '𑲏'), ('\u{11c92}', '\u{11ca7}'), ('𑲩', '\u{11cb6}')];
-
-pub const MASARAM_GONDI: &'static [(char, char)] = &[
- ('।', '॥'),
- ('𑴀', '𑴆'),
- ('𑴈', '𑴉'),
- ('𑴋', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d47}'),
- ('𑵐', '𑵙'),
-];
-
-pub const MEDEFAIDRIN: &'static [(char, char)] = &[('𖹀', '𖺚')];
-
-pub const MEETEI_MAYEK: &'static [(char, char)] =
- &[('ꫠ', '\u{aaf6}'), ('ꯀ', '\u{abed}'), ('꯰', '꯹')];
-
-pub const MENDE_KIKAKUI: &'static [(char, char)] =
- &[('𞠀', '𞣄'), ('𞣇', '\u{1e8d6}')];
-
-pub const MEROITIC_CURSIVE: &'static [(char, char)] =
- &[('𐦠', '𐦷'), ('𐦼', '𐧏'), ('𐧒', '𐧿')];
-
-pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] =
- &[('⁝', '⁝'), ('𐦀', '𐦟')];
-
-pub const MIAO: &'static [(char, char)] =
- &[('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟')];
-
-pub const MODI: &'static [(char, char)] =
- &[('꠰', '꠹'), ('𑘀', '𑙄'), ('𑙐', '𑙙')];
-
-pub const MONGOLIAN: &'static [(char, char)] = &[
- ('᠀', '᠙'),
- ('ᠠ', 'ᡸ'),
- ('ᢀ', 'ᢪ'),
- ('\u{202f}', '\u{202f}'),
- ('、', '。'),
- ('〈', '》'),
- ('𑙠', '𑙬'),
-];
-
-pub const MRO: &'static [(char, char)] = &[('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯')];
-
-pub const MULTANI: &'static [(char, char)] =
- &[('੦', '੯'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩')];
-
-pub const MYANMAR: &'static [(char, char)] =
- &[('က', '႟'), ('꤮', '꤮'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ'), ('𑛐', '𑛣')];
-
-pub const NABATAEAN: &'static [(char, char)] = &[('𐢀', '𐢞'), ('𐢧', '𐢯')];
-
-pub const NAG_MUNDARI: &'static [(char, char)] = &[('𞓐', '𞓹')];
-
-pub const NANDINAGARI: &'static [(char, char)] = &[
- ('।', '॥'),
- ('೦', '೯'),
- ('ᳩ', 'ᳩ'),
- ('ᳲ', 'ᳲ'),
- ('ᳺ', 'ᳺ'),
- ('꠰', '꠵'),
- ('𑦠', '𑦧'),
- ('𑦪', '\u{119d7}'),
- ('\u{119da}', '𑧤'),
-];
-
-pub const NEW_TAI_LUE: &'static [(char, char)] =
- &[('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟')];
-
-pub const NEWA: &'static [(char, char)] = &[('𑐀', '𑑛'), ('𑑝', '𑑡')];
-
-pub const NKO: &'static [(char, char)] = &[
- ('،', '،'),
- ('؛', '؛'),
- ('؟', '؟'),
- ('߀', 'ߺ'),
- ('\u{7fd}', '߿'),
- ('﴾', '﴿'),
-];
-
-pub const NUSHU: &'static [(char, char)] = &[('𖿡', '𖿡'), ('𛅰', '𛋻')];
-
-pub const NYIAKENG_PUACHUE_HMONG: &'static [(char, char)] =
- &[('𞄀', '𞄬'), ('\u{1e130}', '𞄽'), ('𞅀', '𞅉'), ('𞅎', '𞅏')];
-
-pub const OGHAM: &'static [(char, char)] = &[('\u{1680}', '᚜')];
-
-pub const OL_CHIKI: &'static [(char, char)] = &[('᱐', '᱿')];
-
-pub const OL_ONAL: &'static [(char, char)] =
- &[('।', '॥'), ('𞗐', '𞗺'), ('𞗿', '𞗿')];
-
-pub const OLD_HUNGARIAN: &'static [(char, char)] = &[
- ('⁚', '⁚'),
- ('⁝', '⁝'),
- ('⸱', '⸱'),
- ('⹁', '⹁'),
- ('𐲀', '𐲲'),
- ('𐳀', '𐳲'),
- ('𐳺', '𐳿'),
-];
-
-pub const OLD_ITALIC: &'static [(char, char)] = &[('𐌀', '𐌣'), ('𐌭', '𐌯')];
-
-pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[('𐪀', '𐪟')];
-
-pub const OLD_PERMIC: &'static [(char, char)] = &[
- ('·', '·'),
- ('\u{300}', '\u{300}'),
- ('\u{306}', '\u{308}'),
- ('\u{313}', '\u{313}'),
- ('\u{483}', '\u{483}'),
- ('𐍐', '\u{1037a}'),
-];
-
-pub const OLD_PERSIAN: &'static [(char, char)] = &[('𐎠', '𐏃'), ('𐏈', '𐏕')];
-
-pub const OLD_SOGDIAN: &'static [(char, char)] = &[('𐼀', '𐼧')];
-
-pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[('𐩠', '𐩿')];
-
-pub const OLD_TURKIC: &'static [(char, char)] =
- &[('⁚', '⁚'), ('⸰', '⸰'), ('𐰀', '𐱈')];
-
-pub const OLD_UYGHUR: &'static [(char, char)] =
- &[('ـ', 'ـ'), ('𐫲', '𐫲'), ('𐽰', '𐾉')];
-
-pub const ORIYA: &'static [(char, char)] = &[
- ('\u{951}', '\u{952}'),
- ('।', '॥'),
- ('\u{b01}', 'ଃ'),
- ('ଅ', 'ଌ'),
- ('ଏ', 'ଐ'),
- ('ଓ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଵ', 'ହ'),
- ('\u{b3c}', '\u{b44}'),
- ('େ', 'ୈ'),
- ('ୋ', '\u{b4d}'),
- ('\u{b55}', '\u{b57}'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', '\u{b63}'),
- ('୦', '୷'),
- ('\u{1cda}', '\u{1cda}'),
- ('ᳲ', 'ᳲ'),
-];
-
-pub const OSAGE: &'static [(char, char)] = &[
- ('\u{301}', '\u{301}'),
- ('\u{304}', '\u{304}'),
- ('\u{30b}', '\u{30b}'),
- ('\u{358}', '\u{358}'),
- ('𐒰', '𐓓'),
- ('𐓘', '𐓻'),
-];
-
-pub const OSMANYA: &'static [(char, char)] = &[('𐒀', '𐒝'), ('𐒠', '𐒩')];
-
-pub const PAHAWH_HMONG: &'static [(char, char)] =
- &[('𖬀', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖭣', '𖭷'), ('𖭽', '𖮏')];
-
-pub const PALMYRENE: &'static [(char, char)] = &[('𐡠', '𐡿')];
-
-pub const PAU_CIN_HAU: &'static [(char, char)] = &[('𑫀', '𑫸')];
-
-pub const PHAGS_PA: &'static [(char, char)] = &[
- ('᠂', '᠃'),
- ('᠅', '᠅'),
- ('\u{202f}', '\u{202f}'),
- ('。', '。'),
- ('ꡀ', '꡷'),
-];
-
-pub const PHOENICIAN: &'static [(char, char)] = &[('𐤀', '𐤛'), ('𐤟', '𐤟')];
-
-pub const PSALTER_PAHLAVI: &'static [(char, char)] =
- &[('ـ', 'ـ'), ('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯')];
-
-pub const REJANG: &'static [(char, char)] = &[('ꤰ', '\u{a953}'), ('꥟', '꥟')];
-
-pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'ᛸ')];
-
-pub const SAMARITAN: &'static [(char, char)] =
- &[('ࠀ', '\u{82d}'), ('࠰', '࠾'), ('⸱', '⸱')];
-
-pub const SAURASHTRA: &'static [(char, char)] =
- &[('ꢀ', '\u{a8c5}'), ('꣎', '꣙')];
-
-pub const SHARADA: &'static [(char, char)] = &[
- ('\u{951}', '\u{951}'),
- ('\u{1cd7}', '\u{1cd7}'),
- ('\u{1cd9}', '\u{1cd9}'),
- ('\u{1cdc}', '\u{1cdd}'),
- ('\u{1ce0}', '\u{1ce0}'),
- ('꠰', '꠵'),
- ('꠸', '꠸'),
- ('\u{11180}', '𑇟'),
-];
-
-pub const SHAVIAN: &'static [(char, char)] = &[('·', '·'), ('𐑐', '𐑿')];
-
-pub const SIDDHAM: &'static [(char, char)] =
- &[('𑖀', '\u{115b5}'), ('𑖸', '\u{115dd}')];
-
-pub const SIGNWRITING: &'static [(char, char)] =
- &[('𝠀', '𝪋'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}')];
-
-pub const SINHALA: &'static [(char, char)] = &[
- ('।', '॥'),
- ('\u{d81}', 'ඃ'),
- ('අ', 'ඖ'),
- ('ක', 'න'),
- ('ඳ', 'ර'),
- ('ල', 'ල'),
- ('ව', 'ෆ'),
- ('\u{dca}', '\u{dca}'),
- ('\u{dcf}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('ෘ', '\u{ddf}'),
- ('෦', '෯'),
- ('ෲ', '෴'),
- ('ᳲ', 'ᳲ'),
- ('𑇡', '𑇴'),
-];
-
-pub const SOGDIAN: &'static [(char, char)] = &[('ـ', 'ـ'), ('𐼰', '𐽙')];
-
-pub const SORA_SOMPENG: &'static [(char, char)] = &[('𑃐', '𑃨'), ('𑃰', '𑃹')];
-
-pub const SOYOMBO: &'static [(char, char)] = &[('𑩐', '𑪢')];
-
-pub const SUNDANESE: &'static [(char, char)] =
- &[('\u{1b80}', 'ᮿ'), ('᳀', '᳇')];
-
-pub const SUNUWAR: &'static [(char, char)] = &[
- ('\u{300}', '\u{301}'),
- ('\u{303}', '\u{303}'),
- ('\u{30d}', '\u{30d}'),
- ('\u{310}', '\u{310}'),
- ('\u{32d}', '\u{32d}'),
- ('\u{331}', '\u{331}'),
- ('𑯀', '𑯡'),
- ('𑯰', '𑯹'),
-];
-
-pub const SYLOTI_NAGRI: &'static [(char, char)] =
- &[('।', '॥'), ('০', '৯'), ('ꠀ', '\u{a82c}')];
-
-pub const SYRIAC: &'static [(char, char)] = &[
- ('\u{303}', '\u{304}'),
- ('\u{307}', '\u{308}'),
- ('\u{30a}', '\u{30a}'),
- ('\u{320}', '\u{320}'),
- ('\u{323}', '\u{325}'),
- ('\u{32d}', '\u{32e}'),
- ('\u{330}', '\u{330}'),
- ('،', '،'),
- ('؛', '\u{61c}'),
- ('؟', '؟'),
- ('ـ', 'ـ'),
- ('\u{64b}', '\u{655}'),
- ('\u{670}', '\u{670}'),
- ('܀', '܍'),
- ('\u{70f}', '\u{74a}'),
- ('ݍ', 'ݏ'),
- ('ࡠ', 'ࡪ'),
- ('\u{1df8}', '\u{1df8}'),
- ('\u{1dfa}', '\u{1dfa}'),
-];
-
-pub const TAGALOG: &'static [(char, char)] =
- &[('ᜀ', '\u{1715}'), ('ᜟ', 'ᜟ'), ('᜵', '᜶')];
-
-pub const TAGBANWA: &'static [(char, char)] =
- &[('᜵', '᜶'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}')];
-
-pub const TAI_LE: &'static [(char, char)] = &[
- ('\u{300}', '\u{301}'),
- ('\u{307}', '\u{308}'),
- ('\u{30c}', '\u{30c}'),
- ('၀', '၉'),
- ('ᥐ', 'ᥭ'),
- ('ᥰ', 'ᥴ'),
-];
-
-pub const TAI_THAM: &'static [(char, char)] = &[
- ('ᨠ', '\u{1a5e}'),
- ('\u{1a60}', '\u{1a7c}'),
- ('\u{1a7f}', '᪉'),
- ('᪐', '᪙'),
- ('᪠', '᪭'),
-];
-
-pub const TAI_VIET: &'static [(char, char)] = &[('ꪀ', 'ꫂ'), ('ꫛ', '꫟')];
-
-pub const TAKRI: &'static [(char, char)] =
- &[('।', '॥'), ('꠰', '꠹'), ('𑚀', '𑚹'), ('𑛀', '𑛉')];
-
-pub const TAMIL: &'static [(char, char)] = &[
- ('\u{951}', '\u{952}'),
- ('।', '॥'),
- ('\u{b82}', 'ஃ'),
- ('அ', 'ஊ'),
- ('எ', 'ஐ'),
- ('ஒ', 'க'),
- ('ங', 'ச'),
- ('ஜ', 'ஜ'),
- ('ஞ', 'ட'),
- ('ண', 'த'),
- ('ந', 'ப'),
- ('ம', 'ஹ'),
- ('\u{bbe}', 'ூ'),
- ('ெ', 'ை'),
- ('ொ', '\u{bcd}'),
- ('ௐ', 'ௐ'),
- ('\u{bd7}', '\u{bd7}'),
- ('௦', '௺'),
- ('\u{1cda}', '\u{1cda}'),
- ('ꣳ', 'ꣳ'),
- ('\u{11301}', '\u{11301}'),
- ('𑌃', '𑌃'),
- ('\u{1133b}', '\u{1133c}'),
- ('𑿀', '𑿱'),
- ('𑿿', '𑿿'),
-];
-
-pub const TANGSA: &'static [(char, char)] = &[('𖩰', '𖪾'), ('𖫀', '𖫉')];
-
-pub const TANGUT: &'static [(char, char)] = &[
- ('⿰', '⿿'),
- ('㇯', '㇯'),
- ('𖿠', '𖿠'),
- ('𗀀', '𘟷'),
- ('𘠀', '𘫿'),
- ('𘴀', '𘴈'),
-];
-
-pub const TELUGU: &'static [(char, char)] = &[
- ('\u{951}', '\u{952}'),
- ('।', '॥'),
- ('\u{c00}', 'ఌ'),
- ('ఎ', 'ఐ'),
- ('ఒ', 'న'),
- ('ప', 'హ'),
- ('\u{c3c}', 'ౄ'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4d}'),
- ('\u{c55}', '\u{c56}'),
- ('ౘ', 'ౚ'),
- ('ౝ', 'ౝ'),
- ('ౠ', '\u{c63}'),
- ('౦', '౯'),
- ('౷', '౿'),
- ('\u{1cda}', '\u{1cda}'),
- ('ᳲ', 'ᳲ'),
-];
-
-pub const THAANA: &'static [(char, char)] = &[
- ('،', '،'),
- ('؛', '\u{61c}'),
- ('؟', '؟'),
- ('٠', '٩'),
- ('ހ', 'ޱ'),
- ('ﷲ', 'ﷲ'),
- ('﷽', '﷽'),
-];
-
-pub const THAI: &'static [(char, char)] = &[
- ('ʼ', 'ʼ'),
- ('˗', '˗'),
- ('\u{303}', '\u{303}'),
- ('\u{331}', '\u{331}'),
- ('ก', '\u{e3a}'),
- ('เ', '๛'),
-];
-
-pub const TIBETAN: &'static [(char, char)] = &[
- ('ༀ', 'ཇ'),
- ('ཉ', 'ཬ'),
- ('\u{f71}', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('྾', '࿌'),
- ('࿎', '࿔'),
- ('࿙', '࿚'),
- ('〈', '》'),
-];
-
-pub const TIFINAGH: &'static [(char, char)] = &[
- ('\u{302}', '\u{302}'),
- ('\u{304}', '\u{304}'),
- ('\u{307}', '\u{307}'),
- ('\u{309}', '\u{309}'),
- ('ⴰ', 'ⵧ'),
- ('ⵯ', '⵰'),
- ('\u{2d7f}', '\u{2d7f}'),
-];
-
-pub const TIRHUTA: &'static [(char, char)] = &[
- ('\u{951}', '\u{952}'),
- ('।', '॥'),
- ('ᳲ', 'ᳲ'),
- ('꠰', '꠹'),
- ('𑒀', '𑓇'),
- ('𑓐', '𑓙'),
-];
-
-pub const TODHRI: &'static [(char, char)] = &[
- ('\u{301}', '\u{301}'),
- ('\u{304}', '\u{304}'),
- ('\u{307}', '\u{307}'),
- ('\u{311}', '\u{311}'),
- ('\u{313}', '\u{313}'),
- ('\u{35e}', '\u{35e}'),
- ('𐗀', '𐗳'),
-];
-
-pub const TOTO: &'static [(char, char)] = &[('ʼ', 'ʼ'), ('𞊐', '\u{1e2ae}')];
-
-pub const TULU_TIGALARI: &'static [(char, char)] = &[
- ('೦', '೯'),
- ('ᳲ', 'ᳲ'),
- ('\u{1cf4}', '\u{1cf4}'),
- ('꠰', '꠵'),
- ('\u{a8f1}', '\u{a8f1}'),
- ('𑎀', '𑎉'),
- ('𑎋', '𑎋'),
- ('𑎎', '𑎎'),
- ('𑎐', '𑎵'),
- ('𑎷', '\u{113c0}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '𑏊'),
- ('𑏌', '𑏕'),
- ('𑏗', '𑏘'),
- ('\u{113e1}', '\u{113e2}'),
-];
-
-pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')];
-
-pub const VAI: &'static [(char, char)] = &[('ꔀ', 'ꘫ')];
-
-pub const VITHKUQI: &'static [(char, char)] = &[
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
-];
-
-pub const WANCHO: &'static [(char, char)] = &[('𞋀', '𞋹'), ('𞋿', '𞋿')];
-
-pub const WARANG_CITI: &'static [(char, char)] = &[('𑢠', '𑣲'), ('𑣿', '𑣿')];
-
-pub const YEZIDI: &'static [(char, char)] = &[
- ('،', '،'),
- ('؛', '؛'),
- ('؟', '؟'),
- ('٠', '٩'),
- ('𐺀', '𐺩'),
- ('\u{10eab}', '𐺭'),
- ('𐺰', '𐺱'),
-];
-
-pub const YI: &'static [(char, char)] = &[
- ('、', '。'),
- ('〈', '】'),
- ('〔', '〛'),
- ('・', '・'),
- ('ꀀ', 'ꒌ'),
- ('꒐', '꓆'),
- ('。', '・'),
-];
-
-pub const ZANABAZAR_SQUARE: &'static [(char, char)] = &[('𑨀', '\u{11a47}')];
diff --git a/vendor/regex-syntax/src/unicode_tables/sentence_break.rs b/vendor/regex-syntax/src/unicode_tables/sentence_break.rs
deleted file mode 100644
index af1c5bea..00000000
--- a/vendor/regex-syntax/src/unicode_tables/sentence_break.rs
+++ /dev/null
@@ -1,2530 +0,0 @@
-// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
-//
-// ucd-generate sentence-break ucd-16.0.0 --chars
-//
-// Unicode version: 16.0.0.
-//
-// ucd-generate 0.3.1 is available on crates.io.
-
-pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
- ("ATerm", ATERM),
- ("CR", CR),
- ("Close", CLOSE),
- ("Extend", EXTEND),
- ("Format", FORMAT),
- ("LF", LF),
- ("Lower", LOWER),
- ("Numeric", NUMERIC),
- ("OLetter", OLETTER),
- ("SContinue", SCONTINUE),
- ("STerm", STERM),
- ("Sep", SEP),
- ("Sp", SP),
- ("Upper", UPPER),
-];
-
-pub const ATERM: &'static [(char, char)] =
- &[('.', '.'), ('․', '․'), ('﹒', '﹒'), ('.', '.')];
-
-pub const CR: &'static [(char, char)] = &[('\r', '\r')];
-
-pub const CLOSE: &'static [(char, char)] = &[
- ('"', '"'),
- ('\'', ')'),
- ('[', '['),
- (']', ']'),
- ('{', '{'),
- ('}', '}'),
- ('«', '«'),
- ('»', '»'),
- ('༺', '༽'),
- ('᚛', '᚜'),
- ('‘', '‟'),
- ('‹', '›'),
- ('⁅', '⁆'),
- ('⁽', '⁾'),
- ('₍', '₎'),
- ('⌈', '⌋'),
- ('〈', '〉'),
- ('❛', '❠'),
- ('❨', '❵'),
- ('⟅', '⟆'),
- ('⟦', '⟯'),
- ('⦃', '⦘'),
- ('⧘', '⧛'),
- ('⧼', '⧽'),
- ('⸀', '⸍'),
- ('⸜', '⸝'),
- ('⸠', '⸩'),
- ('⹂', '⹂'),
- ('⹕', '⹜'),
- ('〈', '】'),
- ('〔', '〛'),
- ('〝', '〟'),
- ('﴾', '﴿'),
- ('︗', '︘'),
- ('︵', '﹄'),
- ('﹇', '﹈'),
- ('﹙', '﹞'),
- ('(', ')'),
- ('[', '['),
- (']', ']'),
- ('{', '{'),
- ('}', '}'),
- ('⦅', '⦆'),
- ('「', '」'),
- ('🙶', '🙸'),
-];
-
-pub const EXTEND: &'static [(char, char)] = &[
- ('\u{300}', '\u{36f}'),
- ('\u{483}', '\u{489}'),
- ('\u{591}', '\u{5bd}'),
- ('\u{5bf}', '\u{5bf}'),
- ('\u{5c1}', '\u{5c2}'),
- ('\u{5c4}', '\u{5c5}'),
- ('\u{5c7}', '\u{5c7}'),
- ('\u{610}', '\u{61a}'),
- ('\u{64b}', '\u{65f}'),
- ('\u{670}', '\u{670}'),
- ('\u{6d6}', '\u{6dc}'),
- ('\u{6df}', '\u{6e4}'),
- ('\u{6e7}', '\u{6e8}'),
- ('\u{6ea}', '\u{6ed}'),
- ('\u{711}', '\u{711}'),
- ('\u{730}', '\u{74a}'),
- ('\u{7a6}', '\u{7b0}'),
- ('\u{7eb}', '\u{7f3}'),
- ('\u{7fd}', '\u{7fd}'),
- ('\u{816}', '\u{819}'),
- ('\u{81b}', '\u{823}'),
- ('\u{825}', '\u{827}'),
- ('\u{829}', '\u{82d}'),
- ('\u{859}', '\u{85b}'),
- ('\u{897}', '\u{89f}'),
- ('\u{8ca}', '\u{8e1}'),
- ('\u{8e3}', 'ः'),
- ('\u{93a}', '\u{93c}'),
- ('ा', 'ॏ'),
- ('\u{951}', '\u{957}'),
- ('\u{962}', '\u{963}'),
- ('\u{981}', 'ঃ'),
- ('\u{9bc}', '\u{9bc}'),
- ('\u{9be}', '\u{9c4}'),
- ('ে', 'ৈ'),
- ('ো', '\u{9cd}'),
- ('\u{9d7}', '\u{9d7}'),
- ('\u{9e2}', '\u{9e3}'),
- ('\u{9fe}', '\u{9fe}'),
- ('\u{a01}', 'ਃ'),
- ('\u{a3c}', '\u{a3c}'),
- ('ਾ', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4d}'),
- ('\u{a51}', '\u{a51}'),
- ('\u{a70}', '\u{a71}'),
- ('\u{a75}', '\u{a75}'),
- ('\u{a81}', 'ઃ'),
- ('\u{abc}', '\u{abc}'),
- ('ા', '\u{ac5}'),
- ('\u{ac7}', 'ૉ'),
- ('ો', '\u{acd}'),
- ('\u{ae2}', '\u{ae3}'),
- ('\u{afa}', '\u{aff}'),
- ('\u{b01}', 'ଃ'),
- ('\u{b3c}', '\u{b3c}'),
- ('\u{b3e}', '\u{b44}'),
- ('େ', 'ୈ'),
- ('ୋ', '\u{b4d}'),
- ('\u{b55}', '\u{b57}'),
- ('\u{b62}', '\u{b63}'),
- ('\u{b82}', '\u{b82}'),
- ('\u{bbe}', 'ூ'),
- ('ெ', 'ை'),
- ('ொ', '\u{bcd}'),
- ('\u{bd7}', '\u{bd7}'),
- ('\u{c00}', '\u{c04}'),
- ('\u{c3c}', '\u{c3c}'),
- ('\u{c3e}', 'ౄ'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4d}'),
- ('\u{c55}', '\u{c56}'),
- ('\u{c62}', '\u{c63}'),
- ('\u{c81}', 'ಃ'),
- ('\u{cbc}', '\u{cbc}'),
- ('ಾ', 'ೄ'),
- ('\u{cc6}', '\u{cc8}'),
- ('\u{cca}', '\u{ccd}'),
- ('\u{cd5}', '\u{cd6}'),
- ('\u{ce2}', '\u{ce3}'),
- ('ೳ', 'ೳ'),
- ('\u{d00}', 'ഃ'),
- ('\u{d3b}', '\u{d3c}'),
- ('\u{d3e}', '\u{d44}'),
- ('െ', 'ൈ'),
- ('ൊ', '\u{d4d}'),
- ('\u{d57}', '\u{d57}'),
- ('\u{d62}', '\u{d63}'),
- ('\u{d81}', 'ඃ'),
- ('\u{dca}', '\u{dca}'),
- ('\u{dcf}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('ෘ', '\u{ddf}'),
- ('ෲ', 'ෳ'),
- ('\u{e31}', '\u{e31}'),
- ('\u{e34}', '\u{e3a}'),
- ('\u{e47}', '\u{e4e}'),
- ('\u{eb1}', '\u{eb1}'),
- ('\u{eb4}', '\u{ebc}'),
- ('\u{ec8}', '\u{ece}'),
- ('\u{f18}', '\u{f19}'),
- ('\u{f35}', '\u{f35}'),
- ('\u{f37}', '\u{f37}'),
- ('\u{f39}', '\u{f39}'),
- ('༾', '༿'),
- ('\u{f71}', '\u{f84}'),
- ('\u{f86}', '\u{f87}'),
- ('\u{f8d}', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('\u{fc6}', '\u{fc6}'),
- ('ါ', '\u{103e}'),
- ('ၖ', '\u{1059}'),
- ('\u{105e}', '\u{1060}'),
- ('ၢ', 'ၤ'),
- ('ၧ', 'ၭ'),
- ('\u{1071}', '\u{1074}'),
- ('\u{1082}', '\u{108d}'),
- ('ႏ', 'ႏ'),
- ('ႚ', '\u{109d}'),
- ('\u{135d}', '\u{135f}'),
- ('\u{1712}', '\u{1715}'),
- ('\u{1732}', '\u{1734}'),
- ('\u{1752}', '\u{1753}'),
- ('\u{1772}', '\u{1773}'),
- ('\u{17b4}', '\u{17d3}'),
- ('\u{17dd}', '\u{17dd}'),
- ('\u{180b}', '\u{180d}'),
- ('\u{180f}', '\u{180f}'),
- ('\u{1885}', '\u{1886}'),
- ('\u{18a9}', '\u{18a9}'),
- ('\u{1920}', 'ᤫ'),
- ('ᤰ', '\u{193b}'),
- ('\u{1a17}', '\u{1a1b}'),
- ('ᩕ', '\u{1a5e}'),
- ('\u{1a60}', '\u{1a7c}'),
- ('\u{1a7f}', '\u{1a7f}'),
- ('\u{1ab0}', '\u{1ace}'),
- ('\u{1b00}', 'ᬄ'),
- ('\u{1b34}', '\u{1b44}'),
- ('\u{1b6b}', '\u{1b73}'),
- ('\u{1b80}', 'ᮂ'),
- ('ᮡ', '\u{1bad}'),
- ('\u{1be6}', '\u{1bf3}'),
- ('ᰤ', '\u{1c37}'),
- ('\u{1cd0}', '\u{1cd2}'),
- ('\u{1cd4}', '\u{1ce8}'),
- ('\u{1ced}', '\u{1ced}'),
- ('\u{1cf4}', '\u{1cf4}'),
- ('᳷', '\u{1cf9}'),
- ('\u{1dc0}', '\u{1dff}'),
- ('\u{200c}', '\u{200d}'),
- ('\u{20d0}', '\u{20f0}'),
- ('\u{2cef}', '\u{2cf1}'),
- ('\u{2d7f}', '\u{2d7f}'),
- ('\u{2de0}', '\u{2dff}'),
- ('\u{302a}', '\u{302f}'),
- ('\u{3099}', '\u{309a}'),
- ('\u{a66f}', '\u{a672}'),
- ('\u{a674}', '\u{a67d}'),
- ('\u{a69e}', '\u{a69f}'),
- ('\u{a6f0}', '\u{a6f1}'),
- ('\u{a802}', '\u{a802}'),
- ('\u{a806}', '\u{a806}'),
- ('\u{a80b}', '\u{a80b}'),
- ('ꠣ', 'ꠧ'),
- ('\u{a82c}', '\u{a82c}'),
- ('ꢀ', 'ꢁ'),
- ('ꢴ', '\u{a8c5}'),
- ('\u{a8e0}', '\u{a8f1}'),
- ('\u{a8ff}', '\u{a8ff}'),
- ('\u{a926}', '\u{a92d}'),
- ('\u{a947}', '\u{a953}'),
- ('\u{a980}', 'ꦃ'),
- ('\u{a9b3}', '\u{a9c0}'),
- ('\u{a9e5}', '\u{a9e5}'),
- ('\u{aa29}', '\u{aa36}'),
- ('\u{aa43}', '\u{aa43}'),
- ('\u{aa4c}', 'ꩍ'),
- ('ꩻ', 'ꩽ'),
- ('\u{aab0}', '\u{aab0}'),
- ('\u{aab2}', '\u{aab4}'),
- ('\u{aab7}', '\u{aab8}'),
- ('\u{aabe}', '\u{aabf}'),
- ('\u{aac1}', '\u{aac1}'),
- ('ꫫ', 'ꫯ'),
- ('ꫵ', '\u{aaf6}'),
- ('ꯣ', 'ꯪ'),
- ('꯬', '\u{abed}'),
- ('\u{fb1e}', '\u{fb1e}'),
- ('\u{fe00}', '\u{fe0f}'),
- ('\u{fe20}', '\u{fe2f}'),
- ('\u{ff9e}', '\u{ff9f}'),
- ('\u{101fd}', '\u{101fd}'),
- ('\u{102e0}', '\u{102e0}'),
- ('\u{10376}', '\u{1037a}'),
- ('\u{10a01}', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '\u{10a0f}'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '\u{10a3f}'),
- ('\u{10ae5}', '\u{10ae6}'),
- ('\u{10d24}', '\u{10d27}'),
- ('\u{10d69}', '\u{10d6d}'),
- ('\u{10eab}', '\u{10eac}'),
- ('\u{10efc}', '\u{10eff}'),
- ('\u{10f46}', '\u{10f50}'),
- ('\u{10f82}', '\u{10f85}'),
- ('𑀀', '𑀂'),
- ('\u{11038}', '\u{11046}'),
- ('\u{11070}', '\u{11070}'),
- ('\u{11073}', '\u{11074}'),
- ('\u{1107f}', '𑂂'),
- ('𑂰', '\u{110ba}'),
- ('\u{110c2}', '\u{110c2}'),
- ('\u{11100}', '\u{11102}'),
- ('\u{11127}', '\u{11134}'),
- ('𑅅', '𑅆'),
- ('\u{11173}', '\u{11173}'),
- ('\u{11180}', '𑆂'),
- ('𑆳', '\u{111c0}'),
- ('\u{111c9}', '\u{111cc}'),
- ('𑇎', '\u{111cf}'),
- ('𑈬', '\u{11237}'),
- ('\u{1123e}', '\u{1123e}'),
- ('\u{11241}', '\u{11241}'),
- ('\u{112df}', '\u{112ea}'),
- ('\u{11300}', '𑌃'),
- ('\u{1133b}', '\u{1133c}'),
- ('\u{1133e}', '𑍄'),
- ('𑍇', '𑍈'),
- ('𑍋', '\u{1134d}'),
- ('\u{11357}', '\u{11357}'),
- ('𑍢', '𑍣'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
- ('\u{113b8}', '\u{113c0}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '𑏊'),
- ('𑏌', '\u{113d0}'),
- ('\u{113d2}', '\u{113d2}'),
- ('\u{113e1}', '\u{113e2}'),
- ('𑐵', '\u{11446}'),
- ('\u{1145e}', '\u{1145e}'),
- ('\u{114b0}', '\u{114c3}'),
- ('\u{115af}', '\u{115b5}'),
- ('𑖸', '\u{115c0}'),
- ('\u{115dc}', '\u{115dd}'),
- ('𑘰', '\u{11640}'),
- ('\u{116ab}', '\u{116b7}'),
- ('\u{1171d}', '\u{1172b}'),
- ('𑠬', '\u{1183a}'),
- ('\u{11930}', '𑤵'),
- ('𑤷', '𑤸'),
- ('\u{1193b}', '\u{1193e}'),
- ('𑥀', '𑥀'),
- ('𑥂', '\u{11943}'),
- ('𑧑', '\u{119d7}'),
- ('\u{119da}', '\u{119e0}'),
- ('𑧤', '𑧤'),
- ('\u{11a01}', '\u{11a0a}'),
- ('\u{11a33}', '𑨹'),
- ('\u{11a3b}', '\u{11a3e}'),
- ('\u{11a47}', '\u{11a47}'),
- ('\u{11a51}', '\u{11a5b}'),
- ('\u{11a8a}', '\u{11a99}'),
- ('𑰯', '\u{11c36}'),
- ('\u{11c38}', '\u{11c3f}'),
- ('\u{11c92}', '\u{11ca7}'),
- ('𑲩', '\u{11cb6}'),
- ('\u{11d31}', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d45}'),
- ('\u{11d47}', '\u{11d47}'),
- ('𑶊', '𑶎'),
- ('\u{11d90}', '\u{11d91}'),
- ('𑶓', '\u{11d97}'),
- ('\u{11ef3}', '𑻶'),
- ('\u{11f00}', '\u{11f01}'),
- ('𑼃', '𑼃'),
- ('𑼴', '\u{11f3a}'),
- ('𑼾', '\u{11f42}'),
- ('\u{11f5a}', '\u{11f5a}'),
- ('\u{13440}', '\u{13440}'),
- ('\u{13447}', '\u{13455}'),
- ('\u{1611e}', '\u{1612f}'),
- ('\u{16af0}', '\u{16af4}'),
- ('\u{16b30}', '\u{16b36}'),
- ('\u{16f4f}', '\u{16f4f}'),
- ('𖽑', '𖾇'),
- ('\u{16f8f}', '\u{16f92}'),
- ('\u{16fe4}', '\u{16fe4}'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('\u{1bc9d}', '\u{1bc9e}'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('\u{1d165}', '\u{1d169}'),
- ('\u{1d16d}', '\u{1d172}'),
- ('\u{1d17b}', '\u{1d182}'),
- ('\u{1d185}', '\u{1d18b}'),
- ('\u{1d1aa}', '\u{1d1ad}'),
- ('\u{1d242}', '\u{1d244}'),
- ('\u{1da00}', '\u{1da36}'),
- ('\u{1da3b}', '\u{1da6c}'),
- ('\u{1da75}', '\u{1da75}'),
- ('\u{1da84}', '\u{1da84}'),
- ('\u{1da9b}', '\u{1da9f}'),
- ('\u{1daa1}', '\u{1daaf}'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
- ('\u{1e08f}', '\u{1e08f}'),
- ('\u{1e130}', '\u{1e136}'),
- ('\u{1e2ae}', '\u{1e2ae}'),
- ('\u{1e2ec}', '\u{1e2ef}'),
- ('\u{1e4ec}', '\u{1e4ef}'),
- ('\u{1e5ee}', '\u{1e5ef}'),
- ('\u{1e8d0}', '\u{1e8d6}'),
- ('\u{1e944}', '\u{1e94a}'),
- ('\u{e0020}', '\u{e007f}'),
- ('\u{e0100}', '\u{e01ef}'),
-];
-
-pub const FORMAT: &'static [(char, char)] = &[
- ('\u{ad}', '\u{ad}'),
- ('\u{61c}', '\u{61c}'),
- ('\u{70f}', '\u{70f}'),
- ('\u{180e}', '\u{180e}'),
- ('\u{200b}', '\u{200b}'),
- ('\u{200e}', '\u{200f}'),
- ('\u{202a}', '\u{202e}'),
- ('\u{2060}', '\u{2064}'),
- ('\u{2066}', '\u{206f}'),
- ('\u{feff}', '\u{feff}'),
- ('\u{fff9}', '\u{fffb}'),
- ('\u{13430}', '\u{1343f}'),
- ('\u{1bca0}', '\u{1bca3}'),
- ('\u{1d173}', '\u{1d17a}'),
- ('\u{e0001}', '\u{e0001}'),
-];
-
-pub const LF: &'static [(char, char)] = &[('\n', '\n')];
-
-pub const LOWER: &'static [(char, char)] = &[
- ('a', 'z'),
- ('ª', 'ª'),
- ('µ', 'µ'),
- ('º', 'º'),
- ('ß', 'ö'),
- ('ø', 'ÿ'),
- ('ā', 'ā'),
- ('ă', 'ă'),
- ('ą', 'ą'),
- ('ć', 'ć'),
- ('ĉ', 'ĉ'),
- ('ċ', 'ċ'),
- ('č', 'č'),
- ('ď', 'ď'),
- ('đ', 'đ'),
- ('ē', 'ē'),
- ('ĕ', 'ĕ'),
- ('ė', 'ė'),
- ('ę', 'ę'),
- ('ě', 'ě'),
- ('ĝ', 'ĝ'),
- ('ğ', 'ğ'),
- ('ġ', 'ġ'),
- ('ģ', 'ģ'),
- ('ĥ', 'ĥ'),
- ('ħ', 'ħ'),
- ('ĩ', 'ĩ'),
- ('ī', 'ī'),
- ('ĭ', 'ĭ'),
- ('į', 'į'),
- ('ı', 'ı'),
- ('ij', 'ij'),
- ('ĵ', 'ĵ'),
- ('ķ', 'ĸ'),
- ('ĺ', 'ĺ'),
- ('ļ', 'ļ'),
- ('ľ', 'ľ'),
- ('ŀ', 'ŀ'),
- ('ł', 'ł'),
- ('ń', 'ń'),
- ('ņ', 'ņ'),
- ('ň', 'ʼn'),
- ('ŋ', 'ŋ'),
- ('ō', 'ō'),
- ('ŏ', 'ŏ'),
- ('ő', 'ő'),
- ('œ', 'œ'),
- ('ŕ', 'ŕ'),
- ('ŗ', 'ŗ'),
- ('ř', 'ř'),
- ('ś', 'ś'),
- ('ŝ', 'ŝ'),
- ('ş', 'ş'),
- ('š', 'š'),
- ('ţ', 'ţ'),
- ('ť', 'ť'),
- ('ŧ', 'ŧ'),
- ('ũ', 'ũ'),
- ('ū', 'ū'),
- ('ŭ', 'ŭ'),
- ('ů', 'ů'),
- ('ű', 'ű'),
- ('ų', 'ų'),
- ('ŵ', 'ŵ'),
- ('ŷ', 'ŷ'),
- ('ź', 'ź'),
- ('ż', 'ż'),
- ('ž', 'ƀ'),
- ('ƃ', 'ƃ'),
- ('ƅ', 'ƅ'),
- ('ƈ', 'ƈ'),
- ('ƌ', 'ƍ'),
- ('ƒ', 'ƒ'),
- ('ƕ', 'ƕ'),
- ('ƙ', 'ƛ'),
- ('ƞ', 'ƞ'),
- ('ơ', 'ơ'),
- ('ƣ', 'ƣ'),
- ('ƥ', 'ƥ'),
- ('ƨ', 'ƨ'),
- ('ƪ', 'ƫ'),
- ('ƭ', 'ƭ'),
- ('ư', 'ư'),
- ('ƴ', 'ƴ'),
- ('ƶ', 'ƶ'),
- ('ƹ', 'ƺ'),
- ('ƽ', 'ƿ'),
- ('dž', 'dž'),
- ('lj', 'lj'),
- ('nj', 'nj'),
- ('ǎ', 'ǎ'),
- ('ǐ', 'ǐ'),
- ('ǒ', 'ǒ'),
- ('ǔ', 'ǔ'),
- ('ǖ', 'ǖ'),
- ('ǘ', 'ǘ'),
- ('ǚ', 'ǚ'),
- ('ǜ', 'ǝ'),
- ('ǟ', 'ǟ'),
- ('ǡ', 'ǡ'),
- ('ǣ', 'ǣ'),
- ('ǥ', 'ǥ'),
- ('ǧ', 'ǧ'),
- ('ǩ', 'ǩ'),
- ('ǫ', 'ǫ'),
- ('ǭ', 'ǭ'),
- ('ǯ', 'ǰ'),
- ('dz', 'dz'),
- ('ǵ', 'ǵ'),
- ('ǹ', 'ǹ'),
- ('ǻ', 'ǻ'),
- ('ǽ', 'ǽ'),
- ('ǿ', 'ǿ'),
- ('ȁ', 'ȁ'),
- ('ȃ', 'ȃ'),
- ('ȅ', 'ȅ'),
- ('ȇ', 'ȇ'),
- ('ȉ', 'ȉ'),
- ('ȋ', 'ȋ'),
- ('ȍ', 'ȍ'),
- ('ȏ', 'ȏ'),
- ('ȑ', 'ȑ'),
- ('ȓ', 'ȓ'),
- ('ȕ', 'ȕ'),
- ('ȗ', 'ȗ'),
- ('ș', 'ș'),
- ('ț', 'ț'),
- ('ȝ', 'ȝ'),
- ('ȟ', 'ȟ'),
- ('ȡ', 'ȡ'),
- ('ȣ', 'ȣ'),
- ('ȥ', 'ȥ'),
- ('ȧ', 'ȧ'),
- ('ȩ', 'ȩ'),
- ('ȫ', 'ȫ'),
- ('ȭ', 'ȭ'),
- ('ȯ', 'ȯ'),
- ('ȱ', 'ȱ'),
- ('ȳ', 'ȹ'),
- ('ȼ', 'ȼ'),
- ('ȿ', 'ɀ'),
- ('ɂ', 'ɂ'),
- ('ɇ', 'ɇ'),
- ('ɉ', 'ɉ'),
- ('ɋ', 'ɋ'),
- ('ɍ', 'ɍ'),
- ('ɏ', 'ʓ'),
- ('ʕ', 'ʸ'),
- ('ˀ', 'ˁ'),
- ('ˠ', 'ˤ'),
- ('ͱ', 'ͱ'),
- ('ͳ', 'ͳ'),
- ('ͷ', 'ͷ'),
- ('ͺ', 'ͽ'),
- ('ΐ', 'ΐ'),
- ('ά', 'ώ'),
- ('ϐ', 'ϑ'),
- ('ϕ', 'ϗ'),
- ('ϙ', 'ϙ'),
- ('ϛ', 'ϛ'),
- ('ϝ', 'ϝ'),
- ('ϟ', 'ϟ'),
- ('ϡ', 'ϡ'),
- ('ϣ', 'ϣ'),
- ('ϥ', 'ϥ'),
- ('ϧ', 'ϧ'),
- ('ϩ', 'ϩ'),
- ('ϫ', 'ϫ'),
- ('ϭ', 'ϭ'),
- ('ϯ', 'ϳ'),
- ('ϵ', 'ϵ'),
- ('ϸ', 'ϸ'),
- ('ϻ', 'ϼ'),
- ('а', 'џ'),
- ('ѡ', 'ѡ'),
- ('ѣ', 'ѣ'),
- ('ѥ', 'ѥ'),
- ('ѧ', 'ѧ'),
- ('ѩ', 'ѩ'),
- ('ѫ', 'ѫ'),
- ('ѭ', 'ѭ'),
- ('ѯ', 'ѯ'),
- ('ѱ', 'ѱ'),
- ('ѳ', 'ѳ'),
- ('ѵ', 'ѵ'),
- ('ѷ', 'ѷ'),
- ('ѹ', 'ѹ'),
- ('ѻ', 'ѻ'),
- ('ѽ', 'ѽ'),
- ('ѿ', 'ѿ'),
- ('ҁ', 'ҁ'),
- ('ҋ', 'ҋ'),
- ('ҍ', 'ҍ'),
- ('ҏ', 'ҏ'),
- ('ґ', 'ґ'),
- ('ғ', 'ғ'),
- ('ҕ', 'ҕ'),
- ('җ', 'җ'),
- ('ҙ', 'ҙ'),
- ('қ', 'қ'),
- ('ҝ', 'ҝ'),
- ('ҟ', 'ҟ'),
- ('ҡ', 'ҡ'),
- ('ң', 'ң'),
- ('ҥ', 'ҥ'),
- ('ҧ', 'ҧ'),
- ('ҩ', 'ҩ'),
- ('ҫ', 'ҫ'),
- ('ҭ', 'ҭ'),
- ('ү', 'ү'),
- ('ұ', 'ұ'),
- ('ҳ', 'ҳ'),
- ('ҵ', 'ҵ'),
- ('ҷ', 'ҷ'),
- ('ҹ', 'ҹ'),
- ('һ', 'һ'),
- ('ҽ', 'ҽ'),
- ('ҿ', 'ҿ'),
- ('ӂ', 'ӂ'),
- ('ӄ', 'ӄ'),
- ('ӆ', 'ӆ'),
- ('ӈ', 'ӈ'),
- ('ӊ', 'ӊ'),
- ('ӌ', 'ӌ'),
- ('ӎ', 'ӏ'),
- ('ӑ', 'ӑ'),
- ('ӓ', 'ӓ'),
- ('ӕ', 'ӕ'),
- ('ӗ', 'ӗ'),
- ('ә', 'ә'),
- ('ӛ', 'ӛ'),
- ('ӝ', 'ӝ'),
- ('ӟ', 'ӟ'),
- ('ӡ', 'ӡ'),
- ('ӣ', 'ӣ'),
- ('ӥ', 'ӥ'),
- ('ӧ', 'ӧ'),
- ('ө', 'ө'),
- ('ӫ', 'ӫ'),
- ('ӭ', 'ӭ'),
- ('ӯ', 'ӯ'),
- ('ӱ', 'ӱ'),
- ('ӳ', 'ӳ'),
- ('ӵ', 'ӵ'),
- ('ӷ', 'ӷ'),
- ('ӹ', 'ӹ'),
- ('ӻ', 'ӻ'),
- ('ӽ', 'ӽ'),
- ('ӿ', 'ӿ'),
- ('ԁ', 'ԁ'),
- ('ԃ', 'ԃ'),
- ('ԅ', 'ԅ'),
- ('ԇ', 'ԇ'),
- ('ԉ', 'ԉ'),
- ('ԋ', 'ԋ'),
- ('ԍ', 'ԍ'),
- ('ԏ', 'ԏ'),
- ('ԑ', 'ԑ'),
- ('ԓ', 'ԓ'),
- ('ԕ', 'ԕ'),
- ('ԗ', 'ԗ'),
- ('ԙ', 'ԙ'),
- ('ԛ', 'ԛ'),
- ('ԝ', 'ԝ'),
- ('ԟ', 'ԟ'),
- ('ԡ', 'ԡ'),
- ('ԣ', 'ԣ'),
- ('ԥ', 'ԥ'),
- ('ԧ', 'ԧ'),
- ('ԩ', 'ԩ'),
- ('ԫ', 'ԫ'),
- ('ԭ', 'ԭ'),
- ('ԯ', 'ԯ'),
- ('ՠ', 'ֈ'),
- ('ჼ', 'ჼ'),
- ('ᏸ', 'ᏽ'),
- ('ᲀ', 'ᲈ'),
- ('ᲊ', 'ᲊ'),
- ('ᴀ', 'ᶿ'),
- ('ḁ', 'ḁ'),
- ('ḃ', 'ḃ'),
- ('ḅ', 'ḅ'),
- ('ḇ', 'ḇ'),
- ('ḉ', 'ḉ'),
- ('ḋ', 'ḋ'),
- ('ḍ', 'ḍ'),
- ('ḏ', 'ḏ'),
- ('ḑ', 'ḑ'),
- ('ḓ', 'ḓ'),
- ('ḕ', 'ḕ'),
- ('ḗ', 'ḗ'),
- ('ḙ', 'ḙ'),
- ('ḛ', 'ḛ'),
- ('ḝ', 'ḝ'),
- ('ḟ', 'ḟ'),
- ('ḡ', 'ḡ'),
- ('ḣ', 'ḣ'),
- ('ḥ', 'ḥ'),
- ('ḧ', 'ḧ'),
- ('ḩ', 'ḩ'),
- ('ḫ', 'ḫ'),
- ('ḭ', 'ḭ'),
- ('ḯ', 'ḯ'),
- ('ḱ', 'ḱ'),
- ('ḳ', 'ḳ'),
- ('ḵ', 'ḵ'),
- ('ḷ', 'ḷ'),
- ('ḹ', 'ḹ'),
- ('ḻ', 'ḻ'),
- ('ḽ', 'ḽ'),
- ('ḿ', 'ḿ'),
- ('ṁ', 'ṁ'),
- ('ṃ', 'ṃ'),
- ('ṅ', 'ṅ'),
- ('ṇ', 'ṇ'),
- ('ṉ', 'ṉ'),
- ('ṋ', 'ṋ'),
- ('ṍ', 'ṍ'),
- ('ṏ', 'ṏ'),
- ('ṑ', 'ṑ'),
- ('ṓ', 'ṓ'),
- ('ṕ', 'ṕ'),
- ('ṗ', 'ṗ'),
- ('ṙ', 'ṙ'),
- ('ṛ', 'ṛ'),
- ('ṝ', 'ṝ'),
- ('ṟ', 'ṟ'),
- ('ṡ', 'ṡ'),
- ('ṣ', 'ṣ'),
- ('ṥ', 'ṥ'),
- ('ṧ', 'ṧ'),
- ('ṩ', 'ṩ'),
- ('ṫ', 'ṫ'),
- ('ṭ', 'ṭ'),
- ('ṯ', 'ṯ'),
- ('ṱ', 'ṱ'),
- ('ṳ', 'ṳ'),
- ('ṵ', 'ṵ'),
- ('ṷ', 'ṷ'),
- ('ṹ', 'ṹ'),
- ('ṻ', 'ṻ'),
- ('ṽ', 'ṽ'),
- ('ṿ', 'ṿ'),
- ('ẁ', 'ẁ'),
- ('ẃ', 'ẃ'),
- ('ẅ', 'ẅ'),
- ('ẇ', 'ẇ'),
- ('ẉ', 'ẉ'),
- ('ẋ', 'ẋ'),
- ('ẍ', 'ẍ'),
- ('ẏ', 'ẏ'),
- ('ẑ', 'ẑ'),
- ('ẓ', 'ẓ'),
- ('ẕ', 'ẝ'),
- ('ẟ', 'ẟ'),
- ('ạ', 'ạ'),
- ('ả', 'ả'),
- ('ấ', 'ấ'),
- ('ầ', 'ầ'),
- ('ẩ', 'ẩ'),
- ('ẫ', 'ẫ'),
- ('ậ', 'ậ'),
- ('ắ', 'ắ'),
- ('ằ', 'ằ'),
- ('ẳ', 'ẳ'),
- ('ẵ', 'ẵ'),
- ('ặ', 'ặ'),
- ('ẹ', 'ẹ'),
- ('ẻ', 'ẻ'),
- ('ẽ', 'ẽ'),
- ('ế', 'ế'),
- ('ề', 'ề'),
- ('ể', 'ể'),
- ('ễ', 'ễ'),
- ('ệ', 'ệ'),
- ('ỉ', 'ỉ'),
- ('ị', 'ị'),
- ('ọ', 'ọ'),
- ('ỏ', 'ỏ'),
- ('ố', 'ố'),
- ('ồ', 'ồ'),
- ('ổ', 'ổ'),
- ('ỗ', 'ỗ'),
- ('ộ', 'ộ'),
- ('ớ', 'ớ'),
- ('ờ', 'ờ'),
- ('ở', 'ở'),
- ('ỡ', 'ỡ'),
- ('ợ', 'ợ'),
- ('ụ', 'ụ'),
- ('ủ', 'ủ'),
- ('ứ', 'ứ'),
- ('ừ', 'ừ'),
- ('ử', 'ử'),
- ('ữ', 'ữ'),
- ('ự', 'ự'),
- ('ỳ', 'ỳ'),
- ('ỵ', 'ỵ'),
- ('ỷ', 'ỷ'),
- ('ỹ', 'ỹ'),
- ('ỻ', 'ỻ'),
- ('ỽ', 'ỽ'),
- ('ỿ', 'ἇ'),
- ('ἐ', 'ἕ'),
- ('ἠ', 'ἧ'),
- ('ἰ', 'ἷ'),
- ('ὀ', 'ὅ'),
- ('ὐ', 'ὗ'),
- ('ὠ', 'ὧ'),
- ('ὰ', 'ώ'),
- ('ᾀ', 'ᾇ'),
- ('ᾐ', 'ᾗ'),
- ('ᾠ', 'ᾧ'),
- ('ᾰ', 'ᾴ'),
- ('ᾶ', 'ᾷ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῇ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'ῗ'),
- ('ῠ', 'ῧ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῷ'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('ℊ', 'ℊ'),
- ('ℎ', 'ℏ'),
- ('ℓ', 'ℓ'),
- ('ℯ', 'ℯ'),
- ('ℴ', 'ℴ'),
- ('ℹ', 'ℹ'),
- ('ℼ', 'ℽ'),
- ('ⅆ', 'ⅉ'),
- ('ⅎ', 'ⅎ'),
- ('ⅰ', 'ⅿ'),
- ('ↄ', 'ↄ'),
- ('ⓐ', 'ⓩ'),
- ('ⰰ', 'ⱟ'),
- ('ⱡ', 'ⱡ'),
- ('ⱥ', 'ⱦ'),
- ('ⱨ', 'ⱨ'),
- ('ⱪ', 'ⱪ'),
- ('ⱬ', 'ⱬ'),
- ('ⱱ', 'ⱱ'),
- ('ⱳ', 'ⱴ'),
- ('ⱶ', 'ⱽ'),
- ('ⲁ', 'ⲁ'),
- ('ⲃ', 'ⲃ'),
- ('ⲅ', 'ⲅ'),
- ('ⲇ', 'ⲇ'),
- ('ⲉ', 'ⲉ'),
- ('ⲋ', 'ⲋ'),
- ('ⲍ', 'ⲍ'),
- ('ⲏ', 'ⲏ'),
- ('ⲑ', 'ⲑ'),
- ('ⲓ', 'ⲓ'),
- ('ⲕ', 'ⲕ'),
- ('ⲗ', 'ⲗ'),
- ('ⲙ', 'ⲙ'),
- ('ⲛ', 'ⲛ'),
- ('ⲝ', 'ⲝ'),
- ('ⲟ', 'ⲟ'),
- ('ⲡ', 'ⲡ'),
- ('ⲣ', 'ⲣ'),
- ('ⲥ', 'ⲥ'),
- ('ⲧ', 'ⲧ'),
- ('ⲩ', 'ⲩ'),
- ('ⲫ', 'ⲫ'),
- ('ⲭ', 'ⲭ'),
- ('ⲯ', 'ⲯ'),
- ('ⲱ', 'ⲱ'),
- ('ⲳ', 'ⲳ'),
- ('ⲵ', 'ⲵ'),
- ('ⲷ', 'ⲷ'),
- ('ⲹ', 'ⲹ'),
- ('ⲻ', 'ⲻ'),
- ('ⲽ', 'ⲽ'),
- ('ⲿ', 'ⲿ'),
- ('ⳁ', 'ⳁ'),
- ('ⳃ', 'ⳃ'),
- ('ⳅ', 'ⳅ'),
- ('ⳇ', 'ⳇ'),
- ('ⳉ', 'ⳉ'),
- ('ⳋ', 'ⳋ'),
- ('ⳍ', 'ⳍ'),
- ('ⳏ', 'ⳏ'),
- ('ⳑ', 'ⳑ'),
- ('ⳓ', 'ⳓ'),
- ('ⳕ', 'ⳕ'),
- ('ⳗ', 'ⳗ'),
- ('ⳙ', 'ⳙ'),
- ('ⳛ', 'ⳛ'),
- ('ⳝ', 'ⳝ'),
- ('ⳟ', 'ⳟ'),
- ('ⳡ', 'ⳡ'),
- ('ⳣ', 'ⳤ'),
- ('ⳬ', 'ⳬ'),
- ('ⳮ', 'ⳮ'),
- ('ⳳ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ꙁ', 'ꙁ'),
- ('ꙃ', 'ꙃ'),
- ('ꙅ', 'ꙅ'),
- ('ꙇ', 'ꙇ'),
- ('ꙉ', 'ꙉ'),
- ('ꙋ', 'ꙋ'),
- ('ꙍ', 'ꙍ'),
- ('ꙏ', 'ꙏ'),
- ('ꙑ', 'ꙑ'),
- ('ꙓ', 'ꙓ'),
- ('ꙕ', 'ꙕ'),
- ('ꙗ', 'ꙗ'),
- ('ꙙ', 'ꙙ'),
- ('ꙛ', 'ꙛ'),
- ('ꙝ', 'ꙝ'),
- ('ꙟ', 'ꙟ'),
- ('ꙡ', 'ꙡ'),
- ('ꙣ', 'ꙣ'),
- ('ꙥ', 'ꙥ'),
- ('ꙧ', 'ꙧ'),
- ('ꙩ', 'ꙩ'),
- ('ꙫ', 'ꙫ'),
- ('ꙭ', 'ꙭ'),
- ('ꚁ', 'ꚁ'),
- ('ꚃ', 'ꚃ'),
- ('ꚅ', 'ꚅ'),
- ('ꚇ', 'ꚇ'),
- ('ꚉ', 'ꚉ'),
- ('ꚋ', 'ꚋ'),
- ('ꚍ', 'ꚍ'),
- ('ꚏ', 'ꚏ'),
- ('ꚑ', 'ꚑ'),
- ('ꚓ', 'ꚓ'),
- ('ꚕ', 'ꚕ'),
- ('ꚗ', 'ꚗ'),
- ('ꚙ', 'ꚙ'),
- ('ꚛ', 'ꚝ'),
- ('ꜣ', 'ꜣ'),
- ('ꜥ', 'ꜥ'),
- ('ꜧ', 'ꜧ'),
- ('ꜩ', 'ꜩ'),
- ('ꜫ', 'ꜫ'),
- ('ꜭ', 'ꜭ'),
- ('ꜯ', 'ꜱ'),
- ('ꜳ', 'ꜳ'),
- ('ꜵ', 'ꜵ'),
- ('ꜷ', 'ꜷ'),
- ('ꜹ', 'ꜹ'),
- ('ꜻ', 'ꜻ'),
- ('ꜽ', 'ꜽ'),
- ('ꜿ', 'ꜿ'),
- ('ꝁ', 'ꝁ'),
- ('ꝃ', 'ꝃ'),
- ('ꝅ', 'ꝅ'),
- ('ꝇ', 'ꝇ'),
- ('ꝉ', 'ꝉ'),
- ('ꝋ', 'ꝋ'),
- ('ꝍ', 'ꝍ'),
- ('ꝏ', 'ꝏ'),
- ('ꝑ', 'ꝑ'),
- ('ꝓ', 'ꝓ'),
- ('ꝕ', 'ꝕ'),
- ('ꝗ', 'ꝗ'),
- ('ꝙ', 'ꝙ'),
- ('ꝛ', 'ꝛ'),
- ('ꝝ', 'ꝝ'),
- ('ꝟ', 'ꝟ'),
- ('ꝡ', 'ꝡ'),
- ('ꝣ', 'ꝣ'),
- ('ꝥ', 'ꝥ'),
- ('ꝧ', 'ꝧ'),
- ('ꝩ', 'ꝩ'),
- ('ꝫ', 'ꝫ'),
- ('ꝭ', 'ꝭ'),
- ('ꝯ', 'ꝸ'),
- ('ꝺ', 'ꝺ'),
- ('ꝼ', 'ꝼ'),
- ('ꝿ', 'ꝿ'),
- ('ꞁ', 'ꞁ'),
- ('ꞃ', 'ꞃ'),
- ('ꞅ', 'ꞅ'),
- ('ꞇ', 'ꞇ'),
- ('ꞌ', 'ꞌ'),
- ('ꞎ', 'ꞎ'),
- ('ꞑ', 'ꞑ'),
- ('ꞓ', 'ꞕ'),
- ('ꞗ', 'ꞗ'),
- ('ꞙ', 'ꞙ'),
- ('ꞛ', 'ꞛ'),
- ('ꞝ', 'ꞝ'),
- ('ꞟ', 'ꞟ'),
- ('ꞡ', 'ꞡ'),
- ('ꞣ', 'ꞣ'),
- ('ꞥ', 'ꞥ'),
- ('ꞧ', 'ꞧ'),
- ('ꞩ', 'ꞩ'),
- ('ꞯ', 'ꞯ'),
- ('ꞵ', 'ꞵ'),
- ('ꞷ', 'ꞷ'),
- ('ꞹ', 'ꞹ'),
- ('ꞻ', 'ꞻ'),
- ('ꞽ', 'ꞽ'),
- ('ꞿ', 'ꞿ'),
- ('ꟁ', 'ꟁ'),
- ('ꟃ', 'ꟃ'),
- ('ꟈ', 'ꟈ'),
- ('ꟊ', 'ꟊ'),
- ('ꟍ', 'ꟍ'),
- ('ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'ꟕ'),
- ('ꟗ', 'ꟗ'),
- ('ꟙ', 'ꟙ'),
- ('ꟛ', 'ꟛ'),
- ('ꟲ', 'ꟴ'),
- ('ꟶ', 'ꟶ'),
- ('ꟸ', 'ꟺ'),
- ('ꬰ', 'ꭚ'),
- ('ꭜ', 'ꭩ'),
- ('ꭰ', 'ꮿ'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('a', 'z'),
- ('𐐨', '𐑏'),
- ('𐓘', '𐓻'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐞀', '𐞀'),
- ('𐞃', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𐳀', '𐳲'),
- ('𐵰', '𐶅'),
- ('𑣀', '𑣟'),
- ('𖹠', '𖹿'),
- ('𝐚', '𝐳'),
- ('𝑎', '𝑔'),
- ('𝑖', '𝑧'),
- ('𝒂', '𝒛'),
- ('𝒶', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝓏'),
- ('𝓪', '𝔃'),
- ('𝔞', '𝔷'),
- ('𝕒', '𝕫'),
- ('𝖆', '𝖟'),
- ('𝖺', '𝗓'),
- ('𝗮', '𝘇'),
- ('𝘢', '𝘻'),
- ('𝙖', '𝙯'),
- ('𝚊', '𝚥'),
- ('𝛂', '𝛚'),
- ('𝛜', '𝛡'),
- ('𝛼', '𝜔'),
- ('𝜖', '𝜛'),
- ('𝜶', '𝝎'),
- ('𝝐', '𝝕'),
- ('𝝰', '𝞈'),
- ('𝞊', '𝞏'),
- ('𝞪', '𝟂'),
- ('𝟄', '𝟉'),
- ('𝟋', '𝟋'),
- ('𝼀', '𝼉'),
- ('𝼋', '𝼞'),
- ('𝼥', '𝼪'),
- ('𞀰', '𞁭'),
- ('𞤢', '𞥃'),
-];
-
-pub const NUMERIC: &'static [(char, char)] = &[
- ('0', '9'),
- ('\u{600}', '\u{605}'),
- ('٠', '٩'),
- ('٫', '٬'),
- ('\u{6dd}', '\u{6dd}'),
- ('۰', '۹'),
- ('߀', '߉'),
- ('\u{890}', '\u{891}'),
- ('\u{8e2}', '\u{8e2}'),
- ('०', '९'),
- ('০', '৯'),
- ('੦', '੯'),
- ('૦', '૯'),
- ('୦', '୯'),
- ('௦', '௯'),
- ('౦', '౯'),
- ('೦', '೯'),
- ('൦', '൯'),
- ('෦', '෯'),
- ('๐', '๙'),
- ('໐', '໙'),
- ('༠', '༩'),
- ('၀', '၉'),
- ('႐', '႙'),
- ('០', '៩'),
- ('᠐', '᠙'),
- ('᥆', '᥏'),
- ('᧐', '᧚'),
- ('᪀', '᪉'),
- ('᪐', '᪙'),
- ('᭐', '᭙'),
- ('᮰', '᮹'),
- ('᱀', '᱉'),
- ('᱐', '᱙'),
- ('꘠', '꘩'),
- ('꣐', '꣙'),
- ('꤀', '꤉'),
- ('꧐', '꧙'),
- ('꧰', '꧹'),
- ('꩐', '꩙'),
- ('꯰', '꯹'),
- ('0', '9'),
- ('𐒠', '𐒩'),
- ('𐴰', '𐴹'),
- ('𐵀', '𐵉'),
- ('𑁦', '𑁯'),
- ('\u{110bd}', '\u{110bd}'),
- ('\u{110cd}', '\u{110cd}'),
- ('𑃰', '𑃹'),
- ('𑄶', '𑄿'),
- ('𑇐', '𑇙'),
- ('𑋰', '𑋹'),
- ('𑑐', '𑑙'),
- ('𑓐', '𑓙'),
- ('𑙐', '𑙙'),
- ('𑛀', '𑛉'),
- ('𑛐', '𑛣'),
- ('𑜰', '𑜹'),
- ('𑣠', '𑣩'),
- ('𑥐', '𑥙'),
- ('𑯰', '𑯹'),
- ('𑱐', '𑱙'),
- ('𑵐', '𑵙'),
- ('𑶠', '𑶩'),
- ('𑽐', '𑽙'),
- ('𖄰', '𖄹'),
- ('𖩠', '𖩩'),
- ('𖫀', '𖫉'),
- ('𖭐', '𖭙'),
- ('𖵰', '𖵹'),
- ('𜳰', '𜳹'),
- ('𝟎', '𝟿'),
- ('𞅀', '𞅉'),
- ('𞋰', '𞋹'),
- ('𞓰', '𞓹'),
- ('𞗱', '𞗺'),
- ('𞥐', '𞥙'),
- ('🯰', '🯹'),
-];
-
-pub const OLETTER: &'static [(char, char)] = &[
- ('ƻ', 'ƻ'),
- ('ǀ', 'ǃ'),
- ('ʔ', 'ʔ'),
- ('ʹ', 'ʿ'),
- ('ˆ', 'ˑ'),
- ('ˬ', 'ˬ'),
- ('ˮ', 'ˮ'),
- ('ʹ', 'ʹ'),
- ('ՙ', 'ՙ'),
- ('א', 'ת'),
- ('ׯ', '׳'),
- ('ؠ', 'ي'),
- ('ٮ', 'ٯ'),
- ('ٱ', 'ۓ'),
- ('ە', 'ە'),
- ('ۥ', 'ۦ'),
- ('ۮ', 'ۯ'),
- ('ۺ', 'ۼ'),
- ('ۿ', 'ۿ'),
- ('ܐ', 'ܐ'),
- ('ܒ', 'ܯ'),
- ('ݍ', 'ޥ'),
- ('ޱ', 'ޱ'),
- ('ߊ', 'ߪ'),
- ('ߴ', 'ߵ'),
- ('ߺ', 'ߺ'),
- ('ࠀ', 'ࠕ'),
- ('ࠚ', 'ࠚ'),
- ('ࠤ', 'ࠤ'),
- ('ࠨ', 'ࠨ'),
- ('ࡀ', 'ࡘ'),
- ('ࡠ', 'ࡪ'),
- ('ࡰ', 'ࢇ'),
- ('ࢉ', 'ࢎ'),
- ('ࢠ', 'ࣉ'),
- ('ऄ', 'ह'),
- ('ऽ', 'ऽ'),
- ('ॐ', 'ॐ'),
- ('क़', 'ॡ'),
- ('ॱ', 'ঀ'),
- ('অ', 'ঌ'),
- ('এ', 'ঐ'),
- ('ও', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('ঽ', 'ঽ'),
- ('ৎ', 'ৎ'),
- ('ড়', 'ঢ়'),
- ('য়', 'ৡ'),
- ('ৰ', 'ৱ'),
- ('ৼ', 'ৼ'),
- ('ਅ', 'ਊ'),
- ('ਏ', 'ਐ'),
- ('ਓ', 'ਨ'),
- ('ਪ', 'ਰ'),
- ('ਲ', 'ਲ਼'),
- ('ਵ', 'ਸ਼'),
- ('ਸ', 'ਹ'),
- ('ਖ਼', 'ੜ'),
- ('ਫ਼', 'ਫ਼'),
- ('ੲ', 'ੴ'),
- ('અ', 'ઍ'),
- ('એ', 'ઑ'),
- ('ઓ', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('ઽ', 'ઽ'),
- ('ૐ', 'ૐ'),
- ('ૠ', 'ૡ'),
- ('ૹ', 'ૹ'),
- ('ଅ', 'ଌ'),
- ('ଏ', 'ଐ'),
- ('ଓ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଵ', 'ହ'),
- ('ଽ', 'ଽ'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', 'ୡ'),
- ('ୱ', 'ୱ'),
- ('ஃ', 'ஃ'),
- ('அ', 'ஊ'),
- ('எ', 'ஐ'),
- ('ஒ', 'க'),
- ('ங', 'ச'),
- ('ஜ', 'ஜ'),
- ('ஞ', 'ட'),
- ('ண', 'த'),
- ('ந', 'ப'),
- ('ம', 'ஹ'),
- ('ௐ', 'ௐ'),
- ('అ', 'ఌ'),
- ('ఎ', 'ఐ'),
- ('ఒ', 'న'),
- ('ప', 'హ'),
- ('ఽ', 'ఽ'),
- ('ౘ', 'ౚ'),
- ('ౝ', 'ౝ'),
- ('ౠ', 'ౡ'),
- ('ಀ', 'ಀ'),
- ('ಅ', 'ಌ'),
- ('ಎ', 'ಐ'),
- ('ಒ', 'ನ'),
- ('ಪ', 'ಳ'),
- ('ವ', 'ಹ'),
- ('ಽ', 'ಽ'),
- ('ೝ', 'ೞ'),
- ('ೠ', 'ೡ'),
- ('ೱ', 'ೲ'),
- ('ഄ', 'ഌ'),
- ('എ', 'ഐ'),
- ('ഒ', 'ഺ'),
- ('ഽ', 'ഽ'),
- ('ൎ', 'ൎ'),
- ('ൔ', 'ൖ'),
- ('ൟ', 'ൡ'),
- ('ൺ', 'ൿ'),
- ('අ', 'ඖ'),
- ('ක', 'න'),
- ('ඳ', 'ර'),
- ('ල', 'ල'),
- ('ව', 'ෆ'),
- ('ก', 'ะ'),
- ('า', 'ำ'),
- ('เ', 'ๆ'),
- ('ກ', 'ຂ'),
- ('ຄ', 'ຄ'),
- ('ຆ', 'ຊ'),
- ('ຌ', 'ຣ'),
- ('ລ', 'ລ'),
- ('ວ', 'ະ'),
- ('າ', 'ຳ'),
- ('ຽ', 'ຽ'),
- ('ເ', 'ໄ'),
- ('ໆ', 'ໆ'),
- ('ໜ', 'ໟ'),
- ('ༀ', 'ༀ'),
- ('ཀ', 'ཇ'),
- ('ཉ', 'ཬ'),
- ('ྈ', 'ྌ'),
- ('က', 'ဪ'),
- ('ဿ', 'ဿ'),
- ('ၐ', 'ၕ'),
- ('ၚ', 'ၝ'),
- ('ၡ', 'ၡ'),
- ('ၥ', 'ၦ'),
- ('ၮ', 'ၰ'),
- ('ၵ', 'ႁ'),
- ('ႎ', 'ႎ'),
- ('ა', 'ჺ'),
- ('ჽ', 'ቈ'),
- ('ቊ', 'ቍ'),
- ('ቐ', 'ቖ'),
- ('ቘ', 'ቘ'),
- ('ቚ', 'ቝ'),
- ('በ', 'ኈ'),
- ('ኊ', 'ኍ'),
- ('ነ', 'ኰ'),
- ('ኲ', 'ኵ'),
- ('ኸ', 'ኾ'),
- ('ዀ', 'ዀ'),
- ('ዂ', 'ዅ'),
- ('ወ', 'ዖ'),
- ('ዘ', 'ጐ'),
- ('ጒ', 'ጕ'),
- ('ጘ', 'ፚ'),
- ('ᎀ', 'ᎏ'),
- ('ᐁ', 'ᙬ'),
- ('ᙯ', 'ᙿ'),
- ('ᚁ', 'ᚚ'),
- ('ᚠ', 'ᛪ'),
- ('ᛮ', 'ᛸ'),
- ('ᜀ', 'ᜑ'),
- ('ᜟ', 'ᜱ'),
- ('ᝀ', 'ᝑ'),
- ('ᝠ', 'ᝬ'),
- ('ᝮ', 'ᝰ'),
- ('ក', 'ឳ'),
- ('ៗ', 'ៗ'),
- ('ៜ', 'ៜ'),
- ('ᠠ', 'ᡸ'),
- ('ᢀ', 'ᢄ'),
- ('ᢇ', 'ᢨ'),
- ('ᢪ', 'ᢪ'),
- ('ᢰ', 'ᣵ'),
- ('ᤀ', 'ᤞ'),
- ('ᥐ', 'ᥭ'),
- ('ᥰ', 'ᥴ'),
- ('ᦀ', 'ᦫ'),
- ('ᦰ', 'ᧉ'),
- ('ᨀ', 'ᨖ'),
- ('ᨠ', 'ᩔ'),
- ('ᪧ', 'ᪧ'),
- ('ᬅ', 'ᬳ'),
- ('ᭅ', 'ᭌ'),
- ('ᮃ', 'ᮠ'),
- ('ᮮ', 'ᮯ'),
- ('ᮺ', 'ᯥ'),
- ('ᰀ', 'ᰣ'),
- ('ᱍ', 'ᱏ'),
- ('ᱚ', 'ᱽ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('ᳩ', 'ᳬ'),
- ('ᳮ', 'ᳳ'),
- ('ᳵ', 'ᳶ'),
- ('ᳺ', 'ᳺ'),
- ('ℵ', 'ℸ'),
- ('ↀ', 'ↂ'),
- ('ↅ', 'ↈ'),
- ('ⴰ', 'ⵧ'),
- ('ⵯ', 'ⵯ'),
- ('ⶀ', 'ⶖ'),
- ('ⶠ', 'ⶦ'),
- ('ⶨ', 'ⶮ'),
- ('ⶰ', 'ⶶ'),
- ('ⶸ', 'ⶾ'),
- ('ⷀ', 'ⷆ'),
- ('ⷈ', 'ⷎ'),
- ('ⷐ', 'ⷖ'),
- ('ⷘ', 'ⷞ'),
- ('ⸯ', 'ⸯ'),
- ('々', '〇'),
- ('〡', '〩'),
- ('〱', '〵'),
- ('〸', '〼'),
- ('ぁ', 'ゖ'),
- ('ゝ', 'ゟ'),
- ('ァ', 'ヺ'),
- ('ー', 'ヿ'),
- ('ㄅ', 'ㄯ'),
- ('ㄱ', 'ㆎ'),
- ('ㆠ', 'ㆿ'),
- ('ㇰ', 'ㇿ'),
- ('㐀', '䶿'),
- ('一', 'ꒌ'),
- ('ꓐ', 'ꓽ'),
- ('ꔀ', 'ꘌ'),
- ('ꘐ', 'ꘟ'),
- ('ꘪ', 'ꘫ'),
- ('ꙮ', 'ꙮ'),
- ('ꙿ', 'ꙿ'),
- ('ꚠ', 'ꛯ'),
- ('ꜗ', 'ꜟ'),
- ('ꞈ', 'ꞈ'),
- ('ꞏ', 'ꞏ'),
- ('ꟷ', 'ꟷ'),
- ('ꟻ', 'ꠁ'),
- ('ꠃ', 'ꠅ'),
- ('ꠇ', 'ꠊ'),
- ('ꠌ', 'ꠢ'),
- ('ꡀ', 'ꡳ'),
- ('ꢂ', 'ꢳ'),
- ('ꣲ', 'ꣷ'),
- ('ꣻ', 'ꣻ'),
- ('ꣽ', 'ꣾ'),
- ('ꤊ', 'ꤥ'),
- ('ꤰ', 'ꥆ'),
- ('ꥠ', 'ꥼ'),
- ('ꦄ', 'ꦲ'),
- ('ꧏ', 'ꧏ'),
- ('ꧠ', 'ꧤ'),
- ('ꧦ', 'ꧯ'),
- ('ꧺ', 'ꧾ'),
- ('ꨀ', 'ꨨ'),
- ('ꩀ', 'ꩂ'),
- ('ꩄ', 'ꩋ'),
- ('ꩠ', 'ꩶ'),
- ('ꩺ', 'ꩺ'),
- ('ꩾ', 'ꪯ'),
- ('ꪱ', 'ꪱ'),
- ('ꪵ', 'ꪶ'),
- ('ꪹ', 'ꪽ'),
- ('ꫀ', 'ꫀ'),
- ('ꫂ', 'ꫂ'),
- ('ꫛ', 'ꫝ'),
- ('ꫠ', 'ꫪ'),
- ('ꫲ', 'ꫴ'),
- ('ꬁ', 'ꬆ'),
- ('ꬉ', 'ꬎ'),
- ('ꬑ', 'ꬖ'),
- ('ꬠ', 'ꬦ'),
- ('ꬨ', 'ꬮ'),
- ('ꯀ', 'ꯢ'),
- ('가', '힣'),
- ('ힰ', 'ퟆ'),
- ('ퟋ', 'ퟻ'),
- ('豈', '舘'),
- ('並', '龎'),
- ('יִ', 'יִ'),
- ('ײַ', 'ﬨ'),
- ('שׁ', 'זּ'),
- ('טּ', 'לּ'),
- ('מּ', 'מּ'),
- ('נּ', 'סּ'),
- ('ףּ', 'פּ'),
- ('צּ', 'ﮱ'),
- ('ﯓ', 'ﴽ'),
- ('ﵐ', 'ﶏ'),
- ('ﶒ', 'ﷇ'),
- ('ﷰ', 'ﷻ'),
- ('ﹰ', 'ﹴ'),
- ('ﹶ', 'ﻼ'),
- ('ヲ', 'ン'),
- ('ᅠ', 'ᄒ'),
- ('ᅡ', 'ᅦ'),
- ('ᅧ', 'ᅬ'),
- ('ᅭ', 'ᅲ'),
- ('ᅳ', 'ᅵ'),
- ('𐀀', '𐀋'),
- ('𐀍', '𐀦'),
- ('𐀨', '𐀺'),
- ('𐀼', '𐀽'),
- ('𐀿', '𐁍'),
- ('𐁐', '𐁝'),
- ('𐂀', '𐃺'),
- ('𐅀', '𐅴'),
- ('𐊀', '𐊜'),
- ('𐊠', '𐋐'),
- ('𐌀', '𐌟'),
- ('𐌭', '𐍊'),
- ('𐍐', '𐍵'),
- ('𐎀', '𐎝'),
- ('𐎠', '𐏃'),
- ('𐏈', '𐏏'),
- ('𐏑', '𐏕'),
- ('𐑐', '𐒝'),
- ('𐔀', '𐔧'),
- ('𐔰', '𐕣'),
- ('𐗀', '𐗳'),
- ('𐘀', '𐜶'),
- ('𐝀', '𐝕'),
- ('𐝠', '𐝧'),
- ('𐞁', '𐞂'),
- ('𐠀', '𐠅'),
- ('𐠈', '𐠈'),
- ('𐠊', '𐠵'),
- ('𐠷', '𐠸'),
- ('𐠼', '𐠼'),
- ('𐠿', '𐡕'),
- ('𐡠', '𐡶'),
- ('𐢀', '𐢞'),
- ('𐣠', '𐣲'),
- ('𐣴', '𐣵'),
- ('𐤀', '𐤕'),
- ('𐤠', '𐤹'),
- ('𐦀', '𐦷'),
- ('𐦾', '𐦿'),
- ('𐨀', '𐨀'),
- ('𐨐', '𐨓'),
- ('𐨕', '𐨗'),
- ('𐨙', '𐨵'),
- ('𐩠', '𐩼'),
- ('𐪀', '𐪜'),
- ('𐫀', '𐫇'),
- ('𐫉', '𐫤'),
- ('𐬀', '𐬵'),
- ('𐭀', '𐭕'),
- ('𐭠', '𐭲'),
- ('𐮀', '𐮑'),
- ('𐰀', '𐱈'),
- ('𐴀', '𐴣'),
- ('𐵊', '𐵏'),
- ('𐵯', '𐵯'),
- ('𐺀', '𐺩'),
- ('𐺰', '𐺱'),
- ('𐻂', '𐻄'),
- ('𐼀', '𐼜'),
- ('𐼧', '𐼧'),
- ('𐼰', '𐽅'),
- ('𐽰', '𐾁'),
- ('𐾰', '𐿄'),
- ('𐿠', '𐿶'),
- ('𑀃', '𑀷'),
- ('𑁱', '𑁲'),
- ('𑁵', '𑁵'),
- ('𑂃', '𑂯'),
- ('𑃐', '𑃨'),
- ('𑄃', '𑄦'),
- ('𑅄', '𑅄'),
- ('𑅇', '𑅇'),
- ('𑅐', '𑅲'),
- ('𑅶', '𑅶'),
- ('𑆃', '𑆲'),
- ('𑇁', '𑇄'),
- ('𑇚', '𑇚'),
- ('𑇜', '𑇜'),
- ('𑈀', '𑈑'),
- ('𑈓', '𑈫'),
- ('𑈿', '𑉀'),
- ('𑊀', '𑊆'),
- ('𑊈', '𑊈'),
- ('𑊊', '𑊍'),
- ('𑊏', '𑊝'),
- ('𑊟', '𑊨'),
- ('𑊰', '𑋞'),
- ('𑌅', '𑌌'),
- ('𑌏', '𑌐'),
- ('𑌓', '𑌨'),
- ('𑌪', '𑌰'),
- ('𑌲', '𑌳'),
- ('𑌵', '𑌹'),
- ('𑌽', '𑌽'),
- ('𑍐', '𑍐'),
- ('𑍝', '𑍡'),
- ('𑎀', '𑎉'),
- ('𑎋', '𑎋'),
- ('𑎎', '𑎎'),
- ('𑎐', '𑎵'),
- ('𑎷', '𑎷'),
- ('𑏑', '𑏑'),
- ('𑏓', '𑏓'),
- ('𑐀', '𑐴'),
- ('𑑇', '𑑊'),
- ('𑑟', '𑑡'),
- ('𑒀', '𑒯'),
- ('𑓄', '𑓅'),
- ('𑓇', '𑓇'),
- ('𑖀', '𑖮'),
- ('𑗘', '𑗛'),
- ('𑘀', '𑘯'),
- ('𑙄', '𑙄'),
- ('𑚀', '𑚪'),
- ('𑚸', '𑚸'),
- ('𑜀', '𑜚'),
- ('𑝀', '𑝆'),
- ('𑠀', '𑠫'),
- ('𑣿', '𑤆'),
- ('𑤉', '𑤉'),
- ('𑤌', '𑤓'),
- ('𑤕', '𑤖'),
- ('𑤘', '𑤯'),
- ('𑤿', '𑤿'),
- ('𑥁', '𑥁'),
- ('𑦠', '𑦧'),
- ('𑦪', '𑧐'),
- ('𑧡', '𑧡'),
- ('𑧣', '𑧣'),
- ('𑨀', '𑨀'),
- ('𑨋', '𑨲'),
- ('𑨺', '𑨺'),
- ('𑩐', '𑩐'),
- ('𑩜', '𑪉'),
- ('𑪝', '𑪝'),
- ('𑪰', '𑫸'),
- ('𑯀', '𑯠'),
- ('𑰀', '𑰈'),
- ('𑰊', '𑰮'),
- ('𑱀', '𑱀'),
- ('𑱲', '𑲏'),
- ('𑴀', '𑴆'),
- ('𑴈', '𑴉'),
- ('𑴋', '𑴰'),
- ('𑵆', '𑵆'),
- ('𑵠', '𑵥'),
- ('𑵧', '𑵨'),
- ('𑵪', '𑶉'),
- ('𑶘', '𑶘'),
- ('𑻠', '𑻲'),
- ('𑼂', '𑼂'),
- ('𑼄', '𑼐'),
- ('𑼒', '𑼳'),
- ('𑾰', '𑾰'),
- ('𒀀', '𒎙'),
- ('𒐀', '𒑮'),
- ('𒒀', '𒕃'),
- ('𒾐', '𒿰'),
- ('𓀀', '𓐯'),
- ('𓑁', '𓑆'),
- ('𓑠', '𔏺'),
- ('𔐀', '𔙆'),
- ('𖄀', '𖄝'),
- ('𖠀', '𖨸'),
- ('𖩀', '𖩞'),
- ('𖩰', '𖪾'),
- ('𖫐', '𖫭'),
- ('𖬀', '𖬯'),
- ('𖭀', '𖭃'),
- ('𖭣', '𖭷'),
- ('𖭽', '𖮏'),
- ('𖵀', '𖵬'),
- ('𖼀', '𖽊'),
- ('𖽐', '𖽐'),
- ('𖾓', '𖾟'),
- ('𖿠', '𖿡'),
- ('𖿣', '𖿣'),
- ('𗀀', '𘟷'),
- ('𘠀', '𘳕'),
- ('𘳿', '𘴈'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('𛀀', '𛄢'),
- ('𛄲', '𛄲'),
- ('𛅐', '𛅒'),
- ('𛅕', '𛅕'),
- ('𛅤', '𛅧'),
- ('𛅰', '𛋻'),
- ('𛰀', '𛱪'),
- ('𛱰', '𛱼'),
- ('𛲀', '𛲈'),
- ('𛲐', '𛲙'),
- ('𝼊', '𝼊'),
- ('𞄀', '𞄬'),
- ('𞄷', '𞄽'),
- ('𞅎', '𞅎'),
- ('𞊐', '𞊭'),
- ('𞋀', '𞋫'),
- ('𞓐', '𞓫'),
- ('𞗐', '𞗭'),
- ('𞗰', '𞗰'),
- ('𞟠', '𞟦'),
- ('𞟨', '𞟫'),
- ('𞟭', '𞟮'),
- ('𞟰', '𞟾'),
- ('𞠀', '𞣄'),
- ('𞥋', '𞥋'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('𠀀', '𪛟'),
- ('𪜀', '𫜹'),
- ('𫝀', '𫠝'),
- ('𫠠', '𬺡'),
- ('𬺰', '𮯠'),
- ('𮯰', '𮹝'),
- ('丽', '𪘀'),
- ('𰀀', '𱍊'),
- ('𱍐', '𲎯'),
-];
-
-pub const SCONTINUE: &'static [(char, char)] = &[
- (',', '-'),
- (':', ';'),
- (';', ';'),
- ('՝', '՝'),
- ('،', '؍'),
- ('߸', '߸'),
- ('᠂', '᠂'),
- ('᠈', '᠈'),
- ('–', '—'),
- ('、', '、'),
- ('︐', '︑'),
- ('︓', '︔'),
- ('︱', '︲'),
- ('﹐', '﹑'),
- ('﹔', '﹕'),
- ('﹘', '﹘'),
- ('﹣', '﹣'),
- (',', '-'),
- (':', ';'),
- ('、', '、'),
-];
-
-pub const STERM: &'static [(char, char)] = &[
- ('!', '!'),
- ('?', '?'),
- ('։', '։'),
- ('؝', '؟'),
- ('۔', '۔'),
- ('܀', '܂'),
- ('߹', '߹'),
- ('࠷', '࠷'),
- ('࠹', '࠹'),
- ('࠽', '࠾'),
- ('।', '॥'),
- ('၊', '။'),
- ('።', '።'),
- ('፧', '፨'),
- ('᙮', '᙮'),
- ('᜵', '᜶'),
- ('។', '៕'),
- ('᠃', '᠃'),
- ('᠉', '᠉'),
- ('᥄', '᥅'),
- ('᪨', '᪫'),
- ('᭎', '᭏'),
- ('᭚', '᭛'),
- ('᭞', '᭟'),
- ('᭽', '᭿'),
- ('᰻', '᰼'),
- ('᱾', '᱿'),
- ('‼', '‽'),
- ('⁇', '⁉'),
- ('⳹', '⳻'),
- ('⸮', '⸮'),
- ('⸼', '⸼'),
- ('⹓', '⹔'),
- ('。', '。'),
- ('꓿', '꓿'),
- ('꘎', '꘏'),
- ('꛳', '꛳'),
- ('꛷', '꛷'),
- ('꡶', '꡷'),
- ('꣎', '꣏'),
- ('꤯', '꤯'),
- ('꧈', '꧉'),
- ('꩝', '꩟'),
- ('꫰', '꫱'),
- ('꯫', '꯫'),
- ('︒', '︒'),
- ('︕', '︖'),
- ('﹖', '﹗'),
- ('!', '!'),
- ('?', '?'),
- ('。', '。'),
- ('𐩖', '𐩗'),
- ('𐽕', '𐽙'),
- ('𐾆', '𐾉'),
- ('𑁇', '𑁈'),
- ('𑂾', '𑃁'),
- ('𑅁', '𑅃'),
- ('𑇅', '𑇆'),
- ('𑇍', '𑇍'),
- ('𑇞', '𑇟'),
- ('𑈸', '𑈹'),
- ('𑈻', '𑈼'),
- ('𑊩', '𑊩'),
- ('𑏔', '𑏕'),
- ('𑑋', '𑑌'),
- ('𑗂', '𑗃'),
- ('𑗉', '𑗗'),
- ('𑙁', '𑙂'),
- ('𑜼', '𑜾'),
- ('𑥄', '𑥄'),
- ('𑥆', '𑥆'),
- ('𑩂', '𑩃'),
- ('𑪛', '𑪜'),
- ('𑱁', '𑱂'),
- ('𑻷', '𑻸'),
- ('𑽃', '𑽄'),
- ('𖩮', '𖩯'),
- ('𖫵', '𖫵'),
- ('𖬷', '𖬸'),
- ('𖭄', '𖭄'),
- ('𖵮', '𖵯'),
- ('𖺘', '𖺘'),
- ('𛲟', '𛲟'),
- ('𝪈', '𝪈'),
-];
-
-pub const SEP: &'static [(char, char)] =
- &[('\u{85}', '\u{85}'), ('\u{2028}', '\u{2029}')];
-
-pub const SP: &'static [(char, char)] = &[
- ('\t', '\t'),
- ('\u{b}', '\u{c}'),
- (' ', ' '),
- ('\u{a0}', '\u{a0}'),
- ('\u{1680}', '\u{1680}'),
- ('\u{2000}', '\u{200a}'),
- ('\u{202f}', '\u{202f}'),
- ('\u{205f}', '\u{205f}'),
- ('\u{3000}', '\u{3000}'),
-];
-
-pub const UPPER: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('À', 'Ö'),
- ('Ø', 'Þ'),
- ('Ā', 'Ā'),
- ('Ă', 'Ă'),
- ('Ą', 'Ą'),
- ('Ć', 'Ć'),
- ('Ĉ', 'Ĉ'),
- ('Ċ', 'Ċ'),
- ('Č', 'Č'),
- ('Ď', 'Ď'),
- ('Đ', 'Đ'),
- ('Ē', 'Ē'),
- ('Ĕ', 'Ĕ'),
- ('Ė', 'Ė'),
- ('Ę', 'Ę'),
- ('Ě', 'Ě'),
- ('Ĝ', 'Ĝ'),
- ('Ğ', 'Ğ'),
- ('Ġ', 'Ġ'),
- ('Ģ', 'Ģ'),
- ('Ĥ', 'Ĥ'),
- ('Ħ', 'Ħ'),
- ('Ĩ', 'Ĩ'),
- ('Ī', 'Ī'),
- ('Ĭ', 'Ĭ'),
- ('Į', 'Į'),
- ('İ', 'İ'),
- ('IJ', 'IJ'),
- ('Ĵ', 'Ĵ'),
- ('Ķ', 'Ķ'),
- ('Ĺ', 'Ĺ'),
- ('Ļ', 'Ļ'),
- ('Ľ', 'Ľ'),
- ('Ŀ', 'Ŀ'),
- ('Ł', 'Ł'),
- ('Ń', 'Ń'),
- ('Ņ', 'Ņ'),
- ('Ň', 'Ň'),
- ('Ŋ', 'Ŋ'),
- ('Ō', 'Ō'),
- ('Ŏ', 'Ŏ'),
- ('Ő', 'Ő'),
- ('Œ', 'Œ'),
- ('Ŕ', 'Ŕ'),
- ('Ŗ', 'Ŗ'),
- ('Ř', 'Ř'),
- ('Ś', 'Ś'),
- ('Ŝ', 'Ŝ'),
- ('Ş', 'Ş'),
- ('Š', 'Š'),
- ('Ţ', 'Ţ'),
- ('Ť', 'Ť'),
- ('Ŧ', 'Ŧ'),
- ('Ũ', 'Ũ'),
- ('Ū', 'Ū'),
- ('Ŭ', 'Ŭ'),
- ('Ů', 'Ů'),
- ('Ű', 'Ű'),
- ('Ų', 'Ų'),
- ('Ŵ', 'Ŵ'),
- ('Ŷ', 'Ŷ'),
- ('Ÿ', 'Ź'),
- ('Ż', 'Ż'),
- ('Ž', 'Ž'),
- ('Ɓ', 'Ƃ'),
- ('Ƅ', 'Ƅ'),
- ('Ɔ', 'Ƈ'),
- ('Ɖ', 'Ƌ'),
- ('Ǝ', 'Ƒ'),
- ('Ɠ', 'Ɣ'),
- ('Ɩ', 'Ƙ'),
- ('Ɯ', 'Ɲ'),
- ('Ɵ', 'Ơ'),
- ('Ƣ', 'Ƣ'),
- ('Ƥ', 'Ƥ'),
- ('Ʀ', 'Ƨ'),
- ('Ʃ', 'Ʃ'),
- ('Ƭ', 'Ƭ'),
- ('Ʈ', 'Ư'),
- ('Ʊ', 'Ƴ'),
- ('Ƶ', 'Ƶ'),
- ('Ʒ', 'Ƹ'),
- ('Ƽ', 'Ƽ'),
- ('DŽ', 'Dž'),
- ('LJ', 'Lj'),
- ('NJ', 'Nj'),
- ('Ǎ', 'Ǎ'),
- ('Ǐ', 'Ǐ'),
- ('Ǒ', 'Ǒ'),
- ('Ǔ', 'Ǔ'),
- ('Ǖ', 'Ǖ'),
- ('Ǘ', 'Ǘ'),
- ('Ǚ', 'Ǚ'),
- ('Ǜ', 'Ǜ'),
- ('Ǟ', 'Ǟ'),
- ('Ǡ', 'Ǡ'),
- ('Ǣ', 'Ǣ'),
- ('Ǥ', 'Ǥ'),
- ('Ǧ', 'Ǧ'),
- ('Ǩ', 'Ǩ'),
- ('Ǫ', 'Ǫ'),
- ('Ǭ', 'Ǭ'),
- ('Ǯ', 'Ǯ'),
- ('DZ', 'Dz'),
- ('Ǵ', 'Ǵ'),
- ('Ƕ', 'Ǹ'),
- ('Ǻ', 'Ǻ'),
- ('Ǽ', 'Ǽ'),
- ('Ǿ', 'Ǿ'),
- ('Ȁ', 'Ȁ'),
- ('Ȃ', 'Ȃ'),
- ('Ȅ', 'Ȅ'),
- ('Ȇ', 'Ȇ'),
- ('Ȉ', 'Ȉ'),
- ('Ȋ', 'Ȋ'),
- ('Ȍ', 'Ȍ'),
- ('Ȏ', 'Ȏ'),
- ('Ȑ', 'Ȑ'),
- ('Ȓ', 'Ȓ'),
- ('Ȕ', 'Ȕ'),
- ('Ȗ', 'Ȗ'),
- ('Ș', 'Ș'),
- ('Ț', 'Ț'),
- ('Ȝ', 'Ȝ'),
- ('Ȟ', 'Ȟ'),
- ('Ƞ', 'Ƞ'),
- ('Ȣ', 'Ȣ'),
- ('Ȥ', 'Ȥ'),
- ('Ȧ', 'Ȧ'),
- ('Ȩ', 'Ȩ'),
- ('Ȫ', 'Ȫ'),
- ('Ȭ', 'Ȭ'),
- ('Ȯ', 'Ȯ'),
- ('Ȱ', 'Ȱ'),
- ('Ȳ', 'Ȳ'),
- ('Ⱥ', 'Ȼ'),
- ('Ƚ', 'Ⱦ'),
- ('Ɂ', 'Ɂ'),
- ('Ƀ', 'Ɇ'),
- ('Ɉ', 'Ɉ'),
- ('Ɋ', 'Ɋ'),
- ('Ɍ', 'Ɍ'),
- ('Ɏ', 'Ɏ'),
- ('Ͱ', 'Ͱ'),
- ('Ͳ', 'Ͳ'),
- ('Ͷ', 'Ͷ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ώ'),
- ('Α', 'Ρ'),
- ('Σ', 'Ϋ'),
- ('Ϗ', 'Ϗ'),
- ('ϒ', 'ϔ'),
- ('Ϙ', 'Ϙ'),
- ('Ϛ', 'Ϛ'),
- ('Ϝ', 'Ϝ'),
- ('Ϟ', 'Ϟ'),
- ('Ϡ', 'Ϡ'),
- ('Ϣ', 'Ϣ'),
- ('Ϥ', 'Ϥ'),
- ('Ϧ', 'Ϧ'),
- ('Ϩ', 'Ϩ'),
- ('Ϫ', 'Ϫ'),
- ('Ϭ', 'Ϭ'),
- ('Ϯ', 'Ϯ'),
- ('ϴ', 'ϴ'),
- ('Ϸ', 'Ϸ'),
- ('Ϲ', 'Ϻ'),
- ('Ͻ', 'Я'),
- ('Ѡ', 'Ѡ'),
- ('Ѣ', 'Ѣ'),
- ('Ѥ', 'Ѥ'),
- ('Ѧ', 'Ѧ'),
- ('Ѩ', 'Ѩ'),
- ('Ѫ', 'Ѫ'),
- ('Ѭ', 'Ѭ'),
- ('Ѯ', 'Ѯ'),
- ('Ѱ', 'Ѱ'),
- ('Ѳ', 'Ѳ'),
- ('Ѵ', 'Ѵ'),
- ('Ѷ', 'Ѷ'),
- ('Ѹ', 'Ѹ'),
- ('Ѻ', 'Ѻ'),
- ('Ѽ', 'Ѽ'),
- ('Ѿ', 'Ѿ'),
- ('Ҁ', 'Ҁ'),
- ('Ҋ', 'Ҋ'),
- ('Ҍ', 'Ҍ'),
- ('Ҏ', 'Ҏ'),
- ('Ґ', 'Ґ'),
- ('Ғ', 'Ғ'),
- ('Ҕ', 'Ҕ'),
- ('Җ', 'Җ'),
- ('Ҙ', 'Ҙ'),
- ('Қ', 'Қ'),
- ('Ҝ', 'Ҝ'),
- ('Ҟ', 'Ҟ'),
- ('Ҡ', 'Ҡ'),
- ('Ң', 'Ң'),
- ('Ҥ', 'Ҥ'),
- ('Ҧ', 'Ҧ'),
- ('Ҩ', 'Ҩ'),
- ('Ҫ', 'Ҫ'),
- ('Ҭ', 'Ҭ'),
- ('Ү', 'Ү'),
- ('Ұ', 'Ұ'),
- ('Ҳ', 'Ҳ'),
- ('Ҵ', 'Ҵ'),
- ('Ҷ', 'Ҷ'),
- ('Ҹ', 'Ҹ'),
- ('Һ', 'Һ'),
- ('Ҽ', 'Ҽ'),
- ('Ҿ', 'Ҿ'),
- ('Ӏ', 'Ӂ'),
- ('Ӄ', 'Ӄ'),
- ('Ӆ', 'Ӆ'),
- ('Ӈ', 'Ӈ'),
- ('Ӊ', 'Ӊ'),
- ('Ӌ', 'Ӌ'),
- ('Ӎ', 'Ӎ'),
- ('Ӑ', 'Ӑ'),
- ('Ӓ', 'Ӓ'),
- ('Ӕ', 'Ӕ'),
- ('Ӗ', 'Ӗ'),
- ('Ә', 'Ә'),
- ('Ӛ', 'Ӛ'),
- ('Ӝ', 'Ӝ'),
- ('Ӟ', 'Ӟ'),
- ('Ӡ', 'Ӡ'),
- ('Ӣ', 'Ӣ'),
- ('Ӥ', 'Ӥ'),
- ('Ӧ', 'Ӧ'),
- ('Ө', 'Ө'),
- ('Ӫ', 'Ӫ'),
- ('Ӭ', 'Ӭ'),
- ('Ӯ', 'Ӯ'),
- ('Ӱ', 'Ӱ'),
- ('Ӳ', 'Ӳ'),
- ('Ӵ', 'Ӵ'),
- ('Ӷ', 'Ӷ'),
- ('Ӹ', 'Ӹ'),
- ('Ӻ', 'Ӻ'),
- ('Ӽ', 'Ӽ'),
- ('Ӿ', 'Ӿ'),
- ('Ԁ', 'Ԁ'),
- ('Ԃ', 'Ԃ'),
- ('Ԅ', 'Ԅ'),
- ('Ԇ', 'Ԇ'),
- ('Ԉ', 'Ԉ'),
- ('Ԋ', 'Ԋ'),
- ('Ԍ', 'Ԍ'),
- ('Ԏ', 'Ԏ'),
- ('Ԑ', 'Ԑ'),
- ('Ԓ', 'Ԓ'),
- ('Ԕ', 'Ԕ'),
- ('Ԗ', 'Ԗ'),
- ('Ԙ', 'Ԙ'),
- ('Ԛ', 'Ԛ'),
- ('Ԝ', 'Ԝ'),
- ('Ԟ', 'Ԟ'),
- ('Ԡ', 'Ԡ'),
- ('Ԣ', 'Ԣ'),
- ('Ԥ', 'Ԥ'),
- ('Ԧ', 'Ԧ'),
- ('Ԩ', 'Ԩ'),
- ('Ԫ', 'Ԫ'),
- ('Ԭ', 'Ԭ'),
- ('Ԯ', 'Ԯ'),
- ('Ա', 'Ֆ'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('Ꭰ', 'Ᏽ'),
- ('Ᲊ', 'Ᲊ'),
- ('Ḁ', 'Ḁ'),
- ('Ḃ', 'Ḃ'),
- ('Ḅ', 'Ḅ'),
- ('Ḇ', 'Ḇ'),
- ('Ḉ', 'Ḉ'),
- ('Ḋ', 'Ḋ'),
- ('Ḍ', 'Ḍ'),
- ('Ḏ', 'Ḏ'),
- ('Ḑ', 'Ḑ'),
- ('Ḓ', 'Ḓ'),
- ('Ḕ', 'Ḕ'),
- ('Ḗ', 'Ḗ'),
- ('Ḙ', 'Ḙ'),
- ('Ḛ', 'Ḛ'),
- ('Ḝ', 'Ḝ'),
- ('Ḟ', 'Ḟ'),
- ('Ḡ', 'Ḡ'),
- ('Ḣ', 'Ḣ'),
- ('Ḥ', 'Ḥ'),
- ('Ḧ', 'Ḧ'),
- ('Ḩ', 'Ḩ'),
- ('Ḫ', 'Ḫ'),
- ('Ḭ', 'Ḭ'),
- ('Ḯ', 'Ḯ'),
- ('Ḱ', 'Ḱ'),
- ('Ḳ', 'Ḳ'),
- ('Ḵ', 'Ḵ'),
- ('Ḷ', 'Ḷ'),
- ('Ḹ', 'Ḹ'),
- ('Ḻ', 'Ḻ'),
- ('Ḽ', 'Ḽ'),
- ('Ḿ', 'Ḿ'),
- ('Ṁ', 'Ṁ'),
- ('Ṃ', 'Ṃ'),
- ('Ṅ', 'Ṅ'),
- ('Ṇ', 'Ṇ'),
- ('Ṉ', 'Ṉ'),
- ('Ṋ', 'Ṋ'),
- ('Ṍ', 'Ṍ'),
- ('Ṏ', 'Ṏ'),
- ('Ṑ', 'Ṑ'),
- ('Ṓ', 'Ṓ'),
- ('Ṕ', 'Ṕ'),
- ('Ṗ', 'Ṗ'),
- ('Ṙ', 'Ṙ'),
- ('Ṛ', 'Ṛ'),
- ('Ṝ', 'Ṝ'),
- ('Ṟ', 'Ṟ'),
- ('Ṡ', 'Ṡ'),
- ('Ṣ', 'Ṣ'),
- ('Ṥ', 'Ṥ'),
- ('Ṧ', 'Ṧ'),
- ('Ṩ', 'Ṩ'),
- ('Ṫ', 'Ṫ'),
- ('Ṭ', 'Ṭ'),
- ('Ṯ', 'Ṯ'),
- ('Ṱ', 'Ṱ'),
- ('Ṳ', 'Ṳ'),
- ('Ṵ', 'Ṵ'),
- ('Ṷ', 'Ṷ'),
- ('Ṹ', 'Ṹ'),
- ('Ṻ', 'Ṻ'),
- ('Ṽ', 'Ṽ'),
- ('Ṿ', 'Ṿ'),
- ('Ẁ', 'Ẁ'),
- ('Ẃ', 'Ẃ'),
- ('Ẅ', 'Ẅ'),
- ('Ẇ', 'Ẇ'),
- ('Ẉ', 'Ẉ'),
- ('Ẋ', 'Ẋ'),
- ('Ẍ', 'Ẍ'),
- ('Ẏ', 'Ẏ'),
- ('Ẑ', 'Ẑ'),
- ('Ẓ', 'Ẓ'),
- ('Ẕ', 'Ẕ'),
- ('ẞ', 'ẞ'),
- ('Ạ', 'Ạ'),
- ('Ả', 'Ả'),
- ('Ấ', 'Ấ'),
- ('Ầ', 'Ầ'),
- ('Ẩ', 'Ẩ'),
- ('Ẫ', 'Ẫ'),
- ('Ậ', 'Ậ'),
- ('Ắ', 'Ắ'),
- ('Ằ', 'Ằ'),
- ('Ẳ', 'Ẳ'),
- ('Ẵ', 'Ẵ'),
- ('Ặ', 'Ặ'),
- ('Ẹ', 'Ẹ'),
- ('Ẻ', 'Ẻ'),
- ('Ẽ', 'Ẽ'),
- ('Ế', 'Ế'),
- ('Ề', 'Ề'),
- ('Ể', 'Ể'),
- ('Ễ', 'Ễ'),
- ('Ệ', 'Ệ'),
- ('Ỉ', 'Ỉ'),
- ('Ị', 'Ị'),
- ('Ọ', 'Ọ'),
- ('Ỏ', 'Ỏ'),
- ('Ố', 'Ố'),
- ('Ồ', 'Ồ'),
- ('Ổ', 'Ổ'),
- ('Ỗ', 'Ỗ'),
- ('Ộ', 'Ộ'),
- ('Ớ', 'Ớ'),
- ('Ờ', 'Ờ'),
- ('Ở', 'Ở'),
- ('Ỡ', 'Ỡ'),
- ('Ợ', 'Ợ'),
- ('Ụ', 'Ụ'),
- ('Ủ', 'Ủ'),
- ('Ứ', 'Ứ'),
- ('Ừ', 'Ừ'),
- ('Ử', 'Ử'),
- ('Ữ', 'Ữ'),
- ('Ự', 'Ự'),
- ('Ỳ', 'Ỳ'),
- ('Ỵ', 'Ỵ'),
- ('Ỷ', 'Ỷ'),
- ('Ỹ', 'Ỹ'),
- ('Ỻ', 'Ỻ'),
- ('Ỽ', 'Ỽ'),
- ('Ỿ', 'Ỿ'),
- ('Ἀ', 'Ἇ'),
- ('Ἐ', 'Ἕ'),
- ('Ἠ', 'Ἧ'),
- ('Ἰ', 'Ἷ'),
- ('Ὀ', 'Ὅ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'Ὗ'),
- ('Ὠ', 'Ὧ'),
- ('ᾈ', 'ᾏ'),
- ('ᾘ', 'ᾟ'),
- ('ᾨ', 'ᾯ'),
- ('Ᾰ', 'ᾼ'),
- ('Ὲ', 'ῌ'),
- ('Ῐ', 'Ί'),
- ('Ῠ', 'Ῥ'),
- ('Ὸ', 'ῼ'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℋ', 'ℍ'),
- ('ℐ', 'ℒ'),
- ('ℕ', 'ℕ'),
- ('ℙ', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('Ω', 'Ω'),
- ('ℨ', 'ℨ'),
- ('K', 'ℭ'),
- ('ℰ', 'ℳ'),
- ('ℾ', 'ℿ'),
- ('ⅅ', 'ⅅ'),
- ('Ⅰ', 'Ⅿ'),
- ('Ↄ', 'Ↄ'),
- ('Ⓐ', 'Ⓩ'),
- ('Ⰰ', 'Ⱟ'),
- ('Ⱡ', 'Ⱡ'),
- ('Ɫ', 'Ɽ'),
- ('Ⱨ', 'Ⱨ'),
- ('Ⱪ', 'Ⱪ'),
- ('Ⱬ', 'Ⱬ'),
- ('Ɑ', 'Ɒ'),
- ('Ⱳ', 'Ⱳ'),
- ('Ⱶ', 'Ⱶ'),
- ('Ȿ', 'Ⲁ'),
- ('Ⲃ', 'Ⲃ'),
- ('Ⲅ', 'Ⲅ'),
- ('Ⲇ', 'Ⲇ'),
- ('Ⲉ', 'Ⲉ'),
- ('Ⲋ', 'Ⲋ'),
- ('Ⲍ', 'Ⲍ'),
- ('Ⲏ', 'Ⲏ'),
- ('Ⲑ', 'Ⲑ'),
- ('Ⲓ', 'Ⲓ'),
- ('Ⲕ', 'Ⲕ'),
- ('Ⲗ', 'Ⲗ'),
- ('Ⲙ', 'Ⲙ'),
- ('Ⲛ', 'Ⲛ'),
- ('Ⲝ', 'Ⲝ'),
- ('Ⲟ', 'Ⲟ'),
- ('Ⲡ', 'Ⲡ'),
- ('Ⲣ', 'Ⲣ'),
- ('Ⲥ', 'Ⲥ'),
- ('Ⲧ', 'Ⲧ'),
- ('Ⲩ', 'Ⲩ'),
- ('Ⲫ', 'Ⲫ'),
- ('Ⲭ', 'Ⲭ'),
- ('Ⲯ', 'Ⲯ'),
- ('Ⲱ', 'Ⲱ'),
- ('Ⲳ', 'Ⲳ'),
- ('Ⲵ', 'Ⲵ'),
- ('Ⲷ', 'Ⲷ'),
- ('Ⲹ', 'Ⲹ'),
- ('Ⲻ', 'Ⲻ'),
- ('Ⲽ', 'Ⲽ'),
- ('Ⲿ', 'Ⲿ'),
- ('Ⳁ', 'Ⳁ'),
- ('Ⳃ', 'Ⳃ'),
- ('Ⳅ', 'Ⳅ'),
- ('Ⳇ', 'Ⳇ'),
- ('Ⳉ', 'Ⳉ'),
- ('Ⳋ', 'Ⳋ'),
- ('Ⳍ', 'Ⳍ'),
- ('Ⳏ', 'Ⳏ'),
- ('Ⳑ', 'Ⳑ'),
- ('Ⳓ', 'Ⳓ'),
- ('Ⳕ', 'Ⳕ'),
- ('Ⳗ', 'Ⳗ'),
- ('Ⳙ', 'Ⳙ'),
- ('Ⳛ', 'Ⳛ'),
- ('Ⳝ', 'Ⳝ'),
- ('Ⳟ', 'Ⳟ'),
- ('Ⳡ', 'Ⳡ'),
- ('Ⳣ', 'Ⳣ'),
- ('Ⳬ', 'Ⳬ'),
- ('Ⳮ', 'Ⳮ'),
- ('Ⳳ', 'Ⳳ'),
- ('Ꙁ', 'Ꙁ'),
- ('Ꙃ', 'Ꙃ'),
- ('Ꙅ', 'Ꙅ'),
- ('Ꙇ', 'Ꙇ'),
- ('Ꙉ', 'Ꙉ'),
- ('Ꙋ', 'Ꙋ'),
- ('Ꙍ', 'Ꙍ'),
- ('Ꙏ', 'Ꙏ'),
- ('Ꙑ', 'Ꙑ'),
- ('Ꙓ', 'Ꙓ'),
- ('Ꙕ', 'Ꙕ'),
- ('Ꙗ', 'Ꙗ'),
- ('Ꙙ', 'Ꙙ'),
- ('Ꙛ', 'Ꙛ'),
- ('Ꙝ', 'Ꙝ'),
- ('Ꙟ', 'Ꙟ'),
- ('Ꙡ', 'Ꙡ'),
- ('Ꙣ', 'Ꙣ'),
- ('Ꙥ', 'Ꙥ'),
- ('Ꙧ', 'Ꙧ'),
- ('Ꙩ', 'Ꙩ'),
- ('Ꙫ', 'Ꙫ'),
- ('Ꙭ', 'Ꙭ'),
- ('Ꚁ', 'Ꚁ'),
- ('Ꚃ', 'Ꚃ'),
- ('Ꚅ', 'Ꚅ'),
- ('Ꚇ', 'Ꚇ'),
- ('Ꚉ', 'Ꚉ'),
- ('Ꚋ', 'Ꚋ'),
- ('Ꚍ', 'Ꚍ'),
- ('Ꚏ', 'Ꚏ'),
- ('Ꚑ', 'Ꚑ'),
- ('Ꚓ', 'Ꚓ'),
- ('Ꚕ', 'Ꚕ'),
- ('Ꚗ', 'Ꚗ'),
- ('Ꚙ', 'Ꚙ'),
- ('Ꚛ', 'Ꚛ'),
- ('Ꜣ', 'Ꜣ'),
- ('Ꜥ', 'Ꜥ'),
- ('Ꜧ', 'Ꜧ'),
- ('Ꜩ', 'Ꜩ'),
- ('Ꜫ', 'Ꜫ'),
- ('Ꜭ', 'Ꜭ'),
- ('Ꜯ', 'Ꜯ'),
- ('Ꜳ', 'Ꜳ'),
- ('Ꜵ', 'Ꜵ'),
- ('Ꜷ', 'Ꜷ'),
- ('Ꜹ', 'Ꜹ'),
- ('Ꜻ', 'Ꜻ'),
- ('Ꜽ', 'Ꜽ'),
- ('Ꜿ', 'Ꜿ'),
- ('Ꝁ', 'Ꝁ'),
- ('Ꝃ', 'Ꝃ'),
- ('Ꝅ', 'Ꝅ'),
- ('Ꝇ', 'Ꝇ'),
- ('Ꝉ', 'Ꝉ'),
- ('Ꝋ', 'Ꝋ'),
- ('Ꝍ', 'Ꝍ'),
- ('Ꝏ', 'Ꝏ'),
- ('Ꝑ', 'Ꝑ'),
- ('Ꝓ', 'Ꝓ'),
- ('Ꝕ', 'Ꝕ'),
- ('Ꝗ', 'Ꝗ'),
- ('Ꝙ', 'Ꝙ'),
- ('Ꝛ', 'Ꝛ'),
- ('Ꝝ', 'Ꝝ'),
- ('Ꝟ', 'Ꝟ'),
- ('Ꝡ', 'Ꝡ'),
- ('Ꝣ', 'Ꝣ'),
- ('Ꝥ', 'Ꝥ'),
- ('Ꝧ', 'Ꝧ'),
- ('Ꝩ', 'Ꝩ'),
- ('Ꝫ', 'Ꝫ'),
- ('Ꝭ', 'Ꝭ'),
- ('Ꝯ', 'Ꝯ'),
- ('Ꝺ', 'Ꝺ'),
- ('Ꝼ', 'Ꝼ'),
- ('Ᵹ', 'Ꝿ'),
- ('Ꞁ', 'Ꞁ'),
- ('Ꞃ', 'Ꞃ'),
- ('Ꞅ', 'Ꞅ'),
- ('Ꞇ', 'Ꞇ'),
- ('Ꞌ', 'Ꞌ'),
- ('Ɥ', 'Ɥ'),
- ('Ꞑ', 'Ꞑ'),
- ('Ꞓ', 'Ꞓ'),
- ('Ꞗ', 'Ꞗ'),
- ('Ꞙ', 'Ꞙ'),
- ('Ꞛ', 'Ꞛ'),
- ('Ꞝ', 'Ꞝ'),
- ('Ꞟ', 'Ꞟ'),
- ('Ꞡ', 'Ꞡ'),
- ('Ꞣ', 'Ꞣ'),
- ('Ꞥ', 'Ꞥ'),
- ('Ꞧ', 'Ꞧ'),
- ('Ꞩ', 'Ꞩ'),
- ('Ɦ', 'Ɪ'),
- ('Ʞ', 'Ꞵ'),
- ('Ꞷ', 'Ꞷ'),
- ('Ꞹ', 'Ꞹ'),
- ('Ꞻ', 'Ꞻ'),
- ('Ꞽ', 'Ꞽ'),
- ('Ꞿ', 'Ꞿ'),
- ('Ꟁ', 'Ꟁ'),
- ('Ꟃ', 'Ꟃ'),
- ('Ꞔ', 'Ꟈ'),
- ('Ꟊ', 'Ꟊ'),
- ('Ɤ', 'Ꟍ'),
- ('Ꟑ', 'Ꟑ'),
- ('Ꟗ', 'Ꟗ'),
- ('Ꟙ', 'Ꟙ'),
- ('Ꟛ', 'Ꟛ'),
- ('Ƛ', 'Ƛ'),
- ('Ꟶ', 'Ꟶ'),
- ('A', 'Z'),
- ('𐐀', '𐐧'),
- ('𐒰', '𐓓'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐲀', '𐲲'),
- ('𐵐', '𐵥'),
- ('𑢠', '𑢿'),
- ('𖹀', '𖹟'),
- ('𝐀', '𝐙'),
- ('𝐴', '𝑍'),
- ('𝑨', '𝒁'),
- ('𝒜', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒵'),
- ('𝓐', '𝓩'),
- ('𝔄', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔸', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕬', '𝖅'),
- ('𝖠', '𝖹'),
- ('𝗔', '𝗭'),
- ('𝘈', '𝘡'),
- ('𝘼', '𝙕'),
- ('𝙰', '𝚉'),
- ('𝚨', '𝛀'),
- ('𝛢', '𝛺'),
- ('𝜜', '𝜴'),
- ('𝝖', '𝝮'),
- ('𝞐', '𝞨'),
- ('𝟊', '𝟊'),
- ('𞤀', '𞤡'),
- ('🄰', '🅉'),
- ('🅐', '🅩'),
- ('🅰', '🆉'),
-];
diff --git a/vendor/regex-syntax/src/unicode_tables/word_break.rs b/vendor/regex-syntax/src/unicode_tables/word_break.rs
deleted file mode 100644
index b764d34a..00000000
--- a/vendor/regex-syntax/src/unicode_tables/word_break.rs
+++ /dev/null
@@ -1,1152 +0,0 @@
-// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
-//
-// ucd-generate word-break ucd-16.0.0 --chars
-//
-// Unicode version: 16.0.0.
-//
-// ucd-generate 0.3.1 is available on crates.io.
-
-pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
- ("ALetter", ALETTER),
- ("CR", CR),
- ("Double_Quote", DOUBLE_QUOTE),
- ("Extend", EXTEND),
- ("ExtendNumLet", EXTENDNUMLET),
- ("Format", FORMAT),
- ("Hebrew_Letter", HEBREW_LETTER),
- ("Katakana", KATAKANA),
- ("LF", LF),
- ("MidLetter", MIDLETTER),
- ("MidNum", MIDNUM),
- ("MidNumLet", MIDNUMLET),
- ("Newline", NEWLINE),
- ("Numeric", NUMERIC),
- ("Regional_Indicator", REGIONAL_INDICATOR),
- ("Single_Quote", SINGLE_QUOTE),
- ("WSegSpace", WSEGSPACE),
- ("ZWJ", ZWJ),
-];
-
-pub const ALETTER: &'static [(char, char)] = &[
- ('A', 'Z'),
- ('a', 'z'),
- ('ª', 'ª'),
- ('µ', 'µ'),
- ('º', 'º'),
- ('À', 'Ö'),
- ('Ø', 'ö'),
- ('ø', '˗'),
- ('˞', '˿'),
- ('Ͱ', 'ʹ'),
- ('Ͷ', 'ͷ'),
- ('ͺ', 'ͽ'),
- ('Ϳ', 'Ϳ'),
- ('Ά', 'Ά'),
- ('Έ', 'Ί'),
- ('Ό', 'Ό'),
- ('Ύ', 'Ρ'),
- ('Σ', 'ϵ'),
- ('Ϸ', 'ҁ'),
- ('Ҋ', 'ԯ'),
- ('Ա', 'Ֆ'),
- ('ՙ', '՜'),
- ('՞', '՞'),
- ('ՠ', 'ֈ'),
- ('֊', '֊'),
- ('׳', '׳'),
- ('ؠ', 'ي'),
- ('ٮ', 'ٯ'),
- ('ٱ', 'ۓ'),
- ('ە', 'ە'),
- ('ۥ', 'ۦ'),
- ('ۮ', 'ۯ'),
- ('ۺ', 'ۼ'),
- ('ۿ', 'ۿ'),
- ('\u{70f}', 'ܐ'),
- ('ܒ', 'ܯ'),
- ('ݍ', 'ޥ'),
- ('ޱ', 'ޱ'),
- ('ߊ', 'ߪ'),
- ('ߴ', 'ߵ'),
- ('ߺ', 'ߺ'),
- ('ࠀ', 'ࠕ'),
- ('ࠚ', 'ࠚ'),
- ('ࠤ', 'ࠤ'),
- ('ࠨ', 'ࠨ'),
- ('ࡀ', 'ࡘ'),
- ('ࡠ', 'ࡪ'),
- ('ࡰ', 'ࢇ'),
- ('ࢉ', 'ࢎ'),
- ('ࢠ', 'ࣉ'),
- ('ऄ', 'ह'),
- ('ऽ', 'ऽ'),
- ('ॐ', 'ॐ'),
- ('क़', 'ॡ'),
- ('ॱ', 'ঀ'),
- ('অ', 'ঌ'),
- ('এ', 'ঐ'),
- ('ও', 'ন'),
- ('প', 'র'),
- ('ল', 'ল'),
- ('শ', 'হ'),
- ('ঽ', 'ঽ'),
- ('ৎ', 'ৎ'),
- ('ড়', 'ঢ়'),
- ('য়', 'ৡ'),
- ('ৰ', 'ৱ'),
- ('ৼ', 'ৼ'),
- ('ਅ', 'ਊ'),
- ('ਏ', 'ਐ'),
- ('ਓ', 'ਨ'),
- ('ਪ', 'ਰ'),
- ('ਲ', 'ਲ਼'),
- ('ਵ', 'ਸ਼'),
- ('ਸ', 'ਹ'),
- ('ਖ਼', 'ੜ'),
- ('ਫ਼', 'ਫ਼'),
- ('ੲ', 'ੴ'),
- ('અ', 'ઍ'),
- ('એ', 'ઑ'),
- ('ઓ', 'ન'),
- ('પ', 'ર'),
- ('લ', 'ળ'),
- ('વ', 'હ'),
- ('ઽ', 'ઽ'),
- ('ૐ', 'ૐ'),
- ('ૠ', 'ૡ'),
- ('ૹ', 'ૹ'),
- ('ଅ', 'ଌ'),
- ('ଏ', 'ଐ'),
- ('ଓ', 'ନ'),
- ('ପ', 'ର'),
- ('ଲ', 'ଳ'),
- ('ଵ', 'ହ'),
- ('ଽ', 'ଽ'),
- ('ଡ଼', 'ଢ଼'),
- ('ୟ', 'ୡ'),
- ('ୱ', 'ୱ'),
- ('ஃ', 'ஃ'),
- ('அ', 'ஊ'),
- ('எ', 'ஐ'),
- ('ஒ', 'க'),
- ('ங', 'ச'),
- ('ஜ', 'ஜ'),
- ('ஞ', 'ட'),
- ('ண', 'த'),
- ('ந', 'ப'),
- ('ம', 'ஹ'),
- ('ௐ', 'ௐ'),
- ('అ', 'ఌ'),
- ('ఎ', 'ఐ'),
- ('ఒ', 'న'),
- ('ప', 'హ'),
- ('ఽ', 'ఽ'),
- ('ౘ', 'ౚ'),
- ('ౝ', 'ౝ'),
- ('ౠ', 'ౡ'),
- ('ಀ', 'ಀ'),
- ('ಅ', 'ಌ'),
- ('ಎ', 'ಐ'),
- ('ಒ', 'ನ'),
- ('ಪ', 'ಳ'),
- ('ವ', 'ಹ'),
- ('ಽ', 'ಽ'),
- ('ೝ', 'ೞ'),
- ('ೠ', 'ೡ'),
- ('ೱ', 'ೲ'),
- ('ഄ', 'ഌ'),
- ('എ', 'ഐ'),
- ('ഒ', 'ഺ'),
- ('ഽ', 'ഽ'),
- ('ൎ', 'ൎ'),
- ('ൔ', 'ൖ'),
- ('ൟ', 'ൡ'),
- ('ൺ', 'ൿ'),
- ('අ', 'ඖ'),
- ('ක', 'න'),
- ('ඳ', 'ර'),
- ('ල', 'ල'),
- ('ව', 'ෆ'),
- ('ༀ', 'ༀ'),
- ('ཀ', 'ཇ'),
- ('ཉ', 'ཬ'),
- ('ྈ', 'ྌ'),
- ('Ⴀ', 'Ⴥ'),
- ('Ⴧ', 'Ⴧ'),
- ('Ⴭ', 'Ⴭ'),
- ('ა', 'ჺ'),
- ('ჼ', 'ቈ'),
- ('ቊ', 'ቍ'),
- ('ቐ', 'ቖ'),
- ('ቘ', 'ቘ'),
- ('ቚ', 'ቝ'),
- ('በ', 'ኈ'),
- ('ኊ', 'ኍ'),
- ('ነ', 'ኰ'),
- ('ኲ', 'ኵ'),
- ('ኸ', 'ኾ'),
- ('ዀ', 'ዀ'),
- ('ዂ', 'ዅ'),
- ('ወ', 'ዖ'),
- ('ዘ', 'ጐ'),
- ('ጒ', 'ጕ'),
- ('ጘ', 'ፚ'),
- ('ᎀ', 'ᎏ'),
- ('Ꭰ', 'Ᏽ'),
- ('ᏸ', 'ᏽ'),
- ('ᐁ', 'ᙬ'),
- ('ᙯ', 'ᙿ'),
- ('ᚁ', 'ᚚ'),
- ('ᚠ', 'ᛪ'),
- ('ᛮ', 'ᛸ'),
- ('ᜀ', 'ᜑ'),
- ('ᜟ', 'ᜱ'),
- ('ᝀ', 'ᝑ'),
- ('ᝠ', 'ᝬ'),
- ('ᝮ', 'ᝰ'),
- ('ᠠ', 'ᡸ'),
- ('ᢀ', 'ᢄ'),
- ('ᢇ', 'ᢨ'),
- ('ᢪ', 'ᢪ'),
- ('ᢰ', 'ᣵ'),
- ('ᤀ', 'ᤞ'),
- ('ᨀ', 'ᨖ'),
- ('ᬅ', 'ᬳ'),
- ('ᭅ', 'ᭌ'),
- ('ᮃ', 'ᮠ'),
- ('ᮮ', 'ᮯ'),
- ('ᮺ', 'ᯥ'),
- ('ᰀ', 'ᰣ'),
- ('ᱍ', 'ᱏ'),
- ('ᱚ', 'ᱽ'),
- ('ᲀ', 'ᲊ'),
- ('Ა', 'Ჺ'),
- ('Ჽ', 'Ჿ'),
- ('ᳩ', 'ᳬ'),
- ('ᳮ', 'ᳳ'),
- ('ᳵ', 'ᳶ'),
- ('ᳺ', 'ᳺ'),
- ('ᴀ', 'ᶿ'),
- ('Ḁ', 'ἕ'),
- ('Ἐ', 'Ἕ'),
- ('ἠ', 'ὅ'),
- ('Ὀ', 'Ὅ'),
- ('ὐ', 'ὗ'),
- ('Ὑ', 'Ὑ'),
- ('Ὓ', 'Ὓ'),
- ('Ὕ', 'Ὕ'),
- ('Ὗ', 'ώ'),
- ('ᾀ', 'ᾴ'),
- ('ᾶ', 'ᾼ'),
- ('ι', 'ι'),
- ('ῂ', 'ῄ'),
- ('ῆ', 'ῌ'),
- ('ῐ', 'ΐ'),
- ('ῖ', 'Ί'),
- ('ῠ', 'Ῥ'),
- ('ῲ', 'ῴ'),
- ('ῶ', 'ῼ'),
- ('ⁱ', 'ⁱ'),
- ('ⁿ', 'ⁿ'),
- ('ₐ', 'ₜ'),
- ('ℂ', 'ℂ'),
- ('ℇ', 'ℇ'),
- ('ℊ', 'ℓ'),
- ('ℕ', 'ℕ'),
- ('ℙ', 'ℝ'),
- ('ℤ', 'ℤ'),
- ('Ω', 'Ω'),
- ('ℨ', 'ℨ'),
- ('K', 'ℭ'),
- ('ℯ', 'ℹ'),
- ('ℼ', 'ℿ'),
- ('ⅅ', 'ⅉ'),
- ('ⅎ', 'ⅎ'),
- ('Ⅰ', 'ↈ'),
- ('Ⓐ', 'ⓩ'),
- ('Ⰰ', 'ⳤ'),
- ('Ⳬ', 'ⳮ'),
- ('Ⳳ', 'ⳳ'),
- ('ⴀ', 'ⴥ'),
- ('ⴧ', 'ⴧ'),
- ('ⴭ', 'ⴭ'),
- ('ⴰ', 'ⵧ'),
- ('ⵯ', 'ⵯ'),
- ('ⶀ', 'ⶖ'),
- ('ⶠ', 'ⶦ'),
- ('ⶨ', 'ⶮ'),
- ('ⶰ', 'ⶶ'),
- ('ⶸ', 'ⶾ'),
- ('ⷀ', 'ⷆ'),
- ('ⷈ', 'ⷎ'),
- ('ⷐ', 'ⷖ'),
- ('ⷘ', 'ⷞ'),
- ('ⸯ', 'ⸯ'),
- ('々', '々'),
- ('〻', '〼'),
- ('ㄅ', 'ㄯ'),
- ('ㄱ', 'ㆎ'),
- ('ㆠ', 'ㆿ'),
- ('ꀀ', 'ꒌ'),
- ('ꓐ', 'ꓽ'),
- ('ꔀ', 'ꘌ'),
- ('ꘐ', 'ꘟ'),
- ('ꘪ', 'ꘫ'),
- ('Ꙁ', 'ꙮ'),
- ('ꙿ', 'ꚝ'),
- ('ꚠ', 'ꛯ'),
- ('꜈', 'ꟍ'),
- ('Ꟑ', 'ꟑ'),
- ('ꟓ', 'ꟓ'),
- ('ꟕ', 'Ƛ'),
- ('ꟲ', 'ꠁ'),
- ('ꠃ', 'ꠅ'),
- ('ꠇ', 'ꠊ'),
- ('ꠌ', 'ꠢ'),
- ('ꡀ', 'ꡳ'),
- ('ꢂ', 'ꢳ'),
- ('ꣲ', 'ꣷ'),
- ('ꣻ', 'ꣻ'),
- ('ꣽ', 'ꣾ'),
- ('ꤊ', 'ꤥ'),
- ('ꤰ', 'ꥆ'),
- ('ꥠ', 'ꥼ'),
- ('ꦄ', 'ꦲ'),
- ('ꧏ', 'ꧏ'),
- ('ꨀ', 'ꨨ'),
- ('ꩀ', 'ꩂ'),
- ('ꩄ', 'ꩋ'),
- ('ꫠ', 'ꫪ'),
- ('ꫲ', 'ꫴ'),
- ('ꬁ', 'ꬆ'),
- ('ꬉ', 'ꬎ'),
- ('ꬑ', 'ꬖ'),
- ('ꬠ', 'ꬦ'),
- ('ꬨ', 'ꬮ'),
- ('ꬰ', 'ꭩ'),
- ('ꭰ', 'ꯢ'),
- ('가', '힣'),
- ('ힰ', 'ퟆ'),
- ('ퟋ', 'ퟻ'),
- ('ff', 'st'),
- ('ﬓ', 'ﬗ'),
- ('ﭐ', 'ﮱ'),
- ('ﯓ', 'ﴽ'),
- ('ﵐ', 'ﶏ'),
- ('ﶒ', 'ﷇ'),
- ('ﷰ', 'ﷻ'),
- ('ﹰ', 'ﹴ'),
- ('ﹶ', 'ﻼ'),
- ('A', 'Z'),
- ('a', 'z'),
- ('ᅠ', 'ᄒ'),
- ('ᅡ', 'ᅦ'),
- ('ᅧ', 'ᅬ'),
- ('ᅭ', 'ᅲ'),
- ('ᅳ', 'ᅵ'),
- ('𐀀', '𐀋'),
- ('𐀍', '𐀦'),
- ('𐀨', '𐀺'),
- ('𐀼', '𐀽'),
- ('𐀿', '𐁍'),
- ('𐁐', '𐁝'),
- ('𐂀', '𐃺'),
- ('𐅀', '𐅴'),
- ('𐊀', '𐊜'),
- ('𐊠', '𐋐'),
- ('𐌀', '𐌟'),
- ('𐌭', '𐍊'),
- ('𐍐', '𐍵'),
- ('𐎀', '𐎝'),
- ('𐎠', '𐏃'),
- ('𐏈', '𐏏'),
- ('𐏑', '𐏕'),
- ('𐐀', '𐒝'),
- ('𐒰', '𐓓'),
- ('𐓘', '𐓻'),
- ('𐔀', '𐔧'),
- ('𐔰', '𐕣'),
- ('𐕰', '𐕺'),
- ('𐕼', '𐖊'),
- ('𐖌', '𐖒'),
- ('𐖔', '𐖕'),
- ('𐖗', '𐖡'),
- ('𐖣', '𐖱'),
- ('𐖳', '𐖹'),
- ('𐖻', '𐖼'),
- ('𐗀', '𐗳'),
- ('𐘀', '𐜶'),
- ('𐝀', '𐝕'),
- ('𐝠', '𐝧'),
- ('𐞀', '𐞅'),
- ('𐞇', '𐞰'),
- ('𐞲', '𐞺'),
- ('𐠀', '𐠅'),
- ('𐠈', '𐠈'),
- ('𐠊', '𐠵'),
- ('𐠷', '𐠸'),
- ('𐠼', '𐠼'),
- ('𐠿', '𐡕'),
- ('𐡠', '𐡶'),
- ('𐢀', '𐢞'),
- ('𐣠', '𐣲'),
- ('𐣴', '𐣵'),
- ('𐤀', '𐤕'),
- ('𐤠', '𐤹'),
- ('𐦀', '𐦷'),
- ('𐦾', '𐦿'),
- ('𐨀', '𐨀'),
- ('𐨐', '𐨓'),
- ('𐨕', '𐨗'),
- ('𐨙', '𐨵'),
- ('𐩠', '𐩼'),
- ('𐪀', '𐪜'),
- ('𐫀', '𐫇'),
- ('𐫉', '𐫤'),
- ('𐬀', '𐬵'),
- ('𐭀', '𐭕'),
- ('𐭠', '𐭲'),
- ('𐮀', '𐮑'),
- ('𐰀', '𐱈'),
- ('𐲀', '𐲲'),
- ('𐳀', '𐳲'),
- ('𐴀', '𐴣'),
- ('𐵊', '𐵥'),
- ('𐵯', '𐶅'),
- ('𐺀', '𐺩'),
- ('𐺰', '𐺱'),
- ('𐻂', '𐻄'),
- ('𐼀', '𐼜'),
- ('𐼧', '𐼧'),
- ('𐼰', '𐽅'),
- ('𐽰', '𐾁'),
- ('𐾰', '𐿄'),
- ('𐿠', '𐿶'),
- ('𑀃', '𑀷'),
- ('𑁱', '𑁲'),
- ('𑁵', '𑁵'),
- ('𑂃', '𑂯'),
- ('𑃐', '𑃨'),
- ('𑄃', '𑄦'),
- ('𑅄', '𑅄'),
- ('𑅇', '𑅇'),
- ('𑅐', '𑅲'),
- ('𑅶', '𑅶'),
- ('𑆃', '𑆲'),
- ('𑇁', '𑇄'),
- ('𑇚', '𑇚'),
- ('𑇜', '𑇜'),
- ('𑈀', '𑈑'),
- ('𑈓', '𑈫'),
- ('𑈿', '𑉀'),
- ('𑊀', '𑊆'),
- ('𑊈', '𑊈'),
- ('𑊊', '𑊍'),
- ('𑊏', '𑊝'),
- ('𑊟', '𑊨'),
- ('𑊰', '𑋞'),
- ('𑌅', '𑌌'),
- ('𑌏', '𑌐'),
- ('𑌓', '𑌨'),
- ('𑌪', '𑌰'),
- ('𑌲', '𑌳'),
- ('𑌵', '𑌹'),
- ('𑌽', '𑌽'),
- ('𑍐', '𑍐'),
- ('𑍝', '𑍡'),
- ('𑎀', '𑎉'),
- ('𑎋', '𑎋'),
- ('𑎎', '𑎎'),
- ('𑎐', '𑎵'),
- ('𑎷', '𑎷'),
- ('𑏑', '𑏑'),
- ('𑏓', '𑏓'),
- ('𑐀', '𑐴'),
- ('𑑇', '𑑊'),
- ('𑑟', '𑑡'),
- ('𑒀', '𑒯'),
- ('𑓄', '𑓅'),
- ('𑓇', '𑓇'),
- ('𑖀', '𑖮'),
- ('𑗘', '𑗛'),
- ('𑘀', '𑘯'),
- ('𑙄', '𑙄'),
- ('𑚀', '𑚪'),
- ('𑚸', '𑚸'),
- ('𑠀', '𑠫'),
- ('𑢠', '𑣟'),
- ('𑣿', '𑤆'),
- ('𑤉', '𑤉'),
- ('𑤌', '𑤓'),
- ('𑤕', '𑤖'),
- ('𑤘', '𑤯'),
- ('𑤿', '𑤿'),
- ('𑥁', '𑥁'),
- ('𑦠', '𑦧'),
- ('𑦪', '𑧐'),
- ('𑧡', '𑧡'),
- ('𑧣', '𑧣'),
- ('𑨀', '𑨀'),
- ('𑨋', '𑨲'),
- ('𑨺', '𑨺'),
- ('𑩐', '𑩐'),
- ('𑩜', '𑪉'),
- ('𑪝', '𑪝'),
- ('𑪰', '𑫸'),
- ('𑯀', '𑯠'),
- ('𑰀', '𑰈'),
- ('𑰊', '𑰮'),
- ('𑱀', '𑱀'),
- ('𑱲', '𑲏'),
- ('𑴀', '𑴆'),
- ('𑴈', '𑴉'),
- ('𑴋', '𑴰'),
- ('𑵆', '𑵆'),
- ('𑵠', '𑵥'),
- ('𑵧', '𑵨'),
- ('𑵪', '𑶉'),
- ('𑶘', '𑶘'),
- ('𑻠', '𑻲'),
- ('𑼂', '𑼂'),
- ('𑼄', '𑼐'),
- ('𑼒', '𑼳'),
- ('𑾰', '𑾰'),
- ('𒀀', '𒎙'),
- ('𒐀', '𒑮'),
- ('𒒀', '𒕃'),
- ('𒾐', '𒿰'),
- ('𓀀', '𓐯'),
- ('𓑁', '𓑆'),
- ('𓑠', '𔏺'),
- ('𔐀', '𔙆'),
- ('𖄀', '𖄝'),
- ('𖠀', '𖨸'),
- ('𖩀', '𖩞'),
- ('𖩰', '𖪾'),
- ('𖫐', '𖫭'),
- ('𖬀', '𖬯'),
- ('𖭀', '𖭃'),
- ('𖭣', '𖭷'),
- ('𖭽', '𖮏'),
- ('𖵀', '𖵬'),
- ('𖹀', '𖹿'),
- ('𖼀', '𖽊'),
- ('𖽐', '𖽐'),
- ('𖾓', '𖾟'),
- ('𖿠', '𖿡'),
- ('𖿣', '𖿣'),
- ('𛰀', '𛱪'),
- ('𛱰', '𛱼'),
- ('𛲀', '𛲈'),
- ('𛲐', '𛲙'),
- ('𝐀', '𝑔'),
- ('𝑖', '𝒜'),
- ('𝒞', '𝒟'),
- ('𝒢', '𝒢'),
- ('𝒥', '𝒦'),
- ('𝒩', '𝒬'),
- ('𝒮', '𝒹'),
- ('𝒻', '𝒻'),
- ('𝒽', '𝓃'),
- ('𝓅', '𝔅'),
- ('𝔇', '𝔊'),
- ('𝔍', '𝔔'),
- ('𝔖', '𝔜'),
- ('𝔞', '𝔹'),
- ('𝔻', '𝔾'),
- ('𝕀', '𝕄'),
- ('𝕆', '𝕆'),
- ('𝕊', '𝕐'),
- ('𝕒', '𝚥'),
- ('𝚨', '𝛀'),
- ('𝛂', '𝛚'),
- ('𝛜', '𝛺'),
- ('𝛼', '𝜔'),
- ('𝜖', '𝜴'),
- ('𝜶', '𝝎'),
- ('𝝐', '𝝮'),
- ('𝝰', '𝞈'),
- ('𝞊', '𝞨'),
- ('𝞪', '𝟂'),
- ('𝟄', '𝟋'),
- ('𝼀', '𝼞'),
- ('𝼥', '𝼪'),
- ('𞀰', '𞁭'),
- ('𞄀', '𞄬'),
- ('𞄷', '𞄽'),
- ('𞅎', '𞅎'),
- ('𞊐', '𞊭'),
- ('𞋀', '𞋫'),
- ('𞓐', '𞓫'),
- ('𞗐', '𞗭'),
- ('𞗰', '𞗰'),
- ('𞟠', '𞟦'),
- ('𞟨', '𞟫'),
- ('𞟭', '𞟮'),
- ('𞟰', '𞟾'),
- ('𞠀', '𞣄'),
- ('𞤀', '𞥃'),
- ('𞥋', '𞥋'),
- ('𞸀', '𞸃'),
- ('𞸅', '𞸟'),
- ('𞸡', '𞸢'),
- ('𞸤', '𞸤'),
- ('𞸧', '𞸧'),
- ('𞸩', '𞸲'),
- ('𞸴', '𞸷'),
- ('𞸹', '𞸹'),
- ('𞸻', '𞸻'),
- ('𞹂', '𞹂'),
- ('𞹇', '𞹇'),
- ('𞹉', '𞹉'),
- ('𞹋', '𞹋'),
- ('𞹍', '𞹏'),
- ('𞹑', '𞹒'),
- ('𞹔', '𞹔'),
- ('𞹗', '𞹗'),
- ('𞹙', '𞹙'),
- ('𞹛', '𞹛'),
- ('𞹝', '𞹝'),
- ('𞹟', '𞹟'),
- ('𞹡', '𞹢'),
- ('𞹤', '𞹤'),
- ('𞹧', '𞹪'),
- ('𞹬', '𞹲'),
- ('𞹴', '𞹷'),
- ('𞹹', '𞹼'),
- ('𞹾', '𞹾'),
- ('𞺀', '𞺉'),
- ('𞺋', '𞺛'),
- ('𞺡', '𞺣'),
- ('𞺥', '𞺩'),
- ('𞺫', '𞺻'),
- ('🄰', '🅉'),
- ('🅐', '🅩'),
- ('🅰', '🆉'),
-];
-
-pub const CR: &'static [(char, char)] = &[('\r', '\r')];
-
-pub const DOUBLE_QUOTE: &'static [(char, char)] = &[('"', '"')];
-
-pub const EXTEND: &'static [(char, char)] = &[
- ('\u{300}', '\u{36f}'),
- ('\u{483}', '\u{489}'),
- ('\u{591}', '\u{5bd}'),
- ('\u{5bf}', '\u{5bf}'),
- ('\u{5c1}', '\u{5c2}'),
- ('\u{5c4}', '\u{5c5}'),
- ('\u{5c7}', '\u{5c7}'),
- ('\u{610}', '\u{61a}'),
- ('\u{64b}', '\u{65f}'),
- ('\u{670}', '\u{670}'),
- ('\u{6d6}', '\u{6dc}'),
- ('\u{6df}', '\u{6e4}'),
- ('\u{6e7}', '\u{6e8}'),
- ('\u{6ea}', '\u{6ed}'),
- ('\u{711}', '\u{711}'),
- ('\u{730}', '\u{74a}'),
- ('\u{7a6}', '\u{7b0}'),
- ('\u{7eb}', '\u{7f3}'),
- ('\u{7fd}', '\u{7fd}'),
- ('\u{816}', '\u{819}'),
- ('\u{81b}', '\u{823}'),
- ('\u{825}', '\u{827}'),
- ('\u{829}', '\u{82d}'),
- ('\u{859}', '\u{85b}'),
- ('\u{897}', '\u{89f}'),
- ('\u{8ca}', '\u{8e1}'),
- ('\u{8e3}', 'ः'),
- ('\u{93a}', '\u{93c}'),
- ('ा', 'ॏ'),
- ('\u{951}', '\u{957}'),
- ('\u{962}', '\u{963}'),
- ('\u{981}', 'ঃ'),
- ('\u{9bc}', '\u{9bc}'),
- ('\u{9be}', '\u{9c4}'),
- ('ে', 'ৈ'),
- ('ো', '\u{9cd}'),
- ('\u{9d7}', '\u{9d7}'),
- ('\u{9e2}', '\u{9e3}'),
- ('\u{9fe}', '\u{9fe}'),
- ('\u{a01}', 'ਃ'),
- ('\u{a3c}', '\u{a3c}'),
- ('ਾ', '\u{a42}'),
- ('\u{a47}', '\u{a48}'),
- ('\u{a4b}', '\u{a4d}'),
- ('\u{a51}', '\u{a51}'),
- ('\u{a70}', '\u{a71}'),
- ('\u{a75}', '\u{a75}'),
- ('\u{a81}', 'ઃ'),
- ('\u{abc}', '\u{abc}'),
- ('ા', '\u{ac5}'),
- ('\u{ac7}', 'ૉ'),
- ('ો', '\u{acd}'),
- ('\u{ae2}', '\u{ae3}'),
- ('\u{afa}', '\u{aff}'),
- ('\u{b01}', 'ଃ'),
- ('\u{b3c}', '\u{b3c}'),
- ('\u{b3e}', '\u{b44}'),
- ('େ', 'ୈ'),
- ('ୋ', '\u{b4d}'),
- ('\u{b55}', '\u{b57}'),
- ('\u{b62}', '\u{b63}'),
- ('\u{b82}', '\u{b82}'),
- ('\u{bbe}', 'ூ'),
- ('ெ', 'ை'),
- ('ொ', '\u{bcd}'),
- ('\u{bd7}', '\u{bd7}'),
- ('\u{c00}', '\u{c04}'),
- ('\u{c3c}', '\u{c3c}'),
- ('\u{c3e}', 'ౄ'),
- ('\u{c46}', '\u{c48}'),
- ('\u{c4a}', '\u{c4d}'),
- ('\u{c55}', '\u{c56}'),
- ('\u{c62}', '\u{c63}'),
- ('\u{c81}', 'ಃ'),
- ('\u{cbc}', '\u{cbc}'),
- ('ಾ', 'ೄ'),
- ('\u{cc6}', '\u{cc8}'),
- ('\u{cca}', '\u{ccd}'),
- ('\u{cd5}', '\u{cd6}'),
- ('\u{ce2}', '\u{ce3}'),
- ('ೳ', 'ೳ'),
- ('\u{d00}', 'ഃ'),
- ('\u{d3b}', '\u{d3c}'),
- ('\u{d3e}', '\u{d44}'),
- ('െ', 'ൈ'),
- ('ൊ', '\u{d4d}'),
- ('\u{d57}', '\u{d57}'),
- ('\u{d62}', '\u{d63}'),
- ('\u{d81}', 'ඃ'),
- ('\u{dca}', '\u{dca}'),
- ('\u{dcf}', '\u{dd4}'),
- ('\u{dd6}', '\u{dd6}'),
- ('ෘ', '\u{ddf}'),
- ('ෲ', 'ෳ'),
- ('\u{e31}', '\u{e31}'),
- ('\u{e34}', '\u{e3a}'),
- ('\u{e47}', '\u{e4e}'),
- ('\u{eb1}', '\u{eb1}'),
- ('\u{eb4}', '\u{ebc}'),
- ('\u{ec8}', '\u{ece}'),
- ('\u{f18}', '\u{f19}'),
- ('\u{f35}', '\u{f35}'),
- ('\u{f37}', '\u{f37}'),
- ('\u{f39}', '\u{f39}'),
- ('༾', '༿'),
- ('\u{f71}', '\u{f84}'),
- ('\u{f86}', '\u{f87}'),
- ('\u{f8d}', '\u{f97}'),
- ('\u{f99}', '\u{fbc}'),
- ('\u{fc6}', '\u{fc6}'),
- ('ါ', '\u{103e}'),
- ('ၖ', '\u{1059}'),
- ('\u{105e}', '\u{1060}'),
- ('ၢ', 'ၤ'),
- ('ၧ', 'ၭ'),
- ('\u{1071}', '\u{1074}'),
- ('\u{1082}', '\u{108d}'),
- ('ႏ', 'ႏ'),
- ('ႚ', '\u{109d}'),
- ('\u{135d}', '\u{135f}'),
- ('\u{1712}', '\u{1715}'),
- ('\u{1732}', '\u{1734}'),
- ('\u{1752}', '\u{1753}'),
- ('\u{1772}', '\u{1773}'),
- ('\u{17b4}', '\u{17d3}'),
- ('\u{17dd}', '\u{17dd}'),
- ('\u{180b}', '\u{180d}'),
- ('\u{180f}', '\u{180f}'),
- ('\u{1885}', '\u{1886}'),
- ('\u{18a9}', '\u{18a9}'),
- ('\u{1920}', 'ᤫ'),
- ('ᤰ', '\u{193b}'),
- ('\u{1a17}', '\u{1a1b}'),
- ('ᩕ', '\u{1a5e}'),
- ('\u{1a60}', '\u{1a7c}'),
- ('\u{1a7f}', '\u{1a7f}'),
- ('\u{1ab0}', '\u{1ace}'),
- ('\u{1b00}', 'ᬄ'),
- ('\u{1b34}', '\u{1b44}'),
- ('\u{1b6b}', '\u{1b73}'),
- ('\u{1b80}', 'ᮂ'),
- ('ᮡ', '\u{1bad}'),
- ('\u{1be6}', '\u{1bf3}'),
- ('ᰤ', '\u{1c37}'),
- ('\u{1cd0}', '\u{1cd2}'),
- ('\u{1cd4}', '\u{1ce8}'),
- ('\u{1ced}', '\u{1ced}'),
- ('\u{1cf4}', '\u{1cf4}'),
- ('᳷', '\u{1cf9}'),
- ('\u{1dc0}', '\u{1dff}'),
- ('\u{200c}', '\u{200c}'),
- ('\u{20d0}', '\u{20f0}'),
- ('\u{2cef}', '\u{2cf1}'),
- ('\u{2d7f}', '\u{2d7f}'),
- ('\u{2de0}', '\u{2dff}'),
- ('\u{302a}', '\u{302f}'),
- ('\u{3099}', '\u{309a}'),
- ('\u{a66f}', '\u{a672}'),
- ('\u{a674}', '\u{a67d}'),
- ('\u{a69e}', '\u{a69f}'),
- ('\u{a6f0}', '\u{a6f1}'),
- ('\u{a802}', '\u{a802}'),
- ('\u{a806}', '\u{a806}'),
- ('\u{a80b}', '\u{a80b}'),
- ('ꠣ', 'ꠧ'),
- ('\u{a82c}', '\u{a82c}'),
- ('ꢀ', 'ꢁ'),
- ('ꢴ', '\u{a8c5}'),
- ('\u{a8e0}', '\u{a8f1}'),
- ('\u{a8ff}', '\u{a8ff}'),
- ('\u{a926}', '\u{a92d}'),
- ('\u{a947}', '\u{a953}'),
- ('\u{a980}', 'ꦃ'),
- ('\u{a9b3}', '\u{a9c0}'),
- ('\u{a9e5}', '\u{a9e5}'),
- ('\u{aa29}', '\u{aa36}'),
- ('\u{aa43}', '\u{aa43}'),
- ('\u{aa4c}', 'ꩍ'),
- ('ꩻ', 'ꩽ'),
- ('\u{aab0}', '\u{aab0}'),
- ('\u{aab2}', '\u{aab4}'),
- ('\u{aab7}', '\u{aab8}'),
- ('\u{aabe}', '\u{aabf}'),
- ('\u{aac1}', '\u{aac1}'),
- ('ꫫ', 'ꫯ'),
- ('ꫵ', '\u{aaf6}'),
- ('ꯣ', 'ꯪ'),
- ('꯬', '\u{abed}'),
- ('\u{fb1e}', '\u{fb1e}'),
- ('\u{fe00}', '\u{fe0f}'),
- ('\u{fe20}', '\u{fe2f}'),
- ('\u{ff9e}', '\u{ff9f}'),
- ('\u{101fd}', '\u{101fd}'),
- ('\u{102e0}', '\u{102e0}'),
- ('\u{10376}', '\u{1037a}'),
- ('\u{10a01}', '\u{10a03}'),
- ('\u{10a05}', '\u{10a06}'),
- ('\u{10a0c}', '\u{10a0f}'),
- ('\u{10a38}', '\u{10a3a}'),
- ('\u{10a3f}', '\u{10a3f}'),
- ('\u{10ae5}', '\u{10ae6}'),
- ('\u{10d24}', '\u{10d27}'),
- ('\u{10d69}', '\u{10d6d}'),
- ('\u{10eab}', '\u{10eac}'),
- ('\u{10efc}', '\u{10eff}'),
- ('\u{10f46}', '\u{10f50}'),
- ('\u{10f82}', '\u{10f85}'),
- ('𑀀', '𑀂'),
- ('\u{11038}', '\u{11046}'),
- ('\u{11070}', '\u{11070}'),
- ('\u{11073}', '\u{11074}'),
- ('\u{1107f}', '𑂂'),
- ('𑂰', '\u{110ba}'),
- ('\u{110c2}', '\u{110c2}'),
- ('\u{11100}', '\u{11102}'),
- ('\u{11127}', '\u{11134}'),
- ('𑅅', '𑅆'),
- ('\u{11173}', '\u{11173}'),
- ('\u{11180}', '𑆂'),
- ('𑆳', '\u{111c0}'),
- ('\u{111c9}', '\u{111cc}'),
- ('𑇎', '\u{111cf}'),
- ('𑈬', '\u{11237}'),
- ('\u{1123e}', '\u{1123e}'),
- ('\u{11241}', '\u{11241}'),
- ('\u{112df}', '\u{112ea}'),
- ('\u{11300}', '𑌃'),
- ('\u{1133b}', '\u{1133c}'),
- ('\u{1133e}', '𑍄'),
- ('𑍇', '𑍈'),
- ('𑍋', '\u{1134d}'),
- ('\u{11357}', '\u{11357}'),
- ('𑍢', '𑍣'),
- ('\u{11366}', '\u{1136c}'),
- ('\u{11370}', '\u{11374}'),
- ('\u{113b8}', '\u{113c0}'),
- ('\u{113c2}', '\u{113c2}'),
- ('\u{113c5}', '\u{113c5}'),
- ('\u{113c7}', '𑏊'),
- ('𑏌', '\u{113d0}'),
- ('\u{113d2}', '\u{113d2}'),
- ('\u{113e1}', '\u{113e2}'),
- ('𑐵', '\u{11446}'),
- ('\u{1145e}', '\u{1145e}'),
- ('\u{114b0}', '\u{114c3}'),
- ('\u{115af}', '\u{115b5}'),
- ('𑖸', '\u{115c0}'),
- ('\u{115dc}', '\u{115dd}'),
- ('𑘰', '\u{11640}'),
- ('\u{116ab}', '\u{116b7}'),
- ('\u{1171d}', '\u{1172b}'),
- ('𑠬', '\u{1183a}'),
- ('\u{11930}', '𑤵'),
- ('𑤷', '𑤸'),
- ('\u{1193b}', '\u{1193e}'),
- ('𑥀', '𑥀'),
- ('𑥂', '\u{11943}'),
- ('𑧑', '\u{119d7}'),
- ('\u{119da}', '\u{119e0}'),
- ('𑧤', '𑧤'),
- ('\u{11a01}', '\u{11a0a}'),
- ('\u{11a33}', '𑨹'),
- ('\u{11a3b}', '\u{11a3e}'),
- ('\u{11a47}', '\u{11a47}'),
- ('\u{11a51}', '\u{11a5b}'),
- ('\u{11a8a}', '\u{11a99}'),
- ('𑰯', '\u{11c36}'),
- ('\u{11c38}', '\u{11c3f}'),
- ('\u{11c92}', '\u{11ca7}'),
- ('𑲩', '\u{11cb6}'),
- ('\u{11d31}', '\u{11d36}'),
- ('\u{11d3a}', '\u{11d3a}'),
- ('\u{11d3c}', '\u{11d3d}'),
- ('\u{11d3f}', '\u{11d45}'),
- ('\u{11d47}', '\u{11d47}'),
- ('𑶊', '𑶎'),
- ('\u{11d90}', '\u{11d91}'),
- ('𑶓', '\u{11d97}'),
- ('\u{11ef3}', '𑻶'),
- ('\u{11f00}', '\u{11f01}'),
- ('𑼃', '𑼃'),
- ('𑼴', '\u{11f3a}'),
- ('𑼾', '\u{11f42}'),
- ('\u{11f5a}', '\u{11f5a}'),
- ('\u{13440}', '\u{13440}'),
- ('\u{13447}', '\u{13455}'),
- ('\u{1611e}', '\u{1612f}'),
- ('\u{16af0}', '\u{16af4}'),
- ('\u{16b30}', '\u{16b36}'),
- ('\u{16f4f}', '\u{16f4f}'),
- ('𖽑', '𖾇'),
- ('\u{16f8f}', '\u{16f92}'),
- ('\u{16fe4}', '\u{16fe4}'),
- ('\u{16ff0}', '\u{16ff1}'),
- ('\u{1bc9d}', '\u{1bc9e}'),
- ('\u{1cf00}', '\u{1cf2d}'),
- ('\u{1cf30}', '\u{1cf46}'),
- ('\u{1d165}', '\u{1d169}'),
- ('\u{1d16d}', '\u{1d172}'),
- ('\u{1d17b}', '\u{1d182}'),
- ('\u{1d185}', '\u{1d18b}'),
- ('\u{1d1aa}', '\u{1d1ad}'),
- ('\u{1d242}', '\u{1d244}'),
- ('\u{1da00}', '\u{1da36}'),
- ('\u{1da3b}', '\u{1da6c}'),
- ('\u{1da75}', '\u{1da75}'),
- ('\u{1da84}', '\u{1da84}'),
- ('\u{1da9b}', '\u{1da9f}'),
- ('\u{1daa1}', '\u{1daaf}'),
- ('\u{1e000}', '\u{1e006}'),
- ('\u{1e008}', '\u{1e018}'),
- ('\u{1e01b}', '\u{1e021}'),
- ('\u{1e023}', '\u{1e024}'),
- ('\u{1e026}', '\u{1e02a}'),
- ('\u{1e08f}', '\u{1e08f}'),
- ('\u{1e130}', '\u{1e136}'),
- ('\u{1e2ae}', '\u{1e2ae}'),
- ('\u{1e2ec}', '\u{1e2ef}'),
- ('\u{1e4ec}', '\u{1e4ef}'),
- ('\u{1e5ee}', '\u{1e5ef}'),
- ('\u{1e8d0}', '\u{1e8d6}'),
- ('\u{1e944}', '\u{1e94a}'),
- ('🏻', '🏿'),
- ('\u{e0020}', '\u{e007f}'),
- ('\u{e0100}', '\u{e01ef}'),
-];
-
-pub const EXTENDNUMLET: &'static [(char, char)] = &[
- ('_', '_'),
- ('\u{202f}', '\u{202f}'),
- ('‿', '⁀'),
- ('⁔', '⁔'),
- ('︳', '︴'),
- ('﹍', '﹏'),
- ('_', '_'),
-];
-
-pub const FORMAT: &'static [(char, char)] = &[
- ('\u{ad}', '\u{ad}'),
- ('\u{61c}', '\u{61c}'),
- ('\u{180e}', '\u{180e}'),
- ('\u{200e}', '\u{200f}'),
- ('\u{202a}', '\u{202e}'),
- ('\u{2060}', '\u{2064}'),
- ('\u{2066}', '\u{206f}'),
- ('\u{feff}', '\u{feff}'),
- ('\u{fff9}', '\u{fffb}'),
- ('\u{13430}', '\u{1343f}'),
- ('\u{1bca0}', '\u{1bca3}'),
- ('\u{1d173}', '\u{1d17a}'),
- ('\u{e0001}', '\u{e0001}'),
-];
-
-pub const HEBREW_LETTER: &'static [(char, char)] = &[
- ('א', 'ת'),
- ('ׯ', 'ײ'),
- ('יִ', 'יִ'),
- ('ײַ', 'ﬨ'),
- ('שׁ', 'זּ'),
- ('טּ', 'לּ'),
- ('מּ', 'מּ'),
- ('נּ', 'סּ'),
- ('ףּ', 'פּ'),
- ('צּ', 'ﭏ'),
-];
-
-pub const KATAKANA: &'static [(char, char)] = &[
- ('〱', '〵'),
- ('゛', '゜'),
- ('゠', 'ヺ'),
- ('ー', 'ヿ'),
- ('ㇰ', 'ㇿ'),
- ('㋐', '㋾'),
- ('㌀', '㍗'),
- ('ヲ', 'ン'),
- ('𚿰', '𚿳'),
- ('𚿵', '𚿻'),
- ('𚿽', '𚿾'),
- ('𛀀', '𛀀'),
- ('𛄠', '𛄢'),
- ('𛅕', '𛅕'),
- ('𛅤', '𛅧'),
-];
-
-pub const LF: &'static [(char, char)] = &[('\n', '\n')];
-
-pub const MIDLETTER: &'static [(char, char)] = &[
- (':', ':'),
- ('·', '·'),
- ('·', '·'),
- ('՟', '՟'),
- ('״', '״'),
- ('‧', '‧'),
- ('︓', '︓'),
- ('﹕', '﹕'),
- (':', ':'),
-];
-
-pub const MIDNUM: &'static [(char, char)] = &[
- (',', ','),
- (';', ';'),
- (';', ';'),
- ('։', '։'),
- ('،', '؍'),
- ('٬', '٬'),
- ('߸', '߸'),
- ('⁄', '⁄'),
- ('﹐', '﹐'),
- ('﹔', '﹔'),
- (',', ','),
- (';', ';'),
-];
-
-pub const MIDNUMLET: &'static [(char, char)] = &[
- ('.', '.'),
- ('‘', '’'),
- ('․', '․'),
- ('﹒', '﹒'),
- (''', '''),
- ('.', '.'),
-];
-
-pub const NEWLINE: &'static [(char, char)] =
- &[('\u{b}', '\u{c}'), ('\u{85}', '\u{85}'), ('\u{2028}', '\u{2029}')];
-
-pub const NUMERIC: &'static [(char, char)] = &[
- ('0', '9'),
- ('\u{600}', '\u{605}'),
- ('٠', '٩'),
- ('٫', '٫'),
- ('\u{6dd}', '\u{6dd}'),
- ('۰', '۹'),
- ('߀', '߉'),
- ('\u{890}', '\u{891}'),
- ('\u{8e2}', '\u{8e2}'),
- ('०', '९'),
- ('০', '৯'),
- ('੦', '੯'),
- ('૦', '૯'),
- ('୦', '୯'),
- ('௦', '௯'),
- ('౦', '౯'),
- ('೦', '೯'),
- ('൦', '൯'),
- ('෦', '෯'),
- ('๐', '๙'),
- ('໐', '໙'),
- ('༠', '༩'),
- ('၀', '၉'),
- ('႐', '႙'),
- ('០', '៩'),
- ('᠐', '᠙'),
- ('᥆', '᥏'),
- ('᧐', '᧚'),
- ('᪀', '᪉'),
- ('᪐', '᪙'),
- ('᭐', '᭙'),
- ('᮰', '᮹'),
- ('᱀', '᱉'),
- ('᱐', '᱙'),
- ('꘠', '꘩'),
- ('꣐', '꣙'),
- ('꤀', '꤉'),
- ('꧐', '꧙'),
- ('꧰', '꧹'),
- ('꩐', '꩙'),
- ('꯰', '꯹'),
- ('0', '9'),
- ('𐒠', '𐒩'),
- ('𐴰', '𐴹'),
- ('𐵀', '𐵉'),
- ('𑁦', '𑁯'),
- ('\u{110bd}', '\u{110bd}'),
- ('\u{110cd}', '\u{110cd}'),
- ('𑃰', '𑃹'),
- ('𑄶', '𑄿'),
- ('𑇐', '𑇙'),
- ('𑋰', '𑋹'),
- ('𑑐', '𑑙'),
- ('𑓐', '𑓙'),
- ('𑙐', '𑙙'),
- ('𑛀', '𑛉'),
- ('𑛐', '𑛣'),
- ('𑜰', '𑜹'),
- ('𑣠', '𑣩'),
- ('𑥐', '𑥙'),
- ('𑯰', '𑯹'),
- ('𑱐', '𑱙'),
- ('𑵐', '𑵙'),
- ('𑶠', '𑶩'),
- ('𑽐', '𑽙'),
- ('𖄰', '𖄹'),
- ('𖩠', '𖩩'),
- ('𖫀', '𖫉'),
- ('𖭐', '𖭙'),
- ('𖵰', '𖵹'),
- ('𜳰', '𜳹'),
- ('𝟎', '𝟿'),
- ('𞅀', '𞅉'),
- ('𞋰', '𞋹'),
- ('𞓰', '𞓹'),
- ('𞗱', '𞗺'),
- ('𞥐', '𞥙'),
- ('🯰', '🯹'),
-];
-
-pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[('🇦', '🇿')];
-
-pub const SINGLE_QUOTE: &'static [(char, char)] = &[('\'', '\'')];
-
-pub const WSEGSPACE: &'static [(char, char)] = &[
- (' ', ' '),
- ('\u{1680}', '\u{1680}'),
- ('\u{2000}', '\u{2006}'),
- ('\u{2008}', '\u{200a}'),
- ('\u{205f}', '\u{205f}'),
- ('\u{3000}', '\u{3000}'),
-];
-
-pub const ZWJ: &'static [(char, char)] = &[('\u{200d}', '\u{200d}')];
diff --git a/vendor/regex-syntax/src/utf8.rs b/vendor/regex-syntax/src/utf8.rs
deleted file mode 100644
index 69d74945..00000000
--- a/vendor/regex-syntax/src/utf8.rs
+++ /dev/null
@@ -1,592 +0,0 @@
-/*!
-Converts ranges of Unicode scalar values to equivalent ranges of UTF-8 bytes.
-
-This is sub-module is useful for constructing byte based automatons that need
-to embed UTF-8 decoding. The most common use of this module is in conjunction
-with the [`hir::ClassUnicodeRange`](crate::hir::ClassUnicodeRange) type.
-
-See the documentation on the `Utf8Sequences` iterator for more details and
-an example.
-
-# Wait, what is this?
-
-This is simplest to explain with an example. Let's say you wanted to test
-whether a particular byte sequence was a Cyrillic character. One possible
-scalar value range is `[0400-04FF]`. The set of allowed bytes for this
-range can be expressed as a sequence of byte ranges:
-
-```text
-[D0-D3][80-BF]
-```
-
-This is simple enough: simply encode the boundaries, `0400` encodes to
-`D0 80` and `04FF` encodes to `D3 BF`, and create ranges from each
-corresponding pair of bytes: `D0` to `D3` and `80` to `BF`.
-
-However, what if you wanted to add the Cyrillic Supplementary characters to
-your range? Your range might then become `[0400-052F]`. The same procedure
-as above doesn't quite work because `052F` encodes to `D4 AF`. The byte ranges
-you'd get from the previous transformation would be `[D0-D4][80-AF]`. However,
-this isn't quite correct because this range doesn't capture many characters,
-for example, `04FF` (because its last byte, `BF` isn't in the range `80-AF`).
-
-Instead, you need multiple sequences of byte ranges:
-
-```text
-[D0-D3][80-BF] # matches codepoints 0400-04FF
-[D4][80-AF] # matches codepoints 0500-052F
-```
-
-This gets even more complicated if you want bigger ranges, particularly if
-they naively contain surrogate codepoints. For example, the sequence of byte
-ranges for the basic multilingual plane (`[0000-FFFF]`) look like this:
-
-```text
-[0-7F]
-[C2-DF][80-BF]
-[E0][A0-BF][80-BF]
-[E1-EC][80-BF][80-BF]
-[ED][80-9F][80-BF]
-[EE-EF][80-BF][80-BF]
-```
-
-Note that the byte ranges above will *not* match any erroneous encoding of
-UTF-8, including encodings of surrogate codepoints.
-
-And, of course, for all of Unicode (`[000000-10FFFF]`):
-
-```text
-[0-7F]
-[C2-DF][80-BF]
-[E0][A0-BF][80-BF]
-[E1-EC][80-BF][80-BF]
-[ED][80-9F][80-BF]
-[EE-EF][80-BF][80-BF]
-[F0][90-BF][80-BF][80-BF]
-[F1-F3][80-BF][80-BF][80-BF]
-[F4][80-8F][80-BF][80-BF]
-```
-
-This module automates the process of creating these byte ranges from ranges of
-Unicode scalar values.
-
-# Lineage
-
-I got the idea and general implementation strategy from Russ Cox in his
-[article on regexps](https://web.archive.org/web/20160404141123/https://swtch.com/~rsc/regexp/regexp3.html) and RE2.
-Russ Cox got it from Ken Thompson's `grep` (no source, folk lore?).
-I also got the idea from
-[Lucene](https://github.com/apache/lucene-solr/blob/ae93f4e7ac6a3908046391de35d4f50a0d3c59ca/lucene/core/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java),
-which uses it for executing automata on their term index.
-*/
-
-use core::{char, fmt, iter::FusedIterator, slice};
-
-use alloc::{vec, vec::Vec};
-
-const MAX_UTF8_BYTES: usize = 4;
-
-/// Utf8Sequence represents a sequence of byte ranges.
-///
-/// To match a Utf8Sequence, a candidate byte sequence must match each
-/// successive range.
-///
-/// For example, if there are two ranges, `[C2-DF][80-BF]`, then the byte
-/// sequence `\xDD\x61` would not match because `0x61 < 0x80`.
-#[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
-pub enum Utf8Sequence {
- /// One byte range.
- One(Utf8Range),
- /// Two successive byte ranges.
- Two([Utf8Range; 2]),
- /// Three successive byte ranges.
- Three([Utf8Range; 3]),
- /// Four successive byte ranges.
- Four([Utf8Range; 4]),
-}
-
-impl Utf8Sequence {
- /// Creates a new UTF-8 sequence from the encoded bytes of a scalar value
- /// range.
- ///
- /// This assumes that `start` and `end` have the same length.
- fn from_encoded_range(start: &[u8], end: &[u8]) -> Self {
- assert_eq!(start.len(), end.len());
- match start.len() {
- 2 => Utf8Sequence::Two([
- Utf8Range::new(start[0], end[0]),
- Utf8Range::new(start[1], end[1]),
- ]),
- 3 => Utf8Sequence::Three([
- Utf8Range::new(start[0], end[0]),
- Utf8Range::new(start[1], end[1]),
- Utf8Range::new(start[2], end[2]),
- ]),
- 4 => Utf8Sequence::Four([
- Utf8Range::new(start[0], end[0]),
- Utf8Range::new(start[1], end[1]),
- Utf8Range::new(start[2], end[2]),
- Utf8Range::new(start[3], end[3]),
- ]),
- n => unreachable!("invalid encoded length: {}", n),
- }
- }
-
- /// Returns the underlying sequence of byte ranges as a slice.
- pub fn as_slice(&self) -> &[Utf8Range] {
- use self::Utf8Sequence::*;
- match *self {
- One(ref r) => slice::from_ref(r),
- Two(ref r) => &r[..],
- Three(ref r) => &r[..],
- Four(ref r) => &r[..],
- }
- }
-
- /// Returns the number of byte ranges in this sequence.
- ///
- /// The length is guaranteed to be in the closed interval `[1, 4]`.
- pub fn len(&self) -> usize {
- self.as_slice().len()
- }
-
- /// Reverses the ranges in this sequence.
- ///
- /// For example, if this corresponds to the following sequence:
- ///
- /// ```text
- /// [D0-D3][80-BF]
- /// ```
- ///
- /// Then after reversal, it will be
- ///
- /// ```text
- /// [80-BF][D0-D3]
- /// ```
- ///
- /// This is useful when one is constructing a UTF-8 automaton to match
- /// character classes in reverse.
- pub fn reverse(&mut self) {
- match *self {
- Utf8Sequence::One(_) => {}
- Utf8Sequence::Two(ref mut x) => x.reverse(),
- Utf8Sequence::Three(ref mut x) => x.reverse(),
- Utf8Sequence::Four(ref mut x) => x.reverse(),
- }
- }
-
- /// Returns true if and only if a prefix of `bytes` matches this sequence
- /// of byte ranges.
- pub fn matches(&self, bytes: &[u8]) -> bool {
- if bytes.len() < self.len() {
- return false;
- }
- for (&b, r) in bytes.iter().zip(self) {
- if !r.matches(b) {
- return false;
- }
- }
- true
- }
-}
-
-impl<'a> IntoIterator for &'a Utf8Sequence {
- type IntoIter = slice::Iter<'a, Utf8Range>;
- type Item = &'a Utf8Range;
-
- fn into_iter(self) -> Self::IntoIter {
- self.as_slice().iter()
- }
-}
-
-impl fmt::Debug for Utf8Sequence {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- use self::Utf8Sequence::*;
- match *self {
- One(ref r) => write!(f, "{:?}", r),
- Two(ref r) => write!(f, "{:?}{:?}", r[0], r[1]),
- Three(ref r) => write!(f, "{:?}{:?}{:?}", r[0], r[1], r[2]),
- Four(ref r) => {
- write!(f, "{:?}{:?}{:?}{:?}", r[0], r[1], r[2], r[3])
- }
- }
- }
-}
-
-/// A single inclusive range of UTF-8 bytes.
-#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)]
-pub struct Utf8Range {
- /// Start of byte range (inclusive).
- pub start: u8,
- /// End of byte range (inclusive).
- pub end: u8,
-}
-
-impl Utf8Range {
- fn new(start: u8, end: u8) -> Self {
- Utf8Range { start, end }
- }
-
- /// Returns true if and only if the given byte is in this range.
- pub fn matches(&self, b: u8) -> bool {
- self.start <= b && b <= self.end
- }
-}
-
-impl fmt::Debug for Utf8Range {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- if self.start == self.end {
- write!(f, "[{:X}]", self.start)
- } else {
- write!(f, "[{:X}-{:X}]", self.start, self.end)
- }
- }
-}
-
-/// An iterator over ranges of matching UTF-8 byte sequences.
-///
-/// The iteration represents an alternation of comprehensive byte sequences
-/// that match precisely the set of UTF-8 encoded scalar values.
-///
-/// A byte sequence corresponds to one of the scalar values in the range given
-/// if and only if it completely matches exactly one of the sequences of byte
-/// ranges produced by this iterator.
-///
-/// Each sequence of byte ranges matches a unique set of bytes. That is, no two
-/// sequences will match the same bytes.
-///
-/// # Example
-///
-/// This shows how to match an arbitrary byte sequence against a range of
-/// scalar values.
-///
-/// ```rust
-/// use regex_syntax::utf8::{Utf8Sequences, Utf8Sequence};
-///
-/// fn matches(seqs: &[Utf8Sequence], bytes: &[u8]) -> bool {
-/// for range in seqs {
-/// if range.matches(bytes) {
-/// return true;
-/// }
-/// }
-/// false
-/// }
-///
-/// // Test the basic multilingual plane.
-/// let seqs: Vec<_> = Utf8Sequences::new('\u{0}', '\u{FFFF}').collect();
-///
-/// // UTF-8 encoding of 'a'.
-/// assert!(matches(&seqs, &[0x61]));
-/// // UTF-8 encoding of '☃' (`\u{2603}`).
-/// assert!(matches(&seqs, &[0xE2, 0x98, 0x83]));
-/// // UTF-8 encoding of `\u{10348}` (outside the BMP).
-/// assert!(!matches(&seqs, &[0xF0, 0x90, 0x8D, 0x88]));
-/// // Tries to match against a UTF-8 encoding of a surrogate codepoint,
-/// // which is invalid UTF-8, and therefore fails, despite the fact that
-/// // the corresponding codepoint (0xD800) falls in the range given.
-/// assert!(!matches(&seqs, &[0xED, 0xA0, 0x80]));
-/// // And fails against plain old invalid UTF-8.
-/// assert!(!matches(&seqs, &[0xFF, 0xFF]));
-/// ```
-///
-/// If this example seems circuitous, that's because it is! It's meant to be
-/// illustrative. In practice, you could just try to decode your byte sequence
-/// and compare it with the scalar value range directly. However, this is not
-/// always possible (for example, in a byte based automaton).
-#[derive(Debug)]
-pub struct Utf8Sequences {
- range_stack: Vec<ScalarRange>,
-}
-
-impl Utf8Sequences {
- /// Create a new iterator over UTF-8 byte ranges for the scalar value range
- /// given.
- pub fn new(start: char, end: char) -> Self {
- let range =
- ScalarRange { start: u32::from(start), end: u32::from(end) };
- Utf8Sequences { range_stack: vec![range] }
- }
-
- /// reset resets the scalar value range.
- /// Any existing state is cleared, but resources may be reused.
- ///
- /// N.B. Benchmarks say that this method is dubious.
- #[doc(hidden)]
- pub fn reset(&mut self, start: char, end: char) {
- self.range_stack.clear();
- self.push(u32::from(start), u32::from(end));
- }
-
- fn push(&mut self, start: u32, end: u32) {
- self.range_stack.push(ScalarRange { start, end });
- }
-}
-
-struct ScalarRange {
- start: u32,
- end: u32,
-}
-
-impl fmt::Debug for ScalarRange {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- write!(f, "ScalarRange({:X}, {:X})", self.start, self.end)
- }
-}
-
-impl Iterator for Utf8Sequences {
- type Item = Utf8Sequence;
-
- fn next(&mut self) -> Option<Self::Item> {
- 'TOP: while let Some(mut r) = self.range_stack.pop() {
- 'INNER: loop {
- if let Some((r1, r2)) = r.split() {
- self.push(r2.start, r2.end);
- r.start = r1.start;
- r.end = r1.end;
- continue 'INNER;
- }
- if !r.is_valid() {
- continue 'TOP;
- }
- for i in 1..MAX_UTF8_BYTES {
- let max = max_scalar_value(i);
- if r.start <= max && max < r.end {
- self.push(max + 1, r.end);
- r.end = max;
- continue 'INNER;
- }
- }
- if let Some(ascii_range) = r.as_ascii() {
- return Some(Utf8Sequence::One(ascii_range));
- }
- for i in 1..MAX_UTF8_BYTES {
- let m = (1 << (6 * i)) - 1;
- if (r.start & !m) != (r.end & !m) {
- if (r.start & m) != 0 {
- self.push((r.start | m) + 1, r.end);
- r.end = r.start | m;
- continue 'INNER;
- }
- if (r.end & m) != m {
- self.push(r.end & !m, r.end);
- r.end = (r.end & !m) - 1;
- continue 'INNER;
- }
- }
- }
- let mut start = [0; MAX_UTF8_BYTES];
- let mut end = [0; MAX_UTF8_BYTES];
- let n = r.encode(&mut start, &mut end);
- return Some(Utf8Sequence::from_encoded_range(
- &start[0..n],
- &end[0..n],
- ));
- }
- }
- None
- }
-}
-
-impl FusedIterator for Utf8Sequences {}
-
-impl ScalarRange {
- /// split splits this range if it overlaps with a surrogate codepoint.
- ///
- /// Either or both ranges may be invalid.
- fn split(&self) -> Option<(ScalarRange, ScalarRange)> {
- if self.start < 0xE000 && self.end > 0xD7FF {
- Some((
- ScalarRange { start: self.start, end: 0xD7FF },
- ScalarRange { start: 0xE000, end: self.end },
- ))
- } else {
- None
- }
- }
-
- /// is_valid returns true if and only if start <= end.
- fn is_valid(&self) -> bool {
- self.start <= self.end
- }
-
- /// as_ascii returns this range as a Utf8Range if and only if all scalar
- /// values in this range can be encoded as a single byte.
- fn as_ascii(&self) -> Option<Utf8Range> {
- if self.is_ascii() {
- let start = u8::try_from(self.start).unwrap();
- let end = u8::try_from(self.end).unwrap();
- Some(Utf8Range::new(start, end))
- } else {
- None
- }
- }
-
- /// is_ascii returns true if the range is ASCII only (i.e., takes a single
- /// byte to encode any scalar value).
- fn is_ascii(&self) -> bool {
- self.is_valid() && self.end <= 0x7f
- }
-
- /// encode writes the UTF-8 encoding of the start and end of this range
- /// to the corresponding destination slices, and returns the number of
- /// bytes written.
- ///
- /// The slices should have room for at least `MAX_UTF8_BYTES`.
- fn encode(&self, start: &mut [u8], end: &mut [u8]) -> usize {
- let cs = char::from_u32(self.start).unwrap();
- let ce = char::from_u32(self.end).unwrap();
- let ss = cs.encode_utf8(start);
- let se = ce.encode_utf8(end);
- assert_eq!(ss.len(), se.len());
- ss.len()
- }
-}
-
-fn max_scalar_value(nbytes: usize) -> u32 {
- match nbytes {
- 1 => 0x007F,
- 2 => 0x07FF,
- 3 => 0xFFFF,
- 4 => 0x0010_FFFF,
- _ => unreachable!("invalid UTF-8 byte sequence size"),
- }
-}
-
-#[cfg(test)]
-mod tests {
- use core::char;
-
- use alloc::{vec, vec::Vec};
-
- use crate::utf8::{Utf8Range, Utf8Sequences};
-
- fn rutf8(s: u8, e: u8) -> Utf8Range {
- Utf8Range::new(s, e)
- }
-
- fn never_accepts_surrogate_codepoints(start: char, end: char) {
- for cp in 0xD800..0xE000 {
- let buf = encode_surrogate(cp);
- for r in Utf8Sequences::new(start, end) {
- if r.matches(&buf) {
- panic!(
- "Sequence ({:X}, {:X}) contains range {:?}, \
- which matches surrogate code point {:X} \
- with encoded bytes {:?}",
- u32::from(start),
- u32::from(end),
- r,
- cp,
- buf,
- );
- }
- }
- }
- }
-
- #[test]
- fn codepoints_no_surrogates() {
- never_accepts_surrogate_codepoints('\u{0}', '\u{FFFF}');
- never_accepts_surrogate_codepoints('\u{0}', '\u{10FFFF}');
- never_accepts_surrogate_codepoints('\u{0}', '\u{10FFFE}');
- never_accepts_surrogate_codepoints('\u{80}', '\u{10FFFF}');
- never_accepts_surrogate_codepoints('\u{D7FF}', '\u{E000}');
- }
-
- #[test]
- fn single_codepoint_one_sequence() {
- // Tests that every range of scalar values that contains a single
- // scalar value is recognized by one sequence of byte ranges.
- for i in 0x0..=0x0010_FFFF {
- let c = match char::from_u32(i) {
- None => continue,
- Some(c) => c,
- };
- let seqs: Vec<_> = Utf8Sequences::new(c, c).collect();
- assert_eq!(seqs.len(), 1);
- }
- }
-
- #[test]
- fn bmp() {
- use crate::utf8::Utf8Sequence::*;
-
- let seqs = Utf8Sequences::new('\u{0}', '\u{FFFF}').collect::<Vec<_>>();
- assert_eq!(
- seqs,
- vec![
- One(rutf8(0x0, 0x7F)),
- Two([rutf8(0xC2, 0xDF), rutf8(0x80, 0xBF)]),
- Three([
- rutf8(0xE0, 0xE0),
- rutf8(0xA0, 0xBF),
- rutf8(0x80, 0xBF)
- ]),
- Three([
- rutf8(0xE1, 0xEC),
- rutf8(0x80, 0xBF),
- rutf8(0x80, 0xBF)
- ]),
- Three([
- rutf8(0xED, 0xED),
- rutf8(0x80, 0x9F),
- rutf8(0x80, 0xBF)
- ]),
- Three([
- rutf8(0xEE, 0xEF),
- rutf8(0x80, 0xBF),
- rutf8(0x80, 0xBF)
- ]),
- ]
- );
- }
-
- #[test]
- fn reverse() {
- use crate::utf8::Utf8Sequence::*;
-
- let mut s = One(rutf8(0xA, 0xB));
- s.reverse();
- assert_eq!(s.as_slice(), &[rutf8(0xA, 0xB)]);
-
- let mut s = Two([rutf8(0xA, 0xB), rutf8(0xB, 0xC)]);
- s.reverse();
- assert_eq!(s.as_slice(), &[rutf8(0xB, 0xC), rutf8(0xA, 0xB)]);
-
- let mut s = Three([rutf8(0xA, 0xB), rutf8(0xB, 0xC), rutf8(0xC, 0xD)]);
- s.reverse();
- assert_eq!(
- s.as_slice(),
- &[rutf8(0xC, 0xD), rutf8(0xB, 0xC), rutf8(0xA, 0xB)]
- );
-
- let mut s = Four([
- rutf8(0xA, 0xB),
- rutf8(0xB, 0xC),
- rutf8(0xC, 0xD),
- rutf8(0xD, 0xE),
- ]);
- s.reverse();
- assert_eq!(
- s.as_slice(),
- &[
- rutf8(0xD, 0xE),
- rutf8(0xC, 0xD),
- rutf8(0xB, 0xC),
- rutf8(0xA, 0xB)
- ]
- );
- }
-
- fn encode_surrogate(cp: u32) -> [u8; 3] {
- const TAG_CONT: u8 = 0b1000_0000;
- const TAG_THREE_B: u8 = 0b1110_0000;
-
- assert!(0xD800 <= cp && cp < 0xE000);
- let mut dst = [0; 3];
- dst[0] = u8::try_from(cp >> 12 & 0x0F).unwrap() | TAG_THREE_B;
- dst[1] = u8::try_from(cp >> 6 & 0x3F).unwrap() | TAG_CONT;
- dst[2] = u8::try_from(cp & 0x3F).unwrap() | TAG_CONT;
- dst
- }
-}
diff --git a/vendor/regex-syntax/test b/vendor/regex-syntax/test
deleted file mode 100755
index 8626c3bf..00000000
--- a/vendor/regex-syntax/test
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# cd to the directory containing this crate's Cargo.toml so that we don't need
-# to pass --manifest-path to every `cargo` command.
-cd "$(dirname "$0")"
-
-# This is a convenience script for running a broad swath of the syntax tests.
-echo "===== DEFAULT FEATURES ==="
-cargo test
-
-features=(
- std
- unicode
- unicode-age
- unicode-bool
- unicode-case
- unicode-gencat
- unicode-perl
- unicode-script
- unicode-segment
-)
-for f in "${features[@]}"; do
- echo "=== FEATURE: $f ==="
- # We only run library tests because I couldn't figure out how to easily
- # make doc tests run in 'no_std' mode. In particular, without the Error
- # trait, using '?' in doc tests seems tricky.
- cargo test --no-default-features --lib --features "$f"
-done