summaryrefslogtreecommitdiff
path: root/vendor/logos-codegen
diff options
context:
space:
mode:
authormo khan <mo@mokhan.ca>2025-07-02 18:36:06 -0600
committermo khan <mo@mokhan.ca>2025-07-02 18:36:06 -0600
commit8cdfa445d6629ffef4cb84967ff7017654045bc2 (patch)
tree22f0b0907c024c78d26a731e2e1f5219407d8102 /vendor/logos-codegen
parent4351c74c7c5f97156bc94d3a8549b9940ac80e3f (diff)
chore: add vendor directory
Diffstat (limited to 'vendor/logos-codegen')
-rw-r--r--vendor/logos-codegen/.cargo-checksum.json1
-rw-r--r--vendor/logos-codegen/Cargo.toml90
-rw-r--r--vendor/logos-codegen/LICENSE-APACHE201
-rw-r--r--vendor/logos-codegen/LICENSE-MIT21
-rw-r--r--vendor/logos-codegen/README.md115
-rw-r--r--vendor/logos-codegen/build.rs21
-rw-r--r--vendor/logos-codegen/src/error.rs110
-rw-r--r--vendor/logos-codegen/src/generator/context.rs128
-rw-r--r--vendor/logos-codegen/src/generator/fork.rs216
-rw-r--r--vendor/logos-codegen/src/generator/leaf.rs67
-rw-r--r--vendor/logos-codegen/src/generator/mod.rs268
-rw-r--r--vendor/logos-codegen/src/generator/rope.rs39
-rw-r--r--vendor/logos-codegen/src/generator/tables.rs77
-rw-r--r--vendor/logos-codegen/src/graph/fork.rs267
-rw-r--r--vendor/logos-codegen/src/graph/impls.rs220
-rw-r--r--vendor/logos-codegen/src/graph/meta.rs174
-rw-r--r--vendor/logos-codegen/src/graph/mod.rs566
-rw-r--r--vendor/logos-codegen/src/graph/range.rs144
-rw-r--r--vendor/logos-codegen/src/graph/regex.rs295
-rw-r--r--vendor/logos-codegen/src/graph/rope.rs330
-rw-r--r--vendor/logos-codegen/src/leaf.rs118
-rw-r--r--vendor/logos-codegen/src/lib.rs391
-rw-r--r--vendor/logos-codegen/src/macros.rs12
-rw-r--r--vendor/logos-codegen/src/mir.rs235
-rw-r--r--vendor/logos-codegen/src/parser/definition.rs193
-rw-r--r--vendor/logos-codegen/src/parser/ignore_flags.rs499
-rw-r--r--vendor/logos-codegen/src/parser/mod.rs331
-rw-r--r--vendor/logos-codegen/src/parser/nested.rs146
-rw-r--r--vendor/logos-codegen/src/parser/subpattern.rs97
-rw-r--r--vendor/logos-codegen/src/parser/type_params.rs200
-rw-r--r--vendor/logos-codegen/src/util.rs64
-rw-r--r--vendor/logos-codegen/tests/codegen.rs31
-rw-r--r--vendor/logos-codegen/tests/data/no_error_lut/input.rs10
-rw-r--r--vendor/logos-codegen/tests/data/no_error_lut/output.rs1
-rw-r--r--vendor/logos-codegen/tests/data/simple/input.rs5
-rw-r--r--vendor/logos-codegen/tests/data/simple/output.rs1
36 files changed, 5684 insertions, 0 deletions
diff --git a/vendor/logos-codegen/.cargo-checksum.json b/vendor/logos-codegen/.cargo-checksum.json
new file mode 100644
index 00000000..97e0ce68
--- /dev/null
+++ b/vendor/logos-codegen/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"Cargo.toml":"a7610e627353e99aeaf86ef36a9ba33d5971b36cccd703e6913435e3ed9ae3d3","LICENSE-APACHE":"f30735c11407534952947e1c7b7457ddf28847000bb038402495ad66f3d020a4","LICENSE-MIT":"112fcdb9f4935988cc2313e4cc38faaeccfff53eb1296499e618932d472908e0","README.md":"da4edde5433a520a89358b90f5d916c78dcff39f4131245e9eefedbb7daca5d8","build.rs":"9a5db937e9f7068b5b32b9dfb4468d3d69ffb3efc0866fda63093482253f1883","src/error.rs":"440d20528db9a76b97de1b30f2c59a570a59368b1cd8aa70f8fc2e09bcfdd3a7","src/generator/context.rs":"46f39acab8d9d70cfad4428acdb3c05bf05f48412e3ae977938bd3a3b9521d2b","src/generator/fork.rs":"6e81913f421972fed784e66773073d63a5ca57b307c5676e6cc8c079c11e475b","src/generator/leaf.rs":"51277af712aecd2d396de902b9700e2e944d63285f483fb88ad5080dc9195251","src/generator/mod.rs":"12d31fbbe83cac553ffca7d4f437d3b4b1ab2272ee82e9048957b26cf3b5c7fd","src/generator/rope.rs":"a3e532ce1ff77d39a3746f0a61b533e94b8231d22d7abac30ecb0f40b9a7d4fd","src/generator/tables.rs":"8acf6de12f74a633fd1dc952775b55cf4107015c9aa39681cceb7788c217b30d","src/graph/fork.rs":"870688f1cbede6443755f3476d9d632024baf5b3176ad40e4e0f15b149df9c68","src/graph/impls.rs":"ec5717d68cc509b938671d361e062313176b9433bd96f8eab41cf5e57e28fc2e","src/graph/meta.rs":"aceb3fb5370aaa2daea615bb1f2e7ee841f4cd450493e4b3c4137feba60c3d09","src/graph/mod.rs":"135c5994c7f5453b12c06905bdf64eadcad61856a2ebc70bbca30889df499910","src/graph/range.rs":"4f66fa53ed793582c9f4afa8832be51ad73e87a91d0469033d76d1f29efb767f","src/graph/regex.rs":"f1a44b0d791362a9f1b32f5919ff9b5fe511f20052856de367858f38993583f4","src/graph/rope.rs":"ab5995e5912fb33b65b9dad2cffa315a3483b9123bd865a2fd4f6edf24fe0c5b","src/leaf.rs":"b0efac6ac811460001a114cc3c1cfec1bdf15c307ee1daf8c8729569acdddbf3","src/lib.rs":"95c41890714c12817acdf3ef1a31b8cc3c510886e1f6ad961c45a873e1011c2f","src/macros.rs":"02c06363993f13e78ce9aa8bcea597020b0ca40c2993a2e99207d87f72526e3e","src/mir.rs":"7ad2c77253324e6506e9a2c5b0f749339b5b5e1ebb91072f769d89c527e67e53","src/parser/definiti
on.rs":"53f7d401cbe7548cec2e6ea79d966d5f27f52d36fc313e4aca25ea78a07c62e8","src/parser/ignore_flags.rs":"53493872f553139011153629b5ce9990e9195e0599ef6219ee1f58ec05545f93","src/parser/mod.rs":"d825ef182b3a6a8e020ef8d659038626399dab5fb17ee8737f7e1d52744eaffb","src/parser/nested.rs":"bac7c8bebf99a28a2148edc44972d642e1ab0310d54ff894248299c3264e74bc","src/parser/subpattern.rs":"06b41ede8464ecdfbed441bf60eb131e02a60f8043bea206670f1cc8163f35b0","src/parser/type_params.rs":"1d0725c103286a0c1b390238c5df722aa743fe05499fa00cd38f8b29f853f2ee","src/util.rs":"13f9f868c4c5bc54aea374dc51613da218cd120c49d4277445d9be62c9ce06e6","tests/codegen.rs":"34f58b400a1f519c69511f9d2b549d7e6205dde553d9756dfd028bbf87bd34fd","tests/data/no_error_lut/input.rs":"066a28b7a5128cf381d1df5697480733f763d9596b105b3e31ce5b8435363b03","tests/data/no_error_lut/output.rs":"0fbf3b00d0f0631f9a0f5847aeed7997c62440cd03eb5d52264eb0174da1b89a","tests/data/simple/input.rs":"78681c1f6073462c78f2b98d101bc3ffbbc136470c88aba392ec0282becaa772","tests/data/simple/output.rs":"fd3d86bda51afb59272970864d43b997730a28f587ddc2a0e96cd854b3c930a9"},"package":"189bbfd0b61330abea797e5e9276408f2edbe4f822d7ad08685d67419aafb34e"} \ No newline at end of file
diff --git a/vendor/logos-codegen/Cargo.toml b/vendor/logos-codegen/Cargo.toml
new file mode 100644
index 00000000..9f8cf5bb
--- /dev/null
+++ b/vendor/logos-codegen/Cargo.toml
@@ -0,0 +1,90 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+rust-version = "1.74.0"
+name = "logos-codegen"
+version = "0.15.0"
+authors = [
+ "Maciej Hirsz <hello@maciej.codes>",
+ "Jérome Eertmans (maintainer) <jeertmans@icloud.com>",
+]
+build = "build.rs"
+autolib = false
+autobins = false
+autoexamples = false
+autotests = false
+autobenches = false
+description = "Create ridiculously fast Lexers"
+homepage = "https://logos.maciej.codes/"
+readme = "README.md"
+keywords = [
+ "lexer",
+ "lexical",
+ "tokenizer",
+ "parser",
+ "no_std",
+]
+categories = [
+ "parsing",
+ "text-processing",
+]
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/maciejhirsz/logos"
+
+[package.metadata.release]
+shared-version = true
+
+[lib]
+name = "logos_codegen"
+path = "src/lib.rs"
+bench = false
+
+[[test]]
+name = "codegen"
+path = "tests/codegen.rs"
+
+[dependencies.beef]
+version = "0.5.0"
+
+[dependencies.fnv]
+version = "1.0.6"
+
+[dependencies.lazy_static]
+version = "1.4.0"
+
+[dependencies.proc-macro2]
+version = "1.0.9"
+
+[dependencies.quote]
+version = "1.0.3"
+
+[dependencies.regex-syntax]
+version = "0.8.2"
+
+[dependencies.syn]
+version = "2.0.13"
+features = ["full"]
+
+[dev-dependencies.pretty_assertions]
+version = "1.4.0"
+
+[dev-dependencies.rstest]
+version = "0.23.0"
+
+[build-dependencies.rustc_version]
+version = "0.4.1"
+
+[features]
+debug = []
+forbid_unsafe = []
+fuzzing = []
diff --git a/vendor/logos-codegen/LICENSE-APACHE b/vendor/logos-codegen/LICENSE-APACHE
new file mode 100644
index 00000000..c6592a3f
--- /dev/null
+++ b/vendor/logos-codegen/LICENSE-APACHE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+Copyright 2018 Maciej Hirsz <maciej.hirsz@gmail.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/vendor/logos-codegen/LICENSE-MIT b/vendor/logos-codegen/LICENSE-MIT
new file mode 100644
index 00000000..e22f0271
--- /dev/null
+++ b/vendor/logos-codegen/LICENSE-MIT
@@ -0,0 +1,21 @@
+Copyright (c) 2018 Maciej Hirsz <maciej.hirsz@gmail.com>
+
+The MIT License (MIT)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/logos-codegen/README.md b/vendor/logos-codegen/README.md
new file mode 100644
index 00000000..ba0937ff
--- /dev/null
+++ b/vendor/logos-codegen/README.md
@@ -0,0 +1,115 @@
+<img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right">
+
+# Logos
+
+![Test](https://github.com/maciejhirsz/logos/workflows/Test/badge.svg?branch=master)
+[![Crates.io version shield](https://img.shields.io/crates/v/logos.svg)](https://crates.io/crates/logos)
+[![Docs](https://docs.rs/logos/badge.svg)](https://docs.rs/logos)
+[![Crates.io license shield](https://img.shields.io/crates/l/logos.svg)](https://crates.io/crates/logos)
+
+_Create ridiculously fast Lexers._
+
+**Logos** has two goals:
+
++ To make it easy to create a Lexer, so you can focus on more complex problems.
++ To make the generated Lexer faster than anything you'd write by hand.
+
+To achieve those, **Logos**:
+
++ Combines all token definitions into a single [deterministic state machine](https://en.wikipedia.org/wiki/Deterministic_finite_automaton).
++ Optimizes branches into [lookup tables](https://en.wikipedia.org/wiki/Lookup_table) or [jump tables](https://en.wikipedia.org/wiki/Branch_table).
++ Prevents [backtracking](https://en.wikipedia.org/wiki/ReDoS) inside token definitions.
++ [Unwinds loops](https://en.wikipedia.org/wiki/Loop_unrolling), and batches reads to minimize bounds checking.
++ Does all of that heavy lifting at compile time.
+
+## Example
+
+```rust
+use logos::Logos;
+
+#[derive(Logos, Debug, PartialEq)]
+#[logos(skip r"[ \t\n\f]+")] // Ignore this regex pattern between tokens
+enum Token {
+ // Tokens can be literal strings, of any length.
+ #[token("fast")]
+ Fast,
+
+ #[token(".")]
+ Period,
+
+ // Or regular expressions.
+ #[regex("[a-zA-Z]+")]
+ Text,
+}
+
+fn main() {
+ let mut lex = Token::lexer("Create ridiculously fast Lexers.");
+
+ assert_eq!(lex.next(), Some(Ok(Token::Text)));
+ assert_eq!(lex.span(), 0..6);
+ assert_eq!(lex.slice(), "Create");
+
+ assert_eq!(lex.next(), Some(Ok(Token::Text)));
+ assert_eq!(lex.span(), 7..19);
+ assert_eq!(lex.slice(), "ridiculously");
+
+ assert_eq!(lex.next(), Some(Ok(Token::Fast)));
+ assert_eq!(lex.span(), 20..24);
+ assert_eq!(lex.slice(), "fast");
+
+ assert_eq!(lex.next(), Some(Ok(Token::Text)));
+ assert_eq!(lex.slice(), "Lexers");
+ assert_eq!(lex.span(), 25..31);
+
+ assert_eq!(lex.next(), Some(Ok(Token::Period)));
+ assert_eq!(lex.span(), 31..32);
+ assert_eq!(lex.slice(), ".");
+
+ assert_eq!(lex.next(), None);
+}
+```
+
+For more examples and documentation, please refer to the
+[Logos handbook](https://maciejhirsz.github.io/logos/) or the
+[crate documentation](https://docs.rs/logos/latest/logos/).
+
+## How fast?
+
+Ridiculously fast!
+
+```norust
+test identifiers ... bench: 647 ns/iter (+/- 27) = 1204 MB/s
+test keywords_operators_and_punctators ... bench: 2,054 ns/iter (+/- 78) = 1037 MB/s
+test strings ... bench: 553 ns/iter (+/- 34) = 1575 MB/s
+```
+
+## Acknowledgements
+
++ [Pedrors](https://pedrors.pt/) for the **Logos** logo.
+
+## Thank you
+
+**Logos** is very much a labor of love. If you find it useful, consider
+[getting me some coffee](https://github.com/sponsors/maciejhirsz). ☕
+
+If you'd like to contribute to Logos, then consider reading the
+[Contributing guide](https://maciejhirsz.github.io/logos/contributing).
+
+## Contributing
+
+**Logos** welcome any kind of contribution: bug reports, suggestions,
+or new features!
+
+Please use the
+[issues](https://github.com/maciejhirsz/logos/issues) or
+[pull requests](https://github.com/maciejhirsz/logos/pulls) tabs,
+when appropriate.
+
+To release a new version, follow the [RELEASE-PROCESS](RELEASE-PROCESS.md)
+
+## License
+
+This code is distributed under the terms of both the MIT license
+and the Apache License (Version 2.0), choose whatever works for you.
+
+See [LICENSE-APACHE](LICENSE-APACHE) and [LICENSE-MIT](LICENSE-MIT) for details.
diff --git a/vendor/logos-codegen/build.rs b/vendor/logos-codegen/build.rs
new file mode 100644
index 00000000..f11a575f
--- /dev/null
+++ b/vendor/logos-codegen/build.rs
@@ -0,0 +1,21 @@
+use rustc_version::{version_meta, Version};
+
+fn main() {
+ let version_meta = version_meta().expect("Could not get Rust version");
+
+ let rustc_version = version_meta.semver;
+ let trimmed_rustc_version = Version::new(
+ rustc_version.major,
+ rustc_version.minor,
+ rustc_version.patch,
+ );
+
+ // Add cfg flag for Rust >= 1.82
+ // Required for precise capturing in edition 2024
+ // Due to changes in lifetime and type capture behavior for impl trait
+ // see: https://github.com/maciejhirsz/logos/issues/434, https://github.com/rust-lang/rfcs/pull/3498
+ println!("cargo:rustc-check-cfg=cfg(rust_1_82)");
+ if trimmed_rustc_version >= Version::new(1, 82, 0) {
+ println!("cargo:rustc-cfg=rust_1_82");
+ }
+}
diff --git a/vendor/logos-codegen/src/error.rs b/vendor/logos-codegen/src/error.rs
new file mode 100644
index 00000000..64f93c01
--- /dev/null
+++ b/vendor/logos-codegen/src/error.rs
@@ -0,0 +1,110 @@
+use std::fmt;
+
+use beef::lean::Cow;
+use proc_macro2::{Span, TokenStream};
+use quote::quote;
+use quote::{quote_spanned, ToTokens, TokenStreamExt};
+
+pub type Result<T> = std::result::Result<T, Error>;
+
+#[derive(Default)]
+pub struct Errors {
+ collected: Vec<SpannedError>,
+}
+
+impl Errors {
+ pub fn err<M>(&mut self, message: M, span: Span) -> &mut Self
+ where
+ M: Into<Cow<'static, str>>,
+ {
+ self.collected.push(SpannedError {
+ message: message.into(),
+ span,
+ });
+
+ self
+ }
+
+ pub fn render(self) -> Option<TokenStream> {
+ let errors = self.collected;
+
+ match errors.len() {
+ 0 => None,
+ _ => Some(quote! {
+ fn _logos_derive_compile_errors() {
+ #(#errors)*
+ }
+ }),
+ }
+ }
+}
+
+pub struct Error(Cow<'static, str>);
+
+#[derive(Debug)]
+pub struct SpannedError {
+ message: Cow<'static, str>,
+ span: Span,
+}
+
+impl Error {
+ pub fn new<M>(message: M) -> Self
+ where
+ M: Into<Cow<'static, str>>,
+ {
+ Error(message.into())
+ }
+
+ pub fn span(self, span: Span) -> SpannedError {
+ SpannedError {
+ message: self.0,
+ span,
+ }
+ }
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ self.0.fmt(f)
+ }
+}
+
+impl fmt::Debug for Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Display::fmt(self, f)
+ }
+}
+
+impl From<regex_syntax::Error> for Error {
+ fn from(err: regex_syntax::Error) -> Error {
+ Error(err.to_string().into())
+ }
+}
+
+impl From<&'static str> for Error {
+ fn from(err: &'static str) -> Error {
+ Error(err.into())
+ }
+}
+
+impl From<String> for Error {
+ fn from(err: String) -> Error {
+ Error(err.into())
+ }
+}
+
+impl From<Error> for Cow<'static, str> {
+ fn from(err: Error) -> Self {
+ err.0
+ }
+}
+
+impl ToTokens for SpannedError {
+ fn to_tokens(&self, tokens: &mut TokenStream) {
+ let message = &*self.message;
+
+ tokens.append_all(quote_spanned!(self.span => {
+ compile_error!(#message)
+ }))
+ }
+}
diff --git a/vendor/logos-codegen/src/generator/context.rs b/vendor/logos-codegen/src/generator/context.rs
new file mode 100644
index 00000000..dc52f594
--- /dev/null
+++ b/vendor/logos-codegen/src/generator/context.rs
@@ -0,0 +1,128 @@
+use proc_macro2::TokenStream;
+use quote::quote;
+
+use crate::generator::Generator;
+use crate::graph::NodeId;
+
+/// This struct keeps track of bytes available to be read without
+/// bounds checking across the tree.
+///
+/// For example, a branch that matches 4 bytes followed by a fork
+/// with smallest branch containing of 2 bytes can do a bounds check
+/// for 6 bytes ahead, and leave the remaining 2 byte array (fixed size)
+/// to be handled by the fork, avoiding bound checks there.
+#[derive(Default, Clone, Copy, PartialEq, Eq, Hash, Debug)]
+pub struct Context {
+ /// Amount of bytes that haven't been bumped yet but should
+ /// before a new read is performed
+ at: usize,
+ /// Number of bytes available without bound checks
+ available: usize,
+ /// Whether or not the Lexer has been bumped at least by 1 byte
+ bumped: bool,
+ /// Node to backtrack to to in case an explicit match has failed.
+ /// If `None` will instead produce an error token.
+ backtrack: Option<NodeId>,
+}
+
+impl Context {
+ pub fn can_backtrack(&self) -> bool {
+ self.backtrack.is_some()
+ }
+
+ pub fn switch(&mut self, miss: Option<NodeId>) -> Option<TokenStream> {
+ self.backtrack = Some(miss?);
+ self.bump()
+ }
+
+ pub const fn advance(self, n: usize) -> Self {
+ Context {
+ at: self.at + n,
+ ..self
+ }
+ }
+
+ pub fn bump(&mut self) -> Option<TokenStream> {
+ match self.at {
+ 0 => None,
+ n => {
+ let tokens = quote!(lex.bump_unchecked(#n););
+ self.at = 0;
+ self.available = 0;
+ self.bumped = true;
+ Some(tokens)
+ }
+ }
+ }
+
+ pub fn remainder(&self) -> usize {
+ self.available.saturating_sub(self.at)
+ }
+
+ pub fn read_byte(&mut self) -> TokenStream {
+ let at = self.at;
+
+ self.advance(1);
+
+ #[cfg(not(feature = "forbid_unsafe"))]
+ {
+ quote!(unsafe { lex.read_byte_unchecked(#at) })
+ }
+
+ #[cfg(feature = "forbid_unsafe")]
+ {
+ quote!(lex.read_byte(#at))
+ }
+ }
+
+ pub fn read(&mut self, len: usize) -> TokenStream {
+ self.available = len;
+
+ match (self.at, len) {
+ (0, 0) => quote!(lex.read::<u8>()),
+ (a, 0) => quote!(lex.read_at::<u8>(#a)),
+ (0, l) => quote!(lex.read::<&[u8; #l]>()),
+ (a, l) => quote!(lex.read_at::<&[u8; #l]>(#a)),
+ }
+ }
+
+ pub fn wipe(&mut self) {
+ self.available = 0;
+ }
+
+ const fn backtrack(self) -> Self {
+ Context {
+ at: 0,
+ available: 0,
+ bumped: self.bumped,
+ backtrack: None,
+ }
+ }
+
+ pub fn miss(mut self, miss: Option<NodeId>, gen: &mut Generator) -> TokenStream {
+ self.wipe();
+ match (miss, self.backtrack) {
+ (Some(id), _) => gen.goto(id, self).clone(),
+ (_, Some(id)) => gen.goto(id, self.backtrack()).clone(),
+ _ if self.bumped => quote!(lex.error()),
+ _ => quote!(_error(lex)),
+ }
+ }
+
+ pub fn write_suffix(&self, buf: &mut String) {
+ use std::fmt::Write;
+
+ if self.at > 0 {
+ let _ = write!(buf, "_at{}", self.at);
+ }
+ if self.available > 0 {
+ let _ = write!(buf, "_with{}", self.available);
+ }
+ if let Some(id) = self.backtrack {
+ let _ = write!(buf, "_ctx{}", id);
+ }
+ if self.bumped {
+ buf.push_str("_x");
+ }
+ }
+}
diff --git a/vendor/logos-codegen/src/generator/fork.rs b/vendor/logos-codegen/src/generator/fork.rs
new file mode 100644
index 00000000..11d4eab2
--- /dev/null
+++ b/vendor/logos-codegen/src/generator/fork.rs
@@ -0,0 +1,216 @@
+use std::cmp::max;
+
+use fnv::FnvHashMap as Map;
+use proc_macro2::TokenStream;
+use quote::quote;
+
+use crate::generator::{Context, Generator};
+use crate::graph::{Fork, NodeId, Range};
+use crate::util::ToIdent;
+
+type Targets = Map<NodeId, Vec<Range>>;
+
impl Generator<'_> {
    /// Renders code for a fork node: either a fast self-loop, a plain
    /// `match` on the next byte (at most two targets), or a jump
    /// table for wider dispatch.
    pub fn generate_fork(&mut self, this: NodeId, fork: &Fork, mut ctx: Context) -> TokenStream {
        // Group ranges by target node so each target gets one arm.
        let mut targets: Targets = Map::default();

        for (range, then) in fork.branches() {
            targets.entry(then).or_default().push(range);
        }
        let loops_to_self = self.meta[this].loop_entry_from.contains(&this);

        match targets.len() {
            1 if loops_to_self => return self.generate_fast_loop(fork, ctx),
            0..=2 => (),
            _ => return self.generate_fork_jump_table(this, fork, targets, ctx),
        }
        let miss = ctx.miss(fork.miss, self);
        let end = self.fork_end(this, &miss);
        let (byte, read) = self.fork_read(this, end, &mut ctx);
        let branches = targets.into_iter().map(|(id, ranges)| {
            let next = self.goto(id, ctx.advance(1));

            match *ranges {
                [range] => {
                    quote!(#range => #next,)
                }
                [a, b] if a.is_byte() && b.is_byte() => {
                    quote!(#a | #b => #next,)
                }
                _ => {
                    // Too many ranges for a literal pattern: emit a
                    // helper predicate. Note this shadows the (then
                    // unused) `next` above; `goto` caches per
                    // `(id, ctx)`, so the repeat is just a map lookup.
                    let test = self.generate_test(ranges).clone();
                    let next = self.goto(id, ctx.advance(1));

                    quote!(byte if #test(byte) => #next,)
                }
            }
        });

        quote! {
            #read

            match #byte {
                #(#branches)*
                _ => #miss,
            }
        }
    }

    /// Renders a fork with many targets as a 256-entry lookup table
    /// mapping each byte to a small `Jump` enum, then matching on the
    /// looked-up variant.
    fn generate_fork_jump_table(
        &mut self,
        this: NodeId,
        fork: &Fork,
        targets: Targets,
        mut ctx: Context,
    ) -> TokenStream {
        let miss = ctx.miss(fork.miss, self);
        let end = self.fork_end(this, &miss);
        let (byte, read) = self.fork_read(this, end, &mut ctx);

        // Index 0 (`Jump::__`) is reserved for the miss path.
        let mut table: [u8; 256] = [0; 256];
        let mut jumps = vec!["__".to_ident()];

        let branches = targets
            .into_iter()
            .enumerate()
            .map(|(idx, (id, ranges))| {
                let idx = (idx as u8) + 1;
                let next = self.goto(id, ctx.advance(1));
                jumps.push(format!("J{}", id).to_ident());

                for byte in ranges.into_iter().flatten() {
                    table[byte as usize] = idx;
                }
                let jump = jumps.last().unwrap();

                quote!(Jump::#jump => #next,)
            })
            .collect::<TokenStream>();

        // If every byte maps to some target, the `__` variant and its
        // match arm can be omitted entirely.
        let may_error = table.iter().any(|&idx| idx == 0);

        let jumps = jumps.as_slice();
        let table = table.iter().copied().map(|idx| &jumps[idx as usize]);

        let jumps = if may_error { jumps } else { &jumps[1..] };
        let error_branch = if may_error {
            Some(quote!(Jump::__ => #miss))
        } else {
            None
        };

        quote! {
            enum Jump {
                #(#jumps,)*
            }

            const LUT: [Jump; 256] = {
                use Jump::*;

                [#(#table),*]
            };

            #read

            match LUT[#byte as usize] {
                #branches
                #error_branch
            }
        }
    }

    /// Code to run when input runs out: the root node finishes the
    /// token via `_end`, any other node falls back to its miss path.
    fn fork_end(&self, this: NodeId, miss: &TokenStream) -> TokenStream {
        if this == self.root {
            quote!(_end(lex))
        } else {
            miss.clone()
        }
    }

    /// Emits the read producing a fork's dispatch byte. Returns the
    /// expression naming that byte and the statement that reads it.
    fn fork_read(
        &self,
        this: NodeId,
        end: TokenStream,
        ctx: &mut Context,
    ) -> (TokenStream, TokenStream) {
        let min_read = self.meta[this].min_read;

        // Enough lookahead already read: no bounds check needed.
        if ctx.remainder() >= max(min_read, 1) {
            let read = ctx.read_byte();

            return (quote!(byte), quote!(let byte = #read;));
        }

        match min_read {
            0 | 1 => {
                let read = ctx.read(0);

                (
                    quote!(byte),
                    quote! {
                        let byte = match #read {
                            Some(byte) => byte,
                            None => return #end,
                        };
                    },
                )
            }
            // Read `len` bytes in one bounds check and dispatch on
            // the first; deeper nodes consume the rest via the
            // context's lookahead.
            len => {
                let read = ctx.read(len);

                (
                    quote!(arr[0]),
                    quote! {
                        let arr = match #read {
                            Some(arr) => arr,
                            None => return #end,
                        };
                    },
                )
            }
        }
    }

    /// Renders a fork that loops back into itself over a byte class
    /// as a tight loop using the `_fast_loop!` macro.
    fn generate_fast_loop(&mut self, fork: &Fork, ctx: Context) -> TokenStream {
        let miss = ctx.miss(fork.miss, self);
        let ranges = fork.branches().map(|(range, _)| range).collect::<Vec<_>>();
        let test = self.generate_test(ranges);

        quote! {
            _fast_loop!(lex, #test, #miss);
        }
    }

    /// Defines `_fast_loop!`: consumes matching bytes 16 at a time
    /// with a single bounds check per chunk, falling back to a
    /// byte-by-byte loop once fewer than 16 bytes remain.
    pub fn fast_loop_macro() -> TokenStream {
        quote! {
            macro_rules! _fast_loop {
                ($lex:ident, $test:ident, $miss:expr) => {
                    // Do one bounds check for multiple bytes till EOF
                    while let Some(arr) = $lex.read::<&[u8; 16]>() {
                        if $test(arr[0]) { if $test(arr[1]) { if $test(arr[2]) { if $test(arr[3]) {
                        if $test(arr[4]) { if $test(arr[5]) { if $test(arr[6]) { if $test(arr[7]) {
                        if $test(arr[8]) { if $test(arr[9]) { if $test(arr[10]) { if $test(arr[11]) {
                        if $test(arr[12]) { if $test(arr[13]) { if $test(arr[14]) { if $test(arr[15]) {

                        $lex.bump_unchecked(16); continue; } $lex.bump_unchecked(15); return $miss; }
                        $lex.bump_unchecked(14); return $miss; } $lex.bump_unchecked(13); return $miss; }
                        $lex.bump_unchecked(12); return $miss; } $lex.bump_unchecked(11); return $miss; }
                        $lex.bump_unchecked(10); return $miss; } $lex.bump_unchecked(9); return $miss; }
                        $lex.bump_unchecked(8); return $miss; } $lex.bump_unchecked(7); return $miss; }
                        $lex.bump_unchecked(6); return $miss; } $lex.bump_unchecked(5); return $miss; }
                        $lex.bump_unchecked(4); return $miss; } $lex.bump_unchecked(3); return $miss; }
                        $lex.bump_unchecked(2); return $miss; } $lex.bump_unchecked(1); return $miss; }

                        return $miss;
                    }

                    while $lex.test($test) {
                        $lex.bump_unchecked(1);
                    }

                    $miss
                };
            }
        }
    }
}
diff --git a/vendor/logos-codegen/src/generator/leaf.rs b/vendor/logos-codegen/src/generator/leaf.rs
new file mode 100644
index 00000000..0841a4ff
--- /dev/null
+++ b/vendor/logos-codegen/src/generator/leaf.rs
@@ -0,0 +1,67 @@
+use proc_macro2::TokenStream;
+use quote::quote;
+
+use crate::generator::{Context, Generator};
+use crate::leaf::{Callback, Leaf};
+use crate::util::MaybeVoid;
+
impl Generator<'_> {
    /// Renders the code executed when the lexer reaches a leaf node:
    /// bump the consumed bytes, run any callback, and set the token.
    pub fn generate_leaf(&mut self, leaf: &Leaf, mut ctx: Context) -> TokenStream {
        let bump = ctx.bump();

        let ident = &leaf.ident;
        let name = self.name;
        let this = self.this;
        let ty = &leaf.field;

        // Unit variants need a closure adapting the `()` callback
        // result; tuple variants use the variant constructor itself.
        let constructor = match leaf.field {
            MaybeVoid::Some(_) => quote!(#name::#ident),
            MaybeVoid::Void => quote!(|()| #name::#ident),
        };

        match &leaf.callback {
            // Callback referenced by path: call it and let its
            // `CallbackResult` impl construct the token.
            Some(Callback::Label(callback)) => quote! {
                #bump
                #callback(lex).construct(#constructor, lex);
            },
            // Inline closure: wrap it in a local fn so the opaque
            // return type can be named.
            Some(Callback::Inline(inline)) => {
                let arg = &inline.arg;
                let body = &inline.body;

                // Rust 1.82+ requires `use<'s>` to capture the
                // lifetime in the opaque return type.
                #[cfg(not(rust_1_82))]
                let ret = quote!(impl CallbackResult<'s, #ty, #this>);

                #[cfg(rust_1_82)]
                let ret = quote!(impl CallbackResult<'s, #ty, #this> + use<'s>);

                quote! {
                    #bump

                    #[inline]
                    fn callback<'s>(#arg: &mut Lexer<'s>) -> #ret {
                        #body
                    }

                    callback(lex).construct(#constructor, lex);
                }
            }
            // `skip` callback: discard the match and lex again.
            Some(Callback::Skip(_)) => {
                quote! {
                    #bump

                    lex.trivia();
                    #name::lex(lex);
                }
            }
            // No callback, unit variant: produce it directly.
            None if matches!(leaf.field, MaybeVoid::Void) => quote! {
                #bump
                lex.set(Ok(#name::#ident));
            },
            // No callback, tuple variant: pass the matched slice.
            None => quote! {
                #bump
                let token = #name::#ident(lex.slice());
                lex.set(Ok(token));
            },
        }
    }
}
diff --git a/vendor/logos-codegen/src/generator/mod.rs b/vendor/logos-codegen/src/generator/mod.rs
new file mode 100644
index 00000000..1b8bf8bf
--- /dev/null
+++ b/vendor/logos-codegen/src/generator/mod.rs
@@ -0,0 +1,268 @@
+use fnv::{FnvHashMap as Map, FnvHashSet as Set};
+use proc_macro2::TokenStream;
+use quote::{quote, ToTokens, TokenStreamExt};
+use syn::Ident;
+
+use crate::graph::{Graph, Meta, Node, NodeId, Range};
+use crate::leaf::Leaf;
+use crate::util::ToIdent;
+
+mod context;
+mod fork;
+mod leaf;
+mod rope;
+mod tables;
+
+use self::context::Context;
+use self::tables::TableStack;
+
/// Code generator turning the finished node graph into the body of a
/// `Logos` trait implementation.
pub struct Generator<'a> {
    /// Name of the type we are implementing the `Logos` trait for
    name: &'a Ident,
    /// Name of the type with any generics it might need
    this: &'a TokenStream,
    /// Id to the root node
    root: NodeId,
    /// Reference to the graph with all of the nodes
    graph: &'a Graph<Leaf<'a>>,
    /// Meta data collected for the nodes
    meta: Meta,
    /// Buffer with functions growing during generation
    rendered: TokenStream,
    /// Set of functions that have already been rendered
    fns: Set<(NodeId, Context)>,
    /// Function name identifiers
    idents: Map<(NodeId, Context), Ident>,
    /// Local function calls. Note: a call might change its context,
    /// so we can't use `idents` for this purpose.
    gotos: Map<(NodeId, Context), TokenStream>,
    /// Identifiers for helper functions matching a byte to a given
    /// set of ranges
    tests: Map<Vec<Range>, Ident>,
    /// Related to above, table stack manages tables that need to be
    /// rendered as shared static lookup tables for those helpers
    tables: TableStack,
}
+
impl<'a> Generator<'a> {
    /// Creates a generator for `graph` rooted at `root`, pre-rendering
    /// the `_fast_loop!` helper macro and analyzing node metadata.
    pub fn new(
        name: &'a Ident,
        this: &'a TokenStream,
        root: NodeId,
        graph: &'a Graph<Leaf>,
    ) -> Self {
        let rendered = Self::fast_loop_macro();
        let meta = Meta::analyze(root, graph);

        Generator {
            name,
            this,
            root,
            graph,
            meta,
            rendered,
            fns: Set::default(),
            idents: Map::default(),
            gotos: Map::default(),
            tests: Map::default(),
            tables: TableStack::new(),
        }
    }

    /// Renders the whole state machine: lookup tables, all generated
    /// functions, and the entry call jumping to the root node.
    pub fn generate(mut self) -> TokenStream {
        let root = self.goto(self.root, Context::default()).clone();
        let rendered = &self.rendered;
        let tables = &self.tables;

        quote! {
            #tables
            #rendered
            #root
        }
    }

    /// Renders the function for node `id` under `ctx`, exactly once
    /// per `(id, ctx)` pair.
    fn generate_fn(&mut self, id: NodeId, ctx: Context) {
        if self.fns.contains(&(id, ctx)) {
            return;
        }
        self.fns.insert((id, ctx));

        let body = match &self.graph[id] {
            Node::Fork(fork) => self.generate_fork(id, fork, ctx),
            Node::Rope(rope) => self.generate_rope(rope, ctx),
            Node::Leaf(leaf) => self.generate_leaf(leaf, ctx),
        };
        let ident = self.generate_ident(id, ctx);
        let out = quote! {
            #[inline]
            fn #ident<'s>(lex: &mut Lexer<'s>) {
                #body
            }
        };

        self.rendered.append_all(out);
    }

    /// Returns (and caches) the call-site tokens that jump to node
    /// `id`, emitting any required bump first, and makes sure the
    /// target function itself gets rendered.
    fn goto(&mut self, id: NodeId, mut ctx: Context) -> &TokenStream {
        let key = (id, ctx);

        // Allow contains_key + insert because self.generate_ident borrows a mutable ref to self
        // too.
        #[allow(clippy::map_entry)]
        if !self.gotos.contains_key(&key) {
            let meta = &self.meta[id];
            let enters_loop = !meta.loop_entry_from.is_empty();

            // Entering a loop, or a context that cannot backtrack,
            // must settle pending bytes first. NOTE(review): `switch`
            // is defined earlier in this file (not in this hunk) —
            // presumably it installs the node's miss as the new
            // backtrack point and may emit a bump; confirm there.
            let bump = if enters_loop || !ctx.can_backtrack() {
                ctx.switch(self.graph[id].miss())
            } else {
                None
            };

            let bump = match (bump, enters_loop, meta.min_read) {
                (Some(t), _, _) => Some(t),
                (None, true, _) => ctx.bump(),
                (None, false, 0) => ctx.bump(),
                (None, false, _) => None,
            };

            // Not enough lookahead for the target node: drop it.
            if meta.min_read == 0 || ctx.remainder() < meta.min_read {
                ctx.wipe();
            }

            let ident = self.generate_ident(id, ctx);
            let mut call_site = quote!(#ident(lex));

            if let Some(bump) = bump {
                call_site = quote!({
                    #bump
                    #call_site
                });
            }
            self.gotos.insert(key, call_site);
            self.generate_fn(id, ctx);
        }
        &self.gotos[&key]
    }

    /// Function name for `(id, ctx)`, e.g. `goto3_at1_with4`.
    fn generate_ident(&mut self, id: NodeId, ctx: Context) -> &Ident {
        self.idents.entry((id, ctx)).or_insert_with(|| {
            let mut ident = format!("goto{}", id);

            ctx.write_suffix(&mut ident);

            ident.to_ident()
        })
    }

    /// Returns an identifier to a function that matches a byte to any
    /// of the provided ranges. This will generate either a simple
    /// match expression, or use a lookup table internally.
    fn generate_test(&mut self, ranges: Vec<Range>) -> &Ident {
        if !self.tests.contains_key(&ranges) {
            let idx = self.tests.len();
            let ident = format!("pattern{}", idx).to_ident();

            // `ranges` is sorted and non-empty here, so `lo` and `hi`
            // bound the whole byte set.
            let lo = ranges.first().unwrap().start;
            let hi = ranges.last().unwrap().end;

            let body = match ranges.len() {
                // Few ranges: a plain match is smallest and fastest.
                0..=2 => {
                    quote! {
                        match byte {
                            #(#ranges)|* => true,
                            _ => false,
                        }
                    }
                }
                // Whole set fits in a 64-bit window: test membership
                // via a shifted bitmask constant.
                _ if hi - lo < 64 => {
                    let mut offset = hi.saturating_sub(63);

                    // Nudge `offset` toward a value with a single set
                    // bit (cheap to subtract), without exceeding `lo`.
                    while offset.count_ones() > 1 && lo - offset > 0 {
                        offset += 1;
                    }

                    let mut table = 0u64;

                    for byte in ranges.iter().flat_map(|range| *range) {
                        if byte - offset >= 64 {
                            panic!("{:#?} {} {} {}", ranges, hi, lo, offset);
                        }
                        table |= 1 << (byte - offset);
                    }

                    let search = match offset {
                        0 => quote!(byte),
                        _ => quote!(byte.wrapping_sub(#offset)),
                    };

                    quote! {
                        const LUT: u64 = #table;

                        match 1u64.checked_shl(#search as u32) {
                            Some(shift) => LUT & shift != 0,
                            None => false,
                        }
                    }
                }
                // Bytes spread too wide: flag them in one bit-plane
                // of a shared 256-entry table.
                _ => {
                    let mut view = self.tables.view();

                    for byte in ranges.iter().flat_map(|range| *range) {
                        view.flag(byte);
                    }

                    let mask = view.mask();
                    let lut = view.ident();

                    quote! {
                        #lut[byte as usize] & #mask > 0
                    }
                }
            };
            self.rendered.append_all(quote! {
                #[inline]
                fn #ident(byte: u8) -> bool {
                    #body
                }
            });
            self.tests.insert(ranges.clone(), ident);
        }
        &self.tests[&ranges]
    }
}
+
/// Expands to a `match` that maps each listed byte literal to a
/// `quote!` of that same literal token, with a fallback arm that
/// interpolates the numeric value.
macro_rules! match_quote {
    ($source:expr; $($byte:tt,)* ) => {match $source {
        $( $byte => quote!($byte), )*
        byte => quote!(#byte),
    }}
}
+
/// Converts a byte to tokens: the listed printable ASCII bytes render
/// as readable byte literals (e.g. `b'a'`) in the generated code,
/// anything else falls back to its numeric value.
fn byte_to_tokens(byte: u8) -> TokenStream {
    match_quote! {
        byte;
        b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9',
        b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
        b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
        b'u', b'v', b'w', b'x', b'y', b'z',
        b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J',
        b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T',
        b'U', b'V', b'W', b'X', b'Y', b'Z',
        b'!', b'@', b'#', b'$', b'%', b'^', b'&', b'*', b'(', b')',
        b'{', b'}', b'[', b']', b'<', b'>', b'-', b'=', b'_', b'+',
        b':', b';', b',', b'.', b'/', b'?', b'|', b'"', b'\'', b'\\',
    }
}
+
impl ToTokens for Range {
    // Renders as a single byte literal when `start == end`, or as an
    // inclusive pattern `start..=end` otherwise.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let Range { start, end } = self;

        tokens.append_all(byte_to_tokens(*start));

        if start != end {
            tokens.append_all(quote!(..=));
            tokens.append_all(byte_to_tokens(*end));
        }
    }
}
diff --git a/vendor/logos-codegen/src/generator/rope.rs b/vendor/logos-codegen/src/generator/rope.rs
new file mode 100644
index 00000000..ae3b07ad
--- /dev/null
+++ b/vendor/logos-codegen/src/generator/rope.rs
@@ -0,0 +1,39 @@
+use proc_macro2::TokenStream;
+use quote::quote;
+
+use crate::generator::{Context, Generator};
+use crate::graph::Rope;
+
impl Generator<'_> {
    /// Renders code for a rope node: read the pattern's length worth
    /// of bytes in one go and compare them against the pattern with a
    /// single `match`.
    pub fn generate_rope(&mut self, rope: &Rope, mut ctx: Context) -> TokenStream {
        let miss = ctx.miss(rope.miss.first(), self);
        let read = ctx.read(rope.pattern.len());
        let then = self.goto(rope.then, ctx.advance(rope.pattern.len()));

        // Exact byte patterns become a byte-string/slice literal;
        // patterns containing ranges become a slice pattern made of
        // range sub-patterns.
        let pat = match rope.pattern.to_bytes() {
            Some(bytes) => byte_slice_literal(&bytes),
            None => {
                let ranges = rope.pattern.iter();

                quote!([#(#ranges),*])
            }
        };

        quote! {
            match #read {
                Some(#pat) => #then,
                _ => #miss,
            }
        }
    }
}
+
+fn byte_slice_literal(bytes: &[u8]) -> TokenStream {
+ if bytes.iter().any(|&b| !(0x20..0x7F).contains(&b)) {
+ return quote!(&[#(#bytes),*]);
+ }
+
+ let slice = std::str::from_utf8(bytes).unwrap();
+
+ syn::parse_str(&format!("b{:?}", slice)).unwrap()
+}
diff --git a/vendor/logos-codegen/src/generator/tables.rs b/vendor/logos-codegen/src/generator/tables.rs
new file mode 100644
index 00000000..f1e53273
--- /dev/null
+++ b/vendor/logos-codegen/src/generator/tables.rs
@@ -0,0 +1,77 @@
+use crate::util::ToIdent;
+use proc_macro2::{Literal, TokenStream};
+use quote::{quote, ToTokens};
+use syn::Ident;
+
/// Stack of 256-entry bitmask tables. Each table packs up to eight
/// byte-class predicates, one per bit.
pub struct TableStack {
    /// Identifier and contents of each table to be rendered.
    tables: Vec<(Ident, [u8; 256])>,
    /// Number of bits handed out from the top table (0 = none yet).
    shift: u8,
}
+
/// One bit-plane of a table on the `TableStack`, used to record a
/// single byte-class predicate.
pub struct TableView<'a> {
    /// Name of the static table backing this view.
    ident: &'a Ident,
    /// The 256-entry table being written into.
    table: &'a mut [u8; 256],
    /// The single bit this view is allowed to set.
    mask: u8,
}
+
impl TableStack {
    /// Creates a stack holding one pre-allocated (still unused)
    /// table; nothing is rendered until a view has been taken.
    pub fn new() -> Self {
        TableStack {
            tables: vec![("COMPACT_TABLE_0".to_ident(), [0; 256])],
            shift: 0,
        }
    }

    /// Hands out the next free bit of the top table, pushing a fresh
    /// table once all eight bits are taken.
    pub fn view(&mut self) -> TableView {
        let mask = if self.shift < 8 {
            // Reusing existing table with a shifted mask
            let mask = 1u8 << self.shift;

            self.shift += 1;

            mask
        } else {
            // Need to create a new table
            let ident = format!("COMPACT_TABLE_{}", self.tables.len()).to_ident();

            self.tables.push((ident, [0; 256]));
            self.shift = 1;

            1
        };

        let (ref ident, ref mut table) = self.tables.last_mut().unwrap();

        TableView { ident, table, mask }
    }
}
+
impl<'a> TableView<'a> {
    /// Identifier of the static table backing this view.
    pub fn ident(&self) -> &'a Ident {
        self.ident
    }

    /// Marks `byte` as a member of this view's byte class.
    pub fn flag(&mut self, byte: u8) {
        self.table[byte as usize] |= self.mask;
    }

    /// This view's bit as an unsuffixed literal for interpolation
    /// into generated code.
    pub fn mask(&self) -> Literal {
        Literal::u8_unsuffixed(self.mask)
    }
}
+
impl ToTokens for TableStack {
    // Renders every table as a `static [u8; 256]`; emits nothing at
    // all when no view was ever taken (`shift == 0`).
    fn to_tokens(&self, out: &mut TokenStream) {
        if self.shift == 0 {
            return;
        }

        for (ident, table) in self.tables.iter() {
            let bytes = table.iter().copied().map(Literal::u8_unsuffixed);

            out.extend(quote! {
                static #ident: [u8; 256] = [#(#bytes),*];
            });
        }
    }
}
diff --git a/vendor/logos-codegen/src/graph/fork.rs b/vendor/logos-codegen/src/graph/fork.rs
new file mode 100644
index 00000000..6b59836b
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/fork.rs
@@ -0,0 +1,267 @@
+use crate::graph::{Disambiguate, Graph, NodeId, Range};
+
/// Graph node that dispatches on a single byte of input.
#[derive(Clone)]
pub struct Fork {
    /// LUT matching byte -> node id
    lut: Box<[Option<NodeId>; 256]>,
    /// State to go to if no arms are matching
    pub miss: Option<NodeId>,
}
+
impl Fork {
    /// Creates an empty fork with no branches and no miss state.
    pub fn new() -> Self {
        Fork {
            lut: Box::new([None; 256]),
            miss: None,
        }
    }

    /// Builder-style setter for the miss state.
    pub fn miss<M>(mut self, miss: M) -> Self
    where
        M: Into<Option<NodeId>>,
    {
        self.miss = miss.into();
        self
    }

    /// Adds a branch for every byte in `range`; conflicting targets
    /// are merged through the graph.
    pub fn add_branch<R, T>(&mut self, range: R, then: NodeId, graph: &mut Graph<T>)
    where
        R: Into<Range>,
        T: Disambiguate,
    {
        for byte in range.into() {
            match &mut self.lut[byte as usize] {
                Some(other) if *other != then => {
                    *other = graph.merge(*other, then);
                }
                opt => *opt = Some(then),
            }
        }
    }

    // TODO: Add result with a printable error
    /// Merges `other` into this fork, merging both the miss states
    /// and any conflicting byte targets through the graph.
    pub fn merge<T>(&mut self, other: Fork, graph: &mut Graph<T>)
    where
        T: Disambiguate,
    {
        self.miss = match (self.miss, other.miss) {
            (None, None) => None,
            (Some(id), None) | (None, Some(id)) => Some(id),
            (Some(a), Some(b)) => Some(graph.merge(a, b)),
        };

        for (left, right) in self.lut.iter_mut().zip(other.lut.iter()) {
            *left = match (*left, *right) {
                (None, None) => continue,
                (Some(id), None) | (None, Some(id)) => Some(id),
                (Some(a), Some(b)) => Some(graph.merge(a, b)),
            }
        }
    }

    /// Iterator over the branches, collapsing contiguous bytes with
    /// the same target into `(Range, NodeId)` arms.
    pub fn branches(&self) -> ForkIter<'_> {
        ForkIter {
            offset: 0,
            lut: &self.lut,
        }
    }

    /// Checks if all bytes in the `range` have a branch on this
    /// fork, and those branches resolve to the same `NodeId`.
    pub fn contains<R>(&self, range: R) -> Option<NodeId>
    where
        R: Into<Range>,
    {
        let mut range = range.into();
        let byte = range.next()?;
        let first = self.lut[byte as usize]?;

        for byte in range {
            if first != self.lut[byte as usize]? {
                return None;
            }
        }

        Some(first)
    }

    /// Builder-style variant of `add_branch` that panics instead of
    /// merging when a byte already points at a different target.
    pub fn branch<R>(mut self, range: R, then: NodeId) -> Self
    where
        R: Into<Range>,
    {
        for byte in range.into() {
            match &mut self.lut[byte as usize] {
                Some(other) if *other != then => {
                    panic!("Overlapping branches");
                }
                opt => *opt = Some(then),
            }
        }
        self
    }

    /// Marks every node reachable from this fork in `filter`
    /// (mark phase of dead-node elimination).
    pub fn shake<T>(&self, graph: &Graph<T>, filter: &mut [bool]) {
        if let Some(id) = self.miss {
            if !filter[id.get()] {
                filter[id.get()] = true;
                graph[id].shake(graph, filter);
            }
        }

        for (_, id) in self.branches() {
            if !filter[id.get()] {
                filter[id.get()] = true;
                graph[id].shake(graph, filter);
            }
        }
    }
}
+
/// Iterator yielding a fork's branches as contiguous byte ranges.
pub struct ForkIter<'a> {
    /// Next LUT slot to inspect (0..=256).
    offset: usize,
    /// The fork's byte -> node id lookup table.
    lut: &'a [Option<NodeId>; 256],
}
+
+impl Iterator for ForkIter<'_> {
+ type Item = (Range, NodeId);
+
+ fn next(&mut self) -> Option<Self::Item> {
+ // Consume empty slots
+ self.offset += self.lut[self.offset..]
+ .iter()
+ .take_while(|next| next.is_none())
+ .count();
+
+ let then = self.lut.get(self.offset).copied().flatten()?;
+ let start = self.offset;
+
+ // Consume all slots with same NodeId target
+ self.offset += self.lut[self.offset..]
+ .iter()
+ .take_while(|next| **next == Some(then))
+ .count();
+
+ Some((
+ Range {
+ start: start as u8,
+ end: (self.offset - 1) as u8,
+ },
+ then,
+ ))
+ }
+}
+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::Node;
    use pretty_assertions::assert_eq;

    // Contiguous runs pointing at the same node collapse into ranges.
    #[test]
    fn fork_iter() {
        let mut buf = [None; 256];

        for byte in b'4'..=b'7' {
            buf[byte as usize] = Some(NodeId::new(1));
        }
        for byte in b'a'..=b'd' {
            buf[byte as usize] = Some(NodeId::new(2));
        }

        let iter = ForkIter {
            offset: 0,
            lut: &buf,
        };

        assert_eq!(
            &[
                (
                    Range {
                        start: b'4',
                        end: b'7'
                    },
                    NodeId::new(1)
                ),
                (
                    Range {
                        start: b'a',
                        end: b'd'
                    },
                    NodeId::new(2)
                ),
            ],
            &*iter.collect::<Vec<_>>(),
        );
    }

    // Merging forks with disjoint branches keeps both arms intact.
    #[test]
    fn merge_no_conflict() {
        let mut graph = Graph::new();

        let leaf1 = graph.push(Node::Leaf("FOO"));
        let leaf2 = graph.push(Node::Leaf("BAR"));

        let mut fork = Fork::new().branch(b'1', leaf1);

        fork.merge(Fork::new().branch(b'2', leaf2), &mut graph);

        assert_eq!(fork, Fork::new().branch(b'1', leaf1).branch(b'2', leaf2));
    }

    // A miss on the right-hand fork is adopted when the left has none.
    #[test]
    fn merge_miss_right() {
        let mut graph = Graph::new();

        let leaf1 = graph.push(Node::Leaf("FOO"));
        let leaf2 = graph.push(Node::Leaf("BAR"));

        let mut fork = Fork::new().branch(b'1', leaf1);

        fork.merge(Fork::new().miss(leaf2), &mut graph);

        assert_eq!(fork, Fork::new().branch(b'1', leaf1).miss(leaf2));
    }

    // A miss on the left-hand fork survives the merge unchanged.
    #[test]
    fn merge_miss_left() {
        let mut graph = Graph::new();

        let leaf1 = graph.push(Node::Leaf("FOO"));
        let leaf2 = graph.push(Node::Leaf("BAR"));

        let mut fork = Fork::new().miss(leaf1);

        fork.merge(Fork::new().branch(b'2', leaf2), &mut graph);

        assert_eq!(fork, Fork::new().branch(b'2', leaf2).miss(leaf1));
    }

    // `contains` finds a single byte within a wider branch.
    #[test]
    fn contains_byte() {
        let fork = Fork::new().branch('a'..='z', NodeId::new(42));

        assert_eq!(fork.contains(b't'), Some(NodeId::new(42)));
    }

    // Adjacent branches with one target act as a single covered range.
    #[test]
    fn contains_range() {
        let fork = Fork::new()
            .branch('a'..='m', NodeId::new(42))
            .branch('n'..='z', NodeId::new(42));

        assert_eq!(fork.contains('i'..='r'), Some(NodeId::new(42)));
        assert_eq!(fork.contains('a'..='z'), Some(NodeId::new(42)));
    }

    // Ranges spanning branches with different targets resolve to None.
    #[test]
    fn contains_different_ranges() {
        let fork = Fork::new()
            .branch('a'..='m', NodeId::new(42))
            .branch('n'..='z', NodeId::new(47));

        assert_eq!(fork.contains('i'..='r'), None);
        assert_eq!(fork.contains('a'..='z'), None);
        assert_eq!(fork.contains('d'..='f'), Some(NodeId::new(42)));
        assert_eq!(fork.contains('n'..='p'), Some(NodeId::new(47)));
    }
}
diff --git a/vendor/logos-codegen/src/graph/impls.rs b/vendor/logos-codegen/src/graph/impls.rs
new file mode 100644
index 00000000..dc97bdf1
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/impls.rs
@@ -0,0 +1,220 @@
+use std::fmt::{self, Debug, Display};
+use std::hash::{Hash, Hasher};
+
+use crate::graph::{Fork, Graph, Node, NodeId, Range, Rope};
+
// Conversions wrapping the two non-leaf node kinds into `Node`.
impl<T> From<Fork> for Node<T> {
    fn from(fork: Fork) -> Self {
        Node::Fork(fork)
    }
}
impl<T> From<Rope> for Node<T> {
    fn from(rope: Rope) -> Self {
        Node::Rope(rope)
    }
}

/// Whether `byte` is printable ASCII (space through `~`).
fn is_ascii(byte: u8) -> bool {
    (0x20..0x7F).contains(&byte)
}
+
impl Hash for Fork {
    // Hashes the compressed branch list rather than all 256 LUT
    // slots, plus the miss state.
    fn hash<H: Hasher>(&self, state: &mut H) {
        for branch in self.branches() {
            branch.hash(state);
        }
        self.miss.hash(state);
    }
}
+
impl<T> Hash for Node<T> {
    // Tags each variant so forks and ropes can't collide with each
    // other before hashing their contents.
    fn hash<H: Hasher>(&self, state: &mut H) {
        match self {
            Node::Rope(rope) => {
                b"ROPE".hash(state);
                rope.hash(state);
            }
            Node::Fork(fork) => {
                b"FORK".hash(state);
                fork.hash(state);
            }
            // NOTE(review): the leaf payload is not hashed, so all
            // leaves hash identically — presumably leaf identity is
            // resolved elsewhere (e.g. merge disambiguation); confirm
            // the dedup map tolerates these collisions.
            Node::Leaf(_) => b"LEAF".hash(state),
        }
    }
}
+
impl Debug for NodeId {
    // Prints the bare number instead of the `NodeId(…)` wrapper.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        Debug::fmt(&self.0, f)
    }
}
+
/// We don't need debug impls in release builds
// #[cfg(test)]
// NOTE(review): the cfg above is commented out, so despite the doc
// comment this module is compiled unconditionally — some impls here
// (e.g. the `From<RangeInclusive<_>>` conversions and `PartialEq`)
// may be relied on outside tests; confirm before re-enabling the cfg.
mod debug {
    use super::*;
    use crate::graph::rope::Miss;
    use crate::graph::Disambiguate;
    use std::cmp::{Ord, Ordering};

    // Lets plain string slices serve as leaves in tests.
    impl Disambiguate for &str {
        fn cmp(left: &&str, right: &&str) -> Ordering {
            Ord::cmp(left, right)
        }
    }

    impl Debug for Range {
        // Renders single printable bytes bare (`a`), everything else
        // bracketed: `[0A]`, `[a-z]`, `[00-1F]`.
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            let Range { start, end } = *self;

            if start != end || !is_ascii(start) {
                f.write_str("[")?;
            }
            match is_ascii(start) {
                true => write!(f, "{}", start as char),
                false => write!(f, "{:02X}", start),
            }?;
            if start != end {
                match is_ascii(end) {
                    true => write!(f, "-{}]", end as char),
                    false => write!(f, "-{:02X}]", end),
                }?;
            } else if !is_ascii(start) {
                f.write_str("]")?;
            }
            Ok(())
        }
    }

    impl Display for Range {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            <Range as Debug>::fmt(self, f)
        }
    }

    impl<T: Debug> Debug for Graph<T> {
        // Prints only occupied node slots, keyed by index.
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            let entries = self
                .nodes()
                .iter()
                .enumerate()
                .filter_map(|(i, n)| n.as_ref().map(|n| (i, n)));

            f.debug_map().entries(entries).finish()
        }
    }

    /// Formats a `pattern ⇒ target` pair for fork/rope debug output.
    struct Arm<T, U>(T, U);

    impl<T, U> Debug for Arm<T, U>
    where
        T: Display,
        U: Display,
    {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            write!(f, "{} ⇒ {}", self.0, self.1)
        }
    }

    impl Debug for Fork {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            let mut list = f.debug_set();

            for (range, then) in self.branches() {
                list.entry(&Arm(range, then));
            }
            if let Some(id) = self.miss {
                list.entry(&Arm('_', id));
            }

            list.finish()
        }
    }

    impl Display for Miss {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            match self {
                Miss::First(id) => Display::fmt(id, f),
                Miss::Any(id) => write!(f, "{}*", id),
                Miss::None => f.write_str("n/a"),
            }
        }
    }

    impl Debug for Rope {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            use std::fmt::Write;

            // Concatenate the pattern's ranges into one string.
            let mut rope = String::with_capacity(self.pattern.len());
            for range in self.pattern.iter() {
                write!(rope, "{}", range)?;
            }

            match self.miss.is_none() {
                false => {
                    let mut list = f.debug_list();

                    list.entry(&Arm(rope, self.then));
                    list.entry(&Arm('_', self.miss));

                    list.finish()
                }
                true => Arm(rope, self.then).fmt(f),
            }
        }
    }

    impl PartialEq for Fork {
        // Compares compressed branch lists, mirroring `Hash`.
        fn eq(&self, other: &Self) -> bool {
            self.miss == other.miss && self.branches().eq(other.branches())
        }
    }

    impl<T: Debug> Debug for Node<T> {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            match self {
                Node::Fork(fork) => fork.fmt(f),
                Node::Rope(rope) => rope.fmt(f),
                Node::Leaf(leaf) => leaf.fmt(f),
            }
        }
    }

    use std::ops::RangeInclusive;

    impl From<RangeInclusive<u8>> for Range {
        fn from(range: RangeInclusive<u8>) -> Range {
            Range {
                start: *range.start(),
                end: *range.end(),
            }
        }
    }

    impl From<RangeInclusive<char>> for Range {
        // NOTE(review): chars are truncated with `as u8`; callers are
        // expected to pass ASCII only — confirm no multi-byte chars
        // reach this conversion.
        fn from(range: RangeInclusive<char>) -> Range {
            Range {
                start: *range.start() as u8,
                end: *range.end() as u8,
            }
        }
    }

    impl<T> PartialEq<Rope> for Node<T> {
        fn eq(&self, other: &Rope) -> bool {
            match self {
                Node::Rope(rope) => rope == other,
                _ => false,
            }
        }
    }

    impl<T> PartialEq<Fork> for Node<T> {
        fn eq(&self, other: &Fork) -> bool {
            match self {
                Node::Fork(fork) => fork == other,
                _ => false,
            }
        }
    }
}
diff --git a/vendor/logos-codegen/src/graph/meta.rs b/vendor/logos-codegen/src/graph/meta.rs
new file mode 100644
index 00000000..757ced09
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/meta.rs
@@ -0,0 +1,174 @@
+use std::cmp::min;
+use std::collections::BTreeMap;
+use std::ops::{Index, IndexMut};
+
+use crate::graph::{Graph, Node, NodeId};
+
/// Per-node metadata computed ahead of code generation.
#[derive(Debug)]
pub struct Meta {
    /// Metadata keyed by node id.
    map: BTreeMap<NodeId, MetaItem>,
}
+
#[derive(Debug, Default)]
pub struct MetaItem {
    /// Number of references to this node
    pub refcount: usize,
    /// Minimum number of bytes that ought to be read for this
    /// node to find a match
    pub min_read: usize,
    /// Marks whether or not this node leads to a loop entry node.
    pub is_loop_init: bool,
    /// Ids of other nodes that point to this node while this
    /// node is on a stack (creating a loop); kept sorted and
    /// deduplicated by `loop_entry`.
    pub loop_entry_from: Vec<NodeId>,
}
+
impl Index<NodeId> for Meta {
    type Output = MetaItem;

    // Panics if no metadata exists for `id` yet — asymmetric with
    // `IndexMut`, which inserts a default entry.
    fn index(&self, id: NodeId) -> &MetaItem {
        &self.map[&id]
    }
}
+
impl IndexMut<NodeId> for Meta {
    // Auto-creates a default entry on first mutable access.
    fn index_mut(&mut self, id: NodeId) -> &mut MetaItem {
        self.map.entry(id).or_default()
    }
}
+
+impl MetaItem {
+ fn loop_entry(&mut self, id: NodeId) {
+ if let Err(idx) = self.loop_entry_from.binary_search(&id) {
+ self.loop_entry_from.insert(idx, id);
+ }
+ }
+}
+
impl Meta {
    /// Computes metadata for every node reachable from `root`.
    pub fn analyze<T>(root: NodeId, graph: &Graph<T>) -> Self {
        let mut meta = Meta {
            map: Default::default(),
        };

        meta.first_pass(root, root, graph, &mut Vec::new());

        meta
    }

    /// Depth-first walk collecting refcounts, loop entries and
    /// `min_read` per node. `stack` holds the ids currently being
    /// visited; encountering one of them again means `parent` closes
    /// a loop onto it.
    pub fn first_pass<T>(
        &mut self,
        this: NodeId,
        parent: NodeId,
        graph: &Graph<T>,
        stack: &mut Vec<NodeId>,
    ) -> &MetaItem {
        let meta = &mut self[this];
        let is_done = meta.refcount > 0;

        meta.refcount += 1;

        if stack.contains(&this) {
            meta.loop_entry(parent);
            self[parent].is_loop_init = true;
        }
        // Already analyzed: only the refcount needed updating.
        if is_done {
            return &self[this];
        }

        stack.push(this);

        let mut min_read;

        match &graph[this] {
            Node::Fork(fork) => {
                min_read = usize::MAX;
                for (_, id) in fork.branches() {
                    let meta = self.first_pass(id, this, graph, stack);

                    // A branch into a loop only guarantees the one
                    // byte the branch itself consumes.
                    if meta.is_loop_init {
                        min_read = 1;
                    } else {
                        min_read = min(min_read, meta.min_read + 1);
                    }
                }
                if let Some(id) = fork.miss {
                    let meta = self.first_pass(id, this, graph, stack);

                    if meta.is_loop_init {
                        min_read = 0;
                    } else {
                        min_read = min(min_read, meta.min_read);
                    }
                }
                // No branches and no miss: nothing must be read.
                if min_read == usize::MAX {
                    min_read = 0;
                }
            }
            Node::Rope(rope) => {
                min_read = rope.pattern.len();
                let meta = self.first_pass(rope.then, this, graph, stack);

                if !meta.is_loop_init {
                    min_read += meta.min_read;
                }

                if let Some(id) = rope.miss.first() {
                    let meta = self.first_pass(id, this, graph, stack);

                    if meta.is_loop_init {
                        min_read = 0;
                    } else {
                        min_read = min(min_read, meta.min_read);
                    }
                }
            }
            Node::Leaf(_) => min_read = 0,
        }

        stack.pop();

        let meta = &mut self[this];
        meta.min_read = min_read;
        // Loop entries were analyzed while the cycle was still open;
        // recompute them now that their successors are finalized.
        let second_pass = meta.loop_entry_from.clone();

        for id in second_pass {
            self.meta_second_pass(id, graph);
        }

        &self[this]
    }

    /// Recomputes `min_read` for a single loop-entry node from the
    /// now-final metadata of its successors (no recursion needed).
    fn meta_second_pass<T>(&mut self, id: NodeId, graph: &Graph<T>) {
        let mut min_read;

        match &graph[id] {
            Node::Fork(fork) => {
                min_read = usize::MAX;
                for (_, id) in fork.branches() {
                    let meta = &self[id];

                    if meta.is_loop_init {
                        min_read = 1;
                    } else {
                        min_read = min(min_read, meta.min_read + 1);
                    }
                }
                if min_read == usize::MAX {
                    min_read = 0;
                }
            }
            Node::Rope(rope) => {
                min_read = rope.pattern.len();
                let meta = &self[rope.then];

                if !meta.is_loop_init {
                    min_read += meta.min_read;
                }
            }
            // Leaves never head a loop, so they can't appear here.
            Node::Leaf(_) => unreachable!(),
        }

        self[id].min_read = min_read;
    }
}
diff --git a/vendor/logos-codegen/src/graph/mod.rs b/vendor/logos-codegen/src/graph/mod.rs
new file mode 100644
index 00000000..6d218e8c
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/mod.rs
@@ -0,0 +1,566 @@
+use std::cmp::Ordering;
+use std::collections::btree_map::Entry;
+use std::collections::BTreeMap as Map;
+use std::hash::{Hash, Hasher};
+use std::num::NonZeroU32;
+use std::ops::Index;
+
+use fnv::FnvHasher;
+
+mod fork;
+mod impls;
+mod meta;
+mod range;
+mod regex;
+mod rope;
+
+pub use self::fork::Fork;
+pub use self::meta::Meta;
+pub use self::range::Range;
+pub use self::rope::Rope;
+
+/// Disambiguation error during the attempt to merge two leaf
+/// nodes with the same priority
+#[derive(Debug)]
+pub struct DisambiguationError(pub NodeId, pub NodeId);
+
+pub struct Graph<Leaf> {
+ /// Internal storage of all allocated nodes. Once a node is
+ /// put here, it should never be mutated.
+ nodes: Vec<Option<Node<Leaf>>>,
+ /// When merging two nodes into a new node, we store the two
+ /// entry keys and the result, so that we don't merge the same
+ /// two nodes multiple times.
+ ///
+ /// Caching merge results here lets `merge` return early for a
+ /// pair that has already been merged (or is mid-merge), which
+ /// also breaks cycles when merging graphs that contain loops.
+ merges: Map<Merge, NodeId>,
+ /// Another map used for accounting. Before `.push`ing a new node
+ /// onto the graph (inserts are exempt), we hash it and find if
+ /// an identical(!) node has been created before.
+ hashes: Map<u64, NodeId>,
+ /// Instead of handling errors over return types, opt to collect
+ /// them internally.
+ errors: Vec<DisambiguationError>,
+ /// Deferred merges. When attempting to merge a node with an
+ /// empty reserved slot, the merging operation is deferred until
+ /// the reserved slot is populated. This is a stack that keeps track
+ /// of all such deferred merges.
+ deferred: Vec<DeferredMerge>,
+}
+
+/// Trait to be implemented on `Leaf` nodes in order to disambiguate
+/// between them.
+pub trait Disambiguate {
+ fn cmp(left: &Self, right: &Self) -> Ordering;
+}
+
+/// Id of a Node in the graph. `NodeId` can be referencing an empty
+/// slot that is going to be populated later in time.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub struct NodeId(NonZeroU32);
+
+impl Hash for NodeId {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ // Always use little-endian byte order for hashing to avoid
+ // different code generation on big-endian platforms due to
+ // iteration over a HashMap,
+ // see https://github.com/maciejhirsz/logos/issues/427.
+ state.write(&self.0.get().to_le_bytes())
+ }
+}
+
+impl NodeId {
+ fn get(self) -> usize {
+ self.0.get() as usize
+ }
+
+ fn new(n: usize) -> NodeId {
+ NodeId(NonZeroU32::new(n as u32).expect("Invalid NodeId"))
+ }
+}
+
+/// Unique reserved `NodeId` that is guaranteed to point to an
+/// empty allocated slot in the graph. It's safe to create multiple
+/// `NodeId` copies of `ReservedId`, however API users should never
+/// be able to clone a `ReservedId`, or create a new one from `NodeId`.
+///
+/// `ReservedId` is consumed once passed into `Graph::insert`.
+#[derive(Debug)]
+pub struct ReservedId(NodeId);
+
+impl ReservedId {
+ pub fn get(&self) -> NodeId {
+ self.0
+ }
+}
+
+ /// Merge key used to look up whether two nodes have been previously
+ /// merged, so we can avoid duplicating merges, potentially running into
+/// loops that blow out the stack.
+///
+/// `Merge::new(a, b)` should always equal to `Merge::new(b, a)` to ensure
+/// that node merges are symmetric.
+#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)]
+struct Merge(NodeId, NodeId);
+
+impl Merge {
+ fn new(a: NodeId, b: NodeId) -> Self {
+ if a < b {
+ Merge(a, b)
+ } else {
+ Merge(b, a)
+ }
+ }
+}
+
+/// When attempting to merge two nodes, one of which was not yet created,
+/// we can record such attempt, and execute the merge later on when the
+/// `awaiting` has been `insert`ed into the graph.
+#[derive(Debug)]
+pub struct DeferredMerge {
+ awaiting: NodeId,
+ with: NodeId,
+ into: ReservedId,
+}
+
+impl<Leaf> Graph<Leaf> {
+ pub fn new() -> Self {
+ Graph {
+ // Start with an empty slot so we can start
+ // counting NodeIds from 1 and use NonZero
+ // optimizations
+ nodes: vec![None],
+ merges: Map::new(),
+ hashes: Map::new(),
+ errors: Vec::new(),
+ deferred: Vec::new(),
+ }
+ }
+
+ pub fn errors(&self) -> &[DisambiguationError] {
+ &self.errors
+ }
+
+ fn next_id(&self) -> NodeId {
+ NodeId::new(self.nodes.len())
+ }
+
+ /// Reserve an empty slot for a node on the graph and return an
+ /// id for it. `ReservedId` cannot be cloned, and must be consumed
+ /// by calling `insert` on the graph.
+ pub fn reserve(&mut self) -> ReservedId {
+ let id = self.next_id();
+
+ self.nodes.push(None);
+
+ ReservedId(id)
+ }
+
+ /// Insert a node at a given, previously reserved id. Returns the
+ /// inserted `NodeId`.
+ pub fn insert<N>(&mut self, reserved: ReservedId, node: N) -> NodeId
+ where
+ N: Into<Node<Leaf>>,
+ Leaf: Disambiguate,
+ {
+ let id = reserved.get();
+
+ self.nodes[id.get()] = Some(node.into());
+
+ let mut awaiting = Vec::new();
+
+ // Partition out all `DeferredMerge`s that can be completed
+ // now that this `ReservedId` has a `Node` inserted into it.
+ for idx in (0..self.deferred.len()).rev() {
+ if self.deferred[idx].awaiting == id {
+ awaiting.push(self.deferred.remove(idx));
+ }
+ }
+
+ // Complete deferred merges. We've collected them from the back,
+ // so we must iterate through them from the back as well to restore
+ // proper order of merges in case there is some cascading going on.
+ for DeferredMerge {
+ awaiting,
+ with,
+ into,
+ } in awaiting.into_iter().rev()
+ {
+ self.merge_unchecked(awaiting, with, into);
+ }
+
+ id
+ }
+
+ /// Push a node onto the graph and get an id to it. If an identical
+ /// node has already been pushed on the graph, it will return the id
+ /// of that node instead.
+ pub fn push<B>(&mut self, node: B) -> NodeId
+ where
+ B: Into<Node<Leaf>>,
+ {
+ let node = node.into();
+
+ if let Node::Leaf(_) = node {
+ return self.push_unchecked(node);
+ }
+
+ let mut hasher = FnvHasher::default();
+ node.hash(&mut hasher);
+
+ let next_id = self.next_id();
+
+ match self.hashes.entry(hasher.finish()) {
+ Entry::Occupied(occupied) => {
+ let id = *occupied.get();
+
+ if self[id].eq(&node) {
+ return id;
+ }
+ }
+ Entry::Vacant(vacant) => {
+ vacant.insert(next_id);
+ }
+ }
+
+ self.push_unchecked(node)
+ }
+
+ fn push_unchecked(&mut self, node: Node<Leaf>) -> NodeId {
+ let id = self.next_id();
+
+ self.nodes.push(Some(node));
+
+ id
+ }
+
+ /// If nodes `a` and `b` have been already merged, return the
+ /// `NodeId` of the node they have been merged into.
+ fn find_merged(&self, a: NodeId, b: NodeId) -> Option<NodeId> {
+ let probe = Merge::new(a, b);
+
+ self.merges.get(&probe).copied()
+ }
+
+ /// Mark that nodes `a` and `b` have been merged into `product`.
+ ///
+ /// This will also mark merging `a` and `product`, as well as
+ /// `b` and `product` into `product`, since those are symmetric
+ /// operations.
+ ///
+ /// This is necessary to break out of asymmetric merge loops.
+ fn set_merged(&mut self, a: NodeId, b: NodeId, product: NodeId) {
+ self.merges.insert(Merge::new(a, b), product);
+ self.merges.insert(Merge::new(a, product), product);
+ self.merges.insert(Merge::new(b, product), product);
+ }
+
+ /// Merge the nodes at id `a` and `b`, returning a new id.
+ pub fn merge(&mut self, a: NodeId, b: NodeId) -> NodeId
+ where
+ Leaf: Disambiguate,
+ {
+ if a == b {
+ return a;
+ }
+
+ // If the id pair is already merged (or is being merged), just return the id
+ if let Some(id) = self.find_merged(a, b) {
+ return id;
+ }
+
+ match (self.get(a), self.get(b)) {
+ (None, None) => {
+ panic!(
+ "Merging two reserved nodes! This is a bug, please report it:\n\
+ \n\
+ https://github.com/maciejhirsz/logos/issues"
+ );
+ }
+ (None, Some(_)) => {
+ let reserved = self.reserve();
+ let id = reserved.get();
+ self.deferred.push(DeferredMerge {
+ awaiting: a,
+ with: b,
+ into: reserved,
+ });
+ self.set_merged(a, b, id);
+
+ return id;
+ }
+ (Some(_), None) => {
+ let reserved = self.reserve();
+ let id = reserved.get();
+ self.deferred.push(DeferredMerge {
+ awaiting: b,
+ with: a,
+ into: reserved,
+ });
+ self.set_merged(a, b, id);
+
+ return id;
+ }
+ (Some(Node::Leaf(left)), Some(Node::Leaf(right))) => {
+ return match Disambiguate::cmp(left, right) {
+ Ordering::Less => b,
+ Ordering::Greater => a,
+ Ordering::Equal => {
+ self.errors.push(DisambiguationError(a, b));
+
+ a
+ }
+ };
+ }
+ _ => (),
+ }
+
+ // Reserve the id for the merge and save it. Since the graph can contain loops,
+ // this prevents us from trying to merge the same id pair in a loop, blowing up
+ // the stack.
+ let reserved = self.reserve();
+ self.set_merged(a, b, reserved.get());
+
+ self.merge_unchecked(a, b, reserved)
+ }
+
+ /// Unchecked merge of `a` and `b`. This fn assumes that `a` and `b` are
+ /// not pointing to empty slots.
+ fn merge_unchecked(&mut self, a: NodeId, b: NodeId, reserved: ReservedId) -> NodeId
+ where
+ Leaf: Disambiguate,
+ {
+ let merged_rope = match (self.get(a), self.get(b)) {
+ (Some(Node::Rope(rope)), _) => {
+ let rope = rope.clone();
+
+ self.merge_rope(rope, b)
+ }
+ (_, Some(Node::Rope(rope))) => {
+ let rope = rope.clone();
+
+ self.merge_rope(rope, a)
+ }
+ _ => None,
+ };
+
+ if let Some(rope) = merged_rope {
+ return self.insert(reserved, rope);
+ }
+
+ let mut fork = self.fork_off(a);
+ fork.merge(self.fork_off(b), self);
+
+ let mut stack = vec![reserved.get()];
+
+ // Flatten the fork
+ while let Some(miss) = fork.miss {
+ if stack.contains(&miss) {
+ break;
+ }
+ stack.push(miss);
+
+ let other = match self.get(miss) {
+ Some(Node::Fork(other)) => other.clone(),
+ Some(Node::Rope(other)) => other.clone().into_fork(self),
+ _ => break,
+ };
+ match other.miss {
+ Some(id) if self.get(id).is_none() => break,
+ _ => (),
+ }
+ fork.miss = None;
+ fork.merge(other, self);
+ }
+
+ self.insert(reserved, fork)
+ }
+
+ fn merge_rope(&mut self, rope: Rope, other: NodeId) -> Option<Rope>
+ where
+ Leaf: Disambiguate,
+ {
+ match self.get(other) {
+ Some(Node::Fork(fork)) if rope.miss.is_none() => {
+ // Count how many consecutive ranges in this rope would
+ // branch into the fork that results in a loop.
+ //
+ // e.g.: for rope "foobar" and a looping fork [a-z]: 6
+ let count = rope
+ .pattern
+ .iter()
+ .take_while(|range| fork.contains(**range) == Some(other))
+ .count();
+
+ let mut rope = rope.split_at(count, self)?.miss_any(other);
+
+ rope.then = self.merge(rope.then, other);
+
+ Some(rope)
+ }
+ Some(Node::Rope(other)) => {
+ let (prefix, miss) = rope.prefix(other)?;
+
+ let (a, b) = (rope, other.clone());
+
+ let a = a.remainder(prefix.len(), self);
+ let b = b.remainder(prefix.len(), self);
+
+ let rope = Rope::new(prefix, self.merge(a, b)).miss(miss);
+
+ Some(rope)
+ }
+ Some(Node::Leaf(_)) | None => {
+ if rope.miss.is_none() {
+ Some(rope.miss(other))
+ } else {
+ None
+ }
+ }
+ _ => None,
+ }
+ }
+
+ pub fn fork_off(&mut self, id: NodeId) -> Fork
+ where
+ Leaf: Disambiguate,
+ {
+ match self.get(id) {
+ Some(Node::Fork(fork)) => fork.clone(),
+ Some(Node::Rope(rope)) => rope.clone().into_fork(self),
+ Some(Node::Leaf(_)) | None => Fork::new().miss(id),
+ }
+ }
+
+ pub fn nodes(&self) -> &[Option<Node<Leaf>>] {
+ &self.nodes
+ }
+
+ /// Find all nodes that have no references and remove them.
+ pub fn shake(&mut self, root: NodeId) {
+ let mut filter = vec![false; self.nodes.len()];
+
+ filter[root.get()] = true;
+
+ self[root].shake(self, &mut filter);
+
+ for (id, referenced) in filter.into_iter().enumerate() {
+ if !referenced {
+ self.nodes[id] = None;
+ }
+ }
+ }
+
+ pub fn get(&self, id: NodeId) -> Option<&Node<Leaf>> {
+ self.nodes.get(id.get())?.as_ref()
+ }
+}
+
+impl<Leaf> Index<NodeId> for Graph<Leaf> {
+ type Output = Node<Leaf>;
+
+ fn index(&self, id: NodeId) -> &Node<Leaf> {
+ self.get(id).expect(
+ "Indexing into an empty node. This is a bug, please report it at:\n\
+ \n\
+ https://github.com/maciejhirsz/logos/issues",
+ )
+ }
+}
+
+impl std::fmt::Display for NodeId {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ std::fmt::Display::fmt(&self.0, f)
+ }
+}
+
+#[cfg_attr(test, derive(PartialEq))]
+pub enum Node<Leaf> {
+ /// Fork node, can lead to more than one state
+ Fork(Fork),
+ /// Rope node, can lead to one state on match, one state on miss
+ Rope(Rope),
+ /// Leaf node, terminal state
+ Leaf(Leaf),
+}
+
+impl<Leaf> Node<Leaf> {
+ pub fn miss(&self) -> Option<NodeId> {
+ match self {
+ Node::Rope(rope) => rope.miss.first(),
+ Node::Fork(fork) => fork.miss,
+ Node::Leaf(_) => None,
+ }
+ }
+
+ fn eq(&self, other: &Node<Leaf>) -> bool {
+ match (self, other) {
+ (Node::Fork(a), Node::Fork(b)) => a == b,
+ (Node::Rope(a), Node::Rope(b)) => a == b,
+ _ => false,
+ }
+ }
+
+ fn shake(&self, graph: &Graph<Leaf>, filter: &mut [bool]) {
+ match self {
+ Node::Fork(fork) => fork.shake(graph, filter),
+ Node::Rope(rope) => rope.shake(graph, filter),
+ Node::Leaf(_) => (),
+ }
+ }
+
+ pub fn unwrap_leaf(&self) -> &Leaf {
+ match self {
+ Node::Fork(_) => panic!("Internal Error: called unwrap_leaf on a fork"),
+ Node::Rope(_) => panic!("Internal Error: called unwrap_leaf on a rope"),
+ Node::Leaf(leaf) => leaf,
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn leaf_stack_size() {
+ use std::mem::size_of;
+
+ const WORD: usize = size_of::<usize>();
+ const NODE: usize = size_of::<Node<()>>();
+
+ assert!(NODE <= 6 * WORD, "Size of Node<()> is {} bytes!", NODE);
+ }
+
+ #[test]
+ fn create_a_loop() {
+ let mut graph = Graph::new();
+
+ let token = graph.push(Node::Leaf("IDENT"));
+ let id = graph.reserve();
+ let fork = Fork::new().branch('a'..='z', id.get()).miss(token);
+ let root = graph.insert(id, fork);
+
+ assert_eq!(graph[token], Node::Leaf("IDENT"));
+ assert_eq!(graph[root], Fork::new().branch('a'..='z', root).miss(token),);
+ }
+
+ #[test]
+ fn fork_off() {
+ let mut graph = Graph::new();
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let rope = graph.push(Rope::new("rope", leaf));
+ let fork = graph.push(Fork::new().branch(b'!', leaf));
+
+ assert_eq!(graph.fork_off(leaf), Fork::new().miss(leaf));
+ assert_eq!(
+ graph.fork_off(rope),
+ Fork::new().branch(b'r', NodeId::new(graph.nodes.len() - 1))
+ );
+ assert_eq!(graph.fork_off(fork), Fork::new().branch(b'!', leaf));
+ }
+}
diff --git a/vendor/logos-codegen/src/graph/range.rs b/vendor/logos-codegen/src/graph/range.rs
new file mode 100644
index 00000000..a4d23d3b
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/range.rs
@@ -0,0 +1,144 @@
+use regex_syntax::hir::ClassBytesRange;
+use regex_syntax::hir::ClassUnicodeRange;
+use regex_syntax::utf8::Utf8Range;
+
+use std::cmp::{Ord, Ordering};
+
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub struct Range {
+ pub start: u8,
+ pub end: u8,
+}
+
+impl Range {
+ pub fn as_byte(&self) -> Option<u8> {
+ if self.is_byte() {
+ Some(self.start)
+ } else {
+ None
+ }
+ }
+
+ pub fn is_byte(&self) -> bool {
+ self.start == self.end
+ }
+}
+
+impl From<u8> for Range {
+ fn from(byte: u8) -> Range {
+ Range {
+ start: byte,
+ end: byte,
+ }
+ }
+}
+
+impl From<&u8> for Range {
+ fn from(byte: &u8) -> Range {
+ Range::from(*byte)
+ }
+}
+
+impl Iterator for Range {
+ type Item = u8;
+
+ fn next(&mut self) -> Option<u8> {
+ match self.start.cmp(&self.end) {
+ std::cmp::Ordering::Less => {
+ let res = self.start;
+ self.start += 1;
+
+ Some(res)
+ }
+ std::cmp::Ordering::Equal => {
+ let res = self.start;
+
+ // Necessary so that range 0xFF-0xFF doesn't loop forever
+ self.start = 0xFF;
+ self.end = 0x00;
+
+ Some(res)
+ }
+ std::cmp::Ordering::Greater => None,
+ }
+ }
+}
+
+impl PartialOrd for Range {
+ fn partial_cmp(&self, other: &Range) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Ord for Range {
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.start.cmp(&other.start)
+ }
+}
+
+impl From<Utf8Range> for Range {
+ fn from(r: Utf8Range) -> Range {
+ Range {
+ start: r.start,
+ end: r.end,
+ }
+ }
+}
+
+impl From<ClassUnicodeRange> for Range {
+ fn from(r: ClassUnicodeRange) -> Range {
+ let start = r.start() as u32;
+ let end = r.end() as u32;
+
+ if start >= 128 || end >= 128 && end != 0x0010FFFF {
+ panic!("Casting non-ascii ClassUnicodeRange to Range")
+ }
+
+ Range {
+ start: start as u8,
+ end: end as u8,
+ }
+ }
+}
+
+impl From<ClassBytesRange> for Range {
+ fn from(r: ClassBytesRange) -> Range {
+ Range {
+ start: r.start(),
+ end: r.end(),
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn range_iter_one() {
+ let byte = Range::from(b'!');
+ let collected = byte.take(1000).collect::<Vec<_>>();
+
+ assert_eq!(b"!", &collected[..]);
+ }
+
+ #[test]
+ fn range_iter_few() {
+ let byte = Range {
+ start: b'a',
+ end: b'd',
+ };
+ let collected = byte.take(1000).collect::<Vec<_>>();
+
+ assert_eq!(b"abcd", &collected[..]);
+ }
+
+ #[test]
+ fn range_iter_bounds() {
+ let byte = Range::from(0xFA..=0xFF);
+
+ let collected = byte.take(1000).collect::<Vec<_>>();
+
+ assert_eq!(b"\xFA\xFB\xFC\xFD\xFE\xFF", &collected[..]);
+ }
+}
diff --git a/vendor/logos-codegen/src/graph/regex.rs b/vendor/logos-codegen/src/graph/regex.rs
new file mode 100644
index 00000000..d55c490f
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/regex.rs
@@ -0,0 +1,295 @@
+use std::fmt::Debug;
+
+use regex_syntax::utf8::Utf8Sequences;
+
+use crate::graph::{Disambiguate, Fork, Graph, Node, NodeId, Range, ReservedId, Rope};
+use crate::mir::{Class, ClassUnicode, Literal, Mir};
+
+impl<Leaf: Disambiguate + Debug> Graph<Leaf> {
+ pub fn regex(&mut self, mir: Mir, then: NodeId) -> NodeId {
+ self.parse_mir(&mir, then, None, None, false)
+ }
+
+ fn parse_mir(
+ &mut self,
+ mir: &Mir,
+ then: NodeId,
+ miss: Option<NodeId>,
+ reserved: Option<ReservedId>,
+ repeated: bool,
+ ) -> NodeId {
+ match mir {
+ Mir::Empty => then,
+ Mir::Loop(mir) => {
+ let reserved_first = reserved.unwrap_or_else(|| self.reserve());
+
+ let (new_then, new_miss);
+ if let Some(old_miss) = miss {
+ // We have to separate the first iteration from the other iterations,
+ // because the `old_miss` path must only be taken if we miss the first
+ // iteration.
+ let reserved_next = self.reserve();
+ new_then = self.parse_mir(
+ mir,
+ reserved_next.get(),
+ Some(then),
+ Some(reserved_next),
+ true,
+ );
+ new_miss = self.merge(old_miss, then);
+ } else {
+ new_then = reserved_first.get();
+ new_miss = then;
+ }
+
+ self.parse_mir(mir, new_then, Some(new_miss), Some(reserved_first), true)
+ }
+ Mir::Maybe(mir) => {
+ let miss = match miss {
+ Some(id) => self.merge(id, then),
+ None => then,
+ };
+
+ self.parse_mir(mir, then, Some(miss), reserved, true)
+ }
+ Mir::Alternation(alternation) => {
+ let mut fork = Fork::new().miss(miss);
+
+ for mir in alternation {
+ let id = self.parse_mir(mir, then, None, None, repeated);
+ let alt = self.fork_off(id);
+
+ fork.merge(alt, self);
+ }
+
+ self.insert_or_push(reserved, fork)
+ }
+ Mir::Literal(literal) => {
+ let pattern = literal.0.to_vec();
+
+ self.insert_or_push(reserved, Rope::new(pattern, then).miss(miss))
+ }
+ Mir::Concat(concat) => {
+ // Take an initial guess at the capacity - estimates a little worse than an average case
+ // scenario by assuming every concat element is singular but has a full code-point unicode literal.
+ // The only way to get the actual size of the Vec is if every sub-concat node is added up.
+ let mut ropebuf: Vec<Range> = Vec::with_capacity(concat.len() * 4);
+ let mut then = then;
+
+ let mut handle_bytes = |graph: &mut Self, mir: &Mir, then: &mut NodeId| match mir {
+ Mir::Literal(Literal(bytes)) => {
+ ropebuf.extend(bytes.iter().rev().map(Into::<Range>::into));
+ true
+ }
+ Mir::Class(Class::Unicode(class)) if is_one_ascii(class, repeated) => {
+ ropebuf.push(class.ranges()[0].into());
+ true
+ }
+ Mir::Class(Class::Bytes(class)) if class.ranges().len() == 1 => {
+ ropebuf.push(class.ranges()[0].into());
+ true
+ }
+ _ => {
+ if !ropebuf.is_empty() {
+ let rope =
+ Rope::new(ropebuf.iter().cloned().rev().collect::<Vec<_>>(), *then);
+
+ *then = graph.push(rope);
+ ropebuf = Vec::with_capacity(concat.len() * 4);
+ }
+ false
+ }
+ };
+
+ for mir in concat[1..].iter().rev() {
+ if !handle_bytes(self, mir, &mut then) {
+ then = self.parse_mir(mir, then, None, None, false);
+ }
+ }
+
+ let first_mir = &concat[0];
+ if handle_bytes(self, first_mir, &mut then) {
+ let rope = Rope::new(ropebuf.iter().cloned().rev().collect::<Vec<_>>(), then)
+ .miss(miss);
+ self.insert_or_push(reserved, rope)
+ } else {
+ self.parse_mir(first_mir, then, miss, reserved, false)
+ }
+ }
+ Mir::Class(Class::Unicode(class)) if !is_ascii(class, repeated) => {
+ let mut ropes = class
+ .iter()
+ .flat_map(|range| Utf8Sequences::new(range.start(), range.end()))
+ .map(|sequence| Rope::new(sequence.as_slice(), then))
+ .collect::<Vec<_>>();
+
+ if ropes.len() == 1 {
+ let rope = ropes.remove(0);
+
+ return self.insert_or_push(reserved, rope.miss(miss));
+ }
+
+ let mut root = Fork::new().miss(miss);
+
+ for rope in ropes {
+ let fork = rope.into_fork(self);
+ root.merge(fork, self);
+ }
+
+ self.insert_or_push(reserved, root)
+ }
+ Mir::Class(class) => {
+ let mut fork = Fork::new().miss(miss);
+
+ let class: Vec<Range> = match class {
+ Class::Unicode(u) => u.iter().copied().map(Into::into).collect(),
+ Class::Bytes(b) => b.iter().copied().map(Into::into).collect(),
+ };
+
+ for range in class {
+ fork.add_branch(range, then, self);
+ }
+
+ self.insert_or_push(reserved, fork)
+ }
+ }
+ }
+
+ fn insert_or_push<N>(&mut self, id: Option<ReservedId>, node: N) -> NodeId
+ where
+ N: Into<Node<Leaf>>,
+ {
+ match id {
+ Some(id) => self.insert(id, node),
+ None => self.push(node),
+ }
+ }
+}
+
+/// Return whether current class unicode is ascii.
+///
+/// Because unicode ranges are iterated in increasing order,
+/// it is only necessary to check the last range.
+///
+/// If the check is performed in a repetition,
+/// a fast path is used by checking if end of range is 0x0010_FFFF.
+fn is_ascii(class: &ClassUnicode, repeated: bool) -> bool {
+ class.iter().last().map_or(true, |range| {
+ let start = range.start() as u32;
+ let end = range.end() as u32;
+ end < 128 || (repeated && start < 128 && end == 0x0010_FFFF)
+ })
+}
+
+/// Return whether current class unicode is ascii and only contains
+/// one range.
+///
+/// See [`is_ascii`] function for more details.
+fn is_one_ascii(class: &ClassUnicode, repeated: bool) -> bool {
+ if class.ranges().len() != 1 {
+ return false;
+ }
+
+ let range = &class.ranges()[0];
+ let start = range.start() as u32;
+ let end = range.end() as u32;
+ end < 128 || (repeated && start < 128 && end == 0x0010_FFFF)
+}
+
+#[cfg(test)]
+mod tests {
+ use std::num::NonZeroU32;
+
+ use super::*;
+ use crate::graph::Node;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn rope() {
+ let mut graph = Graph::new();
+
+ let mir = Mir::utf8("foobar").unwrap();
+
+ assert_eq!(mir.priority(), 12);
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let id = graph.regex(mir, leaf);
+
+ assert_eq!(graph[id], Node::Rope(Rope::new("foobar", leaf)),)
+ }
+
+ #[test]
+ fn alternation() {
+ let mut graph = Graph::new();
+
+ let mir = Mir::utf8("a|b").unwrap();
+
+ assert_eq!(mir.priority(), 2);
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let id = graph.regex(mir, leaf);
+
+ assert_eq!(
+ graph[id],
+ Node::Fork(Fork::new().branch(b'a', leaf).branch(b'b', leaf)),
+ );
+ }
+
+ #[test]
+ fn repeat() {
+ let mut graph = Graph::new();
+
+ let mir = Mir::utf8("[a-z]*").unwrap();
+
+ assert_eq!(mir.priority(), 0);
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let id = graph.regex(mir, leaf);
+
+ assert_eq!(
+ graph[id],
+ Node::Fork(
+ Fork::new()
+ .branch('a'..='z', id) // goto self == loop
+ .miss(leaf)
+ ),
+ );
+ }
+
+ #[test]
+ fn maybe() {
+ let mut graph = Graph::new();
+
+ let mir = Mir::utf8("[a-z]?").unwrap();
+
+ assert_eq!(mir.priority(), 0);
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let id = graph.regex(mir, leaf);
+
+ assert_eq!(
+ graph[id],
+ Node::Fork(Fork::new().branch('a'..='z', leaf).miss(leaf)),
+ );
+ }
+
+ #[test]
+ fn long_concat_389() {
+ let mut graph = Graph::new();
+
+ let mir = Mir::utf8("abcdefghijklmnopqrstuvwxyz*").unwrap();
+
+ assert_eq!(mir.priority(), 50);
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let id = graph.regex(mir, leaf);
+ let sub_id = NodeId(NonZeroU32::new(2).unwrap());
+
+ assert_eq!(
+ graph[id],
+ Node::Rope(Rope::new("abcdefghijklmnopqrstuvwxy", sub_id))
+ );
+
+ assert_eq!(graph[sub_id], Node::Rope(Rope::new("z", sub_id).miss(leaf)))
+ }
+}
diff --git a/vendor/logos-codegen/src/graph/rope.rs b/vendor/logos-codegen/src/graph/rope.rs
new file mode 100644
index 00000000..6e563fac
--- /dev/null
+++ b/vendor/logos-codegen/src/graph/rope.rs
@@ -0,0 +1,330 @@
+use std::ops::Deref;
+
+use crate::graph::{Disambiguate, Fork, Graph, NodeId, Range};
+
+#[derive(PartialEq, Clone, Hash)]
+pub struct Rope {
+ pub pattern: Pattern,
+ pub then: NodeId,
+ pub miss: Miss,
+}
+
+#[derive(PartialEq, Clone, Hash)]
+pub struct Pattern(pub Vec<Range>);
+
+impl Deref for Pattern {
+ type Target = [Range];
+
+ fn deref(&self) -> &[Range] {
+ &self.0
+ }
+}
+
+/// Because Ropes could potentially fail a match mid-pattern,
+/// a regular `Option` is not sufficient here.
+#[derive(PartialEq, Clone, Copy, Hash)]
+pub enum Miss {
+ /// Same as Option::None, error on fail
+ None,
+ /// Jump to id if first byte does not match, fail on partial match
+ First(NodeId),
+ /// Jump to id on partial or empty match
+ Any(NodeId),
+}
+
+impl Miss {
+ pub fn is_none(&self) -> bool {
+ matches!(self, Miss::None)
+ }
+
+ pub fn first(self) -> Option<NodeId> {
+ match self {
+ Miss::First(id) | Miss::Any(id) => Some(id),
+ _ => None,
+ }
+ }
+
+ pub fn take_first(&mut self) -> Option<NodeId> {
+ match *self {
+ Miss::First(id) => {
+ *self = Miss::None;
+
+ Some(id)
+ }
+ Miss::Any(id) => Some(id),
+ Miss::None => None,
+ }
+ }
+}
+
+impl From<Option<NodeId>> for Miss {
+ fn from(miss: Option<NodeId>) -> Self {
+ match miss {
+ Some(id) => Miss::First(id),
+ None => Miss::None,
+ }
+ }
+}
+
+impl From<NodeId> for Miss {
+ fn from(id: NodeId) -> Self {
+ Miss::First(id)
+ }
+}
+
+impl Rope {
+ pub fn new<P>(pattern: P, then: NodeId) -> Self
+ where
+ P: Into<Pattern>,
+ {
+ Rope {
+ pattern: pattern.into(),
+ then,
+ miss: Miss::None,
+ }
+ }
+
+ pub fn miss<M>(mut self, miss: M) -> Self
+ where
+ M: Into<Miss>,
+ {
+ self.miss = miss.into();
+ self
+ }
+
+ pub fn miss_any(mut self, miss: NodeId) -> Self {
+ self.miss = Miss::Any(miss);
+ self
+ }
+
+ pub fn into_fork<T>(mut self, graph: &mut Graph<T>) -> Fork
+ where
+ T: Disambiguate,
+ {
+ let first = self.pattern.0.remove(0);
+ let miss = self.miss.take_first();
+
+ // The new fork will lead to a new rope,
+ // or the old target if no new rope was created
+ let then = match self.pattern.len() {
+ 0 => self.then,
+ _ => graph.push(self),
+ };
+
+ Fork::new().branch(first, then).miss(miss)
+ }
+
+ pub fn prefix(&self, other: &Self) -> Option<(Pattern, Miss)> {
+ let count = self
+ .pattern
+ .iter()
+ .zip(other.pattern.iter())
+ .take_while(|(a, b)| a == b)
+ .count();
+
+ let pattern = match count {
+ 0 => return None,
+ n => self.pattern[..n].into(),
+ };
+ let miss = match (self.miss, other.miss) {
+ (Miss::None, miss) => miss,
+ (miss, Miss::None) => miss,
+ _ => return None,
+ };
+
+ Some((pattern, miss))
+ }
+
+ pub fn split_at<T>(mut self, at: usize, graph: &mut Graph<T>) -> Option<Rope>
+ where
+ T: Disambiguate,
+ {
+ match at {
+ 0 => return None,
+ n if n == self.pattern.len() => return Some(self),
+ _ => (),
+ }
+
+ let (this, next) = self.pattern.split_at(at);
+
+ let next_miss = match self.miss {
+ Miss::Any(_) => self.miss,
+ _ => Miss::None,
+ };
+
+ let next = graph.push(Rope {
+ pattern: next.into(),
+ miss: next_miss,
+ then: self.then,
+ });
+
+ self.pattern = this.into();
+ self.then = next;
+
+ Some(self)
+ }
+
+ pub fn remainder<T>(mut self, at: usize, graph: &mut Graph<T>) -> NodeId
+ where
+ T: Disambiguate,
+ {
+ self.pattern = self.pattern[at..].into();
+
+ match self.pattern.len() {
+ 0 => self.then,
+ _ => graph.push(self),
+ }
+ }
+
+ pub fn shake<T>(&self, graph: &Graph<T>, filter: &mut [bool]) {
+ if let Some(id) = self.miss.first() {
+ if !filter[id.get()] {
+ filter[id.get()] = true;
+ graph[id].shake(graph, filter);
+ }
+ }
+
+ if !filter[self.then.get()] {
+ filter[self.then.get()] = true;
+ graph[self.then].shake(graph, filter);
+ }
+ }
+}
+
+impl Pattern {
+ pub fn to_bytes(&self) -> Option<Vec<u8>> {
+ let mut out = Vec::with_capacity(self.len());
+
+ for range in self.iter() {
+ out.push(range.as_byte()?);
+ }
+
+ Some(out)
+ }
+}
+
+impl<T> From<&[T]> for Pattern
+where
+ T: Into<Range> + Copy,
+{
+ fn from(slice: &[T]) -> Self {
+ Pattern(slice.iter().copied().map(Into::into).collect())
+ }
+}
+
+impl<T> From<Vec<T>> for Pattern
+where
+ T: Into<Range>,
+{
+ fn from(vec: Vec<T>) -> Self {
+ Pattern(vec.into_iter().map(Into::into).collect())
+ }
+}
+
+impl From<&str> for Pattern {
+ fn from(slice: &str) -> Self {
+ slice.as_bytes().into()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::graph::Node;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn into_fork() {
+ let mut graph = Graph::new();
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let rope = Rope::new("foobar", leaf);
+
+ let fork = rope.into_fork(&mut graph);
+
+ assert_eq!(leaf, NodeId::new(1));
+ assert_eq!(fork, Fork::new().branch(b'f', NodeId::new(2)));
+ assert_eq!(graph[NodeId::new(2)], Rope::new("oobar", leaf));
+ }
+
+ #[test]
+ fn into_fork_one_byte() {
+ let mut graph = Graph::new();
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let rope = Rope::new("!", leaf);
+
+ let fork = rope.into_fork(&mut graph);
+
+ assert_eq!(leaf, NodeId::new(1));
+ assert_eq!(fork, Fork::new().branch(b'!', NodeId::new(1)));
+ }
+
+ #[test]
+ fn into_fork_miss_any() {
+ let mut graph = Graph::new();
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let rope = Rope::new("42", leaf).miss_any(NodeId::new(42));
+
+ let fork = rope.into_fork(&mut graph);
+
+ assert_eq!(leaf, NodeId::new(1));
+ assert_eq!(
+ fork,
+ Fork::new()
+ .branch(b'4', NodeId::new(2))
+ .miss(NodeId::new(42))
+ );
+ assert_eq!(
+ graph[NodeId::new(2)],
+ Rope::new("2", leaf).miss_any(NodeId::new(42))
+ );
+ }
+
+ #[test]
+ fn into_fork_miss_first() {
+ let mut graph = Graph::new();
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let rope = Rope::new("42", leaf).miss(Miss::First(NodeId::new(42)));
+
+ let fork = rope.into_fork(&mut graph);
+
+ assert_eq!(leaf, NodeId::new(1));
+ assert_eq!(
+ fork,
+ Fork::new()
+ .branch(b'4', NodeId::new(2))
+ .miss(NodeId::new(42))
+ );
+ assert_eq!(graph[NodeId::new(2)], Rope::new("2", leaf));
+ }
+
+ #[test]
+ fn split_at() {
+ let mut graph = Graph::new();
+
+ let leaf = graph.push(Node::Leaf("LEAF"));
+ let rope = Rope::new("foobar", leaf);
+
+ assert_eq!(rope.clone().split_at(6, &mut graph).unwrap(), rope);
+
+ let split = rope.split_at(3, &mut graph).unwrap();
+ let expected_id = NodeId::new(leaf.get() + 1);
+
+ assert_eq!(split, Rope::new("foo", expected_id));
+ assert_eq!(graph[expected_id], Rope::new("bar", leaf));
+ }
+
+ #[test]
+ fn pattern_to_bytes() {
+ let pat = Pattern::from("foobar");
+
+ assert_eq!(pat.to_bytes().unwrap(), b"foobar");
+
+ let ranges = Pattern::from(vec![0..=0, 42..=42, b'{'..=b'}']);
+
+ assert_eq!(ranges.to_bytes(), None);
+ }
+}
diff --git a/vendor/logos-codegen/src/leaf.rs b/vendor/logos-codegen/src/leaf.rs
new file mode 100644
index 00000000..5e0b810e
--- /dev/null
+++ b/vendor/logos-codegen/src/leaf.rs
@@ -0,0 +1,118 @@
+use std::cmp::{Ord, Ordering};
+use std::fmt::{self, Debug, Display};
+
+use proc_macro2::{Span, TokenStream};
+use syn::{spanned::Spanned, Ident};
+
+use crate::graph::{Disambiguate, Node};
+use crate::util::MaybeVoid;
+
+#[derive(Clone)]
+pub struct Leaf<'t> {
+ pub ident: Option<&'t Ident>,
+ pub span: Span,
+ pub priority: usize,
+ pub field: MaybeVoid,
+ pub callback: Option<Callback>,
+}
+
+#[derive(Clone)]
+pub enum Callback {
+ Label(TokenStream),
+ Inline(Box<InlineCallback>),
+ Skip(Span),
+}
+
+#[derive(Clone)]
+pub struct InlineCallback {
+ pub arg: Ident,
+ pub body: TokenStream,
+ pub span: Span,
+}
+
+impl From<InlineCallback> for Callback {
+ fn from(inline: InlineCallback) -> Callback {
+ Callback::Inline(Box::new(inline))
+ }
+}
+
+impl Callback {
+ pub fn span(&self) -> Span {
+ match self {
+ Callback::Label(tokens) => tokens.span(),
+ Callback::Inline(inline) => inline.span,
+ Callback::Skip(span) => *span,
+ }
+ }
+}
+
+impl<'t> Leaf<'t> {
+ pub fn new(ident: &'t Ident, span: Span) -> Self {
+ Leaf {
+ ident: Some(ident),
+ span,
+ priority: 0,
+ field: MaybeVoid::Void,
+ callback: None,
+ }
+ }
+
+ pub fn new_skip(span: Span) -> Self {
+ Leaf {
+ ident: None,
+ span,
+ priority: 0,
+ field: MaybeVoid::Void,
+ callback: Some(Callback::Skip(span)),
+ }
+ }
+
+ pub fn callback(mut self, callback: Option<Callback>) -> Self {
+ self.callback = callback;
+ self
+ }
+
+ pub fn field(mut self, field: MaybeVoid) -> Self {
+ self.field = field;
+ self
+ }
+
+ pub fn priority(mut self, priority: usize) -> Self {
+ self.priority = priority;
+ self
+ }
+}
+
+impl Disambiguate for Leaf<'_> {
+ fn cmp(left: &Leaf, right: &Leaf) -> Ordering {
+ Ord::cmp(&left.priority, &right.priority)
+ }
+}
+
+impl<'t> From<Leaf<'t>> for Node<Leaf<'t>> {
+ fn from(leaf: Leaf<'t>) -> Self {
+ Node::Leaf(leaf)
+ }
+}
+
+impl Debug for Leaf<'_> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "::{}", self)?;
+
+ match self.callback {
+ Some(Callback::Label(ref label)) => write!(f, " ({})", label),
+ Some(Callback::Inline(_)) => f.write_str(" (<inline>)"),
+ Some(Callback::Skip(_)) => f.write_str(" (<skip>)"),
+ None => Ok(()),
+ }
+ }
+}
+
+impl Display for Leaf<'_> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match self.ident {
+ Some(ident) => Display::fmt(ident, f),
+ None => f.write_str("<skip>"),
+ }
+ }
+}
diff --git a/vendor/logos-codegen/src/lib.rs b/vendor/logos-codegen/src/lib.rs
new file mode 100644
index 00000000..2b2d3db2
--- /dev/null
+++ b/vendor/logos-codegen/src/lib.rs
@@ -0,0 +1,391 @@
+//! <img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right">
+//!
+//! # Logos
+//!
+//! This is a `#[derive]` macro crate, [for documentation go to main crate](https://docs.rs/logos).
+
+// The `quote!` macro requires deep recursion.
+#![recursion_limit = "196"]
+#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]
+
+mod error;
+mod generator;
+#[cfg(not(feature = "fuzzing"))]
+mod graph;
+#[cfg(feature = "fuzzing")]
+pub mod graph;
+mod leaf;
+#[cfg(not(feature = "fuzzing"))]
+mod mir;
+#[cfg(feature = "fuzzing")]
+pub mod mir;
+mod parser;
+mod util;
+
+#[macro_use]
+#[allow(missing_docs)]
+mod macros;
+
+use generator::Generator;
+use graph::{DisambiguationError, Fork, Graph, Rope};
+use leaf::Leaf;
+use parser::{IgnoreFlags, Mode, Parser};
+use quote::ToTokens;
+use util::MaybeVoid;
+
+use proc_macro2::{Delimiter, TokenStream, TokenTree};
+use quote::quote;
+use syn::parse_quote;
+use syn::spanned::Spanned;
+use syn::{Fields, ItemEnum};
+
+const LOGOS_ATTR: &str = "logos";
+const ERROR_ATTR: &str = "error";
+const TOKEN_ATTR: &str = "token";
+const REGEX_ATTR: &str = "regex";
+
+/// Generate a `Logos` implementation for the given enum, provided as a stream of rust tokens.
+pub fn generate(input: TokenStream) -> TokenStream {
+ debug!("Reading input token streams");
+
+ let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
+
+ let name = &item.ident;
+
+ let mut parser = Parser::default();
+
+ for param in item.generics.params {
+ parser.parse_generic(param);
+ }
+
+ for attr in &mut item.attrs {
+ parser.try_parse_logos(attr);
+ }
+
+ let mut ropes = Vec::new();
+ let mut regex_ids = Vec::new();
+ let mut graph = Graph::new();
+
+ {
+ let errors = &mut parser.errors;
+
+ for literal in &parser.skips {
+ match literal.to_mir(&parser.subpatterns, IgnoreFlags::Empty, errors) {
+ Ok(mir) => {
+ let then = graph.push(Leaf::new_skip(literal.span()).priority(mir.priority()));
+ let id = graph.regex(mir, then);
+
+ regex_ids.push(id);
+ }
+ Err(err) => {
+ errors.err(err, literal.span());
+ }
+ }
+ }
+ }
+
+ debug!("Iterating through enum variants");
+
+ for variant in &mut item.variants {
+ let field = match &mut variant.fields {
+ Fields::Unit => MaybeVoid::Void,
+ Fields::Unnamed(fields) => {
+ if fields.unnamed.len() != 1 {
+ parser.err(
+ format!(
+ "Logos currently only supports variants with one field, found {}",
+ fields.unnamed.len(),
+ ),
+ fields.span(),
+ );
+ }
+
+ let ty = &mut fields
+ .unnamed
+ .first_mut()
+ .expect("Already checked len; qed")
+ .ty;
+ let ty = parser.get_type(ty);
+
+ MaybeVoid::Some(ty)
+ }
+ Fields::Named(fields) => {
+ parser.err("Logos doesn't support named fields yet.", fields.span());
+
+ MaybeVoid::Void
+ }
+ };
+
+ // Lazy leaf constructor to avoid cloning
+ let var_ident = &variant.ident;
+ let leaf = move |span| Leaf::new(var_ident, span).field(field.clone());
+
+ for attr in &mut variant.attrs {
+ let attr_name = match attr.path().get_ident() {
+ Some(ident) => ident.to_string(),
+ None => continue,
+ };
+
+ match attr_name.as_str() {
+ ERROR_ATTR => {
+ // TODO: Remove in future versions
+ parser.err(
+ "\
+ Since 0.13 Logos no longer requires the #[error] variant.\n\
+ \n\
+ For help with migration see release notes: \
+ https://github.com/maciejhirsz/logos/releases\
+ ",
+ attr.span(),
+ );
+ }
+ TOKEN_ATTR => {
+ let definition = match parser.parse_definition(attr) {
+ Some(definition) => definition,
+ None => {
+ parser.err("Expected #[token(...)]", attr.span());
+ continue;
+ }
+ };
+
+ if definition.ignore_flags.is_empty() {
+ let bytes = definition.literal.to_bytes();
+ let then = graph.push(
+ leaf(definition.literal.span())
+ .priority(definition.priority.unwrap_or(bytes.len() * 2))
+ .callback(definition.callback),
+ );
+
+ ropes.push(Rope::new(bytes, then));
+ } else {
+ let mir = definition
+ .literal
+ .escape_regex()
+ .to_mir(
+ &Default::default(),
+ definition.ignore_flags,
+ &mut parser.errors,
+ )
+ .expect("The literal should be perfectly valid regex");
+
+ let then = graph.push(
+ leaf(definition.literal.span())
+ .priority(definition.priority.unwrap_or_else(|| mir.priority()))
+ .callback(definition.callback),
+ );
+ let id = graph.regex(mir, then);
+
+ regex_ids.push(id);
+ }
+ }
+ REGEX_ATTR => {
+ let definition = match parser.parse_definition(attr) {
+ Some(definition) => definition,
+ None => {
+ parser.err("Expected #[regex(...)]", attr.span());
+ continue;
+ }
+ };
+ let mir = match definition.literal.to_mir(
+ &parser.subpatterns,
+ definition.ignore_flags,
+ &mut parser.errors,
+ ) {
+ Ok(mir) => mir,
+ Err(err) => {
+ parser.err(err, definition.literal.span());
+ continue;
+ }
+ };
+
+ let then = graph.push(
+ leaf(definition.literal.span())
+ .priority(definition.priority.unwrap_or_else(|| mir.priority()))
+ .callback(definition.callback),
+ );
+ let id = graph.regex(mir, then);
+
+ regex_ids.push(id);
+ }
+ _ => (),
+ }
+ }
+ }
+
+ let mut root = Fork::new();
+
+ debug!("Parsing additional options (extras, source, ...)");
+
+ let error_type = parser.error_type.take();
+ let extras = parser.extras.take();
+ let source = parser
+ .source
+ .take()
+ .map(strip_wrapping_parens)
+ .unwrap_or(match parser.mode {
+ Mode::Utf8 => quote!(str),
+ Mode::Binary => quote!([u8]),
+ });
+ let logos_path = parser
+ .logos_path
+ .take()
+ .unwrap_or_else(|| parse_quote!(::logos));
+
+ let generics = parser.generics();
+ let this = quote!(#name #generics);
+
+ let impl_logos = |body| {
+ quote! {
+ impl<'s> #logos_path::Logos<'s> for #this {
+ type Error = #error_type;
+
+ type Extras = #extras;
+
+ type Source = #source;
+
+ fn lex(lex: &mut #logos_path::Lexer<'s, Self>) {
+ #body
+ }
+ }
+ }
+ };
+
+ for id in regex_ids {
+ let fork = graph.fork_off(id);
+
+ root.merge(fork, &mut graph);
+ }
+ for rope in ropes {
+ root.merge(rope.into_fork(&mut graph), &mut graph);
+ }
+ while let Some(id) = root.miss.take() {
+ let fork = graph.fork_off(id);
+
+ if fork.branches().next().is_some() {
+ root.merge(fork, &mut graph);
+ } else {
+ break;
+ }
+ }
+
+ debug!("Checking if any two tokens have the same priority");
+
+ for &DisambiguationError(a, b) in graph.errors() {
+ let a = graph[a].unwrap_leaf();
+ let b = graph[b].unwrap_leaf();
+ let disambiguate = a.priority + 1;
+
+ let mut err = |a: &Leaf, b: &Leaf| {
+ parser.err(
+ format!(
+ "\
+ A definition of variant `{a}` can match the same input as another definition of variant `{b}`.\n\
+ \n\
+ hint: Consider giving one definition a higher priority: \
+ #[regex(..., priority = {disambiguate})]\
+ ",
+ ),
+ a.span
+ );
+ };
+
+ err(a, b);
+ err(b, a);
+ }
+
+ if let Some(errors) = parser.errors.render() {
+ return impl_logos(errors);
+ }
+
+ let root = graph.push(root);
+
+ graph.shake(root);
+
+ debug!("Generating code from graph:\n{graph:#?}");
+
+ let generator = Generator::new(name, &this, root, &graph);
+
+ let body = generator.generate();
+ impl_logos(quote! {
+ use #logos_path::internal::{LexerInternal, CallbackResult};
+
+ type Lexer<'s> = #logos_path::Lexer<'s, #this>;
+
+ fn _end<'s>(lex: &mut Lexer<'s>) {
+ lex.end()
+ }
+
+ fn _error<'s>(lex: &mut Lexer<'s>) {
+ lex.bump_unchecked(1);
+
+ lex.error();
+ }
+
+ #body
+ })
+}
+
+/// Strip all logos attributes from the given enum, allowing it to be used in code without `logos-derive` present.
+pub fn strip_attributes(input: TokenStream) -> TokenStream {
+ let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
+
+ strip_attrs_from_vec(&mut item.attrs);
+
+ for attr in &mut item.attrs {
+ if let syn::Meta::List(meta) = &mut attr.meta {
+ if meta.path.is_ident("derive") {
+ let mut tokens =
+ std::mem::replace(&mut meta.tokens, TokenStream::new()).into_iter();
+
+ while let Some(TokenTree::Ident(ident)) = tokens.next() {
+ let punct = tokens.next();
+
+ if ident == "Logos" {
+ continue;
+ }
+
+ meta.tokens.extend([TokenTree::Ident(ident)]);
+ meta.tokens.extend(punct);
+ }
+ }
+ }
+ }
+
+ for variant in &mut item.variants {
+ strip_attrs_from_vec(&mut variant.attrs);
+ for field in &mut variant.fields {
+ strip_attrs_from_vec(&mut field.attrs);
+ }
+ }
+
+ item.to_token_stream()
+}
+
+fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) {
+ attrs.retain(|attr| !is_logos_attr(attr))
+}
+
+fn is_logos_attr(attr: &syn::Attribute) -> bool {
+ attr.path().is_ident(LOGOS_ATTR)
+ || attr.path().is_ident(TOKEN_ATTR)
+ || attr.path().is_ident(REGEX_ATTR)
+}
+
+fn strip_wrapping_parens(t: TokenStream) -> TokenStream {
+ let tts: Vec<TokenTree> = t.into_iter().collect();
+
+ if tts.len() != 1 {
+ tts.into_iter().collect()
+ } else {
+ match tts.into_iter().next().unwrap() {
+ TokenTree::Group(g) => {
+ if g.delimiter() == Delimiter::Parenthesis {
+ g.stream()
+ } else {
+ core::iter::once(TokenTree::Group(g)).collect()
+ }
+ }
+ tt => core::iter::once(tt).collect(),
+ }
+ }
+}
diff --git a/vendor/logos-codegen/src/macros.rs b/vendor/logos-codegen/src/macros.rs
new file mode 100644
index 00000000..148c6f6a
--- /dev/null
+++ b/vendor/logos-codegen/src/macros.rs
@@ -0,0 +1,12 @@
+#[cfg(feature = "debug")]
+macro_rules! debug {
+ ($($arg:tt)*) => {
+ eprint!("[{}:{}:{}] ", file!(), line!(), column!());
+ eprintln!($($arg)*)
+ }
+}
+
+#[cfg(not(feature = "debug"))]
+macro_rules! debug {
+ ($($arg:tt)*) => {};
+}
diff --git a/vendor/logos-codegen/src/mir.rs b/vendor/logos-codegen/src/mir.rs
new file mode 100644
index 00000000..e07c5b78
--- /dev/null
+++ b/vendor/logos-codegen/src/mir.rs
@@ -0,0 +1,235 @@
+use std::convert::TryFrom;
+
+use lazy_static::lazy_static;
+use regex_syntax::hir::{Dot, Hir, HirKind};
+use regex_syntax::ParserBuilder;
+
+pub use regex_syntax::hir::{Class, ClassUnicode, Literal};
+
+use crate::error::{Error, Result};
+
+lazy_static! {
+ static ref DOT_UTF8: Hir = Hir::dot(Dot::AnyChar);
+ static ref DOT_BYTES: Hir = Hir::dot(Dot::AnyByte);
+}
+
+/// Middle Intermediate Representation of the regex, built from
+/// `regex_syntax`'s `Hir`. The goal here is to strip and canonicalize
+/// the tree, so that we don't have to do transformations later on the
+/// graph, with the potential of running into looping references.
+#[derive(Clone, Debug)]
+pub enum Mir {
+ Empty,
+ Loop(Box<Mir>),
+ Maybe(Box<Mir>),
+ Concat(Vec<Mir>),
+ Alternation(Vec<Mir>),
+ Class(Class),
+ Literal(Literal),
+}
+
+impl Mir {
+ pub fn utf8(source: &str) -> Result<Mir> {
+ Mir::try_from(ParserBuilder::new().build().parse(source)?)
+ }
+
+ pub fn utf8_ignore_case(source: &str) -> Result<Mir> {
+ Mir::try_from(
+ ParserBuilder::new()
+ .case_insensitive(true)
+ .build()
+ .parse(source)?,
+ )
+ }
+
+ pub fn binary(source: &str) -> Result<Mir> {
+ Mir::try_from(
+ ParserBuilder::new()
+ .utf8(false)
+ .unicode(false)
+ .build()
+ .parse(source)?,
+ )
+ }
+
+ pub fn binary_ignore_case(source: &str) -> Result<Mir> {
+ Mir::try_from(
+ ParserBuilder::new()
+ .utf8(false)
+ .unicode(false)
+ .case_insensitive(true)
+ .build()
+ .parse(source)?,
+ )
+ }
+
+ pub fn priority(&self) -> usize {
+ match self {
+ Mir::Empty | Mir::Loop(_) | Mir::Maybe(_) => 0,
+ Mir::Concat(concat) => concat.iter().map(Mir::priority).sum(),
+ Mir::Alternation(alt) => alt.iter().map(Mir::priority).min().unwrap_or(0),
+ Mir::Class(_) => 2,
+ Mir::Literal(lit) => match std::str::from_utf8(&lit.0) {
+ Ok(s) => 2 * s.chars().count(),
+ Err(_) => 2 * lit.0.len(),
+ },
+ }
+ }
+}
+
+impl TryFrom<Hir> for Mir {
+ type Error = Error;
+
+ fn try_from(hir: Hir) -> Result<Mir> {
+ match hir.into_kind() {
+ HirKind::Empty => Ok(Mir::Empty),
+ HirKind::Concat(concat) => {
+ let mut out = Vec::with_capacity(concat.len());
+
+ fn extend(mir: Mir, out: &mut Vec<Mir>) {
+ match mir {
+ Mir::Concat(nested) => {
+ for child in nested {
+ extend(child, out);
+ }
+ }
+ mir => out.push(mir),
+ }
+ }
+
+ for hir in concat {
+ extend(Mir::try_from(hir)?, &mut out);
+ }
+
+ Ok(Mir::Concat(out))
+ }
+ HirKind::Alternation(alternation) => {
+ let alternation = alternation
+ .into_iter()
+ .map(Mir::try_from)
+ .collect::<Result<_>>()?;
+
+ Ok(Mir::Alternation(alternation))
+ }
+ HirKind::Literal(literal) => Ok(Mir::Literal(literal)),
+ HirKind::Class(class) => Ok(Mir::Class(class)),
+ HirKind::Repetition(repetition) => {
+ if !repetition.greedy {
+ return Err("#[regex]: non-greedy parsing is currently unsupported.".into());
+ }
+
+ let is_dot = if repetition.sub.properties().is_utf8() {
+ *repetition.sub == *DOT_UTF8
+ } else {
+ *repetition.sub == *DOT_BYTES
+ };
+ let mir = Mir::try_from(*repetition.sub)?;
+
+ match (repetition.min, repetition.max) {
+ (0..=1, None) if is_dot => {
+ Err(
+ "#[regex]: \".+\" and \".*\" patterns will greedily consume \
+ the entire source till the end as Logos does not allow \
+ backtracking. If you are looking to match everything until \
+ a specific character, you should use a negative character \
+ class. E.g., use regex r\"'[^']*'\" to match anything in \
+ between two quotes. Read more about that here: \
+ https://github.com/maciejhirsz/logos/issues/302#issuecomment-1521342541."
+ .into()
+ )
+ }
+ // 0 or 1
+ (0, Some(1)) => Ok(Mir::Maybe(Box::new(mir))),
+ // 0 or more
+ (0, None) => Ok(Mir::Loop(Box::new(mir))),
+ // 1 or more
+ (1, None) => {
+ Ok(Mir::Concat(vec![mir.clone(), Mir::Loop(Box::new(mir))]))
+ }
+ // Exact {n}
+ (n, Some(m)) if m == n => {
+ let mut out = Vec::with_capacity(n as usize);
+ for _ in 0..n {
+ out.push(mir.clone());
+ }
+ Ok(Mir::Concat(out))
+ }
+ // At least {n,}
+ (n, None) => {
+ let mut out = Vec::with_capacity(n as usize);
+ for _ in 0..n {
+ out.push(mir.clone());
+ }
+ out.push(Mir::Loop(Box::new(mir)));
+ Ok(Mir::Concat(out))
+ }
+ // Bounded {n, m}
+ (n, Some(m)) => {
+ let mut out = Vec::with_capacity(m as usize);
+ for _ in 0..n {
+ out.push(mir.clone());
+ }
+ for _ in n..m {
+ out.push(Mir::Maybe(Box::new(mir.clone())));
+ }
+ Ok(Mir::Concat(out))
+ }
+ }
+ }
+ HirKind::Capture(capture) => Mir::try_from(*capture.sub),
+ HirKind::Look(_) => {
+ Err("#[regex]: look-around assertions are currently unsupported.".into())
+ }
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::Mir;
+
+ #[test]
+ fn priorities() {
+ let regexes = [
+ ("a", 2),
+ ("à", 2),
+ ("京", 2),
+ ("Eté", 6),
+ ("Été", 6),
+ ("[a-z]+", 2),
+ ("a|b", 2),
+ ("a|[b-z]", 2),
+ ("(foo)+", 6),
+ ("foobar", 12),
+ ("(fooz|bar)+qux", 12),
+ ];
+
+ for (regex, expected) in regexes.iter() {
+ let mir = Mir::utf8(regex).unwrap();
+ assert_eq!(mir.priority(), *expected, "Failed for regex \"{}\"", regex);
+ }
+ }
+
+ #[test]
+ fn equivalent_patterns() {
+ let regexes = [
+ ("a|b", "[a-b]"),
+ ("1|2|3", "[1-3]"),
+ ("1+", "[1]+"),
+ ("c*", "[c]*"),
+ ("aaa", "a{3}"),
+ ("a[a]{2}", "a{3}"),
+ ];
+
+ for (regex_left, regex_right) in regexes.iter() {
+ let mir_left = Mir::utf8(regex_left).unwrap();
+ let mir_right = Mir::utf8(regex_right).unwrap();
+ assert_eq!(
+ mir_left.priority(),
+ mir_right.priority(),
+ "Regexes \"{regex_left}\" and \"{regex_right}\" \
+ are equivalent but have different priorities"
+ );
+ }
+ }
+}
diff --git a/vendor/logos-codegen/src/parser/definition.rs b/vendor/logos-codegen/src/parser/definition.rs
new file mode 100644
index 00000000..a876fb59
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/definition.rs
@@ -0,0 +1,193 @@
+use proc_macro2::{Ident, Span};
+use syn::{spanned::Spanned, LitByteStr, LitStr};
+
+use crate::error::{Errors, Result};
+use crate::leaf::Callback;
+use crate::mir::Mir;
+use crate::parser::nested::NestedValue;
+use crate::parser::{IgnoreFlags, Parser, Subpatterns};
+
+use super::ignore_flags::ascii_case::MakeAsciiCaseInsensitive;
+
+pub struct Definition {
+ pub literal: Literal,
+ pub priority: Option<usize>,
+ pub callback: Option<Callback>,
+ pub ignore_flags: IgnoreFlags,
+}
+
+pub enum Literal {
+ Utf8(LitStr),
+ Bytes(LitByteStr),
+}
+
+impl Definition {
+ pub fn new(literal: Literal) -> Self {
+ Definition {
+ literal,
+ priority: None,
+ callback: None,
+ ignore_flags: IgnoreFlags::Empty,
+ }
+ }
+
+ pub fn named_attr(&mut self, name: Ident, value: NestedValue, parser: &mut Parser) {
+ match (name.to_string().as_str(), value) {
+ ("priority", NestedValue::Assign(tokens)) => {
+ let prio = match tokens.to_string().parse() {
+ Ok(prio) => prio,
+ Err(_) => {
+ parser.err("Expected an unsigned integer", tokens.span());
+ return;
+ }
+ };
+
+ if self.priority.replace(prio).is_some() {
+ parser.err("Resetting previously set priority", tokens.span());
+ }
+ }
+ ("priority", _) => {
+ parser.err("Expected: priority = <integer>", name.span());
+ }
+ ("callback", NestedValue::Assign(tokens)) => {
+ let span = tokens.span();
+ let callback = match parser.parse_callback(tokens) {
+ Some(callback) => callback,
+ None => {
+ parser.err("Not a valid callback", span);
+ return;
+ }
+ };
+
+ if let Some(previous) = self.callback.replace(callback) {
+ parser
+ .err(
+ "Callback has been already set",
+ span.join(name.span()).unwrap(),
+ )
+ .err("Previous callback set here", previous.span());
+ }
+ }
+ ("callback", _) => {
+ parser.err("Expected: callback = ...", name.span());
+ }
+ ("ignore", NestedValue::Group(tokens)) => {
+ self.ignore_flags.parse_group(name, tokens, parser);
+ }
+ ("ignore", _) => {
+ parser.err("Expected: ignore(<flag>, ...)", name.span());
+ }
+ (unknown, _) => {
+ parser.err(
+ format!(
+ "\
+ Unknown nested attribute: {}\n\
+ \n\
+ Expected one of: priority, callback\
+ ",
+ unknown
+ ),
+ name.span(),
+ );
+ }
+ }
+ }
+}
+
+impl Literal {
+ pub fn to_bytes(&self) -> Vec<u8> {
+ match self {
+ Literal::Utf8(string) => string.value().into_bytes(),
+ Literal::Bytes(bytes) => bytes.value(),
+ }
+ }
+
+ pub fn escape_regex(&self) -> Literal {
+ match self {
+ Literal::Utf8(string) => Literal::Utf8(LitStr::new(
+ regex_syntax::escape(&string.value()).as_str(),
+ self.span(),
+ )),
+ Literal::Bytes(bytes) => Literal::Bytes(LitByteStr::new(
+ regex_syntax::escape(&bytes_to_regex_string(bytes.value())).as_bytes(),
+ self.span(),
+ )),
+ }
+ }
+
+ pub fn to_mir(
+ &self,
+ subpatterns: &Subpatterns,
+ ignore_flags: IgnoreFlags,
+ errors: &mut Errors,
+ ) -> Result<Mir> {
+ let value = subpatterns.fix(self, errors);
+
+ if ignore_flags.contains(IgnoreFlags::IgnoreAsciiCase) {
+ match self {
+ Literal::Utf8(_) => {
+ Mir::utf8(&value).map(MakeAsciiCaseInsensitive::make_ascii_case_insensitive)
+ }
+ Literal::Bytes(_) => Mir::binary_ignore_case(&value),
+ }
+ } else if ignore_flags.contains(IgnoreFlags::IgnoreCase) {
+ match self {
+ Literal::Utf8(_) => Mir::utf8_ignore_case(&value),
+ Literal::Bytes(_) => Mir::binary_ignore_case(&value),
+ }
+ } else {
+ match self {
+ Literal::Utf8(_) => Mir::utf8(&value),
+ Literal::Bytes(_) => Mir::binary(&value),
+ }
+ }
+ }
+
+ pub fn span(&self) -> Span {
+ match self {
+ Literal::Utf8(string) => string.span(),
+ Literal::Bytes(bytes) => bytes.span(),
+ }
+ }
+}
+
+impl syn::parse::Parse for Literal {
+ fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
+ let la = input.lookahead1();
+ if la.peek(LitStr) {
+ Ok(Literal::Utf8(input.parse()?))
+ } else if la.peek(LitByteStr) {
+ Ok(Literal::Bytes(input.parse()?))
+ } else {
+ Err(la.error())
+ }
+ }
+}
+
+pub fn bytes_to_regex_string(bytes: Vec<u8>) -> String {
+ if bytes.is_ascii() {
+ unsafe {
+ // Unicode values are prohibited, so we can't use
+ // safe version of String::from_utf8
+ //
+ // We can, however, construct a safe ASCII string
+ return String::from_utf8_unchecked(bytes);
+ }
+ }
+
+ let mut string = String::with_capacity(bytes.len() * 2);
+
+ for byte in bytes {
+ if byte < 0x80 {
+ string.push(byte as char);
+ } else {
+ static DIGITS: [u8; 16] = *b"0123456789abcdef";
+
+ string.push_str(r"\x");
+ string.push(DIGITS[(byte / 16) as usize] as char);
+ string.push(DIGITS[(byte % 16) as usize] as char);
+ }
+ }
+
+ string
+}
diff --git a/vendor/logos-codegen/src/parser/ignore_flags.rs b/vendor/logos-codegen/src/parser/ignore_flags.rs
new file mode 100644
index 00000000..3a79d31b
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/ignore_flags.rs
@@ -0,0 +1,499 @@
+use std::ops::{BitAnd, BitOr};
+
+use proc_macro2::{Ident, TokenStream, TokenTree};
+
+use crate::parser::Parser;
+use crate::util::is_punct;
+
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct IgnoreFlags {
+ bits: u8,
+}
+
+#[allow(non_upper_case_globals)]
+impl IgnoreFlags {
+ pub const Empty: Self = Self::new(0x00);
+ pub const IgnoreCase: Self = Self::new(0x01);
+ pub const IgnoreAsciiCase: Self = Self::new(0x02);
+
+ #[inline]
+ pub const fn new(bits: u8) -> Self {
+ Self { bits }
+ }
+
+ /// Enables a variant.
+ #[inline]
+ pub fn enable(&mut self, variant: Self) {
+ self.bits |= variant.bits;
+ }
+
+ /// Checks if this `IgnoreFlags` contains *any* of the given variants.
+ #[inline]
+ pub fn contains(&self, variants: Self) -> bool {
+ self.bits & variants.bits != 0
+ }
+
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.bits == 0
+ }
+
+ /// Parses an identifier and enables it for `self`.
+ ///
+ /// Valid inputs are (that produces `true`):
+ /// * `"case"` (incompatible with `"ascii_case"`)
+ /// * `"ascii_case"` (incompatible with `"case"`)
+ ///
+ /// An error causes this function to return `false` and emits an error to
+ /// the given `Parser`.
+ fn parse_ident(&mut self, ident: Ident, parser: &mut Parser) -> bool {
+ match ident.to_string().as_str() {
+ "case" => {
+ if self.contains(Self::IgnoreAsciiCase) {
+ parser.err(
+ "\
+ The flag \"case\" cannot be used along with \"ascii_case\"\
+ ",
+ ident.span(),
+ );
+ false
+ } else {
+ self.enable(Self::IgnoreCase);
+ true
+ }
+ }
+ "ascii_case" => {
+ if self.contains(Self::IgnoreCase) {
+ parser.err(
+ "\
+ The flag \"ascii_case\" cannot be used along with \"case\"\
+ ",
+ ident.span(),
+ );
+ false
+ } else {
+ self.enable(Self::IgnoreAsciiCase);
+ true
+ }
+ }
+ unknown => {
+ parser.err(
+ format!(
+ "\
+ Unknown flag: {}\n\
+ \n\
+ Expected one of: case, ascii_case\
+ ",
+ unknown
+ ),
+ ident.span(),
+ );
+ false
+ }
+ }
+ }
+
+ pub fn parse_group(&mut self, name: Ident, tokens: TokenStream, parser: &mut Parser) {
+ // Little finite state machine to parse "<flag>(,<flag>)*,?"
+
+ // FSM description for future maintenance
+ // 0: Initial state
+ // <flag> -> 1
+ // _ -> error
+ // 1: A flag was found
+ // , -> 2
+ // None -> done
+ // _ -> error
+ // 2: A comma was found (after a <flag>)
+ // <flag> -> 1
+ // None -> done
+ // _ -> error
+ let mut state = 0u8;
+
+ let mut tokens = tokens.into_iter();
+
+ loop {
+ state = match state {
+ 0 => match tokens.next() {
+ Some(TokenTree::Ident(ident)) => {
+ if self.parse_ident(ident, parser) {
+ 1
+ } else {
+ return;
+ }
+ }
+ _ => {
+ parser.err(
+ "\
+ Invalid ignore flag\n\
+ \n\
+ Expected one of: case, ascii_case\
+ ",
+ name.span(),
+ );
+ return;
+ }
+ },
+ 1 => match tokens.next() {
+ Some(tt) if is_punct(&tt, ',') => 2,
+ None => return,
+ Some(unexpected_tt) => {
+ parser.err(
+ format!(
+ "\
+ Unexpected token: {:?}\
+ ",
+ unexpected_tt.to_string(),
+ ),
+ unexpected_tt.span(),
+ );
+ return;
+ }
+ },
+ 2 => match tokens.next() {
+ Some(TokenTree::Ident(ident)) => {
+ if self.parse_ident(ident, parser) {
+ 1
+ } else {
+ return;
+ }
+ }
+ None => return,
+ Some(unexpected_tt) => {
+ parser.err(
+ format!(
+ "\
+ Unexpected token: {:?}\
+ ",
+ unexpected_tt.to_string(),
+ ),
+ unexpected_tt.span(),
+ );
+ return;
+ }
+ },
+ _ => unreachable!("Internal Error: invalid state ({})", state),
+ }
+ }
+ }
+}
+
+impl BitOr for IgnoreFlags {
+ type Output = Self;
+
+ fn bitor(self, other: Self) -> Self {
+ Self::new(self.bits | other.bits)
+ }
+}
+
+impl BitAnd for IgnoreFlags {
+ type Output = Self;
+
+ fn bitand(self, other: Self) -> Self {
+ Self::new(self.bits & other.bits)
+ }
+}
+
+pub mod ascii_case {
+ use regex_syntax::hir;
+
+ use crate::mir::Mir;
+ use crate::parser::Literal;
+
+ macro_rules! literal {
+ ($byte:expr) => {
+ hir::Literal(Box::new([$byte]))
+ };
+ (@char $c:expr) => {
+ hir::Literal(
+ $c.encode_utf8(&mut [0; 4])
+ .as_bytes()
+ .to_vec()
+ .into_boxed_slice(),
+ )
+ };
+ }
+
+ pub trait MakeAsciiCaseInsensitive {
+ /// Creates an equivalent regular expression which ignores the letter casing
+ /// of ascii characters.
+ fn make_ascii_case_insensitive(self) -> Mir;
+ }
+
+ impl MakeAsciiCaseInsensitive for u8 {
+ fn make_ascii_case_insensitive(self) -> Mir {
+ if self.is_ascii_lowercase() {
+ Mir::Alternation(vec![
+ Mir::Literal(literal!(self - 32)),
+ Mir::Literal(literal!(self)),
+ ])
+ } else if self.is_ascii_uppercase() {
+ Mir::Alternation(vec![
+ Mir::Literal(literal!(self)),
+ Mir::Literal(literal!(self + 32)),
+ ])
+ } else {
+ Mir::Literal(literal!(self))
+ }
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for char {
+ fn make_ascii_case_insensitive(self) -> Mir {
+ if self.is_ascii() {
+ (self as u8).make_ascii_case_insensitive()
+ } else {
+ Mir::Literal(literal!(@char self))
+ }
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for hir::Literal {
+ fn make_ascii_case_insensitive(self) -> Mir {
+ Mir::Concat(
+ self.0
+ .iter()
+ .map(|x| x.make_ascii_case_insensitive())
+ .collect(),
+ )
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for hir::ClassBytes {
+ fn make_ascii_case_insensitive(mut self) -> Mir {
+ self.case_fold_simple();
+ Mir::Class(hir::Class::Bytes(self))
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for hir::ClassUnicode {
+ fn make_ascii_case_insensitive(mut self) -> Mir {
+ use std::cmp;
+
+ // Manual implementation so that case folding is only performed on ASCII characters.
+
+ let mut ranges = Vec::new();
+
+ for range in self.ranges() {
+ #[inline]
+ fn overlaps(st1: u8, end1: u8, st2: u8, end2: u8) -> bool {
+ (st2 <= st1 && st1 <= end2) || (st1 <= st2 && st2 <= end1)
+ }
+
+ #[inline]
+ fn make_ascii(c: char) -> Option<u8> {
+ if c.is_ascii() {
+ Some(c as u8)
+ } else {
+ None
+ }
+ }
+
+ match (make_ascii(range.start()), make_ascii(range.end())) {
+ (Some(start), Some(end)) => {
+ if overlaps(b'a', b'z', start, end) {
+ let lower = cmp::max(start, b'a');
+ let upper = cmp::min(end, b'z');
+ ranges.push(hir::ClassUnicodeRange::new(
+ (lower - 32) as char,
+ (upper - 32) as char,
+ ))
+ }
+
+ if overlaps(b'A', b'Z', start, end) {
+ let lower = cmp::max(start, b'A');
+ let upper = cmp::min(end, b'Z');
+ ranges.push(hir::ClassUnicodeRange::new(
+ (lower + 32) as char,
+ (upper + 32) as char,
+ ))
+ }
+ }
+ (Some(start), None) => {
+ if overlaps(b'a', b'z', start, b'z') {
+ let lower = cmp::max(start, b'a');
+ ranges.push(hir::ClassUnicodeRange::new((lower - 32) as char, 'Z'))
+ }
+
+ if overlaps(b'A', b'Z', start, b'Z') {
+ let lower = cmp::max(start, b'A');
+ ranges.push(hir::ClassUnicodeRange::new((lower + 32) as char, 'Z'))
+ }
+ }
+ _ => (),
+ }
+ }
+
+ self.union(&hir::ClassUnicode::new(ranges));
+
+ Mir::Class(hir::Class::Unicode(self))
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for hir::Class {
+ fn make_ascii_case_insensitive(self) -> Mir {
+ match self {
+ hir::Class::Bytes(b) => b.make_ascii_case_insensitive(),
+ hir::Class::Unicode(u) => u.make_ascii_case_insensitive(),
+ }
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for &Literal {
+ fn make_ascii_case_insensitive(self) -> Mir {
+ match self {
+ Literal::Bytes(bytes) => Mir::Concat(
+ bytes
+ .value()
+ .into_iter()
+ .map(|b| b.make_ascii_case_insensitive())
+ .collect(),
+ ),
+ Literal::Utf8(s) => Mir::Concat(
+ s.value()
+ .chars()
+ .map(|b| b.make_ascii_case_insensitive())
+ .collect(),
+ ),
+ }
+ }
+ }
+
+ impl MakeAsciiCaseInsensitive for Mir {
+ fn make_ascii_case_insensitive(self) -> Mir {
+ match self {
+ Mir::Empty => Mir::Empty,
+ Mir::Loop(l) => Mir::Loop(Box::new(l.make_ascii_case_insensitive())),
+ Mir::Maybe(m) => Mir::Maybe(Box::new(m.make_ascii_case_insensitive())),
+ Mir::Concat(c) => Mir::Concat(
+ c.into_iter()
+ .map(|m| m.make_ascii_case_insensitive())
+ .collect(),
+ ),
+ Mir::Alternation(a) => Mir::Alternation(
+ a.into_iter()
+ .map(|m| m.make_ascii_case_insensitive())
+ .collect(),
+ ),
+ Mir::Class(c) => c.make_ascii_case_insensitive(),
+ Mir::Literal(l) => l.make_ascii_case_insensitive(),
+ }
+ }
+ }
+
+ #[cfg(test)]
+ mod tests {
+ use super::MakeAsciiCaseInsensitive;
+ use crate::mir::{Class, Mir};
+ use regex_syntax::hir::{ClassUnicode, ClassUnicodeRange};
+
+ fn assert_range(in_s: char, in_e: char, expected: &[(char, char)]) {
+ let range = ClassUnicodeRange::new(in_s, in_e);
+ let class = ClassUnicode::new(vec![range]);
+
+ let expected =
+ ClassUnicode::new(expected.iter().map(|&(a, b)| ClassUnicodeRange::new(a, b)));
+
+ if let Mir::Class(Class::Unicode(result)) = class.make_ascii_case_insensitive() {
+ assert_eq!(result, expected);
+ } else {
+ panic!("Not a unicode class");
+ };
+ }
+
+ #[test]
+ fn no_letters_left() {
+ assert_range(' ', '+', &[(' ', '+')]);
+ }
+
+ #[test]
+ fn no_letters_right() {
+ assert_range('{', '~', &[('{', '~')]);
+ }
+
+ #[test]
+ fn no_letters_middle() {
+ assert_range('[', '`', &[('[', '`')]);
+ }
+
+ #[test]
+ fn lowercase_left_edge() {
+ assert_range('a', 'd', &[('a', 'd'), ('A', 'D')]);
+ }
+
+ #[test]
+ fn lowercase_right_edge() {
+ assert_range('r', 'z', &[('r', 'z'), ('R', 'Z')]);
+ }
+
+ #[test]
+ fn lowercase_total() {
+ assert_range('a', 'z', &[('a', 'z'), ('A', 'Z')]);
+ }
+
+ #[test]
+ fn uppercase_left_edge() {
+ assert_range('A', 'D', &[('a', 'd'), ('A', 'D')]);
+ }
+
+ #[test]
+ fn uppercase_right_edge() {
+ assert_range('R', 'Z', &[('r', 'z'), ('R', 'Z')]);
+ }
+
+ #[test]
+ fn uppercase_total() {
+ assert_range('A', 'Z', &[('a', 'z'), ('A', 'Z')]);
+ }
+
+ #[test]
+ fn lowercase_cross_left() {
+ assert_range('[', 'h', &[('[', 'h'), ('A', 'H')]);
+ }
+
+ #[test]
+ fn lowercase_cross_right() {
+ assert_range('d', '}', &[('d', '}'), ('D', 'Z')]);
+ }
+
+ #[test]
+ fn uppercase_cross_left() {
+ assert_range(';', 'H', &[(';', 'H'), ('a', 'h')]);
+ }
+
+ #[test]
+ fn uppercase_cross_right() {
+ assert_range('T', ']', &[('t', 'z'), ('T', ']')]);
+ }
+
+ #[test]
+ fn cross_both() {
+ assert_range('X', 'c', &[('X', 'c'), ('x', 'z'), ('A', 'C')]);
+ }
+
+ #[test]
+ fn all_letters() {
+ assert_range('+', '|', &[('+', '|')]);
+ }
+
+ #[test]
+ fn oob_all_letters() {
+ assert_range('#', 'é', &[('#', 'é')]);
+ }
+
+ #[test]
+ fn oob_from_uppercase() {
+ assert_range('Q', 'é', &[('A', 'é')]);
+ }
+
+ #[test]
+ fn oob_from_lowercase() {
+ assert_range('q', 'é', &[('q', 'é'), ('Q', 'Z')]);
+ }
+
+ #[test]
+ fn oob_no_letters() {
+ assert_range('|', 'é', &[('|', 'é')]);
+ }
+ }
+}
diff --git a/vendor/logos-codegen/src/parser/mod.rs b/vendor/logos-codegen/src/parser/mod.rs
new file mode 100644
index 00000000..3ad7202e
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/mod.rs
@@ -0,0 +1,331 @@
+use beef::lean::Cow;
+use proc_macro2::{Span, TokenStream, TokenTree};
+use quote::quote;
+use syn::spanned::Spanned;
+use syn::{Attribute, GenericParam, Lit, Meta, Type};
+
+use crate::error::Errors;
+use crate::leaf::{Callback, InlineCallback};
+use crate::util::{expect_punct, MaybeVoid};
+use crate::LOGOS_ATTR;
+
+mod definition;
+mod ignore_flags;
+mod nested;
+mod subpattern;
+mod type_params;
+
+pub use self::definition::{Definition, Literal};
+pub use self::ignore_flags::IgnoreFlags;
+use self::nested::{AttributeParser, Nested, NestedValue};
+pub use self::subpattern::Subpatterns;
+use self::type_params::{replace_lifetime, traverse_type, TypeParams};
+
/// Accumulates everything parsed out of the `#[derive(Logos)]` input:
/// top-level `#[logos(...)]` options, declared generics, and errors.
#[derive(Default)]
pub struct Parser {
    /// Errors collected while parsing; reported together at the end.
    pub errors: Errors,
    /// Utf8 vs Binary lexing; flipped to Binary when a byte-string
    /// literal is seen (see `parse_literal`).
    pub mode: Mode,
    /// Source type override from `#[logos(source = ...)]`.
    pub source: Option<TokenStream>,
    /// Patterns registered with `#[logos(skip "...")]`.
    pub skips: Vec<Literal>,
    /// Extras type from `#[logos(extras = ...)]`.
    pub extras: MaybeVoid,
    /// Error type from `#[logos(error = ...)]`.
    pub error_type: MaybeVoid,
    /// Named subpatterns from `#[logos(subpattern name = ...)]`.
    pub subpatterns: Subpatterns,
    /// Crate path override from `#[logos(crate = ...)]`.
    pub logos_path: Option<TokenStream>,
    // Generic parameters of the token type and their concrete
    // substitutions from `#[logos(type T = ...)]`.
    types: TypeParams,
}
+
/// Input mode of the generated lexer: utf-8 `str` input by default,
/// switched to binary `[u8]` when a byte-string pattern is used.
#[derive(Default)]
pub enum Mode {
    #[default]
    Utf8,
    Binary,
}
+
impl Parser {
    /// Registers one generic parameter from the token type's declaration.
    ///
    /// Lifetimes and type parameters are recorded in `self.types`;
    /// const generics are rejected with an error.
    pub fn parse_generic(&mut self, param: GenericParam) {
        match param {
            GenericParam::Lifetime(lt) => {
                self.types.explicit_lifetime(lt, &mut self.errors);
            }
            GenericParam::Type(ty) => {
                self.types.add(ty.ident);
            }
            GenericParam::Const(c) => {
                self.err("Logos doesn't support const generics.", c.span());
            }
        }
    }

    /// Generics for the generated impl, or `None` if the type has none.
    pub fn generics(&mut self) -> Option<TokenStream> {
        self.types.generics(&mut self.errors)
    }

    /// Pulls the inner token stream out of a list attribute `#[name(...)]`,
    /// leaving an empty stream in its place. Returns `None` for other
    /// attribute shapes (`#[name]`, `#[name = ...]`).
    fn parse_attr(&mut self, attr: &mut Attribute) -> Option<AttributeParser> {
        match &mut attr.meta {
            Meta::List(list) => {
                let tokens = std::mem::replace(&mut list.tokens, TokenStream::new());

                Some(AttributeParser::new(tokens))
            }
            _ => None,
        }
    }

    /// Try to parse the main `#[logos(...)]`, does nothing if
    /// the attribute's name isn't `logos`.
    pub fn try_parse_logos(&mut self, attr: &mut Attribute) {
        if !attr.path().is_ident(LOGOS_ATTR) {
            return;
        }

        let nested = match self.parse_attr(attr) {
            Some(tokens) => tokens,
            None => {
                self.err("Expected #[logos(...)]", attr.span());
                return;
            }
        };

        for nested in nested {
            // Only `name ...` style entries are valid at the top level.
            let (name, value) = match nested {
                Nested::Named(name, value) => (name, value),
                Nested::Unexpected(tokens) | Nested::Unnamed(tokens) => {
                    self.err("Invalid nested attribute", tokens.span());
                    continue;
                }
            };

            // IMPORTANT: Keep these sorted alphabetically for binary search down the line
            #[allow(clippy::type_complexity)]
            static NESTED_LOOKUP: &[(&str, fn(&mut Parser, Span, NestedValue))] = &[
                ("crate", |parser, span, value| match value {
                    NestedValue::Assign(logos_path) => parser.logos_path = Some(logos_path),
                    _ => {
                        parser.err("Expected: #[logos(crate = path::to::logos)]", span);
                    }
                }),
                ("error", |parser, span, value| match value {
                    NestedValue::Assign(value) => {
                        let span = value.span();

                        if let MaybeVoid::Some(previous) = parser.error_type.replace(value) {
                            parser
                                .err("Error type can be defined only once", span)
                                .err("Previous definition here", previous.span());
                        }
                    }
                    _ => {
                        parser.err("Expected: #[logos(error = SomeType)]", span);
                    }
                }),
                ("extras", |parser, span, value| match value {
                    NestedValue::Assign(value) => {
                        let span = value.span();

                        if let MaybeVoid::Some(previous) = parser.extras.replace(value) {
                            parser
                                .err("Extras can be defined only once", span)
                                .err("Previous definition here", previous.span());
                        }
                    }
                    _ => {
                        parser.err("Expected: #[logos(extras = SomeType)]", span);
                    }
                }),
                ("skip", |parser, span, value| match value {
                    NestedValue::Literal(lit) => {
                        if let Some(literal) = parser.parse_literal(Lit::new(lit)) {
                            parser.skips.push(literal);
                        }
                    }
                    _ => {
                        parser.err("Expected: #[logos(skip \"regex literal\")]", span);
                    }
                }),
                ("source", |parser, span, value| match value {
                    NestedValue::Assign(value) => {
                        let span = value.span();
                        if let Some(previous) = parser.source.replace(value) {
                            parser
                                .err("Source can be defined only once", span)
                                .err("Previous definition here", previous.span());
                        }
                    }
                    _ => {
                        parser.err("Expected: #[logos(source = SomeType)]", span);
                    }
                }),
                ("subpattern", |parser, span, value| match value {
                    NestedValue::KeywordAssign(name, value) => {
                        parser.subpatterns.add(name, value, &mut parser.errors);
                    }
                    _ => {
                        parser.err(r#"Expected: #[logos(subpattern name = r"regex")]"#, span);
                    }
                }),
                ("type", |parser, span, value| match value {
                    NestedValue::KeywordAssign(generic, ty) => {
                        parser.types.set(generic, ty, &mut parser.errors);
                    }
                    _ => {
                        parser.err("Expected: #[logos(type T = SomeType)]", span);
                    }
                }),
            ];

            match NESTED_LOOKUP.binary_search_by_key(&name.to_string().as_str(), |(n, _)| n) {
                Ok(idx) => NESTED_LOOKUP[idx].1(self, name.span(), value),
                Err(_) => {
                    // Unknown option: build an "expected one of ..." list
                    // from the lookup table itself so it never goes stale.
                    let mut err = format!(
                        "Unknown nested attribute #[logos({name})], expected one of: {}",
                        NESTED_LOOKUP[0].0
                    );

                    for (allowed, _) in &NESTED_LOOKUP[1..] {
                        err.push_str(", ");
                        err.push_str(allowed);
                    }

                    self.err(err, name.span());
                }
            }
        }
    }

    /// Parses a pattern literal. A byte-string literal switches the
    /// whole lexer into binary mode as a side effect.
    pub fn parse_literal(&mut self, lit: Lit) -> Option<Literal> {
        match lit {
            Lit::Str(string) => Some(Literal::Utf8(string)),
            Lit::ByteStr(bytes) => {
                self.mode = Mode::Binary;

                Some(Literal::Bytes(bytes))
            }
            _ => {
                self.err("Expected a &str or &[u8] slice", lit.span());

                None
            }
        }
    }

    /// Parse attribute definition of a token:
    ///
    /// + `#[token(literal[, callback])]`
    /// + `#[regex(literal[, callback])]`
    pub fn parse_definition(&mut self, attr: &mut Attribute) -> Option<Definition> {
        let mut nested = self.parse_attr(attr)?;

        let literal = match nested.parsed::<Lit>()? {
            Ok(lit) => self.parse_literal(lit)?,
            Err(err) => {
                self.err(err.to_string(), err.span());

                return None;
            }
        };

        let mut def = Definition::new(literal);

        for (position, next) in nested.enumerate() {
            match next {
                Nested::Unexpected(tokens) => {
                    self.err("Unexpected token in attribute", tokens.span());
                }
                // Only the first argument after the literal may be an
                // unnamed callback; later arguments must be named.
                Nested::Unnamed(tokens) => match position {
                    0 => def.callback = self.parse_callback(tokens),
                    _ => {
                        self.err(
                            "\
                            Expected a named argument at this position\n\
                            \n\
                            hint: If you are trying to define a callback here use: callback = ...\
                            ",
                            tokens.span(),
                        );
                    }
                },
                Nested::Named(name, value) => {
                    def.named_attr(name, value, self);
                }
            }
        }

        Some(def)
    }

    /// Parses a callback argument: either a path-like label (anything not
    /// starting with `|`) or an inline closure `|arg| body` with exactly
    /// one parameter.
    fn parse_callback(&mut self, tokens: TokenStream) -> Option<Callback> {
        let span = tokens.span();
        let mut tokens = tokens.into_iter();

        // `expect_punct` returns the token back when it is NOT the
        // expected `|`, meaning this is a label, not a closure.
        if let Some(tt) = expect_punct(tokens.next(), '|') {
            let mut label = TokenStream::from(tt);

            label.extend(tokens);

            return Some(Callback::Label(label));
        }

        // Closure: expect a single ident parameter followed by the
        // closing `|`.
        let first = tokens.next();
        let error = expect_punct(tokens.next(), '|');

        let arg = match (error, first) {
            (None, Some(TokenTree::Ident(arg))) => arg,
            _ => {
                self.err(
                    "Inline callbacks must use closure syntax with exactly one parameter",
                    span,
                );
                return None;
            }
        };

        // Body is either a braced group or the remaining bare tokens.
        let body = match tokens.next() {
            Some(TokenTree::Group(group)) => group.stream(),
            Some(first) => {
                let mut body = TokenStream::from(first);

                body.extend(tokens);
                body
            }
            None => {
                self.err("Callback missing a body", span);
                return None;
            }
        };

        let inline = InlineCallback { arg, body, span };

        Some(inline.into())
    }

    /// Checks if `ty` is a declared generic param, if so replaces it
    /// with a concrete type defined using #[logos(type T = Type)]
    ///
    /// If no matching generic param is found, all lifetimes are fixed
    /// to the source lifetime
    pub fn get_type(&self, ty: &mut Type) -> TokenStream {
        traverse_type(ty, &mut |ty| {
            if let Type::Path(tp) = ty {
                // Only plain paths (no qualified-self `<T as Trait>::...`)
                // can name a declared type parameter.
                if tp.qself.is_none() {
                    // If `ty` is a generic type parameter, try to find
                    // its concrete type defined with #[logos(type T = Type)]
                    if let Some(substitute) = self.types.find(&tp.path) {
                        *ty = substitute;
                    }
                }
            }
            // If `ty` is a concrete type, fix its lifetimes to 'source
            replace_lifetime(ty);
        });

        quote!(#ty)
    }

    /// Records an error and returns the error sink, allowing chained
    /// `.err(...)` calls for multi-span diagnostics.
    pub fn err<M>(&mut self, message: M, span: Span) -> &mut Errors
    where
        M: Into<Cow<'static, str>>,
    {
        self.errors.err(message, span)
    }
}
diff --git a/vendor/logos-codegen/src/parser/nested.rs b/vendor/logos-codegen/src/parser/nested.rs
new file mode 100644
index 00000000..44ecaeac
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/nested.rs
@@ -0,0 +1,146 @@
+use proc_macro2::token_stream::IntoIter as TokenIter;
+use proc_macro2::{Ident, Literal, TokenStream, TokenTree};
+use quote::quote;
+
+use crate::util::{expect_punct, is_punct};
+
/// The value part of one named entry inside an attribute's parentheses,
/// classified by the token that follows the leading ident.
pub enum NestedValue {
    /// `name = ...`
    Assign(TokenStream),
    /// `name "literal"`
    Literal(Literal),
    /// `name(...)`
    Group(TokenStream),
    /// `name ident = ...`
    KeywordAssign(Ident, TokenStream),
}
+
/// One comma-separated entry from inside an attribute's parentheses.
pub enum Nested {
    /// Unnamed nested attribute, such as a string,
    /// callback closure, or a lone ident/path
    ///
    /// Note: a lone ident is also yielded as `Unnamed` containing just
    /// that ident (see `AttributeParser::next`)
    Unnamed(TokenStream),
    /// Named: name ...
    Named(Ident, NestedValue),
    /// Unexpected token(s); reported as an error downstream
    Unexpected(TokenStream),
}
+
/// Iterator-style parser over the token stream inside an attribute,
/// yielding one `Nested` item per comma-separated entry.
pub struct AttributeParser {
    // Remaining tokens of the attribute body.
    inner: TokenIter,
}
+
/// Zero-sized seed for `AttributeParser::collect_tail` when there is no
/// initial token; converts into an empty `TokenStream`.
pub struct Empty;

impl From<Empty> for TokenStream {
    fn from(_: Empty) -> TokenStream {
        TokenStream::new()
    }
}
+
impl AttributeParser {
    /// Wraps the inner token stream of an attribute for parsing.
    pub fn new(stream: TokenStream) -> Self {
        AttributeParser {
            inner: stream.into_iter(),
        }
    }

    /// Parses everything up to the next comma (or end of stream) as a `T`.
    /// Returns `None` when no tokens remain.
    pub fn parsed<T>(&mut self) -> Option<syn::Result<T>>
    where
        T: syn::parse::Parse,
    {
        let tokens = self.collect_tail(TokenStream::new());

        if tokens.is_empty() {
            return None;
        }

        Some(syn::parse2(tokens))
    }

    /// Next token of the current entry; a lone `,` acts as the entry
    /// terminator and yields `None` (as does end of stream).
    fn next_tt(&mut self) -> Option<TokenTree> {
        expect_punct(self.inner.next(), ',')
    }

    /// Collects the remaining tokens of the current entry (up to a comma
    /// or the end of the stream), appended after `first`.
    fn collect_tail<T>(&mut self, first: T) -> TokenStream
    where
        T: Into<TokenStream>,
    {
        let mut out = first.into();

        while let Some(tt) = self.next_tt() {
            out.extend(Some(tt));
        }

        out
    }

    /// Entry that began with an ident but matches no named form:
    /// reassemble `first`, `next`, and the tail into an unnamed value.
    fn parse_unnamed(&mut self, first: Ident, next: TokenTree) -> Nested {
        let mut out = TokenStream::from(TokenTree::Ident(first));

        out.extend(self.collect_tail(next));

        Nested::Unnamed(out.into_iter().collect())
    }

    /// `name = <rest of entry>`
    fn parse_assign(&mut self, name: Ident) -> Nested {
        let value = self.collect_tail(Empty);

        Nested::Named(name, NestedValue::Assign(value))
    }

    /// `name "literal"` — any tokens after the literal are discarded.
    fn parse_literal(&mut self, name: Ident, lit: Literal) -> Nested {
        // TODO: Error if there are any tokens following
        let _ = self.collect_tail(Empty);

        Nested::Named(name, NestedValue::Literal(lit))
    }

    /// `name(...)`
    fn parse_group(&mut self, name: Ident, group: TokenStream) -> Nested {
        Nested::Named(name, NestedValue::Group(group))
    }

    /// `keyword name = <value>`; any token other than `=` after the
    /// second ident makes the whole entry `Unexpected`.
    fn parse_keyword(&mut self, keyword: Ident, name: Ident) -> Nested {
        // `expect_punct` hands the token back only when it is NOT `=`.
        let error = expect_punct(self.next_tt(), '=');

        match error {
            Some(error) => {
                let error = self.collect_tail(error);

                Nested::Unexpected(error)
            }
            None => {
                let value = self.collect_tail(Empty);

                Nested::Named(keyword, NestedValue::KeywordAssign(name, value))
            }
        }
    }
}
+
impl Iterator for AttributeParser {
    type Item = Nested;

    /// Yields the next comma-separated entry, classified by its leading
    /// tokens (see the `parse_*` helpers).
    fn next(&mut self) -> Option<Nested> {
        let first = self.inner.next()?;

        // Entries not starting with an ident (e.g. a string literal or
        // a closure) are unnamed values.
        let name = match first {
            TokenTree::Ident(ident) => ident,
            tt => {
                let stream = self.collect_tail(tt);

                return Some(Nested::Unnamed(stream.into_iter().collect()));
            }
        };

        // Dispatch on the token following the leading ident.
        match self.next_tt() {
            Some(tt) if is_punct(&tt, '=') => Some(self.parse_assign(name)),
            Some(TokenTree::Literal(lit)) => Some(self.parse_literal(name, lit)),
            Some(TokenTree::Group(group)) => Some(self.parse_group(name, group.stream())),
            Some(TokenTree::Ident(next)) => Some(self.parse_keyword(name, next)),
            Some(next) => Some(self.parse_unnamed(name, next)),
            // A lone ident with nothing after it.
            None => Some(Nested::Unnamed(quote!(#name))),
        }
    }
}
diff --git a/vendor/logos-codegen/src/parser/subpattern.rs b/vendor/logos-codegen/src/parser/subpattern.rs
new file mode 100644
index 00000000..eb620028
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/subpattern.rs
@@ -0,0 +1,97 @@
+use proc_macro2::TokenStream;
+use syn::Ident;
+
+use crate::error::Errors;
+use crate::mir::Mir;
+use crate::parser::definition::{bytes_to_regex_string, Literal};
+
/// Named subpatterns defined with `#[logos(subpattern name = r"regex")]`.
#[derive(Default)]
pub struct Subpatterns {
    // Pairs of (subpattern name, already-expanded regex source),
    // looked up linearly.
    map: Vec<(Ident, String)>,
}
+
impl Subpatterns {
    /// Defines subpattern `param` from regex source `pattern`.
    ///
    /// References to previously defined subpatterns are expanded eagerly
    /// by `fix`, and the expanded regex is validated before being stored.
    pub fn add(&mut self, param: Ident, pattern: TokenStream, errors: &mut Errors) {
        let lit = match syn::parse2::<Literal>(pattern) {
            Ok(lit) => lit,
            Err(e) => {
                errors.err(e.to_string(), e.span());
                return;
            }
        };

        // Redefinition is an error; the earlier definition stays in effect.
        if let Some((name, _)) = self.map.iter().find(|(name, _)| *name == param) {
            errors
                .err(format!("{} can only be assigned once", param), param.span())
                .err("Previously assigned here", name.span());
            return;
        }

        let fixed = self.fix(&lit, errors);

        // Validate the literal as proper regex. If it's not, emit an error.
        let mir = match &lit {
            Literal::Utf8(_) => Mir::utf8(&fixed),
            Literal::Bytes(_) => Mir::binary(&fixed),
        };

        if let Err(err) = mir {
            errors.err(err, lit.span());
        };

        // Stored even on error so later references still resolve.
        self.map.push((param, fixed));
    }

    /// Expands every `(?&name)` subpattern reference in `lit` into an
    /// inline non-capturing group `(?:<definition>)`, returning the
    /// rewritten regex source.
    pub fn fix(&self, lit: &Literal, errors: &mut Errors) -> String {
        // `i` is a byte index into `pattern`, always advanced past each
        // rewritten reference so replacements are never re-scanned.
        let mut i = 0;
        let mut pattern = match lit {
            Literal::Utf8(s) => s.value(),
            Literal::Bytes(b) => bytes_to_regex_string(b.value()),
        };

        while let Some(f) = pattern[i..].find("(?&") {
            i += f;
            // Rewrite the `(?&` opener into a non-capturing group `(?:`.
            pattern.replace_range(i..i + 3, "(?:");
            i += 3;

            let subref_end = if let Some(f) = pattern[i..].find(')') {
                i + f
            } else {
                pattern.truncate(i); // truncate so latter error doesn't suppress
                break; // regex-syntax will report the unclosed group
            };

            let name = &pattern[i..subref_end];
            let name = match syn::parse_str::<Ident>(name) {
                Ok(name) => name,
                Err(_) => {
                    errors.err(
                        format!("subpattern reference `{}` is not an identifier", name),
                        lit.span(),
                    );
                    // we emitted the error; make something up and continue
                    pattern.replace_range(i..subref_end, "_");
                    i += 2;
                    continue;
                }
            };

            match self.map.iter().find(|(def, _)| *def == name) {
                Some((_, subpattern)) => {
                    pattern.replace_range(i..subref_end, subpattern);
                    // Skip past the inserted definition and the `)`.
                    i += subpattern.len() + 1;
                }
                None => {
                    errors.err(
                        format!("subpattern reference `{}` has not been defined", name),
                        lit.span(),
                    );
                    // leaving `(?:name)` is fine
                    i = subref_end + 1;
                }
            }
        }

        pattern
    }
}
diff --git a/vendor/logos-codegen/src/parser/type_params.rs b/vendor/logos-codegen/src/parser/type_params.rs
new file mode 100644
index 00000000..1be4948e
--- /dev/null
+++ b/vendor/logos-codegen/src/parser/type_params.rs
@@ -0,0 +1,200 @@
+use proc_macro2::{Ident, Span, TokenStream};
+use quote::quote;
+use syn::spanned::Spanned;
+use syn::{Lifetime, LifetimeParam, Path, Type};
+
+use crate::error::Errors;
+
/// Generics declared on the token type: whether it has a lifetime, and
/// each type parameter with its optional concrete substitution from
/// `#[logos(type T = Type)]`.
#[derive(Default)]
pub struct TypeParams {
    lifetime: bool,
    type_params: Vec<(Ident, Option<Type>)>,
}
+
+impl TypeParams {
+ pub fn explicit_lifetime(&mut self, lt: LifetimeParam, errors: &mut Errors) {
+ if self.lifetime {
+ let span = lt.span();
+
+ errors.err("Logos types can only have one lifetime can be set", span);
+ }
+
+ self.lifetime = true;
+ }
+
+ pub fn add(&mut self, param: Ident) {
+ self.type_params.push((param, None));
+ }
+
+ pub fn set(&mut self, param: Ident, ty: TokenStream, errors: &mut Errors) {
+ let ty = match syn::parse2::<Type>(ty) {
+ Ok(mut ty) => {
+ replace_lifetimes(&mut ty);
+ ty
+ }
+ Err(err) => {
+ errors.err(err.to_string(), err.span());
+ return;
+ }
+ };
+
+ match self.type_params.iter_mut().find(|(name, _)| *name == param) {
+ Some((_, slot)) => {
+ if let Some(previous) = slot.replace(ty) {
+ errors
+ .err(
+ format!("{} can only have one type assigned to it", param),
+ param.span(),
+ )
+ .err("Previously assigned here", previous.span());
+ }
+ }
+ None => {
+ errors.err(
+ format!("{} is not a declared type parameter", param),
+ param.span(),
+ );
+ }
+ }
+ }
+
+ pub fn find(&self, path: &Path) -> Option<Type> {
+ for (ident, ty) in &self.type_params {
+ if path.is_ident(ident) {
+ return ty.clone();
+ }
+ }
+
+ None
+ }
+
+ pub fn generics(&self, errors: &mut Errors) -> Option<TokenStream> {
+ if !self.lifetime && self.type_params.is_empty() {
+ return None;
+ }
+
+ let mut generics = Vec::new();
+
+ if self.lifetime {
+ generics.push(quote!('s));
+ }
+
+ for (ty, replace) in self.type_params.iter() {
+ match replace {
+ Some(ty) => generics.push(quote!(#ty)),
+ None => {
+ errors.err(
+ format!(
+ "Generic type parameter without a concrete type\n\
+ \n\
+ Define a concrete type Logos can use: #[logos(type {} = Type)]",
+ ty,
+ ),
+ ty.span(),
+ );
+ }
+ }
+ }
+
+ if generics.is_empty() {
+ None
+ } else {
+ Some(quote!(<#(#generics),*>))
+ }
+ }
+}
+
+pub fn replace_lifetimes(ty: &mut Type) {
+ traverse_type(ty, &mut replace_lifetime)
+}
+
+pub fn replace_lifetime(ty: &mut Type) {
+ use syn::{GenericArgument, PathArguments};
+
+ match ty {
+ Type::Path(p) => {
+ p.path
+ .segments
+ .iter_mut()
+ .filter_map(|segment| match &mut segment.arguments {
+ PathArguments::AngleBracketed(ab) => Some(ab),
+ _ => None,
+ })
+ .flat_map(|ab| ab.args.iter_mut())
+ .for_each(|arg| {
+ if let GenericArgument::Lifetime(lt) = arg {
+ *lt = Lifetime::new("'s", lt.span());
+ }
+ });
+ }
+ Type::Reference(r) => {
+ let span = match r.lifetime.take() {
+ Some(lt) => lt.span(),
+ None => Span::call_site(),
+ };
+
+ r.lifetime = Some(Lifetime::new("'s", span));
+ }
+ _ => (),
+ }
+}
+
+pub fn traverse_type(ty: &mut Type, f: &mut impl FnMut(&mut Type)) {
+ f(ty);
+ match ty {
+ Type::Array(array) => traverse_type(&mut array.elem, f),
+ Type::BareFn(bare_fn) => {
+ for input in &mut bare_fn.inputs {
+ traverse_type(&mut input.ty, f);
+ }
+ if let syn::ReturnType::Type(_, ty) = &mut bare_fn.output {
+ traverse_type(ty, f);
+ }
+ }
+ Type::Group(group) => traverse_type(&mut group.elem, f),
+ Type::Paren(paren) => traverse_type(&mut paren.elem, f),
+ Type::Path(path) => traverse_path(&mut path.path, f),
+ Type::Ptr(p) => traverse_type(&mut p.elem, f),
+ Type::Reference(r) => traverse_type(&mut r.elem, f),
+ Type::Slice(slice) => traverse_type(&mut slice.elem, f),
+ Type::TraitObject(object) => object.bounds.iter_mut().for_each(|bound| {
+ if let syn::TypeParamBound::Trait(trait_bound) = bound {
+ traverse_path(&mut trait_bound.path, f);
+ }
+ }),
+ Type::Tuple(tuple) => tuple
+ .elems
+ .iter_mut()
+ .for_each(|elem| traverse_type(elem, f)),
+ _ => (),
+ }
+}
+
+fn traverse_path(path: &mut Path, f: &mut impl FnMut(&mut Type)) {
+ for segment in &mut path.segments {
+ match &mut segment.arguments {
+ syn::PathArguments::None => (),
+ syn::PathArguments::AngleBracketed(args) => {
+ for arg in &mut args.args {
+ match arg {
+ syn::GenericArgument::Type(ty) => {
+ traverse_type(ty, f);
+ }
+ syn::GenericArgument::AssocType(assoc) => {
+ traverse_type(&mut assoc.ty, f);
+ }
+ _ => (),
+ }
+ }
+ }
+ syn::PathArguments::Parenthesized(args) => {
+ for arg in &mut args.inputs {
+ traverse_type(arg, f);
+ }
+ if let syn::ReturnType::Type(_, ty) = &mut args.output {
+ traverse_type(ty, f);
+ }
+ }
+ }
+ }
+}
diff --git a/vendor/logos-codegen/src/util.rs b/vendor/logos-codegen/src/util.rs
new file mode 100644
index 00000000..156de035
--- /dev/null
+++ b/vendor/logos-codegen/src/util.rs
@@ -0,0 +1,64 @@
+use proc_macro2::{Spacing, Span, TokenStream, TokenTree};
+use quote::{quote, ToTokens};
+use syn::Ident;
+
/// Analog to Option<TokenStream>, except when put into the quote!
/// macro, `MaybeVoid::Void` will produce `()`
#[derive(Clone, Default)]
pub enum MaybeVoid {
    /// A concrete token stream (e.g. a user-provided type).
    Some(TokenStream),
    /// Absent; renders as the unit type `()`.
    #[default]
    Void,
}
+
+impl MaybeVoid {
+ pub fn replace(&mut self, stream: TokenStream) -> MaybeVoid {
+ std::mem::replace(self, MaybeVoid::Some(stream))
+ }
+
+ pub fn take(&mut self) -> MaybeVoid {
+ std::mem::replace(self, MaybeVoid::Void)
+ }
+}
+
+impl ToTokens for MaybeVoid {
+ fn to_tokens(&self, out: &mut TokenStream) {
+ match self {
+ MaybeVoid::Some(stream) => out.extend(stream.clone()),
+ MaybeVoid::Void => out.extend(quote!(())),
+ }
+ }
+
+ fn to_token_stream(&self) -> TokenStream {
+ match self {
+ MaybeVoid::Some(stream) => stream.clone(),
+ MaybeVoid::Void => quote!(()),
+ }
+ }
+
+ fn into_token_stream(self) -> TokenStream {
+ match self {
+ MaybeVoid::Some(stream) => stream,
+ MaybeVoid::Void => quote!(()),
+ }
+ }
+}
+
+pub fn is_punct(tt: &TokenTree, expect: char) -> bool {
+ matches!(tt, TokenTree::Punct(punct) if punct.as_char() == expect && punct.spacing() == Spacing::Alone)
+}
+
+/// If supplied `tt` is a punct matching a char, returns `None`, else returns `tt`
+pub fn expect_punct(tt: Option<TokenTree>, expect: char) -> Option<TokenTree> {
+ tt.filter(|tt| !is_punct(tt, expect))
+}
+
/// Convenience conversion of strings into `Ident`s.
pub trait ToIdent {
    fn to_ident(&self) -> Ident;
}

impl ToIdent for str {
    /// Builds an `Ident` from this string at call-site span.
    fn to_ident(&self) -> Ident {
        Ident::new(self, Span::call_site())
    }
}
diff --git a/vendor/logos-codegen/tests/codegen.rs b/vendor/logos-codegen/tests/codegen.rs
new file mode 100644
index 00000000..5f53c0db
--- /dev/null
+++ b/vendor/logos-codegen/tests/codegen.rs
@@ -0,0 +1,31 @@
+use std::{error::Error, io, path::PathBuf};
+
+#[rstest::rstest]
+#[case("simple")]
+#[case("no_error_lut")]
+pub fn test_codegen(#[case] fixture: &str) -> Result<(), Box<dyn Error>> {
+ let mut fixture_dir = PathBuf::new();
+ fixture_dir.push(env!("CARGO_MANIFEST_DIR"));
+ fixture_dir.push("tests");
+ fixture_dir.push("data");
+ fixture_dir.push(fixture);
+
+ let input = fixture_dir.join("input.rs");
+ fixture_dir.push("output.rs");
+ let output_file = fixture_dir;
+
+ let input = std::fs::read_to_string(input)?;
+ let output = std::fs::read_to_string(&output_file)?;
+
+ let generated = logos_codegen::generate(input.parse()?);
+ let generated = generated.to_string();
+
+ if std::env::var("BLESS_CODEGEN").is_ok_and(|value| value == "1") {
+ std::fs::write(&output_file, &generated)?;
+ return Ok(());
+ }
+
+ assert_eq!(generated, output, "Codegen test failed: `{fixture}`, run tests again with env var `BLESS_CODEGEN=1` to bless these changes");
+
+ Ok(())
+}
diff --git a/vendor/logos-codegen/tests/data/no_error_lut/input.rs b/vendor/logos-codegen/tests/data/no_error_lut/input.rs
new file mode 100644
index 00000000..84504272
--- /dev/null
+++ b/vendor/logos-codegen/tests/data/no_error_lut/input.rs
@@ -0,0 +1,10 @@
+#[derive(Logos)]
+#[logos(source = [u8])]
+enum Token {
+ #[token("\n")]
+ Newline,
+ #[regex(".")]
+ AnyUnicode,
+ #[regex(b".", priority = 0)]
+ Any,
+}
diff --git a/vendor/logos-codegen/tests/data/no_error_lut/output.rs b/vendor/logos-codegen/tests/data/no_error_lut/output.rs
new file mode 100644
index 00000000..4c0edd35
--- /dev/null
+++ b/vendor/logos-codegen/tests/data/no_error_lut/output.rs
@@ -0,0 +1 @@
+impl < 's > :: logos :: Logos < 's > for Token { type Error = () ; type Extras = () ; type Source = [u8] ; fn lex (lex : & mut :: logos :: Lexer < 's , Self >) { use :: logos :: internal :: { LexerInternal , CallbackResult } ; type Lexer < 's > = :: logos :: Lexer < 's , Token > ; fn _end < 's > (lex : & mut Lexer < 's >) { lex . end () } fn _error < 's > (lex : & mut Lexer < 's >) { lex . bump_unchecked (1) ; lex . error () ; } macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } # [inline] fn goto1_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Newline)) ; } # [inline] fn goto11_ctx11_x < 's > (lex : & mut Lexer < 's >) { lex . 
set (Ok (Token :: Any)) ; } # [inline] fn goto2_ctx11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: AnyUnicode)) ; } # [inline] fn goto16_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([128u8 ..= 159u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto17_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([144u8 ..= 191u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto2_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: AnyUnicode)) ; } # [inline] fn goto13_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 1usize] > () { Some ([128u8 ..= 191u8]) => { lex . bump_unchecked (1usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto18_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([128u8 ..= 191u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto15_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto14_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([160u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto19_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([128u8 ..= 143u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . 
bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Any)) ; } # [inline] fn goto20 < 's > (lex : & mut Lexer < 's >) { enum Jump { J1 , J16 , J17 , J2 , J13 , J18 , J15 , J14 , J19 , J11 , } const LUT : [Jump ; 256] = { use Jump :: * ; [J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J1 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J14 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J16 , J15 , J15 , J17 , J18 , J18 , J18 , J19 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11] } ; let byte = match lex . read :: < u8 > () { Some (byte) => byte , None => return _end (lex) , } ; match LUT [byte as usize] { Jump :: J1 => { lex . bump_unchecked (1usize) ; goto1_x (lex) } , Jump :: J16 => { lex . 
bump_unchecked (1usize) ; goto16_ctx11_x (lex) } , Jump :: J17 => { lex . bump_unchecked (1usize) ; goto17_ctx11_x (lex) } , Jump :: J2 => { lex . bump_unchecked (1usize) ; goto2_x (lex) } , Jump :: J13 => { lex . bump_unchecked (1usize) ; goto13_ctx11_x (lex) } , Jump :: J18 => { lex . bump_unchecked (1usize) ; goto18_ctx11_x (lex) } , Jump :: J15 => { lex . bump_unchecked (1usize) ; goto15_ctx11_x (lex) } , Jump :: J14 => { lex . bump_unchecked (1usize) ; goto14_ctx11_x (lex) } , Jump :: J19 => { lex . bump_unchecked (1usize) ; goto19_ctx11_x (lex) } , Jump :: J11 => { lex . bump_unchecked (1usize) ; goto11_x (lex) } , } } goto20 (lex) } } \ No newline at end of file
diff --git a/vendor/logos-codegen/tests/data/simple/input.rs b/vendor/logos-codegen/tests/data/simple/input.rs
new file mode 100644
index 00000000..d2f0517d
--- /dev/null
+++ b/vendor/logos-codegen/tests/data/simple/input.rs
@@ -0,0 +1,5 @@
+#[derive(Logos, Debug, Clone, Copy, PartialEq)]
+enum Token {
+ #[regex("a-z")]
+ Letter,
+}
diff --git a/vendor/logos-codegen/tests/data/simple/output.rs b/vendor/logos-codegen/tests/data/simple/output.rs
new file mode 100644
index 00000000..e8a4dc3b
--- /dev/null
+++ b/vendor/logos-codegen/tests/data/simple/output.rs
@@ -0,0 +1 @@
+impl < 's > :: logos :: Logos < 's > for Token { type Error = () ; type Extras = () ; type Source = str ; fn lex (lex : & mut :: logos :: Lexer < 's , Self >) { use :: logos :: internal :: { LexerInternal , CallbackResult } ; type Lexer < 's > = :: logos :: Lexer < 's , Token > ; fn _end < 's > (lex : & mut Lexer < 's >) { lex . end () } fn _error < 's > (lex : & mut Lexer < 's >) { lex . bump_unchecked (1) ; lex . error () ; } macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } # [inline] fn goto1_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Letter)) ; } # [inline] fn goto3_at1_with3 < 's > (lex : & mut Lexer < 's >) { match lex . 
read_at :: < & [u8 ; 2usize] > (1usize) { Some (b"-z") => { lex . bump_unchecked (3usize) ; goto1_x (lex) } , _ => _error (lex) , } } # [inline] fn goto4 < 's > (lex : & mut Lexer < 's >) { let arr = match lex . read :: < & [u8 ; 3usize] > () { Some (arr) => arr , None => return _end (lex) , } ; match arr [0] { b'a' => goto3_at1_with3 (lex) , _ => _error (lex) , } } goto4 (lex) } } \ No newline at end of file