diff options
| author | mo khan <mo@mokhan.ca> | 2025-07-15 16:37:08 -0600 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2025-07-17 16:30:22 -0600 |
| commit | 45df4d0d9b577fecee798d672695fe24ff57fb1b (patch) | |
| tree | 1b99bf645035b58e0d6db08c7a83521f41f7a75b /vendor/logos-codegen | |
| parent | f94f79608393d4ab127db63cc41668445ef6b243 (diff) | |
feat: migrate from Cedar to SpiceDB authorization system
This is a major architectural change that replaces the Cedar policy-based
authorization system with SpiceDB's relation-based authorization.
Key changes:
- Migrate from Rust to Go implementation
- Replace Cedar policies with SpiceDB schema and relationships
- Switch from Envoy `ext_authz` with Cedar to SpiceDB permission checks
- Update build system and dependencies for Go ecosystem
- Maintain Envoy integration for external authorization
This change enables more flexible permission modeling through SpiceDB's
Google Zanzibar-inspired relation-based system, supporting complex
hierarchical permissions that were difficult to express in Cedar.
Breaking change: Existing Cedar policies and Rust-based configuration
will no longer work and must be migrated to a SpiceDB schema.
Diffstat (limited to 'vendor/logos-codegen')
36 files changed, 0 insertions, 5684 deletions
diff --git a/vendor/logos-codegen/.cargo-checksum.json b/vendor/logos-codegen/.cargo-checksum.json deleted file mode 100644 index 97e0ce68..00000000 --- a/vendor/logos-codegen/.cargo-checksum.json +++ /dev/null @@ -1 +0,0 @@ -{"files":{"Cargo.toml":"a7610e627353e99aeaf86ef36a9ba33d5971b36cccd703e6913435e3ed9ae3d3","LICENSE-APACHE":"f30735c11407534952947e1c7b7457ddf28847000bb038402495ad66f3d020a4","LICENSE-MIT":"112fcdb9f4935988cc2313e4cc38faaeccfff53eb1296499e618932d472908e0","README.md":"da4edde5433a520a89358b90f5d916c78dcff39f4131245e9eefedbb7daca5d8","build.rs":"9a5db937e9f7068b5b32b9dfb4468d3d69ffb3efc0866fda63093482253f1883","src/error.rs":"440d20528db9a76b97de1b30f2c59a570a59368b1cd8aa70f8fc2e09bcfdd3a7","src/generator/context.rs":"46f39acab8d9d70cfad4428acdb3c05bf05f48412e3ae977938bd3a3b9521d2b","src/generator/fork.rs":"6e81913f421972fed784e66773073d63a5ca57b307c5676e6cc8c079c11e475b","src/generator/leaf.rs":"51277af712aecd2d396de902b9700e2e944d63285f483fb88ad5080dc9195251","src/generator/mod.rs":"12d31fbbe83cac553ffca7d4f437d3b4b1ab2272ee82e9048957b26cf3b5c7fd","src/generator/rope.rs":"a3e532ce1ff77d39a3746f0a61b533e94b8231d22d7abac30ecb0f40b9a7d4fd","src/generator/tables.rs":"8acf6de12f74a633fd1dc952775b55cf4107015c9aa39681cceb7788c217b30d","src/graph/fork.rs":"870688f1cbede6443755f3476d9d632024baf5b3176ad40e4e0f15b149df9c68","src/graph/impls.rs":"ec5717d68cc509b938671d361e062313176b9433bd96f8eab41cf5e57e28fc2e","src/graph/meta.rs":"aceb3fb5370aaa2daea615bb1f2e7ee841f4cd450493e4b3c4137feba60c3d09","src/graph/mod.rs":"135c5994c7f5453b12c06905bdf64eadcad61856a2ebc70bbca30889df499910","src/graph/range.rs":"4f66fa53ed793582c9f4afa8832be51ad73e87a91d0469033d76d1f29efb767f","src/graph/regex.rs":"f1a44b0d791362a9f1b32f5919ff9b5fe511f20052856de367858f38993583f4","src/graph/rope.rs":"ab5995e5912fb33b65b9dad2cffa315a3483b9123bd865a2fd4f6edf24fe0c5b","src/leaf.rs":"b0efac6ac811460001a114cc3c1cfec1bdf15c307ee1daf8c8729569acdddbf3","src/lib.rs":"95c41890714c12817acdf3ef
1a31b8cc3c510886e1f6ad961c45a873e1011c2f","src/macros.rs":"02c06363993f13e78ce9aa8bcea597020b0ca40c2993a2e99207d87f72526e3e","src/mir.rs":"7ad2c77253324e6506e9a2c5b0f749339b5b5e1ebb91072f769d89c527e67e53","src/parser/definition.rs":"53f7d401cbe7548cec2e6ea79d966d5f27f52d36fc313e4aca25ea78a07c62e8","src/parser/ignore_flags.rs":"53493872f553139011153629b5ce9990e9195e0599ef6219ee1f58ec05545f93","src/parser/mod.rs":"d825ef182b3a6a8e020ef8d659038626399dab5fb17ee8737f7e1d52744eaffb","src/parser/nested.rs":"bac7c8bebf99a28a2148edc44972d642e1ab0310d54ff894248299c3264e74bc","src/parser/subpattern.rs":"06b41ede8464ecdfbed441bf60eb131e02a60f8043bea206670f1cc8163f35b0","src/parser/type_params.rs":"1d0725c103286a0c1b390238c5df722aa743fe05499fa00cd38f8b29f853f2ee","src/util.rs":"13f9f868c4c5bc54aea374dc51613da218cd120c49d4277445d9be62c9ce06e6","tests/codegen.rs":"34f58b400a1f519c69511f9d2b549d7e6205dde553d9756dfd028bbf87bd34fd","tests/data/no_error_lut/input.rs":"066a28b7a5128cf381d1df5697480733f763d9596b105b3e31ce5b8435363b03","tests/data/no_error_lut/output.rs":"0fbf3b00d0f0631f9a0f5847aeed7997c62440cd03eb5d52264eb0174da1b89a","tests/data/simple/input.rs":"78681c1f6073462c78f2b98d101bc3ffbbc136470c88aba392ec0282becaa772","tests/data/simple/output.rs":"fd3d86bda51afb59272970864d43b997730a28f587ddc2a0e96cd854b3c930a9"},"package":"189bbfd0b61330abea797e5e9276408f2edbe4f822d7ad08685d67419aafb34e"}
\ No newline at end of file diff --git a/vendor/logos-codegen/Cargo.toml b/vendor/logos-codegen/Cargo.toml deleted file mode 100644 index 9f8cf5bb..00000000 --- a/vendor/logos-codegen/Cargo.toml +++ /dev/null @@ -1,90 +0,0 @@ -# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO -# -# When uploading crates to the registry Cargo will automatically -# "normalize" Cargo.toml files for maximal compatibility -# with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies. -# -# If you are reading this file be aware that the original Cargo.toml -# will likely look very different (and much more reasonable). -# See Cargo.toml.orig for the original contents. - -[package] -edition = "2021" -rust-version = "1.74.0" -name = "logos-codegen" -version = "0.15.0" -authors = [ - "Maciej Hirsz <hello@maciej.codes>", - "Jérome Eertmans (maintainer) <jeertmans@icloud.com>", -] -build = "build.rs" -autolib = false -autobins = false -autoexamples = false -autotests = false -autobenches = false -description = "Create ridiculously fast Lexers" -homepage = "https://logos.maciej.codes/" -readme = "README.md" -keywords = [ - "lexer", - "lexical", - "tokenizer", - "parser", - "no_std", -] -categories = [ - "parsing", - "text-processing", -] -license = "MIT OR Apache-2.0" -repository = "https://github.com/maciejhirsz/logos" - -[package.metadata.release] -shared-version = true - -[lib] -name = "logos_codegen" -path = "src/lib.rs" -bench = false - -[[test]] -name = "codegen" -path = "tests/codegen.rs" - -[dependencies.beef] -version = "0.5.0" - -[dependencies.fnv] -version = "1.0.6" - -[dependencies.lazy_static] -version = "1.4.0" - -[dependencies.proc-macro2] -version = "1.0.9" - -[dependencies.quote] -version = "1.0.3" - -[dependencies.regex-syntax] -version = "0.8.2" - -[dependencies.syn] -version = "2.0.13" -features = ["full"] - -[dev-dependencies.pretty_assertions] -version = "1.4.0" - -[dev-dependencies.rstest] -version = "0.23.0" - 
-[build-dependencies.rustc_version] -version = "0.4.1" - -[features] -debug = [] -forbid_unsafe = [] -fuzzing = [] diff --git a/vendor/logos-codegen/LICENSE-APACHE b/vendor/logos-codegen/LICENSE-APACHE deleted file mode 100644 index c6592a3f..00000000 --- a/vendor/logos-codegen/LICENSE-APACHE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. 
Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, 
in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright 2018 Maciej Hirsz <maciej.hirsz@gmail.com> - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
diff --git a/vendor/logos-codegen/LICENSE-MIT b/vendor/logos-codegen/LICENSE-MIT deleted file mode 100644 index e22f0271..00000000 --- a/vendor/logos-codegen/LICENSE-MIT +++ /dev/null @@ -1,21 +0,0 @@ -Copyright (c) 2018 Maciej Hirsz <maciej.hirsz@gmail.com> - -The MIT License (MIT) - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/vendor/logos-codegen/README.md b/vendor/logos-codegen/README.md deleted file mode 100644 index ba0937ff..00000000 --- a/vendor/logos-codegen/README.md +++ /dev/null @@ -1,115 +0,0 @@ -<img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right"> - -# Logos - - -[](https://crates.io/crates/logos) -[](https://docs.rs/logos) -[](https://crates.io/crates/logos) - -_Create ridiculously fast Lexers._ - -**Logos** has two goals: - -+ To make it easy to create a Lexer, so you can focus on more complex problems. -+ To make the generated Lexer faster than anything you'd write by hand. 
- -To achieve those, **Logos**: - -+ Combines all token definitions into a single [deterministic state machine](https://en.wikipedia.org/wiki/Deterministic_finite_automaton). -+ Optimizes branches into [lookup tables](https://en.wikipedia.org/wiki/Lookup_table) or [jump tables](https://en.wikipedia.org/wiki/Branch_table). -+ Prevents [backtracking](https://en.wikipedia.org/wiki/ReDoS) inside token definitions. -+ [Unwinds loops](https://en.wikipedia.org/wiki/Loop_unrolling), and batches reads to minimize bounds checking. -+ Does all of that heavy lifting at compile time. - -## Example - -```rust -use logos::Logos; - -#[derive(Logos, Debug, PartialEq)] -#[logos(skip r"[ \t\n\f]+")] // Ignore this regex pattern between tokens -enum Token { - // Tokens can be literal strings, of any length. - #[token("fast")] - Fast, - - #[token(".")] - Period, - - // Or regular expressions. - #[regex("[a-zA-Z]+")] - Text, -} - -fn main() { - let mut lex = Token::lexer("Create ridiculously fast Lexers."); - - assert_eq!(lex.next(), Some(Ok(Token::Text))); - assert_eq!(lex.span(), 0..6); - assert_eq!(lex.slice(), "Create"); - - assert_eq!(lex.next(), Some(Ok(Token::Text))); - assert_eq!(lex.span(), 7..19); - assert_eq!(lex.slice(), "ridiculously"); - - assert_eq!(lex.next(), Some(Ok(Token::Fast))); - assert_eq!(lex.span(), 20..24); - assert_eq!(lex.slice(), "fast"); - - assert_eq!(lex.next(), Some(Ok(Token::Text))); - assert_eq!(lex.slice(), "Lexers"); - assert_eq!(lex.span(), 25..31); - - assert_eq!(lex.next(), Some(Ok(Token::Period))); - assert_eq!(lex.span(), 31..32); - assert_eq!(lex.slice(), "."); - - assert_eq!(lex.next(), None); -} -``` - -For more examples and documentation, please refer to the -[Logos handbook](https://maciejhirsz.github.io/logos/) or the -[crate documentation](https://docs.rs/logos/latest/logos/). - -## How fast? - -Ridiculously fast! - -```norust -test identifiers ... bench: 647 ns/iter (+/- 27) = 1204 MB/s -test keywords_operators_and_punctators ... 
bench: 2,054 ns/iter (+/- 78) = 1037 MB/s -test strings ... bench: 553 ns/iter (+/- 34) = 1575 MB/s -``` - -## Acknowledgements - -+ [Pedrors](https://pedrors.pt/) for the **Logos** logo. - -## Thank you - -**Logos** is very much a labor of love. If you find it useful, consider -[getting me some coffee](https://github.com/sponsors/maciejhirsz). ☕ - -If you'd like to contribute to Logos, then consider reading the -[Contributing guide](https://maciejhirsz.github.io/logos/contributing). - -## Contributing - -**Logos** welcome any kind of contribution: bug reports, suggestions, -or new features! - -Please use the -[issues](https://github.com/maciejhirsz/logos/issues) or -[pull requests](https://github.com/maciejhirsz/logos/pulls) tabs, -when appropriate. - -To release a new version, follow the [RELEASE-PROCESS](RELEASE-PROCESS.md) - -## License - -This code is distributed under the terms of both the MIT license -and the Apache License (Version 2.0), choose whatever works for you. - -See [LICENSE-APACHE](LICENSE-APACHE) and [LICENSE-MIT](LICENSE-MIT) for details. 
diff --git a/vendor/logos-codegen/build.rs b/vendor/logos-codegen/build.rs deleted file mode 100644 index f11a575f..00000000 --- a/vendor/logos-codegen/build.rs +++ /dev/null @@ -1,21 +0,0 @@ -use rustc_version::{version_meta, Version}; - -fn main() { - let version_meta = version_meta().expect("Could not get Rust version"); - - let rustc_version = version_meta.semver; - let trimmed_rustc_version = Version::new( - rustc_version.major, - rustc_version.minor, - rustc_version.patch, - ); - - // Add cfg flag for Rust >= 1.82 - // Required for precise capturing in edition 2024 - // Due to changes in lifetime and type capture behavior for impl trait - // see: https://github.com/maciejhirsz/logos/issues/434, https://github.com/rust-lang/rfcs/pull/3498 - println!("cargo:rustc-check-cfg=cfg(rust_1_82)"); - if trimmed_rustc_version >= Version::new(1, 82, 0) { - println!("cargo:rustc-cfg=rust_1_82"); - } -} diff --git a/vendor/logos-codegen/src/error.rs b/vendor/logos-codegen/src/error.rs deleted file mode 100644 index 64f93c01..00000000 --- a/vendor/logos-codegen/src/error.rs +++ /dev/null @@ -1,110 +0,0 @@ -use std::fmt; - -use beef::lean::Cow; -use proc_macro2::{Span, TokenStream}; -use quote::quote; -use quote::{quote_spanned, ToTokens, TokenStreamExt}; - -pub type Result<T> = std::result::Result<T, Error>; - -#[derive(Default)] -pub struct Errors { - collected: Vec<SpannedError>, -} - -impl Errors { - pub fn err<M>(&mut self, message: M, span: Span) -> &mut Self - where - M: Into<Cow<'static, str>>, - { - self.collected.push(SpannedError { - message: message.into(), - span, - }); - - self - } - - pub fn render(self) -> Option<TokenStream> { - let errors = self.collected; - - match errors.len() { - 0 => None, - _ => Some(quote! 
{ - fn _logos_derive_compile_errors() { - #(#errors)* - } - }), - } - } -} - -pub struct Error(Cow<'static, str>); - -#[derive(Debug)] -pub struct SpannedError { - message: Cow<'static, str>, - span: Span, -} - -impl Error { - pub fn new<M>(message: M) -> Self - where - M: Into<Cow<'static, str>>, - { - Error(message.into()) - } - - pub fn span(self, span: Span) -> SpannedError { - SpannedError { - message: self.0, - span, - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.0.fmt(f) - } -} - -impl fmt::Debug for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(self, f) - } -} - -impl From<regex_syntax::Error> for Error { - fn from(err: regex_syntax::Error) -> Error { - Error(err.to_string().into()) - } -} - -impl From<&'static str> for Error { - fn from(err: &'static str) -> Error { - Error(err.into()) - } -} - -impl From<String> for Error { - fn from(err: String) -> Error { - Error(err.into()) - } -} - -impl From<Error> for Cow<'static, str> { - fn from(err: Error) -> Self { - err.0 - } -} - -impl ToTokens for SpannedError { - fn to_tokens(&self, tokens: &mut TokenStream) { - let message = &*self.message; - - tokens.append_all(quote_spanned!(self.span => { - compile_error!(#message) - })) - } -} diff --git a/vendor/logos-codegen/src/generator/context.rs b/vendor/logos-codegen/src/generator/context.rs deleted file mode 100644 index dc52f594..00000000 --- a/vendor/logos-codegen/src/generator/context.rs +++ /dev/null @@ -1,128 +0,0 @@ -use proc_macro2::TokenStream; -use quote::quote; - -use crate::generator::Generator; -use crate::graph::NodeId; - -/// This struct keeps track of bytes available to be read without -/// bounds checking across the tree. 
-/// -/// For example, a branch that matches 4 bytes followed by a fork -/// with smallest branch containing of 2 bytes can do a bounds check -/// for 6 bytes ahead, and leave the remaining 2 byte array (fixed size) -/// to be handled by the fork, avoiding bound checks there. -#[derive(Default, Clone, Copy, PartialEq, Eq, Hash, Debug)] -pub struct Context { - /// Amount of bytes that haven't been bumped yet but should - /// before a new read is performed - at: usize, - /// Number of bytes available without bound checks - available: usize, - /// Whether or not the Lexer has been bumped at least by 1 byte - bumped: bool, - /// Node to backtrack to to in case an explicit match has failed. - /// If `None` will instead produce an error token. - backtrack: Option<NodeId>, -} - -impl Context { - pub fn can_backtrack(&self) -> bool { - self.backtrack.is_some() - } - - pub fn switch(&mut self, miss: Option<NodeId>) -> Option<TokenStream> { - self.backtrack = Some(miss?); - self.bump() - } - - pub const fn advance(self, n: usize) -> Self { - Context { - at: self.at + n, - ..self - } - } - - pub fn bump(&mut self) -> Option<TokenStream> { - match self.at { - 0 => None, - n => { - let tokens = quote!(lex.bump_unchecked(#n);); - self.at = 0; - self.available = 0; - self.bumped = true; - Some(tokens) - } - } - } - - pub fn remainder(&self) -> usize { - self.available.saturating_sub(self.at) - } - - pub fn read_byte(&mut self) -> TokenStream { - let at = self.at; - - self.advance(1); - - #[cfg(not(feature = "forbid_unsafe"))] - { - quote!(unsafe { lex.read_byte_unchecked(#at) }) - } - - #[cfg(feature = "forbid_unsafe")] - { - quote!(lex.read_byte(#at)) - } - } - - pub fn read(&mut self, len: usize) -> TokenStream { - self.available = len; - - match (self.at, len) { - (0, 0) => quote!(lex.read::<u8>()), - (a, 0) => quote!(lex.read_at::<u8>(#a)), - (0, l) => quote!(lex.read::<&[u8; #l]>()), - (a, l) => quote!(lex.read_at::<&[u8; #l]>(#a)), - } - } - - pub fn wipe(&mut self) { - 
self.available = 0; - } - - const fn backtrack(self) -> Self { - Context { - at: 0, - available: 0, - bumped: self.bumped, - backtrack: None, - } - } - - pub fn miss(mut self, miss: Option<NodeId>, gen: &mut Generator) -> TokenStream { - self.wipe(); - match (miss, self.backtrack) { - (Some(id), _) => gen.goto(id, self).clone(), - (_, Some(id)) => gen.goto(id, self.backtrack()).clone(), - _ if self.bumped => quote!(lex.error()), - _ => quote!(_error(lex)), - } - } - - pub fn write_suffix(&self, buf: &mut String) { - use std::fmt::Write; - - if self.at > 0 { - let _ = write!(buf, "_at{}", self.at); - } - if self.available > 0 { - let _ = write!(buf, "_with{}", self.available); - } - if let Some(id) = self.backtrack { - let _ = write!(buf, "_ctx{}", id); - } - if self.bumped { - buf.push_str("_x"); - } - } -} diff --git a/vendor/logos-codegen/src/generator/fork.rs b/vendor/logos-codegen/src/generator/fork.rs deleted file mode 100644 index 11d4eab2..00000000 --- a/vendor/logos-codegen/src/generator/fork.rs +++ /dev/null @@ -1,216 +0,0 @@ -use std::cmp::max; - -use fnv::FnvHashMap as Map; -use proc_macro2::TokenStream; -use quote::quote; - -use crate::generator::{Context, Generator}; -use crate::graph::{Fork, NodeId, Range}; -use crate::util::ToIdent; - -type Targets = Map<NodeId, Vec<Range>>; - -impl Generator<'_> { - pub fn generate_fork(&mut self, this: NodeId, fork: &Fork, mut ctx: Context) -> TokenStream { - let mut targets: Targets = Map::default(); - - for (range, then) in fork.branches() { - targets.entry(then).or_default().push(range); - } - let loops_to_self = self.meta[this].loop_entry_from.contains(&this); - - match targets.len() { - 1 if loops_to_self => return self.generate_fast_loop(fork, ctx), - 0..=2 => (), - _ => return self.generate_fork_jump_table(this, fork, targets, ctx), - } - let miss = ctx.miss(fork.miss, self); - let end = self.fork_end(this, &miss); - let (byte, read) = self.fork_read(this, end, &mut ctx); - let branches = 
targets.into_iter().map(|(id, ranges)| { - let next = self.goto(id, ctx.advance(1)); - - match *ranges { - [range] => { - quote!(#range => #next,) - } - [a, b] if a.is_byte() && b.is_byte() => { - quote!(#a | #b => #next,) - } - _ => { - let test = self.generate_test(ranges).clone(); - let next = self.goto(id, ctx.advance(1)); - - quote!(byte if #test(byte) => #next,) - } - } - }); - - quote! { - #read - - match #byte { - #(#branches)* - _ => #miss, - } - } - } - - fn generate_fork_jump_table( - &mut self, - this: NodeId, - fork: &Fork, - targets: Targets, - mut ctx: Context, - ) -> TokenStream { - let miss = ctx.miss(fork.miss, self); - let end = self.fork_end(this, &miss); - let (byte, read) = self.fork_read(this, end, &mut ctx); - - let mut table: [u8; 256] = [0; 256]; - let mut jumps = vec!["__".to_ident()]; - - let branches = targets - .into_iter() - .enumerate() - .map(|(idx, (id, ranges))| { - let idx = (idx as u8) + 1; - let next = self.goto(id, ctx.advance(1)); - jumps.push(format!("J{}", id).to_ident()); - - for byte in ranges.into_iter().flatten() { - table[byte as usize] = idx; - } - let jump = jumps.last().unwrap(); - - quote!(Jump::#jump => #next,) - }) - .collect::<TokenStream>(); - - let may_error = table.iter().any(|&idx| idx == 0); - - let jumps = jumps.as_slice(); - let table = table.iter().copied().map(|idx| &jumps[idx as usize]); - - let jumps = if may_error { jumps } else { &jumps[1..] }; - let error_branch = if may_error { - Some(quote!(Jump::__ => #miss)) - } else { - None - }; - - quote! 
{ - enum Jump { - #(#jumps,)* - } - - const LUT: [Jump; 256] = { - use Jump::*; - - [#(#table),*] - }; - - #read - - match LUT[#byte as usize] { - #branches - #error_branch - } - } - } - - fn fork_end(&self, this: NodeId, miss: &TokenStream) -> TokenStream { - if this == self.root { - quote!(_end(lex)) - } else { - miss.clone() - } - } - - fn fork_read( - &self, - this: NodeId, - end: TokenStream, - ctx: &mut Context, - ) -> (TokenStream, TokenStream) { - let min_read = self.meta[this].min_read; - - if ctx.remainder() >= max(min_read, 1) { - let read = ctx.read_byte(); - - return (quote!(byte), quote!(let byte = #read;)); - } - - match min_read { - 0 | 1 => { - let read = ctx.read(0); - - ( - quote!(byte), - quote! { - let byte = match #read { - Some(byte) => byte, - None => return #end, - }; - }, - ) - } - len => { - let read = ctx.read(len); - - ( - quote!(arr[0]), - quote! { - let arr = match #read { - Some(arr) => arr, - None => return #end, - }; - }, - ) - } - } - } - - fn generate_fast_loop(&mut self, fork: &Fork, ctx: Context) -> TokenStream { - let miss = ctx.miss(fork.miss, self); - let ranges = fork.branches().map(|(range, _)| range).collect::<Vec<_>>(); - let test = self.generate_test(ranges); - - quote! { - _fast_loop!(lex, #test, #miss); - } - } - - pub fn fast_loop_macro() -> TokenStream { - quote! { - macro_rules! 
_fast_loop { - ($lex:ident, $test:ident, $miss:expr) => { - // Do one bounds check for multiple bytes till EOF - while let Some(arr) = $lex.read::<&[u8; 16]>() { - if $test(arr[0]) { if $test(arr[1]) { if $test(arr[2]) { if $test(arr[3]) { - if $test(arr[4]) { if $test(arr[5]) { if $test(arr[6]) { if $test(arr[7]) { - if $test(arr[8]) { if $test(arr[9]) { if $test(arr[10]) { if $test(arr[11]) { - if $test(arr[12]) { if $test(arr[13]) { if $test(arr[14]) { if $test(arr[15]) { - - $lex.bump_unchecked(16); continue; } $lex.bump_unchecked(15); return $miss; } - $lex.bump_unchecked(14); return $miss; } $lex.bump_unchecked(13); return $miss; } - $lex.bump_unchecked(12); return $miss; } $lex.bump_unchecked(11); return $miss; } - $lex.bump_unchecked(10); return $miss; } $lex.bump_unchecked(9); return $miss; } - $lex.bump_unchecked(8); return $miss; } $lex.bump_unchecked(7); return $miss; } - $lex.bump_unchecked(6); return $miss; } $lex.bump_unchecked(5); return $miss; } - $lex.bump_unchecked(4); return $miss; } $lex.bump_unchecked(3); return $miss; } - $lex.bump_unchecked(2); return $miss; } $lex.bump_unchecked(1); return $miss; } - - return $miss; - } - - while $lex.test($test) { - $lex.bump_unchecked(1); - } - - $miss - }; - } - } - } -} diff --git a/vendor/logos-codegen/src/generator/leaf.rs b/vendor/logos-codegen/src/generator/leaf.rs deleted file mode 100644 index 0841a4ff..00000000 --- a/vendor/logos-codegen/src/generator/leaf.rs +++ /dev/null @@ -1,67 +0,0 @@ -use proc_macro2::TokenStream; -use quote::quote; - -use crate::generator::{Context, Generator}; -use crate::leaf::{Callback, Leaf}; -use crate::util::MaybeVoid; - -impl Generator<'_> { - pub fn generate_leaf(&mut self, leaf: &Leaf, mut ctx: Context) -> TokenStream { - let bump = ctx.bump(); - - let ident = &leaf.ident; - let name = self.name; - let this = self.this; - let ty = &leaf.field; - - let constructor = match leaf.field { - MaybeVoid::Some(_) => quote!(#name::#ident), - MaybeVoid::Void => quote!(|()| 
#name::#ident), - }; - - match &leaf.callback { - Some(Callback::Label(callback)) => quote! { - #bump - #callback(lex).construct(#constructor, lex); - }, - Some(Callback::Inline(inline)) => { - let arg = &inline.arg; - let body = &inline.body; - - #[cfg(not(rust_1_82))] - let ret = quote!(impl CallbackResult<'s, #ty, #this>); - - #[cfg(rust_1_82)] - let ret = quote!(impl CallbackResult<'s, #ty, #this> + use<'s>); - - quote! { - #bump - - #[inline] - fn callback<'s>(#arg: &mut Lexer<'s>) -> #ret { - #body - } - - callback(lex).construct(#constructor, lex); - } - } - Some(Callback::Skip(_)) => { - quote! { - #bump - - lex.trivia(); - #name::lex(lex); - } - } - None if matches!(leaf.field, MaybeVoid::Void) => quote! { - #bump - lex.set(Ok(#name::#ident)); - }, - None => quote! { - #bump - let token = #name::#ident(lex.slice()); - lex.set(Ok(token)); - }, - } - } -} diff --git a/vendor/logos-codegen/src/generator/mod.rs b/vendor/logos-codegen/src/generator/mod.rs deleted file mode 100644 index 1b8bf8bf..00000000 --- a/vendor/logos-codegen/src/generator/mod.rs +++ /dev/null @@ -1,268 +0,0 @@ -use fnv::{FnvHashMap as Map, FnvHashSet as Set}; -use proc_macro2::TokenStream; -use quote::{quote, ToTokens, TokenStreamExt}; -use syn::Ident; - -use crate::graph::{Graph, Meta, Node, NodeId, Range}; -use crate::leaf::Leaf; -use crate::util::ToIdent; - -mod context; -mod fork; -mod leaf; -mod rope; -mod tables; - -use self::context::Context; -use self::tables::TableStack; - -pub struct Generator<'a> { - /// Name of the type we are implementing the `Logos` trait for - name: &'a Ident, - /// Name of the type with any generics it might need - this: &'a TokenStream, - /// Id to the root node - root: NodeId, - /// Reference to the graph with all of the nodes - graph: &'a Graph<Leaf<'a>>, - /// Meta data collected for the nodes - meta: Meta, - /// Buffer with functions growing during generation - rendered: TokenStream, - /// Set of functions that have already been rendered - fns: 
Set<(NodeId, Context)>, - /// Function name identifiers - idents: Map<(NodeId, Context), Ident>, - /// Local function calls. Note: a call might change its context, - /// so we can't use `idents` for this purpose. - gotos: Map<(NodeId, Context), TokenStream>, - /// Identifiers for helper functions matching a byte to a given - /// set of ranges - tests: Map<Vec<Range>, Ident>, - /// Related to above, table stack manages tables that need to be - tables: TableStack, -} - -impl<'a> Generator<'a> { - pub fn new( - name: &'a Ident, - this: &'a TokenStream, - root: NodeId, - graph: &'a Graph<Leaf>, - ) -> Self { - let rendered = Self::fast_loop_macro(); - let meta = Meta::analyze(root, graph); - - Generator { - name, - this, - root, - graph, - meta, - rendered, - fns: Set::default(), - idents: Map::default(), - gotos: Map::default(), - tests: Map::default(), - tables: TableStack::new(), - } - } - - pub fn generate(mut self) -> TokenStream { - let root = self.goto(self.root, Context::default()).clone(); - let rendered = &self.rendered; - let tables = &self.tables; - - quote! { - #tables - #rendered - #root - } - } - - fn generate_fn(&mut self, id: NodeId, ctx: Context) { - if self.fns.contains(&(id, ctx)) { - return; - } - self.fns.insert((id, ctx)); - - let body = match &self.graph[id] { - Node::Fork(fork) => self.generate_fork(id, fork, ctx), - Node::Rope(rope) => self.generate_rope(rope, ctx), - Node::Leaf(leaf) => self.generate_leaf(leaf, ctx), - }; - let ident = self.generate_ident(id, ctx); - let out = quote! { - #[inline] - fn #ident<'s>(lex: &mut Lexer<'s>) { - #body - } - }; - - self.rendered.append_all(out); - } - - fn goto(&mut self, id: NodeId, mut ctx: Context) -> &TokenStream { - let key = (id, ctx); - - // Allow contains_key + insert because self.generate_ident borrows a mutable ref to self - // too. 
- #[allow(clippy::map_entry)] - if !self.gotos.contains_key(&key) { - let meta = &self.meta[id]; - let enters_loop = !meta.loop_entry_from.is_empty(); - - let bump = if enters_loop || !ctx.can_backtrack() { - ctx.switch(self.graph[id].miss()) - } else { - None - }; - - let bump = match (bump, enters_loop, meta.min_read) { - (Some(t), _, _) => Some(t), - (None, true, _) => ctx.bump(), - (None, false, 0) => ctx.bump(), - (None, false, _) => None, - }; - - if meta.min_read == 0 || ctx.remainder() < meta.min_read { - ctx.wipe(); - } - - let ident = self.generate_ident(id, ctx); - let mut call_site = quote!(#ident(lex)); - - if let Some(bump) = bump { - call_site = quote!({ - #bump - #call_site - }); - } - self.gotos.insert(key, call_site); - self.generate_fn(id, ctx); - } - &self.gotos[&key] - } - - fn generate_ident(&mut self, id: NodeId, ctx: Context) -> &Ident { - self.idents.entry((id, ctx)).or_insert_with(|| { - let mut ident = format!("goto{}", id); - - ctx.write_suffix(&mut ident); - - ident.to_ident() - }) - } - - /// Returns an identifier to a function that matches a byte to any - /// of the provided ranges. This will generate either a simple - /// match expression, or use a lookup table internally. - fn generate_test(&mut self, ranges: Vec<Range>) -> &Ident { - if !self.tests.contains_key(&ranges) { - let idx = self.tests.len(); - let ident = format!("pattern{}", idx).to_ident(); - - let lo = ranges.first().unwrap().start; - let hi = ranges.last().unwrap().end; - - let body = match ranges.len() { - 0..=2 => { - quote! 
{ - match byte { - #(#ranges)|* => true, - _ => false, - } - } - } - _ if hi - lo < 64 => { - let mut offset = hi.saturating_sub(63); - - while offset.count_ones() > 1 && lo - offset > 0 { - offset += 1; - } - - let mut table = 0u64; - - for byte in ranges.iter().flat_map(|range| *range) { - if byte - offset >= 64 { - panic!("{:#?} {} {} {}", ranges, hi, lo, offset); - } - table |= 1 << (byte - offset); - } - - let search = match offset { - 0 => quote!(byte), - _ => quote!(byte.wrapping_sub(#offset)), - }; - - quote! { - const LUT: u64 = #table; - - match 1u64.checked_shl(#search as u32) { - Some(shift) => LUT & shift != 0, - None => false, - } - } - } - _ => { - let mut view = self.tables.view(); - - for byte in ranges.iter().flat_map(|range| *range) { - view.flag(byte); - } - - let mask = view.mask(); - let lut = view.ident(); - - quote! { - #lut[byte as usize] & #mask > 0 - } - } - }; - self.rendered.append_all(quote! { - #[inline] - fn #ident(byte: u8) -> bool { - #body - } - }); - self.tests.insert(ranges.clone(), ident); - } - &self.tests[&ranges] - } -} - -macro_rules! match_quote { - ($source:expr; $($byte:tt,)* ) => {match $source { - $( $byte => quote!($byte), )* - byte => quote!(#byte), - }} -} - -fn byte_to_tokens(byte: u8) -> TokenStream { - match_quote! 
{ - byte; - b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', - b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', - b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', - b'u', b'v', b'w', b'x', b'y', b'z', - b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', - b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', - b'U', b'V', b'W', b'X', b'Y', b'Z', - b'!', b'@', b'#', b'$', b'%', b'^', b'&', b'*', b'(', b')', - b'{', b'}', b'[', b']', b'<', b'>', b'-', b'=', b'_', b'+', - b':', b';', b',', b'.', b'/', b'?', b'|', b'"', b'\'', b'\\', - } -} - -impl ToTokens for Range { - fn to_tokens(&self, tokens: &mut TokenStream) { - let Range { start, end } = self; - - tokens.append_all(byte_to_tokens(*start)); - - if start != end { - tokens.append_all(quote!(..=)); - tokens.append_all(byte_to_tokens(*end)); - } - } -} diff --git a/vendor/logos-codegen/src/generator/rope.rs b/vendor/logos-codegen/src/generator/rope.rs deleted file mode 100644 index ae3b07ad..00000000 --- a/vendor/logos-codegen/src/generator/rope.rs +++ /dev/null @@ -1,39 +0,0 @@ -use proc_macro2::TokenStream; -use quote::quote; - -use crate::generator::{Context, Generator}; -use crate::graph::Rope; - -impl Generator<'_> { - pub fn generate_rope(&mut self, rope: &Rope, mut ctx: Context) -> TokenStream { - let miss = ctx.miss(rope.miss.first(), self); - let read = ctx.read(rope.pattern.len()); - let then = self.goto(rope.then, ctx.advance(rope.pattern.len())); - - let pat = match rope.pattern.to_bytes() { - Some(bytes) => byte_slice_literal(&bytes), - None => { - let ranges = rope.pattern.iter(); - - quote!([#(#ranges),*]) - } - }; - - quote! 
{ - match #read { - Some(#pat) => #then, - _ => #miss, - } - } - } -} - -fn byte_slice_literal(bytes: &[u8]) -> TokenStream { - if bytes.iter().any(|&b| !(0x20..0x7F).contains(&b)) { - return quote!(&[#(#bytes),*]); - } - - let slice = std::str::from_utf8(bytes).unwrap(); - - syn::parse_str(&format!("b{:?}", slice)).unwrap() -} diff --git a/vendor/logos-codegen/src/generator/tables.rs b/vendor/logos-codegen/src/generator/tables.rs deleted file mode 100644 index f1e53273..00000000 --- a/vendor/logos-codegen/src/generator/tables.rs +++ /dev/null @@ -1,77 +0,0 @@ -use crate::util::ToIdent; -use proc_macro2::{Literal, TokenStream}; -use quote::{quote, ToTokens}; -use syn::Ident; - -pub struct TableStack { - tables: Vec<(Ident, [u8; 256])>, - shift: u8, -} - -pub struct TableView<'a> { - ident: &'a Ident, - table: &'a mut [u8; 256], - mask: u8, -} - -impl TableStack { - pub fn new() -> Self { - TableStack { - tables: vec![("COMPACT_TABLE_0".to_ident(), [0; 256])], - shift: 0, - } - } - - pub fn view(&mut self) -> TableView { - let mask = if self.shift < 8 { - // Reusing existing table with a shifted mask - let mask = 1u8 << self.shift; - - self.shift += 1; - - mask - } else { - // Need to create a new table - let ident = format!("COMPACT_TABLE_{}", self.tables.len()).to_ident(); - - self.tables.push((ident, [0; 256])); - self.shift = 1; - - 1 - }; - - let (ref ident, ref mut table) = self.tables.last_mut().unwrap(); - - TableView { ident, table, mask } - } -} - -impl<'a> TableView<'a> { - pub fn ident(&self) -> &'a Ident { - self.ident - } - - pub fn flag(&mut self, byte: u8) { - self.table[byte as usize] |= self.mask; - } - - pub fn mask(&self) -> Literal { - Literal::u8_unsuffixed(self.mask) - } -} - -impl ToTokens for TableStack { - fn to_tokens(&self, out: &mut TokenStream) { - if self.shift == 0 { - return; - } - - for (ident, table) in self.tables.iter() { - let bytes = table.iter().copied().map(Literal::u8_unsuffixed); - - out.extend(quote! 
{ - static #ident: [u8; 256] = [#(#bytes),*]; - }); - } - } -} diff --git a/vendor/logos-codegen/src/graph/fork.rs b/vendor/logos-codegen/src/graph/fork.rs deleted file mode 100644 index 6b59836b..00000000 --- a/vendor/logos-codegen/src/graph/fork.rs +++ /dev/null @@ -1,267 +0,0 @@ -use crate::graph::{Disambiguate, Graph, NodeId, Range}; - -#[derive(Clone)] -pub struct Fork { - /// LUT matching byte -> node id - lut: Box<[Option<NodeId>; 256]>, - /// State to go to if no arms are matching - pub miss: Option<NodeId>, -} - -impl Fork { - pub fn new() -> Self { - Fork { - lut: Box::new([None; 256]), - miss: None, - } - } - - pub fn miss<M>(mut self, miss: M) -> Self - where - M: Into<Option<NodeId>>, - { - self.miss = miss.into(); - self - } - - pub fn add_branch<R, T>(&mut self, range: R, then: NodeId, graph: &mut Graph<T>) - where - R: Into<Range>, - T: Disambiguate, - { - for byte in range.into() { - match &mut self.lut[byte as usize] { - Some(other) if *other != then => { - *other = graph.merge(*other, then); - } - opt => *opt = Some(then), - } - } - } - - // TODO: Add result with a printable error - pub fn merge<T>(&mut self, other: Fork, graph: &mut Graph<T>) - where - T: Disambiguate, - { - self.miss = match (self.miss, other.miss) { - (None, None) => None, - (Some(id), None) | (None, Some(id)) => Some(id), - (Some(a), Some(b)) => Some(graph.merge(a, b)), - }; - - for (left, right) in self.lut.iter_mut().zip(other.lut.iter()) { - *left = match (*left, *right) { - (None, None) => continue, - (Some(id), None) | (None, Some(id)) => Some(id), - (Some(a), Some(b)) => Some(graph.merge(a, b)), - } - } - } - - pub fn branches(&self) -> ForkIter<'_> { - ForkIter { - offset: 0, - lut: &self.lut, - } - } - - /// Checks if all bytes in the `range` have a branch on this - /// fork, and those branches are resolve to the same `NodeId`. 
- pub fn contains<R>(&self, range: R) -> Option<NodeId> - where - R: Into<Range>, - { - let mut range = range.into(); - let byte = range.next()?; - let first = self.lut[byte as usize]?; - - for byte in range { - if first != self.lut[byte as usize]? { - return None; - } - } - - Some(first) - } - - pub fn branch<R>(mut self, range: R, then: NodeId) -> Self - where - R: Into<Range>, - { - for byte in range.into() { - match &mut self.lut[byte as usize] { - Some(other) if *other != then => { - panic!("Overlapping branches"); - } - opt => *opt = Some(then), - } - } - self - } - - pub fn shake<T>(&self, graph: &Graph<T>, filter: &mut [bool]) { - if let Some(id) = self.miss { - if !filter[id.get()] { - filter[id.get()] = true; - graph[id].shake(graph, filter); - } - } - - for (_, id) in self.branches() { - if !filter[id.get()] { - filter[id.get()] = true; - graph[id].shake(graph, filter); - } - } - } -} - -pub struct ForkIter<'a> { - offset: usize, - lut: &'a [Option<NodeId>; 256], -} - -impl Iterator for ForkIter<'_> { - type Item = (Range, NodeId); - - fn next(&mut self) -> Option<Self::Item> { - // Consume empty slots - self.offset += self.lut[self.offset..] - .iter() - .take_while(|next| next.is_none()) - .count(); - - let then = self.lut.get(self.offset).copied().flatten()?; - let start = self.offset; - - // Consume all slots with same NodeId target - self.offset += self.lut[self.offset..] 
- .iter() - .take_while(|next| **next == Some(then)) - .count(); - - Some(( - Range { - start: start as u8, - end: (self.offset - 1) as u8, - }, - then, - )) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::graph::Node; - use pretty_assertions::assert_eq; - - #[test] - fn fork_iter() { - let mut buf = [None; 256]; - - for byte in b'4'..=b'7' { - buf[byte as usize] = Some(NodeId::new(1)); - } - for byte in b'a'..=b'd' { - buf[byte as usize] = Some(NodeId::new(2)); - } - - let iter = ForkIter { - offset: 0, - lut: &buf, - }; - - assert_eq!( - &[ - ( - Range { - start: b'4', - end: b'7' - }, - NodeId::new(1) - ), - ( - Range { - start: b'a', - end: b'd' - }, - NodeId::new(2) - ), - ], - &*iter.collect::<Vec<_>>(), - ); - } - - #[test] - fn merge_no_conflict() { - let mut graph = Graph::new(); - - let leaf1 = graph.push(Node::Leaf("FOO")); - let leaf2 = graph.push(Node::Leaf("BAR")); - - let mut fork = Fork::new().branch(b'1', leaf1); - - fork.merge(Fork::new().branch(b'2', leaf2), &mut graph); - - assert_eq!(fork, Fork::new().branch(b'1', leaf1).branch(b'2', leaf2)); - } - - #[test] - fn merge_miss_right() { - let mut graph = Graph::new(); - - let leaf1 = graph.push(Node::Leaf("FOO")); - let leaf2 = graph.push(Node::Leaf("BAR")); - - let mut fork = Fork::new().branch(b'1', leaf1); - - fork.merge(Fork::new().miss(leaf2), &mut graph); - - assert_eq!(fork, Fork::new().branch(b'1', leaf1).miss(leaf2)); - } - - #[test] - fn merge_miss_left() { - let mut graph = Graph::new(); - - let leaf1 = graph.push(Node::Leaf("FOO")); - let leaf2 = graph.push(Node::Leaf("BAR")); - - let mut fork = Fork::new().miss(leaf1); - - fork.merge(Fork::new().branch(b'2', leaf2), &mut graph); - - assert_eq!(fork, Fork::new().branch(b'2', leaf2).miss(leaf1)); - } - - #[test] - fn contains_byte() { - let fork = Fork::new().branch('a'..='z', NodeId::new(42)); - - assert_eq!(fork.contains(b't'), Some(NodeId::new(42))); - } - - #[test] - fn contains_range() { - let fork = Fork::new() - 
.branch('a'..='m', NodeId::new(42)) - .branch('n'..='z', NodeId::new(42)); - - assert_eq!(fork.contains('i'..='r'), Some(NodeId::new(42))); - assert_eq!(fork.contains('a'..='z'), Some(NodeId::new(42))); - } - - #[test] - fn contains_different_ranges() { - let fork = Fork::new() - .branch('a'..='m', NodeId::new(42)) - .branch('n'..='z', NodeId::new(47)); - - assert_eq!(fork.contains('i'..='r'), None); - assert_eq!(fork.contains('a'..='z'), None); - assert_eq!(fork.contains('d'..='f'), Some(NodeId::new(42))); - assert_eq!(fork.contains('n'..='p'), Some(NodeId::new(47))); - } -} diff --git a/vendor/logos-codegen/src/graph/impls.rs b/vendor/logos-codegen/src/graph/impls.rs deleted file mode 100644 index dc97bdf1..00000000 --- a/vendor/logos-codegen/src/graph/impls.rs +++ /dev/null @@ -1,220 +0,0 @@ -use std::fmt::{self, Debug, Display}; -use std::hash::{Hash, Hasher}; - -use crate::graph::{Fork, Graph, Node, NodeId, Range, Rope}; - -impl<T> From<Fork> for Node<T> { - fn from(fork: Fork) -> Self { - Node::Fork(fork) - } -} -impl<T> From<Rope> for Node<T> { - fn from(rope: Rope) -> Self { - Node::Rope(rope) - } -} - -fn is_ascii(byte: u8) -> bool { - (0x20..0x7F).contains(&byte) -} - -impl Hash for Fork { - fn hash<H: Hasher>(&self, state: &mut H) { - for branch in self.branches() { - branch.hash(state); - } - self.miss.hash(state); - } -} - -impl<T> Hash for Node<T> { - fn hash<H: Hasher>(&self, state: &mut H) { - match self { - Node::Rope(rope) => { - b"ROPE".hash(state); - rope.hash(state); - } - Node::Fork(fork) => { - b"FORK".hash(state); - fork.hash(state); - } - Node::Leaf(_) => b"LEAF".hash(state), - } - } -} - -impl Debug for NodeId { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - Debug::fmt(&self.0, f) - } -} - -/// We don't need debug impls in release builds -// #[cfg(test)] -mod debug { - use super::*; - use crate::graph::rope::Miss; - use crate::graph::Disambiguate; - use std::cmp::{Ord, Ordering}; - - impl Disambiguate for &str { - fn cmp(left: 
&&str, right: &&str) -> Ordering { - Ord::cmp(left, right) - } - } - - impl Debug for Range { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let Range { start, end } = *self; - - if start != end || !is_ascii(start) { - f.write_str("[")?; - } - match is_ascii(start) { - true => write!(f, "{}", start as char), - false => write!(f, "{:02X}", start), - }?; - if start != end { - match is_ascii(end) { - true => write!(f, "-{}]", end as char), - false => write!(f, "-{:02X}]", end), - }?; - } else if !is_ascii(start) { - f.write_str("]")?; - } - Ok(()) - } - } - - impl Display for Range { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - <Range as Debug>::fmt(self, f) - } - } - - impl<T: Debug> Debug for Graph<T> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let entries = self - .nodes() - .iter() - .enumerate() - .filter_map(|(i, n)| n.as_ref().map(|n| (i, n))); - - f.debug_map().entries(entries).finish() - } - } - - struct Arm<T, U>(T, U); - - impl<T, U> Debug for Arm<T, U> - where - T: Display, - U: Display, - { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{} ⇒ {}", self.0, self.1) - } - } - - impl Debug for Fork { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut list = f.debug_set(); - - for (range, then) in self.branches() { - list.entry(&Arm(range, then)); - } - if let Some(id) = self.miss { - list.entry(&Arm('_', id)); - } - - list.finish() - } - } - - impl Display for Miss { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Miss::First(id) => Display::fmt(id, f), - Miss::Any(id) => write!(f, "{}*", id), - Miss::None => f.write_str("n/a"), - } - } - } - - impl Debug for Rope { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use std::fmt::Write; - - let mut rope = String::with_capacity(self.pattern.len()); - for range in self.pattern.iter() { - write!(rope, "{}", range)?; - } - - match self.miss.is_none() { - false => { - let mut list = f.debug_list(); - 
- list.entry(&Arm(rope, self.then)); - list.entry(&Arm('_', self.miss)); - - list.finish() - } - true => Arm(rope, self.then).fmt(f), - } - } - } - - impl PartialEq for Fork { - fn eq(&self, other: &Self) -> bool { - self.miss == other.miss && self.branches().eq(other.branches()) - } - } - - impl<T: Debug> Debug for Node<T> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Node::Fork(fork) => fork.fmt(f), - Node::Rope(rope) => rope.fmt(f), - Node::Leaf(leaf) => leaf.fmt(f), - } - } - } - - use std::ops::RangeInclusive; - - impl From<RangeInclusive<u8>> for Range { - fn from(range: RangeInclusive<u8>) -> Range { - Range { - start: *range.start(), - end: *range.end(), - } - } - } - - impl From<RangeInclusive<char>> for Range { - fn from(range: RangeInclusive<char>) -> Range { - Range { - start: *range.start() as u8, - end: *range.end() as u8, - } - } - } - - impl<T> PartialEq<Rope> for Node<T> { - fn eq(&self, other: &Rope) -> bool { - match self { - Node::Rope(rope) => rope == other, - _ => false, - } - } - } - - impl<T> PartialEq<Fork> for Node<T> { - fn eq(&self, other: &Fork) -> bool { - match self { - Node::Fork(fork) => fork == other, - _ => false, - } - } - } -} diff --git a/vendor/logos-codegen/src/graph/meta.rs b/vendor/logos-codegen/src/graph/meta.rs deleted file mode 100644 index 757ced09..00000000 --- a/vendor/logos-codegen/src/graph/meta.rs +++ /dev/null @@ -1,174 +0,0 @@ -use std::cmp::min; -use std::collections::BTreeMap; -use std::ops::{Index, IndexMut}; - -use crate::graph::{Graph, Node, NodeId}; - -#[derive(Debug)] -pub struct Meta { - map: BTreeMap<NodeId, MetaItem>, -} - -#[derive(Debug, Default)] -pub struct MetaItem { - /// Number of references to this node - pub refcount: usize, - /// Minimum number of bytes that ought to be read for this - /// node to find a match - pub min_read: usize, - /// Marks whether or not this node leads to a loop entry node. 
- pub is_loop_init: bool, - /// Ids of other nodes that point to this node while this - /// node is on a stack (creating a loop) - pub loop_entry_from: Vec<NodeId>, -} - -impl Index<NodeId> for Meta { - type Output = MetaItem; - - fn index(&self, id: NodeId) -> &MetaItem { - &self.map[&id] - } -} - -impl IndexMut<NodeId> for Meta { - fn index_mut(&mut self, id: NodeId) -> &mut MetaItem { - self.map.entry(id).or_default() - } -} - -impl MetaItem { - fn loop_entry(&mut self, id: NodeId) { - if let Err(idx) = self.loop_entry_from.binary_search(&id) { - self.loop_entry_from.insert(idx, id); - } - } -} - -impl Meta { - pub fn analyze<T>(root: NodeId, graph: &Graph<T>) -> Self { - let mut meta = Meta { - map: Default::default(), - }; - - meta.first_pass(root, root, graph, &mut Vec::new()); - - meta - } - - pub fn first_pass<T>( - &mut self, - this: NodeId, - parent: NodeId, - graph: &Graph<T>, - stack: &mut Vec<NodeId>, - ) -> &MetaItem { - let meta = &mut self[this]; - let is_done = meta.refcount > 0; - - meta.refcount += 1; - - if stack.contains(&this) { - meta.loop_entry(parent); - self[parent].is_loop_init = true; - } - if is_done { - return &self[this]; - } - - stack.push(this); - - let mut min_read; - - match &graph[this] { - Node::Fork(fork) => { - min_read = usize::MAX; - for (_, id) in fork.branches() { - let meta = self.first_pass(id, this, graph, stack); - - if meta.is_loop_init { - min_read = 1; - } else { - min_read = min(min_read, meta.min_read + 1); - } - } - if let Some(id) = fork.miss { - let meta = self.first_pass(id, this, graph, stack); - - if meta.is_loop_init { - min_read = 0; - } else { - min_read = min(min_read, meta.min_read); - } - } - if min_read == usize::MAX { - min_read = 0; - } - } - Node::Rope(rope) => { - min_read = rope.pattern.len(); - let meta = self.first_pass(rope.then, this, graph, stack); - - if !meta.is_loop_init { - min_read += meta.min_read; - } - - if let Some(id) = rope.miss.first() { - let meta = self.first_pass(id, this, 
graph, stack); - - if meta.is_loop_init { - min_read = 0; - } else { - min_read = min(min_read, meta.min_read); - } - } - } - Node::Leaf(_) => min_read = 0, - } - - stack.pop(); - - let meta = &mut self[this]; - meta.min_read = min_read; - let second_pass = meta.loop_entry_from.clone(); - - for id in second_pass { - self.meta_second_pass(id, graph); - } - - &self[this] - } - - fn meta_second_pass<T>(&mut self, id: NodeId, graph: &Graph<T>) { - let mut min_read; - - match &graph[id] { - Node::Fork(fork) => { - min_read = usize::MAX; - for (_, id) in fork.branches() { - let meta = &self[id]; - - if meta.is_loop_init { - min_read = 1; - } else { - min_read = min(min_read, meta.min_read + 1); - } - } - if min_read == usize::MAX { - min_read = 0; - } - } - Node::Rope(rope) => { - min_read = rope.pattern.len(); - let meta = &self[rope.then]; - - if !meta.is_loop_init { - min_read += meta.min_read; - } - } - Node::Leaf(_) => unreachable!(), - } - - self[id].min_read = min_read; - } -} diff --git a/vendor/logos-codegen/src/graph/mod.rs b/vendor/logos-codegen/src/graph/mod.rs deleted file mode 100644 index 6d218e8c..00000000 --- a/vendor/logos-codegen/src/graph/mod.rs +++ /dev/null @@ -1,566 +0,0 @@ -use std::cmp::Ordering; -use std::collections::btree_map::Entry; -use std::collections::BTreeMap as Map; -use std::hash::{Hash, Hasher}; -use std::num::NonZeroU32; -use std::ops::Index; - -use fnv::FnvHasher; - -mod fork; -mod impls; -mod meta; -mod range; -mod regex; -mod rope; - -pub use self::fork::Fork; -pub use self::meta::Meta; -pub use self::range::Range; -pub use self::rope::Rope; - -/// Disambiguation error during the attempt to merge two leaf -/// nodes with the same priority -#[derive(Debug)] -pub struct DisambiguationError(pub NodeId, pub NodeId); - -pub struct Graph<Leaf> { - /// Internal storage of all allocated nodes. Once a node is - /// put here, it should never be mutated. 
- nodes: Vec<Option<Node<Leaf>>>, - /// When merging two nodes into a new node, we store the two - /// entry keys and the result, so that we don't merge the same - /// two nodes multiple times. - /// - /// Most of the time the entry we want to find will be the last - /// one that has been inserted, so we can use a vec with reverse - /// order search to get O(1) searches much faster than any *Map. - merges: Map<Merge, NodeId>, - /// Another map used for accounting. Before `.push`ing a new node - /// onto the graph (inserts are exempt), we hash it and find if - /// an identical(!) node has been created before. - hashes: Map<u64, NodeId>, - /// Instead of handling errors over return types, opt to collect - /// them internally. - errors: Vec<DisambiguationError>, - /// Deferred merges. When when attempting to merge a node with an - /// empty reserved slot, the merging operation is deferred until - /// the reserved slot is populated. This is a stack that keeps track - /// of all such deferred merges - deferred: Vec<DeferredMerge>, -} - -/// Trait to be implemented on `Leaf` nodes in order to disambiguate -/// between them. -pub trait Disambiguate { - fn cmp(left: &Self, right: &Self) -> Ordering; -} - -/// Id of a Node in the graph. `NodeId` can be referencing an empty -/// slot that is going to be populated later in time. -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub struct NodeId(NonZeroU32); - -impl Hash for NodeId { - fn hash<H: Hasher>(&self, state: &mut H) { - // Always use little-endian byte order for hashing to avoid - // different code generation on big-endian platforms due to - // iteration over a HashMap, - // see https://github.com/maciejhirsz/logos/issues/427. 
- state.write(&self.0.get().to_le_bytes()) - } -} - -impl NodeId { - fn get(self) -> usize { - self.0.get() as usize - } - - fn new(n: usize) -> NodeId { - NodeId(NonZeroU32::new(n as u32).expect("Invalid NodeId")) - } -} - -/// Unique reserved `NodeId` that is guaranteed to point to an -/// empty allocated slot in the graph. It's safe to create multiple -/// `NodeId` copies of `ReservedId`, however API users should never -/// be able to clone a `ReservedId`, or create a new one from `NodeId`. -/// -/// `ReservedId` is consumed once passed into `Graph::insert`. -#[derive(Debug)] -pub struct ReservedId(NodeId); - -impl ReservedId { - pub fn get(&self) -> NodeId { - self.0 - } -} - -/// Merge key used to lookup whether two nodes have been previously -/// mered, so we can avoid duplicating merges, potentially running into -/// loops that blow out the stack. -/// -/// `Merge::new(a, b)` should always equal to `Merge::new(b, a)` to ensure -/// that node merges are symmetric. -#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)] -struct Merge(NodeId, NodeId); - -impl Merge { - fn new(a: NodeId, b: NodeId) -> Self { - if a < b { - Merge(a, b) - } else { - Merge(b, a) - } - } -} - -/// When attempting to merge two nodes, one of which was not yet created, -/// we can record such attempt, and execute the merge later on when the -/// `awaiting` has been `insert`ed into the graph. 
-#[derive(Debug)] -pub struct DeferredMerge { - awaiting: NodeId, - with: NodeId, - into: ReservedId, -} - -impl<Leaf> Graph<Leaf> { - pub fn new() -> Self { - Graph { - // Start with an empty slot so we can start - // counting NodeIds from 1 and use NonZero - // optimizations - nodes: vec![None], - merges: Map::new(), - hashes: Map::new(), - errors: Vec::new(), - deferred: Vec::new(), - } - } - - pub fn errors(&self) -> &[DisambiguationError] { - &self.errors - } - - fn next_id(&self) -> NodeId { - NodeId::new(self.nodes.len()) - } - - /// Reserve an empty slot for a node on the graph and return an - /// id for it. `ReservedId` cannot be cloned, and must be consumed - /// by calling `insert` on the graph. - pub fn reserve(&mut self) -> ReservedId { - let id = self.next_id(); - - self.nodes.push(None); - - ReservedId(id) - } - - /// Insert a node at a given, previously reserved id. Returns the - /// inserted `NodeId`. - pub fn insert<N>(&mut self, reserved: ReservedId, node: N) -> NodeId - where - N: Into<Node<Leaf>>, - Leaf: Disambiguate, - { - let id = reserved.get(); - - self.nodes[id.get()] = Some(node.into()); - - let mut awaiting = Vec::new(); - - // Partition out all `DeferredMerge`s that can be completed - // now that this `ReservedId` has a `Node` inserted into it. - for idx in (0..self.deferred.len()).rev() { - if self.deferred[idx].awaiting == id { - awaiting.push(self.deferred.remove(idx)); - } - } - - // Complete deferred merges. We've collected them from the back, - // so we must iterate through them from the back as well to restore - // proper order of merges in case there is some cascading going on. - for DeferredMerge { - awaiting, - with, - into, - } in awaiting.into_iter().rev() - { - self.merge_unchecked(awaiting, with, into); - } - - id - } - - /// Push a node onto the graph and get an id to it. If an identical - /// node has already been pushed on the graph, it will return the id - /// of that node instead. 
- pub fn push<B>(&mut self, node: B) -> NodeId - where - B: Into<Node<Leaf>>, - { - let node = node.into(); - - if let Node::Leaf(_) = node { - return self.push_unchecked(node); - } - - let mut hasher = FnvHasher::default(); - node.hash(&mut hasher); - - let next_id = self.next_id(); - - match self.hashes.entry(hasher.finish()) { - Entry::Occupied(occupied) => { - let id = *occupied.get(); - - if self[id].eq(&node) { - return id; - } - } - Entry::Vacant(vacant) => { - vacant.insert(next_id); - } - } - - self.push_unchecked(node) - } - - fn push_unchecked(&mut self, node: Node<Leaf>) -> NodeId { - let id = self.next_id(); - - self.nodes.push(Some(node)); - - id - } - - /// If nodes `a` and `b` have been already merged, return the - /// `NodeId` of the node they have been merged into. - fn find_merged(&self, a: NodeId, b: NodeId) -> Option<NodeId> { - let probe = Merge::new(a, b); - - self.merges.get(&probe).copied() - } - - /// Mark that nodes `a` and `b` have been merged into `product`. - /// - /// This will also mark merging `a` and `product`, as well as - /// `b` and `product` into `product`, since those are symmetric - /// operations. - /// - /// This is necessary to break out asymmetric merge loops. - fn set_merged(&mut self, a: NodeId, b: NodeId, product: NodeId) { - self.merges.insert(Merge::new(a, b), product); - self.merges.insert(Merge::new(a, product), product); - self.merges.insert(Merge::new(b, product), product); - } - - /// Merge the nodes at id `a` and `b`, returning a new id. - pub fn merge(&mut self, a: NodeId, b: NodeId) -> NodeId - where - Leaf: Disambiguate, - { - if a == b { - return a; - } - - // If the id pair is already merged (or is being merged), just return the id - if let Some(id) = self.find_merged(a, b) { - return id; - } - - match (self.get(a), self.get(b)) { - (None, None) => { - panic!( - "Merging two reserved nodes! 
This is a bug, please report it:\n\ - \n\ - https://github.com/maciejhirsz/logos/issues" - ); - } - (None, Some(_)) => { - let reserved = self.reserve(); - let id = reserved.get(); - self.deferred.push(DeferredMerge { - awaiting: a, - with: b, - into: reserved, - }); - self.set_merged(a, b, id); - - return id; - } - (Some(_), None) => { - let reserved = self.reserve(); - let id = reserved.get(); - self.deferred.push(DeferredMerge { - awaiting: b, - with: a, - into: reserved, - }); - self.set_merged(a, b, id); - - return id; - } - (Some(Node::Leaf(left)), Some(Node::Leaf(right))) => { - return match Disambiguate::cmp(left, right) { - Ordering::Less => b, - Ordering::Greater => a, - Ordering::Equal => { - self.errors.push(DisambiguationError(a, b)); - - a - } - }; - } - _ => (), - } - - // Reserve the id for the merge and save it. Since the graph can contain loops, - // this prevents us from trying to merge the same id pair in a loop, blowing up - // the stack. - let reserved = self.reserve(); - self.set_merged(a, b, reserved.get()); - - self.merge_unchecked(a, b, reserved) - } - - /// Unchecked merge of `a` and `b`. This fn assumes that `a` and `b` are - /// not pointing to empty slots. 
- fn merge_unchecked(&mut self, a: NodeId, b: NodeId, reserved: ReservedId) -> NodeId - where - Leaf: Disambiguate, - { - let merged_rope = match (self.get(a), self.get(b)) { - (Some(Node::Rope(rope)), _) => { - let rope = rope.clone(); - - self.merge_rope(rope, b) - } - (_, Some(Node::Rope(rope))) => { - let rope = rope.clone(); - - self.merge_rope(rope, a) - } - _ => None, - }; - - if let Some(rope) = merged_rope { - return self.insert(reserved, rope); - } - - let mut fork = self.fork_off(a); - fork.merge(self.fork_off(b), self); - - let mut stack = vec![reserved.get()]; - - // Flatten the fork - while let Some(miss) = fork.miss { - if stack.contains(&miss) { - break; - } - stack.push(miss); - - let other = match self.get(miss) { - Some(Node::Fork(other)) => other.clone(), - Some(Node::Rope(other)) => other.clone().into_fork(self), - _ => break, - }; - match other.miss { - Some(id) if self.get(id).is_none() => break, - _ => (), - } - fork.miss = None; - fork.merge(other, self); - } - - self.insert(reserved, fork) - } - - fn merge_rope(&mut self, rope: Rope, other: NodeId) -> Option<Rope> - where - Leaf: Disambiguate, - { - match self.get(other) { - Some(Node::Fork(fork)) if rope.miss.is_none() => { - // Count how many consecutive ranges in this rope would - // branch into the fork that results in a loop. 
- // - // e.g.: for rope "foobar" and a looping fork [a-z]: 6 - let count = rope - .pattern - .iter() - .take_while(|range| fork.contains(**range) == Some(other)) - .count(); - - let mut rope = rope.split_at(count, self)?.miss_any(other); - - rope.then = self.merge(rope.then, other); - - Some(rope) - } - Some(Node::Rope(other)) => { - let (prefix, miss) = rope.prefix(other)?; - - let (a, b) = (rope, other.clone()); - - let a = a.remainder(prefix.len(), self); - let b = b.remainder(prefix.len(), self); - - let rope = Rope::new(prefix, self.merge(a, b)).miss(miss); - - Some(rope) - } - Some(Node::Leaf(_)) | None => { - if rope.miss.is_none() { - Some(rope.miss(other)) - } else { - None - } - } - _ => None, - } - } - - pub fn fork_off(&mut self, id: NodeId) -> Fork - where - Leaf: Disambiguate, - { - match self.get(id) { - Some(Node::Fork(fork)) => fork.clone(), - Some(Node::Rope(rope)) => rope.clone().into_fork(self), - Some(Node::Leaf(_)) | None => Fork::new().miss(id), - } - } - - pub fn nodes(&self) -> &[Option<Node<Leaf>>] { - &self.nodes - } - - /// Find all nodes that have no references and remove them. - pub fn shake(&mut self, root: NodeId) { - let mut filter = vec![false; self.nodes.len()]; - - filter[root.get()] = true; - - self[root].shake(self, &mut filter); - - for (id, referenced) in filter.into_iter().enumerate() { - if !referenced { - self.nodes[id] = None; - } - } - } - - pub fn get(&self, id: NodeId) -> Option<&Node<Leaf>> { - self.nodes.get(id.get())?.as_ref() - } -} - -impl<Leaf> Index<NodeId> for Graph<Leaf> { - type Output = Node<Leaf>; - - fn index(&self, id: NodeId) -> &Node<Leaf> { - self.get(id).expect( - "Indexing into an empty node. 
This is a bug, please report it at:\n\ - \n\ - https://github.com/maciejhirsz/logos/issues", - ) - } -} - -impl std::fmt::Display for NodeId { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - std::fmt::Display::fmt(&self.0, f) - } -} - -#[cfg_attr(test, derive(PartialEq))] -pub enum Node<Leaf> { - /// Fork node, can lead to more than one state - Fork(Fork), - /// Rope node, can lead to one state on match, one state on miss - Rope(Rope), - /// Leaf node, terminal state - Leaf(Leaf), -} - -impl<Leaf> Node<Leaf> { - pub fn miss(&self) -> Option<NodeId> { - match self { - Node::Rope(rope) => rope.miss.first(), - Node::Fork(fork) => fork.miss, - Node::Leaf(_) => None, - } - } - - fn eq(&self, other: &Node<Leaf>) -> bool { - match (self, other) { - (Node::Fork(a), Node::Fork(b)) => a == b, - (Node::Rope(a), Node::Rope(b)) => a == b, - _ => false, - } - } - - fn shake(&self, graph: &Graph<Leaf>, filter: &mut [bool]) { - match self { - Node::Fork(fork) => fork.shake(graph, filter), - Node::Rope(rope) => rope.shake(graph, filter), - Node::Leaf(_) => (), - } - } - - pub fn unwrap_leaf(&self) -> &Leaf { - match self { - Node::Fork(_) => panic!("Internal Error: called unwrap_leaf on a fork"), - Node::Rope(_) => panic!("Internal Error: called unwrap_leaf on a rope"), - Node::Leaf(leaf) => leaf, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn leaf_stack_size() { - use std::mem::size_of; - - const WORD: usize = size_of::<usize>(); - const NODE: usize = size_of::<Node<()>>(); - - assert!(NODE <= 6 * WORD, "Size of Node<()> is {} bytes!", NODE); - } - - #[test] - fn create_a_loop() { - let mut graph = Graph::new(); - - let token = graph.push(Node::Leaf("IDENT")); - let id = graph.reserve(); - let fork = Fork::new().branch('a'..='z', id.get()).miss(token); - let root = graph.insert(id, fork); - - assert_eq!(graph[token], Node::Leaf("IDENT")); - assert_eq!(graph[root], Fork::new().branch('a'..='z', 
root).miss(token),); - } - - #[test] - fn fork_off() { - let mut graph = Graph::new(); - - let leaf = graph.push(Node::Leaf("LEAF")); - let rope = graph.push(Rope::new("rope", leaf)); - let fork = graph.push(Fork::new().branch(b'!', leaf)); - - assert_eq!(graph.fork_off(leaf), Fork::new().miss(leaf)); - assert_eq!( - graph.fork_off(rope), - Fork::new().branch(b'r', NodeId::new(graph.nodes.len() - 1)) - ); - assert_eq!(graph.fork_off(fork), Fork::new().branch(b'!', leaf)); - } -} diff --git a/vendor/logos-codegen/src/graph/range.rs b/vendor/logos-codegen/src/graph/range.rs deleted file mode 100644 index a4d23d3b..00000000 --- a/vendor/logos-codegen/src/graph/range.rs +++ /dev/null @@ -1,144 +0,0 @@ -use regex_syntax::hir::ClassBytesRange; -use regex_syntax::hir::ClassUnicodeRange; -use regex_syntax::utf8::Utf8Range; - -use std::cmp::{Ord, Ordering}; - -#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub struct Range { - pub start: u8, - pub end: u8, -} - -impl Range { - pub fn as_byte(&self) -> Option<u8> { - if self.is_byte() { - Some(self.start) - } else { - None - } - } - - pub fn is_byte(&self) -> bool { - self.start == self.end - } -} - -impl From<u8> for Range { - fn from(byte: u8) -> Range { - Range { - start: byte, - end: byte, - } - } -} - -impl From<&u8> for Range { - fn from(byte: &u8) -> Range { - Range::from(*byte) - } -} - -impl Iterator for Range { - type Item = u8; - - fn next(&mut self) -> Option<u8> { - match self.start.cmp(&self.end) { - std::cmp::Ordering::Less => { - let res = self.start; - self.start += 1; - - Some(res) - } - std::cmp::Ordering::Equal => { - let res = self.start; - - // Necessary so that range 0xFF-0xFF doesn't loop forever - self.start = 0xFF; - self.end = 0x00; - - Some(res) - } - std::cmp::Ordering::Greater => None, - } - } -} - -impl PartialOrd for Range { - fn partial_cmp(&self, other: &Range) -> Option<Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for Range { - fn cmp(&self, other: &Self) -> Ordering { - 
self.start.cmp(&other.start) - } -} - -impl From<Utf8Range> for Range { - fn from(r: Utf8Range) -> Range { - Range { - start: r.start, - end: r.end, - } - } -} - -impl From<ClassUnicodeRange> for Range { - fn from(r: ClassUnicodeRange) -> Range { - let start = r.start() as u32; - let end = r.end() as u32; - - if start >= 128 || end >= 128 && end != 0x0010FFFF { - panic!("Casting non-ascii ClassUnicodeRange to Range") - } - - Range { - start: start as u8, - end: end as u8, - } - } -} - -impl From<ClassBytesRange> for Range { - fn from(r: ClassBytesRange) -> Range { - Range { - start: r.start(), - end: r.end(), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn range_iter_one() { - let byte = Range::from(b'!'); - let collected = byte.take(1000).collect::<Vec<_>>(); - - assert_eq!(b"!", &collected[..]); - } - - #[test] - fn range_iter_few() { - let byte = Range { - start: b'a', - end: b'd', - }; - let collected = byte.take(1000).collect::<Vec<_>>(); - - assert_eq!(b"abcd", &collected[..]); - } - - #[test] - fn range_iter_bounds() { - let byte = Range::from(0xFA..=0xFF); - - let collected = byte.take(1000).collect::<Vec<_>>(); - - assert_eq!(b"\xFA\xFB\xFC\xFD\xFE\xFF", &collected[..]); - } -} diff --git a/vendor/logos-codegen/src/graph/regex.rs b/vendor/logos-codegen/src/graph/regex.rs deleted file mode 100644 index d55c490f..00000000 --- a/vendor/logos-codegen/src/graph/regex.rs +++ /dev/null @@ -1,295 +0,0 @@ -use std::fmt::Debug; - -use regex_syntax::utf8::Utf8Sequences; - -use crate::graph::{Disambiguate, Fork, Graph, Node, NodeId, Range, ReservedId, Rope}; -use crate::mir::{Class, ClassUnicode, Literal, Mir}; - -impl<Leaf: Disambiguate + Debug> Graph<Leaf> { - pub fn regex(&mut self, mir: Mir, then: NodeId) -> NodeId { - self.parse_mir(&mir, then, None, None, false) - } - - fn parse_mir( - &mut self, - mir: &Mir, - then: NodeId, - miss: Option<NodeId>, - reserved: Option<ReservedId>, - repeated: bool, - ) -> NodeId { - match mir { - 
Mir::Empty => then, - Mir::Loop(mir) => { - let reserved_first = reserved.unwrap_or_else(|| self.reserve()); - - let (new_then, new_miss); - if let Some(old_miss) = miss { - // We have to separate the first iteration from the other iterations, - // because the `old_miss` path must only be taken if we miss the first - // iteration. - let reserved_next = self.reserve(); - new_then = self.parse_mir( - mir, - reserved_next.get(), - Some(then), - Some(reserved_next), - true, - ); - new_miss = self.merge(old_miss, then); - } else { - new_then = reserved_first.get(); - new_miss = then; - } - - self.parse_mir(mir, new_then, Some(new_miss), Some(reserved_first), true) - } - Mir::Maybe(mir) => { - let miss = match miss { - Some(id) => self.merge(id, then), - None => then, - }; - - self.parse_mir(mir, then, Some(miss), reserved, true) - } - Mir::Alternation(alternation) => { - let mut fork = Fork::new().miss(miss); - - for mir in alternation { - let id = self.parse_mir(mir, then, None, None, repeated); - let alt = self.fork_off(id); - - fork.merge(alt, self); - } - - self.insert_or_push(reserved, fork) - } - Mir::Literal(literal) => { - let pattern = literal.0.to_vec(); - - self.insert_or_push(reserved, Rope::new(pattern, then).miss(miss)) - } - Mir::Concat(concat) => { - // Take an initial guess at the capacity - estimates a little worse than an average case - // scenario by assuming every concat element is singular but has a full code-point unicode literal. - // The only way to get the actual size of the Vec is if every sub-concat node is added up. 
- let mut ropebuf: Vec<Range> = Vec::with_capacity(concat.len() * 4); - let mut then = then; - - let mut handle_bytes = |graph: &mut Self, mir: &Mir, then: &mut NodeId| match mir { - Mir::Literal(Literal(bytes)) => { - ropebuf.extend(bytes.iter().rev().map(Into::<Range>::into)); - true - } - Mir::Class(Class::Unicode(class)) if is_one_ascii(class, repeated) => { - ropebuf.push(class.ranges()[0].into()); - true - } - Mir::Class(Class::Bytes(class)) if class.ranges().len() == 1 => { - ropebuf.push(class.ranges()[0].into()); - true - } - _ => { - if !ropebuf.is_empty() { - let rope = - Rope::new(ropebuf.iter().cloned().rev().collect::<Vec<_>>(), *then); - - *then = graph.push(rope); - ropebuf = Vec::with_capacity(concat.len() * 4); - } - false - } - }; - - for mir in concat[1..].iter().rev() { - if !handle_bytes(self, mir, &mut then) { - then = self.parse_mir(mir, then, None, None, false); - } - } - - let first_mir = &concat[0]; - if handle_bytes(self, first_mir, &mut then) { - let rope = Rope::new(ropebuf.iter().cloned().rev().collect::<Vec<_>>(), then) - .miss(miss); - self.insert_or_push(reserved, rope) - } else { - self.parse_mir(first_mir, then, miss, reserved, false) - } - } - Mir::Class(Class::Unicode(class)) if !is_ascii(class, repeated) => { - let mut ropes = class - .iter() - .flat_map(|range| Utf8Sequences::new(range.start(), range.end())) - .map(|sequence| Rope::new(sequence.as_slice(), then)) - .collect::<Vec<_>>(); - - if ropes.len() == 1 { - let rope = ropes.remove(0); - - return self.insert_or_push(reserved, rope.miss(miss)); - } - - let mut root = Fork::new().miss(miss); - - for rope in ropes { - let fork = rope.into_fork(self); - root.merge(fork, self); - } - - self.insert_or_push(reserved, root) - } - Mir::Class(class) => { - let mut fork = Fork::new().miss(miss); - - let class: Vec<Range> = match class { - Class::Unicode(u) => u.iter().copied().map(Into::into).collect(), - Class::Bytes(b) => b.iter().copied().map(Into::into).collect(), - }; - - for 
range in class { - fork.add_branch(range, then, self); - } - - self.insert_or_push(reserved, fork) - } - } - } - - fn insert_or_push<N>(&mut self, id: Option<ReservedId>, node: N) -> NodeId - where - N: Into<Node<Leaf>>, - { - match id { - Some(id) => self.insert(id, node), - None => self.push(node), - } - } -} - -/// Return whether current class unicode is ascii. -/// -/// Because unicode ranges are iterated in increasing order, -/// it is only necessary to check the last range. -/// -/// If the check is performed in a repetition, -/// a fast path is used by checking if end of range is 0x0010_FFFF. -fn is_ascii(class: &ClassUnicode, repeated: bool) -> bool { - class.iter().last().map_or(true, |range| { - let start = range.start() as u32; - let end = range.end() as u32; - end < 128 || (repeated && start < 128 && end == 0x0010_FFFF) - }) -} - -/// Return whether current class unicode is ascii and only contains -/// one range. -/// -/// See [`is_ascii`] function for more details. -fn is_one_ascii(class: &ClassUnicode, repeated: bool) -> bool { - if class.ranges().len() != 1 { - return false; - } - - let range = &class.ranges()[0]; - let start = range.start() as u32; - let end = range.end() as u32; - end < 128 || (repeated && start < 128 && end == 0x0010_FFFF) -} - -#[cfg(test)] -mod tests { - use std::num::NonZeroU32; - - use super::*; - use crate::graph::Node; - use pretty_assertions::assert_eq; - - #[test] - fn rope() { - let mut graph = Graph::new(); - - let mir = Mir::utf8("foobar").unwrap(); - - assert_eq!(mir.priority(), 12); - - let leaf = graph.push(Node::Leaf("LEAF")); - let id = graph.regex(mir, leaf); - - assert_eq!(graph[id], Node::Rope(Rope::new("foobar", leaf)),) - } - - #[test] - fn alternation() { - let mut graph = Graph::new(); - - let mir = Mir::utf8("a|b").unwrap(); - - assert_eq!(mir.priority(), 2); - - let leaf = graph.push(Node::Leaf("LEAF")); - let id = graph.regex(mir, leaf); - - assert_eq!( - graph[id], - Node::Fork(Fork::new().branch(b'a', 
leaf).branch(b'b', leaf)), - ); - } - - #[test] - fn repeat() { - let mut graph = Graph::new(); - - let mir = Mir::utf8("[a-z]*").unwrap(); - - assert_eq!(mir.priority(), 0); - - let leaf = graph.push(Node::Leaf("LEAF")); - let id = graph.regex(mir, leaf); - - assert_eq!( - graph[id], - Node::Fork( - Fork::new() - .branch('a'..='z', id) // goto self == loop - .miss(leaf) - ), - ); - } - - #[test] - fn maybe() { - let mut graph = Graph::new(); - - let mir = Mir::utf8("[a-z]?").unwrap(); - - assert_eq!(mir.priority(), 0); - - let leaf = graph.push(Node::Leaf("LEAF")); - let id = graph.regex(mir, leaf); - - assert_eq!( - graph[id], - Node::Fork(Fork::new().branch('a'..='z', leaf).miss(leaf)), - ); - } - - #[test] - fn long_concat_389() { - let mut graph = Graph::new(); - - let mir = Mir::utf8("abcdefghijklmnopqrstuvwxyz*").unwrap(); - - assert_eq!(mir.priority(), 50); - - let leaf = graph.push(Node::Leaf("LEAF")); - let id = graph.regex(mir, leaf); - let sub_id = NodeId(NonZeroU32::new(2).unwrap()); - - assert_eq!( - graph[id], - Node::Rope(Rope::new("abcdefghijklmnopqrstuvwxy", sub_id)) - ); - - assert_eq!(graph[sub_id], Node::Rope(Rope::new("z", sub_id).miss(leaf))) - } -} diff --git a/vendor/logos-codegen/src/graph/rope.rs b/vendor/logos-codegen/src/graph/rope.rs deleted file mode 100644 index 6e563fac..00000000 --- a/vendor/logos-codegen/src/graph/rope.rs +++ /dev/null @@ -1,330 +0,0 @@ -use std::ops::Deref; - -use crate::graph::{Disambiguate, Fork, Graph, NodeId, Range}; - -#[derive(PartialEq, Clone, Hash)] -pub struct Rope { - pub pattern: Pattern, - pub then: NodeId, - pub miss: Miss, -} - -#[derive(PartialEq, Clone, Hash)] -pub struct Pattern(pub Vec<Range>); - -impl Deref for Pattern { - type Target = [Range]; - - fn deref(&self) -> &[Range] { - &self.0 - } -} - -/// Because Ropes could potentially fail a match mid-pattern, -/// a regular `Option` is not sufficient here. 
-#[derive(PartialEq, Clone, Copy, Hash)] -pub enum Miss { - /// Same as Option::None, error on fail - None, - /// Jump to id if first byte does not match, fail on partial match - First(NodeId), - /// Jump to id on partial or empty match - Any(NodeId), -} - -impl Miss { - pub fn is_none(&self) -> bool { - matches!(self, Miss::None) - } - - pub fn first(self) -> Option<NodeId> { - match self { - Miss::First(id) | Miss::Any(id) => Some(id), - _ => None, - } - } - - pub fn take_first(&mut self) -> Option<NodeId> { - match *self { - Miss::First(id) => { - *self = Miss::None; - - Some(id) - } - Miss::Any(id) => Some(id), - Miss::None => None, - } - } -} - -impl From<Option<NodeId>> for Miss { - fn from(miss: Option<NodeId>) -> Self { - match miss { - Some(id) => Miss::First(id), - None => Miss::None, - } - } -} - -impl From<NodeId> for Miss { - fn from(id: NodeId) -> Self { - Miss::First(id) - } -} - -impl Rope { - pub fn new<P>(pattern: P, then: NodeId) -> Self - where - P: Into<Pattern>, - { - Rope { - pattern: pattern.into(), - then, - miss: Miss::None, - } - } - - pub fn miss<M>(mut self, miss: M) -> Self - where - M: Into<Miss>, - { - self.miss = miss.into(); - self - } - - pub fn miss_any(mut self, miss: NodeId) -> Self { - self.miss = Miss::Any(miss); - self - } - - pub fn into_fork<T>(mut self, graph: &mut Graph<T>) -> Fork - where - T: Disambiguate, - { - let first = self.pattern.0.remove(0); - let miss = self.miss.take_first(); - - // The new fork will lead to a new rope, - // or the old target if no new rope was created - let then = match self.pattern.len() { - 0 => self.then, - _ => graph.push(self), - }; - - Fork::new().branch(first, then).miss(miss) - } - - pub fn prefix(&self, other: &Self) -> Option<(Pattern, Miss)> { - let count = self - .pattern - .iter() - .zip(other.pattern.iter()) - .take_while(|(a, b)| a == b) - .count(); - - let pattern = match count { - 0 => return None, - n => self.pattern[..n].into(), - }; - let miss = match (self.miss, 
other.miss) { - (Miss::None, miss) => miss, - (miss, Miss::None) => miss, - _ => return None, - }; - - Some((pattern, miss)) - } - - pub fn split_at<T>(mut self, at: usize, graph: &mut Graph<T>) -> Option<Rope> - where - T: Disambiguate, - { - match at { - 0 => return None, - n if n == self.pattern.len() => return Some(self), - _ => (), - } - - let (this, next) = self.pattern.split_at(at); - - let next_miss = match self.miss { - Miss::Any(_) => self.miss, - _ => Miss::None, - }; - - let next = graph.push(Rope { - pattern: next.into(), - miss: next_miss, - then: self.then, - }); - - self.pattern = this.into(); - self.then = next; - - Some(self) - } - - pub fn remainder<T>(mut self, at: usize, graph: &mut Graph<T>) -> NodeId - where - T: Disambiguate, - { - self.pattern = self.pattern[at..].into(); - - match self.pattern.len() { - 0 => self.then, - _ => graph.push(self), - } - } - - pub fn shake<T>(&self, graph: &Graph<T>, filter: &mut [bool]) { - if let Some(id) = self.miss.first() { - if !filter[id.get()] { - filter[id.get()] = true; - graph[id].shake(graph, filter); - } - } - - if !filter[self.then.get()] { - filter[self.then.get()] = true; - graph[self.then].shake(graph, filter); - } - } -} - -impl Pattern { - pub fn to_bytes(&self) -> Option<Vec<u8>> { - let mut out = Vec::with_capacity(self.len()); - - for range in self.iter() { - out.push(range.as_byte()?); - } - - Some(out) - } -} - -impl<T> From<&[T]> for Pattern -where - T: Into<Range> + Copy, -{ - fn from(slice: &[T]) -> Self { - Pattern(slice.iter().copied().map(Into::into).collect()) - } -} - -impl<T> From<Vec<T>> for Pattern -where - T: Into<Range>, -{ - fn from(vec: Vec<T>) -> Self { - Pattern(vec.into_iter().map(Into::into).collect()) - } -} - -impl From<&str> for Pattern { - fn from(slice: &str) -> Self { - slice.as_bytes().into() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::graph::Node; - use pretty_assertions::assert_eq; - - #[test] - fn into_fork() { - let mut graph = 
Graph::new(); - - let leaf = graph.push(Node::Leaf("LEAF")); - let rope = Rope::new("foobar", leaf); - - let fork = rope.into_fork(&mut graph); - - assert_eq!(leaf, NodeId::new(1)); - assert_eq!(fork, Fork::new().branch(b'f', NodeId::new(2))); - assert_eq!(graph[NodeId::new(2)], Rope::new("oobar", leaf)); - } - - #[test] - fn into_fork_one_byte() { - let mut graph = Graph::new(); - - let leaf = graph.push(Node::Leaf("LEAF")); - let rope = Rope::new("!", leaf); - - let fork = rope.into_fork(&mut graph); - - assert_eq!(leaf, NodeId::new(1)); - assert_eq!(fork, Fork::new().branch(b'!', NodeId::new(1))); - } - - #[test] - fn into_fork_miss_any() { - let mut graph = Graph::new(); - - let leaf = graph.push(Node::Leaf("LEAF")); - let rope = Rope::new("42", leaf).miss_any(NodeId::new(42)); - - let fork = rope.into_fork(&mut graph); - - assert_eq!(leaf, NodeId::new(1)); - assert_eq!( - fork, - Fork::new() - .branch(b'4', NodeId::new(2)) - .miss(NodeId::new(42)) - ); - assert_eq!( - graph[NodeId::new(2)], - Rope::new("2", leaf).miss_any(NodeId::new(42)) - ); - } - - #[test] - fn into_fork_miss_first() { - let mut graph = Graph::new(); - - let leaf = graph.push(Node::Leaf("LEAF")); - let rope = Rope::new("42", leaf).miss(Miss::First(NodeId::new(42))); - - let fork = rope.into_fork(&mut graph); - - assert_eq!(leaf, NodeId::new(1)); - assert_eq!( - fork, - Fork::new() - .branch(b'4', NodeId::new(2)) - .miss(NodeId::new(42)) - ); - assert_eq!(graph[NodeId::new(2)], Rope::new("2", leaf)); - } - - #[test] - fn split_at() { - let mut graph = Graph::new(); - - let leaf = graph.push(Node::Leaf("LEAF")); - let rope = Rope::new("foobar", leaf); - - assert_eq!(rope.clone().split_at(6, &mut graph).unwrap(), rope); - - let split = rope.split_at(3, &mut graph).unwrap(); - let expected_id = NodeId::new(leaf.get() + 1); - - assert_eq!(split, Rope::new("foo", expected_id)); - assert_eq!(graph[expected_id], Rope::new("bar", leaf)); - } - - #[test] - fn pattern_to_bytes() { - let pat = 
Pattern::from("foobar"); - - assert_eq!(pat.to_bytes().unwrap(), b"foobar"); - - let ranges = Pattern::from(vec![0..=0, 42..=42, b'{'..=b'}']); - - assert_eq!(ranges.to_bytes(), None); - } -} diff --git a/vendor/logos-codegen/src/leaf.rs b/vendor/logos-codegen/src/leaf.rs deleted file mode 100644 index 5e0b810e..00000000 --- a/vendor/logos-codegen/src/leaf.rs +++ /dev/null @@ -1,118 +0,0 @@ -use std::cmp::{Ord, Ordering}; -use std::fmt::{self, Debug, Display}; - -use proc_macro2::{Span, TokenStream}; -use syn::{spanned::Spanned, Ident}; - -use crate::graph::{Disambiguate, Node}; -use crate::util::MaybeVoid; - -#[derive(Clone)] -pub struct Leaf<'t> { - pub ident: Option<&'t Ident>, - pub span: Span, - pub priority: usize, - pub field: MaybeVoid, - pub callback: Option<Callback>, -} - -#[derive(Clone)] -pub enum Callback { - Label(TokenStream), - Inline(Box<InlineCallback>), - Skip(Span), -} - -#[derive(Clone)] -pub struct InlineCallback { - pub arg: Ident, - pub body: TokenStream, - pub span: Span, -} - -impl From<InlineCallback> for Callback { - fn from(inline: InlineCallback) -> Callback { - Callback::Inline(Box::new(inline)) - } -} - -impl Callback { - pub fn span(&self) -> Span { - match self { - Callback::Label(tokens) => tokens.span(), - Callback::Inline(inline) => inline.span, - Callback::Skip(span) => *span, - } - } -} - -impl<'t> Leaf<'t> { - pub fn new(ident: &'t Ident, span: Span) -> Self { - Leaf { - ident: Some(ident), - span, - priority: 0, - field: MaybeVoid::Void, - callback: None, - } - } - - pub fn new_skip(span: Span) -> Self { - Leaf { - ident: None, - span, - priority: 0, - field: MaybeVoid::Void, - callback: Some(Callback::Skip(span)), - } - } - - pub fn callback(mut self, callback: Option<Callback>) -> Self { - self.callback = callback; - self - } - - pub fn field(mut self, field: MaybeVoid) -> Self { - self.field = field; - self - } - - pub fn priority(mut self, priority: usize) -> Self { - self.priority = priority; - self - } -} - -impl 
Disambiguate for Leaf<'_> { - fn cmp(left: &Leaf, right: &Leaf) -> Ordering { - Ord::cmp(&left.priority, &right.priority) - } -} - -impl<'t> From<Leaf<'t>> for Node<Leaf<'t>> { - fn from(leaf: Leaf<'t>) -> Self { - Node::Leaf(leaf) - } -} - -impl Debug for Leaf<'_> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "::{}", self)?; - - match self.callback { - Some(Callback::Label(ref label)) => write!(f, " ({})", label), - Some(Callback::Inline(_)) => f.write_str(" (<inline>)"), - Some(Callback::Skip(_)) => f.write_str(" (<skip>)"), - None => Ok(()), - } - } -} - -impl Display for Leaf<'_> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.ident { - Some(ident) => Display::fmt(ident, f), - None => f.write_str("<skip>"), - } - } -} diff --git a/vendor/logos-codegen/src/lib.rs b/vendor/logos-codegen/src/lib.rs deleted file mode 100644 index 2b2d3db2..00000000 --- a/vendor/logos-codegen/src/lib.rs +++ /dev/null @@ -1,391 +0,0 @@ -//! <img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right"> -//! -//! # Logos -//! -//! This is a `#[derive]` macro crate, [for documentation go to main crate](https://docs.rs/logos). - -// The `quote!` macro requires deep recursion. 
-#![recursion_limit = "196"] -#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")] - -mod error; -mod generator; -#[cfg(not(feature = "fuzzing"))] -mod graph; -#[cfg(feature = "fuzzing")] -pub mod graph; -mod leaf; -#[cfg(not(feature = "fuzzing"))] -mod mir; -#[cfg(feature = "fuzzing")] -pub mod mir; -mod parser; -mod util; - -#[macro_use] -#[allow(missing_docs)] -mod macros; - -use generator::Generator; -use graph::{DisambiguationError, Fork, Graph, Rope}; -use leaf::Leaf; -use parser::{IgnoreFlags, Mode, Parser}; -use quote::ToTokens; -use util::MaybeVoid; - -use proc_macro2::{Delimiter, TokenStream, TokenTree}; -use quote::quote; -use syn::parse_quote; -use syn::spanned::Spanned; -use syn::{Fields, ItemEnum}; - -const LOGOS_ATTR: &str = "logos"; -const ERROR_ATTR: &str = "error"; -const TOKEN_ATTR: &str = "token"; -const REGEX_ATTR: &str = "regex"; - -/// Generate a `Logos` implementation for the given struct, provided as a stream of rust tokens. -pub fn generate(input: TokenStream) -> TokenStream { - debug!("Reading input token streams"); - - let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums"); - - let name = &item.ident; - - let mut parser = Parser::default(); - - for param in item.generics.params { - parser.parse_generic(param); - } - - for attr in &mut item.attrs { - parser.try_parse_logos(attr); - } - - let mut ropes = Vec::new(); - let mut regex_ids = Vec::new(); - let mut graph = Graph::new(); - - { - let errors = &mut parser.errors; - - for literal in &parser.skips { - match literal.to_mir(&parser.subpatterns, IgnoreFlags::Empty, errors) { - Ok(mir) => { - let then = graph.push(Leaf::new_skip(literal.span()).priority(mir.priority())); - let id = graph.regex(mir, then); - - regex_ids.push(id); - } - Err(err) => { - errors.err(err, literal.span()); - } - } - } - } - - debug!("Iterating through enum variants"); - - for variant in &mut item.variants { - let field = match &mut variant.fields { - Fields::Unit 
=> MaybeVoid::Void, - Fields::Unnamed(fields) => { - if fields.unnamed.len() != 1 { - parser.err( - format!( - "Logos currently only supports variants with one field, found {}", - fields.unnamed.len(), - ), - fields.span(), - ); - } - - let ty = &mut fields - .unnamed - .first_mut() - .expect("Already checked len; qed") - .ty; - let ty = parser.get_type(ty); - - MaybeVoid::Some(ty) - } - Fields::Named(fields) => { - parser.err("Logos doesn't support named fields yet.", fields.span()); - - MaybeVoid::Void - } - }; - - // Lazy leaf constructor to avoid cloning - let var_ident = &variant.ident; - let leaf = move |span| Leaf::new(var_ident, span).field(field.clone()); - - for attr in &mut variant.attrs { - let attr_name = match attr.path().get_ident() { - Some(ident) => ident.to_string(), - None => continue, - }; - - match attr_name.as_str() { - ERROR_ATTR => { - // TODO: Remove in future versions - parser.err( - "\ - Since 0.13 Logos no longer requires the #[error] variant.\n\ - \n\ - For help with migration see release notes: \ - https://github.com/maciejhirsz/logos/releases\ - ", - attr.span(), - ); - } - TOKEN_ATTR => { - let definition = match parser.parse_definition(attr) { - Some(definition) => definition, - None => { - parser.err("Expected #[token(...)]", attr.span()); - continue; - } - }; - - if definition.ignore_flags.is_empty() { - let bytes = definition.literal.to_bytes(); - let then = graph.push( - leaf(definition.literal.span()) - .priority(definition.priority.unwrap_or(bytes.len() * 2)) - .callback(definition.callback), - ); - - ropes.push(Rope::new(bytes, then)); - } else { - let mir = definition - .literal - .escape_regex() - .to_mir( - &Default::default(), - definition.ignore_flags, - &mut parser.errors, - ) - .expect("The literal should be perfectly valid regex"); - - let then = graph.push( - leaf(definition.literal.span()) - .priority(definition.priority.unwrap_or_else(|| mir.priority())) - .callback(definition.callback), - ); - let id = 
graph.regex(mir, then); - - regex_ids.push(id); - } - } - REGEX_ATTR => { - let definition = match parser.parse_definition(attr) { - Some(definition) => definition, - None => { - parser.err("Expected #[regex(...)]", attr.span()); - continue; - } - }; - let mir = match definition.literal.to_mir( - &parser.subpatterns, - definition.ignore_flags, - &mut parser.errors, - ) { - Ok(mir) => mir, - Err(err) => { - parser.err(err, definition.literal.span()); - continue; - } - }; - - let then = graph.push( - leaf(definition.literal.span()) - .priority(definition.priority.unwrap_or_else(|| mir.priority())) - .callback(definition.callback), - ); - let id = graph.regex(mir, then); - - regex_ids.push(id); - } - _ => (), - } - } - } - - let mut root = Fork::new(); - - debug!("Parsing additional options (extras, source, ...)"); - - let error_type = parser.error_type.take(); - let extras = parser.extras.take(); - let source = parser - .source - .take() - .map(strip_wrapping_parens) - .unwrap_or(match parser.mode { - Mode::Utf8 => quote!(str), - Mode::Binary => quote!([u8]), - }); - let logos_path = parser - .logos_path - .take() - .unwrap_or_else(|| parse_quote!(::logos)); - - let generics = parser.generics(); - let this = quote!(#name #generics); - - let impl_logos = |body| { - quote! 
{ - impl<'s> #logos_path::Logos<'s> for #this { - type Error = #error_type; - - type Extras = #extras; - - type Source = #source; - - fn lex(lex: &mut #logos_path::Lexer<'s, Self>) { - #body - } - } - } - }; - - for id in regex_ids { - let fork = graph.fork_off(id); - - root.merge(fork, &mut graph); - } - for rope in ropes { - root.merge(rope.into_fork(&mut graph), &mut graph); - } - while let Some(id) = root.miss.take() { - let fork = graph.fork_off(id); - - if fork.branches().next().is_some() { - root.merge(fork, &mut graph); - } else { - break; - } - } - - debug!("Checking if any two tokens have the same priority"); - - for &DisambiguationError(a, b) in graph.errors() { - let a = graph[a].unwrap_leaf(); - let b = graph[b].unwrap_leaf(); - let disambiguate = a.priority + 1; - - let mut err = |a: &Leaf, b: &Leaf| { - parser.err( - format!( - "\ - A definition of variant `{a}` can match the same input as another definition of variant `{b}`.\n\ - \n\ - hint: Consider giving one definition a higher priority: \ - #[regex(..., priority = {disambiguate})]\ - ", - ), - a.span - ); - }; - - err(a, b); - err(b, a); - } - - if let Some(errors) = parser.errors.render() { - return impl_logos(errors); - } - - let root = graph.push(root); - - graph.shake(root); - - debug!("Generating code from graph:\n{graph:#?}"); - - let generator = Generator::new(name, &this, root, &graph); - - let body = generator.generate(); - impl_logos(quote! { - use #logos_path::internal::{LexerInternal, CallbackResult}; - - type Lexer<'s> = #logos_path::Lexer<'s, #this>; - - fn _end<'s>(lex: &mut Lexer<'s>) { - lex.end() - } - - fn _error<'s>(lex: &mut Lexer<'s>) { - lex.bump_unchecked(1); - - lex.error(); - } - - #body - }) -} - -/// Strip all logos attributes from the given struct, allowing it to be used in code without `logos-derive` present. 
-pub fn strip_attributes(input: TokenStream) -> TokenStream { - let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums"); - - strip_attrs_from_vec(&mut item.attrs); - - for attr in &mut item.attrs { - if let syn::Meta::List(meta) = &mut attr.meta { - if meta.path.is_ident("derive") { - let mut tokens = - std::mem::replace(&mut meta.tokens, TokenStream::new()).into_iter(); - - while let Some(TokenTree::Ident(ident)) = tokens.next() { - let punct = tokens.next(); - - if ident == "Logos" { - continue; - } - - meta.tokens.extend([TokenTree::Ident(ident)]); - meta.tokens.extend(punct); - } - } - } - } - - for variant in &mut item.variants { - strip_attrs_from_vec(&mut variant.attrs); - for field in &mut variant.fields { - strip_attrs_from_vec(&mut field.attrs); - } - } - - item.to_token_stream() -} - -fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) { - attrs.retain(|attr| !is_logos_attr(attr)) -} - -fn is_logos_attr(attr: &syn::Attribute) -> bool { - attr.path().is_ident(LOGOS_ATTR) - || attr.path().is_ident(TOKEN_ATTR) - || attr.path().is_ident(REGEX_ATTR) -} - -fn strip_wrapping_parens(t: TokenStream) -> TokenStream { - let tts: Vec<TokenTree> = t.into_iter().collect(); - - if tts.len() != 1 { - tts.into_iter().collect() - } else { - match tts.into_iter().next().unwrap() { - TokenTree::Group(g) => { - if g.delimiter() == Delimiter::Parenthesis { - g.stream() - } else { - core::iter::once(TokenTree::Group(g)).collect() - } - } - tt => core::iter::once(tt).collect(), - } - } -} diff --git a/vendor/logos-codegen/src/macros.rs b/vendor/logos-codegen/src/macros.rs deleted file mode 100644 index 148c6f6a..00000000 --- a/vendor/logos-codegen/src/macros.rs +++ /dev/null @@ -1,12 +0,0 @@ -#[cfg(feature = "debug")] -macro_rules! debug { - ($($arg:tt)*) => { - eprint!("[{}:{}:{}] ", file!(), line!(), column!()); - eprintln!($($arg)*) - } -} - -#[cfg(not(feature = "debug"))] -macro_rules! 
debug { - ($($arg:tt)*) => {}; -} diff --git a/vendor/logos-codegen/src/mir.rs b/vendor/logos-codegen/src/mir.rs deleted file mode 100644 index e07c5b78..00000000 --- a/vendor/logos-codegen/src/mir.rs +++ /dev/null @@ -1,235 +0,0 @@ -use std::convert::TryFrom; - -use lazy_static::lazy_static; -use regex_syntax::hir::{Dot, Hir, HirKind}; -use regex_syntax::ParserBuilder; - -pub use regex_syntax::hir::{Class, ClassUnicode, Literal}; - -use crate::error::{Error, Result}; - -lazy_static! { - static ref DOT_UTF8: Hir = Hir::dot(Dot::AnyChar); - static ref DOT_BYTES: Hir = Hir::dot(Dot::AnyByte); -} - -/// Middle Intermediate Representation of the regex, built from -/// `regex_syntax`'s `Hir`. The goal here is to strip and canonicalize -/// the tree, so that we don't have to do transformations later on the -/// graph, with the potential of running into looping references. -#[derive(Clone, Debug)] -pub enum Mir { - Empty, - Loop(Box<Mir>), - Maybe(Box<Mir>), - Concat(Vec<Mir>), - Alternation(Vec<Mir>), - Class(Class), - Literal(Literal), -} - -impl Mir { - pub fn utf8(source: &str) -> Result<Mir> { - Mir::try_from(ParserBuilder::new().build().parse(source)?) 
- } - - pub fn utf8_ignore_case(source: &str) -> Result<Mir> { - Mir::try_from( - ParserBuilder::new() - .case_insensitive(true) - .build() - .parse(source)?, - ) - } - - pub fn binary(source: &str) -> Result<Mir> { - Mir::try_from( - ParserBuilder::new() - .utf8(false) - .unicode(false) - .build() - .parse(source)?, - ) - } - - pub fn binary_ignore_case(source: &str) -> Result<Mir> { - Mir::try_from( - ParserBuilder::new() - .utf8(false) - .unicode(false) - .case_insensitive(true) - .build() - .parse(source)?, - ) - } - - pub fn priority(&self) -> usize { - match self { - Mir::Empty | Mir::Loop(_) | Mir::Maybe(_) => 0, - Mir::Concat(concat) => concat.iter().map(Mir::priority).sum(), - Mir::Alternation(alt) => alt.iter().map(Mir::priority).min().unwrap_or(0), - Mir::Class(_) => 2, - Mir::Literal(lit) => match std::str::from_utf8(&lit.0) { - Ok(s) => 2 * s.chars().count(), - Err(_) => 2 * lit.0.len(), - }, - } - } -} - -impl TryFrom<Hir> for Mir { - type Error = Error; - - fn try_from(hir: Hir) -> Result<Mir> { - match hir.into_kind() { - HirKind::Empty => Ok(Mir::Empty), - HirKind::Concat(concat) => { - let mut out = Vec::with_capacity(concat.len()); - - fn extend(mir: Mir, out: &mut Vec<Mir>) { - match mir { - Mir::Concat(nested) => { - for child in nested { - extend(child, out); - } - } - mir => out.push(mir), - } - } - - for hir in concat { - extend(Mir::try_from(hir)?, &mut out); - } - - Ok(Mir::Concat(out)) - } - HirKind::Alternation(alternation) => { - let alternation = alternation - .into_iter() - .map(Mir::try_from) - .collect::<Result<_>>()?; - - Ok(Mir::Alternation(alternation)) - } - HirKind::Literal(literal) => Ok(Mir::Literal(literal)), - HirKind::Class(class) => Ok(Mir::Class(class)), - HirKind::Repetition(repetition) => { - if !repetition.greedy { - return Err("#[regex]: non-greedy parsing is currently unsupported.".into()); - } - - let is_dot = if repetition.sub.properties().is_utf8() { - *repetition.sub == *DOT_UTF8 - } else { - *repetition.sub == 
*DOT_BYTES - }; - let mir = Mir::try_from(*repetition.sub)?; - - match (repetition.min, repetition.max) { - (0..=1, None) if is_dot => { - Err( - "#[regex]: \".+\" and \".*\" patterns will greedily consume \ - the entire source till the end as Logos does not allow \ - backtracking. If you are looking to match everything until \ - a specific character, you should use a negative character \ - class. E.g., use regex r\"'[^']*'\" to match anything in \ - between two quotes. Read more about that here: \ - https://github.com/maciejhirsz/logos/issues/302#issuecomment-1521342541." - .into() - ) - } - // 0 or 1 - (0, Some(1)) => Ok(Mir::Maybe(Box::new(mir))), - // 0 or more - (0, None) => Ok(Mir::Loop(Box::new(mir))), - // 1 or more - (1, None) => { - Ok(Mir::Concat(vec![mir.clone(), Mir::Loop(Box::new(mir))])) - } - // Exact {n} - (n, Some(m)) if m == n => { - let mut out = Vec::with_capacity(n as usize); - for _ in 0..n { - out.push(mir.clone()); - } - Ok(Mir::Concat(out)) - } - // At least {n,} - (n, None) => { - let mut out = Vec::with_capacity(n as usize); - for _ in 0..n { - out.push(mir.clone()); - } - out.push(Mir::Loop(Box::new(mir))); - Ok(Mir::Concat(out)) - } - // Bounded {n, m} - (n, Some(m)) => { - let mut out = Vec::with_capacity(m as usize); - for _ in 0..n { - out.push(mir.clone()); - } - for _ in n..m { - out.push(Mir::Maybe(Box::new(mir.clone()))); - } - Ok(Mir::Concat(out)) - } - } - } - HirKind::Capture(capture) => Mir::try_from(*capture.sub), - HirKind::Look(_) => { - Err("#[regex]: look-around assertions are currently unsupported.".into()) - } - } - } -} - -#[cfg(test)] -mod tests { - use super::Mir; - - #[test] - fn priorities() { - let regexes = [ - ("a", 2), - ("à", 2), - ("京", 2), - ("Eté", 6), - ("Été", 6), - ("[a-z]+", 2), - ("a|b", 2), - ("a|[b-z]", 2), - ("(foo)+", 6), - ("foobar", 12), - ("(fooz|bar)+qux", 12), - ]; - - for (regex, expected) in regexes.iter() { - let mir = Mir::utf8(regex).unwrap(); - assert_eq!(mir.priority(), *expected, 
"Failed for regex \"{}\"", regex); - } - } - - #[test] - fn equivalent_patterns() { - let regexes = [ - ("a|b", "[a-b]"), - ("1|2|3", "[1-3]"), - ("1+", "[1]+"), - ("c*", "[c]*"), - ("aaa", "a{3}"), - ("a[a]{2}", "a{3}"), - ]; - - for (regex_left, regex_right) in regexes.iter() { - let mir_left = Mir::utf8(regex_left).unwrap(); - let mir_right = Mir::utf8(regex_right).unwrap(); - assert_eq!( - mir_left.priority(), - mir_right.priority(), - "Regexes \"{regex_left}\" and \"{regex_right}\" \ - are equivalent but have different priorities" - ); - } - } -} diff --git a/vendor/logos-codegen/src/parser/definition.rs b/vendor/logos-codegen/src/parser/definition.rs deleted file mode 100644 index a876fb59..00000000 --- a/vendor/logos-codegen/src/parser/definition.rs +++ /dev/null @@ -1,193 +0,0 @@ -use proc_macro2::{Ident, Span}; -use syn::{spanned::Spanned, LitByteStr, LitStr}; - -use crate::error::{Errors, Result}; -use crate::leaf::Callback; -use crate::mir::Mir; -use crate::parser::nested::NestedValue; -use crate::parser::{IgnoreFlags, Parser, Subpatterns}; - -use super::ignore_flags::ascii_case::MakeAsciiCaseInsensitive; - -pub struct Definition { - pub literal: Literal, - pub priority: Option<usize>, - pub callback: Option<Callback>, - pub ignore_flags: IgnoreFlags, -} - -pub enum Literal { - Utf8(LitStr), - Bytes(LitByteStr), -} - -impl Definition { - pub fn new(literal: Literal) -> Self { - Definition { - literal, - priority: None, - callback: None, - ignore_flags: IgnoreFlags::Empty, - } - } - - pub fn named_attr(&mut self, name: Ident, value: NestedValue, parser: &mut Parser) { - match (name.to_string().as_str(), value) { - ("priority", NestedValue::Assign(tokens)) => { - let prio = match tokens.to_string().parse() { - Ok(prio) => prio, - Err(_) => { - parser.err("Expected an unsigned integer", tokens.span()); - return; - } - }; - - if self.priority.replace(prio).is_some() { - parser.err("Resetting previously set priority", tokens.span()); - } - } - ("priority", _) 
=> { - parser.err("Expected: priority = <integer>", name.span()); - } - ("callback", NestedValue::Assign(tokens)) => { - let span = tokens.span(); - let callback = match parser.parse_callback(tokens) { - Some(callback) => callback, - None => { - parser.err("Not a valid callback", span); - return; - } - }; - - if let Some(previous) = self.callback.replace(callback) { - parser - .err( - "Callback has been already set", - span.join(name.span()).unwrap(), - ) - .err("Previous callback set here", previous.span()); - } - } - ("callback", _) => { - parser.err("Expected: callback = ...", name.span()); - } - ("ignore", NestedValue::Group(tokens)) => { - self.ignore_flags.parse_group(name, tokens, parser); - } - ("ignore", _) => { - parser.err("Expected: ignore(<flag>, ...)", name.span()); - } - (unknown, _) => { - parser.err( - format!( - "\ - Unknown nested attribute: {}\n\ - \n\ - Expected one of: priority, callback\ - ", - unknown - ), - name.span(), - ); - } - } - } -} - -impl Literal { - pub fn to_bytes(&self) -> Vec<u8> { - match self { - Literal::Utf8(string) => string.value().into_bytes(), - Literal::Bytes(bytes) => bytes.value(), - } - } - - pub fn escape_regex(&self) -> Literal { - match self { - Literal::Utf8(string) => Literal::Utf8(LitStr::new( - regex_syntax::escape(&string.value()).as_str(), - self.span(), - )), - Literal::Bytes(bytes) => Literal::Bytes(LitByteStr::new( - regex_syntax::escape(&bytes_to_regex_string(bytes.value())).as_bytes(), - self.span(), - )), - } - } - - pub fn to_mir( - &self, - subpatterns: &Subpatterns, - ignore_flags: IgnoreFlags, - errors: &mut Errors, - ) -> Result<Mir> { - let value = subpatterns.fix(self, errors); - - if ignore_flags.contains(IgnoreFlags::IgnoreAsciiCase) { - match self { - Literal::Utf8(_) => { - Mir::utf8(&value).map(MakeAsciiCaseInsensitive::make_ascii_case_insensitive) - } - Literal::Bytes(_) => Mir::binary_ignore_case(&value), - } - } else if ignore_flags.contains(IgnoreFlags::IgnoreCase) { - match self { - 
Literal::Utf8(_) => Mir::utf8_ignore_case(&value), - Literal::Bytes(_) => Mir::binary_ignore_case(&value), - } - } else { - match self { - Literal::Utf8(_) => Mir::utf8(&value), - Literal::Bytes(_) => Mir::binary(&value), - } - } - } - - pub fn span(&self) -> Span { - match self { - Literal::Utf8(string) => string.span(), - Literal::Bytes(bytes) => bytes.span(), - } - } -} - -impl syn::parse::Parse for Literal { - fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> { - let la = input.lookahead1(); - if la.peek(LitStr) { - Ok(Literal::Utf8(input.parse()?)) - } else if la.peek(LitByteStr) { - Ok(Literal::Bytes(input.parse()?)) - } else { - Err(la.error()) - } - } -} - -pub fn bytes_to_regex_string(bytes: Vec<u8>) -> String { - if bytes.is_ascii() { - unsafe { - // Unicode values are prohibited, so we can't use - // safe version of String::from_utf8 - // - // We can, however, construct a safe ASCII string - return String::from_utf8_unchecked(bytes); - } - } - - let mut string = String::with_capacity(bytes.len() * 2); - - for byte in bytes { - if byte < 0x80 { - string.push(byte as char); - } else { - static DIGITS: [u8; 16] = *b"0123456789abcdef"; - - string.push_str(r"\x"); - string.push(DIGITS[(byte / 16) as usize] as char); - string.push(DIGITS[(byte % 16) as usize] as char); - } - } - - string -} diff --git a/vendor/logos-codegen/src/parser/ignore_flags.rs b/vendor/logos-codegen/src/parser/ignore_flags.rs deleted file mode 100644 index 3a79d31b..00000000 --- a/vendor/logos-codegen/src/parser/ignore_flags.rs +++ /dev/null @@ -1,499 +0,0 @@ -use std::ops::{BitAnd, BitOr}; - -use proc_macro2::{Ident, TokenStream, TokenTree}; - -use crate::parser::Parser; -use crate::util::is_punct; - -#[derive(Clone, Copy, PartialEq, Eq)] -pub struct IgnoreFlags { - bits: u8, -} - -#[allow(non_upper_case_globals)] -impl IgnoreFlags { - pub const Empty: Self = Self::new(0x00); - pub const IgnoreCase: Self = Self::new(0x01); - pub const IgnoreAsciiCase: Self = 
Self::new(0x02); - - #[inline] - pub const fn new(bits: u8) -> Self { - Self { bits } - } - - /// Enables a variant. - #[inline] - pub fn enable(&mut self, variant: Self) { - self.bits |= variant.bits; - } - - /// Checks if this `IgnoreFlags` contains *any* of the given variants. - #[inline] - pub fn contains(&self, variants: Self) -> bool { - self.bits & variants.bits != 0 - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.bits == 0 - } - - /// Parses an identifier an enables it for `self`. - /// - /// Valid inputs are (that produces `true`): - /// * `"case"` (incompatible with `"ascii_case"`) - /// * `"ascii_case"` (incompatible with `"case"`) - /// - /// An error causes this function to return `false` and emits an error to - /// the given `Parser`. - fn parse_ident(&mut self, ident: Ident, parser: &mut Parser) -> bool { - match ident.to_string().as_str() { - "case" => { - if self.contains(Self::IgnoreAsciiCase) { - parser.err( - "\ - The flag \"case\" cannot be used along with \"ascii_case\"\ - ", - ident.span(), - ); - false - } else { - self.enable(Self::IgnoreCase); - true - } - } - "ascii_case" => { - if self.contains(Self::IgnoreCase) { - parser.err( - "\ - The flag \"ascii_case\" cannot be used along with \"case\"\ - ", - ident.span(), - ); - false - } else { - self.enable(Self::IgnoreAsciiCase); - true - } - } - unknown => { - parser.err( - format!( - "\ - Unknown flag: {}\n\ - \n\ - Expected one of: case, ascii_case\ - ", - unknown - ), - ident.span(), - ); - false - } - } - } - - pub fn parse_group(&mut self, name: Ident, tokens: TokenStream, parser: &mut Parser) { - // Little finite state machine to parse "<flag>(,<flag>)*,?" 
- - // FSM description for future maintenance - // 0: Initial state - // <flag> -> 1 - // _ -> error - // 1: A flag was found - // , -> 2 - // None -> done - // _ -> error - // 2: A comma was found (after a <flag>) - // <flag> -> 1 - // None -> done - // _ -> error - let mut state = 0u8; - - let mut tokens = tokens.into_iter(); - - loop { - state = match state { - 0 => match tokens.next() { - Some(TokenTree::Ident(ident)) => { - if self.parse_ident(ident, parser) { - 1 - } else { - return; - } - } - _ => { - parser.err( - "\ - Invalid ignore flag\n\ - \n\ - Expected one of: case, ascii_case\ - ", - name.span(), - ); - return; - } - }, - 1 => match tokens.next() { - Some(tt) if is_punct(&tt, ',') => 2, - None => return, - Some(unexpected_tt) => { - parser.err( - format!( - "\ - Unexpected token: {:?}\ - ", - unexpected_tt.to_string(), - ), - unexpected_tt.span(), - ); - return; - } - }, - 2 => match tokens.next() { - Some(TokenTree::Ident(ident)) => { - if self.parse_ident(ident, parser) { - 1 - } else { - return; - } - } - None => return, - Some(unexpected_tt) => { - parser.err( - format!( - "\ - Unexpected token: {:?}\ - ", - unexpected_tt.to_string(), - ), - unexpected_tt.span(), - ); - return; - } - }, - _ => unreachable!("Internal Error: invalid state ({})", state), - } - } - } -} - -impl BitOr for IgnoreFlags { - type Output = Self; - - fn bitor(self, other: Self) -> Self { - Self::new(self.bits | other.bits) - } -} - -impl BitAnd for IgnoreFlags { - type Output = Self; - - fn bitand(self, other: Self) -> Self { - Self::new(self.bits & other.bits) - } -} - -pub mod ascii_case { - use regex_syntax::hir; - - use crate::mir::Mir; - use crate::parser::Literal; - - macro_rules! 
literal { - ($byte:expr) => { - hir::Literal(Box::new([$byte])) - }; - (@char $c:expr) => { - hir::Literal( - $c.encode_utf8(&mut [0; 4]) - .as_bytes() - .to_vec() - .into_boxed_slice(), - ) - }; - } - - pub trait MakeAsciiCaseInsensitive { - /// Creates a equivalent regular expression which ignore the letter casing - /// of ascii characters. - fn make_ascii_case_insensitive(self) -> Mir; - } - - impl MakeAsciiCaseInsensitive for u8 { - fn make_ascii_case_insensitive(self) -> Mir { - if self.is_ascii_lowercase() { - Mir::Alternation(vec![ - Mir::Literal(literal!(self - 32)), - Mir::Literal(literal!(self)), - ]) - } else if self.is_ascii_uppercase() { - Mir::Alternation(vec![ - Mir::Literal(literal!(self)), - Mir::Literal(literal!(self + 32)), - ]) - } else { - Mir::Literal(literal!(self)) - } - } - } - - impl MakeAsciiCaseInsensitive for char { - fn make_ascii_case_insensitive(self) -> Mir { - if self.is_ascii() { - (self as u8).make_ascii_case_insensitive() - } else { - Mir::Literal(literal!(@char self)) - } - } - } - - impl MakeAsciiCaseInsensitive for hir::Literal { - fn make_ascii_case_insensitive(self) -> Mir { - Mir::Concat( - self.0 - .iter() - .map(|x| x.make_ascii_case_insensitive()) - .collect(), - ) - } - } - - impl MakeAsciiCaseInsensitive for hir::ClassBytes { - fn make_ascii_case_insensitive(mut self) -> Mir { - self.case_fold_simple(); - Mir::Class(hir::Class::Bytes(self)) - } - } - - impl MakeAsciiCaseInsensitive for hir::ClassUnicode { - fn make_ascii_case_insensitive(mut self) -> Mir { - use std::cmp; - - // Manuall implementation to only perform the case folding on ascii characters. 
- - let mut ranges = Vec::new(); - - for range in self.ranges() { - #[inline] - fn overlaps(st1: u8, end1: u8, st2: u8, end2: u8) -> bool { - (st2 <= st1 && st1 <= end2) || (st1 <= st2 && st2 <= end1) - } - - #[inline] - fn make_ascii(c: char) -> Option<u8> { - if c.is_ascii() { - Some(c as u8) - } else { - None - } - } - - match (make_ascii(range.start()), make_ascii(range.end())) { - (Some(start), Some(end)) => { - if overlaps(b'a', b'z', start, end) { - let lower = cmp::max(start, b'a'); - let upper = cmp::min(end, b'z'); - ranges.push(hir::ClassUnicodeRange::new( - (lower - 32) as char, - (upper - 32) as char, - )) - } - - if overlaps(b'A', b'Z', start, end) { - let lower = cmp::max(start, b'A'); - let upper = cmp::min(end, b'Z'); - ranges.push(hir::ClassUnicodeRange::new( - (lower + 32) as char, - (upper + 32) as char, - )) - } - } - (Some(start), None) => { - if overlaps(b'a', b'z', start, b'z') { - let lower = cmp::max(start, b'a'); - ranges.push(hir::ClassUnicodeRange::new((lower - 32) as char, 'Z')) - } - - if overlaps(b'A', b'Z', start, b'Z') { - let lower = cmp::max(start, b'A'); - ranges.push(hir::ClassUnicodeRange::new((lower + 32) as char, 'Z')) - } - } - _ => (), - } - } - - self.union(&hir::ClassUnicode::new(ranges)); - - Mir::Class(hir::Class::Unicode(self)) - } - } - - impl MakeAsciiCaseInsensitive for hir::Class { - fn make_ascii_case_insensitive(self) -> Mir { - match self { - hir::Class::Bytes(b) => b.make_ascii_case_insensitive(), - hir::Class::Unicode(u) => u.make_ascii_case_insensitive(), - } - } - } - - impl MakeAsciiCaseInsensitive for &Literal { - fn make_ascii_case_insensitive(self) -> Mir { - match self { - Literal::Bytes(bytes) => Mir::Concat( - bytes - .value() - .into_iter() - .map(|b| b.make_ascii_case_insensitive()) - .collect(), - ), - Literal::Utf8(s) => Mir::Concat( - s.value() - .chars() - .map(|b| b.make_ascii_case_insensitive()) - .collect(), - ), - } - } - } - - impl MakeAsciiCaseInsensitive for Mir { - fn 
make_ascii_case_insensitive(self) -> Mir { - match self { - Mir::Empty => Mir::Empty, - Mir::Loop(l) => Mir::Loop(Box::new(l.make_ascii_case_insensitive())), - Mir::Maybe(m) => Mir::Maybe(Box::new(m.make_ascii_case_insensitive())), - Mir::Concat(c) => Mir::Concat( - c.into_iter() - .map(|m| m.make_ascii_case_insensitive()) - .collect(), - ), - Mir::Alternation(a) => Mir::Alternation( - a.into_iter() - .map(|m| m.make_ascii_case_insensitive()) - .collect(), - ), - Mir::Class(c) => c.make_ascii_case_insensitive(), - Mir::Literal(l) => l.make_ascii_case_insensitive(), - } - } - } - - #[cfg(test)] - mod tests { - use super::MakeAsciiCaseInsensitive; - use crate::mir::{Class, Mir}; - use regex_syntax::hir::{ClassUnicode, ClassUnicodeRange}; - - fn assert_range(in_s: char, in_e: char, expected: &[(char, char)]) { - let range = ClassUnicodeRange::new(in_s, in_e); - let class = ClassUnicode::new(vec![range]); - - let expected = - ClassUnicode::new(expected.iter().map(|&(a, b)| ClassUnicodeRange::new(a, b))); - - if let Mir::Class(Class::Unicode(result)) = class.make_ascii_case_insensitive() { - assert_eq!(result, expected); - } else { - panic!("Not a unicode class"); - }; - } - - #[test] - fn no_letters_left() { - assert_range(' ', '+', &[(' ', '+')]); - } - - #[test] - fn no_letters_right() { - assert_range('{', '~', &[('{', '~')]); - } - - #[test] - fn no_letters_middle() { - assert_range('[', '`', &[('[', '`')]); - } - - #[test] - fn lowercase_left_edge() { - assert_range('a', 'd', &[('a', 'd'), ('A', 'D')]); - } - - #[test] - fn lowercase_right_edge() { - assert_range('r', 'z', &[('r', 'z'), ('R', 'Z')]); - } - - #[test] - fn lowercase_total() { - assert_range('a', 'z', &[('a', 'z'), ('A', 'Z')]); - } - - #[test] - fn uppercase_left_edge() { - assert_range('A', 'D', &[('a', 'd'), ('A', 'D')]); - } - - #[test] - fn uppercase_right_edge() { - assert_range('R', 'Z', &[('r', 'z'), ('R', 'Z')]); - } - - #[test] - fn uppercase_total() { - assert_range('A', 'Z', &[('a', 'z'), 
('A', 'Z')]); - } - - #[test] - fn lowercase_cross_left() { - assert_range('[', 'h', &[('[', 'h'), ('A', 'H')]); - } - - #[test] - fn lowercase_cross_right() { - assert_range('d', '}', &[('d', '}'), ('D', 'Z')]); - } - - #[test] - fn uppercase_cross_left() { - assert_range(';', 'H', &[(';', 'H'), ('a', 'h')]); - } - - #[test] - fn uppercase_cross_right() { - assert_range('T', ']', &[('t', 'z'), ('T', ']')]); - } - - #[test] - fn cross_both() { - assert_range('X', 'c', &[('X', 'c'), ('x', 'z'), ('A', 'C')]); - } - - #[test] - fn all_letters() { - assert_range('+', '|', &[('+', '|')]); - } - - #[test] - fn oob_all_letters() { - assert_range('#', 'é', &[('#', 'é')]); - } - - #[test] - fn oob_from_uppercase() { - assert_range('Q', 'é', &[('A', 'é')]); - } - - #[test] - fn oob_from_lowercase() { - assert_range('q', 'é', &[('q', 'é'), ('Q', 'Z')]); - } - - #[test] - fn oob_no_letters() { - assert_range('|', 'é', &[('|', 'é')]); - } - } -} diff --git a/vendor/logos-codegen/src/parser/mod.rs b/vendor/logos-codegen/src/parser/mod.rs deleted file mode 100644 index 3ad7202e..00000000 --- a/vendor/logos-codegen/src/parser/mod.rs +++ /dev/null @@ -1,331 +0,0 @@ -use beef::lean::Cow; -use proc_macro2::{Span, TokenStream, TokenTree}; -use quote::quote; -use syn::spanned::Spanned; -use syn::{Attribute, GenericParam, Lit, Meta, Type}; - -use crate::error::Errors; -use crate::leaf::{Callback, InlineCallback}; -use crate::util::{expect_punct, MaybeVoid}; -use crate::LOGOS_ATTR; - -mod definition; -mod ignore_flags; -mod nested; -mod subpattern; -mod type_params; - -pub use self::definition::{Definition, Literal}; -pub use self::ignore_flags::IgnoreFlags; -use self::nested::{AttributeParser, Nested, NestedValue}; -pub use self::subpattern::Subpatterns; -use self::type_params::{replace_lifetime, traverse_type, TypeParams}; - -#[derive(Default)] -pub struct Parser { - pub errors: Errors, - pub mode: Mode, - pub source: Option<TokenStream>, - pub skips: Vec<Literal>, - pub extras: 
MaybeVoid, - pub error_type: MaybeVoid, - pub subpatterns: Subpatterns, - pub logos_path: Option<TokenStream>, - types: TypeParams, -} - -#[derive(Default)] -pub enum Mode { - #[default] - Utf8, - Binary, -} - -impl Parser { - pub fn parse_generic(&mut self, param: GenericParam) { - match param { - GenericParam::Lifetime(lt) => { - self.types.explicit_lifetime(lt, &mut self.errors); - } - GenericParam::Type(ty) => { - self.types.add(ty.ident); - } - GenericParam::Const(c) => { - self.err("Logos doesn't support const generics.", c.span()); - } - } - } - - pub fn generics(&mut self) -> Option<TokenStream> { - self.types.generics(&mut self.errors) - } - - fn parse_attr(&mut self, attr: &mut Attribute) -> Option<AttributeParser> { - match &mut attr.meta { - Meta::List(list) => { - let tokens = std::mem::replace(&mut list.tokens, TokenStream::new()); - - Some(AttributeParser::new(tokens)) - } - _ => None, - } - } - - /// Try to parse the main `#[logos(...)]`, does nothing if - /// the attribute's name isn't `logos`. 
- pub fn try_parse_logos(&mut self, attr: &mut Attribute) { - if !attr.path().is_ident(LOGOS_ATTR) { - return; - } - - let nested = match self.parse_attr(attr) { - Some(tokens) => tokens, - None => { - self.err("Expected #[logos(...)]", attr.span()); - return; - } - }; - - for nested in nested { - let (name, value) = match nested { - Nested::Named(name, value) => (name, value), - Nested::Unexpected(tokens) | Nested::Unnamed(tokens) => { - self.err("Invalid nested attribute", tokens.span()); - continue; - } - }; - - // IMPORTANT: Keep these sorted alphabetically for binary search down the line - #[allow(clippy::type_complexity)] - static NESTED_LOOKUP: &[(&str, fn(&mut Parser, Span, NestedValue))] = &[ - ("crate", |parser, span, value| match value { - NestedValue::Assign(logos_path) => parser.logos_path = Some(logos_path), - _ => { - parser.err("Expected: #[logos(crate = path::to::logos)]", span); - } - }), - ("error", |parser, span, value| match value { - NestedValue::Assign(value) => { - let span = value.span(); - - if let MaybeVoid::Some(previous) = parser.error_type.replace(value) { - parser - .err("Error type can be defined only once", span) - .err("Previous definition here", previous.span()); - } - } - _ => { - parser.err("Expected: #[logos(error = SomeType)]", span); - } - }), - ("extras", |parser, span, value| match value { - NestedValue::Assign(value) => { - let span = value.span(); - - if let MaybeVoid::Some(previous) = parser.extras.replace(value) { - parser - .err("Extras can be defined only once", span) - .err("Previous definition here", previous.span()); - } - } - _ => { - parser.err("Expected: #[logos(extras = SomeType)]", span); - } - }), - ("skip", |parser, span, value| match value { - NestedValue::Literal(lit) => { - if let Some(literal) = parser.parse_literal(Lit::new(lit)) { - parser.skips.push(literal); - } - } - _ => { - parser.err("Expected: #[logos(skip \"regex literal\")]", span); - } - }), - ("source", |parser, span, value| match value { - 
NestedValue::Assign(value) => { - let span = value.span(); - if let Some(previous) = parser.source.replace(value) { - parser - .err("Source can be defined only once", span) - .err("Previous definition here", previous.span()); - } - } - _ => { - parser.err("Expected: #[logos(source = SomeType)]", span); - } - }), - ("subpattern", |parser, span, value| match value { - NestedValue::KeywordAssign(name, value) => { - parser.subpatterns.add(name, value, &mut parser.errors); - } - _ => { - parser.err(r#"Expected: #[logos(subpattern name = r"regex")]"#, span); - } - }), - ("type", |parser, span, value| match value { - NestedValue::KeywordAssign(generic, ty) => { - parser.types.set(generic, ty, &mut parser.errors); - } - _ => { - parser.err("Expected: #[logos(type T = SomeType)]", span); - } - }), - ]; - - match NESTED_LOOKUP.binary_search_by_key(&name.to_string().as_str(), |(n, _)| n) { - Ok(idx) => NESTED_LOOKUP[idx].1(self, name.span(), value), - Err(_) => { - let mut err = format!( - "Unknown nested attribute #[logos({name})], expected one of: {}", - NESTED_LOOKUP[0].0 - ); - - for (allowed, _) in &NESTED_LOOKUP[1..] { - err.push_str(", "); - err.push_str(allowed); - } - - self.err(err, name.span()); - } - } - } - } - - pub fn parse_literal(&mut self, lit: Lit) -> Option<Literal> { - match lit { - Lit::Str(string) => Some(Literal::Utf8(string)), - Lit::ByteStr(bytes) => { - self.mode = Mode::Binary; - - Some(Literal::Bytes(bytes)) - } - _ => { - self.err("Expected a &str or &[u8] slice", lit.span()); - - None - } - } - } - - /// Parse attribute definition of a token: - /// - /// + `#[token(literal[, callback])]` - /// + `#[regex(literal[, callback])]` - pub fn parse_definition(&mut self, attr: &mut Attribute) -> Option<Definition> { - let mut nested = self.parse_attr(attr)?; - - let literal = match nested.parsed::<Lit>()? 
{ - Ok(lit) => self.parse_literal(lit)?, - Err(err) => { - self.err(err.to_string(), err.span()); - - return None; - } - }; - - let mut def = Definition::new(literal); - - for (position, next) in nested.enumerate() { - match next { - Nested::Unexpected(tokens) => { - self.err("Unexpected token in attribute", tokens.span()); - } - Nested::Unnamed(tokens) => match position { - 0 => def.callback = self.parse_callback(tokens), - _ => { - self.err( - "\ - Expected a named argument at this position\n\ - \n\ - hint: If you are trying to define a callback here use: callback = ...\ - ", - tokens.span(), - ); - } - }, - Nested::Named(name, value) => { - def.named_attr(name, value, self); - } - } - } - - Some(def) - } - - fn parse_callback(&mut self, tokens: TokenStream) -> Option<Callback> { - let span = tokens.span(); - let mut tokens = tokens.into_iter(); - - if let Some(tt) = expect_punct(tokens.next(), '|') { - let mut label = TokenStream::from(tt); - - label.extend(tokens); - - return Some(Callback::Label(label)); - } - - let first = tokens.next(); - let error = expect_punct(tokens.next(), '|'); - - let arg = match (error, first) { - (None, Some(TokenTree::Ident(arg))) => arg, - _ => { - self.err( - "Inline callbacks must use closure syntax with exactly one parameter", - span, - ); - return None; - } - }; - - let body = match tokens.next() { - Some(TokenTree::Group(group)) => group.stream(), - Some(first) => { - let mut body = TokenStream::from(first); - - body.extend(tokens); - body - } - None => { - self.err("Callback missing a body", span); - return None; - } - }; - - let inline = InlineCallback { arg, body, span }; - - Some(inline.into()) - } - - /// Checks if `ty` is a declared generic param, if so replaces it - /// with a concrete type defined using #[logos(type T = Type)] - /// - /// If no matching generic param is found, all lifetimes are fixed - /// to the source lifetime - pub fn get_type(&self, ty: &mut Type) -> TokenStream { - traverse_type(ty, &mut |ty| { - 
if let Type::Path(tp) = ty { - // Skip types that begin with `self::` - if tp.qself.is_none() { - // If `ty` is a generic type parameter, try to find - // its concrete type defined with #[logos(type T = Type)] - if let Some(substitute) = self.types.find(&tp.path) { - *ty = substitute; - } - } - } - // If `ty` is a concrete type, fix its lifetimes to 'source - replace_lifetime(ty); - }); - - quote!(#ty) - } - - pub fn err<M>(&mut self, message: M, span: Span) -> &mut Errors - where - M: Into<Cow<'static, str>>, - { - self.errors.err(message, span) - } -} diff --git a/vendor/logos-codegen/src/parser/nested.rs b/vendor/logos-codegen/src/parser/nested.rs deleted file mode 100644 index 44ecaeac..00000000 --- a/vendor/logos-codegen/src/parser/nested.rs +++ /dev/null @@ -1,146 +0,0 @@ -use proc_macro2::token_stream::IntoIter as TokenIter; -use proc_macro2::{Ident, Literal, TokenStream, TokenTree}; -use quote::quote; - -use crate::util::{expect_punct, is_punct}; - -pub enum NestedValue { - /// `name = ...` - Assign(TokenStream), - /// `name "literal"` - Literal(Literal), - /// `name(...)` - Group(TokenStream), - /// `name ident = ...` - KeywordAssign(Ident, TokenStream), -} - -pub enum Nested { - /// Unnamed nested attribute, such as a string, - /// callback closure, or a lone ident/path - /// - /// Note: a lone ident will be Named with no value instead - Unnamed(TokenStream), - /// Named: name ... 
- Named(Ident, NestedValue), - /// Unexpected token, - Unexpected(TokenStream), -} - -pub struct AttributeParser { - inner: TokenIter, -} - -pub struct Empty; - -impl From<Empty> for TokenStream { - fn from(_: Empty) -> TokenStream { - TokenStream::new() - } -} - -impl AttributeParser { - pub fn new(stream: TokenStream) -> Self { - AttributeParser { - inner: stream.into_iter(), - } - } - - pub fn parsed<T>(&mut self) -> Option<syn::Result<T>> - where - T: syn::parse::Parse, - { - let tokens = self.collect_tail(TokenStream::new()); - - if tokens.is_empty() { - return None; - } - - Some(syn::parse2(tokens)) - } - - fn next_tt(&mut self) -> Option<TokenTree> { - expect_punct(self.inner.next(), ',') - } - - fn collect_tail<T>(&mut self, first: T) -> TokenStream - where - T: Into<TokenStream>, - { - let mut out = first.into(); - - while let Some(tt) = self.next_tt() { - out.extend(Some(tt)); - } - - out - } - - fn parse_unnamed(&mut self, first: Ident, next: TokenTree) -> Nested { - let mut out = TokenStream::from(TokenTree::Ident(first)); - - out.extend(self.collect_tail(next)); - - Nested::Unnamed(out.into_iter().collect()) - } - - fn parse_assign(&mut self, name: Ident) -> Nested { - let value = self.collect_tail(Empty); - - Nested::Named(name, NestedValue::Assign(value)) - } - - fn parse_literal(&mut self, name: Ident, lit: Literal) -> Nested { - // TODO: Error if there are any tokens following - let _ = self.collect_tail(Empty); - - Nested::Named(name, NestedValue::Literal(lit)) - } - - fn parse_group(&mut self, name: Ident, group: TokenStream) -> Nested { - Nested::Named(name, NestedValue::Group(group)) - } - - fn parse_keyword(&mut self, keyword: Ident, name: Ident) -> Nested { - let error = expect_punct(self.next_tt(), '='); - - match error { - Some(error) => { - let error = self.collect_tail(error); - - Nested::Unexpected(error) - } - None => { - let value = self.collect_tail(Empty); - - Nested::Named(keyword, NestedValue::KeywordAssign(name, value)) - } - } - 
} -} - -impl Iterator for AttributeParser { - type Item = Nested; - - fn next(&mut self) -> Option<Nested> { - let first = self.inner.next()?; - - let name = match first { - TokenTree::Ident(ident) => ident, - tt => { - let stream = self.collect_tail(tt); - - return Some(Nested::Unnamed(stream.into_iter().collect())); - } - }; - - match self.next_tt() { - Some(tt) if is_punct(&tt, '=') => Some(self.parse_assign(name)), - Some(TokenTree::Literal(lit)) => Some(self.parse_literal(name, lit)), - Some(TokenTree::Group(group)) => Some(self.parse_group(name, group.stream())), - Some(TokenTree::Ident(next)) => Some(self.parse_keyword(name, next)), - Some(next) => Some(self.parse_unnamed(name, next)), - None => Some(Nested::Unnamed(quote!(#name))), - } - } -} diff --git a/vendor/logos-codegen/src/parser/subpattern.rs b/vendor/logos-codegen/src/parser/subpattern.rs deleted file mode 100644 index eb620028..00000000 --- a/vendor/logos-codegen/src/parser/subpattern.rs +++ /dev/null @@ -1,97 +0,0 @@ -use proc_macro2::TokenStream; -use syn::Ident; - -use crate::error::Errors; -use crate::mir::Mir; -use crate::parser::definition::{bytes_to_regex_string, Literal}; - -#[derive(Default)] -pub struct Subpatterns { - map: Vec<(Ident, String)>, -} - -impl Subpatterns { - pub fn add(&mut self, param: Ident, pattern: TokenStream, errors: &mut Errors) { - let lit = match syn::parse2::<Literal>(pattern) { - Ok(lit) => lit, - Err(e) => { - errors.err(e.to_string(), e.span()); - return; - } - }; - - if let Some((name, _)) = self.map.iter().find(|(name, _)| *name == param) { - errors - .err(format!("{} can only be assigned once", param), param.span()) - .err("Previously assigned here", name.span()); - return; - } - - let fixed = self.fix(&lit, errors); - - // Validate the literal as proper regex. If it's not, emit an error. 
- let mir = match &lit { - Literal::Utf8(_) => Mir::utf8(&fixed), - Literal::Bytes(_) => Mir::binary(&fixed), - }; - - if let Err(err) = mir { - errors.err(err, lit.span()); - }; - - self.map.push((param, fixed)); - } - - pub fn fix(&self, lit: &Literal, errors: &mut Errors) -> String { - let mut i = 0; - let mut pattern = match lit { - Literal::Utf8(s) => s.value(), - Literal::Bytes(b) => bytes_to_regex_string(b.value()), - }; - - while let Some(f) = pattern[i..].find("(?&") { - i += f; - pattern.replace_range(i..i + 3, "(?:"); - i += 3; - - let subref_end = if let Some(f) = pattern[i..].find(')') { - i + f - } else { - pattern.truncate(i); // truncate so latter error doesn't suppress - break; // regex-syntax will report the unclosed group - }; - - let name = &pattern[i..subref_end]; - let name = match syn::parse_str::<Ident>(name) { - Ok(name) => name, - Err(_) => { - errors.err( - format!("subpattern reference `{}` is not an identifier", name), - lit.span(), - ); - // we emitted the error; make something up and continue - pattern.replace_range(i..subref_end, "_"); - i += 2; - continue; - } - }; - - match self.map.iter().find(|(def, _)| *def == name) { - Some((_, subpattern)) => { - pattern.replace_range(i..subref_end, subpattern); - i += subpattern.len() + 1; - } - None => { - errors.err( - format!("subpattern reference `{}` has not been defined", name), - lit.span(), - ); - // leaving `(?:name)` is fine - i = subref_end + 1; - } - } - } - - pattern - } -} diff --git a/vendor/logos-codegen/src/parser/type_params.rs b/vendor/logos-codegen/src/parser/type_params.rs deleted file mode 100644 index 1be4948e..00000000 --- a/vendor/logos-codegen/src/parser/type_params.rs +++ /dev/null @@ -1,200 +0,0 @@ -use proc_macro2::{Ident, Span, TokenStream}; -use quote::quote; -use syn::spanned::Spanned; -use syn::{Lifetime, LifetimeParam, Path, Type}; - -use crate::error::Errors; - -#[derive(Default)] -pub struct TypeParams { - lifetime: bool, - type_params: Vec<(Ident, 
Option<Type>)>, -} - -impl TypeParams { - pub fn explicit_lifetime(&mut self, lt: LifetimeParam, errors: &mut Errors) { - if self.lifetime { - let span = lt.span(); - - errors.err("Logos types can only have one lifetime can be set", span); - } - - self.lifetime = true; - } - - pub fn add(&mut self, param: Ident) { - self.type_params.push((param, None)); - } - - pub fn set(&mut self, param: Ident, ty: TokenStream, errors: &mut Errors) { - let ty = match syn::parse2::<Type>(ty) { - Ok(mut ty) => { - replace_lifetimes(&mut ty); - ty - } - Err(err) => { - errors.err(err.to_string(), err.span()); - return; - } - }; - - match self.type_params.iter_mut().find(|(name, _)| *name == param) { - Some((_, slot)) => { - if let Some(previous) = slot.replace(ty) { - errors - .err( - format!("{} can only have one type assigned to it", param), - param.span(), - ) - .err("Previously assigned here", previous.span()); - } - } - None => { - errors.err( - format!("{} is not a declared type parameter", param), - param.span(), - ); - } - } - } - - pub fn find(&self, path: &Path) -> Option<Type> { - for (ident, ty) in &self.type_params { - if path.is_ident(ident) { - return ty.clone(); - } - } - - None - } - - pub fn generics(&self, errors: &mut Errors) -> Option<TokenStream> { - if !self.lifetime && self.type_params.is_empty() { - return None; - } - - let mut generics = Vec::new(); - - if self.lifetime { - generics.push(quote!('s)); - } - - for (ty, replace) in self.type_params.iter() { - match replace { - Some(ty) => generics.push(quote!(#ty)), - None => { - errors.err( - format!( - "Generic type parameter without a concrete type\n\ - \n\ - Define a concrete type Logos can use: #[logos(type {} = Type)]", - ty, - ), - ty.span(), - ); - } - } - } - - if generics.is_empty() { - None - } else { - Some(quote!(<#(#generics),*>)) - } - } -} - -pub fn replace_lifetimes(ty: &mut Type) { - traverse_type(ty, &mut replace_lifetime) -} - -pub fn replace_lifetime(ty: &mut Type) { - use 
syn::{GenericArgument, PathArguments}; - - match ty { - Type::Path(p) => { - p.path - .segments - .iter_mut() - .filter_map(|segment| match &mut segment.arguments { - PathArguments::AngleBracketed(ab) => Some(ab), - _ => None, - }) - .flat_map(|ab| ab.args.iter_mut()) - .for_each(|arg| { - if let GenericArgument::Lifetime(lt) = arg { - *lt = Lifetime::new("'s", lt.span()); - } - }); - } - Type::Reference(r) => { - let span = match r.lifetime.take() { - Some(lt) => lt.span(), - None => Span::call_site(), - }; - - r.lifetime = Some(Lifetime::new("'s", span)); - } - _ => (), - } -} - -pub fn traverse_type(ty: &mut Type, f: &mut impl FnMut(&mut Type)) { - f(ty); - match ty { - Type::Array(array) => traverse_type(&mut array.elem, f), - Type::BareFn(bare_fn) => { - for input in &mut bare_fn.inputs { - traverse_type(&mut input.ty, f); - } - if let syn::ReturnType::Type(_, ty) = &mut bare_fn.output { - traverse_type(ty, f); - } - } - Type::Group(group) => traverse_type(&mut group.elem, f), - Type::Paren(paren) => traverse_type(&mut paren.elem, f), - Type::Path(path) => traverse_path(&mut path.path, f), - Type::Ptr(p) => traverse_type(&mut p.elem, f), - Type::Reference(r) => traverse_type(&mut r.elem, f), - Type::Slice(slice) => traverse_type(&mut slice.elem, f), - Type::TraitObject(object) => object.bounds.iter_mut().for_each(|bound| { - if let syn::TypeParamBound::Trait(trait_bound) = bound { - traverse_path(&mut trait_bound.path, f); - } - }), - Type::Tuple(tuple) => tuple - .elems - .iter_mut() - .for_each(|elem| traverse_type(elem, f)), - _ => (), - } -} - -fn traverse_path(path: &mut Path, f: &mut impl FnMut(&mut Type)) { - for segment in &mut path.segments { - match &mut segment.arguments { - syn::PathArguments::None => (), - syn::PathArguments::AngleBracketed(args) => { - for arg in &mut args.args { - match arg { - syn::GenericArgument::Type(ty) => { - traverse_type(ty, f); - } - syn::GenericArgument::AssocType(assoc) => { - traverse_type(&mut assoc.ty, f); - } - _ 
=> (), - } - } - } - syn::PathArguments::Parenthesized(args) => { - for arg in &mut args.inputs { - traverse_type(arg, f); - } - if let syn::ReturnType::Type(_, ty) = &mut args.output { - traverse_type(ty, f); - } - } - } - } -} diff --git a/vendor/logos-codegen/src/util.rs b/vendor/logos-codegen/src/util.rs deleted file mode 100644 index 156de035..00000000 --- a/vendor/logos-codegen/src/util.rs +++ /dev/null @@ -1,64 +0,0 @@ -use proc_macro2::{Spacing, Span, TokenStream, TokenTree}; -use quote::{quote, ToTokens}; -use syn::Ident; - -/// Analog to Option<TokenStream>, except when put into the quote! -/// macro, `MaybeVoid::Void` will produce `()` -#[derive(Clone, Default)] -pub enum MaybeVoid { - Some(TokenStream), - #[default] - Void, -} - -impl MaybeVoid { - pub fn replace(&mut self, stream: TokenStream) -> MaybeVoid { - std::mem::replace(self, MaybeVoid::Some(stream)) - } - - pub fn take(&mut self) -> MaybeVoid { - std::mem::replace(self, MaybeVoid::Void) - } -} - -impl ToTokens for MaybeVoid { - fn to_tokens(&self, out: &mut TokenStream) { - match self { - MaybeVoid::Some(stream) => out.extend(stream.clone()), - MaybeVoid::Void => out.extend(quote!(())), - } - } - - fn to_token_stream(&self) -> TokenStream { - match self { - MaybeVoid::Some(stream) => stream.clone(), - MaybeVoid::Void => quote!(()), - } - } - - fn into_token_stream(self) -> TokenStream { - match self { - MaybeVoid::Some(stream) => stream, - MaybeVoid::Void => quote!(()), - } - } -} - -pub fn is_punct(tt: &TokenTree, expect: char) -> bool { - matches!(tt, TokenTree::Punct(punct) if punct.as_char() == expect && punct.spacing() == Spacing::Alone) -} - -/// If supplied `tt` is a punct matching a char, returns `None`, else returns `tt` -pub fn expect_punct(tt: Option<TokenTree>, expect: char) -> Option<TokenTree> { - tt.filter(|tt| !is_punct(tt, expect)) -} - -pub trait ToIdent { - fn to_ident(&self) -> Ident; -} - -impl ToIdent for str { - fn to_ident(&self) -> Ident { - Ident::new(self, 
Span::call_site()) - } -} diff --git a/vendor/logos-codegen/tests/codegen.rs b/vendor/logos-codegen/tests/codegen.rs deleted file mode 100644 index 5f53c0db..00000000 --- a/vendor/logos-codegen/tests/codegen.rs +++ /dev/null @@ -1,31 +0,0 @@ -use std::{error::Error, io, path::PathBuf}; - -#[rstest::rstest] -#[case("simple")] -#[case("no_error_lut")] -pub fn test_codegen(#[case] fixture: &str) -> Result<(), Box<dyn Error>> { - let mut fixture_dir = PathBuf::new(); - fixture_dir.push(env!("CARGO_MANIFEST_DIR")); - fixture_dir.push("tests"); - fixture_dir.push("data"); - fixture_dir.push(fixture); - - let input = fixture_dir.join("input.rs"); - fixture_dir.push("output.rs"); - let output_file = fixture_dir; - - let input = std::fs::read_to_string(input)?; - let output = std::fs::read_to_string(&output_file)?; - - let generated = logos_codegen::generate(input.parse()?); - let generated = generated.to_string(); - - if std::env::var("BLESS_CODEGEN").is_ok_and(|value| value == "1") { - std::fs::write(&output_file, &generated)?; - return Ok(()); - } - - assert_eq!(generated, output, "Codegen test failed: `{fixture}`, run tests again with env var `BLESS_CODEGEN=1` to bless these changes"); - - Ok(()) -} diff --git a/vendor/logos-codegen/tests/data/no_error_lut/input.rs b/vendor/logos-codegen/tests/data/no_error_lut/input.rs deleted file mode 100644 index 84504272..00000000 --- a/vendor/logos-codegen/tests/data/no_error_lut/input.rs +++ /dev/null @@ -1,10 +0,0 @@ -#[derive(Logos)] -#[logos(source = [u8])] -enum Token { - #[token("\n")] - Newline, - #[regex(".")] - AnyUnicode, - #[regex(b".", priority = 0)] - Any, -} diff --git a/vendor/logos-codegen/tests/data/no_error_lut/output.rs b/vendor/logos-codegen/tests/data/no_error_lut/output.rs deleted file mode 100644 index 4c0edd35..00000000 --- a/vendor/logos-codegen/tests/data/no_error_lut/output.rs +++ /dev/null @@ -1 +0,0 @@ -impl < 's > :: logos :: Logos < 's > for Token { type Error = () ; type Extras = () ; type Source = 
[u8] ; fn lex (lex : & mut :: logos :: Lexer < 's , Self >) { use :: logos :: internal :: { LexerInternal , CallbackResult } ; type Lexer < 's > = :: logos :: Lexer < 's , Token > ; fn _end < 's > (lex : & mut Lexer < 's >) { lex . end () } fn _error < 's > (lex : & mut Lexer < 's >) { lex . bump_unchecked (1) ; lex . error () ; } macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } # [inline] fn goto1_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Newline)) ; } # [inline] fn goto11_ctx11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Any)) ; } # [inline] fn goto2_ctx11_x < 's > (lex : & mut Lexer < 's >) { lex . 
set (Ok (Token :: AnyUnicode)) ; } # [inline] fn goto16_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([128u8 ..= 159u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto17_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([144u8 ..= 191u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto2_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: AnyUnicode)) ; } # [inline] fn goto13_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 1usize] > () { Some ([128u8 ..= 191u8]) => { lex . bump_unchecked (1usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto18_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([128u8 ..= 191u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto15_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto14_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([160u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto19_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([128u8 ..= 143u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto11_x < 's > (lex : & mut Lexer < 's >) { lex . 
set (Ok (Token :: Any)) ; } # [inline] fn goto20 < 's > (lex : & mut Lexer < 's >) { enum Jump { J1 , J16 , J17 , J2 , J13 , J18 , J15 , J14 , J19 , J11 , } const LUT : [Jump ; 256] = { use Jump :: * ; [J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J1 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J14 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J16 , J15 , J15 , J17 , J18 , J18 , J18 , J19 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11] } ; let byte = match lex . read :: < u8 > () { Some (byte) => byte , None => return _end (lex) , } ; match LUT [byte as usize] { Jump :: J1 => { lex . bump_unchecked (1usize) ; goto1_x (lex) } , Jump :: J16 => { lex . bump_unchecked (1usize) ; goto16_ctx11_x (lex) } , Jump :: J17 => { lex . bump_unchecked (1usize) ; goto17_ctx11_x (lex) } , Jump :: J2 => { lex . 
bump_unchecked (1usize) ; goto2_x (lex) } , Jump :: J13 => { lex . bump_unchecked (1usize) ; goto13_ctx11_x (lex) } , Jump :: J18 => { lex . bump_unchecked (1usize) ; goto18_ctx11_x (lex) } , Jump :: J15 => { lex . bump_unchecked (1usize) ; goto15_ctx11_x (lex) } , Jump :: J14 => { lex . bump_unchecked (1usize) ; goto14_ctx11_x (lex) } , Jump :: J19 => { lex . bump_unchecked (1usize) ; goto19_ctx11_x (lex) } , Jump :: J11 => { lex . bump_unchecked (1usize) ; goto11_x (lex) } , } } goto20 (lex) } }
\ No newline at end of file diff --git a/vendor/logos-codegen/tests/data/simple/input.rs b/vendor/logos-codegen/tests/data/simple/input.rs deleted file mode 100644 index d2f0517d..00000000 --- a/vendor/logos-codegen/tests/data/simple/input.rs +++ /dev/null @@ -1,5 +0,0 @@ -#[derive(Logos, Debug, Clone, Copy, PartialEq)] -enum Token { - #[regex("a-z")] - Letter, -} diff --git a/vendor/logos-codegen/tests/data/simple/output.rs b/vendor/logos-codegen/tests/data/simple/output.rs deleted file mode 100644 index e8a4dc3b..00000000 --- a/vendor/logos-codegen/tests/data/simple/output.rs +++ /dev/null @@ -1 +0,0 @@ -impl < 's > :: logos :: Logos < 's > for Token { type Error = () ; type Extras = () ; type Source = str ; fn lex (lex : & mut :: logos :: Lexer < 's , Self >) { use :: logos :: internal :: { LexerInternal , CallbackResult } ; type Lexer < 's > = :: logos :: Lexer < 's , Token > ; fn _end < 's > (lex : & mut Lexer < 's >) { lex . end () } fn _error < 's > (lex : & mut Lexer < 's >) { lex . bump_unchecked (1) ; lex . error () ; } macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . 
bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } # [inline] fn goto1_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Letter)) ; } # [inline] fn goto3_at1_with3 < 's > (lex : & mut Lexer < 's >) { match lex . read_at :: < & [u8 ; 2usize] > (1usize) { Some (b"-z") => { lex . bump_unchecked (3usize) ; goto1_x (lex) } , _ => _error (lex) , } } # [inline] fn goto4 < 's > (lex : & mut Lexer < 's >) { let arr = match lex . read :: < & [u8 ; 3usize] > () { Some (arr) => arr , None => return _end (lex) , } ; match arr [0] { b'a' => goto3_at1_with3 (lex) , _ => _error (lex) , } } goto4 (lex) } }
\ No newline at end of file |
