summaryrefslogtreecommitdiff
path: root/vendor/regex-syntax/src/parser.rs
diff options
context:
space:
mode:
authormo khan <mo@mokhan.ca>2025-07-15 16:37:08 -0600
committermo khan <mo@mokhan.ca>2025-07-17 16:30:22 -0600
commit45df4d0d9b577fecee798d672695fe24ff57fb1b (patch)
tree1b99bf645035b58e0d6db08c7a83521f41f7a75b /vendor/regex-syntax/src/parser.rs
parentf94f79608393d4ab127db63cc41668445ef6b243 (diff)
feat: migrate from Cedar to SpiceDB authorization system
This is a major architectural change that replaces the Cedar policy-based authorization system with SpiceDB's relation-based authorization. Key changes: - Migrate from Rust to Go implementation - Replace Cedar policies with SpiceDB schema and relationships - Switch from envoy `ext_authz` with Cedar to SpiceDB permission checks - Update build system and dependencies for Go ecosystem - Maintain Envoy integration for external authorization This change enables more flexible permission modeling through SpiceDB's Google Zanzibar inspired relation-based system, supporting complex hierarchical permissions that were difficult to express in Cedar. Breaking change: Existing Cedar policies and Rust-based configuration will no longer work and need to be migrated to SpiceDB schema.
Diffstat (limited to 'vendor/regex-syntax/src/parser.rs')
-rw-r--r--vendor/regex-syntax/src/parser.rs254
1 files changed, 0 insertions, 254 deletions
diff --git a/vendor/regex-syntax/src/parser.rs b/vendor/regex-syntax/src/parser.rs
deleted file mode 100644
index f482b846..00000000
--- a/vendor/regex-syntax/src/parser.rs
+++ /dev/null
@@ -1,254 +0,0 @@
-use crate::{ast, hir, Error};
-
-/// A convenience routine for parsing a regex using default options.
-///
-/// This is equivalent to `Parser::new().parse(pattern)`.
-///
-/// If you need to set non-default options, then use a [`ParserBuilder`].
-///
-/// This routine returns an [`Hir`](hir::Hir) value. Namely, it automatically
-/// parses the pattern as an [`Ast`](ast::Ast) and then invokes the translator
-/// to convert the `Ast` into an `Hir`. If you need access to the `Ast`, then
-/// you should use a [`ast::parse::Parser`].
-pub fn parse(pattern: &str) -> Result<hir::Hir, Error> {
- Parser::new().parse(pattern)
-}
-
-/// A builder for a regular expression parser.
-///
-/// This builder permits modifying configuration options for the parser.
-///
-/// This type combines the builder options for both the [AST
-/// `ParserBuilder`](ast::parse::ParserBuilder) and the [HIR
-/// `TranslatorBuilder`](hir::translate::TranslatorBuilder).
-#[derive(Clone, Debug, Default)]
-pub struct ParserBuilder {
- ast: ast::parse::ParserBuilder,
- hir: hir::translate::TranslatorBuilder,
-}
-
-impl ParserBuilder {
- /// Create a new parser builder with a default configuration.
- pub fn new() -> ParserBuilder {
- ParserBuilder::default()
- }
-
- /// Build a parser from this configuration with the given pattern.
- pub fn build(&self) -> Parser {
- Parser { ast: self.ast.build(), hir: self.hir.build() }
- }
-
- /// Set the nesting limit for this parser.
- ///
- /// The nesting limit controls how deep the abstract syntax tree is allowed
- /// to be. If the AST exceeds the given limit (e.g., with too many nested
- /// groups), then an error is returned by the parser.
- ///
- /// The purpose of this limit is to act as a heuristic to prevent stack
- /// overflow for consumers that do structural induction on an `Ast` using
- /// explicit recursion. While this crate never does this (instead using
- /// constant stack space and moving the call stack to the heap), other
- /// crates may.
- ///
- /// This limit is not checked until the entire Ast is parsed. Therefore,
- /// if callers want to put a limit on the amount of heap space used, then
- /// they should impose a limit on the length, in bytes, of the concrete
- /// pattern string. In particular, this is viable since this parser
- /// implementation will limit itself to heap space proportional to the
- /// length of the pattern string.
- ///
- /// Note that a nest limit of `0` will return a nest limit error for most
- /// patterns but not all. For example, a nest limit of `0` permits `a` but
- /// not `ab`, since `ab` requires a concatenation, which results in a nest
- /// depth of `1`. In general, a nest limit is not something that manifests
- /// in an obvious way in the concrete syntax, therefore, it should not be
- /// used in a granular way.
- pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
- self.ast.nest_limit(limit);
- self
- }
-
- /// Whether to support octal syntax or not.
- ///
- /// Octal syntax is a little-known way of uttering Unicode codepoints in
- /// a regular expression. For example, `a`, `\x61`, `\u0061` and
- /// `\141` are all equivalent regular expressions, where the last example
- /// shows octal syntax.
- ///
- /// While supporting octal syntax isn't in and of itself a problem, it does
- /// make good error messages harder. That is, in PCRE based regex engines,
- /// syntax like `\0` invokes a backreference, which is explicitly
- /// unsupported in Rust's regex engine. However, many users expect it to
- /// be supported. Therefore, when octal support is disabled, the error
- /// message will explicitly mention that backreferences aren't supported.
- ///
- /// Octal syntax is disabled by default.
- pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
- self.ast.octal(yes);
- self
- }
-
- /// When disabled, translation will permit the construction of a regular
- /// expression that may match invalid UTF-8.
- ///
- /// When enabled (the default), the translator is guaranteed to produce an
- /// expression that, for non-empty matches, will only ever produce spans
- /// that are entirely valid UTF-8 (otherwise, the translator will return an
- /// error).
- ///
- /// Perhaps surprisingly, when UTF-8 is enabled, an empty regex or even
- /// a negated ASCII word boundary (uttered as `(?-u:\B)` in the concrete
- /// syntax) will be allowed even though they can produce matches that split
- /// a UTF-8 encoded codepoint. This only applies to zero-width or "empty"
- /// matches, and it is expected that the regex engine itself must handle
- /// these cases if necessary (perhaps by suppressing any zero-width matches
- /// that split a codepoint).
- pub fn utf8(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.utf8(yes);
- self
- }
-
- /// Enable verbose mode in the regular expression.
- ///
- /// When enabled, verbose mode permits insignificant whitespace in many
- /// places in the regular expression, as well as comments. Comments are
- /// started using `#` and continue until the end of the line.
- ///
- /// By default, this is disabled. It may be selectively enabled in the
- /// regular expression by using the `x` flag regardless of this setting.
- pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
- self.ast.ignore_whitespace(yes);
- self
- }
-
- /// Enable or disable the case insensitive flag by default.
- ///
- /// By default this is disabled. It may alternatively be selectively
- /// enabled in the regular expression itself via the `i` flag.
- pub fn case_insensitive(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.case_insensitive(yes);
- self
- }
-
- /// Enable or disable the multi-line matching flag by default.
- ///
- /// By default this is disabled. It may alternatively be selectively
- /// enabled in the regular expression itself via the `m` flag.
- pub fn multi_line(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.multi_line(yes);
- self
- }
-
- /// Enable or disable the "dot matches any character" flag by default.
- ///
- /// By default this is disabled. It may alternatively be selectively
- /// enabled in the regular expression itself via the `s` flag.
- pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.dot_matches_new_line(yes);
- self
- }
-
- /// Enable or disable the CRLF mode flag by default.
- ///
- /// By default this is disabled. It may alternatively be selectively
- /// enabled in the regular expression itself via the `R` flag.
- ///
- /// When CRLF mode is enabled, the following happens:
- ///
- /// * Unless `dot_matches_new_line` is enabled, `.` will match any character
- /// except for `\r` and `\n`.
- /// * When `multi_line` mode is enabled, `^` and `$` will treat `\r\n`,
- /// `\r` and `\n` as line terminators. And in particular, neither will
- /// match between a `\r` and a `\n`.
- pub fn crlf(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.crlf(yes);
- self
- }
-
- /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`.
- ///
- /// Namely, instead of `.` (by default) matching everything except for `\n`,
- /// this will cause `.` to match everything except for the byte given.
- ///
- /// If `.` is used in a context where Unicode mode is enabled and this byte
- /// isn't ASCII, then an error will be returned. When Unicode mode is
- /// disabled, then any byte is permitted, but will return an error if UTF-8
- /// mode is enabled and it is a non-ASCII byte.
- ///
- /// In short, any ASCII value for a line terminator is always okay. But a
- /// non-ASCII byte might result in an error depending on whether Unicode
- /// mode or UTF-8 mode are enabled.
- ///
- /// Note that if `R` mode is enabled then it always takes precedence and
- /// the line terminator will be treated as `\r` and `\n` simultaneously.
- ///
- /// Note also that this *doesn't* impact the look-around assertions
- /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional
- /// configuration in the regex engine itself.
- pub fn line_terminator(&mut self, byte: u8) -> &mut ParserBuilder {
- self.hir.line_terminator(byte);
- self
- }
-
- /// Enable or disable the "swap greed" flag by default.
- ///
- /// By default this is disabled. It may alternatively be selectively
- /// enabled in the regular expression itself via the `U` flag.
- pub fn swap_greed(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.swap_greed(yes);
- self
- }
-
- /// Enable or disable the Unicode flag (`u`) by default.
- ///
- /// By default this is **enabled**. It may alternatively be selectively
- /// disabled in the regular expression itself via the `u` flag.
- ///
- /// Note that unless `utf8` is disabled (it's enabled by default), a
- /// regular expression will fail to parse if Unicode mode is disabled and a
- /// sub-expression could possibly match invalid UTF-8.
- pub fn unicode(&mut self, yes: bool) -> &mut ParserBuilder {
- self.hir.unicode(yes);
- self
- }
-}
-
-/// A convenience parser for regular expressions.
-///
-/// This parser takes as input a regular expression pattern string (the
-/// "concrete syntax") and returns a high-level intermediate representation
-/// (the HIR) suitable for most types of analysis. In particular, this parser
-/// hides the intermediate state of producing an AST (the "abstract syntax").
-/// The AST is itself far more complex than the HIR, so this parser serves as a
-/// convenience for never having to deal with it at all.
-///
-/// If callers have more fine grained use cases that need an AST, then please
-/// see the [`ast::parse`] module.
-///
-/// A `Parser` can be configured in more detail via a [`ParserBuilder`].
-#[derive(Clone, Debug)]
-pub struct Parser {
- ast: ast::parse::Parser,
- hir: hir::translate::Translator,
-}
-
-impl Parser {
- /// Create a new parser with a default configuration.
- ///
- /// The parser can be run with `parse` method. The parse method returns
- /// a high level intermediate representation of the given regular
- /// expression.
- ///
- /// To set configuration options on the parser, use [`ParserBuilder`].
- pub fn new() -> Parser {
- ParserBuilder::new().build()
- }
-
- /// Parse the regular expression into a high level intermediate
- /// representation.
- pub fn parse(&mut self, pattern: &str) -> Result<hir::Hir, Error> {
- let ast = self.ast.parse(pattern)?;
- let hir = self.hir.translate(pattern, &ast)?;
- Ok(hir)
- }
-}