summaryrefslogtreecommitdiff
path: root/vendor/github.com/authzed/cel-go/common/runes/buffer.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/authzed/cel-go/common/runes/buffer.go')
-rw-r--r--vendor/github.com/authzed/cel-go/common/runes/buffer.go242
1 files changed, 242 insertions, 0 deletions
diff --git a/vendor/github.com/authzed/cel-go/common/runes/buffer.go b/vendor/github.com/authzed/cel-go/common/runes/buffer.go
new file mode 100644
index 0000000..a11a800
--- /dev/null
+++ b/vendor/github.com/authzed/cel-go/common/runes/buffer.go
@@ -0,0 +1,242 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package runes provides interfaces and utilities for working with runes.
+package runes
+
+import (
+ "strings"
+ "unicode/utf8"
+)
+
+// Buffer is an interface for accessing a contiguous array of code points.
+type Buffer interface {
+ Get(i int) rune
+ Slice(i, j int) string
+ Len() int
+}
+
+type emptyBuffer struct{}
+
+func (e *emptyBuffer) Get(i int) rune {
+ panic("slice index out of bounds")
+}
+
+func (e *emptyBuffer) Slice(i, j int) string {
+ if i != 0 || i != j {
+ panic("slice index out of bounds")
+ }
+ return ""
+}
+
+func (e *emptyBuffer) Len() int {
+ return 0
+}
+
+var _ Buffer = &emptyBuffer{}
+
+// asciiBuffer is an implementation for an array of code points that contain code points only from
+// the ASCII character set.
+type asciiBuffer struct {
+ arr []byte
+}
+
+func (a *asciiBuffer) Get(i int) rune {
+ return rune(uint32(a.arr[i]))
+}
+
+func (a *asciiBuffer) Slice(i, j int) string {
+ return string(a.arr[i:j])
+}
+
+func (a *asciiBuffer) Len() int {
+ return len(a.arr)
+}
+
+var _ Buffer = &asciiBuffer{}
+
+// basicBuffer is an implementation for an array of code points that contain code points from both
+// the Latin-1 character set and Basic Multilingual Plane.
+type basicBuffer struct {
+ arr []uint16
+}
+
+func (b *basicBuffer) Get(i int) rune {
+ return rune(uint32(b.arr[i]))
+}
+
+func (b *basicBuffer) Slice(i, j int) string {
+ var str strings.Builder
+ str.Grow((j - i) * 3) // Worst case encoding size for 0xffff is 3.
+ for ; i < j; i++ {
+ str.WriteRune(rune(uint32(b.arr[i])))
+ }
+ return str.String()
+}
+
+func (b *basicBuffer) Len() int {
+ return len(b.arr)
+}
+
+var _ Buffer = &basicBuffer{}
+
+// supplementalBuffer is an implementation for an array of code points that contain code points from
+// the Latin-1 character set, Basic Multilingual Plane, or the Supplemental Multilingual Plane.
+type supplementalBuffer struct {
+ arr []rune
+}
+
+func (s *supplementalBuffer) Get(i int) rune {
+ return rune(uint32(s.arr[i]))
+}
+
+func (s *supplementalBuffer) Slice(i, j int) string {
+ return string(s.arr[i:j])
+}
+
+func (s *supplementalBuffer) Len() int {
+ return len(s.arr)
+}
+
+var _ Buffer = &supplementalBuffer{}
+
+var nilBuffer = &emptyBuffer{}
+
+// NewBuffer returns an efficient implementation of Buffer for the given text based on the ranges of
+// the encoded code points contained within.
+//
+// Code points are represented as an array of byte, uint16, or rune. This approach ensures that
+// each index represents a code point by itself without needing to use an array of rune. At first
+// we assume all code points are less than or equal to '\u007f'. If this holds true, the
+// underlying storage is a byte array containing only ASCII characters. If we encountered a code
+// point above this range but less than or equal to '\uffff' we allocate a uint16 array, copy the
+// elements of previous byte array to the uint16 array, and continue. If this holds true, the
+// underlying storage is a uint16 array containing only Unicode characters in the Basic Multilingual
+// Plane. If we encounter a code point above '\uffff' we allocate an rune array, copy the previous
+// elements of the byte or uint16 array, and continue. The underlying storage is an rune array
+// containing any Unicode character.
+func NewBuffer(data string) Buffer {
+ buf, _ := newBuffer(data, false)
+ return buf
+}
+
+// NewBuffer returns an efficient implementation of Buffer for the given text based on the ranges of
+// the encoded code points contained within, as well as returning the line offsets.
+//
+// Code points are represented as an array of byte, uint16, or rune. This approach ensures that
+// each index represents a code point by itself without needing to use an array of rune. At first
+// we assume all code points are less than or equal to '\u007f'. If this holds true, the
+// underlying storage is a byte array containing only ASCII characters. If we encountered a code
+// point above this range but less than or equal to '\uffff' we allocate a uint16 array, copy the
+// elements of previous byte array to the uint16 array, and continue. If this holds true, the
+// underlying storage is a uint16 array containing only Unicode characters in the Basic Multilingual
+// Plane. If we encounter a code point above '\uffff' we allocate an rune array, copy the previous
+// elements of the byte or uint16 array, and continue. The underlying storage is an rune array
+// containing any Unicode character.
+func NewBufferAndLineOffsets(data string) (Buffer, []int32) {
+ return newBuffer(data, true)
+}
+
+func newBuffer(data string, lines bool) (Buffer, []int32) {
+ if len(data) == 0 {
+ return nilBuffer, []int32{0}
+ }
+ var (
+ idx = 0
+ off int32 = 0
+ buf8 = make([]byte, 0, len(data))
+ buf16 []uint16
+ buf32 []rune
+ offs []int32
+ )
+ for idx < len(data) {
+ r, s := utf8.DecodeRuneInString(data[idx:])
+ idx += s
+ if lines && r == '\n' {
+ offs = append(offs, off+1)
+ }
+ if r < utf8.RuneSelf {
+ buf8 = append(buf8, byte(r))
+ off++
+ continue
+ }
+ if r <= 0xffff {
+ buf16 = make([]uint16, len(buf8), len(data))
+ for i, v := range buf8 {
+ buf16[i] = uint16(v)
+ }
+ buf8 = nil
+ buf16 = append(buf16, uint16(r))
+ off++
+ goto copy16
+ }
+ buf32 = make([]rune, len(buf8), len(data))
+ for i, v := range buf8 {
+ buf32[i] = rune(uint32(v))
+ }
+ buf8 = nil
+ buf32 = append(buf32, r)
+ off++
+ goto copy32
+ }
+ if lines {
+ offs = append(offs, off+1)
+ }
+ return &asciiBuffer{
+ arr: buf8,
+ }, offs
+copy16:
+ for idx < len(data) {
+ r, s := utf8.DecodeRuneInString(data[idx:])
+ idx += s
+ if lines && r == '\n' {
+ offs = append(offs, off+1)
+ }
+ if r <= 0xffff {
+ buf16 = append(buf16, uint16(r))
+ off++
+ continue
+ }
+ buf32 = make([]rune, len(buf16), len(data))
+ for i, v := range buf16 {
+ buf32[i] = rune(uint32(v))
+ }
+ buf16 = nil
+ buf32 = append(buf32, r)
+ off++
+ goto copy32
+ }
+ if lines {
+ offs = append(offs, off+1)
+ }
+ return &basicBuffer{
+ arr: buf16,
+ }, offs
+copy32:
+ for idx < len(data) {
+ r, s := utf8.DecodeRuneInString(data[idx:])
+ idx += s
+ if lines && r == '\n' {
+ offs = append(offs, off+1)
+ }
+ buf32 = append(buf32, r)
+ off++
+ }
+ if lines {
+ offs = append(offs, off+1)
+ }
+ return &supplementalBuffer{
+ arr: buf32,
+ }, offs
+}