diff options
Diffstat (limited to 'vendor/github.com/authzed/cel-go/common/runes/buffer.go')
| -rw-r--r-- | vendor/github.com/authzed/cel-go/common/runes/buffer.go | 242 |
1 files changed, 242 insertions, 0 deletions
diff --git a/vendor/github.com/authzed/cel-go/common/runes/buffer.go b/vendor/github.com/authzed/cel-go/common/runes/buffer.go new file mode 100644 index 0000000..a11a800 --- /dev/null +++ b/vendor/github.com/authzed/cel-go/common/runes/buffer.go @@ -0,0 +1,242 @@ +// Copyright 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package runes provides interfaces and utilities for working with runes. +package runes + +import ( + "strings" + "unicode/utf8" +) + +// Buffer is an interface for accessing a contiguous array of code points. +type Buffer interface { + Get(i int) rune + Slice(i, j int) string + Len() int +} + +type emptyBuffer struct{} + +func (e *emptyBuffer) Get(i int) rune { + panic("slice index out of bounds") +} + +func (e *emptyBuffer) Slice(i, j int) string { + if i != 0 || i != j { + panic("slice index out of bounds") + } + return "" +} + +func (e *emptyBuffer) Len() int { + return 0 +} + +var _ Buffer = &emptyBuffer{} + +// asciiBuffer is an implementation for an array of code points that contain code points only from +// the ASCII character set. +type asciiBuffer struct { + arr []byte +} + +func (a *asciiBuffer) Get(i int) rune { + return rune(uint32(a.arr[i])) +} + +func (a *asciiBuffer) Slice(i, j int) string { + return string(a.arr[i:j]) +} + +func (a *asciiBuffer) Len() int { + return len(a.arr) +} + +var _ Buffer = &asciiBuffer{} + +// basicBuffer is an implementation for an array of code points that contain code points from both +// the Latin-1 character set and Basic Multilingual Plane. +type basicBuffer struct { + arr []uint16 +} + +func (b *basicBuffer) Get(i int) rune { + return rune(uint32(b.arr[i])) +} + +func (b *basicBuffer) Slice(i, j int) string { + var str strings.Builder + str.Grow((j - i) * 3) // Worst case encoding size for 0xffff is 3. + for ; i < j; i++ { + str.WriteRune(rune(uint32(b.arr[i]))) + } + return str.String() +} + +func (b *basicBuffer) Len() int { + return len(b.arr) +} + +var _ Buffer = &basicBuffer{} + +// supplementalBuffer is an implementation for an array of code points that contain code points from +// the Latin-1 character set, Basic Multilingual Plane, or the Supplemental Multilingual Plane. +type supplementalBuffer struct { + arr []rune +} + +func (s *supplementalBuffer) Get(i int) rune { + return rune(uint32(s.arr[i])) +} + +func (s *supplementalBuffer) Slice(i, j int) string { + return string(s.arr[i:j]) +} + +func (s *supplementalBuffer) Len() int { + return len(s.arr) +} + +var _ Buffer = &supplementalBuffer{} + +var nilBuffer = &emptyBuffer{} + +// NewBuffer returns an efficient implementation of Buffer for the given text based on the ranges of +// the encoded code points contained within. +// +// Code points are represented as an array of byte, uint16, or rune. This approach ensures that +// each index represents a code point by itself without needing to use an array of rune. At first +// we assume all code points are less than or equal to '\u007f'. If this holds true, the +// underlying storage is a byte array containing only ASCII characters. If we encountered a code +// point above this range but less than or equal to '\uffff' we allocate a uint16 array, copy the +// elements of previous byte array to the uint16 array, and continue. If this holds true, the +// underlying storage is a uint16 array containing only Unicode characters in the Basic Multilingual +// Plane. If we encounter a code point above '\uffff' we allocate an rune array, copy the previous +// elements of the byte or uint16 array, and continue. The underlying storage is an rune array +// containing any Unicode character. +func NewBuffer(data string) Buffer { + buf, _ := newBuffer(data, false) + return buf +} + +// NewBuffer returns an efficient implementation of Buffer for the given text based on the ranges of +// the encoded code points contained within, as well as returning the line offsets. +// +// Code points are represented as an array of byte, uint16, or rune. This approach ensures that +// each index represents a code point by itself without needing to use an array of rune. At first +// we assume all code points are less than or equal to '\u007f'. If this holds true, the +// underlying storage is a byte array containing only ASCII characters. If we encountered a code +// point above this range but less than or equal to '\uffff' we allocate a uint16 array, copy the +// elements of previous byte array to the uint16 array, and continue. If this holds true, the +// underlying storage is a uint16 array containing only Unicode characters in the Basic Multilingual +// Plane. If we encounter a code point above '\uffff' we allocate an rune array, copy the previous +// elements of the byte or uint16 array, and continue. The underlying storage is an rune array +// containing any Unicode character. +func NewBufferAndLineOffsets(data string) (Buffer, []int32) { + return newBuffer(data, true) +} + +func newBuffer(data string, lines bool) (Buffer, []int32) { + if len(data) == 0 { + return nilBuffer, []int32{0} + } + var ( + idx = 0 + off int32 = 0 + buf8 = make([]byte, 0, len(data)) + buf16 []uint16 + buf32 []rune + offs []int32 + ) + for idx < len(data) { + r, s := utf8.DecodeRuneInString(data[idx:]) + idx += s + if lines && r == '\n' { + offs = append(offs, off+1) + } + if r < utf8.RuneSelf { + buf8 = append(buf8, byte(r)) + off++ + continue + } + if r <= 0xffff { + buf16 = make([]uint16, len(buf8), len(data)) + for i, v := range buf8 { + buf16[i] = uint16(v) + } + buf8 = nil + buf16 = append(buf16, uint16(r)) + off++ + goto copy16 + } + buf32 = make([]rune, len(buf8), len(data)) + for i, v := range buf8 { + buf32[i] = rune(uint32(v)) + } + buf8 = nil + buf32 = append(buf32, r) + off++ + goto copy32 + } + if lines { + offs = append(offs, off+1) + } + return &asciiBuffer{ + arr: buf8, + }, offs +copy16: + for idx < len(data) { + r, s := utf8.DecodeRuneInString(data[idx:]) + idx += s + if lines && r == '\n' { + offs = append(offs, off+1) + } + if r <= 0xffff { + buf16 = append(buf16, uint16(r)) + off++ + continue + } + buf32 = make([]rune, len(buf16), len(data)) + for i, v := range buf16 { + buf32[i] = rune(uint32(v)) + } + buf16 = nil + buf32 = append(buf32, r) + off++ + goto copy32 + } + if lines { + offs = append(offs, off+1) + } + return &basicBuffer{ + arr: buf16, + }, offs +copy32: + for idx < len(data) { + r, s := utf8.DecodeRuneInString(data[idx:]) + idx += s + if lines && r == '\n' { + offs = append(offs, off+1) + } + buf32 = append(buf32, r) + off++ + } + if lines { + offs = append(offs, off+1) + } + return &supplementalBuffer{ + arr: buf32, + }, offs +} |
