summaryrefslogtreecommitdiff
path: root/vendor/github.com/charmbracelet/x/ansi/parser.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/charmbracelet/x/ansi/parser.go')
-rw-r--r--vendor/github.com/charmbracelet/x/ansi/parser.go417
1 files changed, 417 insertions, 0 deletions
diff --git a/vendor/github.com/charmbracelet/x/ansi/parser.go b/vendor/github.com/charmbracelet/x/ansi/parser.go
new file mode 100644
index 0000000..882e1ed
--- /dev/null
+++ b/vendor/github.com/charmbracelet/x/ansi/parser.go
@@ -0,0 +1,417 @@
+package ansi
+
+import (
+ "unicode/utf8"
+ "unsafe"
+
+ "github.com/charmbracelet/x/ansi/parser"
+)
+
+// Parser represents a DEC ANSI compatible sequence parser.
+//
+// It uses a state machine to parse ANSI escape sequences and control
+// characters. The parser is designed to be used with a terminal emulator or
+// similar application that needs to parse ANSI escape sequences and control
+// characters.
+// See package [parser] for more information.
+//
+//go:generate go run ./gen.go
+type Parser struct {
+ handler Handler
+
+ // params contains the raw parameters of the sequence.
+ // These parameters used when constructing CSI and DCS sequences.
+ params []int
+
+ // data contains the raw data of the sequence.
+ // These data used when constructing OSC, DCS, SOS, PM, and APC sequences.
+ data []byte
+
+ // dataLen keeps track of the length of the data buffer.
+ // If dataLen is -1, the data buffer is unlimited and will grow as needed.
+ // Otherwise, dataLen is limited by the size of the data buffer.
+ dataLen int
+
+ // paramsLen keeps track of the number of parameters.
+ // This is limited by the size of the params buffer.
+ //
+ // This is also used when collecting UTF-8 runes to keep track of the
+ // number of rune bytes collected.
+ paramsLen int
+
+ // cmd contains the raw command along with the private prefix and
+ // intermediate bytes of the sequence.
+ // The first lower byte contains the command byte, the next byte contains
+ // the private prefix, and the next byte contains the intermediate byte.
+ //
+ // This is also used when collecting UTF-8 runes treating it as a slice of
+ // 4 bytes.
+ cmd int
+
+ // state is the current state of the parser.
+ state byte
+}
+
+// NewParser returns a new parser with the default settings.
+// The [Parser] uses a default size of 32 for the parameters and 64KB for the
+// data buffer. Use [Parser.SetParamsSize] and [Parser.SetDataSize] to set the
+// size of the parameters and data buffer respectively.
+func NewParser() *Parser {
+ p := new(Parser)
+ p.SetParamsSize(parser.MaxParamsSize)
+ p.SetDataSize(1024 * 64) // 64KB data buffer
+ return p
+}
+
+// SetParamsSize sets the size of the parameters buffer.
+// This is used when constructing CSI and DCS sequences.
+func (p *Parser) SetParamsSize(size int) {
+ p.params = make([]int, size)
+}
+
+// SetDataSize sets the size of the data buffer.
+// This is used when constructing OSC, DCS, SOS, PM, and APC sequences.
+// If size is less than or equal to 0, the data buffer is unlimited and will
+// grow as needed.
+func (p *Parser) SetDataSize(size int) {
+ if size <= 0 {
+ size = 0
+ p.dataLen = -1
+ }
+ p.data = make([]byte, size)
+}
+
+// Params returns the list of parsed packed parameters.
+func (p *Parser) Params() Params {
+ return unsafe.Slice((*Param)(unsafe.Pointer(&p.params[0])), p.paramsLen)
+}
+
+// Param returns the parameter at the given index and falls back to the default
+// value if the parameter is missing. If the index is out of bounds, it returns
+// the default value and false.
+func (p *Parser) Param(i, def int) (int, bool) {
+ if i < 0 || i >= p.paramsLen {
+ return def, false
+ }
+ return Param(p.params[i]).Param(def), true
+}
+
+// Command returns the packed command of the last dispatched sequence. Use
+// [Cmd] to unpack the command.
+func (p *Parser) Command() int {
+ return p.cmd
+}
+
+// Rune returns the last dispatched sequence as a rune.
+func (p *Parser) Rune() rune {
+ rw := utf8ByteLen(byte(p.cmd & 0xff))
+ if rw == -1 {
+ return utf8.RuneError
+ }
+ r, _ := utf8.DecodeRune((*[utf8.UTFMax]byte)(unsafe.Pointer(&p.cmd))[:rw])
+ return r
+}
+
+// Control returns the last dispatched sequence as a control code.
+func (p *Parser) Control() byte {
+ return byte(p.cmd & 0xff)
+}
+
+// Data returns the raw data of the last dispatched sequence.
+func (p *Parser) Data() []byte {
+ return p.data[:p.dataLen]
+}
+
+// Reset resets the parser to its initial state.
+func (p *Parser) Reset() {
+ p.clear()
+ p.state = parser.GroundState
+}
+
+// clear clears the parser parameters and command.
+func (p *Parser) clear() {
+ if len(p.params) > 0 {
+ p.params[0] = parser.MissingParam
+ }
+ p.paramsLen = 0
+ p.cmd = 0
+}
+
+// State returns the current state of the parser.
+func (p *Parser) State() parser.State {
+ return p.state
+}
+
+// StateName returns the name of the current state.
+func (p *Parser) StateName() string {
+ return parser.StateNames[p.state]
+}
+
+// Parse parses the given dispatcher and byte buffer.
+// Deprecated: Loop over the buffer and call [Parser.Advance] instead.
+func (p *Parser) Parse(b []byte) {
+ for i := 0; i < len(b); i++ {
+ p.Advance(b[i])
+ }
+}
+
+// Advance advances the parser using the given byte. It returns the action
+// performed by the parser.
+func (p *Parser) Advance(b byte) parser.Action {
+ switch p.state {
+ case parser.Utf8State:
+ // We handle UTF-8 here.
+ return p.advanceUtf8(b)
+ default:
+ return p.advance(b)
+ }
+}
+
+func (p *Parser) collectRune(b byte) {
+ if p.paramsLen >= utf8.UTFMax {
+ return
+ }
+
+ shift := p.paramsLen * 8
+ p.cmd &^= 0xff << shift
+ p.cmd |= int(b) << shift
+ p.paramsLen++
+}
+
+func (p *Parser) advanceUtf8(b byte) parser.Action {
+ // Collect UTF-8 rune bytes.
+ p.collectRune(b)
+ rw := utf8ByteLen(byte(p.cmd & 0xff))
+ if rw == -1 {
+ // We panic here because the first byte comes from the state machine,
+ // if this panics, it means there is a bug in the state machine!
+ panic("invalid rune") // unreachable
+ }
+
+ if p.paramsLen < rw {
+ return parser.CollectAction
+ }
+
+ // We have enough bytes to decode the rune using unsafe
+ if p.handler.Print != nil {
+ p.handler.Print(p.Rune())
+ }
+
+ p.state = parser.GroundState
+ p.paramsLen = 0
+
+ return parser.PrintAction
+}
+
+func (p *Parser) advance(b byte) parser.Action {
+ state, action := parser.Table.Transition(p.state, b)
+
+ // We need to clear the parser state if the state changes from EscapeState.
+ // This is because when we enter the EscapeState, we don't get a chance to
+ // clear the parser state. For example, when a sequence terminates with a
+ // ST (\x1b\\ or \x9c), we dispatch the current sequence and transition to
+ // EscapeState. However, the parser state is not cleared in this case and
+ // we need to clear it here before dispatching the esc sequence.
+ if p.state != state {
+ if p.state == parser.EscapeState {
+ p.performAction(parser.ClearAction, state, b)
+ }
+ if action == parser.PutAction &&
+ p.state == parser.DcsEntryState && state == parser.DcsStringState {
+ // XXX: This is a special case where we need to start collecting
+ // non-string parameterized data i.e. doesn't follow the ECMA-48 ยง
+ // 5.4.1 string parameters format.
+ p.performAction(parser.StartAction, state, 0)
+ }
+ }
+
+ // Handle special cases
+ switch {
+ case b == ESC && p.state == parser.EscapeState:
+ // Two ESCs in a row
+ p.performAction(parser.ExecuteAction, state, b)
+ default:
+ p.performAction(action, state, b)
+ }
+
+ p.state = state
+
+ return action
+}
+
+func (p *Parser) parseStringCmd() {
+ // Try to parse the command
+ datalen := len(p.data)
+ if p.dataLen >= 0 {
+ datalen = p.dataLen
+ }
+ for i := 0; i < datalen; i++ {
+ d := p.data[i]
+ if d < '0' || d > '9' {
+ break
+ }
+ if p.cmd == parser.MissingCommand {
+ p.cmd = 0
+ }
+ p.cmd *= 10
+ p.cmd += int(d - '0')
+ }
+}
+
+func (p *Parser) performAction(action parser.Action, state parser.State, b byte) {
+ switch action {
+ case parser.IgnoreAction:
+ break
+
+ case parser.ClearAction:
+ p.clear()
+
+ case parser.PrintAction:
+ p.cmd = int(b)
+ if p.handler.Print != nil {
+ p.handler.Print(rune(b))
+ }
+
+ case parser.ExecuteAction:
+ p.cmd = int(b)
+ if p.handler.Execute != nil {
+ p.handler.Execute(b)
+ }
+
+ case parser.PrefixAction:
+ // Collect private prefix
+ // we only store the last prefix
+ p.cmd &^= 0xff << parser.PrefixShift
+ p.cmd |= int(b) << parser.PrefixShift
+
+ case parser.CollectAction:
+ if state == parser.Utf8State {
+ // Reset the UTF-8 counter
+ p.paramsLen = 0
+ p.collectRune(b)
+ } else {
+ // Collect intermediate bytes
+ // we only store the last intermediate byte
+ p.cmd &^= 0xff << parser.IntermedShift
+ p.cmd |= int(b) << parser.IntermedShift
+ }
+
+ case parser.ParamAction:
+ // Collect parameters
+ if p.paramsLen >= len(p.params) {
+ break
+ }
+
+ if b >= '0' && b <= '9' {
+ if p.params[p.paramsLen] == parser.MissingParam {
+ p.params[p.paramsLen] = 0
+ }
+
+ p.params[p.paramsLen] *= 10
+ p.params[p.paramsLen] += int(b - '0')
+ }
+
+ if b == ':' {
+ p.params[p.paramsLen] |= parser.HasMoreFlag
+ }
+
+ if b == ';' || b == ':' {
+ p.paramsLen++
+ if p.paramsLen < len(p.params) {
+ p.params[p.paramsLen] = parser.MissingParam
+ }
+ }
+
+ case parser.StartAction:
+ if p.dataLen < 0 && p.data != nil {
+ p.data = p.data[:0]
+ } else {
+ p.dataLen = 0
+ }
+ if p.state >= parser.DcsEntryState && p.state <= parser.DcsStringState {
+ // Collect the command byte for DCS
+ p.cmd |= int(b)
+ } else {
+ p.cmd = parser.MissingCommand
+ }
+
+ case parser.PutAction:
+ switch p.state {
+ case parser.OscStringState:
+ if b == ';' && p.cmd == parser.MissingCommand {
+ p.parseStringCmd()
+ }
+ }
+
+ if p.dataLen < 0 {
+ p.data = append(p.data, b)
+ } else {
+ if p.dataLen < len(p.data) {
+ p.data[p.dataLen] = b
+ p.dataLen++
+ }
+ }
+
+ case parser.DispatchAction:
+ // Increment the last parameter
+ if p.paramsLen > 0 && p.paramsLen < len(p.params)-1 ||
+ p.paramsLen == 0 && len(p.params) > 0 && p.params[0] != parser.MissingParam {
+ p.paramsLen++
+ }
+
+ if p.state == parser.OscStringState && p.cmd == parser.MissingCommand {
+ // Ensure we have a command for OSC
+ p.parseStringCmd()
+ }
+
+ data := p.data
+ if p.dataLen >= 0 {
+ data = data[:p.dataLen]
+ }
+ switch p.state {
+ case parser.CsiEntryState, parser.CsiParamState, parser.CsiIntermediateState:
+ p.cmd |= int(b)
+ if p.handler.HandleCsi != nil {
+ p.handler.HandleCsi(Cmd(p.cmd), p.Params())
+ }
+ case parser.EscapeState, parser.EscapeIntermediateState:
+ p.cmd |= int(b)
+ if p.handler.HandleEsc != nil {
+ p.handler.HandleEsc(Cmd(p.cmd))
+ }
+ case parser.DcsEntryState, parser.DcsParamState, parser.DcsIntermediateState, parser.DcsStringState:
+ if p.handler.HandleDcs != nil {
+ p.handler.HandleDcs(Cmd(p.cmd), p.Params(), data)
+ }
+ case parser.OscStringState:
+ if p.handler.HandleOsc != nil {
+ p.handler.HandleOsc(p.cmd, data)
+ }
+ case parser.SosStringState:
+ if p.handler.HandleSos != nil {
+ p.handler.HandleSos(data)
+ }
+ case parser.PmStringState:
+ if p.handler.HandlePm != nil {
+ p.handler.HandlePm(data)
+ }
+ case parser.ApcStringState:
+ if p.handler.HandleApc != nil {
+ p.handler.HandleApc(data)
+ }
+ }
+ }
+}
+
+func utf8ByteLen(b byte) int {
+ if b <= 0b0111_1111 { // 0x00-0x7F
+ return 1
+ } else if b >= 0b1100_0000 && b <= 0b1101_1111 { // 0xC0-0xDF
+ return 2
+ } else if b >= 0b1110_0000 && b <= 0b1110_1111 { // 0xE0-0xEF
+ return 3
+ } else if b >= 0b1111_0000 && b <= 0b1111_0111 { // 0xF0-0xF7
+ return 4
+ }
+ return -1
+}