diff options
| author | Anton Medvedev <anton@medv.io> | 2025-11-30 12:46:34 +0100 |
|---|---|---|
| committer | Anton Medvedev <anton@medv.io> | 2025-11-30 12:46:34 +0100 |
| commit | f6b0f38af648d028422a7494378b5dabdc90573f (patch) | |
| tree | 3c26cfc269c021300a2d1e4e02623dd440c20226 /pkg/gitdiff/parser.go | |
First commit
Diffstat (limited to 'pkg/gitdiff/parser.go')
| -rw-r--r-- | pkg/gitdiff/parser.go | 142 |
1 files changed, 142 insertions, 0 deletions
diff --git a/pkg/gitdiff/parser.go b/pkg/gitdiff/parser.go new file mode 100644 index 0000000..e8f8430 --- /dev/null +++ b/pkg/gitdiff/parser.go @@ -0,0 +1,142 @@ +// Package gitdiff parses and applies patches generated by Git. It supports +// line-oriented text patches, binary patches, and can also parse standard +// unified diffs generated by other tools. +package gitdiff + +import ( + "bufio" + "fmt" + "io" +) + +// Parse parses a patch with changes to one or more files. Any content before +// the first file is returned as the second value. If an error occurs while +// parsing, it returns all files parsed before the error. +// +// Parse expects to receive a single patch. If the input may contain multiple +// patches (for example, if it is an mbox file), callers should split it into +// individual patches and call Parse on each one. +func Parse(r io.Reader) ([]*File, string, error) { + p := newParser(r) + + if err := p.Next(); err != nil { + if err == io.EOF { + return nil, "", nil + } + return nil, "", err + } + + var preamble string + var files []*File + for { + file, pre, err := p.ParseNextFileHeader() + if err != nil { + return files, preamble, err + } + if len(files) == 0 { + preamble = pre + } + if file == nil { + break + } + + for _, fn := range []func(*File) (int, error){ + p.ParseTextFragments, + p.ParseBinaryFragments, + } { + n, err := fn(file) + if err != nil { + return files, preamble, err + } + if n > 0 { + break + } + } + + files = append(files, file) + } + + return files, preamble, nil +} + +// TODO(bkeyes): consider exporting the parser type with configuration +// this would enable OID validation, p-value guessing, and prefix stripping +// by allowing users to set or override defaults + +// parser invariants: +// - methods that parse objects: +// - start with the parser on the first line of the first object +// - if returning nil, do not advance +// - if returning an error, do not advance past the object +// - if returning an object, advance to 
//       the first line after the object
// - any exported parsing methods must initialize the parser by calling Next()

// stringReader is the only capability the parser needs from its input:
// reading up to and including a delimiter byte.
type stringReader interface {
	ReadString(delim byte) (string, error)
}

// parser reads its input line by line while keeping a small window of
// read-ahead lines available through Line.
type parser struct {
	r stringReader

	// eof is set once Next has found the current line to be empty.
	eof bool
	// lineno is the 1-based number of the current line; it is 0 until the
	// first call to Next.
	lineno int64
	// lines is the window: lines[0] is the current line and the following
	// entries are read-ahead lines ("" where no line is available).
	lines [3]string
}

// newParser returns a parser that reads from r. A reader that already
// provides ReadString is used directly; any other reader is wrapped in a
// bufio.Reader.
func newParser(r io.Reader) *parser {
	sr, ok := r.(stringReader)
	if !ok {
		sr = bufio.NewReader(r)
	}
	return &parser{r: sr}
}

// Next advances the parser by one line. It returns any error encountered while
// reading the line, including io.EOF when the end of stream is reached. Once
// io.EOF has been returned, every later call returns io.EOF as well.
func (p *parser) Next() error {
	if p.eof {
		return io.EOF
	}

	// A regular call shifts in one line. The very first call has to fill the
	// whole window so that the first input line ends up in lines[0].
	shifts := 1
	if p.lineno == 0 {
		shifts = len(p.lines)
	}
	for i := 0; i < shifts; i++ {
		// io.EOF from the reader is not fatal here: the end of input is
		// detected below through the empty current line.
		if err := p.shiftLines(); err != nil && err != io.EOF {
			return err
		}
	}

	p.lineno++
	if p.lines[0] == "" {
		p.eof = true
		return io.EOF
	}
	return nil
}

// shiftLines drops the current line, moves every read-ahead line one slot
// forward, and reads a new line into the last slot. The read error, if any,
// is returned; the slot is assigned whatever the reader returned either way.
func (p *parser) shiftLines() error {
	copy(p.lines[:], p.lines[1:])
	line, err := p.r.ReadString('\n')
	p.lines[len(p.lines)-1] = line
	return err
}

// Line returns a line from the parser without advancing it. A delta of 0
// returns the current line, while higher deltas return read-ahead lines. It
// returns an empty string if the delta is higher than the available lines,
// either because of the buffer size or because the parser reached the end of
// the input. A line that is present is never empty: with a reader that
// follows bufio.Reader.ReadString semantics it keeps its trailing newline,
// except possibly for the last line of an input that does not end in one.
func (p *parser) Line(delta uint) string {
	// Guard the index: a delta beyond the window must yield "" as documented
	// instead of panicking with an out-of-range access.
	if delta >= uint(len(p.lines)) {
		return ""
	}
	return p.lines[delta]
}

// Errorf generates an error whose message is prefixed with "gitdiff: line N: ",
// where N is the current line number plus delta. msg and args are formatted
// as with fmt.Sprintf.
func (p *parser) Errorf(delta int64, msg string, args ...interface{}) error {
	return fmt.Errorf("gitdiff: line %d: %s", p.lineno+delta, fmt.Sprintf(msg, args...))
}
