summaryrefslogtreecommitdiff
path: root/pkg/gitdiff
diff options
context:
space:
mode:
authormo khan <mo@mokhan.ca>2026-01-30 18:16:31 -0700
committermo khan <mo@mokhan.ca>2026-01-30 18:16:31 -0700
commitfeee7d43ef63ae607c6fd4cca88a356a93553ebe (patch)
tree2969055a894dc4e72d8d79a9ac74cc30d78aff64 /pkg/gitdiff
parente0db8f82e96acadf6968e0cf9c805a7b22d835db (diff)
refactor: move packages to internal/
Diffstat (limited to 'pkg/gitdiff')
-rw-r--r--pkg/gitdiff/apply.go147
-rw-r--r--pkg/gitdiff/apply_binary.go206
-rw-r--r--pkg/gitdiff/apply_text.go158
-rw-r--r--pkg/gitdiff/base85.go91
-rw-r--r--pkg/gitdiff/binary.go186
-rw-r--r--pkg/gitdiff/file_header.go546
-rw-r--r--pkg/gitdiff/format.go281
-rw-r--r--pkg/gitdiff/gitdiff.go230
-rw-r--r--pkg/gitdiff/io.go220
-rw-r--r--pkg/gitdiff/parser.go142
-rw-r--r--pkg/gitdiff/patch_header.go470
-rw-r--r--pkg/gitdiff/patch_identity.go166
-rw-r--r--pkg/gitdiff/text.go192
13 files changed, 0 insertions, 3035 deletions
diff --git a/pkg/gitdiff/apply.go b/pkg/gitdiff/apply.go
deleted file mode 100644
index 44bbcca..0000000
--- a/pkg/gitdiff/apply.go
+++ /dev/null
@@ -1,147 +0,0 @@
-package gitdiff
-
-import (
- "errors"
- "fmt"
- "io"
- "sort"
-)
-
// Conflict reports that a patch could not be applied because its content
// disagrees with the source it is being applied to.
//
// To check whether an error stems from a conflict, compare it against an
// empty Conflict with errors.Is:
//
//	if errors.Is(err, &Conflict{}) {
//		// handle conflict
//	}
type Conflict struct {
	msg string
}

// Error returns the conflict message prefixed with "conflict: ".
func (c *Conflict) Error() string {
	return "conflict: " + c.msg
}

// Is reports whether other is a *Conflict matching c. A target with an empty
// message matches every Conflict; otherwise the two messages must be equal.
func (c *Conflict) Is(other error) bool {
	target, ok := other.(*Conflict)
	if !ok {
		return false
	}
	if target.msg == "" {
		return true
	}
	return target.msg == c.msg
}
-
// ApplyError describes a failure during patch application. It wraps the
// underlying error and records where the failure happened, when known.
type ApplyError struct {
	// Line is the one-indexed line number in the source data
	Line int64
	// Fragment is the one-indexed fragment number in the file
	Fragment int
	// FragmentLine is the one-indexed line number in the fragment
	FragmentLine int

	err error
}

// Unwrap returns the wrapped error.
func (e *ApplyError) Unwrap() error { return e.err }

// Error formats the wrapped error; the location fields are not part of the
// message.
func (e *ApplyError) Error() string {
	return fmt.Sprintf("%v", e.err)
}

// Zero-indexed position markers. Passing one of these to applyError selects
// which ApplyError field is set; the stored value is the marker plus one.
type (
	lineNum     int
	fragNum     int
	fragLineNum int
)

// applyError returns nil for a nil err. Otherwise it returns an *ApplyError:
// err itself if it already is one, or a new value wrapping err (io.EOF is
// replaced by io.ErrUnexpectedEOF before wrapping). Any position markers in
// args are then recorded on the result as one-indexed values; arguments of
// other types are ignored.
func applyError(err error, args ...interface{}) error {
	if err == nil {
		return nil
	}

	e, wrapped := err.(*ApplyError)
	if !wrapped {
		cause := err
		if cause == io.EOF {
			cause = io.ErrUnexpectedEOF
		}
		e = &ApplyError{err: cause}
	}

	for i := range args {
		switch pos := args[i].(type) {
		case lineNum:
			e.Line = int64(pos) + 1
		case fragNum:
			e.Fragment = int(pos) + 1
		case fragLineNum:
			e.FragmentLine = int(pos) + 1
		}
	}
	return e
}

// Sentinel errors reported by the appliers for misuse of their lifecycle.
var (
	errApplyInProgress = errors.New("gitdiff: incompatible apply in progress")
	errApplierClosed   = errors.New("gitdiff: applier is closed")
)
-
-// Apply applies the changes in f to src, writing the result to dst. It can
-// apply both text and binary changes.
-//
-// If an error occurs while applying, Apply returns an *ApplyError that
-// annotates the error with additional information. If the error is because of
-// a conflict with the source, the wrapped error will be a *Conflict.
-func Apply(dst io.Writer, src io.ReaderAt, f *File) error {
- if f.IsBinary {
- if len(f.TextFragments) > 0 {
- return applyError(errors.New("binary file contains text fragments"))
- }
- if f.BinaryFragment == nil {
- return applyError(errors.New("binary file does not contain a binary fragment"))
- }
- } else {
- if f.BinaryFragment != nil {
- return applyError(errors.New("text file contains a binary fragment"))
- }
- }
-
- switch {
- case f.BinaryFragment != nil:
- applier := NewBinaryApplier(dst, src)
- if err := applier.ApplyFragment(f.BinaryFragment); err != nil {
- return err
- }
- return applier.Close()
-
- case len(f.TextFragments) > 0:
- frags := make([]*TextFragment, len(f.TextFragments))
- copy(frags, f.TextFragments)
-
- sort.Slice(frags, func(i, j int) bool {
- return frags[i].OldPosition < frags[j].OldPosition
- })
-
- // TODO(bkeyes): consider merging overlapping fragments
- // right now, the application fails if fragments overlap, but it should be
- // possible to precompute the result of applying them in order
-
- applier := NewTextApplier(dst, src)
- for i, frag := range frags {
- if err := applier.ApplyFragment(frag); err != nil {
- return applyError(err, fragNum(i))
- }
- }
- return applier.Close()
-
- default:
- // nothing to apply, just copy all the data
- _, err := copyFrom(dst, src, 0)
- return err
- }
-}
diff --git a/pkg/gitdiff/apply_binary.go b/pkg/gitdiff/apply_binary.go
deleted file mode 100644
index b34772d..0000000
--- a/pkg/gitdiff/apply_binary.go
+++ /dev/null
@@ -1,206 +0,0 @@
-package gitdiff
-
-import (
- "errors"
- "fmt"
- "io"
-)
-
// BinaryApplier applies the binary change described by a fragment to source
// data. It must be closed after use.
type BinaryApplier struct {
	dst io.Writer
	src io.ReaderAt

	closed bool
	dirty  bool
}

// NewBinaryApplier creates a BinaryApplier that reads data from src and
// writes modified data to dst.
func NewBinaryApplier(dst io.Writer, src io.ReaderAt) *BinaryApplier {
	return &BinaryApplier{dst: dst, src: src}
}
-
-// ApplyFragment applies the changes in the fragment f and writes the result to
-// dst. ApplyFragment can be called at most once.
-//
-// If an error occurs while applying, ApplyFragment returns an *ApplyError that
-// annotates the error with additional information. If the error is because of
-// a conflict between a fragment and the source, the wrapped error will be a
-// *Conflict.
-func (a *BinaryApplier) ApplyFragment(f *BinaryFragment) error {
- if f == nil {
- return applyError(errors.New("nil fragment"))
- }
- if a.closed {
- return applyError(errApplierClosed)
- }
- if a.dirty {
- return applyError(errApplyInProgress)
- }
-
- // mark an apply as in progress, even if it fails before making changes
- a.dirty = true
-
- switch f.Method {
- case BinaryPatchLiteral:
- if _, err := a.dst.Write(f.Data); err != nil {
- return applyError(err)
- }
- case BinaryPatchDelta:
- if err := applyBinaryDeltaFragment(a.dst, a.src, f.Data); err != nil {
- return applyError(err)
- }
- default:
- return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method))
- }
- return nil
-}
-
-// Close writes any data following the last applied fragment and prevents
-// future calls to ApplyFragment.
-func (a *BinaryApplier) Close() (err error) {
- if a.closed {
- return nil
- }
-
- a.closed = true
- if !a.dirty {
- _, err = copyFrom(a.dst, a.src, 0)
- } else {
- // do nothing, applying a binary fragment copies all data
- }
- return err
-}
-
-func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error {
- srcSize, delta := readBinaryDeltaSize(frag)
- if err := checkBinarySrcSize(src, srcSize); err != nil {
- return err
- }
-
- dstSize, delta := readBinaryDeltaSize(delta)
-
- for len(delta) > 0 {
- op := delta[0]
- if op == 0 {
- return errors.New("invalid delta opcode 0")
- }
-
- var n int64
- var err error
- switch op & 0x80 {
- case 0x80:
- n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src)
- case 0x00:
- n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:])
- }
- if err != nil {
- return err
- }
- dstSize -= n
- }
-
- if dstSize != 0 {
- return errors.New("corrupt binary delta: insufficient or extra data")
- }
- return nil
-}
-
// readBinaryDeltaSize reads a variable-length size from the start of a
// delta-encoded binary fragment, returning the size and the unused data.
// The size is encoded as:
//
//	[[1xxxxxxx]...] [0xxxxxxx]
//
// in little-endian order, with 7 bits of the value per byte. If d ends before
// a byte with a clear high bit is seen, the bits read so far are returned
// with a nil rest.
func readBinaryDeltaSize(d []byte) (size int64, rest []byte) {
	var shift uint
	for i := 0; i < len(d); i++ {
		size |= int64(d[i]&0x7F) << shift
		shift += 7
		if d[i]&0x80 == 0 {
			return size, d[i+1:]
		}
	}
	return size, nil
}
-
// applyBinaryDeltaAdd applies an add opcode from a delta-encoded binary
// fragment, returning the amount of data written and the unused part of the
// fragment. An add operation takes the form:
//
//	[0xxxxxxx][[data1]...]
//
// where the opcode value is the number of data bytes that follow it. The
// count is taken from op as-is, so op is expected to have its high bit clear.
// See also pack-format.txt in the Git source.
func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) {
	count := int(op)
	if count > len(delta) {
		return 0, delta, errors.New("corrupt binary delta: incomplete add")
	}
	payload, remainder := delta[:count], delta[count:]
	_, err = w.Write(payload)
	return int64(count), remainder, err
}
-
// applyBinaryDeltaCopy applies a copy opcode from a delta-encoded binary
// fragment, returning the amount of data written and the unused part of the
// fragment. A copy operation takes the form:
//
//	[1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3]
//
// where the lower seven bits of the opcode determine which non-zero offset and
// size bytes are present in little-endian order: if bit 0 is set, offset1 is
// present, etc. If no offset or size bytes are present, offset is 0 and size
// is 0x10000. See also pack-format.txt in the Git source.
//
// The copied range is read from src with ReadAt. A read that fills the whole
// range is accepted even when the reader reports io.EOF alongside it, as the
// io.ReaderAt contract allows for reads ending at the end of the input. Any
// other read error, including a short read, is returned unchanged with n = 0.
func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) {
	const defaultSize = 0x10000

	// unpack assembles a little-endian value from the operand bytes whose
	// presence bits (bits start..start+bits-1 of op) are set, consuming them
	// from delta. It records an error in err if delta runs out.
	unpack := func(start, bits uint) (v int64) {
		for i := uint(0); i < bits; i++ {
			mask := byte(1 << (i + start))
			if op&mask > 0 {
				if len(delta) == 0 {
					err = errors.New("corrupt binary delta: incomplete copy")
					return
				}
				v |= int64(delta[0]) << (8 * i)
				delta = delta[1:]
			}
		}
		return
	}

	offset := unpack(0, 4)
	size := unpack(4, 3)
	if err != nil {
		return 0, delta, err
	}
	if size == 0 {
		size = defaultSize
	}

	// TODO(bkeyes): consider pooling these buffers
	b := make([]byte, size)
	if nr, rerr := src.ReadAt(b, offset); rerr != nil {
		// only a short read is a failure; EOF with a full buffer is valid
		if rerr != io.EOF || nr != len(b) {
			return 0, delta, rerr
		}
	}

	_, err = w.Write(b)
	return size, delta, err
}
-
-func checkBinarySrcSize(r io.ReaderAt, size int64) error {
- ok, err := isLen(r, size)
- if err != nil {
- return err
- }
- if !ok {
- return &Conflict{"fragment src size does not match actual src size"}
- }
- return nil
-}
diff --git a/pkg/gitdiff/apply_text.go b/pkg/gitdiff/apply_text.go
deleted file mode 100644
index 8d0accb..0000000
--- a/pkg/gitdiff/apply_text.go
+++ /dev/null
@@ -1,158 +0,0 @@
-package gitdiff
-
-import (
- "errors"
- "io"
-)
-
-// TextApplier applies changes described in text fragments to source data. If
-// changes are described in multiple fragments, those fragments must be applied
-// in order. The applier must be closed after use.
-//
-// By default, TextApplier operates in "strict" mode, where fragment content
-// and positions must exactly match those of the source.
-type TextApplier struct {
- dst io.Writer
- src io.ReaderAt
- lineSrc LineReaderAt
- nextLine int64
-
- closed bool
- dirty bool
-}
-
-// NewTextApplier creates a TextApplier that reads data from src and writes
-// modified data to dst. If src implements LineReaderAt, it is used directly.
-func NewTextApplier(dst io.Writer, src io.ReaderAt) *TextApplier {
- a := TextApplier{
- dst: dst,
- src: src,
- }
-
- if lineSrc, ok := src.(LineReaderAt); ok {
- a.lineSrc = lineSrc
- } else {
- a.lineSrc = &lineReaderAt{r: src}
- }
-
- return &a
-}
-
// ApplyFragment applies the changes in the fragment f, writing unwritten data
// before the start of the fragment and any changes from the fragment. If
// multiple text fragments apply to the same content, ApplyFragment must be
// called in order of increasing start position. As a result, each fragment can
// be applied at most once.
//
// If an error occurs while applying, ApplyFragment returns an *ApplyError that
// annotates the error with additional information. If the error is because of
// a conflict between the fragment and the source, the wrapped error will be a
// *Conflict.
func (a *TextApplier) ApplyFragment(f *TextFragment) error {
	if a.closed {
		return applyError(errApplierClosed)
	}

	// mark an apply as in progress, even if it fails before making changes
	a.dirty = true

	// application code assumes fragment fields are consistent
	if err := f.Validate(); err != nil {
		return applyError(err)
	}

	// lines are 0-indexed, positions are 1-indexed (but new files have position = 0)
	fragStart := f.OldPosition - 1
	if fragStart < 0 {
		fragStart = 0
	}
	fragEnd := fragStart + f.OldLines

	// a.nextLine is the first source line not yet consumed by an earlier
	// fragment; a fragment starting before it overlaps already-applied content
	start := a.nextLine
	if fragStart < start {
		return applyError(&Conflict{"fragment overlaps with an applied fragment"})
	}

	// an old position of 0 is only accepted when the source is empty
	if f.OldPosition == 0 {
		ok, err := isLen(a.src, 0)
		if err != nil {
			return applyError(err)
		}
		if !ok {
			return applyError(&Conflict{"cannot create new file from non-empty src"})
		}
	}

	// read every source line from the last consumed position through the end
	// of the fragment: the untouched gap first, then the fragment's old lines
	preimage := make([][]byte, fragEnd-start)
	n, err := a.lineSrc.ReadLinesAt(preimage, start)
	if err != nil {
		// an EOF indicates that source file is shorter than the patch expects,
		// which should be reported as a conflict rather than a generic error
		if errors.Is(err, io.EOF) {
			err = &Conflict{"src has fewer lines than required by fragment"}
		}
		return applyError(err, lineNum(start+int64(n)))
	}

	// copy leading data before the fragment starts
	for i, line := range preimage[:fragStart-start] {
		if _, err := a.dst.Write(line); err != nil {
			// remember how far the copy got before the write failed
			a.nextLine = start + int64(i)
			return applyError(err, lineNum(a.nextLine))
		}
	}
	preimage = preimage[fragStart-start:]

	// apply the changes in the fragment
	// used counts the source lines consumed so far, i.e. the index into
	// preimage of the next line that line.Old() lines are compared against
	used := int64(0)
	for i, line := range f.Lines {
		if err := applyTextLine(a.dst, line, preimage, used); err != nil {
			a.nextLine = fragStart + used
			return applyError(err, lineNum(a.nextLine), fragLineNum(i))
		}
		if line.Old() {
			used++
		}
	}
	a.nextLine = fragStart + used

	// new position of +0,0 mean a full delete, so check for leftovers
	if f.NewPosition == 0 && f.NewLines == 0 {
		var b [1][]byte
		n, err := a.lineSrc.ReadLinesAt(b[:], a.nextLine)
		// io.EOF here just means nothing is left, which is the expected case
		if err != nil && err != io.EOF {
			return applyError(err, lineNum(a.nextLine))
		}
		if n > 0 {
			return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine))
		}
	}

	return nil
}
-
-func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) {
- if line.Old() && string(preimage[i]) != line.Line {
- return &Conflict{"fragment line does not match src line"}
- }
- if line.New() {
- _, err = io.WriteString(dst, line.Line)
- }
- return err
-}
-
-// Close writes any data following the last applied fragment and prevents
-// future calls to ApplyFragment.
-func (a *TextApplier) Close() (err error) {
- if a.closed {
- return nil
- }
-
- a.closed = true
- if !a.dirty {
- _, err = copyFrom(a.dst, a.src, 0)
- } else {
- _, err = copyLinesFrom(a.dst, a.lineSrc, a.nextLine)
- }
- return err
-}
diff --git a/pkg/gitdiff/base85.go b/pkg/gitdiff/base85.go
deleted file mode 100644
index 86db117..0000000
--- a/pkg/gitdiff/base85.go
+++ /dev/null
@@ -1,91 +0,0 @@
-package gitdiff
-
-import (
- "fmt"
-)
-
var (
	// b85Table maps an alphabet character to its value (0-84). It is filled
	// in by init from b85Alpha.
	b85Table map[byte]byte
	// b85Alpha is the Base85 alphabet, indexed by digit value.
	b85Alpha = []byte(
		"0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "!#$%&()*+-;<=>?@^_`{|}~",
	)
)

func init() {
	b85Table = make(map[byte]byte, len(b85Alpha))
	for i, c := range b85Alpha {
		b85Table[c] = byte(i)
	}
}

// base85Decode decodes Base85-encoded data from src into dst. It uses the
// alphabet defined by base85.c in the Git source tree. src must contain at
// least len(dst) bytes of encoded data.
//
// Every 5 characters of src decode to one big-endian 32-bit word; bytes of a
// word that do not fit in dst are dropped. An error is returned if src
// contains a byte outside the alphabet, if a 5-character group encodes a
// value that does not fit in 32 bits, if src does not end on a group
// boundary, or if src decodes to fewer than len(dst) bytes.
func base85Decode(dst, src []byte) error {
	// largest value a valid 5-character group may encode
	const maxGroup = 1<<32 - 1

	// v is wider than 32 bits so that an oversized group can be detected
	// instead of silently wrapping around
	var v uint64
	var n, ndst int
	for i, c := range src {
		d, ok := b85Table[c]
		if !ok {
			return fmt.Errorf("invalid base85 byte at index %d: 0x%X", i, src[i])
		}
		v = 85*v + uint64(d)
		n++
		if n < 5 {
			continue
		}

		if v > maxGroup {
			return fmt.Errorf("invalid base85 sequence ending at index %d: value exceeds 32 bits", i)
		}
		word := uint32(v)
		rem := len(dst) - ndst
		for j := 0; j < 4 && j < rem; j++ {
			dst[ndst] = byte(word >> 24)
			ndst++
			word <<= 8
		}
		v = 0
		n = 0
	}
	if n > 0 {
		return fmt.Errorf("base85 data terminated by underpadded sequence")
	}
	if ndst < len(dst) {
		return fmt.Errorf("base85 data underrun: %d < %d", ndst, len(dst))
	}
	return nil
}

// base85Encode encodes src in Base85, writing the result to dst. It uses the
// alphabet defined by base85.c in the Git source tree. dst must have room for
// base85Len(len(src)) bytes; a trailing group of fewer than 4 source bytes is
// zero-padded before encoding.
func base85Encode(dst, src []byte) {
	var di, si int

	// encode writes the 5 digits of v at dst[di:], most significant first
	encode := func(v uint32) {
		dst[di+0] = b85Alpha[(v/(85*85*85*85))%85]
		dst[di+1] = b85Alpha[(v/(85*85*85))%85]
		dst[di+2] = b85Alpha[(v/(85*85))%85]
		dst[di+3] = b85Alpha[(v/85)%85]
		dst[di+4] = b85Alpha[v%85]
	}

	n := (len(src) / 4) * 4
	for si < n {
		encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3]))
		si += 4
		di += 5
	}

	var v uint32
	switch len(src) - si {
	case 3:
		v |= uint32(src[si+2]) << 8
		fallthrough
	case 2:
		v |= uint32(src[si+1]) << 16
		fallthrough
	case 1:
		v |= uint32(src[si+0]) << 24
		encode(v)
	}
}

// base85Len returns the length of n bytes of Base85 encoded data.
func base85Len(n int) int {
	return (n + 3) / 4 * 5
}
diff --git a/pkg/gitdiff/binary.go b/pkg/gitdiff/binary.go
deleted file mode 100644
index 282e323..0000000
--- a/pkg/gitdiff/binary.go
+++ /dev/null
@@ -1,186 +0,0 @@
-package gitdiff
-
-import (
- "bytes"
- "compress/zlib"
- "fmt"
- "io"
- "io/ioutil"
- "strconv"
- "strings"
-)
-
// ParseBinaryFragments parses the binary patch content for f starting at the
// parser's current line. If the current line is not a binary marker, it
// returns 0 and a nil error and leaves f untouched. Otherwise it sets
// f.IsBinary and, when the marker announces data, parses the forward fragment
// into f.BinaryFragment and an optional reverse fragment into
// f.ReverseBinaryFragment.
//
// The returned n is 1 once the forward fragment has been parsed (even if
// parsing the reverse fragment then fails) and 0 otherwise.
func (p *parser) ParseBinaryFragments(f *File) (n int, err error) {
	isBinary, hasData, err := p.ParseBinaryMarker()
	if err != nil || !isBinary {
		return 0, err
	}

	f.IsBinary = true
	if !hasData {
		// a "files differ" marker carries no fragment data
		return 0, nil
	}

	forward, err := p.ParseBinaryFragmentHeader()
	if err != nil {
		return 0, err
	}
	if forward == nil {
		return 0, p.Errorf(0, "missing data for binary patch")
	}
	if err := p.ParseBinaryChunk(forward); err != nil {
		return 0, err
	}
	f.BinaryFragment = forward

	// valid for reverse to not exist, but it must be valid if present
	reverse, err := p.ParseBinaryFragmentHeader()
	if err != nil {
		return 1, err
	}
	if reverse == nil {
		return 1, nil
	}
	if err := p.ParseBinaryChunk(reverse); err != nil {
		return 1, err
	}
	f.ReverseBinaryFragment = reverse

	return 1, nil
}
-
// ParseBinaryMarker checks whether the current line marks a binary patch.
// isBinary is true for the "GIT binary patch" line and for lines accepted by
// isBinaryNoDataMarker; hasData is true only for the former. When a marker is
// found the parser advances past it; reaching the end of input while doing so
// is not an error. If the line is not a marker, the parser does not advance.
func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) {
	line := p.Line(0)
	switch {
	case line == "GIT binary patch\n":
		hasData = true
	case isBinaryNoDataMarker(line):
		// binary, but hasData stays false
	default:
		return false, false, nil
	}

	if err = p.Next(); err != nil && err != io.EOF {
		return false, false, err
	}
	return true, hasData, nil
}
-
// isBinaryNoDataMarker reports whether line has the form
// "Binary files ... differ\n" or "Files ... differ\n".
func isBinaryNoDataMarker(line string) bool {
	if !strings.HasSuffix(line, " differ\n") {
		return false
	}
	for _, prefix := range [...]string{"Binary files ", "Files "} {
		if strings.HasPrefix(line, prefix) {
			return true
		}
	}
	return false
}
-
// ParseBinaryFragmentHeader parses a "delta <size>" or "literal <size>" line
// at the parser's current position. It returns a nil fragment and nil error,
// without advancing, when the line does not have that shape. On success the
// returned fragment has Method and Size set and the parser has advanced past
// the header; reaching the end of input while advancing is not an error.
func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) {
	// split into the method keyword and the size
	parts := strings.SplitN(strings.TrimSuffix(p.Line(0), "\n"), " ", 2)
	if len(parts) < 2 {
		return nil, nil
	}

	frag := &BinaryFragment{}
	switch parts[0] {
	case "delta":
		frag.Method = BinaryPatchDelta
	case "literal":
		frag.Method = BinaryPatchLiteral
	default:
		return nil, nil
	}

	var err error
	if frag.Size, err = strconv.ParseInt(parts[1], 10, 64); err != nil {
		// report only the underlying cause, not the full NumError text
		nerr := err.(*strconv.NumError)
		return nil, p.Errorf(0, "binary patch: invalid size: %v", nerr.Err)
	}

	if err := p.Next(); err != nil && err != io.EOF {
		return nil, err
	}
	return frag, nil
}
-
// ParseBinaryChunk reads the base85-encoded data lines of a binary fragment,
// starting at the parser's current line and ending at the first blank line,
// then inflates the decoded bytes into frag via inflateBinaryChunk. On
// success the parser is positioned after the terminating blank line.
func (p *parser) ParseBinaryChunk(frag *BinaryFragment) error {
	// Binary fragments are encoded as a series of base85 encoded lines. Each
	// line starts with a character in [A-Za-z] giving the number of bytes on
	// the line, where A = 1 and z = 52, and ends with a newline character.
	//
	// The base85 encoding means each line is a multiple of 5 characters + 2
	// additional characters for the length byte and the newline. The fragment
	// ends with a blank line.
	const (
		shortestValidLine = "A00000\n"
		maxBytesPerLine   = 52
	)

	var data bytes.Buffer
	// scratch buffer reused for the decoded bytes of each line
	buf := make([]byte, maxBytesPerLine)
	for {
		line := p.Line(0)
		if line == "\n" {
			break
		}
		if len(line) < len(shortestValidLine) || (len(line)-2)%5 != 0 {
			return p.Errorf(0, "binary patch: corrupt data line")
		}

		// first character is the length byte; the last one is dropped as the
		// line terminator
		byteCount, seq := int(line[0]), line[1:len(line)-1]
		switch {
		case 'A' <= byteCount && byteCount <= 'Z':
			byteCount = byteCount - 'A' + 1
		case 'a' <= byteCount && byteCount <= 'z':
			byteCount = byteCount - 'a' + 27
		default:
			return p.Errorf(0, "binary patch: invalid length byte")
		}

		// base85 encodes every 4 bytes into 5 characters, with up to 3 bytes of end padding
		maxByteCount := len(seq) / 5 * 4
		if byteCount > maxByteCount || byteCount < maxByteCount-3 {
			return p.Errorf(0, "binary patch: incorrect byte count")
		}

		if err := base85Decode(buf[:byteCount], []byte(seq)); err != nil {
			return p.Errorf(0, "binary patch: %v", err)
		}
		data.Write(buf[:byteCount])

		if err := p.Next(); err != nil {
			// input ended before the blank line that terminates the chunk
			if err == io.EOF {
				return p.Errorf(0, "binary patch: unexpected EOF")
			}
			return err
		}
	}

	if err := inflateBinaryChunk(frag, &data); err != nil {
		return p.Errorf(0, "binary patch: %v", err)
	}

	// consume the empty line that ended the fragment
	if err := p.Next(); err != nil && err != io.EOF {
		return err
	}
	return nil
}
-
-func inflateBinaryChunk(frag *BinaryFragment, r io.Reader) error {
- zr, err := zlib.NewReader(r)
- if err != nil {
- return err
- }
-
- data, err := ioutil.ReadAll(zr)
- if err != nil {
- return err
- }
- if err := zr.Close(); err != nil {
- return err
- }
-
- if int64(len(data)) != frag.Size {
- return fmt.Errorf("%d byte fragment inflated to %d", frag.Size, len(data))
- }
- frag.Data = data
- return nil
-}
diff --git a/pkg/gitdiff/file_header.go b/pkg/gitdiff/file_header.go
deleted file mode 100644
index 7ae4bc9..0000000
--- a/pkg/gitdiff/file_header.go
+++ /dev/null
@@ -1,546 +0,0 @@
-package gitdiff
-
-import (
- "fmt"
- "io"
- "os"
- "strconv"
- "strings"
- "time"
-)
-
const (
	// devNull is the file name compared against parsed header names to
	// detect a missing old or new file; parseName returns it unmodified.
	devNull = "/dev/null"
)
-
// ParseNextFileHeader finds and parses the next file header in the stream. If
// a header is found, it returns a file and all input before the header. It
// returns nil if no headers are found before the end of the input.
//
// A text fragment header encountered before any file header is reported as
// an error, since the fragment has no file to belong to.
func (p *parser) ParseNextFileHeader() (*File, string, error) {
	var preamble strings.Builder
	// declared up front so the goto below does not jump over a declaration
	var file *File
	for {
		// check for disconnected fragment headers (corrupt patch)
		frag, err := p.ParseTextFragmentHeader()
		if err != nil {
			// not a valid header, nothing to worry about
			goto NextLine
		}
		if frag != nil {
			return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header())
		}

		// check for a git-generated patch
		file, err = p.ParseGitFileHeader()
		if err != nil {
			return nil, "", err
		}
		if file != nil {
			return file, preamble.String(), nil
		}

		// check for a "traditional" patch
		file, err = p.ParseTraditionalFileHeader()
		if err != nil {
			return nil, "", err
		}
		if file != nil {
			return file, preamble.String(), nil
		}

	NextLine:
		// the current line is not part of a header: keep it as preamble
		preamble.WriteString(p.Line(0))
		if err := p.Next(); err != nil {
			if err == io.EOF {
				break
			}
			return nil, "", err
		}
	}
	return nil, preamble.String(), nil
}
-
// ParseGitFileHeader parses a header that starts with a "diff --git " line at
// the parser's current position. It returns a nil file and nil error, without
// advancing, if the current line does not have that prefix. Otherwise it
// feeds the following lines to parseGitHeaderData until that function reports
// the end of the header or the input ends, and returns the populated file.
//
// If the metadata lines name neither the old nor the new file, both names
// fall back to the name extracted from the "diff --git" line. An error is
// returned if the required names are still missing afterwards.
func (p *parser) ParseGitFileHeader() (*File, error) {
	const prefix = "diff --git "

	if !strings.HasPrefix(p.Line(0), prefix) {
		return nil, nil
	}
	header := p.Line(0)[len(prefix):]

	defaultName, err := parseGitHeaderName(header)
	if err != nil {
		return nil, p.Errorf(0, "git file header: %v", err)
	}

	f := &File{}
	for {
		// inspect the line after the current one, then advance onto it
		end, err := parseGitHeaderData(f, p.Line(1), defaultName)
		if err != nil {
			return nil, p.Errorf(1, "git file header: %v", err)
		}

		if err := p.Next(); err != nil {
			if err == io.EOF {
				break
			}
			return nil, err
		}

		if end {
			break
		}
	}

	if f.OldName == "" && f.NewName == "" {
		if defaultName == "" {
			return nil, p.Errorf(0, "git file header: missing filename information")
		}
		f.OldName = defaultName
		f.NewName = defaultName
	}

	// a missing name is only acceptable on the side that does not exist:
	// no new name for a delete, no old name for a new file
	if (f.NewName == "" && !f.IsDelete) || (f.OldName == "" && !f.IsNew) {
		return nil, p.Errorf(0, "git file header: missing filename information")
	}

	return f, nil
}
-
// ParseTraditionalFileHeader parses a "--- old" / "+++ new" header pair at
// the parser's current position. It returns a nil file and nil error, without
// advancing, unless both lines are present and the line after them looks like
// a fragment header. On a match the parser advances past the two header lines
// and the returned file has its names and IsNew/IsDelete flags set.
func (p *parser) ParseTraditionalFileHeader() (*File, error) {
	const shortestValidFragHeader = "@@ -1 +1 @@\n"
	const (
		oldPrefix = "--- "
		newPrefix = "+++ "
	)

	oldLine, newLine := p.Line(0), p.Line(1)

	if !strings.HasPrefix(oldLine, oldPrefix) || !strings.HasPrefix(newLine, newPrefix) {
		return nil, nil
	}
	// heuristic: only a file header if followed by a (probable) fragment header
	if len(p.Line(2)) < len(shortestValidFragHeader) || !strings.HasPrefix(p.Line(2), "@@ -") {
		return nil, nil
	}

	// advance past the first two lines so parser is after the header
	// no EOF check needed because we know there are >=3 valid lines
	if err := p.Next(); err != nil {
		return nil, err
	}
	if err := p.Next(); err != nil {
		return nil, err
	}

	// names end at the first tab; no prefix components are dropped
	oldName, _, err := parseName(oldLine[len(oldPrefix):], '\t', 0)
	if err != nil {
		return nil, p.Errorf(0, "file header: %v", err)
	}

	newName, _, err := parseName(newLine[len(newPrefix):], '\t', 0)
	if err != nil {
		return nil, p.Errorf(1, "file header: %v", err)
	}

	f := &File{}
	switch {
	// NOTE(review): hasEpochTimestamp is defined elsewhere; presumably it
	// detects the epoch timestamp that marks a nonexistent file - confirm
	case oldName == devNull || hasEpochTimestamp(oldLine):
		f.IsNew = true
		f.NewName = newName
	case newName == devNull || hasEpochTimestamp(newLine):
		f.IsDelete = true
		f.OldName = oldName
	default:
		// if old name is a prefix of new name, use that instead
		// this avoids picking variants like "file.bak" or "file~"
		if strings.HasPrefix(newName, oldName) {
			f.OldName = oldName
			f.NewName = oldName
		} else {
			f.OldName = newName
			f.NewName = newName
		}
	}

	return f, nil
}
-
// parseGitHeaderName extracts a default file name from the Git file header
// line. This is required for mode-only changes and creation/deletion of empty
// files. Other types of patch include the file name(s) in the header data.
// If the names in the header do not match because the patch is a rename,
// return an empty default name.
//
// header is the text following "diff --git " and may end with a newline. An
// empty name with a nil error is also returned when header is empty or when
// no split into two equal names can be found.
func parseGitHeaderName(header string) (string, error) {
	header = strings.TrimSuffix(header, "\n")
	if len(header) == 0 {
		return "", nil
	}

	var err error
	var first, second string

	// there are 4 cases to account for:
	//
	// 1) unquoted unquoted
	// 2) unquoted "quoted"
	// 3) "quoted" unquoted
	// 4) "quoted" "quoted"
	//
	quote := strings.IndexByte(header, '"')
	switch {
	case quote < 0:
		// case 1
		// first holds the whole header for now; it is split further below
		first = header

	case quote > 0:
		// case 2
		// the byte just before the quote must be the separator
		first = header[:quote-1]
		if !isSpace(header[quote-1]) {
			return "", fmt.Errorf("missing separator")
		}

		second, _, err = parseQuotedName(header[quote:])
		if err != nil {
			return "", err
		}

	case quote == 0:
		// case 3 or case 4
		var n int
		first, n, err = parseQuotedName(header)
		if err != nil {
			return "", err
		}

		// git accepts multiple spaces after a quoted name, but not after an
		// unquoted name, since the name might end with one or more spaces
		for n < len(header) && isSpace(header[n]) {
			n++
		}
		if n == len(header) {
			// only one name is present
			return "", nil
		}

		if header[n] == '"' {
			second, _, err = parseQuotedName(header[n:])
			if err != nil {
				return "", err
			}
		} else {
			second = header[n:]
		}
	}

	// NOTE(review): trimTreePrefix is defined elsewhere; presumably it drops
	// the given number of leading path components (the a/ and b/ prefixes)
	first = trimTreePrefix(first, 1)
	if second != "" {
		if first == trimTreePrefix(second, 1) {
			return first, nil
		}
		return "", nil
	}

	// at this point, both names are unquoted (case 1)
	// since names may contain spaces, we can't use a known separator
	// instead, look for a split that produces two equal names

	for i := 0; i < len(first)-1; i++ {
		if !isSpace(first[i]) {
			continue
		}
		second = trimTreePrefix(first[i+1:], 1)
		if name := first[:i]; name == second {
			return name, nil
		}
	}
	return "", nil
}
-
-// parseGitHeaderData parses a single line of metadata from a Git file header.
-// It returns true when header parsing is complete; in that case, line was the
-// first line of non-header content.
-func parseGitHeaderData(f *File, line, defaultName string) (end bool, err error) {
- if len(line) > 0 && line[len(line)-1] == '\n' {
- line = line[:len(line)-1]
- }
-
- for _, hdr := range []struct {
- prefix string
- end bool
- parse func(*File, string, string) error
- }{
- {"@@ -", true, nil},
- {"--- ", false, parseGitHeaderOldName},
- {"+++ ", false, parseGitHeaderNewName},
- {"old mode ", false, parseGitHeaderOldMode},
- {"new mode ", false, parseGitHeaderNewMode},
- {"deleted file mode ", false, parseGitHeaderDeletedMode},
- {"new file mode ", false, parseGitHeaderCreatedMode},
- {"copy from ", false, parseGitHeaderCopyFrom},
- {"copy to ", false, parseGitHeaderCopyTo},
- {"rename old ", false, parseGitHeaderRenameFrom},
- {"rename new ", false, parseGitHeaderRenameTo},
- {"rename from ", false, parseGitHeaderRenameFrom},
- {"rename to ", false, parseGitHeaderRenameTo},
- {"similarity index ", false, parseGitHeaderScore},
- {"dissimilarity index ", false, parseGitHeaderScore},
- {"index ", false, parseGitHeaderIndex},
- } {
- if strings.HasPrefix(line, hdr.prefix) {
- if hdr.parse != nil {
- err = hdr.parse(f, line[len(hdr.prefix):], defaultName)
- }
- return hdr.end, err
- }
- }
-
- // unknown line indicates the end of the header
- // this usually happens if the diff is empty
- return true, nil
-}
-
-func parseGitHeaderOldName(f *File, line, defaultName string) error {
- name, _, err := parseName(line, '\t', 1)
- if err != nil {
- return err
- }
- if f.OldName == "" && !f.IsNew {
- f.OldName = name
- return nil
- }
- return verifyGitHeaderName(name, f.OldName, f.IsNew, "old")
-}
-
-func parseGitHeaderNewName(f *File, line, defaultName string) error {
- name, _, err := parseName(line, '\t', 1)
- if err != nil {
- return err
- }
- if f.NewName == "" && !f.IsDelete {
- f.NewName = name
- return nil
- }
- return verifyGitHeaderName(name, f.NewName, f.IsDelete, "new")
-}
-
-func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) {
- f.OldMode, err = parseMode(strings.TrimSpace(line))
- return
-}
-
-func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) {
- f.NewMode, err = parseMode(strings.TrimSpace(line))
- return
-}
-
-func parseGitHeaderDeletedMode(f *File, line, defaultName string) error {
- f.IsDelete = true
- f.OldName = defaultName
- return parseGitHeaderOldMode(f, line, defaultName)
-}
-
-func parseGitHeaderCreatedMode(f *File, line, defaultName string) error {
- f.IsNew = true
- f.NewName = defaultName
- return parseGitHeaderNewMode(f, line, defaultName)
-}
-
-func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) {
- f.IsCopy = true
- f.OldName, _, err = parseName(line, 0, 0)
- return
-}
-
-func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) {
- f.IsCopy = true
- f.NewName, _, err = parseName(line, 0, 0)
- return
-}
-
-func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) {
- f.IsRename = true
- f.OldName, _, err = parseName(line, 0, 0)
- return
-}
-
-func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) {
- f.IsRename = true
- f.NewName, _, err = parseName(line, 0, 0)
- return
-}
-
-func parseGitHeaderScore(f *File, line, defaultName string) error {
- score, err := strconv.ParseInt(strings.TrimSuffix(line, "%"), 10, 32)
- if err != nil {
- nerr := err.(*strconv.NumError)
- return fmt.Errorf("invalid score line: %v", nerr.Err)
- }
- if score <= 100 {
- f.Score = int(score)
- }
- return nil
-}
-
-func parseGitHeaderIndex(f *File, line, defaultName string) error {
- const sep = ".."
-
- // note that git stops parsing if the OIDs are too long to be valid
- // checking this requires knowing if the repository uses SHA1 or SHA256
- // hashes, which we don't know, so we just skip that check
-
- parts := strings.SplitN(line, " ", 2)
- oids := strings.SplitN(parts[0], sep, 2)
-
- if len(oids) < 2 {
- return fmt.Errorf("invalid index line: missing %q", sep)
- }
- f.OldOIDPrefix, f.NewOIDPrefix = oids[0], oids[1]
-
- if len(parts) > 1 {
- return parseGitHeaderOldMode(f, parts[1], defaultName)
- }
- return nil
-}
-
-func parseMode(s string) (os.FileMode, error) {
- mode, err := strconv.ParseInt(s, 8, 32)
- if err != nil {
- nerr := err.(*strconv.NumError)
- return os.FileMode(0), fmt.Errorf("invalid mode line: %v", nerr.Err)
- }
- return os.FileMode(mode), nil
-}
-
-// parseName extracts a file name from the start of a string and returns the
-// name and the index of the first character after the name. If the name is
-// unquoted and term is non-zero, parsing stops at the first occurrence of
-// term.
-//
-// If the name is exactly "/dev/null", no further processing occurs. Otherwise,
-// if dropPrefix is greater than zero, that number of prefix components
-// separated by forward slashes are dropped from the name and any duplicate
-// slashes are collapsed.
-func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) {
- if len(s) > 0 && s[0] == '"' {
- name, n, err = parseQuotedName(s)
- } else {
- name, n, err = parseUnquotedName(s, term)
- }
- if err != nil {
- return "", 0, err
- }
- if name == devNull {
- return name, n, nil
- }
- return cleanName(name, dropPrefix), n, nil
-}
-
-func parseQuotedName(s string) (name string, n int, err error) {
- for n = 1; n < len(s); n++ {
- if s[n] == '"' && s[n-1] != '\\' {
- n++
- break
- }
- }
- if n == 2 {
- return "", 0, fmt.Errorf("missing name")
- }
- if name, err = strconv.Unquote(s[:n]); err != nil {
- return "", 0, err
- }
- return name, n, err
-}
-
-func parseUnquotedName(s string, term byte) (name string, n int, err error) {
- for n = 0; n < len(s); n++ {
- if s[n] == '\n' {
- break
- }
- if term > 0 && s[n] == term {
- break
- }
- }
- if n == 0 {
- return "", 0, fmt.Errorf("missing name")
- }
- return s[:n], n, nil
-}
-
-// verifyGitHeaderName checks a parsed name against state set by previous lines
-func verifyGitHeaderName(parsed, existing string, isNull bool, side string) error {
- if existing != "" {
- if isNull {
- return fmt.Errorf("expected %s, but filename is set to %s", devNull, existing)
- }
- if existing != parsed {
- return fmt.Errorf("inconsistent %s filename", side)
- }
- }
- if isNull && parsed != devNull {
- return fmt.Errorf("expected %s", devNull)
- }
- return nil
-}
-
-// cleanName removes double slashes and drops prefix segments.
-func cleanName(name string, drop int) string {
- var b strings.Builder
- for i := 0; i < len(name); i++ {
- if name[i] == '/' {
- if i < len(name)-1 && name[i+1] == '/' {
- continue
- }
- if drop > 0 {
- drop--
- b.Reset()
- continue
- }
- }
- b.WriteByte(name[i])
- }
- return b.String()
-}
-
-// trimTreePrefix removes up to n leading directory components from name.
-func trimTreePrefix(name string, n int) string {
- i := 0
- for ; i < len(name) && n > 0; i++ {
- if name[i] == '/' {
- n--
- }
- }
- return name[i:]
-}
-
-// hasEpochTimestamp returns true if the string ends with a POSIX-formatted
-// timestamp for the UNIX epoch after a tab character. According to git, this
-// is used by GNU diff to mark creations and deletions.
-func hasEpochTimestamp(s string) bool {
- const posixTimeLayout = "2006-01-02 15:04:05.9 -0700"
-
- start := strings.IndexRune(s, '\t')
- if start < 0 {
- return false
- }
-
- ts := strings.TrimSuffix(s[start+1:], "\n")
-
- // a valid timestamp can have optional ':' in zone specifier
- // remove that if it exists so we have a single format
- if len(ts) >= 3 && ts[len(ts)-3] == ':' {
- ts = ts[:len(ts)-3] + ts[len(ts)-2:]
- }
-
- t, err := time.Parse(posixTimeLayout, ts)
- if err != nil {
- return false
- }
- if !t.Equal(time.Unix(0, 0)) {
- return false
- }
- return true
-}
-
-func isSpace(c byte) bool {
- return c == ' ' || c == '\t' || c == '\n'
-}
diff --git a/pkg/gitdiff/format.go b/pkg/gitdiff/format.go
deleted file mode 100644
index d97aba9..0000000
--- a/pkg/gitdiff/format.go
+++ /dev/null
@@ -1,281 +0,0 @@
-package gitdiff
-
-import (
- "bytes"
- "compress/zlib"
- "fmt"
- "io"
- "strconv"
-)
-
-type formatter struct {
- w io.Writer
- err error
-}
-
-func newFormatter(w io.Writer) *formatter {
- return &formatter{w: w}
-}
-
-func (fm *formatter) Write(p []byte) (int, error) {
- if fm.err != nil {
- return len(p), nil
- }
- if _, err := fm.w.Write(p); err != nil {
- fm.err = err
- }
- return len(p), nil
-}
-
-func (fm *formatter) WriteString(s string) (int, error) {
- fm.Write([]byte(s))
- return len(s), nil
-}
-
-func (fm *formatter) WriteByte(c byte) error {
- fm.Write([]byte{c})
- return nil
-}
-
-func (fm *formatter) WriteQuotedName(s string) {
- qpos := 0
- for i := 0; i < len(s); i++ {
- ch := s[i]
- if q, quoted := quoteByte(ch); quoted {
- if qpos == 0 {
- fm.WriteByte('"')
- }
- fm.WriteString(s[qpos:i])
- fm.Write(q)
- qpos = i + 1
- }
- }
- fm.WriteString(s[qpos:])
- if qpos > 0 {
- fm.WriteByte('"')
- }
-}
-
-var quoteEscapeTable = map[byte]byte{
- '\a': 'a',
- '\b': 'b',
- '\t': 't',
- '\n': 'n',
- '\v': 'v',
- '\f': 'f',
- '\r': 'r',
- '"': '"',
- '\\': '\\',
-}
-
-func quoteByte(b byte) ([]byte, bool) {
- if q, ok := quoteEscapeTable[b]; ok {
- return []byte{'\\', q}, true
- }
- if b < 0x20 || b >= 0x7F {
- return []byte{
- '\\',
- '0' + (b>>6)&0o3,
- '0' + (b>>3)&0o7,
- '0' + (b>>0)&0o7,
- }, true
- }
- return nil, false
-}
-
-func (fm *formatter) FormatFile(f *File) {
- fm.WriteString("diff --git ")
-
- var aName, bName string
- switch {
- case f.OldName == "":
- aName = f.NewName
- bName = f.NewName
-
- case f.NewName == "":
- aName = f.OldName
- bName = f.OldName
-
- default:
- aName = f.OldName
- bName = f.NewName
- }
-
- fm.WriteQuotedName("a/" + aName)
- fm.WriteByte(' ')
- fm.WriteQuotedName("b/" + bName)
- fm.WriteByte('\n')
-
- if f.OldMode != 0 {
- if f.IsDelete {
- fmt.Fprintf(fm, "deleted file mode %o\n", f.OldMode)
- } else if f.NewMode != 0 {
- fmt.Fprintf(fm, "old mode %o\n", f.OldMode)
- }
- }
-
- if f.NewMode != 0 {
- if f.IsNew {
- fmt.Fprintf(fm, "new file mode %o\n", f.NewMode)
- } else if f.OldMode != 0 {
- fmt.Fprintf(fm, "new mode %o\n", f.NewMode)
- }
- }
-
- if f.Score > 0 {
- if f.IsCopy || f.IsRename {
- fmt.Fprintf(fm, "similarity index %d%%\n", f.Score)
- } else {
- fmt.Fprintf(fm, "dissimilarity index %d%%\n", f.Score)
- }
- }
-
- if f.IsCopy {
- if f.OldName != "" {
- fm.WriteString("copy from ")
- fm.WriteQuotedName(f.OldName)
- fm.WriteByte('\n')
- }
- if f.NewName != "" {
- fm.WriteString("copy to ")
- fm.WriteQuotedName(f.NewName)
- fm.WriteByte('\n')
- }
- }
-
- if f.IsRename {
- if f.OldName != "" {
- fm.WriteString("rename from ")
- fm.WriteQuotedName(f.OldName)
- fm.WriteByte('\n')
- }
- if f.NewName != "" {
- fm.WriteString("rename to ")
- fm.WriteQuotedName(f.NewName)
- fm.WriteByte('\n')
- }
- }
-
- if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" {
- fmt.Fprintf(fm, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix)
-
- // Mode is only included on the index line when it is not changing
- if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) {
- fmt.Fprintf(fm, " %o", f.OldMode)
- }
-
- fm.WriteByte('\n')
- }
-
- if f.IsBinary {
- if f.BinaryFragment == nil {
- fm.WriteString("Binary files ")
- fm.WriteQuotedName("a/" + aName)
- fm.WriteString(" and ")
- fm.WriteQuotedName("b/" + bName)
- fm.WriteString(" differ\n")
- } else {
- fm.WriteString("GIT binary patch\n")
- fm.FormatBinaryFragment(f.BinaryFragment)
- if f.ReverseBinaryFragment != nil {
- fm.FormatBinaryFragment(f.ReverseBinaryFragment)
- }
- }
- }
-
- // The "---" and "+++" lines only appear for text patches with fragments
- if len(f.TextFragments) > 0 {
- fm.WriteString("--- ")
- if f.OldName == "" {
- fm.WriteString("/dev/null")
- } else {
- fm.WriteQuotedName("a/" + f.OldName)
- }
- fm.WriteByte('\n')
-
- fm.WriteString("+++ ")
- if f.NewName == "" {
- fm.WriteString("/dev/null")
- } else {
- fm.WriteQuotedName("b/" + f.NewName)
- }
- fm.WriteByte('\n')
-
- for _, frag := range f.TextFragments {
- fm.FormatTextFragment(frag)
- }
- }
-}
-
-func (fm *formatter) FormatTextFragment(f *TextFragment) {
- fm.FormatTextFragmentHeader(f)
- fm.WriteByte('\n')
-
- for _, line := range f.Lines {
- fm.WriteString(line.Op.String())
- fm.WriteString(line.Line)
- if line.NoEOL() {
- fm.WriteString("\n\\ No newline at end of file\n")
- }
- }
-}
-
-func (fm *formatter) FormatTextFragmentHeader(f *TextFragment) {
- fmt.Fprintf(fm, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines)
- if f.Comment != "" {
- fm.WriteByte(' ')
- fm.WriteString(f.Comment)
- }
-}
-
-func (fm *formatter) FormatBinaryFragment(f *BinaryFragment) {
- const (
- maxBytesPerLine = 52
- )
-
- switch f.Method {
- case BinaryPatchDelta:
- fm.WriteString("delta ")
- case BinaryPatchLiteral:
- fm.WriteString("literal ")
- }
- fm.Write(strconv.AppendInt(nil, f.Size, 10))
- fm.WriteByte('\n')
-
- data := deflateBinaryChunk(f.Data)
- n := (len(data) / maxBytesPerLine) * maxBytesPerLine
-
- buf := make([]byte, base85Len(maxBytesPerLine))
- for i := 0; i < n; i += maxBytesPerLine {
- base85Encode(buf, data[i:i+maxBytesPerLine])
- fm.WriteByte('z')
- fm.Write(buf)
- fm.WriteByte('\n')
- }
- if remainder := len(data) - n; remainder > 0 {
- buf = buf[0:base85Len(remainder)]
-
- sizeChar := byte(remainder)
- if remainder <= 26 {
- sizeChar = 'A' + sizeChar - 1
- } else {
- sizeChar = 'a' + sizeChar - 27
- }
-
- base85Encode(buf, data[n:])
- fm.WriteByte(sizeChar)
- fm.Write(buf)
- fm.WriteByte('\n')
- }
- fm.WriteByte('\n')
-}
-
-func deflateBinaryChunk(data []byte) []byte {
- var b bytes.Buffer
-
- zw := zlib.NewWriter(&b)
- _, _ = zw.Write(data)
- _ = zw.Close()
-
- return b.Bytes()
-}
diff --git a/pkg/gitdiff/gitdiff.go b/pkg/gitdiff/gitdiff.go
deleted file mode 100644
index 5365645..0000000
--- a/pkg/gitdiff/gitdiff.go
+++ /dev/null
@@ -1,230 +0,0 @@
-package gitdiff
-
-import (
- "errors"
- "fmt"
- "os"
- "strings"
-)
-
-// File describes changes to a single file. It can be either a text file or a
-// binary file.
-type File struct {
- OldName string
- NewName string
-
- IsNew bool
- IsDelete bool
- IsCopy bool
- IsRename bool
-
- OldMode os.FileMode
- NewMode os.FileMode
-
- OldOIDPrefix string
- NewOIDPrefix string
- Score int
-
- // TextFragments contains the fragments describing changes to a text file. It
- // may be empty if the file is empty or if only the mode changes.
- TextFragments []*TextFragment
-
- // IsBinary is true if the file is a binary file. If the patch includes
- // binary data, BinaryFragment will be non-nil and describe the changes to
- // the data. If the patch is reversible, ReverseBinaryFragment will also be
- // non-nil and describe the changes needed to restore the original file
- // after applying the changes in BinaryFragment.
- IsBinary bool
- BinaryFragment *BinaryFragment
- ReverseBinaryFragment *BinaryFragment
-}
-
-// String returns a git diff representation of this file. The value can be
-// parsed by this library to obtain the same File, but may not be the same as
-// the original input.
-func (f *File) String() string {
- var diff strings.Builder
- newFormatter(&diff).FormatFile(f)
- return diff.String()
-}
-
-// TextFragment describes changed lines starting at a specific line in a text file.
-type TextFragment struct {
- Comment string
-
- OldPosition int64
- OldLines int64
-
- NewPosition int64
- NewLines int64
-
- LinesAdded int64
- LinesDeleted int64
-
- LeadingContext int64
- TrailingContext int64
-
- Lines []Line
-}
-
-// String returns a git diff format of this fragment. See [File.String] for
-// more details on this format.
-func (f *TextFragment) String() string {
- var diff strings.Builder
- newFormatter(&diff).FormatTextFragment(f)
- return diff.String()
-}
-
-// Header returns a git diff header of this fragment. See [File.String] for
-// more details on this format.
-func (f *TextFragment) Header() string {
- var hdr strings.Builder
- newFormatter(&hdr).FormatTextFragmentHeader(f)
- return hdr.String()
-}
-
-// Validate checks that the fragment is self-consistent and appliable. Validate
-// returns an error if and only if the fragment is invalid.
-func (f *TextFragment) Validate() error {
- if f == nil {
- return errors.New("nil fragment")
- }
-
- var (
- oldLines, newLines int64
- leadingContext, trailingContext int64
- contextLines, addedLines, deletedLines int64
- )
-
- // count the types of lines in the fragment content
- for i, line := range f.Lines {
- switch line.Op {
- case OpContext:
- oldLines++
- newLines++
- contextLines++
- if addedLines == 0 && deletedLines == 0 {
- leadingContext++
- } else {
- trailingContext++
- }
- case OpAdd:
- newLines++
- addedLines++
- trailingContext = 0
- case OpDelete:
- oldLines++
- deletedLines++
- trailingContext = 0
- default:
- return fmt.Errorf("unknown operator %q on line %d", line.Op, i+1)
- }
- }
-
- // check the actual counts against the reported counts
- if oldLines != f.OldLines {
- return lineCountErr("old", oldLines, f.OldLines)
- }
- if newLines != f.NewLines {
- return lineCountErr("new", newLines, f.NewLines)
- }
- if leadingContext != f.LeadingContext {
- return lineCountErr("leading context", leadingContext, f.LeadingContext)
- }
- if trailingContext != f.TrailingContext {
- return lineCountErr("trailing context", trailingContext, f.TrailingContext)
- }
- if addedLines != f.LinesAdded {
- return lineCountErr("added", addedLines, f.LinesAdded)
- }
- if deletedLines != f.LinesDeleted {
- return lineCountErr("deleted", deletedLines, f.LinesDeleted)
- }
-
- // if a file is being created, it can only contain additions
- if f.OldPosition == 0 && f.OldLines != 0 {
- return errors.New("file creation fragment contains context or deletion lines")
- }
-
- return nil
-}
-
-func lineCountErr(kind string, actual, reported int64) error {
- return fmt.Errorf("fragment contains %d %s lines but reports %d", actual, kind, reported)
-}
-
-// Line is a line in a text fragment.
-type Line struct {
- Op LineOp
- Line string
-}
-
-func (fl Line) String() string {
- return fl.Op.String() + fl.Line
-}
-
-// Old returns true if the line appears in the old content of the fragment.
-func (fl Line) Old() bool {
- return fl.Op == OpContext || fl.Op == OpDelete
-}
-
-// New returns true if the line appears in the new content of the fragment.
-func (fl Line) New() bool {
- return fl.Op == OpContext || fl.Op == OpAdd
-}
-
-// NoEOL returns true if the line is missing a trailing newline character.
-func (fl Line) NoEOL() bool {
- return len(fl.Line) == 0 || fl.Line[len(fl.Line)-1] != '\n'
-}
-
-// LineOp describes the type of a text fragment line: context, added, or removed.
-type LineOp int
-
-const (
- // OpContext indicates a context line
- OpContext LineOp = iota
- // OpDelete indicates a deleted line
- OpDelete
- // OpAdd indicates an added line
- OpAdd
-)
-
-func (op LineOp) String() string {
- switch op {
- case OpContext:
- return " "
- case OpDelete:
- return "-"
- case OpAdd:
- return "+"
- }
- return "?"
-}
-
-// BinaryFragment describes changes to a binary file.
-type BinaryFragment struct {
- Method BinaryPatchMethod
- Size int64
- Data []byte
-}
-
-// BinaryPatchMethod is the method used to create and apply the binary patch.
-type BinaryPatchMethod int
-
-const (
- // BinaryPatchDelta indicates the data uses Git's packfile encoding
- BinaryPatchDelta BinaryPatchMethod = iota
- // BinaryPatchLiteral indicates the data is the exact file content
- BinaryPatchLiteral
-)
-
-// String returns a git diff format of this fragment. Due to differences in
-// zlib implementation between Go and Git, encoded binary data in the result
-// will likely differ from what Git produces for the same input. See
-// [File.String] for more details on this format.
-func (f *BinaryFragment) String() string {
- var diff strings.Builder
- newFormatter(&diff).FormatBinaryFragment(f)
- return diff.String()
-}
diff --git a/pkg/gitdiff/io.go b/pkg/gitdiff/io.go
deleted file mode 100644
index 8143238..0000000
--- a/pkg/gitdiff/io.go
+++ /dev/null
@@ -1,220 +0,0 @@
-package gitdiff
-
-import (
- "errors"
- "io"
-)
-
-const (
- byteBufferSize = 32 * 1024 // from io.Copy
- lineBufferSize = 32
- indexBufferSize = 1024
-)
-
-// LineReaderAt is the interface that wraps the ReadLinesAt method.
-//
-// ReadLinesAt reads len(lines) into lines starting at line offset. It returns
-// the number of lines read (0 <= n <= len(lines)) and any error encountered.
-// Line numbers are zero-indexed.
-//
-// If n < len(lines), ReadLinesAt returns a non-nil error explaining why more
-// lines were not returned.
-//
-// Lines read by ReadLinesAt include the newline character. The last line does
-// not have a final newline character if the input ends without one.
-type LineReaderAt interface {
- ReadLinesAt(lines [][]byte, offset int64) (n int, err error)
-}
-
-type lineReaderAt struct {
- r io.ReaderAt
- index []int64
- eof bool
-}
-
-func (r *lineReaderAt) ReadLinesAt(lines [][]byte, offset int64) (n int, err error) {
- if offset < 0 {
- return 0, errors.New("ReadLinesAt: negative offset")
- }
- if len(lines) == 0 {
- return 0, nil
- }
-
- count := len(lines)
- startLine := offset
- endLine := startLine + int64(count)
-
- if endLine > int64(len(r.index)) && !r.eof {
- if err := r.indexTo(endLine); err != nil {
- return 0, err
- }
- }
- if startLine >= int64(len(r.index)) {
- return 0, io.EOF
- }
-
- buf, byteOffset, err := r.readBytes(startLine, int64(count))
- if err != nil {
- return 0, err
- }
-
- for n = 0; n < count && startLine+int64(n) < int64(len(r.index)); n++ {
- lineno := startLine + int64(n)
- start, end := int64(0), r.index[lineno]-byteOffset
- if lineno > 0 {
- start = r.index[lineno-1] - byteOffset
- }
- lines[n] = buf[start:end]
- }
-
- if n < count {
- return n, io.EOF
- }
- return n, nil
-}
-
-// indexTo reads data and computes the line index until there is information
-// for line or a read returns io.EOF. It returns an error if and only if there
-// is an error reading data.
-func (r *lineReaderAt) indexTo(line int64) error {
- var buf [indexBufferSize]byte
-
- offset := r.lastOffset()
- for int64(len(r.index)) < line {
- n, err := r.r.ReadAt(buf[:], offset)
- if err != nil && err != io.EOF {
- return err
- }
- for _, b := range buf[:n] {
- offset++
- if b == '\n' {
- r.index = append(r.index, offset)
- }
- }
- if err == io.EOF {
- if offset > r.lastOffset() {
- r.index = append(r.index, offset)
- }
- r.eof = true
- break
- }
- }
- return nil
-}
-
-func (r *lineReaderAt) lastOffset() int64 {
- if n := len(r.index); n > 0 {
- return r.index[n-1]
- }
- return 0
-}
-
-// readBytes reads the bytes of the n lines starting at line and returns the
-// bytes and the offset of the first byte in the underlying source.
-func (r *lineReaderAt) readBytes(line, n int64) (b []byte, offset int64, err error) {
- indexLen := int64(len(r.index))
-
- var size int64
- if line > indexLen {
- offset = r.index[indexLen-1]
- } else if line > 0 {
- offset = r.index[line-1]
- }
- if n > 0 {
- if line+n > indexLen {
- size = r.index[indexLen-1] - offset
- } else {
- size = r.index[line+n-1] - offset
- }
- }
-
- b = make([]byte, size)
- if _, err := r.r.ReadAt(b, offset); err != nil {
- if err == io.EOF {
- err = errors.New("ReadLinesAt: corrupt line index or changed source data")
- }
- return nil, 0, err
- }
- return b, offset, nil
-}
-
-func isLen(r io.ReaderAt, n int64) (bool, error) {
- off := n - 1
- if off < 0 {
- off = 0
- }
-
- var b [2]byte
- nr, err := r.ReadAt(b[:], off)
- if err == io.EOF {
- return (n == 0 && nr == 0) || (n > 0 && nr == 1), nil
- }
- return false, err
-}
-
-// copyFrom writes bytes starting from offset off in src to dst stopping at the
-// end of src or at the first error. copyFrom returns the number of bytes
-// written and any error.
-func copyFrom(dst io.Writer, src io.ReaderAt, off int64) (written int64, err error) {
- buf := make([]byte, byteBufferSize)
- for {
- nr, rerr := src.ReadAt(buf, off)
- if nr > 0 {
- nw, werr := dst.Write(buf[0:nr])
- if nw > 0 {
- written += int64(nw)
- }
- if werr != nil {
- err = werr
- break
- }
- if nr != nw {
- err = io.ErrShortWrite
- break
- }
- off += int64(nr)
- }
- if rerr != nil {
- if rerr != io.EOF {
- err = rerr
- }
- break
- }
- }
- return written, err
-}
-
-// copyLinesFrom writes lines starting from line off in src to dst stopping at
-// the end of src or at the first error. copyLinesFrom returns the number of
-// lines written and any error.
-func copyLinesFrom(dst io.Writer, src LineReaderAt, off int64) (written int64, err error) {
- buf := make([][]byte, lineBufferSize)
-ReadLoop:
- for {
- nr, rerr := src.ReadLinesAt(buf, off)
- if nr > 0 {
- for _, line := range buf[0:nr] {
- nw, werr := dst.Write(line)
- if nw > 0 {
- written++
- }
- if werr != nil {
- err = werr
- break ReadLoop
- }
- if len(line) != nw {
- err = io.ErrShortWrite
- break ReadLoop
- }
- }
- off += int64(nr)
- }
- if rerr != nil {
- if rerr != io.EOF {
- err = rerr
- }
- break
- }
- }
- return written, err
-}
diff --git a/pkg/gitdiff/parser.go b/pkg/gitdiff/parser.go
deleted file mode 100644
index e8f8430..0000000
--- a/pkg/gitdiff/parser.go
+++ /dev/null
@@ -1,142 +0,0 @@
-// Package gitdiff parses and applies patches generated by Git. It supports
-// line-oriented text patches, binary patches, and can also parse standard
-// unified diffs generated by other tools.
-package gitdiff
-
-import (
- "bufio"
- "fmt"
- "io"
-)
-
-// Parse parses a patch with changes to one or more files. Any content before
-// the first file is returned as the second value. If an error occurs while
-// parsing, it returns all files parsed before the error.
-//
-// Parse expects to receive a single patch. If the input may contain multiple
-// patches (for example, if it is an mbox file), callers should split it into
-// individual patches and call Parse on each one.
-func Parse(r io.Reader) ([]*File, string, error) {
- p := newParser(r)
-
- if err := p.Next(); err != nil {
- if err == io.EOF {
- return nil, "", nil
- }
- return nil, "", err
- }
-
- var preamble string
- var files []*File
- for {
- file, pre, err := p.ParseNextFileHeader()
- if err != nil {
- return files, preamble, err
- }
- if len(files) == 0 {
- preamble = pre
- }
- if file == nil {
- break
- }
-
- for _, fn := range []func(*File) (int, error){
- p.ParseTextFragments,
- p.ParseBinaryFragments,
- } {
- n, err := fn(file)
- if err != nil {
- return files, preamble, err
- }
- if n > 0 {
- break
- }
- }
-
- files = append(files, file)
- }
-
- return files, preamble, nil
-}
-
-// TODO(bkeyes): consider exporting the parser type with configuration
-// this would enable OID validation, p-value guessing, and prefix stripping
-// by allowing users to set or override defaults
-
-// parser invariants:
-// - methods that parse objects:
-// - start with the parser on the first line of the first object
-// - if returning nil, do not advance
-// - if returning an error, do not advance past the object
-// - if returning an object, advance to the first line after the object
-// - any exported parsing methods must initialize the parser by calling Next()
-
-type stringReader interface {
- ReadString(delim byte) (string, error)
-}
-
-type parser struct {
- r stringReader
-
- eof bool
- lineno int64
- lines [3]string
-}
-
-func newParser(r io.Reader) *parser {
- if r, ok := r.(stringReader); ok {
- return &parser{r: r}
- }
- return &parser{r: bufio.NewReader(r)}
-}
-
-// Next advances the parser by one line. It returns any error encountered while
-// reading the line, including io.EOF when the end of stream is reached.
-func (p *parser) Next() error {
- if p.eof {
- return io.EOF
- }
-
- if p.lineno == 0 {
- // on first call to next, need to shift in all lines
- for i := 0; i < len(p.lines)-1; i++ {
- if err := p.shiftLines(); err != nil && err != io.EOF {
- return err
- }
- }
- }
-
- err := p.shiftLines()
- if err != nil && err != io.EOF {
- return err
- }
-
- p.lineno++
- if p.lines[0] == "" {
- p.eof = true
- return io.EOF
- }
- return nil
-}
-
-func (p *parser) shiftLines() (err error) {
- for i := 0; i < len(p.lines)-1; i++ {
- p.lines[i] = p.lines[i+1]
- }
- p.lines[len(p.lines)-1], err = p.r.ReadString('\n')
- return
-}
-
-// Line returns a line from the parser without advancing it. A delta of 0
-// returns the current line, while higher deltas return read-ahead lines. It
-// returns an empty string if the delta is higher than the available lines,
-// either because of the buffer size or because the parser reached the end of
-// the input. Valid lines always contain at least a newline character.
-func (p *parser) Line(delta uint) string {
- return p.lines[delta]
-}
-
-// Errorf generates an error and appends the current line information.
-func (p *parser) Errorf(delta int64, msg string, args ...interface{}) error {
- return fmt.Errorf("gitdiff: line %d: %s", p.lineno+delta, fmt.Sprintf(msg, args...))
-}
diff --git a/pkg/gitdiff/patch_header.go b/pkg/gitdiff/patch_header.go
deleted file mode 100644
index f047059..0000000
--- a/pkg/gitdiff/patch_header.go
+++ /dev/null
@@ -1,470 +0,0 @@
-package gitdiff
-
-import (
- "bufio"
- "errors"
- "fmt"
- "io"
- "io/ioutil"
- "mime/quotedprintable"
- "net/mail"
- "strconv"
- "strings"
- "time"
- "unicode"
-)
-
-const (
- mailHeaderPrefix = "From "
- prettyHeaderPrefix = "commit "
- mailMinimumHeaderPrefix = "From:"
-)
-
-// PatchHeader is a parsed version of the preamble content that appears before
-// the first diff in a patch. It includes metadata about the patch, such as the
-// author and a subject.
-type PatchHeader struct {
- // The SHA of the commit the patch was generated from. Empty if the SHA is
- // not included in the header.
- SHA string
-
- // The author details of the patch. If these details are not included in
- // the header, Author is nil and AuthorDate is the zero time.
- Author *PatchIdentity
- AuthorDate time.Time
-
- // The committer details of the patch. If these details are not included in
- // the header, Committer is nil and CommitterDate is the zero time.
- Committer *PatchIdentity
- CommitterDate time.Time
-
- // The title and body of the commit message describing the changes in the
- // patch. Empty if no message is included in the header.
- Title string
- Body string
-
- // If the preamble looks like an email, ParsePatchHeader will
- // remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the
- // Title and place them here.
- SubjectPrefix string
-
- // If the preamble looks like an email, and it contains a `---`
- // line, that line will be removed and everything after it will be
- // placed in BodyAppendix.
- BodyAppendix string
-}
-
-// Message returns the commit message for the header. The message consists of
-// the title and the body separated by an empty line.
-func (h *PatchHeader) Message() string {
- var msg strings.Builder
- if h != nil {
- msg.WriteString(h.Title)
- if h.Body != "" {
- msg.WriteString("\n\n")
- msg.WriteString(h.Body)
- }
- }
- return msg.String()
-}
-
-// ParsePatchDate parses a patch date string. It returns the parsed time or an
-// error if s has an unknown format. ParsePatchDate supports the iso, rfc,
-// short, raw, unix, and default formats (with local variants) used by the
-// --date flag in Git.
-func ParsePatchDate(s string) (time.Time, error) {
- const (
- isoFormat = "2006-01-02 15:04:05 -0700"
- isoStrictFormat = "2006-01-02T15:04:05-07:00"
- rfc2822Format = "Mon, 2 Jan 2006 15:04:05 -0700"
- shortFormat = "2006-01-02"
- defaultFormat = "Mon Jan 2 15:04:05 2006 -0700"
- defaultLocalFormat = "Mon Jan 2 15:04:05 2006"
- )
-
- if s == "" {
- return time.Time{}, nil
- }
-
- for _, fmt := range []string{
- isoFormat,
- isoStrictFormat,
- rfc2822Format,
- shortFormat,
- defaultFormat,
- defaultLocalFormat,
- } {
- if t, err := time.ParseInLocation(fmt, s, time.Local); err == nil {
- return t, nil
- }
- }
-
- // unix format
- if unix, err := strconv.ParseInt(s, 10, 64); err == nil {
- return time.Unix(unix, 0), nil
- }
-
- // raw format
- if space := strings.IndexByte(s, ' '); space > 0 {
- unix, uerr := strconv.ParseInt(s[:space], 10, 64)
- zone, zerr := time.Parse("-0700", s[space+1:])
- if uerr == nil && zerr == nil {
- return time.Unix(unix, 0).In(zone.Location()), nil
- }
- }
-
- return time.Time{}, fmt.Errorf("unknown date format: %s", s)
-}
-
-// A PatchHeaderOption modifies the behavior of ParsePatchHeader.
-type PatchHeaderOption func(*patchHeaderOptions)
-
-// SubjectCleanMode controls how ParsePatchHeader cleans subject lines when
-// parsing mail-formatted patches.
-type SubjectCleanMode int
-
-const (
- // SubjectCleanWhitespace removes leading and trailing whitespace.
- SubjectCleanWhitespace SubjectCleanMode = iota
-
- // SubjectCleanAll removes leading and trailing whitespace, leading "Re:",
- // "re:", and ":" strings, and leading strings enclosed by '[' and ']'.
- // This is the default behavior of git (see `git mailinfo`) and this
- // package.
- SubjectCleanAll
-
- // SubjectCleanPatchOnly is the same as SubjectCleanAll, but only removes
- // leading strings enclosed by '[' and ']' if they start with "PATCH".
- SubjectCleanPatchOnly
-)
-
-// WithSubjectCleanMode sets the SubjectCleanMode for header parsing. By
-// default, uses SubjectCleanAll.
-func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption {
- return func(opts *patchHeaderOptions) {
- opts.subjectCleanMode = m
- }
-}
-
-type patchHeaderOptions struct {
- subjectCleanMode SubjectCleanMode
-}
-
-// ParsePatchHeader parses the preamble string returned by [Parse] into a
-// PatchHeader. Due to the variety of header formats, some fields of the parsed
-// PatchHeader may be unset after parsing.
-//
-// Supported formats are the short, medium, full, fuller, and email pretty
-// formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox
-// format used by `git format-patch`.
-//
-// When parsing mail-formatted headers, ParsePatchHeader tries to remove
-// email-specific content from the title and body:
-//
-// - Based on the SubjectCleanMode, remove prefixes like reply markers and
-// "[PATCH]" strings from the subject, saving any removed content in the
-// SubjectPrefix field. Parsing always discards leading and trailing
-// whitespace from the subject line. The default mode is SubjectCleanAll.
-//
-// - If the body contains a "---" line (3 hyphens), remove that line and any
-// content after it from the body and save it in the BodyAppendix field.
-//
-// ParsePatchHeader tries to process content it does not understand without
-// returning errors, but will return errors if well-identified content like
-// dates or identities uses unknown or invalid formats.
-func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) {
- opts := patchHeaderOptions{
- subjectCleanMode: SubjectCleanAll, // match git defaults
- }
- for _, optFn := range options {
- optFn(&opts)
- }
-
- header = strings.TrimSpace(header)
- if header == "" {
- return &PatchHeader{}, nil
- }
-
- var firstLine, rest string
- if idx := strings.IndexByte(header, '\n'); idx >= 0 {
- firstLine = header[:idx]
- rest = header[idx+1:]
- } else {
- firstLine = header
- rest = ""
- }
-
- switch {
- case strings.HasPrefix(firstLine, mailHeaderPrefix):
- return parseHeaderMail(firstLine, strings.NewReader(rest), opts)
-
- case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix):
- // With a minimum header, the first line is part of the actual mail
- // content and needs to be parsed as part of the "rest"
- return parseHeaderMail("", strings.NewReader(header), opts)
-
- case strings.HasPrefix(firstLine, prettyHeaderPrefix):
- return parseHeaderPretty(firstLine, strings.NewReader(rest))
- }
-
- return nil, errors.New("unrecognized patch header format")
-}
-
-func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) {
- const (
- authorPrefix = "Author:"
- commitPrefix = "Commit:"
- datePrefix = "Date:"
- authorDatePrefix = "AuthorDate:"
- commitDatePrefix = "CommitDate:"
- )
-
- h := &PatchHeader{}
-
- prettyLine = strings.TrimPrefix(prettyLine, prettyHeaderPrefix)
- if i := strings.IndexByte(prettyLine, ' '); i > 0 {
- h.SHA = prettyLine[:i]
- } else {
- h.SHA = prettyLine
- }
-
- s := bufio.NewScanner(r)
- for s.Scan() {
- line := s.Text()
-
- // empty line marks end of fields, remaining lines are title/message
- if strings.TrimSpace(line) == "" {
- break
- }
-
- switch {
- case strings.HasPrefix(line, authorPrefix):
- u, err := ParsePatchIdentity(line[len(authorPrefix):])
- if err != nil {
- return nil, err
- }
- h.Author = &u
-
- case strings.HasPrefix(line, commitPrefix):
- u, err := ParsePatchIdentity(line[len(commitPrefix):])
- if err != nil {
- return nil, err
- }
- h.Committer = &u
-
- case strings.HasPrefix(line, datePrefix):
- d, err := ParsePatchDate(strings.TrimSpace(line[len(datePrefix):]))
- if err != nil {
- return nil, err
- }
- h.AuthorDate = d
-
- case strings.HasPrefix(line, authorDatePrefix):
- d, err := ParsePatchDate(strings.TrimSpace(line[len(authorDatePrefix):]))
- if err != nil {
- return nil, err
- }
- h.AuthorDate = d
-
- case strings.HasPrefix(line, commitDatePrefix):
- d, err := ParsePatchDate(strings.TrimSpace(line[len(commitDatePrefix):]))
- if err != nil {
- return nil, err
- }
- h.CommitterDate = d
- }
- }
- if s.Err() != nil {
- return nil, s.Err()
- }
-
- title, indent := scanMessageTitle(s)
- if s.Err() != nil {
- return nil, s.Err()
- }
- h.Title = title
-
- if title != "" {
- // Don't check for an appendix, pretty headers do not contain them
- body, _ := scanMessageBody(s, indent, false)
- if s.Err() != nil {
- return nil, s.Err()
- }
- h.Body = body
- }
-
- return h, nil
-}
-
-func scanMessageTitle(s *bufio.Scanner) (title string, indent string) {
- var b strings.Builder
- for i := 0; s.Scan(); i++ {
- line := s.Text()
- trimLine := strings.TrimSpace(line)
- if trimLine == "" {
- break
- }
-
- if i == 0 {
- if start := strings.IndexFunc(line, func(c rune) bool { return !unicode.IsSpace(c) }); start > 0 {
- indent = line[:start]
- }
- }
- if b.Len() > 0 {
- b.WriteByte(' ')
- }
- b.WriteString(trimLine)
- }
- return b.String(), indent
-}
-
-func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) {
- // Body and appendix
- var body, appendix strings.Builder
- c := &body
- var empty int
- for i := 0; s.Scan(); i++ {
- line := s.Text()
-
- line = strings.TrimRightFunc(line, unicode.IsSpace)
- line = strings.TrimPrefix(line, indent)
-
- if line == "" {
- empty++
- continue
- }
-
- // If requested, parse out "appendix" information (often added
- // by `git format-patch` and removed by `git am`).
- if separateAppendix && c == &body && line == "---" {
- c = &appendix
- continue
- }
-
- if c.Len() > 0 {
- c.WriteByte('\n')
- if empty > 0 {
- c.WriteByte('\n')
- }
- }
- empty = 0
-
- c.WriteString(line)
- }
- return body.String(), appendix.String()
-}
-
-func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*PatchHeader, error) {
- msg, err := mail.ReadMessage(r)
- if err != nil {
- return nil, err
- }
-
- h := &PatchHeader{}
-
- if strings.HasPrefix(mailLine, mailHeaderPrefix) {
- mailLine = strings.TrimPrefix(mailLine, mailHeaderPrefix)
- if i := strings.IndexByte(mailLine, ' '); i > 0 {
- h.SHA = mailLine[:i]
- }
- }
-
- from := msg.Header.Get("From")
- if from != "" {
- u, err := ParsePatchIdentity(from)
- if err != nil {
- return nil, err
- }
- h.Author = &u
- }
-
- date := msg.Header.Get("Date")
- if date != "" {
- d, err := ParsePatchDate(date)
- if err != nil {
- return nil, err
- }
- h.AuthorDate = d
- }
-
- subject := msg.Header.Get("Subject")
- h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode)
-
- s := bufio.NewScanner(msg.Body)
- h.Body, h.BodyAppendix = scanMessageBody(s, "", true)
- if s.Err() != nil {
- return nil, s.Err()
- }
-
- return h, nil
-}
-
-func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) {
- switch mode {
- case SubjectCleanAll, SubjectCleanPatchOnly:
- case SubjectCleanWhitespace:
- return "", strings.TrimSpace(decodeSubject(s))
- default:
- panic(fmt.Sprintf("unknown clean mode: %d", mode))
- }
-
- // Based on the algorithm from Git in mailinfo.c:cleanup_subject()
- // If compatibility with `git am` drifts, go there to see if there are any updates.
-
- at := 0
- for at < len(s) {
- switch s[at] {
- case 'r', 'R':
- // Detect re:, Re:, rE: and RE:
- if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' {
- at += 3
- continue
- }
-
- case ' ', '\t', ':':
- // Delete whitespace and duplicate ':' characters
- at++
- continue
-
- case '[':
- if i := strings.IndexByte(s[at:], ']'); i > 0 {
- if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") {
- at += i + 1
- continue
- }
- }
- }
-
- // Nothing was removed, end processing
- break
- }
-
- prefix = strings.TrimLeftFunc(s[:at], unicode.IsSpace)
- subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace)
- return
-}
-
-// Decodes a subject line. Currently only supports quoted-printable UTF-8. This format is the result
-// of a `git format-patch` when the commit title has a non-ASCII character (i.e. an emoji).
-// See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject
-func decodeSubject(encoded string) string {
- if !strings.HasPrefix(encoded, "=?UTF-8?q?") {
- // not UTF-8 encoded
- return encoded
- }
-
- // If the subject is too long, `git format-patch` may produce a subject line across
- // multiple lines. When parsed, this can look like the following:
- // <UTF8-prefix><first-line> <UTF8-prefix><second-line>
- payload := " " + encoded
- payload = strings.ReplaceAll(payload, " =?UTF-8?q?", "")
- payload = strings.ReplaceAll(payload, "?=", "")
-
- decoded, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(payload)))
- if err != nil {
- // if err, abort decoding and return original subject
- return encoded
- }
-
- return string(decoded)
-}
diff --git a/pkg/gitdiff/patch_identity.go b/pkg/gitdiff/patch_identity.go
deleted file mode 100644
index 018f80c..0000000
--- a/pkg/gitdiff/patch_identity.go
+++ /dev/null
@@ -1,166 +0,0 @@
-package gitdiff
-
-import (
- "fmt"
- "strings"
-)
-
-// PatchIdentity identifies a person who authored or committed a patch.
-type PatchIdentity struct {
- Name string
- Email string
-}
-
-func (i PatchIdentity) String() string {
- name := i.Name
- if name == "" {
- name = `""`
- }
- return fmt.Sprintf("%s <%s>", name, i.Email)
-}
-
-// ParsePatchIdentity parses a patch identity string. A patch identity contains
-// an email address and an optional name in [RFC 5322] format. This is either a
-// plain email address or a name followed by an address in angle brackets:
-//
-// author@example.com
-// Author Name <author@example.com>
-//
-// If the input is not one of these formats, ParsePatchIdentity applies a
-// heuristic to separate the name and email portions. If both the name and
-// email are missing or empty, ParsePatchIdentity returns an error. It
-// otherwise does not validate the result.
-//
-// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322
-func ParsePatchIdentity(s string) (PatchIdentity, error) {
- s = normalizeSpace(s)
- s = unquotePairs(s)
-
- var name, email string
- if at := strings.IndexByte(s, '@'); at >= 0 {
- start, end := at, at
- for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' {
- start--
- }
- for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' {
- end++
- }
- email = s[start+1 : end]
-
- // Adjust the boundaries so that we drop angle brackets, but keep
- // spaces when removing the email to form the name.
- if start < 0 || s[start] != '<' {
- start++
- }
- if end >= len(s) || s[end] != '>' {
- end--
- }
- name = s[:start] + s[end+1:]
- } else {
- start, end := 0, 0
- for i := 0; i < len(s); i++ {
- if s[i] == '<' && start == 0 {
- start = i + 1
- }
- if s[i] == '>' && start > 0 {
- end = i
- break
- }
- }
- if start > 0 && end >= start {
- email = strings.TrimSpace(s[start:end])
- name = s[:start-1]
- }
- }
-
- // After extracting the email, the name might contain extra whitespace
- // again and may be surrounded by comment characters. The git source gives
- // these examples of when this can happen:
- //
- // "Name <email@domain>"
- // "email@domain (Name)"
- // "Name <email@domain> (Comment)"
- //
- name = normalizeSpace(name)
- if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") {
- name = name[1 : len(name)-1]
- }
- name = strings.TrimSpace(name)
-
- // If the name is empty or contains email-like characters, use the email
- // instead (assuming one exists)
- if name == "" || strings.ContainsAny(name, "@<>") {
- name = email
- }
-
- if name == "" && email == "" {
- return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s)
- }
- return PatchIdentity{Name: name, Email: email}, nil
-}
-
-// unquotePairs processes the RFC5322 tokens "quoted-string" and "comment" to
-// remove any "quoted-pairs" (backslash-escaped characters). It also removes
-// the quotes from any quoted strings, but leaves the comment delimiters.
-func unquotePairs(s string) string {
- quote := false
- comments := 0
- escaped := false
-
- var out strings.Builder
- for i := 0; i < len(s); i++ {
- if escaped {
- escaped = false
- } else {
- switch s[i] {
- case '\\':
- // quoted-pair is only allowed in quoted-string/comment
- if quote || comments > 0 {
- escaped = true
- continue // drop '\' character
- }
-
- case '"':
- if comments == 0 {
- quote = !quote
- continue // drop '"' character
- }
-
- case '(':
- if !quote {
- comments++
- }
- case ')':
- if comments > 0 {
- comments--
- }
- }
- }
- out.WriteByte(s[i])
- }
- return out.String()
-}
-
-// normalizeSpace trims leading and trailing whitespace from s and converts
-// inner sequences of one or more whitespace characters to single spaces.
-func normalizeSpace(s string) string {
- var sb strings.Builder
- for i := 0; i < len(s); i++ {
- c := s[i]
- if !isRFC5332Space(c) {
- if sb.Len() > 0 && isRFC5332Space(s[i-1]) {
- sb.WriteByte(' ')
- }
- sb.WriteByte(c)
- }
- }
- return sb.String()
-}
-
-func isRFC5332Space(c byte) bool {
- switch c {
- case '\t', '\n', '\r', ' ':
- return true
- }
- return false
-}
diff --git a/pkg/gitdiff/text.go b/pkg/gitdiff/text.go
deleted file mode 100644
index ee30792..0000000
--- a/pkg/gitdiff/text.go
+++ /dev/null
@@ -1,192 +0,0 @@
-package gitdiff
-
-import (
- "fmt"
- "io"
- "strconv"
- "strings"
-)
-
-// ParseTextFragments parses text fragments until the next file header or the
-// end of the stream and attaches them to the given file. It returns the number
-// of fragments that were added.
-func (p *parser) ParseTextFragments(f *File) (n int, err error) {
- for {
- frag, err := p.ParseTextFragmentHeader()
- if err != nil {
- return n, err
- }
- if frag == nil {
- return n, nil
- }
-
- if f.IsNew && frag.OldLines > 0 {
- return n, p.Errorf(-1, "new file depends on old contents")
- }
- if f.IsDelete && frag.NewLines > 0 {
- return n, p.Errorf(-1, "deleted file still has contents")
- }
-
- if err := p.ParseTextChunk(frag); err != nil {
- return n, err
- }
-
- f.TextFragments = append(f.TextFragments, frag)
- n++
- }
-}
-
-func (p *parser) ParseTextFragmentHeader() (*TextFragment, error) {
- const (
- startMark = "@@ -"
- endMark = " @@"
- )
-
- if !strings.HasPrefix(p.Line(0), startMark) {
- return nil, nil
- }
-
- parts := strings.SplitAfterN(p.Line(0), endMark, 2)
- if len(parts) < 2 {
- return nil, p.Errorf(0, "invalid fragment header")
- }
-
- f := &TextFragment{}
- f.Comment = strings.TrimSpace(parts[1])
-
- header := parts[0][len(startMark) : len(parts[0])-len(endMark)]
- ranges := strings.Split(header, " +")
- if len(ranges) != 2 {
- return nil, p.Errorf(0, "invalid fragment header")
- }
-
- var err error
- if f.OldPosition, f.OldLines, err = parseRange(ranges[0]); err != nil {
- return nil, p.Errorf(0, "invalid fragment header: %v", err)
- }
- if f.NewPosition, f.NewLines, err = parseRange(ranges[1]); err != nil {
- return nil, p.Errorf(0, "invalid fragment header: %v", err)
- }
-
- if err := p.Next(); err != nil && err != io.EOF {
- return nil, err
- }
- return f, nil
-}
-
-func (p *parser) ParseTextChunk(frag *TextFragment) error {
- if p.Line(0) == "" {
- return p.Errorf(0, "no content following fragment header")
- }
-
- oldLines, newLines := frag.OldLines, frag.NewLines
- for oldLines > 0 || newLines > 0 {
- line := p.Line(0)
- op, data := line[0], line[1:]
-
- switch op {
- case '\n':
- data = "\n"
- fallthrough // newer GNU diff versions create empty context lines
- case ' ':
- oldLines--
- newLines--
- if frag.LinesAdded == 0 && frag.LinesDeleted == 0 {
- frag.LeadingContext++
- } else {
- frag.TrailingContext++
- }
- frag.Lines = append(frag.Lines, Line{OpContext, data})
- case '-':
- oldLines--
- frag.LinesDeleted++
- frag.TrailingContext = 0
- frag.Lines = append(frag.Lines, Line{OpDelete, data})
- case '+':
- newLines--
- frag.LinesAdded++
- frag.TrailingContext = 0
- frag.Lines = append(frag.Lines, Line{OpAdd, data})
- case '\\':
- // this may appear in middle of fragment if it's for a deleted line
- if isNoNewlineMarker(line) {
- removeLastNewline(frag)
- break
- }
- fallthrough
- default:
- // TODO(bkeyes): if this is because we hit the next header, it
- // would be helpful to return the miscounts line error. We could
- // either test for the common headers ("@@ -", "diff --git") or
- // assume any invalid op ends the fragment; git returns the same
- // generic error in all cases so either is compatible
- return p.Errorf(0, "invalid line operation: %q", op)
- }
-
- if err := p.Next(); err != nil {
- if err == io.EOF {
- break
- }
- return err
- }
- }
-
- if oldLines != 0 || newLines != 0 {
- hdr := max(frag.OldLines-oldLines, frag.NewLines-newLines) + 1
- return p.Errorf(-hdr, "fragment header miscounts lines: %+d old, %+d new", -oldLines, -newLines)
- }
- if frag.LinesAdded == 0 && frag.LinesDeleted == 0 {
- return p.Errorf(0, "fragment contains no changes")
- }
-
- // check for a final "no newline" marker since it is not included in the
- // counters used to stop the loop above
- if isNoNewlineMarker(p.Line(0)) {
- removeLastNewline(frag)
- if err := p.Next(); err != nil && err != io.EOF {
- return err
- }
- }
-
- return nil
-}
-
-func isNoNewlineMarker(s string) bool {
- // test for "\ No newline at end of file" by prefix because the text
- // changes by locale (git claims all versions are at least 12 chars)
- return len(s) >= 12 && s[:2] == "\\ "
-}
-
-func removeLastNewline(frag *TextFragment) {
- if len(frag.Lines) > 0 {
- last := &frag.Lines[len(frag.Lines)-1]
- last.Line = strings.TrimSuffix(last.Line, "\n")
- }
-}
-
-func parseRange(s string) (start int64, end int64, err error) {
- parts := strings.SplitN(s, ",", 2)
-
- if start, err = strconv.ParseInt(parts[0], 10, 64); err != nil {
- nerr := err.(*strconv.NumError)
- return 0, 0, fmt.Errorf("bad start of range: %s: %v", parts[0], nerr.Err)
- }
-
- if len(parts) > 1 {
- if end, err = strconv.ParseInt(parts[1], 10, 64); err != nil {
- nerr := err.(*strconv.NumError)
- return 0, 0, fmt.Errorf("bad end of range: %s: %v", parts[1], nerr.Err)
- }
- } else {
- end = 1
- }
-
- return
-}
-
-func max(a, b int64) int64 {
- if a > b {
- return a
- }
- return b
-}