diff options
Diffstat (limited to 'pkg/gitdiff')
| -rw-r--r-- | pkg/gitdiff/apply.go | 147 | ||||
| -rw-r--r-- | pkg/gitdiff/apply_binary.go | 206 | ||||
| -rw-r--r-- | pkg/gitdiff/apply_text.go | 158 | ||||
| -rw-r--r-- | pkg/gitdiff/base85.go | 91 | ||||
| -rw-r--r-- | pkg/gitdiff/binary.go | 186 | ||||
| -rw-r--r-- | pkg/gitdiff/file_header.go | 546 | ||||
| -rw-r--r-- | pkg/gitdiff/format.go | 281 | ||||
| -rw-r--r-- | pkg/gitdiff/gitdiff.go | 230 | ||||
| -rw-r--r-- | pkg/gitdiff/io.go | 220 | ||||
| -rw-r--r-- | pkg/gitdiff/parser.go | 142 | ||||
| -rw-r--r-- | pkg/gitdiff/patch_header.go | 470 | ||||
| -rw-r--r-- | pkg/gitdiff/patch_identity.go | 166 | ||||
| -rw-r--r-- | pkg/gitdiff/text.go | 192 |
13 files changed, 0 insertions, 3035 deletions
diff --git a/pkg/gitdiff/apply.go b/pkg/gitdiff/apply.go deleted file mode 100644 index 44bbcca..0000000 --- a/pkg/gitdiff/apply.go +++ /dev/null @@ -1,147 +0,0 @@ -package gitdiff - -import ( - "errors" - "fmt" - "io" - "sort" -) - -// Conflict indicates an apply failed due to a conflict between the patch and -// the source content. -// -// Users can test if an error was caused by a conflict by using errors.Is with -// an empty Conflict: -// -// if errors.Is(err, &Conflict{}) { -// // handle conflict -// } -type Conflict struct { - msg string -} - -func (c *Conflict) Error() string { - return "conflict: " + c.msg -} - -// Is implements error matching for Conflict. Passing an empty instance of -// Conflict always returns true. -func (c *Conflict) Is(other error) bool { - if other, ok := other.(*Conflict); ok { - return other.msg == "" || other.msg == c.msg - } - return false -} - -// ApplyError wraps an error that occurs during patch application with -// additional location information, if it is available. -type ApplyError struct { - // Line is the one-indexed line number in the source data - Line int64 - // Fragment is the one-indexed fragment number in the file - Fragment int - // FragmentLine is the one-indexed line number in the fragment - FragmentLine int - - err error -} - -// Unwrap returns the wrapped error. -func (e *ApplyError) Unwrap() error { - return e.err -} - -func (e *ApplyError) Error() string { - return fmt.Sprintf("%v", e.err) -} - -type lineNum int -type fragNum int -type fragLineNum int - -// applyError creates a new *ApplyError wrapping err or augments the information -// in err with args if it is already an *ApplyError. Returns nil if err is nil. 
-func applyError(err error, args ...interface{}) error { - if err == nil { - return nil - } - - e, ok := err.(*ApplyError) - if !ok { - if err == io.EOF { - err = io.ErrUnexpectedEOF - } - e = &ApplyError{err: err} - } - for _, arg := range args { - switch v := arg.(type) { - case lineNum: - e.Line = int64(v) + 1 - case fragNum: - e.Fragment = int(v) + 1 - case fragLineNum: - e.FragmentLine = int(v) + 1 - } - } - return e -} - -var ( - errApplyInProgress = errors.New("gitdiff: incompatible apply in progress") - errApplierClosed = errors.New("gitdiff: applier is closed") -) - -// Apply applies the changes in f to src, writing the result to dst. It can -// apply both text and binary changes. -// -// If an error occurs while applying, Apply returns an *ApplyError that -// annotates the error with additional information. If the error is because of -// a conflict with the source, the wrapped error will be a *Conflict. -func Apply(dst io.Writer, src io.ReaderAt, f *File) error { - if f.IsBinary { - if len(f.TextFragments) > 0 { - return applyError(errors.New("binary file contains text fragments")) - } - if f.BinaryFragment == nil { - return applyError(errors.New("binary file does not contain a binary fragment")) - } - } else { - if f.BinaryFragment != nil { - return applyError(errors.New("text file contains a binary fragment")) - } - } - - switch { - case f.BinaryFragment != nil: - applier := NewBinaryApplier(dst, src) - if err := applier.ApplyFragment(f.BinaryFragment); err != nil { - return err - } - return applier.Close() - - case len(f.TextFragments) > 0: - frags := make([]*TextFragment, len(f.TextFragments)) - copy(frags, f.TextFragments) - - sort.Slice(frags, func(i, j int) bool { - return frags[i].OldPosition < frags[j].OldPosition - }) - - // TODO(bkeyes): consider merging overlapping fragments - // right now, the application fails if fragments overlap, but it should be - // possible to precompute the result of applying them in order - - applier := 
NewTextApplier(dst, src) - for i, frag := range frags { - if err := applier.ApplyFragment(frag); err != nil { - return applyError(err, fragNum(i)) - } - } - return applier.Close() - - default: - // nothing to apply, just copy all the data - _, err := copyFrom(dst, src, 0) - return err - } -} diff --git a/pkg/gitdiff/apply_binary.go b/pkg/gitdiff/apply_binary.go deleted file mode 100644 index b34772d..0000000 --- a/pkg/gitdiff/apply_binary.go +++ /dev/null @@ -1,206 +0,0 @@ -package gitdiff - -import ( - "errors" - "fmt" - "io" -) - -// BinaryApplier applies binary changes described in a fragment to source data. -// The applier must be closed after use. -type BinaryApplier struct { - dst io.Writer - src io.ReaderAt - - closed bool - dirty bool -} - -// NewBinaryApplier creates an BinaryApplier that reads data from src and -// writes modified data to dst. -func NewBinaryApplier(dst io.Writer, src io.ReaderAt) *BinaryApplier { - a := BinaryApplier{ - dst: dst, - src: src, - } - return &a -} - -// ApplyFragment applies the changes in the fragment f and writes the result to -// dst. ApplyFragment can be called at most once. -// -// If an error occurs while applying, ApplyFragment returns an *ApplyError that -// annotates the error with additional information. If the error is because of -// a conflict between a fragment and the source, the wrapped error will be a -// *Conflict. 
-func (a *BinaryApplier) ApplyFragment(f *BinaryFragment) error { - if f == nil { - return applyError(errors.New("nil fragment")) - } - if a.closed { - return applyError(errApplierClosed) - } - if a.dirty { - return applyError(errApplyInProgress) - } - - // mark an apply as in progress, even if it fails before making changes - a.dirty = true - - switch f.Method { - case BinaryPatchLiteral: - if _, err := a.dst.Write(f.Data); err != nil { - return applyError(err) - } - case BinaryPatchDelta: - if err := applyBinaryDeltaFragment(a.dst, a.src, f.Data); err != nil { - return applyError(err) - } - default: - return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method)) - } - return nil -} - -// Close writes any data following the last applied fragment and prevents -// future calls to ApplyFragment. -func (a *BinaryApplier) Close() (err error) { - if a.closed { - return nil - } - - a.closed = true - if !a.dirty { - _, err = copyFrom(a.dst, a.src, 0) - } else { - // do nothing, applying a binary fragment copies all data - } - return err -} - -func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error { - srcSize, delta := readBinaryDeltaSize(frag) - if err := checkBinarySrcSize(src, srcSize); err != nil { - return err - } - - dstSize, delta := readBinaryDeltaSize(delta) - - for len(delta) > 0 { - op := delta[0] - if op == 0 { - return errors.New("invalid delta opcode 0") - } - - var n int64 - var err error - switch op & 0x80 { - case 0x80: - n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src) - case 0x00: - n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:]) - } - if err != nil { - return err - } - dstSize -= n - } - - if dstSize != 0 { - return errors.New("corrupt binary delta: insufficient or extra data") - } - return nil -} - -// readBinaryDeltaSize reads a variable length size from a delta-encoded binary -// fragment, returing the size and the unused data. Data is encoded as: -// -// [[1xxxxxxx]...] 
[0xxxxxxx] -// -// in little-endian order, with 7 bits of the value per byte. -func readBinaryDeltaSize(d []byte) (size int64, rest []byte) { - shift := uint(0) - for i, b := range d { - size |= int64(b&0x7F) << shift - shift += 7 - if b <= 0x7F { - return size, d[i+1:] - } - } - return size, nil -} - -// applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary -// fragment, returning the amount of data written and the usused part of the -// fragment. An add operation takes the form: -// -// [0xxxxxx][[data1]...] -// -// where the lower seven bits of the opcode is the number of data bytes -// following the opcode. See also pack-format.txt in the Git source. -func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) { - size := int(op) - if len(delta) < size { - return 0, delta, errors.New("corrupt binary delta: incomplete add") - } - _, err = w.Write(delta[:size]) - return int64(size), delta[size:], err -} - -// applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary -// fragment, returing the amount of data written and the unused part of the -// fragment. A copy operation takes the form: -// -// [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3] -// -// where the lower seven bits of the opcode determine which non-zero offset and -// size bytes are present in little-endian order: if bit 0 is set, offset1 is -// present, etc. If no offset or size bytes are present, offset is 0 and size -// is 0x10000. See also pack-format.txt in the Git source. 
-func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) { - const defaultSize = 0x10000 - - unpack := func(start, bits uint) (v int64) { - for i := uint(0); i < bits; i++ { - mask := byte(1 << (i + start)) - if op&mask > 0 { - if len(delta) == 0 { - err = errors.New("corrupt binary delta: incomplete copy") - return - } - v |= int64(delta[0]) << (8 * i) - delta = delta[1:] - } - } - return - } - - offset := unpack(0, 4) - size := unpack(4, 3) - if err != nil { - return 0, delta, err - } - if size == 0 { - size = defaultSize - } - - // TODO(bkeyes): consider pooling these buffers - b := make([]byte, size) - if _, err := src.ReadAt(b, offset); err != nil { - return 0, delta, err - } - - _, err = w.Write(b) - return size, delta, err -} - -func checkBinarySrcSize(r io.ReaderAt, size int64) error { - ok, err := isLen(r, size) - if err != nil { - return err - } - if !ok { - return &Conflict{"fragment src size does not match actual src size"} - } - return nil -} diff --git a/pkg/gitdiff/apply_text.go b/pkg/gitdiff/apply_text.go deleted file mode 100644 index 8d0accb..0000000 --- a/pkg/gitdiff/apply_text.go +++ /dev/null @@ -1,158 +0,0 @@ -package gitdiff - -import ( - "errors" - "io" -) - -// TextApplier applies changes described in text fragments to source data. If -// changes are described in multiple fragments, those fragments must be applied -// in order. The applier must be closed after use. -// -// By default, TextApplier operates in "strict" mode, where fragment content -// and positions must exactly match those of the source. -type TextApplier struct { - dst io.Writer - src io.ReaderAt - lineSrc LineReaderAt - nextLine int64 - - closed bool - dirty bool -} - -// NewTextApplier creates a TextApplier that reads data from src and writes -// modified data to dst. If src implements LineReaderAt, it is used directly. 
-func NewTextApplier(dst io.Writer, src io.ReaderAt) *TextApplier { - a := TextApplier{ - dst: dst, - src: src, - } - - if lineSrc, ok := src.(LineReaderAt); ok { - a.lineSrc = lineSrc - } else { - a.lineSrc = &lineReaderAt{r: src} - } - - return &a -} - -// ApplyFragment applies the changes in the fragment f, writing unwritten data -// before the start of the fragment and any changes from the fragment. If -// multiple text fragments apply to the same content, ApplyFragment must be -// called in order of increasing start position. As a result, each fragment can -// be applied at most once. -// -// If an error occurs while applying, ApplyFragment returns an *ApplyError that -// annotates the error with additional information. If the error is because of -// a conflict between the fragment and the source, the wrapped error will be a -// *Conflict. -func (a *TextApplier) ApplyFragment(f *TextFragment) error { - if a.closed { - return applyError(errApplierClosed) - } - - // mark an apply as in progress, even if it fails before making changes - a.dirty = true - - // application code assumes fragment fields are consistent - if err := f.Validate(); err != nil { - return applyError(err) - } - - // lines are 0-indexed, positions are 1-indexed (but new files have position = 0) - fragStart := f.OldPosition - 1 - if fragStart < 0 { - fragStart = 0 - } - fragEnd := fragStart + f.OldLines - - start := a.nextLine - if fragStart < start { - return applyError(&Conflict{"fragment overlaps with an applied fragment"}) - } - - if f.OldPosition == 0 { - ok, err := isLen(a.src, 0) - if err != nil { - return applyError(err) - } - if !ok { - return applyError(&Conflict{"cannot create new file from non-empty src"}) - } - } - - preimage := make([][]byte, fragEnd-start) - n, err := a.lineSrc.ReadLinesAt(preimage, start) - if err != nil { - // an EOF indicates that source file is shorter than the patch expects, - // which should be reported as a conflict rather than a generic error - if 
errors.Is(err, io.EOF) { - err = &Conflict{"src has fewer lines than required by fragment"} - } - return applyError(err, lineNum(start+int64(n))) - } - - // copy leading data before the fragment starts - for i, line := range preimage[:fragStart-start] { - if _, err := a.dst.Write(line); err != nil { - a.nextLine = start + int64(i) - return applyError(err, lineNum(a.nextLine)) - } - } - preimage = preimage[fragStart-start:] - - // apply the changes in the fragment - used := int64(0) - for i, line := range f.Lines { - if err := applyTextLine(a.dst, line, preimage, used); err != nil { - a.nextLine = fragStart + used - return applyError(err, lineNum(a.nextLine), fragLineNum(i)) - } - if line.Old() { - used++ - } - } - a.nextLine = fragStart + used - - // new position of +0,0 mean a full delete, so check for leftovers - if f.NewPosition == 0 && f.NewLines == 0 { - var b [1][]byte - n, err := a.lineSrc.ReadLinesAt(b[:], a.nextLine) - if err != nil && err != io.EOF { - return applyError(err, lineNum(a.nextLine)) - } - if n > 0 { - return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine)) - } - } - - return nil -} - -func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) { - if line.Old() && string(preimage[i]) != line.Line { - return &Conflict{"fragment line does not match src line"} - } - if line.New() { - _, err = io.WriteString(dst, line.Line) - } - return err -} - -// Close writes any data following the last applied fragment and prevents -// future calls to ApplyFragment. 
-func (a *TextApplier) Close() (err error) { - if a.closed { - return nil - } - - a.closed = true - if !a.dirty { - _, err = copyFrom(a.dst, a.src, 0) - } else { - _, err = copyLinesFrom(a.dst, a.lineSrc, a.nextLine) - } - return err -} diff --git a/pkg/gitdiff/base85.go b/pkg/gitdiff/base85.go deleted file mode 100644 index 86db117..0000000 --- a/pkg/gitdiff/base85.go +++ /dev/null @@ -1,91 +0,0 @@ -package gitdiff - -import ( - "fmt" -) - -var ( - b85Table map[byte]byte - b85Alpha = []byte( - "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "!#$%&()*+-;<=>?@^_`{|}~", - ) -) - -func init() { - b85Table = make(map[byte]byte) - for i, c := range b85Alpha { - b85Table[c] = byte(i) - } -} - -// base85Decode decodes Base85-encoded data from src into dst. It uses the -// alphabet defined by base85.c in the Git source tree. src must contain at -// least len(dst) bytes of encoded data. -func base85Decode(dst, src []byte) error { - var v uint32 - var n, ndst int - for i, b := range src { - if b, ok := b85Table[b]; ok { - v = 85*v + uint32(b) - n++ - } else { - return fmt.Errorf("invalid base85 byte at index %d: 0x%X", i, src[i]) - } - if n == 5 { - rem := len(dst) - ndst - for j := 0; j < 4 && j < rem; j++ { - dst[ndst] = byte(v >> 24) - ndst++ - v <<= 8 - } - v = 0 - n = 0 - } - } - if n > 0 { - return fmt.Errorf("base85 data terminated by underpadded sequence") - } - if ndst < len(dst) { - return fmt.Errorf("base85 data underrun: %d < %d", ndst, len(dst)) - } - return nil -} - -// base85Encode encodes src in Base85, writing the result to dst. It uses the -// alphabet defined by base85.c in the Git source tree. 
-func base85Encode(dst, src []byte) { - var di, si int - - encode := func(v uint32) { - dst[di+0] = b85Alpha[(v/(85*85*85*85))%85] - dst[di+1] = b85Alpha[(v/(85*85*85))%85] - dst[di+2] = b85Alpha[(v/(85*85))%85] - dst[di+3] = b85Alpha[(v/85)%85] - dst[di+4] = b85Alpha[v%85] - } - - n := (len(src) / 4) * 4 - for si < n { - encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3])) - si += 4 - di += 5 - } - - var v uint32 - switch len(src) - si { - case 3: - v |= uint32(src[si+2]) << 8 - fallthrough - case 2: - v |= uint32(src[si+1]) << 16 - fallthrough - case 1: - v |= uint32(src[si+0]) << 24 - encode(v) - } -} - -// base85Len returns the length of n bytes of Base85 encoded data. -func base85Len(n int) int { - return (n + 3) / 4 * 5 -} diff --git a/pkg/gitdiff/binary.go b/pkg/gitdiff/binary.go deleted file mode 100644 index 282e323..0000000 --- a/pkg/gitdiff/binary.go +++ /dev/null @@ -1,186 +0,0 @@ -package gitdiff - -import ( - "bytes" - "compress/zlib" - "fmt" - "io" - "io/ioutil" - "strconv" - "strings" -) - -func (p *parser) ParseBinaryFragments(f *File) (n int, err error) { - isBinary, hasData, err := p.ParseBinaryMarker() - if err != nil || !isBinary { - return 0, err - } - - f.IsBinary = true - if !hasData { - return 0, nil - } - - forward, err := p.ParseBinaryFragmentHeader() - if err != nil { - return 0, err - } - if forward == nil { - return 0, p.Errorf(0, "missing data for binary patch") - } - if err := p.ParseBinaryChunk(forward); err != nil { - return 0, err - } - f.BinaryFragment = forward - - // valid for reverse to not exist, but it must be valid if present - reverse, err := p.ParseBinaryFragmentHeader() - if err != nil { - return 1, err - } - if reverse == nil { - return 1, nil - } - if err := p.ParseBinaryChunk(reverse); err != nil { - return 1, err - } - f.ReverseBinaryFragment = reverse - - return 1, nil -} - -func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { - line := p.Line(0) 
- switch { - case line == "GIT binary patch\n": - hasData = true - case isBinaryNoDataMarker(line): - default: - return false, false, nil - } - - if err = p.Next(); err != nil && err != io.EOF { - return false, false, err - } - return true, hasData, nil -} - -func isBinaryNoDataMarker(line string) bool { - if strings.HasSuffix(line, " differ\n") { - return strings.HasPrefix(line, "Binary files ") || strings.HasPrefix(line, "Files ") - } - return false -} - -func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) { - parts := strings.SplitN(strings.TrimSuffix(p.Line(0), "\n"), " ", 2) - if len(parts) < 2 { - return nil, nil - } - - frag := &BinaryFragment{} - switch parts[0] { - case "delta": - frag.Method = BinaryPatchDelta - case "literal": - frag.Method = BinaryPatchLiteral - default: - return nil, nil - } - - var err error - if frag.Size, err = strconv.ParseInt(parts[1], 10, 64); err != nil { - nerr := err.(*strconv.NumError) - return nil, p.Errorf(0, "binary patch: invalid size: %v", nerr.Err) - } - - if err := p.Next(); err != nil && err != io.EOF { - return nil, err - } - return frag, nil -} - -func (p *parser) ParseBinaryChunk(frag *BinaryFragment) error { - // Binary fragments are encoded as a series of base85 encoded lines. Each - // line starts with a character in [A-Za-z] giving the number of bytes on - // the line, where A = 1 and z = 52, and ends with a newline character. - // - // The base85 encoding means each line is a multiple of 5 characters + 2 - // additional characters for the length byte and the newline. The fragment - // ends with a blank line. 
- const ( - shortestValidLine = "A00000\n" - maxBytesPerLine = 52 - ) - - var data bytes.Buffer - buf := make([]byte, maxBytesPerLine) - for { - line := p.Line(0) - if line == "\n" { - break - } - if len(line) < len(shortestValidLine) || (len(line)-2)%5 != 0 { - return p.Errorf(0, "binary patch: corrupt data line") - } - - byteCount, seq := int(line[0]), line[1:len(line)-1] - switch { - case 'A' <= byteCount && byteCount <= 'Z': - byteCount = byteCount - 'A' + 1 - case 'a' <= byteCount && byteCount <= 'z': - byteCount = byteCount - 'a' + 27 - default: - return p.Errorf(0, "binary patch: invalid length byte") - } - - // base85 encodes every 4 bytes into 5 characters, with up to 3 bytes of end padding - maxByteCount := len(seq) / 5 * 4 - if byteCount > maxByteCount || byteCount < maxByteCount-3 { - return p.Errorf(0, "binary patch: incorrect byte count") - } - - if err := base85Decode(buf[:byteCount], []byte(seq)); err != nil { - return p.Errorf(0, "binary patch: %v", err) - } - data.Write(buf[:byteCount]) - - if err := p.Next(); err != nil { - if err == io.EOF { - return p.Errorf(0, "binary patch: unexpected EOF") - } - return err - } - } - - if err := inflateBinaryChunk(frag, &data); err != nil { - return p.Errorf(0, "binary patch: %v", err) - } - - // consume the empty line that ended the fragment - if err := p.Next(); err != nil && err != io.EOF { - return err - } - return nil -} - -func inflateBinaryChunk(frag *BinaryFragment, r io.Reader) error { - zr, err := zlib.NewReader(r) - if err != nil { - return err - } - - data, err := ioutil.ReadAll(zr) - if err != nil { - return err - } - if err := zr.Close(); err != nil { - return err - } - - if int64(len(data)) != frag.Size { - return fmt.Errorf("%d byte fragment inflated to %d", frag.Size, len(data)) - } - frag.Data = data - return nil -} diff --git a/pkg/gitdiff/file_header.go b/pkg/gitdiff/file_header.go deleted file mode 100644 index 7ae4bc9..0000000 --- a/pkg/gitdiff/file_header.go +++ /dev/null @@ -1,546 +0,0 
@@ -package gitdiff - -import ( - "fmt" - "io" - "os" - "strconv" - "strings" - "time" -) - -const ( - devNull = "/dev/null" -) - -// ParseNextFileHeader finds and parses the next file header in the stream. If -// a header is found, it returns a file and all input before the header. It -// returns nil if no headers are found before the end of the input. -func (p *parser) ParseNextFileHeader() (*File, string, error) { - var preamble strings.Builder - var file *File - for { - // check for disconnected fragment headers (corrupt patch) - frag, err := p.ParseTextFragmentHeader() - if err != nil { - // not a valid header, nothing to worry about - goto NextLine - } - if frag != nil { - return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header()) - } - - // check for a git-generated patch - file, err = p.ParseGitFileHeader() - if err != nil { - return nil, "", err - } - if file != nil { - return file, preamble.String(), nil - } - - // check for a "traditional" patch - file, err = p.ParseTraditionalFileHeader() - if err != nil { - return nil, "", err - } - if file != nil { - return file, preamble.String(), nil - } - - NextLine: - preamble.WriteString(p.Line(0)) - if err := p.Next(); err != nil { - if err == io.EOF { - break - } - return nil, "", err - } - } - return nil, preamble.String(), nil -} - -func (p *parser) ParseGitFileHeader() (*File, error) { - const prefix = "diff --git " - - if !strings.HasPrefix(p.Line(0), prefix) { - return nil, nil - } - header := p.Line(0)[len(prefix):] - - defaultName, err := parseGitHeaderName(header) - if err != nil { - return nil, p.Errorf(0, "git file header: %v", err) - } - - f := &File{} - for { - end, err := parseGitHeaderData(f, p.Line(1), defaultName) - if err != nil { - return nil, p.Errorf(1, "git file header: %v", err) - } - - if err := p.Next(); err != nil { - if err == io.EOF { - break - } - return nil, err - } - - if end { - break - } - } - - if f.OldName == "" && f.NewName == "" { - if defaultName 
== "" { - return nil, p.Errorf(0, "git file header: missing filename information") - } - f.OldName = defaultName - f.NewName = defaultName - } - - if (f.NewName == "" && !f.IsDelete) || (f.OldName == "" && !f.IsNew) { - return nil, p.Errorf(0, "git file header: missing filename information") - } - - return f, nil -} - -func (p *parser) ParseTraditionalFileHeader() (*File, error) { - const shortestValidFragHeader = "@@ -1 +1 @@\n" - const ( - oldPrefix = "--- " - newPrefix = "+++ " - ) - - oldLine, newLine := p.Line(0), p.Line(1) - - if !strings.HasPrefix(oldLine, oldPrefix) || !strings.HasPrefix(newLine, newPrefix) { - return nil, nil - } - // heuristic: only a file header if followed by a (probable) fragment header - if len(p.Line(2)) < len(shortestValidFragHeader) || !strings.HasPrefix(p.Line(2), "@@ -") { - return nil, nil - } - - // advance past the first two lines so parser is after the header - // no EOF check needed because we know there are >=3 valid lines - if err := p.Next(); err != nil { - return nil, err - } - if err := p.Next(); err != nil { - return nil, err - } - - oldName, _, err := parseName(oldLine[len(oldPrefix):], '\t', 0) - if err != nil { - return nil, p.Errorf(0, "file header: %v", err) - } - - newName, _, err := parseName(newLine[len(newPrefix):], '\t', 0) - if err != nil { - return nil, p.Errorf(1, "file header: %v", err) - } - - f := &File{} - switch { - case oldName == devNull || hasEpochTimestamp(oldLine): - f.IsNew = true - f.NewName = newName - case newName == devNull || hasEpochTimestamp(newLine): - f.IsDelete = true - f.OldName = oldName - default: - // if old name is a prefix of new name, use that instead - // this avoids picking variants like "file.bak" or "file~" - if strings.HasPrefix(newName, oldName) { - f.OldName = oldName - f.NewName = oldName - } else { - f.OldName = newName - f.NewName = newName - } - } - - return f, nil -} - -// parseGitHeaderName extracts a default file name from the Git file header -// line. 
This is required for mode-only changes and creation/deletion of empty -// files. Other types of patch include the file name(s) in the header data. -// If the names in the header do not match because the patch is a rename, -// return an empty default name. -func parseGitHeaderName(header string) (string, error) { - header = strings.TrimSuffix(header, "\n") - if len(header) == 0 { - return "", nil - } - - var err error - var first, second string - - // there are 4 cases to account for: - // - // 1) unquoted unquoted - // 2) unquoted "quoted" - // 3) "quoted" unquoted - // 4) "quoted" "quoted" - // - quote := strings.IndexByte(header, '"') - switch { - case quote < 0: - // case 1 - first = header - - case quote > 0: - // case 2 - first = header[:quote-1] - if !isSpace(header[quote-1]) { - return "", fmt.Errorf("missing separator") - } - - second, _, err = parseQuotedName(header[quote:]) - if err != nil { - return "", err - } - - case quote == 0: - // case 3 or case 4 - var n int - first, n, err = parseQuotedName(header) - if err != nil { - return "", err - } - - // git accepts multiple spaces after a quoted name, but not after an - // unquoted name, since the name might end with one or more spaces - for n < len(header) && isSpace(header[n]) { - n++ - } - if n == len(header) { - return "", nil - } - - if header[n] == '"' { - second, _, err = parseQuotedName(header[n:]) - if err != nil { - return "", err - } - } else { - second = header[n:] - } - } - - first = trimTreePrefix(first, 1) - if second != "" { - if first == trimTreePrefix(second, 1) { - return first, nil - } - return "", nil - } - - // at this point, both names are unquoted (case 1) - // since names may contain spaces, we can't use a known separator - // instead, look for a split that produces two equal names - - for i := 0; i < len(first)-1; i++ { - if !isSpace(first[i]) { - continue - } - second = trimTreePrefix(first[i+1:], 1) - if name := first[:i]; name == second { - return name, nil - } - } - return "", 
nil -} - -// parseGitHeaderData parses a single line of metadata from a Git file header. -// It returns true when header parsing is complete; in that case, line was the -// first line of non-header content. -func parseGitHeaderData(f *File, line, defaultName string) (end bool, err error) { - if len(line) > 0 && line[len(line)-1] == '\n' { - line = line[:len(line)-1] - } - - for _, hdr := range []struct { - prefix string - end bool - parse func(*File, string, string) error - }{ - {"@@ -", true, nil}, - {"--- ", false, parseGitHeaderOldName}, - {"+++ ", false, parseGitHeaderNewName}, - {"old mode ", false, parseGitHeaderOldMode}, - {"new mode ", false, parseGitHeaderNewMode}, - {"deleted file mode ", false, parseGitHeaderDeletedMode}, - {"new file mode ", false, parseGitHeaderCreatedMode}, - {"copy from ", false, parseGitHeaderCopyFrom}, - {"copy to ", false, parseGitHeaderCopyTo}, - {"rename old ", false, parseGitHeaderRenameFrom}, - {"rename new ", false, parseGitHeaderRenameTo}, - {"rename from ", false, parseGitHeaderRenameFrom}, - {"rename to ", false, parseGitHeaderRenameTo}, - {"similarity index ", false, parseGitHeaderScore}, - {"dissimilarity index ", false, parseGitHeaderScore}, - {"index ", false, parseGitHeaderIndex}, - } { - if strings.HasPrefix(line, hdr.prefix) { - if hdr.parse != nil { - err = hdr.parse(f, line[len(hdr.prefix):], defaultName) - } - return hdr.end, err - } - } - - // unknown line indicates the end of the header - // this usually happens if the diff is empty - return true, nil -} - -func parseGitHeaderOldName(f *File, line, defaultName string) error { - name, _, err := parseName(line, '\t', 1) - if err != nil { - return err - } - if f.OldName == "" && !f.IsNew { - f.OldName = name - return nil - } - return verifyGitHeaderName(name, f.OldName, f.IsNew, "old") -} - -func parseGitHeaderNewName(f *File, line, defaultName string) error { - name, _, err := parseName(line, '\t', 1) - if err != nil { - return err - } - if f.NewName == "" && 
!f.IsDelete { - f.NewName = name - return nil - } - return verifyGitHeaderName(name, f.NewName, f.IsDelete, "new") -} - -func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) { - f.OldMode, err = parseMode(strings.TrimSpace(line)) - return -} - -func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) { - f.NewMode, err = parseMode(strings.TrimSpace(line)) - return -} - -func parseGitHeaderDeletedMode(f *File, line, defaultName string) error { - f.IsDelete = true - f.OldName = defaultName - return parseGitHeaderOldMode(f, line, defaultName) -} - -func parseGitHeaderCreatedMode(f *File, line, defaultName string) error { - f.IsNew = true - f.NewName = defaultName - return parseGitHeaderNewMode(f, line, defaultName) -} - -func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) { - f.IsCopy = true - f.OldName, _, err = parseName(line, 0, 0) - return -} - -func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) { - f.IsCopy = true - f.NewName, _, err = parseName(line, 0, 0) - return -} - -func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) { - f.IsRename = true - f.OldName, _, err = parseName(line, 0, 0) - return -} - -func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) { - f.IsRename = true - f.NewName, _, err = parseName(line, 0, 0) - return -} - -func parseGitHeaderScore(f *File, line, defaultName string) error { - score, err := strconv.ParseInt(strings.TrimSuffix(line, "%"), 10, 32) - if err != nil { - nerr := err.(*strconv.NumError) - return fmt.Errorf("invalid score line: %v", nerr.Err) - } - if score <= 100 { - f.Score = int(score) - } - return nil -} - -func parseGitHeaderIndex(f *File, line, defaultName string) error { - const sep = ".." 
- - // note that git stops parsing if the OIDs are too long to be valid - // checking this requires knowing if the repository uses SHA1 or SHA256 - // hashes, which we don't know, so we just skip that check - - parts := strings.SplitN(line, " ", 2) - oids := strings.SplitN(parts[0], sep, 2) - - if len(oids) < 2 { - return fmt.Errorf("invalid index line: missing %q", sep) - } - f.OldOIDPrefix, f.NewOIDPrefix = oids[0], oids[1] - - if len(parts) > 1 { - return parseGitHeaderOldMode(f, parts[1], defaultName) - } - return nil -} - -func parseMode(s string) (os.FileMode, error) { - mode, err := strconv.ParseInt(s, 8, 32) - if err != nil { - nerr := err.(*strconv.NumError) - return os.FileMode(0), fmt.Errorf("invalid mode line: %v", nerr.Err) - } - return os.FileMode(mode), nil -} - -// parseName extracts a file name from the start of a string and returns the -// name and the index of the first character after the name. If the name is -// unquoted and term is non-zero, parsing stops at the first occurrence of -// term. -// -// If the name is exactly "/dev/null", no further processing occurs. Otherwise, -// if dropPrefix is greater than zero, that number of prefix components -// separated by forward slashes are dropped from the name and any duplicate -// slashes are collapsed. 
-func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) { - if len(s) > 0 && s[0] == '"' { - name, n, err = parseQuotedName(s) - } else { - name, n, err = parseUnquotedName(s, term) - } - if err != nil { - return "", 0, err - } - if name == devNull { - return name, n, nil - } - return cleanName(name, dropPrefix), n, nil -} - -func parseQuotedName(s string) (name string, n int, err error) { - for n = 1; n < len(s); n++ { - if s[n] == '"' && s[n-1] != '\\' { - n++ - break - } - } - if n == 2 { - return "", 0, fmt.Errorf("missing name") - } - if name, err = strconv.Unquote(s[:n]); err != nil { - return "", 0, err - } - return name, n, err -} - -func parseUnquotedName(s string, term byte) (name string, n int, err error) { - for n = 0; n < len(s); n++ { - if s[n] == '\n' { - break - } - if term > 0 && s[n] == term { - break - } - } - if n == 0 { - return "", 0, fmt.Errorf("missing name") - } - return s[:n], n, nil -} - -// verifyGitHeaderName checks a parsed name against state set by previous lines -func verifyGitHeaderName(parsed, existing string, isNull bool, side string) error { - if existing != "" { - if isNull { - return fmt.Errorf("expected %s, but filename is set to %s", devNull, existing) - } - if existing != parsed { - return fmt.Errorf("inconsistent %s filename", side) - } - } - if isNull && parsed != devNull { - return fmt.Errorf("expected %s", devNull) - } - return nil -} - -// cleanName removes double slashes and drops prefix segments. -func cleanName(name string, drop int) string { - var b strings.Builder - for i := 0; i < len(name); i++ { - if name[i] == '/' { - if i < len(name)-1 && name[i+1] == '/' { - continue - } - if drop > 0 { - drop-- - b.Reset() - continue - } - } - b.WriteByte(name[i]) - } - return b.String() -} - -// trimTreePrefix removes up to n leading directory components from name. 
-func trimTreePrefix(name string, n int) string { - i := 0 - for ; i < len(name) && n > 0; i++ { - if name[i] == '/' { - n-- - } - } - return name[i:] -} - -// hasEpochTimestamp returns true if the string ends with a POSIX-formatted -// timestamp for the UNIX epoch after a tab character. According to git, this -// is used by GNU diff to mark creations and deletions. -func hasEpochTimestamp(s string) bool { - const posixTimeLayout = "2006-01-02 15:04:05.9 -0700" - - start := strings.IndexRune(s, '\t') - if start < 0 { - return false - } - - ts := strings.TrimSuffix(s[start+1:], "\n") - - // a valid timestamp can have optional ':' in zone specifier - // remove that if it exists so we have a single format - if len(ts) >= 3 && ts[len(ts)-3] == ':' { - ts = ts[:len(ts)-3] + ts[len(ts)-2:] - } - - t, err := time.Parse(posixTimeLayout, ts) - if err != nil { - return false - } - if !t.Equal(time.Unix(0, 0)) { - return false - } - return true -} - -func isSpace(c byte) bool { - return c == ' ' || c == '\t' || c == '\n' -} diff --git a/pkg/gitdiff/format.go b/pkg/gitdiff/format.go deleted file mode 100644 index d97aba9..0000000 --- a/pkg/gitdiff/format.go +++ /dev/null @@ -1,281 +0,0 @@ -package gitdiff - -import ( - "bytes" - "compress/zlib" - "fmt" - "io" - "strconv" -) - -type formatter struct { - w io.Writer - err error -} - -func newFormatter(w io.Writer) *formatter { - return &formatter{w: w} -} - -func (fm *formatter) Write(p []byte) (int, error) { - if fm.err != nil { - return len(p), nil - } - if _, err := fm.w.Write(p); err != nil { - fm.err = err - } - return len(p), nil -} - -func (fm *formatter) WriteString(s string) (int, error) { - fm.Write([]byte(s)) - return len(s), nil -} - -func (fm *formatter) WriteByte(c byte) error { - fm.Write([]byte{c}) - return nil -} - -func (fm *formatter) WriteQuotedName(s string) { - qpos := 0 - for i := 0; i < len(s); i++ { - ch := s[i] - if q, quoted := quoteByte(ch); quoted { - if qpos == 0 { - fm.WriteByte('"') - } - 
fm.WriteString(s[qpos:i]) - fm.Write(q) - qpos = i + 1 - } - } - fm.WriteString(s[qpos:]) - if qpos > 0 { - fm.WriteByte('"') - } -} - -var quoteEscapeTable = map[byte]byte{ - '\a': 'a', - '\b': 'b', - '\t': 't', - '\n': 'n', - '\v': 'v', - '\f': 'f', - '\r': 'r', - '"': '"', - '\\': '\\', -} - -func quoteByte(b byte) ([]byte, bool) { - if q, ok := quoteEscapeTable[b]; ok { - return []byte{'\\', q}, true - } - if b < 0x20 || b >= 0x7F { - return []byte{ - '\\', - '0' + (b>>6)&0o3, - '0' + (b>>3)&0o7, - '0' + (b>>0)&0o7, - }, true - } - return nil, false -} - -func (fm *formatter) FormatFile(f *File) { - fm.WriteString("diff --git ") - - var aName, bName string - switch { - case f.OldName == "": - aName = f.NewName - bName = f.NewName - - case f.NewName == "": - aName = f.OldName - bName = f.OldName - - default: - aName = f.OldName - bName = f.NewName - } - - fm.WriteQuotedName("a/" + aName) - fm.WriteByte(' ') - fm.WriteQuotedName("b/" + bName) - fm.WriteByte('\n') - - if f.OldMode != 0 { - if f.IsDelete { - fmt.Fprintf(fm, "deleted file mode %o\n", f.OldMode) - } else if f.NewMode != 0 { - fmt.Fprintf(fm, "old mode %o\n", f.OldMode) - } - } - - if f.NewMode != 0 { - if f.IsNew { - fmt.Fprintf(fm, "new file mode %o\n", f.NewMode) - } else if f.OldMode != 0 { - fmt.Fprintf(fm, "new mode %o\n", f.NewMode) - } - } - - if f.Score > 0 { - if f.IsCopy || f.IsRename { - fmt.Fprintf(fm, "similarity index %d%%\n", f.Score) - } else { - fmt.Fprintf(fm, "dissimilarity index %d%%\n", f.Score) - } - } - - if f.IsCopy { - if f.OldName != "" { - fm.WriteString("copy from ") - fm.WriteQuotedName(f.OldName) - fm.WriteByte('\n') - } - if f.NewName != "" { - fm.WriteString("copy to ") - fm.WriteQuotedName(f.NewName) - fm.WriteByte('\n') - } - } - - if f.IsRename { - if f.OldName != "" { - fm.WriteString("rename from ") - fm.WriteQuotedName(f.OldName) - fm.WriteByte('\n') - } - if f.NewName != "" { - fm.WriteString("rename to ") - fm.WriteQuotedName(f.NewName) - fm.WriteByte('\n') - } 
- } - - if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { - fmt.Fprintf(fm, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) - - // Mode is only included on the index line when it is not changing - if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) { - fmt.Fprintf(fm, " %o", f.OldMode) - } - - fm.WriteByte('\n') - } - - if f.IsBinary { - if f.BinaryFragment == nil { - fm.WriteString("Binary files ") - fm.WriteQuotedName("a/" + aName) - fm.WriteString(" and ") - fm.WriteQuotedName("b/" + bName) - fm.WriteString(" differ\n") - } else { - fm.WriteString("GIT binary patch\n") - fm.FormatBinaryFragment(f.BinaryFragment) - if f.ReverseBinaryFragment != nil { - fm.FormatBinaryFragment(f.ReverseBinaryFragment) - } - } - } - - // The "---" and "+++" lines only appear for text patches with fragments - if len(f.TextFragments) > 0 { - fm.WriteString("--- ") - if f.OldName == "" { - fm.WriteString("/dev/null") - } else { - fm.WriteQuotedName("a/" + f.OldName) - } - fm.WriteByte('\n') - - fm.WriteString("+++ ") - if f.NewName == "" { - fm.WriteString("/dev/null") - } else { - fm.WriteQuotedName("b/" + f.NewName) - } - fm.WriteByte('\n') - - for _, frag := range f.TextFragments { - fm.FormatTextFragment(frag) - } - } -} - -func (fm *formatter) FormatTextFragment(f *TextFragment) { - fm.FormatTextFragmentHeader(f) - fm.WriteByte('\n') - - for _, line := range f.Lines { - fm.WriteString(line.Op.String()) - fm.WriteString(line.Line) - if line.NoEOL() { - fm.WriteString("\n\\ No newline at end of file\n") - } - } -} - -func (fm *formatter) FormatTextFragmentHeader(f *TextFragment) { - fmt.Fprintf(fm, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines) - if f.Comment != "" { - fm.WriteByte(' ') - fm.WriteString(f.Comment) - } -} - -func (fm *formatter) FormatBinaryFragment(f *BinaryFragment) { - const ( - maxBytesPerLine = 52 - ) - - switch f.Method { - case BinaryPatchDelta: - fm.WriteString("delta ") - case BinaryPatchLiteral: - 
fm.WriteString("literal ") - } - fm.Write(strconv.AppendInt(nil, f.Size, 10)) - fm.WriteByte('\n') - - data := deflateBinaryChunk(f.Data) - n := (len(data) / maxBytesPerLine) * maxBytesPerLine - - buf := make([]byte, base85Len(maxBytesPerLine)) - for i := 0; i < n; i += maxBytesPerLine { - base85Encode(buf, data[i:i+maxBytesPerLine]) - fm.WriteByte('z') - fm.Write(buf) - fm.WriteByte('\n') - } - if remainder := len(data) - n; remainder > 0 { - buf = buf[0:base85Len(remainder)] - - sizeChar := byte(remainder) - if remainder <= 26 { - sizeChar = 'A' + sizeChar - 1 - } else { - sizeChar = 'a' + sizeChar - 27 - } - - base85Encode(buf, data[n:]) - fm.WriteByte(sizeChar) - fm.Write(buf) - fm.WriteByte('\n') - } - fm.WriteByte('\n') -} - -func deflateBinaryChunk(data []byte) []byte { - var b bytes.Buffer - - zw := zlib.NewWriter(&b) - _, _ = zw.Write(data) - _ = zw.Close() - - return b.Bytes() -} diff --git a/pkg/gitdiff/gitdiff.go b/pkg/gitdiff/gitdiff.go deleted file mode 100644 index 5365645..0000000 --- a/pkg/gitdiff/gitdiff.go +++ /dev/null @@ -1,230 +0,0 @@ -package gitdiff - -import ( - "errors" - "fmt" - "os" - "strings" -) - -// File describes changes to a single file. It can be either a text file or a -// binary file. -type File struct { - OldName string - NewName string - - IsNew bool - IsDelete bool - IsCopy bool - IsRename bool - - OldMode os.FileMode - NewMode os.FileMode - - OldOIDPrefix string - NewOIDPrefix string - Score int - - // TextFragments contains the fragments describing changes to a text file. It - // may be empty if the file is empty or if only the mode changes. - TextFragments []*TextFragment - - // IsBinary is true if the file is a binary file. If the patch includes - // binary data, BinaryFragment will be non-nil and describe the changes to - // the data. If the patch is reversible, ReverseBinaryFragment will also be - // non-nil and describe the changes needed to restore the original file - // after applying the changes in BinaryFragment. 
- IsBinary bool - BinaryFragment *BinaryFragment - ReverseBinaryFragment *BinaryFragment -} - -// String returns a git diff representation of this file. The value can be -// parsed by this library to obtain the same File, but may not be the same as -// the original input. -func (f *File) String() string { - var diff strings.Builder - newFormatter(&diff).FormatFile(f) - return diff.String() -} - -// TextFragment describes changed lines starting at a specific line in a text file. -type TextFragment struct { - Comment string - - OldPosition int64 - OldLines int64 - - NewPosition int64 - NewLines int64 - - LinesAdded int64 - LinesDeleted int64 - - LeadingContext int64 - TrailingContext int64 - - Lines []Line -} - -// String returns a git diff format of this fragment. See [File.String] for -// more details on this format. -func (f *TextFragment) String() string { - var diff strings.Builder - newFormatter(&diff).FormatTextFragment(f) - return diff.String() -} - -// Header returns a git diff header of this fragment. See [File.String] for -// more details on this format. -func (f *TextFragment) Header() string { - var hdr strings.Builder - newFormatter(&hdr).FormatTextFragmentHeader(f) - return hdr.String() -} - -// Validate checks that the fragment is self-consistent and appliable. Validate -// returns an error if and only if the fragment is invalid. 
-func (f *TextFragment) Validate() error { - if f == nil { - return errors.New("nil fragment") - } - - var ( - oldLines, newLines int64 - leadingContext, trailingContext int64 - contextLines, addedLines, deletedLines int64 - ) - - // count the types of lines in the fragment content - for i, line := range f.Lines { - switch line.Op { - case OpContext: - oldLines++ - newLines++ - contextLines++ - if addedLines == 0 && deletedLines == 0 { - leadingContext++ - } else { - trailingContext++ - } - case OpAdd: - newLines++ - addedLines++ - trailingContext = 0 - case OpDelete: - oldLines++ - deletedLines++ - trailingContext = 0 - default: - return fmt.Errorf("unknown operator %q on line %d", line.Op, i+1) - } - } - - // check the actual counts against the reported counts - if oldLines != f.OldLines { - return lineCountErr("old", oldLines, f.OldLines) - } - if newLines != f.NewLines { - return lineCountErr("new", newLines, f.NewLines) - } - if leadingContext != f.LeadingContext { - return lineCountErr("leading context", leadingContext, f.LeadingContext) - } - if trailingContext != f.TrailingContext { - return lineCountErr("trailing context", trailingContext, f.TrailingContext) - } - if addedLines != f.LinesAdded { - return lineCountErr("added", addedLines, f.LinesAdded) - } - if deletedLines != f.LinesDeleted { - return lineCountErr("deleted", deletedLines, f.LinesDeleted) - } - - // if a file is being created, it can only contain additions - if f.OldPosition == 0 && f.OldLines != 0 { - return errors.New("file creation fragment contains context or deletion lines") - } - - return nil -} - -func lineCountErr(kind string, actual, reported int64) error { - return fmt.Errorf("fragment contains %d %s lines but reports %d", actual, kind, reported) -} - -// Line is a line in a text fragment. 
type Line struct {
	// Op is the kind of change this line represents.
	Op LineOp
	// Line is the text of the line, without the operator prefix.
	Line string
}

// String returns the operator symbol followed by the line text.
func (fl Line) String() string {
	prefix := fl.Op.String()
	return prefix + fl.Line
}

// Old reports whether the line is part of the old content of the fragment,
// which is the case for context and deleted lines.
func (fl Line) Old() bool {
	switch fl.Op {
	case OpContext, OpDelete:
		return true
	}
	return false
}

// New reports whether the line is part of the new content of the fragment,
// which is the case for context and added lines.
func (fl Line) New() bool {
	switch fl.Op {
	case OpContext, OpAdd:
		return true
	}
	return false
}

// NoEOL reports whether the line text does not end with a newline character.
// An empty line text has no newline and therefore reports true.
func (fl Line) NoEOL() bool {
	size := len(fl.Line)
	if size == 0 {
		return true
	}
	return fl.Line[size-1] != '\n'
}

// LineOp describes the type of a text fragment line: context, added, or removed.
type LineOp int

const (
	// OpContext indicates a context line
	OpContext LineOp = iota
	// OpDelete indicates a deleted line
	OpDelete
	// OpAdd indicates an added line
	OpAdd
)

// String returns the one-character prefix for the operation, or "?" if the
// value is not one of the defined operations.
func (op LineOp) String() string {
	symbols := [...]string{
		OpContext: " ",
		OpDelete:  "-",
		OpAdd:     "+",
	}
	if op < 0 || int(op) >= len(symbols) {
		return "?"
	}
	return symbols[op]
}

// BinaryFragment describes changes to a binary file.
type BinaryFragment struct {
	Method BinaryPatchMethod
	Size   int64
	Data   []byte
}

// BinaryPatchMethod is the method used to create and apply the binary patch.
type BinaryPatchMethod int

const (
	// BinaryPatchDelta indicates the data uses Git's packfile encoding
	BinaryPatchDelta BinaryPatchMethod = iota
	// BinaryPatchLiteral indicates the data is the exact file content
	BinaryPatchLiteral
)

// String returns a git diff format of this fragment. Due to differences in
// zlib implementation between Go and Git, encoded binary data in the result
// will likely differ from what Git produces for the same input. See
// [File.String] for more details on this format.
-func (f *BinaryFragment) String() string { - var diff strings.Builder - newFormatter(&diff).FormatBinaryFragment(f) - return diff.String() -} diff --git a/pkg/gitdiff/io.go b/pkg/gitdiff/io.go deleted file mode 100644 index 8143238..0000000 --- a/pkg/gitdiff/io.go +++ /dev/null @@ -1,220 +0,0 @@ -package gitdiff - -import ( - "errors" - "io" -) - -const ( - byteBufferSize = 32 * 1024 // from io.Copy - lineBufferSize = 32 - indexBufferSize = 1024 -) - -// LineReaderAt is the interface that wraps the ReadLinesAt method. -// -// ReadLinesAt reads len(lines) into lines starting at line offset. It returns -// the number of lines read (0 <= n <= len(lines)) and any error encountered. -// Line numbers are zero-indexed. -// -// If n < len(lines), ReadLinesAt returns a non-nil error explaining why more -// lines were not returned. -// -// Lines read by ReadLinesAt include the newline character. The last line does -// not have a final newline character if the input ends without one. -type LineReaderAt interface { - ReadLinesAt(lines [][]byte, offset int64) (n int, err error) -} - -type lineReaderAt struct { - r io.ReaderAt - index []int64 - eof bool -} - -func (r *lineReaderAt) ReadLinesAt(lines [][]byte, offset int64) (n int, err error) { - if offset < 0 { - return 0, errors.New("ReadLinesAt: negative offset") - } - if len(lines) == 0 { - return 0, nil - } - - count := len(lines) - startLine := offset - endLine := startLine + int64(count) - - if endLine > int64(len(r.index)) && !r.eof { - if err := r.indexTo(endLine); err != nil { - return 0, err - } - } - if startLine >= int64(len(r.index)) { - return 0, io.EOF - } - - buf, byteOffset, err := r.readBytes(startLine, int64(count)) - if err != nil { - return 0, err - } - - for n = 0; n < count && startLine+int64(n) < int64(len(r.index)); n++ { - lineno := startLine + int64(n) - start, end := int64(0), r.index[lineno]-byteOffset - if lineno > 0 { - start = r.index[lineno-1] - byteOffset - } - lines[n] = buf[start:end] - } - - 
if n < count { - return n, io.EOF - } - return n, nil -} - -// indexTo reads data and computes the line index until there is information -// for line or a read returns io.EOF. It returns an error if and only if there -// is an error reading data. -func (r *lineReaderAt) indexTo(line int64) error { - var buf [indexBufferSize]byte - - offset := r.lastOffset() - for int64(len(r.index)) < line { - n, err := r.r.ReadAt(buf[:], offset) - if err != nil && err != io.EOF { - return err - } - for _, b := range buf[:n] { - offset++ - if b == '\n' { - r.index = append(r.index, offset) - } - } - if err == io.EOF { - if offset > r.lastOffset() { - r.index = append(r.index, offset) - } - r.eof = true - break - } - } - return nil -} - -func (r *lineReaderAt) lastOffset() int64 { - if n := len(r.index); n > 0 { - return r.index[n-1] - } - return 0 -} - -// readBytes reads the bytes of the n lines starting at line and returns the -// bytes and the offset of the first byte in the underlying source. -func (r *lineReaderAt) readBytes(line, n int64) (b []byte, offset int64, err error) { - indexLen := int64(len(r.index)) - - var size int64 - if line > indexLen { - offset = r.index[indexLen-1] - } else if line > 0 { - offset = r.index[line-1] - } - if n > 0 { - if line+n > indexLen { - size = r.index[indexLen-1] - offset - } else { - size = r.index[line+n-1] - offset - } - } - - b = make([]byte, size) - if _, err := r.r.ReadAt(b, offset); err != nil { - if err == io.EOF { - err = errors.New("ReadLinesAt: corrupt line index or changed source data") - } - return nil, 0, err - } - return b, offset, nil -} - -func isLen(r io.ReaderAt, n int64) (bool, error) { - off := n - 1 - if off < 0 { - off = 0 - } - - var b [2]byte - nr, err := r.ReadAt(b[:], off) - if err == io.EOF { - return (n == 0 && nr == 0) || (n > 0 && nr == 1), nil - } - return false, err -} - -// copyFrom writes bytes starting from offset off in src to dst stopping at the -// end of src or at the first error. 
copyFrom returns the number of bytes -// written and any error. -func copyFrom(dst io.Writer, src io.ReaderAt, off int64) (written int64, err error) { - buf := make([]byte, byteBufferSize) - for { - nr, rerr := src.ReadAt(buf, off) - if nr > 0 { - nw, werr := dst.Write(buf[0:nr]) - if nw > 0 { - written += int64(nw) - } - if werr != nil { - err = werr - break - } - if nr != nw { - err = io.ErrShortWrite - break - } - off += int64(nr) - } - if rerr != nil { - if rerr != io.EOF { - err = rerr - } - break - } - } - return written, err -} - -// copyLinesFrom writes lines starting from line off in src to dst stopping at -// the end of src or at the first error. copyLinesFrom returns the number of -// lines written and any error. -func copyLinesFrom(dst io.Writer, src LineReaderAt, off int64) (written int64, err error) { - buf := make([][]byte, lineBufferSize) -ReadLoop: - for { - nr, rerr := src.ReadLinesAt(buf, off) - if nr > 0 { - for _, line := range buf[0:nr] { - nw, werr := dst.Write(line) - if nw > 0 { - written++ - } - if werr != nil { - err = werr - break ReadLoop - } - if len(line) != nw { - err = io.ErrShortWrite - break ReadLoop - } - } - off += int64(nr) - } - if rerr != nil { - if rerr != io.EOF { - err = rerr - } - break - } - } - return written, err -} diff --git a/pkg/gitdiff/parser.go b/pkg/gitdiff/parser.go deleted file mode 100644 index e8f8430..0000000 --- a/pkg/gitdiff/parser.go +++ /dev/null @@ -1,142 +0,0 @@ -// Package gitdiff parses and applies patches generated by Git. It supports -// line-oriented text patches, binary patches, and can also parse standard -// unified diffs generated by other tools. -package gitdiff - -import ( - "bufio" - "fmt" - "io" -) - -// Parse parses a patch with changes to one or more files. Any content before -// the first file is returned as the second value. If an error occurs while -// parsing, it returns all files parsed before the error. -// -// Parse expects to receive a single patch. 
If the input may contain multiple -// patches (for example, if it is an mbox file), callers should split it into -// individual patches and call Parse on each one. -func Parse(r io.Reader) ([]*File, string, error) { - p := newParser(r) - - if err := p.Next(); err != nil { - if err == io.EOF { - return nil, "", nil - } - return nil, "", err - } - - var preamble string - var files []*File - for { - file, pre, err := p.ParseNextFileHeader() - if err != nil { - return files, preamble, err - } - if len(files) == 0 { - preamble = pre - } - if file == nil { - break - } - - for _, fn := range []func(*File) (int, error){ - p.ParseTextFragments, - p.ParseBinaryFragments, - } { - n, err := fn(file) - if err != nil { - return files, preamble, err - } - if n > 0 { - break - } - } - - files = append(files, file) - } - - return files, preamble, nil -} - -// TODO(bkeyes): consider exporting the parser type with configuration -// this would enable OID validation, p-value guessing, and prefix stripping -// by allowing users to set or override defaults - -// parser invariants: -// - methods that parse objects: -// - start with the parser on the first line of the first object -// - if returning nil, do not advance -// - if returning an error, do not advance past the object -// - if returning an object, advance to the first line after the object -// - any exported parsing methods must initialize the parser by calling Next() - -type stringReader interface { - ReadString(delim byte) (string, error) -} - -type parser struct { - r stringReader - - eof bool - lineno int64 - lines [3]string -} - -func newParser(r io.Reader) *parser { - if r, ok := r.(stringReader); ok { - return &parser{r: r} - } - return &parser{r: bufio.NewReader(r)} -} - -// Next advances the parser by one line. It returns any error encountered while -// reading the line, including io.EOF when the end of stream is reached. 
-func (p *parser) Next() error { - if p.eof { - return io.EOF - } - - if p.lineno == 0 { - // on first call to next, need to shift in all lines - for i := 0; i < len(p.lines)-1; i++ { - if err := p.shiftLines(); err != nil && err != io.EOF { - return err - } - } - } - - err := p.shiftLines() - if err != nil && err != io.EOF { - return err - } - - p.lineno++ - if p.lines[0] == "" { - p.eof = true - return io.EOF - } - return nil -} - -func (p *parser) shiftLines() (err error) { - for i := 0; i < len(p.lines)-1; i++ { - p.lines[i] = p.lines[i+1] - } - p.lines[len(p.lines)-1], err = p.r.ReadString('\n') - return -} - -// Line returns a line from the parser without advancing it. A delta of 0 -// returns the current line, while higher deltas return read-ahead lines. It -// returns an empty string if the delta is higher than the available lines, -// either because of the buffer size or because the parser reached the end of -// the input. Valid lines always contain at least a newline character. -func (p *parser) Line(delta uint) string { - return p.lines[delta] -} - -// Errorf generates an error and appends the current line information. -func (p *parser) Errorf(delta int64, msg string, args ...interface{}) error { - return fmt.Errorf("gitdiff: line %d: %s", p.lineno+delta, fmt.Sprintf(msg, args...)) -} diff --git a/pkg/gitdiff/patch_header.go b/pkg/gitdiff/patch_header.go deleted file mode 100644 index f047059..0000000 --- a/pkg/gitdiff/patch_header.go +++ /dev/null @@ -1,470 +0,0 @@ -package gitdiff - -import ( - "bufio" - "errors" - "fmt" - "io" - "io/ioutil" - "mime/quotedprintable" - "net/mail" - "strconv" - "strings" - "time" - "unicode" -) - -const ( - mailHeaderPrefix = "From " - prettyHeaderPrefix = "commit " - mailMinimumHeaderPrefix = "From:" -) - -// PatchHeader is a parsed version of the preamble content that appears before -// the first diff in a patch. It includes metadata about the patch, such as the -// author and a subject. 
-type PatchHeader struct { - // The SHA of the commit the patch was generated from. Empty if the SHA is - // not included in the header. - SHA string - - // The author details of the patch. If these details are not included in - // the header, Author is nil and AuthorDate is the zero time. - Author *PatchIdentity - AuthorDate time.Time - - // The committer details of the patch. If these details are not included in - // the header, Committer is nil and CommitterDate is the zero time. - Committer *PatchIdentity - CommitterDate time.Time - - // The title and body of the commit message describing the changes in the - // patch. Empty if no message is included in the header. - Title string - Body string - - // If the preamble looks like an email, ParsePatchHeader will - // remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the - // Title and place them here. - SubjectPrefix string - - // If the preamble looks like an email, and it contains a `---` - // line, that line will be removed and everything after it will be - // placed in BodyAppendix. - BodyAppendix string -} - -// Message returns the commit message for the header. The message consists of -// the title and the body separated by an empty line. -func (h *PatchHeader) Message() string { - var msg strings.Builder - if h != nil { - msg.WriteString(h.Title) - if h.Body != "" { - msg.WriteString("\n\n") - msg.WriteString(h.Body) - } - } - return msg.String() -} - -// ParsePatchDate parses a patch date string. It returns the parsed time or an -// error if s has an unknown format. ParsePatchDate supports the iso, rfc, -// short, raw, unix, and default formats (with local variants) used by the -// --date flag in Git. 
func ParsePatchDate(s string) (time.Time, error) {
	const (
		isoFormat          = "2006-01-02 15:04:05 -0700"
		isoStrictFormat    = "2006-01-02T15:04:05-07:00"
		rfc2822Format      = "Mon, 2 Jan 2006 15:04:05 -0700"
		shortFormat        = "2006-01-02"
		defaultFormat      = "Mon Jan 2 15:04:05 2006 -0700"
		defaultLocalFormat = "Mon Jan 2 15:04:05 2006"
	)

	// an empty string is not an error: it yields the zero time
	if s == "" {
		return time.Time{}, nil
	}

	// Layouts are tried in order and the first successful parse wins.
	// Layouts without a zone are interpreted in the local time zone.
	for _, layout := range []string{
		isoFormat,
		isoStrictFormat,
		rfc2822Format,
		shortFormat,
		defaultFormat,
		defaultLocalFormat,
	} {
		if t, err := time.ParseInLocation(layout, s, time.Local); err == nil {
			return t, nil
		}
	}

	// unix format
	if unix, err := strconv.ParseInt(s, 10, 64); err == nil {
		return time.Unix(unix, 0), nil
	}

	// raw format: seconds since the epoch, a space, and a numeric zone offset
	if space := strings.IndexByte(s, ' '); space > 0 {
		unix, uerr := strconv.ParseInt(s[:space], 10, 64)
		zone, zerr := time.Parse("-0700", s[space+1:])
		if uerr == nil && zerr == nil {
			return time.Unix(unix, 0).In(zone.Location()), nil
		}
	}

	return time.Time{}, fmt.Errorf("unknown date format: %s", s)
}

// A PatchHeaderOption modifies the behavior of ParsePatchHeader.
type PatchHeaderOption func(*patchHeaderOptions)

// SubjectCleanMode controls how ParsePatchHeader cleans subject lines when
// parsing mail-formatted patches.
type SubjectCleanMode int

const (
	// SubjectCleanWhitespace removes leading and trailing whitespace.
	SubjectCleanWhitespace SubjectCleanMode = iota

	// SubjectCleanAll removes leading and trailing whitespace, leading "Re:",
	// "re:", and ":" strings, and leading strings enclosed by '[' and ']'.
	// This is the default behavior of git (see `git mailinfo`) and this
	// package.
	SubjectCleanAll

	// SubjectCleanPatchOnly is the same as SubjectCleanAll, but only removes
	// leading strings enclosed by '[' and ']' if they start with "PATCH".
	SubjectCleanPatchOnly
)

// WithSubjectCleanMode sets the SubjectCleanMode for header parsing. By
// default, uses SubjectCleanAll.
func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption {
	return func(opts *patchHeaderOptions) {
		opts.subjectCleanMode = m
	}
}

// patchHeaderOptions collects the settings that PatchHeaderOption values
// modify.
type patchHeaderOptions struct {
	subjectCleanMode SubjectCleanMode
}

// ParsePatchHeader parses the preamble string returned by [Parse] into a
// PatchHeader. Due to the variety of header formats, some fields of the parsed
// PatchHeader may be unset after parsing.
//
// Supported formats are the short, medium, full, fuller, and email pretty
// formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox
// format used by `git format-patch`.
//
// When parsing mail-formatted headers, ParsePatchHeader tries to remove
// email-specific content from the title and body:
//
//   - Based on the SubjectCleanMode, remove prefixes like reply markers and
//     "[PATCH]" strings from the subject, saving any removed content in the
//     SubjectPrefix field. Parsing always discards leading and trailing
//     whitespace from the subject line. The default mode is SubjectCleanAll.
//
//   - If the body contains a "---" line (3 hyphens), remove that line and any
//     content after it from the body and save it in the BodyAppendix field.
//
// ParsePatchHeader tries to process content it does not understand without
// returning errors, but will return errors if well-identified content like
// dates or identities uses unknown or invalid formats.
-func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) { - opts := patchHeaderOptions{ - subjectCleanMode: SubjectCleanAll, // match git defaults - } - for _, optFn := range options { - optFn(&opts) - } - - header = strings.TrimSpace(header) - if header == "" { - return &PatchHeader{}, nil - } - - var firstLine, rest string - if idx := strings.IndexByte(header, '\n'); idx >= 0 { - firstLine = header[:idx] - rest = header[idx+1:] - } else { - firstLine = header - rest = "" - } - - switch { - case strings.HasPrefix(firstLine, mailHeaderPrefix): - return parseHeaderMail(firstLine, strings.NewReader(rest), opts) - - case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix): - // With a minimum header, the first line is part of the actual mail - // content and needs to be parsed as part of the "rest" - return parseHeaderMail("", strings.NewReader(header), opts) - - case strings.HasPrefix(firstLine, prettyHeaderPrefix): - return parseHeaderPretty(firstLine, strings.NewReader(rest)) - } - - return nil, errors.New("unrecognized patch header format") -} - -func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) { - const ( - authorPrefix = "Author:" - commitPrefix = "Commit:" - datePrefix = "Date:" - authorDatePrefix = "AuthorDate:" - commitDatePrefix = "CommitDate:" - ) - - h := &PatchHeader{} - - prettyLine = strings.TrimPrefix(prettyLine, prettyHeaderPrefix) - if i := strings.IndexByte(prettyLine, ' '); i > 0 { - h.SHA = prettyLine[:i] - } else { - h.SHA = prettyLine - } - - s := bufio.NewScanner(r) - for s.Scan() { - line := s.Text() - - // empty line marks end of fields, remaining lines are title/message - if strings.TrimSpace(line) == "" { - break - } - - switch { - case strings.HasPrefix(line, authorPrefix): - u, err := ParsePatchIdentity(line[len(authorPrefix):]) - if err != nil { - return nil, err - } - h.Author = &u - - case strings.HasPrefix(line, commitPrefix): - u, err := 
ParsePatchIdentity(line[len(commitPrefix):]) - if err != nil { - return nil, err - } - h.Committer = &u - - case strings.HasPrefix(line, datePrefix): - d, err := ParsePatchDate(strings.TrimSpace(line[len(datePrefix):])) - if err != nil { - return nil, err - } - h.AuthorDate = d - - case strings.HasPrefix(line, authorDatePrefix): - d, err := ParsePatchDate(strings.TrimSpace(line[len(authorDatePrefix):])) - if err != nil { - return nil, err - } - h.AuthorDate = d - - case strings.HasPrefix(line, commitDatePrefix): - d, err := ParsePatchDate(strings.TrimSpace(line[len(commitDatePrefix):])) - if err != nil { - return nil, err - } - h.CommitterDate = d - } - } - if s.Err() != nil { - return nil, s.Err() - } - - title, indent := scanMessageTitle(s) - if s.Err() != nil { - return nil, s.Err() - } - h.Title = title - - if title != "" { - // Don't check for an appendix, pretty headers do not contain them - body, _ := scanMessageBody(s, indent, false) - if s.Err() != nil { - return nil, s.Err() - } - h.Body = body - } - - return h, nil -} - -func scanMessageTitle(s *bufio.Scanner) (title string, indent string) { - var b strings.Builder - for i := 0; s.Scan(); i++ { - line := s.Text() - trimLine := strings.TrimSpace(line) - if trimLine == "" { - break - } - - if i == 0 { - if start := strings.IndexFunc(line, func(c rune) bool { return !unicode.IsSpace(c) }); start > 0 { - indent = line[:start] - } - } - if b.Len() > 0 { - b.WriteByte(' ') - } - b.WriteString(trimLine) - } - return b.String(), indent -} - -func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) { - // Body and appendix - var body, appendix strings.Builder - c := &body - var empty int - for i := 0; s.Scan(); i++ { - line := s.Text() - - line = strings.TrimRightFunc(line, unicode.IsSpace) - line = strings.TrimPrefix(line, indent) - - if line == "" { - empty++ - continue - } - - // If requested, parse out "appendix" information (often added - // by `git format-patch` and 
removed by `git am`). - if separateAppendix && c == &body && line == "---" { - c = &appendix - continue - } - - if c.Len() > 0 { - c.WriteByte('\n') - if empty > 0 { - c.WriteByte('\n') - } - } - empty = 0 - - c.WriteString(line) - } - return body.String(), appendix.String() -} - -func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*PatchHeader, error) { - msg, err := mail.ReadMessage(r) - if err != nil { - return nil, err - } - - h := &PatchHeader{} - - if strings.HasPrefix(mailLine, mailHeaderPrefix) { - mailLine = strings.TrimPrefix(mailLine, mailHeaderPrefix) - if i := strings.IndexByte(mailLine, ' '); i > 0 { - h.SHA = mailLine[:i] - } - } - - from := msg.Header.Get("From") - if from != "" { - u, err := ParsePatchIdentity(from) - if err != nil { - return nil, err - } - h.Author = &u - } - - date := msg.Header.Get("Date") - if date != "" { - d, err := ParsePatchDate(date) - if err != nil { - return nil, err - } - h.AuthorDate = d - } - - subject := msg.Header.Get("Subject") - h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode) - - s := bufio.NewScanner(msg.Body) - h.Body, h.BodyAppendix = scanMessageBody(s, "", true) - if s.Err() != nil { - return nil, s.Err() - } - - return h, nil -} - -func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) { - switch mode { - case SubjectCleanAll, SubjectCleanPatchOnly: - case SubjectCleanWhitespace: - return "", strings.TrimSpace(decodeSubject(s)) - default: - panic(fmt.Sprintf("unknown clean mode: %d", mode)) - } - - // Based on the algorithm from Git in mailinfo.c:cleanup_subject() - // If compatibility with `git am` drifts, go there to see if there are any updates. 
- - at := 0 - for at < len(s) { - switch s[at] { - case 'r', 'R': - // Detect re:, Re:, rE: and RE: - if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' { - at += 3 - continue - } - - case ' ', '\t', ':': - // Delete whitespace and duplicate ':' characters - at++ - continue - - case '[': - if i := strings.IndexByte(s[at:], ']'); i > 0 { - if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") { - at += i + 1 - continue - } - } - } - - // Nothing was removed, end processing - break - } - - prefix = strings.TrimLeftFunc(s[:at], unicode.IsSpace) - subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace) - return -} - -// Decodes a subject line. Currently only supports quoted-printable UTF-8. This format is the result -// of a `git format-patch` when the commit title has a non-ASCII character (i.e. an emoji). -// See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject -func decodeSubject(encoded string) string { - if !strings.HasPrefix(encoded, "=?UTF-8?q?") { - // not UTF-8 encoded - return encoded - } - - // If the subject is too long, `git format-patch` may produce a subject line across - // multiple lines. 
When parsed, this can look like the following: - // <UTF8-prefix><first-line> <UTF8-prefix><second-line> - payload := " " + encoded - payload = strings.ReplaceAll(payload, " =?UTF-8?q?", "") - payload = strings.ReplaceAll(payload, "?=", "") - - decoded, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(payload))) - if err != nil { - // if err, abort decoding and return original subject - return encoded - } - - return string(decoded) -} diff --git a/pkg/gitdiff/patch_identity.go b/pkg/gitdiff/patch_identity.go deleted file mode 100644 index 018f80c..0000000 --- a/pkg/gitdiff/patch_identity.go +++ /dev/null @@ -1,166 +0,0 @@ -package gitdiff - -import ( - "fmt" - "strings" -) - -// PatchIdentity identifies a person who authored or committed a patch. -type PatchIdentity struct { - Name string - Email string -} - -func (i PatchIdentity) String() string { - name := i.Name - if name == "" { - name = `""` - } - return fmt.Sprintf("%s <%s>", name, i.Email) -} - -// ParsePatchIdentity parses a patch identity string. A patch identity contains -// an email address and an optional name in [RFC 5322] format. This is either a -// plain email adddress or a name followed by an address in angle brackets: -// -// author@example.com -// Author Name <author@example.com> -// -// If the input is not one of these formats, ParsePatchIdentity applies a -// heuristic to separate the name and email portions. If both the name and -// email are missing or empty, ParsePatchIdentity returns an error. It -// otherwise does not validate the result. 
-// -// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322 -func ParsePatchIdentity(s string) (PatchIdentity, error) { - s = normalizeSpace(s) - s = unquotePairs(s) - - var name, email string - if at := strings.IndexByte(s, '@'); at >= 0 { - start, end := at, at - for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' { - start-- - } - for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' { - end++ - } - email = s[start+1 : end] - - // Adjust the boundaries so that we drop angle brackets, but keep - // spaces when removing the email to form the name. - if start < 0 || s[start] != '<' { - start++ - } - if end >= len(s) || s[end] != '>' { - end-- - } - name = s[:start] + s[end+1:] - } else { - start, end := 0, 0 - for i := 0; i < len(s); i++ { - if s[i] == '<' && start == 0 { - start = i + 1 - } - if s[i] == '>' && start > 0 { - end = i - break - } - } - if start > 0 && end >= start { - email = strings.TrimSpace(s[start:end]) - name = s[:start-1] - } - } - - // After extracting the email, the name might contain extra whitespace - // again and may be surrounded by comment characters. The git source gives - // these examples of when this can happen: - // - // "Name <email@domain>" - // "email@domain (Name)" - // "Name <email@domain> (Comment)" - // - name = normalizeSpace(name) - if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") { - name = name[1 : len(name)-1] - } - name = strings.TrimSpace(name) - - // If the name is empty or contains email-like characters, use the email - // instead (assuming one exists) - if name == "" || strings.ContainsAny(name, "@<>") { - name = email - } - - if name == "" && email == "" { - return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s) - } - return PatchIdentity{Name: name, Email: email}, nil -} - -// unquotePairs process the RFC5322 tokens "quoted-string" and "comment" to -// remove any "quoted-pairs" (backslash-espaced characters). 
It also removes -// the quotes from any quoted strings, but leaves the comment delimiters. -func unquotePairs(s string) string { - quote := false - comments := 0 - escaped := false - - var out strings.Builder - for i := 0; i < len(s); i++ { - if escaped { - escaped = false - } else { - switch s[i] { - case '\\': - // quoted-pair is only allowed in quoted-string/comment - if quote || comments > 0 { - escaped = true - continue // drop '\' character - } - - case '"': - if comments == 0 { - quote = !quote - continue // drop '"' character - } - - case '(': - if !quote { - comments++ - } - case ')': - if comments > 0 { - comments-- - } - } - } - out.WriteByte(s[i]) - } - return out.String() -} - -// normalizeSpace trims leading and trailing whitespace from s and converts -// inner sequences of one or more whitespace characters to single spaces. -func normalizeSpace(s string) string { - var sb strings.Builder - for i := 0; i < len(s); i++ { - c := s[i] - if !isRFC5332Space(c) { - if sb.Len() > 0 && isRFC5332Space(s[i-1]) { - sb.WriteByte(' ') - } - sb.WriteByte(c) - } - } - return sb.String() -} - -func isRFC5332Space(c byte) bool { - switch c { - case '\t', '\n', '\r', ' ': - return true - } - return false -} diff --git a/pkg/gitdiff/text.go b/pkg/gitdiff/text.go deleted file mode 100644 index ee30792..0000000 --- a/pkg/gitdiff/text.go +++ /dev/null @@ -1,192 +0,0 @@ -package gitdiff - -import ( - "fmt" - "io" - "strconv" - "strings" -) - -// ParseTextFragments parses text fragments until the next file header or the -// end of the stream and attaches them to the given file. It returns the number -// of fragments that were added. 
-func (p *parser) ParseTextFragments(f *File) (n int, err error) { - for { - frag, err := p.ParseTextFragmentHeader() - if err != nil { - return n, err - } - if frag == nil { - return n, nil - } - - if f.IsNew && frag.OldLines > 0 { - return n, p.Errorf(-1, "new file depends on old contents") - } - if f.IsDelete && frag.NewLines > 0 { - return n, p.Errorf(-1, "deleted file still has contents") - } - - if err := p.ParseTextChunk(frag); err != nil { - return n, err - } - - f.TextFragments = append(f.TextFragments, frag) - n++ - } -} - -func (p *parser) ParseTextFragmentHeader() (*TextFragment, error) { - const ( - startMark = "@@ -" - endMark = " @@" - ) - - if !strings.HasPrefix(p.Line(0), startMark) { - return nil, nil - } - - parts := strings.SplitAfterN(p.Line(0), endMark, 2) - if len(parts) < 2 { - return nil, p.Errorf(0, "invalid fragment header") - } - - f := &TextFragment{} - f.Comment = strings.TrimSpace(parts[1]) - - header := parts[0][len(startMark) : len(parts[0])-len(endMark)] - ranges := strings.Split(header, " +") - if len(ranges) != 2 { - return nil, p.Errorf(0, "invalid fragment header") - } - - var err error - if f.OldPosition, f.OldLines, err = parseRange(ranges[0]); err != nil { - return nil, p.Errorf(0, "invalid fragment header: %v", err) - } - if f.NewPosition, f.NewLines, err = parseRange(ranges[1]); err != nil { - return nil, p.Errorf(0, "invalid fragment header: %v", err) - } - - if err := p.Next(); err != nil && err != io.EOF { - return nil, err - } - return f, nil -} - -func (p *parser) ParseTextChunk(frag *TextFragment) error { - if p.Line(0) == "" { - return p.Errorf(0, "no content following fragment header") - } - - oldLines, newLines := frag.OldLines, frag.NewLines - for oldLines > 0 || newLines > 0 { - line := p.Line(0) - op, data := line[0], line[1:] - - switch op { - case '\n': - data = "\n" - fallthrough // newer GNU diff versions create empty context lines - case ' ': - oldLines-- - newLines-- - if frag.LinesAdded == 0 && 
frag.LinesDeleted == 0 { - frag.LeadingContext++ - } else { - frag.TrailingContext++ - } - frag.Lines = append(frag.Lines, Line{OpContext, data}) - case '-': - oldLines-- - frag.LinesDeleted++ - frag.TrailingContext = 0 - frag.Lines = append(frag.Lines, Line{OpDelete, data}) - case '+': - newLines-- - frag.LinesAdded++ - frag.TrailingContext = 0 - frag.Lines = append(frag.Lines, Line{OpAdd, data}) - case '\\': - // this may appear in middle of fragment if it's for a deleted line - if isNoNewlineMarker(line) { - removeLastNewline(frag) - break - } - fallthrough - default: - // TODO(bkeyes): if this is because we hit the next header, it - // would be helpful to return the miscounts line error. We could - // either test for the common headers ("@@ -", "diff --git") or - // assume any invalid op ends the fragment; git returns the same - // generic error in all cases so either is compatible - return p.Errorf(0, "invalid line operation: %q", op) - } - - if err := p.Next(); err != nil { - if err == io.EOF { - break - } - return err - } - } - - if oldLines != 0 || newLines != 0 { - hdr := max(frag.OldLines-oldLines, frag.NewLines-newLines) + 1 - return p.Errorf(-hdr, "fragment header miscounts lines: %+d old, %+d new", -oldLines, -newLines) - } - if frag.LinesAdded == 0 && frag.LinesDeleted == 0 { - return p.Errorf(0, "fragment contains no changes") - } - - // check for a final "no newline" marker since it is not included in the - // counters used to stop the loop above - if isNoNewlineMarker(p.Line(0)) { - removeLastNewline(frag) - if err := p.Next(); err != nil && err != io.EOF { - return err - } - } - - return nil -} - -func isNoNewlineMarker(s string) bool { - // test for "\ No newline at end of file" by prefix because the text - // changes by locale (git claims all versions are at least 12 chars) - return len(s) >= 12 && s[:2] == "\\ " -} - -func removeLastNewline(frag *TextFragment) { - if len(frag.Lines) > 0 { - last := &frag.Lines[len(frag.Lines)-1] - last.Line = 
strings.TrimSuffix(last.Line, "\n") - } -} - -func parseRange(s string) (start int64, end int64, err error) { - parts := strings.SplitN(s, ",", 2) - - if start, err = strconv.ParseInt(parts[0], 10, 64); err != nil { - nerr := err.(*strconv.NumError) - return 0, 0, fmt.Errorf("bad start of range: %s: %v", parts[0], nerr.Err) - } - - if len(parts) > 1 { - if end, err = strconv.ParseInt(parts[1], 10, 64); err != nil { - nerr := err.(*strconv.NumError) - return 0, 0, fmt.Errorf("bad end of range: %s: %v", parts[1], nerr.Err) - } - } else { - end = 1 - } - - return -} - -func max(a, b int64) int64 { - if a > b { - return a - } - return b -} |
