Diffstat (limited to 'pkg/gitdiff')
117 files changed, 8531 insertions, 0 deletions
diff --git a/pkg/gitdiff/README.md b/pkg/gitdiff/README.md new file mode 100644 index 0000000..974bb70 --- /dev/null +++ b/pkg/gitdiff/README.md @@ -0,0 +1 @@ +This is a vendored copy of the [bluekeyes/go-gitdiff](https://github.com/bluekeyes/go-gitdiff) package. diff --git a/pkg/gitdiff/apply.go b/pkg/gitdiff/apply.go new file mode 100644 index 0000000..44bbcca --- /dev/null +++ b/pkg/gitdiff/apply.go @@ -0,0 +1,147 @@ +package gitdiff + +import ( + "errors" + "fmt" + "io" + "sort" +) + +// Conflict indicates an apply failed due to a conflict between the patch and +// the source content. +// +// Users can test if an error was caused by a conflict by using errors.Is with +// an empty Conflict: +// +// if errors.Is(err, &Conflict{}) { +// // handle conflict +// } +type Conflict struct { + msg string +} + +func (c *Conflict) Error() string { + return "conflict: " + c.msg +} + +// Is implements error matching for Conflict. Passing an empty instance of +// Conflict always returns true. +func (c *Conflict) Is(other error) bool { + if other, ok := other.(*Conflict); ok { + return other.msg == "" || other.msg == c.msg + } + return false +} + +// ApplyError wraps an error that occurs during patch application with +// additional location information, if it is available. +type ApplyError struct { + // Line is the one-indexed line number in the source data + Line int64 + // Fragment is the one-indexed fragment number in the file + Fragment int + // FragmentLine is the one-indexed line number in the fragment + FragmentLine int + + err error +} + +// Unwrap returns the wrapped error. +func (e *ApplyError) Unwrap() error { + return e.err +} + +func (e *ApplyError) Error() string { + return fmt.Sprintf("%v", e.err) +} + +type lineNum int +type fragNum int +type fragLineNum int + +// applyError creates a new *ApplyError wrapping err or augments the information +// in err with args if it is already an *ApplyError. Returns nil if err is nil. +func applyError(err error, args ...interface{}) error { + if err == nil { + return nil + } + + e, ok := err.(*ApplyError) + if !ok { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + e = &ApplyError{err: err} + } + for _, arg := range args { + switch v := arg.(type) { + case lineNum: + e.Line = int64(v) + 1 + case fragNum: + e.Fragment = int(v) + 1 + case fragLineNum: + e.FragmentLine = int(v) + 1 + } + } + return e +} + +var ( + errApplyInProgress = errors.New("gitdiff: incompatible apply in progress") + errApplierClosed = errors.New("gitdiff: applier is closed") +) + +// Apply applies the changes in f to src, writing the result to dst. It can +// apply both text and binary changes. +// +// If an error occurs while applying, Apply returns an *ApplyError that +// annotates the error with additional information. If the error is because of +// a conflict with the source, the wrapped error will be a *Conflict. 
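A rough usage sketch for the Apply entry point that follows (not part of the patch itself): the import path and file names are placeholders, but Parse, Apply, and the errors.Is(err, &Conflict{}) check come straight from the package shown in this diff.

package main

import (
	"bytes"
	"errors"
	"fmt"
	"os"

	"example.com/vendor/pkg/gitdiff" // placeholder import path for this vendored copy
)

func main() {
	patch, _ := os.ReadFile("changes.patch") // placeholder inputs; error handling elided
	src, _ := os.ReadFile("file.txt")

	files, _, err := gitdiff.Parse(bytes.NewReader(patch))
	if err != nil || len(files) == 0 {
		fmt.Fprintln(os.Stderr, "no usable patch:", err)
		return
	}

	var dst bytes.Buffer
	if err := gitdiff.Apply(&dst, bytes.NewReader(src), files[0]); err != nil {
		if errors.Is(err, &gitdiff.Conflict{}) {
			fmt.Fprintln(os.Stderr, "patch conflicts with source:", err)
			return
		}
		fmt.Fprintln(os.Stderr, "apply failed:", err) // an *ApplyError carrying line/fragment info
		return
	}
	fmt.Print(dst.String())
}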
+func Apply(dst io.Writer, src io.ReaderAt, f *File) error { + if f.IsBinary { + if len(f.TextFragments) > 0 { + return applyError(errors.New("binary file contains text fragments")) + } + if f.BinaryFragment == nil { + return applyError(errors.New("binary file does not contain a binary fragment")) + } + } else { + if f.BinaryFragment != nil { + return applyError(errors.New("text file contains a binary fragment")) + } + } + + switch { + case f.BinaryFragment != nil: + applier := NewBinaryApplier(dst, src) + if err := applier.ApplyFragment(f.BinaryFragment); err != nil { + return err + } + return applier.Close() + + case len(f.TextFragments) > 0: + frags := make([]*TextFragment, len(f.TextFragments)) + copy(frags, f.TextFragments) + + sort.Slice(frags, func(i, j int) bool { + return frags[i].OldPosition < frags[j].OldPosition + }) + + // TODO(bkeyes): consider merging overlapping fragments + // right now, the application fails if fragments overlap, but it should be + // possible to precompute the result of applying them in order + + applier := NewTextApplier(dst, src) + for i, frag := range frags { + if err := applier.ApplyFragment(frag); err != nil { + return applyError(err, fragNum(i)) + } + } + return applier.Close() + + default: + // nothing to apply, just copy all the data + _, err := copyFrom(dst, src, 0) + return err + } +} diff --git a/pkg/gitdiff/apply_binary.go b/pkg/gitdiff/apply_binary.go new file mode 100644 index 0000000..b34772d --- /dev/null +++ b/pkg/gitdiff/apply_binary.go @@ -0,0 +1,206 @@ +package gitdiff + +import ( + "errors" + "fmt" + "io" +) + +// BinaryApplier applies binary changes described in a fragment to source data. +// The applier must be closed after use. +type BinaryApplier struct { + dst io.Writer + src io.ReaderAt + + closed bool + dirty bool +} + +// NewBinaryApplier creates an BinaryApplier that reads data from src and +// writes modified data to dst. +func NewBinaryApplier(dst io.Writer, src io.ReaderAt) *BinaryApplier { + a := BinaryApplier{ + dst: dst, + src: src, + } + return &a +} + +// ApplyFragment applies the changes in the fragment f and writes the result to +// dst. ApplyFragment can be called at most once. +// +// If an error occurs while applying, ApplyFragment returns an *ApplyError that +// annotates the error with additional information. If the error is because of +// a conflict between a fragment and the source, the wrapped error will be a +// *Conflict. +func (a *BinaryApplier) ApplyFragment(f *BinaryFragment) error { + if f == nil { + return applyError(errors.New("nil fragment")) + } + if a.closed { + return applyError(errApplierClosed) + } + if a.dirty { + return applyError(errApplyInProgress) + } + + // mark an apply as in progress, even if it fails before making changes + a.dirty = true + + switch f.Method { + case BinaryPatchLiteral: + if _, err := a.dst.Write(f.Data); err != nil { + return applyError(err) + } + case BinaryPatchDelta: + if err := applyBinaryDeltaFragment(a.dst, a.src, f.Data); err != nil { + return applyError(err) + } + default: + return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method)) + } + return nil +} + +// Close writes any data following the last applied fragment and prevents +// future calls to ApplyFragment. 
+func (a *BinaryApplier) Close() (err error) { + if a.closed { + return nil + } + + a.closed = true + if !a.dirty { + _, err = copyFrom(a.dst, a.src, 0) + } else { + // do nothing, applying a binary fragment copies all data + } + return err +} + +func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error { + srcSize, delta := readBinaryDeltaSize(frag) + if err := checkBinarySrcSize(src, srcSize); err != nil { + return err + } + + dstSize, delta := readBinaryDeltaSize(delta) + + for len(delta) > 0 { + op := delta[0] + if op == 0 { + return errors.New("invalid delta opcode 0") + } + + var n int64 + var err error + switch op & 0x80 { + case 0x80: + n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src) + case 0x00: + n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:]) + } + if err != nil { + return err + } + dstSize -= n + } + + if dstSize != 0 { + return errors.New("corrupt binary delta: insufficient or extra data") + } + return nil +} + +// readBinaryDeltaSize reads a variable length size from a delta-encoded binary +// fragment, returing the size and the unused data. Data is encoded as: +// +// [[1xxxxxxx]...] [0xxxxxxx] +// +// in little-endian order, with 7 bits of the value per byte. +func readBinaryDeltaSize(d []byte) (size int64, rest []byte) { + shift := uint(0) + for i, b := range d { + size |= int64(b&0x7F) << shift + shift += 7 + if b <= 0x7F { + return size, d[i+1:] + } + } + return size, nil +} + +// applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary +// fragment, returning the amount of data written and the usused part of the +// fragment. An add operation takes the form: +// +// [0xxxxxx][[data1]...] +// +// where the lower seven bits of the opcode is the number of data bytes +// following the opcode. See also pack-format.txt in the Git source. +func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) { + size := int(op) + if len(delta) < size { + return 0, delta, errors.New("corrupt binary delta: incomplete add") + } + _, err = w.Write(delta[:size]) + return int64(size), delta[size:], err +} + +// applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary +// fragment, returing the amount of data written and the unused part of the +// fragment. A copy operation takes the form: +// +// [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3] +// +// where the lower seven bits of the opcode determine which non-zero offset and +// size bytes are present in little-endian order: if bit 0 is set, offset1 is +// present, etc. If no offset or size bytes are present, offset is 0 and size +// is 0x10000. See also pack-format.txt in the Git source. 
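To make the delta opcode layout described above concrete, here is a hand-built delta applied through the exported BinaryApplier API. This is only a sketch: the import path is a placeholder and the delta bytes were constructed for illustration rather than produced by git, but they follow the size/copy/add encoding documented above.

package main

import (
	"bytes"
	"fmt"

	"example.com/vendor/pkg/gitdiff" // placeholder import path for this vendored copy
)

func main() {
	src := []byte("abcde")

	delta := []byte{
		0x05,                // source size: 5 bytes
		0x05,                // target size: 5 bytes
		0x91, 0x00, 0x02,    // copy opcode: offset1 and size1 present -> copy 2 bytes from offset 0 ("ab")
		0x03, 'X', 'Y', 'Z', // add opcode: 3 literal bytes follow
	}

	frag := &gitdiff.BinaryFragment{
		Method: gitdiff.BinaryPatchDelta,
		Size:   5,
		Data:   delta,
	}

	var dst bytes.Buffer
	a := gitdiff.NewBinaryApplier(&dst, bytes.NewReader(src))
	if err := a.ApplyFragment(frag); err != nil {
		panic(err)
	}
	if err := a.Close(); err != nil {
		panic(err)
	}
	fmt.Printf("%q\n", dst.String()) // "abXYZ"
}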
+func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) { + const defaultSize = 0x10000 + + unpack := func(start, bits uint) (v int64) { + for i := uint(0); i < bits; i++ { + mask := byte(1 << (i + start)) + if op&mask > 0 { + if len(delta) == 0 { + err = errors.New("corrupt binary delta: incomplete copy") + return + } + v |= int64(delta[0]) << (8 * i) + delta = delta[1:] + } + } + return + } + + offset := unpack(0, 4) + size := unpack(4, 3) + if err != nil { + return 0, delta, err + } + if size == 0 { + size = defaultSize + } + + // TODO(bkeyes): consider pooling these buffers + b := make([]byte, size) + if _, err := src.ReadAt(b, offset); err != nil { + return 0, delta, err + } + + _, err = w.Write(b) + return size, delta, err +} + +func checkBinarySrcSize(r io.ReaderAt, size int64) error { + ok, err := isLen(r, size) + if err != nil { + return err + } + if !ok { + return &Conflict{"fragment src size does not match actual src size"} + } + return nil +} diff --git a/pkg/gitdiff/apply_test.go b/pkg/gitdiff/apply_test.go new file mode 100644 index 0000000..05915ba --- /dev/null +++ b/pkg/gitdiff/apply_test.go @@ -0,0 +1,235 @@ +package gitdiff + +import ( + "bytes" + "errors" + "io" + "io/ioutil" + "path/filepath" + "testing" +) + +func TestApplyTextFragment(t *testing.T) { + tests := map[string]applyTest{ + "createFile": {Files: getApplyFiles("text_fragment_new")}, + "deleteFile": {Files: getApplyFiles("text_fragment_delete_all")}, + + "addStart": {Files: getApplyFiles("text_fragment_add_start")}, + "addMiddle": {Files: getApplyFiles("text_fragment_add_middle")}, + "addEnd": {Files: getApplyFiles("text_fragment_add_end")}, + "addEndNoEOL": {Files: getApplyFiles("text_fragment_add_end_noeol")}, + + "changeStart": {Files: getApplyFiles("text_fragment_change_start")}, + "changeMiddle": {Files: getApplyFiles("text_fragment_change_middle")}, + "changeEnd": {Files: getApplyFiles("text_fragment_change_end")}, + "changeEndEOL": {Files: getApplyFiles("text_fragment_change_end_eol")}, + "changeExact": {Files: getApplyFiles("text_fragment_change_exact")}, + "changeSingleNoEOL": {Files: getApplyFiles("text_fragment_change_single_noeol")}, + + "errorShortSrcBefore": { + Files: applyFiles{ + Src: "text_fragment_error.src", + Patch: "text_fragment_error_short_src_before.patch", + }, + Err: &Conflict{}, + }, + "errorShortSrc": { + Files: applyFiles{ + Src: "text_fragment_error.src", + Patch: "text_fragment_error_short_src.patch", + }, + Err: &Conflict{}, + }, + "errorContextConflict": { + Files: applyFiles{ + Src: "text_fragment_error.src", + Patch: "text_fragment_error_context_conflict.patch", + }, + Err: &Conflict{}, + }, + "errorDeleteConflict": { + Files: applyFiles{ + Src: "text_fragment_error.src", + Patch: "text_fragment_error_delete_conflict.patch", + }, + Err: &Conflict{}, + }, + "errorNewFile": { + Files: applyFiles{ + Src: "text_fragment_error.src", + Patch: "text_fragment_error_new_file.patch", + }, + Err: &Conflict{}, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { + if len(file.TextFragments) != 1 { + t.Fatalf("patch should contain exactly one fragment, but it has %d", len(file.TextFragments)) + } + applier := NewTextApplier(dst, src) + return applier.ApplyFragment(file.TextFragments[0]) + }) + }) + } +} + +func TestApplyBinaryFragment(t *testing.T) { + tests := map[string]applyTest{ + "literalCreate": {Files: 
getApplyFiles("bin_fragment_literal_create")}, + "literalModify": {Files: getApplyFiles("bin_fragment_literal_modify")}, + "deltaModify": {Files: getApplyFiles("bin_fragment_delta_modify")}, + "deltaModifyLarge": {Files: getApplyFiles("bin_fragment_delta_modify_large")}, + + "errorIncompleteAdd": { + Files: applyFiles{ + Src: "bin_fragment_delta_error.src", + Patch: "bin_fragment_delta_error_incomplete_add.patch", + }, + Err: "incomplete add", + }, + "errorIncompleteCopy": { + Files: applyFiles{ + Src: "bin_fragment_delta_error.src", + Patch: "bin_fragment_delta_error_incomplete_copy.patch", + }, + Err: "incomplete copy", + }, + "errorSrcSize": { + Files: applyFiles{ + Src: "bin_fragment_delta_error.src", + Patch: "bin_fragment_delta_error_src_size.patch", + }, + Err: &Conflict{}, + }, + "errorDstSize": { + Files: applyFiles{ + Src: "bin_fragment_delta_error.src", + Patch: "bin_fragment_delta_error_dst_size.patch", + }, + Err: "insufficient or extra data", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { + applier := NewBinaryApplier(dst, src) + return applier.ApplyFragment(file.BinaryFragment) + }) + }) + } +} + +func TestApplyFile(t *testing.T) { + tests := map[string]applyTest{ + "textModify": { + Files: applyFiles{ + Src: "file_text.src", + Patch: "file_text_modify.patch", + Out: "file_text_modify.out", + }, + }, + "textDelete": { + Files: applyFiles{ + Src: "file_text.src", + Patch: "file_text_delete.patch", + Out: "file_text_delete.out", + }, + }, + "textErrorPartialDelete": { + Files: applyFiles{ + Src: "file_text.src", + Patch: "file_text_error_partial_delete.patch", + }, + Err: &Conflict{}, + }, + "binaryModify": { + Files: getApplyFiles("file_bin_modify"), + }, + "modeChange": { + Files: getApplyFiles("file_mode_change"), + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { + return Apply(dst, src, file) + }) + }) + } +} + +type applyTest struct { + Files applyFiles + Err interface{} +} + +func (at applyTest) run(t *testing.T, apply func(io.Writer, io.ReaderAt, *File) error) { + src, patch, out := at.Files.Load(t) + + files, _, err := Parse(bytes.NewReader(patch)) + if err != nil { + t.Fatalf("failed to parse patch file: %v", err) + } + if len(files) != 1 { + t.Fatalf("patch should contain exactly one file, but it has %d", len(files)) + } + + var dst bytes.Buffer + err = apply(&dst, bytes.NewReader(src), files[0]) + if at.Err != nil { + assertError(t, at.Err, err, "applying fragment") + return + } + if err != nil { + var aerr *ApplyError + if errors.As(err, &aerr) { + t.Fatalf("unexpected error applying: at %d: fragment %d at %d: %v", aerr.Line, aerr.Fragment, aerr.FragmentLine, err) + } else { + t.Fatalf("unexpected error applying: %v", err) + } + } + + if !bytes.Equal(out, dst.Bytes()) { + t.Errorf("incorrect result after apply\nexpected:\n%q\nactual:\n%q", out, dst.Bytes()) + } +} + +type applyFiles struct { + Src string + Patch string + Out string +} + +func getApplyFiles(name string) applyFiles { + return applyFiles{ + Src: name + ".src", + Patch: name + ".patch", + Out: name + ".out", + } +} + +func (f applyFiles) Load(t *testing.T) (src []byte, patch []byte, out []byte) { + load := func(name, kind string) []byte { + d, err := ioutil.ReadFile(filepath.Join("testdata", "apply", name)) + if err != nil { + t.Fatalf("failed to read %s file: %v", kind, err) + } + return d + } + + if 
f.Src != "" { + src = load(f.Src, "source") + } + if f.Patch != "" { + patch = load(f.Patch, "patch") + } + if f.Out != "" { + out = load(f.Out, "output") + } + return +} diff --git a/pkg/gitdiff/apply_text.go b/pkg/gitdiff/apply_text.go new file mode 100644 index 0000000..8d0accb --- /dev/null +++ b/pkg/gitdiff/apply_text.go @@ -0,0 +1,158 @@ +package gitdiff + +import ( + "errors" + "io" +) + +// TextApplier applies changes described in text fragments to source data. If +// changes are described in multiple fragments, those fragments must be applied +// in order. The applier must be closed after use. +// +// By default, TextApplier operates in "strict" mode, where fragment content +// and positions must exactly match those of the source. +type TextApplier struct { + dst io.Writer + src io.ReaderAt + lineSrc LineReaderAt + nextLine int64 + + closed bool + dirty bool +} + +// NewTextApplier creates a TextApplier that reads data from src and writes +// modified data to dst. If src implements LineReaderAt, it is used directly. +func NewTextApplier(dst io.Writer, src io.ReaderAt) *TextApplier { + a := TextApplier{ + dst: dst, + src: src, + } + + if lineSrc, ok := src.(LineReaderAt); ok { + a.lineSrc = lineSrc + } else { + a.lineSrc = &lineReaderAt{r: src} + } + + return &a +} + +// ApplyFragment applies the changes in the fragment f, writing unwritten data +// before the start of the fragment and any changes from the fragment. If +// multiple text fragments apply to the same content, ApplyFragment must be +// called in order of increasing start position. As a result, each fragment can +// be applied at most once. +// +// If an error occurs while applying, ApplyFragment returns an *ApplyError that +// annotates the error with additional information. If the error is because of +// a conflict between the fragment and the source, the wrapped error will be a +// *Conflict. 
+func (a *TextApplier) ApplyFragment(f *TextFragment) error { + if a.closed { + return applyError(errApplierClosed) + } + + // mark an apply as in progress, even if it fails before making changes + a.dirty = true + + // application code assumes fragment fields are consistent + if err := f.Validate(); err != nil { + return applyError(err) + } + + // lines are 0-indexed, positions are 1-indexed (but new files have position = 0) + fragStart := f.OldPosition - 1 + if fragStart < 0 { + fragStart = 0 + } + fragEnd := fragStart + f.OldLines + + start := a.nextLine + if fragStart < start { + return applyError(&Conflict{"fragment overlaps with an applied fragment"}) + } + + if f.OldPosition == 0 { + ok, err := isLen(a.src, 0) + if err != nil { + return applyError(err) + } + if !ok { + return applyError(&Conflict{"cannot create new file from non-empty src"}) + } + } + + preimage := make([][]byte, fragEnd-start) + n, err := a.lineSrc.ReadLinesAt(preimage, start) + if err != nil { + // an EOF indicates that source file is shorter than the patch expects, + // which should be reported as a conflict rather than a generic error + if errors.Is(err, io.EOF) { + err = &Conflict{"src has fewer lines than required by fragment"} + } + return applyError(err, lineNum(start+int64(n))) + } + + // copy leading data before the fragment starts + for i, line := range preimage[:fragStart-start] { + if _, err := a.dst.Write(line); err != nil { + a.nextLine = start + int64(i) + return applyError(err, lineNum(a.nextLine)) + } + } + preimage = preimage[fragStart-start:] + + // apply the changes in the fragment + used := int64(0) + for i, line := range f.Lines { + if err := applyTextLine(a.dst, line, preimage, used); err != nil { + a.nextLine = fragStart + used + return applyError(err, lineNum(a.nextLine), fragLineNum(i)) + } + if line.Old() { + used++ + } + } + a.nextLine = fragStart + used + + // new position of +0,0 mean a full delete, so check for leftovers + if f.NewPosition == 0 && f.NewLines == 0 { + var b [1][]byte + n, err := a.lineSrc.ReadLinesAt(b[:], a.nextLine) + if err != nil && err != io.EOF { + return applyError(err, lineNum(a.nextLine)) + } + if n > 0 { + return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine)) + } + } + + return nil +} + +func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) { + if line.Old() && string(preimage[i]) != line.Line { + return &Conflict{"fragment line does not match src line"} + } + if line.New() { + _, err = io.WriteString(dst, line.Line) + } + return err +} + +// Close writes any data following the last applied fragment and prevents +// future calls to ApplyFragment. 
+func (a *TextApplier) Close() (err error) { + if a.closed { + return nil + } + + a.closed = true + if !a.dirty { + _, err = copyFrom(a.dst, a.src, 0) + } else { + _, err = copyLinesFrom(a.dst, a.lineSrc, a.nextLine) + } + return err +} diff --git a/pkg/gitdiff/assert_test.go b/pkg/gitdiff/assert_test.go new file mode 100644 index 0000000..878f13c --- /dev/null +++ b/pkg/gitdiff/assert_test.go @@ -0,0 +1,30 @@ +package gitdiff + +import ( + "errors" + "strings" + "testing" +) + +func assertError(t *testing.T, expected interface{}, actual error, action string) { + if actual == nil { + t.Fatalf("expected error %s, but got nil", action) + } + + switch exp := expected.(type) { + case bool: + if !exp { + t.Fatalf("unexpected error %s: %v", action, actual) + } + case string: + if !strings.Contains(actual.Error(), exp) { + t.Fatalf("incorrect error %s: %q does not contain %q", action, actual.Error(), exp) + } + case error: + if !errors.Is(actual, exp) { + t.Fatalf("incorrect error %s: expected %T (%v), actual: %T (%v)", action, exp, exp, actual, actual) + } + default: + t.Fatalf("unsupported expected error type: %T", exp) + } +} diff --git a/pkg/gitdiff/base85.go b/pkg/gitdiff/base85.go new file mode 100644 index 0000000..86db117 --- /dev/null +++ b/pkg/gitdiff/base85.go @@ -0,0 +1,91 @@ +package gitdiff + +import ( + "fmt" +) + +var ( + b85Table map[byte]byte + b85Alpha = []byte( + "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "!#$%&()*+-;<=>?@^_`{|}~", + ) +) + +func init() { + b85Table = make(map[byte]byte) + for i, c := range b85Alpha { + b85Table[c] = byte(i) + } +} + +// base85Decode decodes Base85-encoded data from src into dst. It uses the +// alphabet defined by base85.c in the Git source tree. src must contain at +// least len(dst) bytes of encoded data. +func base85Decode(dst, src []byte) error { + var v uint32 + var n, ndst int + for i, b := range src { + if b, ok := b85Table[b]; ok { + v = 85*v + uint32(b) + n++ + } else { + return fmt.Errorf("invalid base85 byte at index %d: 0x%X", i, src[i]) + } + if n == 5 { + rem := len(dst) - ndst + for j := 0; j < 4 && j < rem; j++ { + dst[ndst] = byte(v >> 24) + ndst++ + v <<= 8 + } + v = 0 + n = 0 + } + } + if n > 0 { + return fmt.Errorf("base85 data terminated by underpadded sequence") + } + if ndst < len(dst) { + return fmt.Errorf("base85 data underrun: %d < %d", ndst, len(dst)) + } + return nil +} + +// base85Encode encodes src in Base85, writing the result to dst. It uses the +// alphabet defined by base85.c in the Git source tree. +func base85Encode(dst, src []byte) { + var di, si int + + encode := func(v uint32) { + dst[di+0] = b85Alpha[(v/(85*85*85*85))%85] + dst[di+1] = b85Alpha[(v/(85*85*85))%85] + dst[di+2] = b85Alpha[(v/(85*85))%85] + dst[di+3] = b85Alpha[(v/85)%85] + dst[di+4] = b85Alpha[v%85] + } + + n := (len(src) / 4) * 4 + for si < n { + encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3])) + si += 4 + di += 5 + } + + var v uint32 + switch len(src) - si { + case 3: + v |= uint32(src[si+2]) << 8 + fallthrough + case 2: + v |= uint32(src[si+1]) << 16 + fallthrough + case 1: + v |= uint32(src[si+0]) << 24 + encode(v) + } +} + +// base85Len returns the length of n bytes of Base85 encoded data. 
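Because the base85 helpers are unexported, a usage example would have to live inside the package, for instance as an extra test. This sketch reuses the two-byte vector from the tests further down and the base85Len helper defined just below; the test name itself is hypothetical.

package gitdiff

import "testing"

func TestBase85TwoByteRoundTrip(t *testing.T) {
	in := []byte{0xCA, 0xFE}

	// two input bytes round up to a single 5-character base85 group
	dst := make([]byte, base85Len(len(in))) // base85Len(2) == 5
	base85Encode(dst, in)
	if string(dst) != "%KiWV" {
		t.Fatalf("unexpected encoding: %s", dst)
	}

	out := make([]byte, len(in))
	if err := base85Decode(out, dst); err != nil {
		t.Fatal(err)
	}
	if out[0] != 0xCA || out[1] != 0xFE {
		t.Fatalf("round trip failed: % x", out)
	}
}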
+func base85Len(n int) int { + return (n + 3) / 4 * 5 +} diff --git a/pkg/gitdiff/base85_test.go b/pkg/gitdiff/base85_test.go new file mode 100644 index 0000000..672c471 --- /dev/null +++ b/pkg/gitdiff/base85_test.go @@ -0,0 +1,118 @@ +package gitdiff + +import ( + "bytes" + "testing" +) + +func TestBase85Decode(t *testing.T) { + tests := map[string]struct { + Input string + Output []byte + Err bool + }{ + "twoBytes": { + Input: "%KiWV", + Output: []byte{0xCA, 0xFE}, + }, + "fourBytes": { + Input: "007GV", + Output: []byte{0x0, 0x0, 0xCA, 0xFE}, + }, + "sixBytes": { + Input: "007GV%KiWV", + Output: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE}, + }, + "invalidCharacter": { + Input: "00'GV", + Err: true, + }, + "underpaddedSequence": { + Input: "007G", + Err: true, + }, + "dataUnderrun": { + Input: "007GV", + Output: make([]byte, 8), + Err: true, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + dst := make([]byte, len(test.Output)) + err := base85Decode(dst, []byte(test.Input)) + if test.Err { + if err == nil { + t.Fatalf("expected error decoding base85 data, but got nil") + } + return + } + if err != nil { + t.Fatalf("unexpected error decoding base85 data: %v", err) + } + for i, b := range test.Output { + if dst[i] != b { + t.Errorf("incorrect byte at index %d: expected 0x%X, actual 0x%X", i, b, dst[i]) + } + } + }) + } +} + +func TestBase85Encode(t *testing.T) { + tests := map[string]struct { + Input []byte + Output string + }{ + "zeroBytes": { + Input: []byte{}, + Output: "", + }, + "twoBytes": { + Input: []byte{0xCA, 0xFE}, + Output: "%KiWV", + }, + "fourBytes": { + Input: []byte{0x0, 0x0, 0xCA, 0xFE}, + Output: "007GV", + }, + "sixBytes": { + Input: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE}, + Output: "007GV%KiWV", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + dst := make([]byte, len(test.Output)) + base85Encode(dst, test.Input) + for i, b := range test.Output { + if dst[i] != byte(b) { + t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, b, dst[i]) + } + } + }) + } +} + +func FuzzBase85Roundtrip(f *testing.F) { + f.Add([]byte{0x2b, 0x0d}) + f.Add([]byte{0xbc, 0xb4, 0x3f}) + f.Add([]byte{0xfa, 0x62, 0x05, 0x83, 0x24, 0x39, 0xd5, 0x25}) + f.Add([]byte{0x31, 0x59, 0x02, 0xa0, 0x61, 0x12, 0xd9, 0x43, 0xb8, 0x23, 0x1a, 0xb4, 0x02, 0xae, 0xfa, 0xcc, 0x22, 0xad, 0x41, 0xb9, 0xb8}) + + f.Fuzz(func(t *testing.T, in []byte) { + n := len(in) + dst := make([]byte, base85Len(n)) + out := make([]byte, n) + + base85Encode(dst, in) + if err := base85Decode(out, dst); err != nil { + t.Fatalf("unexpected error decoding base85 data: %v", err) + } + if !bytes.Equal(in, out) { + t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst)) + } + }) +} diff --git a/pkg/gitdiff/binary.go b/pkg/gitdiff/binary.go new file mode 100644 index 0000000..282e323 --- /dev/null +++ b/pkg/gitdiff/binary.go @@ -0,0 +1,186 @@ +package gitdiff + +import ( + "bytes" + "compress/zlib" + "fmt" + "io" + "io/ioutil" + "strconv" + "strings" +) + +func (p *parser) ParseBinaryFragments(f *File) (n int, err error) { + isBinary, hasData, err := p.ParseBinaryMarker() + if err != nil || !isBinary { + return 0, err + } + + f.IsBinary = true + if !hasData { + return 0, nil + } + + forward, err := p.ParseBinaryFragmentHeader() + if err != nil { + return 0, err + } + if forward == nil { + return 0, p.Errorf(0, "missing data for binary patch") + } + if err := p.ParseBinaryChunk(forward); err != nil { 
+ return 0, err + } + f.BinaryFragment = forward + + // valid for reverse to not exist, but it must be valid if present + reverse, err := p.ParseBinaryFragmentHeader() + if err != nil { + return 1, err + } + if reverse == nil { + return 1, nil + } + if err := p.ParseBinaryChunk(reverse); err != nil { + return 1, err + } + f.ReverseBinaryFragment = reverse + + return 1, nil +} + +func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { + line := p.Line(0) + switch { + case line == "GIT binary patch\n": + hasData = true + case isBinaryNoDataMarker(line): + default: + return false, false, nil + } + + if err = p.Next(); err != nil && err != io.EOF { + return false, false, err + } + return true, hasData, nil +} + +func isBinaryNoDataMarker(line string) bool { + if strings.HasSuffix(line, " differ\n") { + return strings.HasPrefix(line, "Binary files ") || strings.HasPrefix(line, "Files ") + } + return false +} + +func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) { + parts := strings.SplitN(strings.TrimSuffix(p.Line(0), "\n"), " ", 2) + if len(parts) < 2 { + return nil, nil + } + + frag := &BinaryFragment{} + switch parts[0] { + case "delta": + frag.Method = BinaryPatchDelta + case "literal": + frag.Method = BinaryPatchLiteral + default: + return nil, nil + } + + var err error + if frag.Size, err = strconv.ParseInt(parts[1], 10, 64); err != nil { + nerr := err.(*strconv.NumError) + return nil, p.Errorf(0, "binary patch: invalid size: %v", nerr.Err) + } + + if err := p.Next(); err != nil && err != io.EOF { + return nil, err + } + return frag, nil +} + +func (p *parser) ParseBinaryChunk(frag *BinaryFragment) error { + // Binary fragments are encoded as a series of base85 encoded lines. Each + // line starts with a character in [A-Za-z] giving the number of bytes on + // the line, where A = 1 and z = 52, and ends with a newline character. + // + // The base85 encoding means each line is a multiple of 5 characters + 2 + // additional characters for the length byte and the newline. The fragment + // ends with a blank line. 
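For example, using the single-line case from the tests below: the data line TcmZQzU|?i`U?w2V48*Je09XJG has length byte 'T', meaning 'T'-'A'+1 = 20 bytes, followed by 25 base85 characters that decode to exactly five 4-byte groups; once every line of the fragment has been decoded, the concatenated stream is zlib-inflated and must match the size declared on the preceding literal/delta line (20 here).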
+ const ( + shortestValidLine = "A00000\n" + maxBytesPerLine = 52 + ) + + var data bytes.Buffer + buf := make([]byte, maxBytesPerLine) + for { + line := p.Line(0) + if line == "\n" { + break + } + if len(line) < len(shortestValidLine) || (len(line)-2)%5 != 0 { + return p.Errorf(0, "binary patch: corrupt data line") + } + + byteCount, seq := int(line[0]), line[1:len(line)-1] + switch { + case 'A' <= byteCount && byteCount <= 'Z': + byteCount = byteCount - 'A' + 1 + case 'a' <= byteCount && byteCount <= 'z': + byteCount = byteCount - 'a' + 27 + default: + return p.Errorf(0, "binary patch: invalid length byte") + } + + // base85 encodes every 4 bytes into 5 characters, with up to 3 bytes of end padding + maxByteCount := len(seq) / 5 * 4 + if byteCount > maxByteCount || byteCount < maxByteCount-3 { + return p.Errorf(0, "binary patch: incorrect byte count") + } + + if err := base85Decode(buf[:byteCount], []byte(seq)); err != nil { + return p.Errorf(0, "binary patch: %v", err) + } + data.Write(buf[:byteCount]) + + if err := p.Next(); err != nil { + if err == io.EOF { + return p.Errorf(0, "binary patch: unexpected EOF") + } + return err + } + } + + if err := inflateBinaryChunk(frag, &data); err != nil { + return p.Errorf(0, "binary patch: %v", err) + } + + // consume the empty line that ended the fragment + if err := p.Next(); err != nil && err != io.EOF { + return err + } + return nil +} + +func inflateBinaryChunk(frag *BinaryFragment, r io.Reader) error { + zr, err := zlib.NewReader(r) + if err != nil { + return err + } + + data, err := ioutil.ReadAll(zr) + if err != nil { + return err + } + if err := zr.Close(); err != nil { + return err + } + + if int64(len(data)) != frag.Size { + return fmt.Errorf("%d byte fragment inflated to %d", frag.Size, len(data)) + } + frag.Data = data + return nil +} diff --git a/pkg/gitdiff/binary_test.go b/pkg/gitdiff/binary_test.go new file mode 100644 index 0000000..64db243 --- /dev/null +++ b/pkg/gitdiff/binary_test.go @@ -0,0 +1,324 @@ +package gitdiff + +import ( + "encoding/binary" + "io" + "reflect" + "strings" + "testing" +) + +func TestParseBinaryMarker(t *testing.T) { + tests := map[string]struct { + Input string + IsBinary bool + HasData bool + Err bool + }{ + "binaryPatch": { + Input: "GIT binary patch\n", + IsBinary: true, + HasData: true, + }, + "binaryFileNoPatch": { + Input: "Binary files differ\n", + IsBinary: true, + HasData: false, + }, + "binaryFileNoPatchPaths": { + Input: "Binary files a/foo.bin and b/foo.bin differ\n", + IsBinary: true, + HasData: false, + }, + "fileNoPatch": { + Input: "Files differ\n", + IsBinary: true, + HasData: false, + }, + "textFile": { + Input: "@@ -10,14 +22,31 @@\n", + IsBinary: false, + HasData: false, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + p := newTestParser(test.Input, true) + + isBinary, hasData, err := p.ParseBinaryMarker() + if test.Err { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing binary marker, but got %v", err) + } + return + } + if err != nil { + t.Fatalf("unexpected error parsing binary marker: %v", err) + } + if test.IsBinary != isBinary { + t.Errorf("incorrect isBinary value: expected %t, actual %t", test.IsBinary, isBinary) + } + if test.HasData != hasData { + t.Errorf("incorrect hasData value: expected %t, actual %t", test.HasData, hasData) + } + }) + } +} + +func TestParseBinaryFragmentHeader(t *testing.T) { + tests := map[string]struct { + Input string + Output *BinaryFragment + Err bool + }{ + "delta": { + Input: "delta 1234\n", + 
Output: &BinaryFragment{ + Method: BinaryPatchDelta, + Size: 1234, + }, + }, + "literal": { + Input: "literal 1234\n", + Output: &BinaryFragment{ + Method: BinaryPatchLiteral, + Size: 1234, + }, + }, + "unknownMethod": { + Input: "compressed 1234\n", + Output: nil, + }, + "notAHeader": { + Input: "Binary files differ\n", + Output: nil, + }, + "invalidSize": { + Input: "delta 123abc\n", + Err: true, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + p := newTestParser(test.Input, true) + + frag, err := p.ParseBinaryFragmentHeader() + if test.Err { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing binary header, but got %v", err) + } + return + } + if err != nil { + t.Fatalf("unexpected error parsing binary header: %v", err) + } + if !reflect.DeepEqual(test.Output, frag) { + t.Errorf("incorrect binary fragment\nexpected: %+v\n actual: %+v", test.Output, frag) + } + }) + } +} + +func TestParseBinaryChunk(t *testing.T) { + tests := map[string]struct { + Input string + Fragment BinaryFragment + Output []byte + Err string + }{ + "singleline": { + Input: "TcmZQzU|?i`U?w2V48*Je09XJG\n\n", + Fragment: BinaryFragment{ + Size: 20, + }, + Output: fib(5, binary.BigEndian), + }, + "multiline": { + Input: "zcmZQzU|?i`U?w2V48*KJ%mKu_Kr9NxN<eH5#F0Qe0f=7$l~*z_FeL$%-)3N7vt?l5\n" + + "zl3-vE2xVZ9%4J~CI>f->s?WfX|B-=Vs{#X~svra7Ekg#T|4s}nH;WnAZ)|1Y*`&cB\n" + + "s(sh?X(Uz6L^!Ou&aF*u`J!eibJifSrv0z>$Q%Hd(^HIJ<Y?5`S0gT5UE&u=k\n\n", + Fragment: BinaryFragment{ + Size: 160, + }, + Output: fib(40, binary.BigEndian), + }, + "shortLine": { + Input: "A00\n\n", + Err: "corrupt data line", + }, + "underpaddedLine": { + Input: "H00000000\n\n", + Err: "corrupt data line", + }, + "invalidLengthByte": { + Input: "!00000\n\n", + Err: "invalid length byte", + }, + "miscountedLine": { + Input: "H00000\n\n", + Err: "incorrect byte count", + }, + "invalidEncoding": { + Input: "TcmZQzU|?i'U?w2V48*Je09XJG\n", + Err: "invalid base85 byte", + }, + "noTrailingEmptyLine": { + Input: "TcmZQzU|?i`U?w2V48*Je09XJG\n", + Err: "unexpected EOF", + }, + "invalidCompression": { + Input: "F007GV%KiWV\n\n", + Err: "zlib", + }, + "incorrectSize": { + Input: "TcmZQzU|?i`U?w2V48*Je09XJG\n\n", + Fragment: BinaryFragment{ + Size: 16, + }, + Err: "16 byte fragment inflated to 20", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + p := newTestParser(test.Input, true) + + frag := test.Fragment + err := p.ParseBinaryChunk(&frag) + if test.Err != "" { + if err == nil || !strings.Contains(err.Error(), test.Err) { + t.Fatalf("expected error containing %q parsing binary chunk, but got %v", test.Err, err) + } + return + } + if err != nil { + t.Fatalf("unexpected error parsing binary chunk: %v", err) + } + if !reflect.DeepEqual(test.Output, frag.Data) { + t.Errorf("incorrect binary chunk\nexpected: %+v\n actual: %+v", test.Output, frag.Data) + } + }) + } +} + +func TestParseBinaryFragments(t *testing.T) { + tests := map[string]struct { + Input string + File File + + Binary bool + Fragment *BinaryFragment + ReverseFragment *BinaryFragment + Err bool + }{ + "dataWithReverse": { + Input: `GIT binary patch +literal 40 +gcmZQzU|?i` + "`" + `U?w2V48*KJ%mKu_Kr9NxN<eH500b)lkN^Mx + +literal 0 +HcmV?d00001 + +`, + Binary: true, + Fragment: &BinaryFragment{ + Method: BinaryPatchLiteral, + Size: 40, + Data: fib(10, binary.BigEndian), + }, + ReverseFragment: &BinaryFragment{ + Method: BinaryPatchLiteral, + Size: 0, + Data: []byte{}, + }, + }, + "dataWithoutReverse": { + Input: `GIT binary patch 
+literal 40 +gcmZQzU|?i` + "`" + `U?w2V48*KJ%mKu_Kr9NxN<eH500b)lkN^Mx + +`, + Binary: true, + Fragment: &BinaryFragment{ + Method: BinaryPatchLiteral, + Size: 40, + Data: fib(10, binary.BigEndian), + }, + }, + "noData": { + Input: "Binary files differ\n", + Binary: true, + }, + "text": { + Input: `@@ -1 +1 @@ +-old line ++new line +`, + Binary: false, + }, + "missingData": { + Input: "GIT binary patch\n", + Err: true, + }, + "invalidData": { + Input: `GIT binary patch +literal 20 +TcmZQzU|?i'U?w2V48*Je09XJG + +`, + Err: true, + }, + "invalidReverseData": { + Input: `GIT binary patch +literal 20 +TcmZQzU|?i` + "`" + `U?w2V48*Je09XJG + +literal 0 +zcmV?d00001 + +`, + Err: true, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + p := newTestParser(test.Input, true) + + file := test.File + _, err := p.ParseBinaryFragments(&file) + if test.Err { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing binary fragments, but got %v", err) + } + return + } + if err != nil { + t.Fatalf("unexpected error parsing binary fragments: %v", err) + } + if test.Binary != file.IsBinary { + t.Errorf("incorrect binary state: expected %t, actual %t", test.Binary, file.IsBinary) + } + if !reflect.DeepEqual(test.Fragment, file.BinaryFragment) { + t.Errorf("incorrect binary fragment\nexpected: %+v\n actual: %+v", test.Fragment, file.BinaryFragment) + } + if !reflect.DeepEqual(test.ReverseFragment, file.ReverseBinaryFragment) { + t.Errorf("incorrect reverse binary fragment\nexpected: %+v\n actual: %+v", test.ReverseFragment, file.ReverseBinaryFragment) + } + }) + } +} + +func fib(n int, ord binary.ByteOrder) []byte { + buf := make([]byte, 4*n) + for i := 0; i < len(buf); i += 4 { + if i < 8 { + ord.PutUint32(buf[i:], 1) + } else { + ord.PutUint32(buf[i:], ord.Uint32(buf[i-4:])+ord.Uint32(buf[i-8:])) + } + } + return buf +} diff --git a/pkg/gitdiff/file_header.go b/pkg/gitdiff/file_header.go new file mode 100644 index 0000000..7ae4bc9 --- /dev/null +++ b/pkg/gitdiff/file_header.go @@ -0,0 +1,546 @@ +package gitdiff + +import ( + "fmt" + "io" + "os" + "strconv" + "strings" + "time" +) + +const ( + devNull = "/dev/null" +) + +// ParseNextFileHeader finds and parses the next file header in the stream. If +// a header is found, it returns a file and all input before the header. It +// returns nil if no headers are found before the end of the input. 
+func (p *parser) ParseNextFileHeader() (*File, string, error) { + var preamble strings.Builder + var file *File + for { + // check for disconnected fragment headers (corrupt patch) + frag, err := p.ParseTextFragmentHeader() + if err != nil { + // not a valid header, nothing to worry about + goto NextLine + } + if frag != nil { + return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header()) + } + + // check for a git-generated patch + file, err = p.ParseGitFileHeader() + if err != nil { + return nil, "", err + } + if file != nil { + return file, preamble.String(), nil + } + + // check for a "traditional" patch + file, err = p.ParseTraditionalFileHeader() + if err != nil { + return nil, "", err + } + if file != nil { + return file, preamble.String(), nil + } + + NextLine: + preamble.WriteString(p.Line(0)) + if err := p.Next(); err != nil { + if err == io.EOF { + break + } + return nil, "", err + } + } + return nil, preamble.String(), nil +} + +func (p *parser) ParseGitFileHeader() (*File, error) { + const prefix = "diff --git " + + if !strings.HasPrefix(p.Line(0), prefix) { + return nil, nil + } + header := p.Line(0)[len(prefix):] + + defaultName, err := parseGitHeaderName(header) + if err != nil { + return nil, p.Errorf(0, "git file header: %v", err) + } + + f := &File{} + for { + end, err := parseGitHeaderData(f, p.Line(1), defaultName) + if err != nil { + return nil, p.Errorf(1, "git file header: %v", err) + } + + if err := p.Next(); err != nil { + if err == io.EOF { + break + } + return nil, err + } + + if end { + break + } + } + + if f.OldName == "" && f.NewName == "" { + if defaultName == "" { + return nil, p.Errorf(0, "git file header: missing filename information") + } + f.OldName = defaultName + f.NewName = defaultName + } + + if (f.NewName == "" && !f.IsDelete) || (f.OldName == "" && !f.IsNew) { + return nil, p.Errorf(0, "git file header: missing filename information") + } + + return f, nil +} + +func (p *parser) ParseTraditionalFileHeader() (*File, error) { + const shortestValidFragHeader = "@@ -1 +1 @@\n" + const ( + oldPrefix = "--- " + newPrefix = "+++ " + ) + + oldLine, newLine := p.Line(0), p.Line(1) + + if !strings.HasPrefix(oldLine, oldPrefix) || !strings.HasPrefix(newLine, newPrefix) { + return nil, nil + } + // heuristic: only a file header if followed by a (probable) fragment header + if len(p.Line(2)) < len(shortestValidFragHeader) || !strings.HasPrefix(p.Line(2), "@@ -") { + return nil, nil + } + + // advance past the first two lines so parser is after the header + // no EOF check needed because we know there are >=3 valid lines + if err := p.Next(); err != nil { + return nil, err + } + if err := p.Next(); err != nil { + return nil, err + } + + oldName, _, err := parseName(oldLine[len(oldPrefix):], '\t', 0) + if err != nil { + return nil, p.Errorf(0, "file header: %v", err) + } + + newName, _, err := parseName(newLine[len(newPrefix):], '\t', 0) + if err != nil { + return nil, p.Errorf(1, "file header: %v", err) + } + + f := &File{} + switch { + case oldName == devNull || hasEpochTimestamp(oldLine): + f.IsNew = true + f.NewName = newName + case newName == devNull || hasEpochTimestamp(newLine): + f.IsDelete = true + f.OldName = oldName + default: + // if old name is a prefix of new name, use that instead + // this avoids picking variants like "file.bak" or "file~" + if strings.HasPrefix(newName, oldName) { + f.OldName = oldName + f.NewName = oldName + } else { + f.OldName = newName + f.NewName = newName + } + } + + return f, nil +} + +// 
parseGitHeaderName extracts a default file name from the Git file header +// line. This is required for mode-only changes and creation/deletion of empty +// files. Other types of patch include the file name(s) in the header data. +// If the names in the header do not match because the patch is a rename, +// return an empty default name. +func parseGitHeaderName(header string) (string, error) { + header = strings.TrimSuffix(header, "\n") + if len(header) == 0 { + return "", nil + } + + var err error + var first, second string + + // there are 4 cases to account for: + // + // 1) unquoted unquoted + // 2) unquoted "quoted" + // 3) "quoted" unquoted + // 4) "quoted" "quoted" + // + quote := strings.IndexByte(header, '"') + switch { + case quote < 0: + // case 1 + first = header + + case quote > 0: + // case 2 + first = header[:quote-1] + if !isSpace(header[quote-1]) { + return "", fmt.Errorf("missing separator") + } + + second, _, err = parseQuotedName(header[quote:]) + if err != nil { + return "", err + } + + case quote == 0: + // case 3 or case 4 + var n int + first, n, err = parseQuotedName(header) + if err != nil { + return "", err + } + + // git accepts multiple spaces after a quoted name, but not after an + // unquoted name, since the name might end with one or more spaces + for n < len(header) && isSpace(header[n]) { + n++ + } + if n == len(header) { + return "", nil + } + + if header[n] == '"' { + second, _, err = parseQuotedName(header[n:]) + if err != nil { + return "", err + } + } else { + second = header[n:] + } + } + + first = trimTreePrefix(first, 1) + if second != "" { + if first == trimTreePrefix(second, 1) { + return first, nil + } + return "", nil + } + + // at this point, both names are unquoted (case 1) + // since names may contain spaces, we can't use a known separator + // instead, look for a split that produces two equal names + + for i := 0; i < len(first)-1; i++ { + if !isSpace(first[i]) { + continue + } + second = trimTreePrefix(first[i+1:], 1) + if name := first[:i]; name == second { + return name, nil + } + } + return "", nil +} + +// parseGitHeaderData parses a single line of metadata from a Git file header. +// It returns true when header parsing is complete; in that case, line was the +// first line of non-header content. 
+func parseGitHeaderData(f *File, line, defaultName string) (end bool, err error) { + if len(line) > 0 && line[len(line)-1] == '\n' { + line = line[:len(line)-1] + } + + for _, hdr := range []struct { + prefix string + end bool + parse func(*File, string, string) error + }{ + {"@@ -", true, nil}, + {"--- ", false, parseGitHeaderOldName}, + {"+++ ", false, parseGitHeaderNewName}, + {"old mode ", false, parseGitHeaderOldMode}, + {"new mode ", false, parseGitHeaderNewMode}, + {"deleted file mode ", false, parseGitHeaderDeletedMode}, + {"new file mode ", false, parseGitHeaderCreatedMode}, + {"copy from ", false, parseGitHeaderCopyFrom}, + {"copy to ", false, parseGitHeaderCopyTo}, + {"rename old ", false, parseGitHeaderRenameFrom}, + {"rename new ", false, parseGitHeaderRenameTo}, + {"rename from ", false, parseGitHeaderRenameFrom}, + {"rename to ", false, parseGitHeaderRenameTo}, + {"similarity index ", false, parseGitHeaderScore}, + {"dissimilarity index ", false, parseGitHeaderScore}, + {"index ", false, parseGitHeaderIndex}, + } { + if strings.HasPrefix(line, hdr.prefix) { + if hdr.parse != nil { + err = hdr.parse(f, line[len(hdr.prefix):], defaultName) + } + return hdr.end, err + } + } + + // unknown line indicates the end of the header + // this usually happens if the diff is empty + return true, nil +} + +func parseGitHeaderOldName(f *File, line, defaultName string) error { + name, _, err := parseName(line, '\t', 1) + if err != nil { + return err + } + if f.OldName == "" && !f.IsNew { + f.OldName = name + return nil + } + return verifyGitHeaderName(name, f.OldName, f.IsNew, "old") +} + +func parseGitHeaderNewName(f *File, line, defaultName string) error { + name, _, err := parseName(line, '\t', 1) + if err != nil { + return err + } + if f.NewName == "" && !f.IsDelete { + f.NewName = name + return nil + } + return verifyGitHeaderName(name, f.NewName, f.IsDelete, "new") +} + +func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) { + f.OldMode, err = parseMode(strings.TrimSpace(line)) + return +} + +func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) { + f.NewMode, err = parseMode(strings.TrimSpace(line)) + return +} + +func parseGitHeaderDeletedMode(f *File, line, defaultName string) error { + f.IsDelete = true + f.OldName = defaultName + return parseGitHeaderOldMode(f, line, defaultName) +} + +func parseGitHeaderCreatedMode(f *File, line, defaultName string) error { + f.IsNew = true + f.NewName = defaultName + return parseGitHeaderNewMode(f, line, defaultName) +} + +func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) { + f.IsCopy = true + f.OldName, _, err = parseName(line, 0, 0) + return +} + +func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) { + f.IsCopy = true + f.NewName, _, err = parseName(line, 0, 0) + return +} + +func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) { + f.IsRename = true + f.OldName, _, err = parseName(line, 0, 0) + return +} + +func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) { + f.IsRename = true + f.NewName, _, err = parseName(line, 0, 0) + return +} + +func parseGitHeaderScore(f *File, line, defaultName string) error { + score, err := strconv.ParseInt(strings.TrimSuffix(line, "%"), 10, 32) + if err != nil { + nerr := err.(*strconv.NumError) + return fmt.Errorf("invalid score line: %v", nerr.Err) + } + if score <= 100 { + f.Score = int(score) + } + return nil +} + +func parseGitHeaderIndex(f *File, line, defaultName string) 
error { + const sep = ".." + + // note that git stops parsing if the OIDs are too long to be valid + // checking this requires knowing if the repository uses SHA1 or SHA256 + // hashes, which we don't know, so we just skip that check + + parts := strings.SplitN(line, " ", 2) + oids := strings.SplitN(parts[0], sep, 2) + + if len(oids) < 2 { + return fmt.Errorf("invalid index line: missing %q", sep) + } + f.OldOIDPrefix, f.NewOIDPrefix = oids[0], oids[1] + + if len(parts) > 1 { + return parseGitHeaderOldMode(f, parts[1], defaultName) + } + return nil +} + +func parseMode(s string) (os.FileMode, error) { + mode, err := strconv.ParseInt(s, 8, 32) + if err != nil { + nerr := err.(*strconv.NumError) + return os.FileMode(0), fmt.Errorf("invalid mode line: %v", nerr.Err) + } + return os.FileMode(mode), nil +} + +// parseName extracts a file name from the start of a string and returns the +// name and the index of the first character after the name. If the name is +// unquoted and term is non-zero, parsing stops at the first occurrence of +// term. +// +// If the name is exactly "/dev/null", no further processing occurs. Otherwise, +// if dropPrefix is greater than zero, that number of prefix components +// separated by forward slashes are dropped from the name and any duplicate +// slashes are collapsed. +func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) { + if len(s) > 0 && s[0] == '"' { + name, n, err = parseQuotedName(s) + } else { + name, n, err = parseUnquotedName(s, term) + } + if err != nil { + return "", 0, err + } + if name == devNull { + return name, n, nil + } + return cleanName(name, dropPrefix), n, nil +} + +func parseQuotedName(s string) (name string, n int, err error) { + for n = 1; n < len(s); n++ { + if s[n] == '"' && s[n-1] != '\\' { + n++ + break + } + } + if n == 2 { + return "", 0, fmt.Errorf("missing name") + } + if name, err = strconv.Unquote(s[:n]); err != nil { + return "", 0, err + } + return name, n, err +} + +func parseUnquotedName(s string, term byte) (name string, n int, err error) { + for n = 0; n < len(s); n++ { + if s[n] == '\n' { + break + } + if term > 0 && s[n] == term { + break + } + } + if n == 0 { + return "", 0, fmt.Errorf("missing name") + } + return s[:n], n, nil +} + +// verifyGitHeaderName checks a parsed name against state set by previous lines +func verifyGitHeaderName(parsed, existing string, isNull bool, side string) error { + if existing != "" { + if isNull { + return fmt.Errorf("expected %s, but filename is set to %s", devNull, existing) + } + if existing != parsed { + return fmt.Errorf("inconsistent %s filename", side) + } + } + if isNull && parsed != devNull { + return fmt.Errorf("expected %s", devNull) + } + return nil +} + +// cleanName removes double slashes and drops prefix segments. +func cleanName(name string, drop int) string { + var b strings.Builder + for i := 0; i < len(name); i++ { + if name[i] == '/' { + if i < len(name)-1 && name[i+1] == '/' { + continue + } + if drop > 0 { + drop-- + b.Reset() + continue + } + } + b.WriteByte(name[i]) + } + return b.String() +} + +// trimTreePrefix removes up to n leading directory components from name. +func trimTreePrefix(name string, n int) string { + i := 0 + for ; i < len(name) && n > 0; i++ { + if name[i] == '/' { + n-- + } + } + return name[i:] +} + +// hasEpochTimestamp returns true if the string ends with a POSIX-formatted +// timestamp for the UNIX epoch after a tab character. According to git, this +// is used by GNU diff to mark creations and deletions. 
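Concretely, mirroring the traditional-header tests below: a header line such as "--- dir/file.txt<TAB>1969-12-31 17:00:00.0 -0700" ends with the Unix epoch rendered in a -0700 zone, so the old side is treated as nonexistent and the patch is parsed as a file creation; the same epoch timestamp on the "+++" line marks a deletion. (<TAB> stands in for the literal tab character that hasEpochTimestamp requires before the timestamp.)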
+func hasEpochTimestamp(s string) bool { + const posixTimeLayout = "2006-01-02 15:04:05.9 -0700" + + start := strings.IndexRune(s, '\t') + if start < 0 { + return false + } + + ts := strings.TrimSuffix(s[start+1:], "\n") + + // a valid timestamp can have optional ':' in zone specifier + // remove that if it exists so we have a single format + if len(ts) >= 3 && ts[len(ts)-3] == ':' { + ts = ts[:len(ts)-3] + ts[len(ts)-2:] + } + + t, err := time.Parse(posixTimeLayout, ts) + if err != nil { + return false + } + if !t.Equal(time.Unix(0, 0)) { + return false + } + return true +} + +func isSpace(c byte) bool { + return c == ' ' || c == '\t' || c == '\n' +} diff --git a/pkg/gitdiff/file_header_test.go b/pkg/gitdiff/file_header_test.go new file mode 100644 index 0000000..ef29833 --- /dev/null +++ b/pkg/gitdiff/file_header_test.go @@ -0,0 +1,766 @@ +package gitdiff + +import ( + "io" + "os" + "reflect" + "testing" +) + +func TestParseGitFileHeader(t *testing.T) { + tests := map[string]struct { + Input string + Output *File + Err bool + }{ + "fileContentChange": { + Input: `diff --git a/dir/file.txt b/dir/file.txt +index 1c23fcc..40a1b33 100644 +--- a/dir/file.txt ++++ b/dir/file.txt +@@ -2,3 +4,5 @@ +`, + Output: &File{ + OldName: "dir/file.txt", + NewName: "dir/file.txt", + OldMode: os.FileMode(0100644), + OldOIDPrefix: "1c23fcc", + NewOIDPrefix: "40a1b33", + }, + }, + "newFile": { + Input: `diff --git a/dir/file.txt b/dir/file.txt +new file mode 100644 +index 0000000..f5711e4 +--- /dev/null ++++ b/dir/file.txt +`, + Output: &File{ + NewName: "dir/file.txt", + NewMode: os.FileMode(0100644), + OldOIDPrefix: "0000000", + NewOIDPrefix: "f5711e4", + IsNew: true, + }, + }, + "newEmptyFile": { + Input: `diff --git a/empty.txt b/empty.txt +new file mode 100644 +index 0000000..e69de29 +`, + Output: &File{ + NewName: "empty.txt", + NewMode: os.FileMode(0100644), + OldOIDPrefix: "0000000", + NewOIDPrefix: "e69de29", + IsNew: true, + }, + }, + "deleteFile": { + Input: `diff --git a/dir/file.txt b/dir/file.txt +deleted file mode 100644 +index 44cc321..0000000 +--- a/dir/file.txt ++++ /dev/null +`, + Output: &File{ + OldName: "dir/file.txt", + OldMode: os.FileMode(0100644), + OldOIDPrefix: "44cc321", + NewOIDPrefix: "0000000", + IsDelete: true, + }, + }, + "changeMode": { + Input: `diff --git a/file.sh b/file.sh +old mode 100644 +new mode 100755 +`, + Output: &File{ + OldName: "file.sh", + NewName: "file.sh", + OldMode: os.FileMode(0100644), + NewMode: os.FileMode(0100755), + }, + }, + "rename": { + Input: `diff --git a/foo.txt b/bar.txt +similarity index 100% +rename from foo.txt +rename to bar.txt +`, + Output: &File{ + OldName: "foo.txt", + NewName: "bar.txt", + Score: 100, + IsRename: true, + }, + }, + "copy": { + Input: `diff --git a/file.txt b/copy.txt +similarity index 100% +copy from file.txt +copy to copy.txt +`, + Output: &File{ + OldName: "file.txt", + NewName: "copy.txt", + Score: 100, + IsCopy: true, + }, + }, + "missingDefaultFilename": { + Input: `diff --git a/foo.sh b/bar.sh +old mode 100644 +new mode 100755 +`, + Err: true, + }, + "missingNewFilename": { + Input: `diff --git a/file.txt b/file.txt +index 1c23fcc..40a1b33 100644 +--- a/file.txt +`, + Err: true, + }, + "missingOldFilename": { + Input: `diff --git a/file.txt b/file.txt +index 1c23fcc..40a1b33 100644 ++++ b/file.txt +`, + Err: true, + }, + "invalidHeaderLine": { + Input: `diff --git a/file.txt b/file.txt +index deadbeef +--- a/file.txt ++++ b/file.txt +`, + Err: true, + }, + "notGitHeader": { + Input: `--- file.txt ++++ file.txt 
+@@ -0,0 +1 @@ +`, + Output: nil, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + p := newTestParser(test.Input, true) + + f, err := p.ParseGitFileHeader() + if test.Err { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing git file header, got %v", err) + } + return + } + if err != nil { + t.Fatalf("unexpected error parsing git file header: %v", err) + } + + if !reflect.DeepEqual(test.Output, f) { + t.Errorf("incorrect file\nexpected: %+v\n actual: %+v", test.Output, f) + } + }) + } +} + +func TestParseTraditionalFileHeader(t *testing.T) { + tests := map[string]struct { + Input string + Output *File + Err bool + }{ + "fileContentChange": { + Input: `--- dir/file_old.txt 2019-03-21 23:00:00.0 -0700 ++++ dir/file_new.txt 2019-03-21 23:30:00.0 -0700 +@@ -0,0 +1 @@ +`, + Output: &File{ + OldName: "dir/file_new.txt", + NewName: "dir/file_new.txt", + }, + }, + "newFile": { + Input: `--- /dev/null 1969-12-31 17:00:00.0 -0700 ++++ dir/file.txt 2019-03-21 23:30:00.0 -0700 +@@ -0,0 +1 @@ +`, + Output: &File{ + NewName: "dir/file.txt", + IsNew: true, + }, + }, + "newFileTimestamp": { + Input: `--- dir/file.txt 1969-12-31 17:00:00.0 -0700 ++++ dir/file.txt 2019-03-21 23:30:00.0 -0700 +@@ -0,0 +1 @@ +`, + Output: &File{ + NewName: "dir/file.txt", + IsNew: true, + }, + }, + "deleteFile": { + Input: `--- dir/file.txt 2019-03-21 23:30:00.0 -0700 ++++ /dev/null 1969-12-31 17:00:00.0 -0700 +@@ -0,0 +1 @@ +`, + Output: &File{ + OldName: "dir/file.txt", + IsDelete: true, + }, + }, + "deleteFileTimestamp": { + Input: `--- dir/file.txt 2019-03-21 23:30:00.0 -0700 ++++ dir/file.txt 1969-12-31 17:00:00.0 -0700 +@@ -0,0 +1 @@ +`, + Output: &File{ + OldName: "dir/file.txt", + IsDelete: true, + }, + }, + "useShortestPrefixName": { + Input: `--- dir/file.txt 2019-03-21 23:00:00.0 -0700 ++++ dir/file.txt~ 2019-03-21 23:30:00.0 -0700 +@@ -0,0 +1 @@ +`, + Output: &File{ + OldName: "dir/file.txt", + NewName: "dir/file.txt", + }, + }, + "notTraditionalHeader": { + Input: `diff --git a/dir/file.txt b/dir/file.txt +--- a/dir/file.txt ++++ b/dir/file.txt +`, + Output: nil, + }, + "noUnifiedFragment": { + Input: `--- dir/file_old.txt 2019-03-21 23:00:00.0 -0700 ++++ dir/file_new.txt 2019-03-21 23:30:00.0 -0700 +context line ++added line +`, + Output: nil, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + p := newTestParser(test.Input, true) + + f, err := p.ParseTraditionalFileHeader() + if test.Err { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing traditional file header, got %v", err) + } + return + } + if err != nil { + t.Fatalf("unexpected error parsing traditional file header: %v", err) + } + + if !reflect.DeepEqual(test.Output, f) { + t.Errorf("incorrect file\nexpected: %+v\n actual: %+v", test.Output, f) + } + }) + } +} + +func TestCleanName(t *testing.T) { + tests := map[string]struct { + Input string + Drop int + Output string + }{ + "alreadyClean": { + Input: "a/b/c.txt", Output: "a/b/c.txt", + }, + "doubleSlashes": { + Input: "a//b/c.txt", Output: "a/b/c.txt", + }, + "tripleSlashes": { + Input: "a///b/c.txt", Output: "a/b/c.txt", + }, + "dropPrefix": { + Input: "a/b/c.txt", Drop: 2, Output: "c.txt", + }, + "removeDoublesBeforeDrop": { + Input: "a//b/c.txt", Drop: 1, Output: "b/c.txt", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + output := cleanName(test.Input, test.Drop) + if output != test.Output { + t.Fatalf("incorrect output: expected %q, actual %q", test.Output, output) 
+ } + }) + } +} + +func TestParseName(t *testing.T) { + tests := map[string]struct { + Input string + Term byte + Drop int + Output string + N int + Err bool + }{ + "singleUnquoted": { + Input: "dir/file.txt", Output: "dir/file.txt", N: 12, + }, + "singleQuoted": { + Input: `"dir/file.txt"`, Output: "dir/file.txt", N: 14, + }, + "quotedWithEscape": { + Input: `"dir/\"quotes\".txt"`, Output: `dir/"quotes".txt`, N: 20, + }, + "quotedWithSpaces": { + Input: `"dir/space file.txt"`, Output: "dir/space file.txt", N: 20, + }, + "tabTerminator": { + Input: "dir/space file.txt\tfile2.txt", Term: '\t', Output: "dir/space file.txt", N: 18, + }, + "dropPrefix": { + Input: "a/dir/file.txt", Drop: 1, Output: "dir/file.txt", N: 14, + }, + "unquotedWithSpaces": { + Input: "dir/with spaces.txt", Output: "dir/with spaces.txt", N: 19, + }, + "unquotedWithTrailingSpaces": { + Input: "dir/with spaces.space ", Output: "dir/with spaces.space ", N: 23, + }, + "devNull": { + Input: "/dev/null", Term: '\t', Drop: 1, Output: "/dev/null", N: 9, + }, + "newlineSeparates": { + Input: "dir/file.txt\n", Output: "dir/file.txt", N: 12, + }, + "emptyString": { + Input: "", Err: true, + }, + "emptyQuotedString": { + Input: `""`, Err: true, + }, + "unterminatedQuotes": { + Input: `"dir/file.txt`, Err: true, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + output, n, err := parseName(test.Input, test.Term, test.Drop) + if test.Err { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing name, but got %v", err) + } + return + } + if err != nil { + t.Fatalf("unexpected error parsing name: %v", err) + } + + if output != test.Output { + t.Errorf("incorrect output: expected %q, actual: %q", test.Output, output) + } + if n != test.N { + t.Errorf("incorrect next position: expected %d, actual %d", test.N, n) + } + }) + } +} + +func TestParseGitHeaderData(t *testing.T) { + tests := map[string]struct { + InputFile *File + Line string + DefaultName string + + OutputFile *File + End bool + Err bool + }{ + "fragementEndsParsing": { + Line: "@@ -12,3 +12,2 @@\n", + End: true, + }, + "unknownEndsParsing": { + Line: "GIT binary file\n", + End: true, + }, + "oldFileName": { + Line: "--- a/dir/file.txt\n", + OutputFile: &File{ + OldName: "dir/file.txt", + }, + }, + "oldFileNameDevNull": { + InputFile: &File{ + IsNew: true, + }, + Line: "--- /dev/null\n", + OutputFile: &File{ + IsNew: true, + }, + }, + "oldFileNameInconsistent": { + InputFile: &File{ + OldName: "dir/foo.txt", + }, + Line: "--- a/dir/bar.txt\n", + Err: true, + }, + "oldFileNameExistingCreateMismatch": { + InputFile: &File{ + OldName: "dir/foo.txt", + IsNew: true, + }, + Line: "--- /dev/null\n", + Err: true, + }, + "oldFileNameParsedCreateMismatch": { + InputFile: &File{ + IsNew: true, + }, + Line: "--- a/dir/file.txt\n", + Err: true, + }, + "oldFileNameMissing": { + Line: "--- \n", + Err: true, + }, + "newFileName": { + Line: "+++ b/dir/file.txt\n", + OutputFile: &File{ + NewName: "dir/file.txt", + }, + }, + "newFileNameDevNull": { + InputFile: &File{ + IsDelete: true, + }, + Line: "+++ /dev/null\n", + OutputFile: &File{ + IsDelete: true, + }, + }, + "newFileNameInconsistent": { + InputFile: &File{ + NewName: "dir/foo.txt", + }, + Line: "+++ b/dir/bar.txt\n", + Err: true, + }, + "newFileNameExistingDeleteMismatch": { + InputFile: &File{ + NewName: "dir/foo.txt", + IsDelete: true, + }, + Line: "+++ /dev/null\n", + Err: true, + }, + "newFileNameParsedDeleteMismatch": { + InputFile: &File{ + IsDelete: true, + }, + Line: "+++ 
b/dir/file.txt\n", + Err: true, + }, + "newFileNameMissing": { + Line: "+++ \n", + Err: true, + }, + "oldMode": { + Line: "old mode 100644\n", + OutputFile: &File{ + OldMode: os.FileMode(0100644), + }, + }, + "oldModeWithTrailingSpace": { + Line: "old mode 100644\r\n", + OutputFile: &File{ + OldMode: os.FileMode(0100644), + }, + }, + "invalidOldMode": { + Line: "old mode rw\n", + Err: true, + }, + "newMode": { + Line: "new mode 100755\n", + OutputFile: &File{ + NewMode: os.FileMode(0100755), + }, + }, + "newModeWithTrailingSpace": { + Line: "new mode 100755\r\n", + OutputFile: &File{ + NewMode: os.FileMode(0100755), + }, + }, + "invalidNewMode": { + Line: "new mode rwx\n", + Err: true, + }, + "deletedFileMode": { + Line: "deleted file mode 100644\n", + DefaultName: "dir/file.txt", + OutputFile: &File{ + OldName: "dir/file.txt", + OldMode: os.FileMode(0100644), + IsDelete: true, + }, + }, + "newFileMode": { + Line: "new file mode 100755\n", + DefaultName: "dir/file.txt", + OutputFile: &File{ + NewName: "dir/file.txt", + NewMode: os.FileMode(0100755), + IsNew: true, + }, + }, + "newFileModeWithTrailingSpace": { + Line: "new file mode 100755\r\n", + DefaultName: "dir/file.txt", + OutputFile: &File{ + NewName: "dir/file.txt", + NewMode: os.FileMode(0100755), + IsNew: true, + }, + }, + "copyFrom": { + Line: "copy from dir/file.txt\n", + OutputFile: &File{ + OldName: "dir/file.txt", + IsCopy: true, + }, + }, + "copyTo": { + Line: "copy to dir/file.txt\n", + OutputFile: &File{ + NewName: "dir/file.txt", + IsCopy: true, + }, + }, + "renameFrom": { + Line: "rename from dir/file.txt\n", + OutputFile: &File{ + OldName: "dir/file.txt", + IsRename: true, + }, + }, + "renameTo": { + Line: "rename to dir/file.txt\n", + OutputFile: &File{ + NewName: "dir/file.txt", + IsRename: true, + }, + }, + "similarityIndex": { + Line: "similarity index 88%\n", + OutputFile: &File{ + Score: 88, + }, + }, + "similarityIndexTooBig": { + Line: "similarity index 9001%\n", + OutputFile: &File{ + Score: 0, + }, + }, + "similarityIndexInvalid": { + Line: "similarity index 12ab%\n", + Err: true, + }, + "indexFullSHA1AndMode": { + Line: "index 79c6d7f7b7e76c75b3d238f12fb1323f2333ba14..04fab916d8f938173cbb8b93469855f0e838f098 100644\n", + OutputFile: &File{ + OldOIDPrefix: "79c6d7f7b7e76c75b3d238f12fb1323f2333ba14", + NewOIDPrefix: "04fab916d8f938173cbb8b93469855f0e838f098", + OldMode: os.FileMode(0100644), + }, + }, + "indexFullSHA1NoMode": { + Line: "index 79c6d7f7b7e76c75b3d238f12fb1323f2333ba14..04fab916d8f938173cbb8b93469855f0e838f098\n", + OutputFile: &File{ + OldOIDPrefix: "79c6d7f7b7e76c75b3d238f12fb1323f2333ba14", + NewOIDPrefix: "04fab916d8f938173cbb8b93469855f0e838f098", + }, + }, + "indexAbbrevSHA1AndMode": { + Line: "index 79c6d7..04fab9 100644\n", + OutputFile: &File{ + OldOIDPrefix: "79c6d7", + NewOIDPrefix: "04fab9", + OldMode: os.FileMode(0100644), + }, + }, + "indexInvalid": { + Line: "index 79c6d7f7b7e76c75b3d238f12fb1323f2333ba14\n", + Err: true, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + var f File + if test.InputFile != nil { + f = *test.InputFile + } + + end, err := parseGitHeaderData(&f, test.Line, test.DefaultName) + if test.Err { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing header data, but got %v", err) + } + return + } + if err != nil { + t.Fatalf("unexpected error parsing header data: %v", err) + } + + if test.OutputFile != nil && !reflect.DeepEqual(test.OutputFile, &f) { + t.Errorf("incorrect output:\nexpected: %+v\nactual: %+v", 
test.OutputFile, &f) + } + if end != test.End { + t.Errorf("incorrect end state, expected %t, actual %t", test.End, end) + } + }) + } +} + +func TestParseGitHeaderName(t *testing.T) { + tests := map[string]struct { + Input string + Output string + Err bool + }{ + "twoMatchingNames": { + Input: "a/dir/file.txt b/dir/file.txt", + Output: "dir/file.txt", + }, + "twoDifferentNames": { + Input: "a/dir/foo.txt b/dir/bar.txt", + Output: "", + }, + "matchingNamesWithSpaces": { + Input: "a/dir/file with spaces.txt b/dir/file with spaces.txt", + Output: "dir/file with spaces.txt", + }, + "matchingNamesWithTrailingSpaces": { + Input: "a/dir/spaces b/dir/spaces ", + Output: "dir/spaces ", + }, + "matchingNamesQuoted": { + Input: `"a/dir/\"quotes\".txt" "b/dir/\"quotes\".txt"`, + Output: `dir/"quotes".txt`, + }, + "matchingNamesFirstQuoted": { + Input: `"a/dir/file.txt" b/dir/file.txt`, + Output: "dir/file.txt", + }, + "matchingNamesSecondQuoted": { + Input: `a/dir/file.txt "b/dir/file.txt"`, + Output: "dir/file.txt", + }, + "noSecondName": { + Input: "a/dir/foo.txt", + Output: "", + }, + "noSecondNameQuoted": { + Input: `"a/dir/foo.txt"`, + Output: "", + }, + "invalidName": { + Input: `"a/dir/file.txt b/dir/file.txt`, + Err: true, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + output, err := parseGitHeaderName(test.Input) + if test.Err { + if err == nil { + t.Fatalf("expected error parsing header name, but got nil") + } + return + } + if err != nil { + t.Fatalf("unexpected error parsing header name: %v", err) + } + + if output != test.Output { + t.Errorf("incorrect output: expected %q, actual %q", test.Output, output) + } + }) + } +} + +func TestHasEpochTimestamp(t *testing.T) { + tests := map[string]struct { + Input string + Output bool + }{ + "utcTimestamp": { + Input: "+++ file.txt\t1970-01-01 00:00:00 +0000\n", + Output: true, + }, + "utcZoneWithColon": { + Input: "+++ file.txt\t1970-01-01 00:00:00 +00:00\n", + Output: true, + }, + "utcZoneWithMilliseconds": { + Input: "+++ file.txt\t1970-01-01 00:00:00.000000 +00:00\n", + Output: true, + }, + "westTimestamp": { + Input: "+++ file.txt\t1969-12-31 16:00:00 -0800\n", + Output: true, + }, + "eastTimestamp": { + Input: "+++ file.txt\t1970-01-01 04:00:00 +0400\n", + Output: true, + }, + "noTab": { + Input: "+++ file.txt 1970-01-01 00:00:00 +0000\n", + Output: false, + }, + "invalidFormat": { + Input: "+++ file.txt\t1970-01-01T00:00:00Z\n", + Output: false, + }, + "notEpoch": { + Input: "+++ file.txt\t2019-03-21 12:34:56.789 -0700\n", + Output: false, + }, + "notTimestamp": { + Input: "+++ file.txt\trandom text\n", + Output: false, + }, + "notTimestampShort": { + Input: "+++ file.txt\t0\n", + Output: false, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + output := hasEpochTimestamp(test.Input) + if output != test.Output { + t.Errorf("incorrect output: expected %t, actual %t", test.Output, output) + } + }) + } +} diff --git a/pkg/gitdiff/format.go b/pkg/gitdiff/format.go new file mode 100644 index 0000000..d97aba9 --- /dev/null +++ b/pkg/gitdiff/format.go @@ -0,0 +1,281 @@ +package gitdiff + +import ( + "bytes" + "compress/zlib" + "fmt" + "io" + "strconv" +) + +type formatter struct { + w io.Writer + err error +} + +func newFormatter(w io.Writer) *formatter { + return &formatter{w: w} +} + +func (fm *formatter) Write(p []byte) (int, error) { + if fm.err != nil { + return len(p), nil + } + if _, err := fm.w.Write(p); err != nil { + fm.err = err + } + return len(p), nil +} + +func (fm 
*formatter) WriteString(s string) (int, error) { + fm.Write([]byte(s)) + return len(s), nil +} + +func (fm *formatter) WriteByte(c byte) error { + fm.Write([]byte{c}) + return nil +} + +func (fm *formatter) WriteQuotedName(s string) { + qpos := 0 + for i := 0; i < len(s); i++ { + ch := s[i] + if q, quoted := quoteByte(ch); quoted { + if qpos == 0 { + fm.WriteByte('"') + } + fm.WriteString(s[qpos:i]) + fm.Write(q) + qpos = i + 1 + } + } + fm.WriteString(s[qpos:]) + if qpos > 0 { + fm.WriteByte('"') + } +} + +var quoteEscapeTable = map[byte]byte{ + '\a': 'a', + '\b': 'b', + '\t': 't', + '\n': 'n', + '\v': 'v', + '\f': 'f', + '\r': 'r', + '"': '"', + '\\': '\\', +} + +func quoteByte(b byte) ([]byte, bool) { + if q, ok := quoteEscapeTable[b]; ok { + return []byte{'\\', q}, true + } + if b < 0x20 || b >= 0x7F { + return []byte{ + '\\', + '0' + (b>>6)&0o3, + '0' + (b>>3)&0o7, + '0' + (b>>0)&0o7, + }, true + } + return nil, false +} + +func (fm *formatter) FormatFile(f *File) { + fm.WriteString("diff --git ") + + var aName, bName string + switch { + case f.OldName == "": + aName = f.NewName + bName = f.NewName + + case f.NewName == "": + aName = f.OldName + bName = f.OldName + + default: + aName = f.OldName + bName = f.NewName + } + + fm.WriteQuotedName("a/" + aName) + fm.WriteByte(' ') + fm.WriteQuotedName("b/" + bName) + fm.WriteByte('\n') + + if f.OldMode != 0 { + if f.IsDelete { + fmt.Fprintf(fm, "deleted file mode %o\n", f.OldMode) + } else if f.NewMode != 0 { + fmt.Fprintf(fm, "old mode %o\n", f.OldMode) + } + } + + if f.NewMode != 0 { + if f.IsNew { + fmt.Fprintf(fm, "new file mode %o\n", f.NewMode) + } else if f.OldMode != 0 { + fmt.Fprintf(fm, "new mode %o\n", f.NewMode) + } + } + + if f.Score > 0 { + if f.IsCopy || f.IsRename { + fmt.Fprintf(fm, "similarity index %d%%\n", f.Score) + } else { + fmt.Fprintf(fm, "dissimilarity index %d%%\n", f.Score) + } + } + + if f.IsCopy { + if f.OldName != "" { + fm.WriteString("copy from ") + fm.WriteQuotedName(f.OldName) + fm.WriteByte('\n') + } + if f.NewName != "" { + fm.WriteString("copy to ") + fm.WriteQuotedName(f.NewName) + fm.WriteByte('\n') + } + } + + if f.IsRename { + if f.OldName != "" { + fm.WriteString("rename from ") + fm.WriteQuotedName(f.OldName) + fm.WriteByte('\n') + } + if f.NewName != "" { + fm.WriteString("rename to ") + fm.WriteQuotedName(f.NewName) + fm.WriteByte('\n') + } + } + + if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { + fmt.Fprintf(fm, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) + + // Mode is only included on the index line when it is not changing + if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) { + fmt.Fprintf(fm, " %o", f.OldMode) + } + + fm.WriteByte('\n') + } + + if f.IsBinary { + if f.BinaryFragment == nil { + fm.WriteString("Binary files ") + fm.WriteQuotedName("a/" + aName) + fm.WriteString(" and ") + fm.WriteQuotedName("b/" + bName) + fm.WriteString(" differ\n") + } else { + fm.WriteString("GIT binary patch\n") + fm.FormatBinaryFragment(f.BinaryFragment) + if f.ReverseBinaryFragment != nil { + fm.FormatBinaryFragment(f.ReverseBinaryFragment) + } + } + } + + // The "---" and "+++" lines only appear for text patches with fragments + if len(f.TextFragments) > 0 { + fm.WriteString("--- ") + if f.OldName == "" { + fm.WriteString("/dev/null") + } else { + fm.WriteQuotedName("a/" + f.OldName) + } + fm.WriteByte('\n') + + fm.WriteString("+++ ") + if f.NewName == "" { + fm.WriteString("/dev/null") + } else { + fm.WriteQuotedName("b/" + f.NewName) + } + fm.WriteByte('\n') + + for 
_, frag := range f.TextFragments { + fm.FormatTextFragment(frag) + } + } +} + +func (fm *formatter) FormatTextFragment(f *TextFragment) { + fm.FormatTextFragmentHeader(f) + fm.WriteByte('\n') + + for _, line := range f.Lines { + fm.WriteString(line.Op.String()) + fm.WriteString(line.Line) + if line.NoEOL() { + fm.WriteString("\n\\ No newline at end of file\n") + } + } +} + +func (fm *formatter) FormatTextFragmentHeader(f *TextFragment) { + fmt.Fprintf(fm, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines) + if f.Comment != "" { + fm.WriteByte(' ') + fm.WriteString(f.Comment) + } +} + +func (fm *formatter) FormatBinaryFragment(f *BinaryFragment) { + const ( + maxBytesPerLine = 52 + ) + + switch f.Method { + case BinaryPatchDelta: + fm.WriteString("delta ") + case BinaryPatchLiteral: + fm.WriteString("literal ") + } + fm.Write(strconv.AppendInt(nil, f.Size, 10)) + fm.WriteByte('\n') + + data := deflateBinaryChunk(f.Data) + n := (len(data) / maxBytesPerLine) * maxBytesPerLine + + buf := make([]byte, base85Len(maxBytesPerLine)) + for i := 0; i < n; i += maxBytesPerLine { + base85Encode(buf, data[i:i+maxBytesPerLine]) + fm.WriteByte('z') + fm.Write(buf) + fm.WriteByte('\n') + } + if remainder := len(data) - n; remainder > 0 { + buf = buf[0:base85Len(remainder)] + + sizeChar := byte(remainder) + if remainder <= 26 { + sizeChar = 'A' + sizeChar - 1 + } else { + sizeChar = 'a' + sizeChar - 27 + } + + base85Encode(buf, data[n:]) + fm.WriteByte(sizeChar) + fm.Write(buf) + fm.WriteByte('\n') + } + fm.WriteByte('\n') +} + +func deflateBinaryChunk(data []byte) []byte { + var b bytes.Buffer + + zw := zlib.NewWriter(&b) + _, _ = zw.Write(data) + _ = zw.Close() + + return b.Bytes() +} diff --git a/pkg/gitdiff/format_roundtrip_test.go b/pkg/gitdiff/format_roundtrip_test.go new file mode 100644 index 0000000..a230e91 --- /dev/null +++ b/pkg/gitdiff/format_roundtrip_test.go @@ -0,0 +1,157 @@ +package gitdiff + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "slices" + "testing" +) + +func TestFormatRoundtrip(t *testing.T) { + patches := []struct { + File string + SkipTextCompare bool + }{ + {File: "copy.patch"}, + {File: "copy_modify.patch"}, + {File: "delete.patch"}, + {File: "mode.patch"}, + {File: "mode_modify.patch"}, + {File: "modify.patch"}, + {File: "new.patch"}, + {File: "new_empty.patch"}, + {File: "new_mode.patch"}, + {File: "rename.patch"}, + {File: "rename_modify.patch"}, + + // Due to differences between Go's 'encoding/zlib' package and the zlib + // C library, binary patches cannot be compared directly as the patch + // data is slightly different when re-encoded by Go. 
+ {File: "binary_modify.patch", SkipTextCompare: true}, + {File: "binary_new.patch", SkipTextCompare: true}, + {File: "binary_modify_nodata.patch"}, + } + + for _, patch := range patches { + t.Run(patch.File, func(t *testing.T) { + b, err := os.ReadFile(filepath.Join("testdata", "string", patch.File)) + if err != nil { + t.Fatalf("failed to read patch: %v", err) + } + + original := assertParseSingleFile(t, b, "patch") + str := original.String() + + if !patch.SkipTextCompare { + if string(b) != str { + t.Errorf("incorrect patch text\nexpected: %q\n actual: %q\n", string(b), str) + } + } + + reparsed := assertParseSingleFile(t, []byte(str), "formatted patch") + assertFilesEqual(t, original, reparsed) + }) + } +} + +func assertParseSingleFile(t *testing.T, b []byte, kind string) *File { + files, _, err := Parse(bytes.NewReader(b)) + if err != nil { + t.Fatalf("failed to parse %s: %v", kind, err) + } + if len(files) != 1 { + t.Fatalf("expected %s to contain a single files, but found %d", kind, len(files)) + } + return files[0] +} + +func assertFilesEqual(t *testing.T, expected, actual *File) { + assertEqual(t, expected.OldName, actual.OldName, "OldName") + assertEqual(t, expected.NewName, actual.NewName, "NewName") + + assertEqual(t, expected.IsNew, actual.IsNew, "IsNew") + assertEqual(t, expected.IsDelete, actual.IsDelete, "IsDelete") + assertEqual(t, expected.IsCopy, actual.IsCopy, "IsCopy") + assertEqual(t, expected.IsRename, actual.IsRename, "IsRename") + + assertEqual(t, expected.OldMode, actual.OldMode, "OldMode") + assertEqual(t, expected.NewMode, actual.NewMode, "NewMode") + + assertEqual(t, expected.OldOIDPrefix, actual.OldOIDPrefix, "OldOIDPrefix") + assertEqual(t, expected.NewOIDPrefix, actual.NewOIDPrefix, "NewOIDPrefix") + assertEqual(t, expected.Score, actual.Score, "Score") + + if len(expected.TextFragments) == len(actual.TextFragments) { + for i := range expected.TextFragments { + prefix := fmt.Sprintf("TextFragments[%d].", i) + ef := expected.TextFragments[i] + af := actual.TextFragments[i] + + assertEqual(t, ef.Comment, af.Comment, prefix+"Comment") + + assertEqual(t, ef.OldPosition, af.OldPosition, prefix+"OldPosition") + assertEqual(t, ef.OldLines, af.OldLines, prefix+"OldLines") + + assertEqual(t, ef.NewPosition, af.NewPosition, prefix+"NewPosition") + assertEqual(t, ef.NewLines, af.NewLines, prefix+"NewLines") + + assertEqual(t, ef.LinesAdded, af.LinesAdded, prefix+"LinesAdded") + assertEqual(t, ef.LinesDeleted, af.LinesDeleted, prefix+"LinesDeleted") + + assertEqual(t, ef.LeadingContext, af.LeadingContext, prefix+"LeadingContext") + assertEqual(t, ef.TrailingContext, af.TrailingContext, prefix+"TrailingContext") + + if !slices.Equal(ef.Lines, af.Lines) { + t.Errorf("%sLines: expected %#v, actual %#v", prefix, ef.Lines, af.Lines) + } + } + } else { + t.Errorf("TextFragments: expected length %d, actual length %d", len(expected.TextFragments), len(actual.TextFragments)) + } + + assertEqual(t, expected.IsBinary, actual.IsBinary, "IsBinary") + + if expected.BinaryFragment != nil { + if actual.BinaryFragment == nil { + t.Errorf("BinaryFragment: expected non-nil, actual is nil") + } else { + ef := expected.BinaryFragment + af := expected.BinaryFragment + + assertEqual(t, ef.Method, af.Method, "BinaryFragment.Method") + assertEqual(t, ef.Size, af.Size, "BinaryFragment.Size") + + if !slices.Equal(ef.Data, af.Data) { + t.Errorf("BinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) + } + } + } else if actual.BinaryFragment != nil { + t.Errorf("BinaryFragment: expected 
nil, actual is non-nil") + } + + if expected.ReverseBinaryFragment != nil { + if actual.ReverseBinaryFragment == nil { + t.Errorf("ReverseBinaryFragment: expected non-nil, actual is nil") + } else { + ef := expected.ReverseBinaryFragment + af := expected.ReverseBinaryFragment + + assertEqual(t, ef.Method, af.Method, "ReverseBinaryFragment.Method") + assertEqual(t, ef.Size, af.Size, "ReverseBinaryFragment.Size") + + if !slices.Equal(ef.Data, af.Data) { + t.Errorf("ReverseBinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) + } + } + } else if actual.ReverseBinaryFragment != nil { + t.Errorf("ReverseBinaryFragment: expected nil, actual is non-nil") + } +} + +func assertEqual[T comparable](t *testing.T, expected, actual T, name string) { + if expected != actual { + t.Errorf("%s: expected %#v, actual %#v", name, expected, actual) + } +} diff --git a/pkg/gitdiff/format_test.go b/pkg/gitdiff/format_test.go new file mode 100644 index 0000000..3325296 --- /dev/null +++ b/pkg/gitdiff/format_test.go @@ -0,0 +1,28 @@ +package gitdiff + +import ( + "strings" + "testing" +) + +func TestFormatter_WriteQuotedName(t *testing.T) { + tests := []struct { + Input string + Expected string + }{ + {"noquotes.txt", `noquotes.txt`}, + {"no quotes.txt", `no quotes.txt`}, + {"new\nline", `"new\nline"`}, + {"escape\x1B null\x00", `"escape\033 null\000"`}, + {"snowman \u2603 snowman", `"snowman \342\230\203 snowman"`}, + {"\"already quoted\"", `"\"already quoted\""`}, + } + + for _, test := range tests { + var b strings.Builder + newFormatter(&b).WriteQuotedName(test.Input) + if b.String() != test.Expected { + t.Errorf("expected %q, got %q", test.Expected, b.String()) + } + } +} diff --git a/pkg/gitdiff/gitdiff.go b/pkg/gitdiff/gitdiff.go new file mode 100644 index 0000000..5365645 --- /dev/null +++ b/pkg/gitdiff/gitdiff.go @@ -0,0 +1,230 @@ +package gitdiff + +import ( + "errors" + "fmt" + "os" + "strings" +) + +// File describes changes to a single file. It can be either a text file or a +// binary file. +type File struct { + OldName string + NewName string + + IsNew bool + IsDelete bool + IsCopy bool + IsRename bool + + OldMode os.FileMode + NewMode os.FileMode + + OldOIDPrefix string + NewOIDPrefix string + Score int + + // TextFragments contains the fragments describing changes to a text file. It + // may be empty if the file is empty or if only the mode changes. + TextFragments []*TextFragment + + // IsBinary is true if the file is a binary file. If the patch includes + // binary data, BinaryFragment will be non-nil and describe the changes to + // the data. If the patch is reversible, ReverseBinaryFragment will also be + // non-nil and describe the changes needed to restore the original file + // after applying the changes in BinaryFragment. + IsBinary bool + BinaryFragment *BinaryFragment + ReverseBinaryFragment *BinaryFragment +} + +// String returns a git diff representation of this file. The value can be +// parsed by this library to obtain the same File, but may not be the same as +// the original input. +func (f *File) String() string { + var diff strings.Builder + newFormatter(&diff).FormatFile(f) + return diff.String() +} + +// TextFragment describes changed lines starting at a specific line in a text file. 
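+//
+// As an illustrative sketch (not taken from the package's own documentation
+// or tests; all values here are hypothetical), a fragment that replaces a
+// single line could be described as:
+//
+//	frag := &TextFragment{
+//		OldPosition: 3, OldLines: 2,
+//		NewPosition: 3, NewLines: 2,
+//		LinesAdded: 1, LinesDeleted: 1,
+//		LeadingContext: 1,
+//		Lines: []Line{
+//			{Op: OpContext, Line: "unchanged\n"},
+//			{Op: OpDelete, Line: "old\n"},
+//			{Op: OpAdd, Line: "new\n"},
+//		},
+//	}
+//
+// With these counts Validate returns nil, and String renders the fragment as
+// a single "@@ -3,2 +3,2 @@" hunk.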
+type TextFragment struct { + Comment string + + OldPosition int64 + OldLines int64 + + NewPosition int64 + NewLines int64 + + LinesAdded int64 + LinesDeleted int64 + + LeadingContext int64 + TrailingContext int64 + + Lines []Line +} + +// String returns a git diff format of this fragment. See [File.String] for +// more details on this format. +func (f *TextFragment) String() string { + var diff strings.Builder + newFormatter(&diff).FormatTextFragment(f) + return diff.String() +} + +// Header returns a git diff header of this fragment. See [File.String] for +// more details on this format. +func (f *TextFragment) Header() string { + var hdr strings.Builder + newFormatter(&hdr).FormatTextFragmentHeader(f) + return hdr.String() +} + +// Validate checks that the fragment is self-consistent and appliable. Validate +// returns an error if and only if the fragment is invalid. +func (f *TextFragment) Validate() error { + if f == nil { + return errors.New("nil fragment") + } + + var ( + oldLines, newLines int64 + leadingContext, trailingContext int64 + contextLines, addedLines, deletedLines int64 + ) + + // count the types of lines in the fragment content + for i, line := range f.Lines { + switch line.Op { + case OpContext: + oldLines++ + newLines++ + contextLines++ + if addedLines == 0 && deletedLines == 0 { + leadingContext++ + } else { + trailingContext++ + } + case OpAdd: + newLines++ + addedLines++ + trailingContext = 0 + case OpDelete: + oldLines++ + deletedLines++ + trailingContext = 0 + default: + return fmt.Errorf("unknown operator %q on line %d", line.Op, i+1) + } + } + + // check the actual counts against the reported counts + if oldLines != f.OldLines { + return lineCountErr("old", oldLines, f.OldLines) + } + if newLines != f.NewLines { + return lineCountErr("new", newLines, f.NewLines) + } + if leadingContext != f.LeadingContext { + return lineCountErr("leading context", leadingContext, f.LeadingContext) + } + if trailingContext != f.TrailingContext { + return lineCountErr("trailing context", trailingContext, f.TrailingContext) + } + if addedLines != f.LinesAdded { + return lineCountErr("added", addedLines, f.LinesAdded) + } + if deletedLines != f.LinesDeleted { + return lineCountErr("deleted", deletedLines, f.LinesDeleted) + } + + // if a file is being created, it can only contain additions + if f.OldPosition == 0 && f.OldLines != 0 { + return errors.New("file creation fragment contains context or deletion lines") + } + + return nil +} + +func lineCountErr(kind string, actual, reported int64) error { + return fmt.Errorf("fragment contains %d %s lines but reports %d", actual, kind, reported) +} + +// Line is a line in a text fragment. +type Line struct { + Op LineOp + Line string +} + +func (fl Line) String() string { + return fl.Op.String() + fl.Line +} + +// Old returns true if the line appears in the old content of the fragment. +func (fl Line) Old() bool { + return fl.Op == OpContext || fl.Op == OpDelete +} + +// New returns true if the line appears in the new content of the fragment. +func (fl Line) New() bool { + return fl.Op == OpContext || fl.Op == OpAdd +} + +// NoEOL returns true if the line is missing a trailing newline character. +func (fl Line) NoEOL() bool { + return len(fl.Line) == 0 || fl.Line[len(fl.Line)-1] != '\n' +} + +// LineOp describes the type of a text fragment line: context, added, or removed. 
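+//
+// Each op maps to the one-character prefix used in unified diff output; for
+// example (an illustrative note, not part of the original comments):
+//
+//	Line{Op: OpContext, Line: "hello\n"}.String() // " hello\n"
+//	Line{Op: OpDelete, Line: "hello\n"}.String()  // "-hello\n"
+//	Line{Op: OpAdd, Line: "hello\n"}.String()     // "+hello\n"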
+type LineOp int + +const ( + // OpContext indicates a context line + OpContext LineOp = iota + // OpDelete indicates a deleted line + OpDelete + // OpAdd indicates an added line + OpAdd +) + +func (op LineOp) String() string { + switch op { + case OpContext: + return " " + case OpDelete: + return "-" + case OpAdd: + return "+" + } + return "?" +} + +// BinaryFragment describes changes to a binary file. +type BinaryFragment struct { + Method BinaryPatchMethod + Size int64 + Data []byte +} + +// BinaryPatchMethod is the method used to create and apply the binary patch. +type BinaryPatchMethod int + +const ( + // BinaryPatchDelta indicates the data uses Git's packfile encoding + BinaryPatchDelta BinaryPatchMethod = iota + // BinaryPatchLiteral indicates the data is the exact file content + BinaryPatchLiteral +) + +// String returns a git diff format of this fragment. Due to differences in +// zlib implementation between Go and Git, encoded binary data in the result +// will likely differ from what Git produces for the same input. See +// [File.String] for more details on this format. +func (f *BinaryFragment) String() string { + var diff strings.Builder + newFormatter(&diff).FormatBinaryFragment(f) + return diff.String() +} diff --git a/pkg/gitdiff/gitdiff_test.go b/pkg/gitdiff/gitdiff_test.go new file mode 100644 index 0000000..4f63d51 --- /dev/null +++ b/pkg/gitdiff/gitdiff_test.go @@ -0,0 +1,161 @@ +package gitdiff + +import ( + "strings" + "testing" +) + +func TestTextFragmentValidate(t *testing.T) { + tests := map[string]struct { + Fragment TextFragment + Err string + }{ + "oldLines": { + Fragment: TextFragment{ + OldPosition: 1, + OldLines: 3, + NewPosition: 1, + NewLines: 2, + LeadingContext: 1, + TrailingContext: 0, + LinesAdded: 1, + LinesDeleted: 1, + Lines: []Line{ + {Op: OpContext, Line: "line 1\n"}, + {Op: OpDelete, Line: "old line 2\n"}, + {Op: OpAdd, Line: "new line 2\n"}, + }, + }, + Err: "2 old lines", + }, + "newLines": { + Fragment: TextFragment{ + OldPosition: 1, + OldLines: 2, + NewPosition: 1, + NewLines: 3, + LeadingContext: 1, + TrailingContext: 0, + LinesAdded: 1, + LinesDeleted: 1, + Lines: []Line{ + {Op: OpContext, Line: "line 1\n"}, + {Op: OpDelete, Line: "old line 2\n"}, + {Op: OpAdd, Line: "new line 2\n"}, + }, + }, + Err: "2 new lines", + }, + "leadingContext": { + Fragment: TextFragment{ + OldPosition: 1, + OldLines: 2, + NewPosition: 1, + NewLines: 2, + LeadingContext: 0, + TrailingContext: 0, + LinesAdded: 1, + LinesDeleted: 1, + Lines: []Line{ + {Op: OpContext, Line: "line 1\n"}, + {Op: OpDelete, Line: "old line 2\n"}, + {Op: OpAdd, Line: "new line 2\n"}, + }, + }, + Err: "1 leading context lines", + }, + "trailingContext": { + Fragment: TextFragment{ + OldPosition: 1, + OldLines: 4, + NewPosition: 1, + NewLines: 3, + LeadingContext: 1, + TrailingContext: 1, + LinesAdded: 1, + LinesDeleted: 2, + Lines: []Line{ + {Op: OpContext, Line: "line 1\n"}, + {Op: OpDelete, Line: "old line 2\n"}, + {Op: OpAdd, Line: "new line 2\n"}, + {Op: OpContext, Line: "line 3\n"}, + {Op: OpDelete, Line: "old line 4\n"}, + }, + }, + Err: "0 trailing context lines", + }, + "linesAdded": { + Fragment: TextFragment{ + OldPosition: 1, + OldLines: 4, + NewPosition: 1, + NewLines: 3, + LeadingContext: 1, + TrailingContext: 0, + LinesAdded: 2, + LinesDeleted: 2, + Lines: []Line{ + {Op: OpContext, Line: "line 1\n"}, + {Op: OpDelete, Line: "old line 2\n"}, + {Op: OpAdd, Line: "new line 2\n"}, + {Op: OpContext, Line: "line 3\n"}, + {Op: OpDelete, Line: "old line 4\n"}, + }, + }, + Err: "1 
added lines", + }, + "linesDeleted": { + Fragment: TextFragment{ + OldPosition: 1, + OldLines: 4, + NewPosition: 1, + NewLines: 3, + LeadingContext: 1, + TrailingContext: 0, + LinesAdded: 1, + LinesDeleted: 1, + Lines: []Line{ + {Op: OpContext, Line: "line 1\n"}, + {Op: OpDelete, Line: "old line 2\n"}, + {Op: OpAdd, Line: "new line 2\n"}, + {Op: OpContext, Line: "line 3\n"}, + {Op: OpDelete, Line: "old line 4\n"}, + }, + }, + Err: "2 deleted lines", + }, + "fileCreation": { + Fragment: TextFragment{ + OldPosition: 0, + OldLines: 2, + NewPosition: 1, + NewLines: 1, + LeadingContext: 0, + TrailingContext: 0, + LinesAdded: 1, + LinesDeleted: 2, + Lines: []Line{ + {Op: OpDelete, Line: "old line 1\n"}, + {Op: OpDelete, Line: "old line 2\n"}, + {Op: OpAdd, Line: "new line\n"}, + }, + }, + Err: "creation fragment", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + err := test.Fragment.Validate() + if test.Err == "" && err != nil { + t.Fatalf("unexpected validation error: %v", err) + } + if test.Err != "" && err == nil { + t.Fatal("expected validation error, but got nil") + } + if !strings.Contains(err.Error(), test.Err) { + t.Fatalf("incorrect validation error: %q is not in %q", test.Err, err.Error()) + } + }) + } +} diff --git a/pkg/gitdiff/io.go b/pkg/gitdiff/io.go new file mode 100644 index 0000000..8143238 --- /dev/null +++ b/pkg/gitdiff/io.go @@ -0,0 +1,220 @@ +package gitdiff + +import ( + "errors" + "io" +) + +const ( + byteBufferSize = 32 * 1024 // from io.Copy + lineBufferSize = 32 + indexBufferSize = 1024 +) + +// LineReaderAt is the interface that wraps the ReadLinesAt method. +// +// ReadLinesAt reads len(lines) into lines starting at line offset. It returns +// the number of lines read (0 <= n <= len(lines)) and any error encountered. +// Line numbers are zero-indexed. +// +// If n < len(lines), ReadLinesAt returns a non-nil error explaining why more +// lines were not returned. +// +// Lines read by ReadLinesAt include the newline character. The last line does +// not have a final newline character if the input ends without one. +type LineReaderAt interface { + ReadLinesAt(lines [][]byte, offset int64) (n int, err error) +} + +type lineReaderAt struct { + r io.ReaderAt + index []int64 + eof bool +} + +func (r *lineReaderAt) ReadLinesAt(lines [][]byte, offset int64) (n int, err error) { + if offset < 0 { + return 0, errors.New("ReadLinesAt: negative offset") + } + if len(lines) == 0 { + return 0, nil + } + + count := len(lines) + startLine := offset + endLine := startLine + int64(count) + + if endLine > int64(len(r.index)) && !r.eof { + if err := r.indexTo(endLine); err != nil { + return 0, err + } + } + if startLine >= int64(len(r.index)) { + return 0, io.EOF + } + + buf, byteOffset, err := r.readBytes(startLine, int64(count)) + if err != nil { + return 0, err + } + + for n = 0; n < count && startLine+int64(n) < int64(len(r.index)); n++ { + lineno := startLine + int64(n) + start, end := int64(0), r.index[lineno]-byteOffset + if lineno > 0 { + start = r.index[lineno-1] - byteOffset + } + lines[n] = buf[start:end] + } + + if n < count { + return n, io.EOF + } + return n, nil +} + +// indexTo reads data and computes the line index until there is information +// for line or a read returns io.EOF. It returns an error if and only if there +// is an error reading data. 
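+//
+// As an illustrative example (assuming a strings.Reader as the io.ReaderAt;
+// this note is not part of the original comments), indexing "ab\ncd\ne"
+// records, for each line, the byte offset just past that line's end, with
+// the final unterminated line indexed once EOF is reached:
+//
+//	r := &lineReaderAt{r: strings.NewReader("ab\ncd\ne")}
+//	_ = r.indexTo(3) // r.index == []int64{3, 6, 7}, r.eof == true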
+func (r *lineReaderAt) indexTo(line int64) error { + var buf [indexBufferSize]byte + + offset := r.lastOffset() + for int64(len(r.index)) < line { + n, err := r.r.ReadAt(buf[:], offset) + if err != nil && err != io.EOF { + return err + } + for _, b := range buf[:n] { + offset++ + if b == '\n' { + r.index = append(r.index, offset) + } + } + if err == io.EOF { + if offset > r.lastOffset() { + r.index = append(r.index, offset) + } + r.eof = true + break + } + } + return nil +} + +func (r *lineReaderAt) lastOffset() int64 { + if n := len(r.index); n > 0 { + return r.index[n-1] + } + return 0 +} + +// readBytes reads the bytes of the n lines starting at line and returns the +// bytes and the offset of the first byte in the underlying source. +func (r *lineReaderAt) readBytes(line, n int64) (b []byte, offset int64, err error) { + indexLen := int64(len(r.index)) + + var size int64 + if line > indexLen { + offset = r.index[indexLen-1] + } else if line > 0 { + offset = r.index[line-1] + } + if n > 0 { + if line+n > indexLen { + size = r.index[indexLen-1] - offset + } else { + size = r.index[line+n-1] - offset + } + } + + b = make([]byte, size) + if _, err := r.r.ReadAt(b, offset); err != nil { + if err == io.EOF { + err = errors.New("ReadLinesAt: corrupt line index or changed source data") + } + return nil, 0, err + } + return b, offset, nil +} + +func isLen(r io.ReaderAt, n int64) (bool, error) { + off := n - 1 + if off < 0 { + off = 0 + } + + var b [2]byte + nr, err := r.ReadAt(b[:], off) + if err == io.EOF { + return (n == 0 && nr == 0) || (n > 0 && nr == 1), nil + } + return false, err +} + +// copyFrom writes bytes starting from offset off in src to dst stopping at the +// end of src or at the first error. copyFrom returns the number of bytes +// written and any error. +func copyFrom(dst io.Writer, src io.ReaderAt, off int64) (written int64, err error) { + buf := make([]byte, byteBufferSize) + for { + nr, rerr := src.ReadAt(buf, off) + if nr > 0 { + nw, werr := dst.Write(buf[0:nr]) + if nw > 0 { + written += int64(nw) + } + if werr != nil { + err = werr + break + } + if nr != nw { + err = io.ErrShortWrite + break + } + off += int64(nr) + } + if rerr != nil { + if rerr != io.EOF { + err = rerr + } + break + } + } + return written, err +} + +// copyLinesFrom writes lines starting from line off in src to dst stopping at +// the end of src or at the first error. copyLinesFrom returns the number of +// lines written and any error. 
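+//
+// For example (an illustrative sketch using bytes and strings helpers; not
+// part of the original comments), copying from line offset 1 of a three-line
+// source writes the last two lines:
+//
+//	src := &lineReaderAt{r: strings.NewReader("a\nb\nc\n")}
+//	var dst bytes.Buffer
+//	n, err := copyLinesFrom(&dst, src, 1) // dst contains "b\nc\n", n == 2, err == nil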
+func copyLinesFrom(dst io.Writer, src LineReaderAt, off int64) (written int64, err error) { + buf := make([][]byte, lineBufferSize) +ReadLoop: + for { + nr, rerr := src.ReadLinesAt(buf, off) + if nr > 0 { + for _, line := range buf[0:nr] { + nw, werr := dst.Write(line) + if nw > 0 { + written++ + } + if werr != nil { + err = werr + break ReadLoop + } + if len(line) != nw { + err = io.ErrShortWrite + break ReadLoop + } + } + off += int64(nr) + } + if rerr != nil { + if rerr != io.EOF { + err = rerr + } + break + } + } + return written, err +} diff --git a/pkg/gitdiff/io_test.go b/pkg/gitdiff/io_test.go new file mode 100644 index 0000000..bd242a7 --- /dev/null +++ b/pkg/gitdiff/io_test.go @@ -0,0 +1,254 @@ +package gitdiff + +import ( + "bytes" + "fmt" + "io" + "math/rand" + "testing" +) + +func TestLineReaderAt(t *testing.T) { + const lineTemplate = "generated test line %d\n" + + tests := map[string]struct { + InputLines int + Offset int64 + Count int + Err bool + EOF bool + EOFCount int + }{ + "readLines": { + InputLines: 32, + Offset: 0, + Count: 4, + }, + "readLinesOffset": { + InputLines: 32, + Offset: 8, + Count: 4, + }, + "readLinesLargeOffset": { + InputLines: 8192, + Offset: 4096, + Count: 64, + }, + "readSingleLine": { + InputLines: 4, + Offset: 2, + Count: 1, + }, + "readZeroLines": { + InputLines: 4, + Offset: 2, + Count: 0, + }, + "readAllLines": { + InputLines: 64, + Offset: 0, + Count: 64, + }, + "readThroughEOF": { + InputLines: 16, + Offset: 12, + Count: 8, + EOF: true, + EOFCount: 4, + }, + "emptyInput": { + InputLines: 0, + Offset: 0, + Count: 2, + EOF: true, + EOFCount: 0, + }, + "offsetAfterEOF": { + InputLines: 8, + Offset: 10, + Count: 2, + EOF: true, + EOFCount: 0, + }, + "offsetNegative": { + InputLines: 8, + Offset: -1, + Count: 2, + Err: true, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + var input bytes.Buffer + for i := 0; i < test.InputLines; i++ { + fmt.Fprintf(&input, lineTemplate, i) + } + + output := make([][]byte, test.Count) + for i := 0; i < test.Count; i++ { + output[i] = []byte(fmt.Sprintf(lineTemplate, test.Offset+int64(i))) + } + + r := &lineReaderAt{r: bytes.NewReader(input.Bytes())} + lines := make([][]byte, test.Count) + + n, err := r.ReadLinesAt(lines, test.Offset) + if test.Err { + if err == nil { + t.Fatal("expected error reading lines, but got nil") + } + return + } + if err != nil && (!test.EOF || err != io.EOF) { + t.Fatalf("unexpected error reading lines: %v", err) + } + + count := test.Count + if test.EOF { + count = test.EOFCount + } + + if n != count { + t.Fatalf("incorrect number of lines read: expected %d, actual %d", count, n) + } + for i := 0; i < n; i++ { + if !bytes.Equal(output[i], lines[i]) { + t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], lines[i]) + } + } + }) + } + + newlineTests := map[string]struct { + InputSize int + }{ + "readLinesNoFinalNewline": { + InputSize: indexBufferSize + indexBufferSize/2, + }, + "readLinesNoFinalNewlineBufferMultiple": { + InputSize: 4 * indexBufferSize, + }, + } + + for name, test := range newlineTests { + t.Run(name, func(t *testing.T) { + input := bytes.Repeat([]byte("0"), test.InputSize) + + var output [][]byte + for i := 0; i < len(input); i++ { + last := i + i += rand.Intn(80) + if i < len(input)-1 { // last character of input must not be a newline + input[i] = '\n' + output = append(output, input[last:i+1]) + } else { + output = append(output, input[last:]) + } + } + + r := &lineReaderAt{r: bytes.NewReader(input)} + 
lines := make([][]byte, len(output)) + + n, err := r.ReadLinesAt(lines, 0) + if err != nil { + t.Fatalf("unexpected error reading reading lines: %v", err) + } + + if n != len(output) { + t.Fatalf("incorrect number of lines read: expected %d, actual %d", len(output), n) + } + + for i, line := range lines { + if !bytes.Equal(output[i], line) { + t.Errorf("incorrect content in line %d:\nexpected: %q\nactual: %q", i, output[i], line) + } + } + }) + } +} + +func TestCopyFrom(t *testing.T) { + tests := map[string]struct { + Bytes int64 + Offset int64 + }{ + "copyAll": { + Bytes: byteBufferSize / 2, + }, + "copyPartial": { + Bytes: byteBufferSize / 2, + Offset: byteBufferSize / 4, + }, + "copyLarge": { + Bytes: 8 * byteBufferSize, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + data := make([]byte, test.Bytes) + rand.Read(data) + + var dst bytes.Buffer + n, err := copyFrom(&dst, bytes.NewReader(data), test.Offset) + if err != nil { + t.Fatalf("unexpected error copying data: %v", err) + } + if n != test.Bytes-test.Offset { + t.Fatalf("incorrect number of bytes copied: expected %d, actual %d", test.Bytes-test.Offset, n) + } + + expected := data[test.Offset:] + if !bytes.Equal(expected, dst.Bytes()) { + t.Fatalf("incorrect data copied:\nexpected: %v\nactual: %v", expected, dst.Bytes()) + } + }) + } +} + +func TestCopyLinesFrom(t *testing.T) { + tests := map[string]struct { + Lines int64 + Offset int64 + }{ + "copyAll": { + Lines: lineBufferSize / 2, + }, + "copyPartial": { + Lines: lineBufferSize / 2, + Offset: lineBufferSize / 4, + }, + "copyLarge": { + Lines: 8 * lineBufferSize, + }, + } + + const lineLength = 128 + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + data := make([]byte, test.Lines*lineLength) + for i := range data { + data[i] = byte(32 + rand.Intn(95)) // ascii letters, numbers, symbols + if i%lineLength == lineLength-1 { + data[i] = '\n' + } + } + + var dst bytes.Buffer + n, err := copyLinesFrom(&dst, &lineReaderAt{r: bytes.NewReader(data)}, test.Offset) + if err != nil { + t.Fatalf("unexpected error copying data: %v", err) + } + if n != test.Lines-test.Offset { + t.Fatalf("incorrect number of lines copied: expected %d, actual %d", test.Lines-test.Offset, n) + } + + expected := data[test.Offset*lineLength:] + if !bytes.Equal(expected, dst.Bytes()) { + t.Fatalf("incorrect data copied:\nexpected: %v\nactual: %v", expected, dst.Bytes()) + } + }) + } +} diff --git a/pkg/gitdiff/parser.go b/pkg/gitdiff/parser.go new file mode 100644 index 0000000..e8f8430 --- /dev/null +++ b/pkg/gitdiff/parser.go @@ -0,0 +1,142 @@ +// Package gitdiff parses and applies patches generated by Git. It supports +// line-oriented text patches, binary patches, and can also parse standard +// unified diffs generated by other tools. +package gitdiff + +import ( + "bufio" + "fmt" + "io" +) + +// Parse parses a patch with changes to one or more files. Any content before +// the first file is returned as the second value. If an error occurs while +// parsing, it returns all files parsed before the error. +// +// Parse expects to receive a single patch. If the input may contain multiple +// patches (for example, if it is an mbox file), callers should split it into +// individual patches and call Parse on each one. 
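+//
+// A typical call site looks like the following sketch (the file name is
+// hypothetical and error handling is abbreviated; this example is not part
+// of the original comments):
+//
+//	patch, err := os.Open("changes.patch")
+//	if err != nil {
+//		// handle open error
+//	}
+//	defer patch.Close()
+//
+//	files, preamble, err := gitdiff.Parse(patch)
+//	if err != nil {
+//		// handle parse error
+//	}
+//	header, err := gitdiff.ParsePatchHeader(preamble)
+//	// inspect files and header as needed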
+func Parse(r io.Reader) ([]*File, string, error) { + p := newParser(r) + + if err := p.Next(); err != nil { + if err == io.EOF { + return nil, "", nil + } + return nil, "", err + } + + var preamble string + var files []*File + for { + file, pre, err := p.ParseNextFileHeader() + if err != nil { + return files, preamble, err + } + if len(files) == 0 { + preamble = pre + } + if file == nil { + break + } + + for _, fn := range []func(*File) (int, error){ + p.ParseTextFragments, + p.ParseBinaryFragments, + } { + n, err := fn(file) + if err != nil { + return files, preamble, err + } + if n > 0 { + break + } + } + + files = append(files, file) + } + + return files, preamble, nil +} + +// TODO(bkeyes): consider exporting the parser type with configuration +// this would enable OID validation, p-value guessing, and prefix stripping +// by allowing users to set or override defaults + +// parser invariants: +// - methods that parse objects: +// - start with the parser on the first line of the first object +// - if returning nil, do not advance +// - if returning an error, do not advance past the object +// - if returning an object, advance to the first line after the object +// - any exported parsing methods must initialize the parser by calling Next() + +type stringReader interface { + ReadString(delim byte) (string, error) +} + +type parser struct { + r stringReader + + eof bool + lineno int64 + lines [3]string +} + +func newParser(r io.Reader) *parser { + if r, ok := r.(stringReader); ok { + return &parser{r: r} + } + return &parser{r: bufio.NewReader(r)} +} + +// Next advances the parser by one line. It returns any error encountered while +// reading the line, including io.EOF when the end of stream is reached. +func (p *parser) Next() error { + if p.eof { + return io.EOF + } + + if p.lineno == 0 { + // on first call to next, need to shift in all lines + for i := 0; i < len(p.lines)-1; i++ { + if err := p.shiftLines(); err != nil && err != io.EOF { + return err + } + } + } + + err := p.shiftLines() + if err != nil && err != io.EOF { + return err + } + + p.lineno++ + if p.lines[0] == "" { + p.eof = true + return io.EOF + } + return nil +} + +func (p *parser) shiftLines() (err error) { + for i := 0; i < len(p.lines)-1; i++ { + p.lines[i] = p.lines[i+1] + } + p.lines[len(p.lines)-1], err = p.r.ReadString('\n') + return +} + +// Line returns a line from the parser without advancing it. A delta of 0 +// returns the current line, while higher deltas return read-ahead lines. It +// returns an empty string if the delta is higher than the available lines, +// either because of the buffer size or because the parser reached the end of +// the input. Valid lines always contain at least a newline character. +func (p *parser) Line(delta uint) string { + return p.lines[delta] +} + +// Errorf generates an error and appends the current line information. 
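+//
+// For example (an illustrative note, not part of the original comments),
+// with p.lineno == 5:
+//
+//	p.Errorf(-1, "unexpected %s", "content") // gitdiff: line 4: unexpected content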
+func (p *parser) Errorf(delta int64, msg string, args ...interface{}) error { + return fmt.Errorf("gitdiff: line %d: %s", p.lineno+delta, fmt.Sprintf(msg, args...)) +} diff --git a/pkg/gitdiff/parser_test.go b/pkg/gitdiff/parser_test.go new file mode 100644 index 0000000..15a5d67 --- /dev/null +++ b/pkg/gitdiff/parser_test.go @@ -0,0 +1,511 @@ +package gitdiff + +import ( + "bytes" + "encoding/binary" + "encoding/json" + "io" + "os" + "reflect" + "testing" +) + +func TestLineOperations(t *testing.T) { + const content = "the first line\nthe second line\nthe third line\n" + + t.Run("read", func(t *testing.T) { + p := newTestParser(content, false) + + for i, expected := range []string{ + "the first line\n", + "the second line\n", + "the third line\n", + } { + if err := p.Next(); err != nil { + t.Fatalf("error advancing parser after line %d: %v", i, err) + } + if p.lineno != int64(i+1) { + t.Fatalf("incorrect line number: expected %d, actual: %d", i+1, p.lineno) + } + + line := p.Line(0) + if line != expected { + t.Fatalf("incorrect line %d: expected %q, was %q", i+1, expected, line) + } + } + + // reading after the last line should return EOF + if err := p.Next(); err != io.EOF { + t.Fatalf("expected EOF after end, but got: %v", err) + } + if p.lineno != 4 { + t.Fatalf("incorrect line number: expected %d, actual: %d", 4, p.lineno) + } + + // reading again returns EOF again and does not advance the line + if err := p.Next(); err != io.EOF { + t.Fatalf("expected EOF after end, but got: %v", err) + } + if p.lineno != 4 { + t.Fatalf("incorrect line number: expected %d, actual: %d", 4, p.lineno) + } + }) + + t.Run("peek", func(t *testing.T) { + p := newTestParser(content, false) + if err := p.Next(); err != nil { + t.Fatalf("error advancing parser: %v", err) + } + + line := p.Line(1) + if line != "the second line\n" { + t.Fatalf("incorrect peek line: %s", line) + } + + if err := p.Next(); err != nil { + t.Fatalf("error advancing parser after peek: %v", err) + } + + line = p.Line(0) + if line != "the second line\n" { + t.Fatalf("incorrect read line: %s", line) + } + }) + + t.Run("emptyInput", func(t *testing.T) { + p := newTestParser("", false) + if err := p.Next(); err != io.EOF { + t.Fatalf("expected EOF on first Next(), but got: %v", err) + } + }) +} + +func TestParserInvariant_Advancement(t *testing.T) { + tests := map[string]struct { + Input string + Parse func(p *parser) error + EndLine string + }{ + "ParseGitFileHeader": { + Input: `diff --git a/dir/file.txt b/dir/file.txt +index 9540595..30e6333 100644 +--- a/dir/file.txt ++++ b/dir/file.txt +@@ -1,2 +1,3 @@ +context line +`, + Parse: func(p *parser) error { + _, err := p.ParseGitFileHeader() + return err + }, + EndLine: "@@ -1,2 +1,3 @@\n", + }, + "ParseTraditionalFileHeader": { + Input: `--- dir/file.txt ++++ dir/file.txt +@@ -1,2 +1,3 @@ +context line +`, + Parse: func(p *parser) error { + _, err := p.ParseTraditionalFileHeader() + return err + }, + EndLine: "@@ -1,2 +1,3 @@\n", + }, + "ParseTextFragmentHeader": { + Input: `@@ -1,2 +1,3 @@ +context line +`, + Parse: func(p *parser) error { + _, err := p.ParseTextFragmentHeader() + return err + }, + EndLine: "context line\n", + }, + "ParseTextChunk": { + Input: ` context line +-old line ++new line + context line +@@ -1 +1 @@ +`, + Parse: func(p *parser) error { + return p.ParseTextChunk(&TextFragment{OldLines: 3, NewLines: 3}) + }, + EndLine: "@@ -1 +1 @@\n", + }, + "ParseTextFragments": { + Input: `@@ -1,2 +1,2 @@ + context line +-old line ++new line +@@ -1,2 +1,2 @@ +-old line ++new 
line + context line +diff --git a/file.txt b/file.txt +`, + Parse: func(p *parser) error { + _, err := p.ParseTextFragments(&File{}) + return err + }, + EndLine: "diff --git a/file.txt b/file.txt\n", + }, + "ParseNextFileHeader": { + Input: `not a header +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -1,2 +1,2 @@ +`, + Parse: func(p *parser) error { + _, _, err := p.ParseNextFileHeader() + return err + }, + EndLine: "@@ -1,2 +1,2 @@\n", + }, + "ParseBinaryMarker": { + Input: `Binary files differ +diff --git a/file.txt b/file.txt +`, + Parse: func(p *parser) error { + _, _, err := p.ParseBinaryMarker() + return err + }, + EndLine: "diff --git a/file.txt b/file.txt\n", + }, + "ParseBinaryFragmentHeader": { + Input: `literal 0 +HcmV?d00001 +`, + Parse: func(p *parser) error { + _, err := p.ParseBinaryFragmentHeader() + return err + }, + EndLine: "HcmV?d00001\n", + }, + "ParseBinaryChunk": { + Input: "TcmZQzU|?i`" + `U?w2V48*Je09XJG + +literal 0 +`, + Parse: func(p *parser) error { + return p.ParseBinaryChunk(&BinaryFragment{Size: 20}) + }, + EndLine: "literal 0\n", + }, + "ParseBinaryFragments": { + Input: `GIT binary patch +literal 40 +gcmZQzU|?i` + "`" + `U?w2V48*KJ%mKu_Kr9NxN<eH500b)lkN^Mx + +literal 0 +HcmV?d00001 + +diff --git a/file.txt b/file.txt +`, + Parse: func(p *parser) error { + _, err := p.ParseBinaryFragments(&File{}) + return err + }, + EndLine: "diff --git a/file.txt b/file.txt\n", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + p := newTestParser(test.Input, true) + + if err := test.Parse(p); err != nil { + t.Fatalf("unexpected error while parsing: %v", err) + } + + if test.EndLine != p.Line(0) { + t.Errorf("incorrect position after parsing\nexpected: %q\n actual: %q", test.EndLine, p.Line(0)) + } + }) + } +} + +func TestParseNextFileHeader(t *testing.T) { + tests := map[string]struct { + Input string + Output *File + Preamble string + Err bool + }{ + "gitHeader": { + Input: `commit 1acbae563cd6ef5750a82ee64e116c6eb065cb94 +Author: Morton Haypenny <mhaypenny@example.com> +Date: Tue Apr 2 22:30:00 2019 -0700 + + This is a sample commit message. + +diff --git a/file.txt b/file.txt +index cc34da1..1acbae5 100644 +--- a/file.txt ++++ b/file.txt +@@ -1,3 +1,4 @@ +`, + Output: &File{ + OldName: "file.txt", + NewName: "file.txt", + OldMode: os.FileMode(0100644), + OldOIDPrefix: "cc34da1", + NewOIDPrefix: "1acbae5", + }, + Preamble: `commit 1acbae563cd6ef5750a82ee64e116c6eb065cb94 +Author: Morton Haypenny <mhaypenny@example.com> +Date: Tue Apr 2 22:30:00 2019 -0700 + + This is a sample commit message. + +`, + }, + "traditionalHeader": { + Input: ` +--- file.txt 2019-04-01 22:58:14.833597918 -0700 ++++ file.txt 2019-04-01 22:58:14.833597918 -0700 +@@ -1,3 +1,4 @@ +`, + Output: &File{ + OldName: "file.txt", + NewName: "file.txt", + }, + Preamble: "\n", + }, + "noHeaders": { + Input: ` +this is a line +this is another line +--- could this be a header? +nope, it's just some dashes +`, + Output: nil, + Preamble: ` +this is a line +this is another line +--- could this be a header? +nope, it's just some dashes +`, + }, + "detatchedFragmentLike": { + Input: ` +a wild fragment appears? +@@ -1,3 +1,4 ~1,5 @@ +`, + Output: nil, + Preamble: ` +a wild fragment appears? +@@ -1,3 +1,4 ~1,5 @@ +`, + }, + "detatchedFragment": { + Input: ` +a wild fragment appears? 
+@@ -1,3 +1,4 @@ +`, + Err: true, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + p := newTestParser(test.Input, true) + + f, pre, err := p.ParseNextFileHeader() + if test.Err { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing next file header, but got %v", err) + } + return + } + if err != nil { + t.Fatalf("unexpected error parsing next file header: %v", err) + } + + if test.Preamble != pre { + t.Errorf("incorrect preamble\nexpected: %q\n actual: %q", test.Preamble, pre) + } + if !reflect.DeepEqual(test.Output, f) { + t.Errorf("incorrect file\nexpected: %+v\n actual: %+v", test.Output, f) + } + }) + } +} + +func TestParse(t *testing.T) { + textFragments := []*TextFragment{ + { + OldPosition: 3, + OldLines: 6, + NewPosition: 3, + NewLines: 8, + Comment: "fragment 1", + Lines: []Line{ + {OpContext, "context line\n"}, + {OpDelete, "old line 1\n"}, + {OpDelete, "old line 2\n"}, + {OpContext, "context line\n"}, + {OpAdd, "new line 1\n"}, + {OpAdd, "new line 2\n"}, + {OpAdd, "new line 3\n"}, + {OpContext, "context line\n"}, + {OpDelete, "old line 3\n"}, + {OpAdd, "new line 4\n"}, + {OpAdd, "new line 5\n"}, + }, + LinesAdded: 5, + LinesDeleted: 3, + LeadingContext: 1, + }, + { + OldPosition: 31, + OldLines: 2, + NewPosition: 33, + NewLines: 2, + Comment: "fragment 2", + Lines: []Line{ + {OpContext, "context line\n"}, + {OpDelete, "old line 4\n"}, + {OpAdd, "new line 6\n"}, + }, + LinesAdded: 1, + LinesDeleted: 1, + LeadingContext: 1, + }, + } + + textPreamble := `commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe +Author: Morton Haypenny <mhaypenny@example.com> +Date: Tue Apr 2 22:55:40 2019 -0700 + + A file with multiple fragments. + + The content is arbitrary. + +` + + binaryPreamble := `commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe +Author: Morton Haypenny <mhaypenny@example.com> +Date: Tue Apr 2 22:55:40 2019 -0700 + + A binary file with the first 10 fibonacci numbers. 
+ +` + tests := map[string]struct { + InputFile string + Output []*File + Preamble string + Err bool + }{ + "oneFile": { + InputFile: "testdata/one_file.patch", + Output: []*File{ + { + OldName: "dir/file1.txt", + NewName: "dir/file1.txt", + OldMode: os.FileMode(0100644), + OldOIDPrefix: "ebe9fa54", + NewOIDPrefix: "fe103e1d", + TextFragments: textFragments, + }, + }, + Preamble: textPreamble, + }, + "twoFiles": { + InputFile: "testdata/two_files.patch", + Output: []*File{ + { + OldName: "dir/file1.txt", + NewName: "dir/file1.txt", + OldMode: os.FileMode(0100644), + OldOIDPrefix: "ebe9fa54", + NewOIDPrefix: "fe103e1d", + TextFragments: textFragments, + }, + { + OldName: "dir/file2.txt", + NewName: "dir/file2.txt", + OldMode: os.FileMode(0100644), + OldOIDPrefix: "417ebc70", + NewOIDPrefix: "67514b7f", + TextFragments: textFragments, + }, + }, + Preamble: textPreamble, + }, + "noFiles": { + InputFile: "testdata/no_files.patch", + Output: nil, + Preamble: textPreamble, + }, + "newBinaryFile": { + InputFile: "testdata/new_binary_file.patch", + Output: []*File{ + { + OldName: "", + NewName: "dir/ten.bin", + NewMode: os.FileMode(0100644), + OldOIDPrefix: "0000000000000000000000000000000000000000", + NewOIDPrefix: "77b068ba48c356156944ea714740d0d5ca07bfec", + IsNew: true, + IsBinary: true, + BinaryFragment: &BinaryFragment{ + Method: BinaryPatchLiteral, + Size: 40, + Data: fib(10, binary.BigEndian), + }, + ReverseBinaryFragment: &BinaryFragment{ + Method: BinaryPatchLiteral, + Size: 0, + Data: []byte{}, + }, + }, + }, + Preamble: binaryPreamble, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + f, err := os.Open(test.InputFile) + if err != nil { + t.Fatalf("unexpected error opening input file: %v", err) + } + + files, pre, err := Parse(f) + if test.Err { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing patch, but got %v", err) + } + return + } + if err != nil { + t.Fatalf("unexpected error parsing patch: %v", err) + } + + if len(test.Output) != len(files) { + t.Fatalf("incorrect number of parsed files: expected %d, actual %d", len(test.Output), len(files)) + } + if test.Preamble != pre { + t.Errorf("incorrect preamble\nexpected: %q\n actual: %q", test.Preamble, pre) + } + for i := range test.Output { + if !reflect.DeepEqual(test.Output[i], files[i]) { + exp, _ := json.MarshalIndent(test.Output[i], "", " ") + act, _ := json.MarshalIndent(files[i], "", " ") + t.Errorf("incorrect file at position %d\nexpected: %s\n actual: %s", i, exp, act) + } + } + }) + } +} + +func newTestParser(input string, init bool) *parser { + p := newParser(bytes.NewBufferString(input)) + if init { + _ = p.Next() + } + return p +} diff --git a/pkg/gitdiff/patch_header.go b/pkg/gitdiff/patch_header.go new file mode 100644 index 0000000..f047059 --- /dev/null +++ b/pkg/gitdiff/patch_header.go @@ -0,0 +1,470 @@ +package gitdiff + +import ( + "bufio" + "errors" + "fmt" + "io" + "io/ioutil" + "mime/quotedprintable" + "net/mail" + "strconv" + "strings" + "time" + "unicode" +) + +const ( + mailHeaderPrefix = "From " + prettyHeaderPrefix = "commit " + mailMinimumHeaderPrefix = "From:" +) + +// PatchHeader is a parsed version of the preamble content that appears before +// the first diff in a patch. It includes metadata about the patch, such as the +// author and a subject. +type PatchHeader struct { + // The SHA of the commit the patch was generated from. Empty if the SHA is + // not included in the header. + SHA string + + // The author details of the patch. 
If these details are not included in + // the header, Author is nil and AuthorDate is the zero time. + Author *PatchIdentity + AuthorDate time.Time + + // The committer details of the patch. If these details are not included in + // the header, Committer is nil and CommitterDate is the zero time. + Committer *PatchIdentity + CommitterDate time.Time + + // The title and body of the commit message describing the changes in the + // patch. Empty if no message is included in the header. + Title string + Body string + + // If the preamble looks like an email, ParsePatchHeader will + // remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the + // Title and place them here. + SubjectPrefix string + + // If the preamble looks like an email, and it contains a `---` + // line, that line will be removed and everything after it will be + // placed in BodyAppendix. + BodyAppendix string +} + +// Message returns the commit message for the header. The message consists of +// the title and the body separated by an empty line. +func (h *PatchHeader) Message() string { + var msg strings.Builder + if h != nil { + msg.WriteString(h.Title) + if h.Body != "" { + msg.WriteString("\n\n") + msg.WriteString(h.Body) + } + } + return msg.String() +} + +// ParsePatchDate parses a patch date string. It returns the parsed time or an +// error if s has an unknown format. ParsePatchDate supports the iso, rfc, +// short, raw, unix, and default formats (with local variants) used by the +// --date flag in Git. +func ParsePatchDate(s string) (time.Time, error) { + const ( + isoFormat = "2006-01-02 15:04:05 -0700" + isoStrictFormat = "2006-01-02T15:04:05-07:00" + rfc2822Format = "Mon, 2 Jan 2006 15:04:05 -0700" + shortFormat = "2006-01-02" + defaultFormat = "Mon Jan 2 15:04:05 2006 -0700" + defaultLocalFormat = "Mon Jan 2 15:04:05 2006" + ) + + if s == "" { + return time.Time{}, nil + } + + for _, fmt := range []string{ + isoFormat, + isoStrictFormat, + rfc2822Format, + shortFormat, + defaultFormat, + defaultLocalFormat, + } { + if t, err := time.ParseInLocation(fmt, s, time.Local); err == nil { + return t, nil + } + } + + // unix format + if unix, err := strconv.ParseInt(s, 10, 64); err == nil { + return time.Unix(unix, 0), nil + } + + // raw format + if space := strings.IndexByte(s, ' '); space > 0 { + unix, uerr := strconv.ParseInt(s[:space], 10, 64) + zone, zerr := time.Parse("-0700", s[space+1:]) + if uerr == nil && zerr == nil { + return time.Unix(unix, 0).In(zone.Location()), nil + } + } + + return time.Time{}, fmt.Errorf("unknown date format: %s", s) +} + +// A PatchHeaderOption modifies the behavior of ParsePatchHeader. +type PatchHeaderOption func(*patchHeaderOptions) + +// SubjectCleanMode controls how ParsePatchHeader cleans subject lines when +// parsing mail-formatted patches. +type SubjectCleanMode int + +const ( + // SubjectCleanWhitespace removes leading and trailing whitespace. + SubjectCleanWhitespace SubjectCleanMode = iota + + // SubjectCleanAll removes leading and trailing whitespace, leading "Re:", + // "re:", and ":" strings, and leading strings enclosed by '[' and ']'. + // This is the default behavior of git (see `git mailinfo`) and this + // package. + SubjectCleanAll + + // SubjectCleanPatchOnly is the same as SubjectCleanAll, but only removes + // leading strings enclosed by '[' and ']' if they start with "PATCH". + SubjectCleanPatchOnly +) + +// WithSubjectCleanMode sets the SubjectCleanMode for header parsing. By +// default, uses SubjectCleanAll. 
+func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption { + return func(opts *patchHeaderOptions) { + opts.subjectCleanMode = m + } +} + +type patchHeaderOptions struct { + subjectCleanMode SubjectCleanMode +} + +// ParsePatchHeader parses the preamble string returned by [Parse] into a +// PatchHeader. Due to the variety of header formats, some fields of the parsed +// PatchHeader may be unset after parsing. +// +// Supported formats are the short, medium, full, fuller, and email pretty +// formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox +// format used by `git format-patch`. +// +// When parsing mail-formatted headers, ParsePatchHeader tries to remove +// email-specific content from the title and body: +// +// - Based on the SubjectCleanMode, remove prefixes like reply markers and +// "[PATCH]" strings from the subject, saving any removed content in the +// SubjectPrefix field. Parsing always discards leading and trailing +// whitespace from the subject line. The default mode is SubjectCleanAll. +// +// - If the body contains a "---" line (3 hyphens), remove that line and any +// content after it from the body and save it in the BodyAppendix field. +// +// ParsePatchHeader tries to process content it does not understand wthout +// returning errors, but will return errors if well-identified content like +// dates or identies uses unknown or invalid formats. +func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) { + opts := patchHeaderOptions{ + subjectCleanMode: SubjectCleanAll, // match git defaults + } + for _, optFn := range options { + optFn(&opts) + } + + header = strings.TrimSpace(header) + if header == "" { + return &PatchHeader{}, nil + } + + var firstLine, rest string + if idx := strings.IndexByte(header, '\n'); idx >= 0 { + firstLine = header[:idx] + rest = header[idx+1:] + } else { + firstLine = header + rest = "" + } + + switch { + case strings.HasPrefix(firstLine, mailHeaderPrefix): + return parseHeaderMail(firstLine, strings.NewReader(rest), opts) + + case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix): + // With a minimum header, the first line is part of the actual mail + // content and needs to be parsed as part of the "rest" + return parseHeaderMail("", strings.NewReader(header), opts) + + case strings.HasPrefix(firstLine, prettyHeaderPrefix): + return parseHeaderPretty(firstLine, strings.NewReader(rest)) + } + + return nil, errors.New("unrecognized patch header format") +} + +func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) { + const ( + authorPrefix = "Author:" + commitPrefix = "Commit:" + datePrefix = "Date:" + authorDatePrefix = "AuthorDate:" + commitDatePrefix = "CommitDate:" + ) + + h := &PatchHeader{} + + prettyLine = strings.TrimPrefix(prettyLine, prettyHeaderPrefix) + if i := strings.IndexByte(prettyLine, ' '); i > 0 { + h.SHA = prettyLine[:i] + } else { + h.SHA = prettyLine + } + + s := bufio.NewScanner(r) + for s.Scan() { + line := s.Text() + + // empty line marks end of fields, remaining lines are title/message + if strings.TrimSpace(line) == "" { + break + } + + switch { + case strings.HasPrefix(line, authorPrefix): + u, err := ParsePatchIdentity(line[len(authorPrefix):]) + if err != nil { + return nil, err + } + h.Author = &u + + case strings.HasPrefix(line, commitPrefix): + u, err := ParsePatchIdentity(line[len(commitPrefix):]) + if err != nil { + return nil, err + } + h.Committer = &u + + case strings.HasPrefix(line, datePrefix): + d, err := 
ParsePatchDate(strings.TrimSpace(line[len(datePrefix):])) + if err != nil { + return nil, err + } + h.AuthorDate = d + + case strings.HasPrefix(line, authorDatePrefix): + d, err := ParsePatchDate(strings.TrimSpace(line[len(authorDatePrefix):])) + if err != nil { + return nil, err + } + h.AuthorDate = d + + case strings.HasPrefix(line, commitDatePrefix): + d, err := ParsePatchDate(strings.TrimSpace(line[len(commitDatePrefix):])) + if err != nil { + return nil, err + } + h.CommitterDate = d + } + } + if s.Err() != nil { + return nil, s.Err() + } + + title, indent := scanMessageTitle(s) + if s.Err() != nil { + return nil, s.Err() + } + h.Title = title + + if title != "" { + // Don't check for an appendix, pretty headers do not contain them + body, _ := scanMessageBody(s, indent, false) + if s.Err() != nil { + return nil, s.Err() + } + h.Body = body + } + + return h, nil +} + +func scanMessageTitle(s *bufio.Scanner) (title string, indent string) { + var b strings.Builder + for i := 0; s.Scan(); i++ { + line := s.Text() + trimLine := strings.TrimSpace(line) + if trimLine == "" { + break + } + + if i == 0 { + if start := strings.IndexFunc(line, func(c rune) bool { return !unicode.IsSpace(c) }); start > 0 { + indent = line[:start] + } + } + if b.Len() > 0 { + b.WriteByte(' ') + } + b.WriteString(trimLine) + } + return b.String(), indent +} + +func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) { + // Body and appendix + var body, appendix strings.Builder + c := &body + var empty int + for i := 0; s.Scan(); i++ { + line := s.Text() + + line = strings.TrimRightFunc(line, unicode.IsSpace) + line = strings.TrimPrefix(line, indent) + + if line == "" { + empty++ + continue + } + + // If requested, parse out "appendix" information (often added + // by `git format-patch` and removed by `git am`). + if separateAppendix && c == &body && line == "---" { + c = &appendix + continue + } + + if c.Len() > 0 { + c.WriteByte('\n') + if empty > 0 { + c.WriteByte('\n') + } + } + empty = 0 + + c.WriteString(line) + } + return body.String(), appendix.String() +} + +func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*PatchHeader, error) { + msg, err := mail.ReadMessage(r) + if err != nil { + return nil, err + } + + h := &PatchHeader{} + + if strings.HasPrefix(mailLine, mailHeaderPrefix) { + mailLine = strings.TrimPrefix(mailLine, mailHeaderPrefix) + if i := strings.IndexByte(mailLine, ' '); i > 0 { + h.SHA = mailLine[:i] + } + } + + from := msg.Header.Get("From") + if from != "" { + u, err := ParsePatchIdentity(from) + if err != nil { + return nil, err + } + h.Author = &u + } + + date := msg.Header.Get("Date") + if date != "" { + d, err := ParsePatchDate(date) + if err != nil { + return nil, err + } + h.AuthorDate = d + } + + subject := msg.Header.Get("Subject") + h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode) + + s := bufio.NewScanner(msg.Body) + h.Body, h.BodyAppendix = scanMessageBody(s, "", true) + if s.Err() != nil { + return nil, s.Err() + } + + return h, nil +} + +func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) { + switch mode { + case SubjectCleanAll, SubjectCleanPatchOnly: + case SubjectCleanWhitespace: + return "", strings.TrimSpace(decodeSubject(s)) + default: + panic(fmt.Sprintf("unknown clean mode: %d", mode)) + } + + // Based on the algorithm from Git in mailinfo.c:cleanup_subject() + // If compatibility with `git am` drifts, go there to see if there are any updates. 
+ + at := 0 + for at < len(s) { + switch s[at] { + case 'r', 'R': + // Detect re:, Re:, rE: and RE: + if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' { + at += 3 + continue + } + + case ' ', '\t', ':': + // Delete whitespace and duplicate ':' characters + at++ + continue + + case '[': + if i := strings.IndexByte(s[at:], ']'); i > 0 { + if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") { + at += i + 1 + continue + } + } + } + + // Nothing was removed, end processing + break + } + + prefix = strings.TrimLeftFunc(s[:at], unicode.IsSpace) + subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace) + return +} + +// Decodes a subject line. Currently only supports quoted-printable UTF-8. This format is the result +// of a `git format-patch` when the commit title has a non-ASCII character (i.e. an emoji). +// See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject +func decodeSubject(encoded string) string { + if !strings.HasPrefix(encoded, "=?UTF-8?q?") { + // not UTF-8 encoded + return encoded + } + + // If the subject is too long, `git format-patch` may produce a subject line across + // multiple lines. When parsed, this can look like the following: + // <UTF8-prefix><first-line> <UTF8-prefix><second-line> + payload := " " + encoded + payload = strings.ReplaceAll(payload, " =?UTF-8?q?", "") + payload = strings.ReplaceAll(payload, "?=", "") + + decoded, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(payload))) + if err != nil { + // if err, abort decoding and return original subject + return encoded + } + + return string(decoded) +} diff --git a/pkg/gitdiff/patch_header_test.go b/pkg/gitdiff/patch_header_test.go new file mode 100644 index 0000000..c8559b0 --- /dev/null +++ b/pkg/gitdiff/patch_header_test.go @@ -0,0 +1,590 @@ +package gitdiff + +import ( + "testing" + "time" +) + +func TestParsePatchDate(t *testing.T) { + expected := time.Date(2020, 4, 9, 8, 7, 6, 0, time.UTC) + + tests := map[string]struct { + Input string + Output time.Time + Err interface{} + }{ + "default": { + Input: "Thu Apr 9 01:07:06 2020 -0700", + Output: expected, + }, + "defaultLocal": { + Input: "Thu Apr 9 01:07:06 2020", + Output: time.Date(2020, 4, 9, 1, 7, 6, 0, time.Local), + }, + "iso": { + Input: "2020-04-09 01:07:06 -0700", + Output: expected, + }, + "isoStrict": { + Input: "2020-04-09T01:07:06-07:00", + Output: expected, + }, + "rfc": { + Input: "Thu, 9 Apr 2020 01:07:06 -0700", + Output: expected, + }, + "short": { + Input: "2020-04-09", + Output: time.Date(2020, 4, 9, 0, 0, 0, 0, time.Local), + }, + "raw": { + Input: "1586419626 -0700", + Output: expected, + }, + "unix": { + Input: "1586419626", + Output: expected, + }, + "unknownFormat": { + Input: "4/9/2020 01:07:06 PDT", + Err: "unknown date format", + }, + "empty": { + Input: "", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + d, err := ParsePatchDate(test.Input) + if test.Err != nil { + assertError(t, test.Err, err, "parsing date") + return + } + if err != nil { + t.Fatalf("unexpected error parsing date: %v", err) + } + if !test.Output.Equal(d) { + t.Errorf("incorrect parsed date: expected %v, actual %v", test.Output, d) + } + }) + } +} + +func TestParsePatchHeader(t *testing.T) { + expectedSHA := "61f5cd90bed4d204ee3feb3aa41ee91d4734855b" + expectedIdentity := &PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + } + expectedDate := time.Date(2020, 04, 11, 15, 
21, 23, 0, time.FixedZone("PDT", -7*60*60)) + expectedTitle := "A sample commit to test header parsing" + expectedEmojiOneLineTitle := "🤖 Enabling auto-merging" + expectedEmojiMultiLineTitle := "[IA64] Put ia64 config files on the Uwe Kleine-König diet" + expectedBody := "The medium format shows the body, which\nmay wrap on to multiple lines.\n\nAnother body line." + expectedBodyAppendix := "CC: Joe Smith <joe.smith@company.com>" + + tests := map[string]struct { + Input string + Options []PatchHeaderOption + Header PatchHeader + Err interface{} + }{ + "prettyShort": { + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b +Author: Morton Haypenny <mhaypenny@example.com> + + A sample commit to test header parsing +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + Title: expectedTitle, + }, + }, + "prettyMedium": { + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b +Author: Morton Haypenny <mhaypenny@example.com> +Date: Sat Apr 11 15:21:23 2020 -0700 + + A sample commit to test header parsing + + The medium format shows the body, which + may wrap on to multiple lines. + + Another body line. +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: expectedTitle, + Body: expectedBody, + }, + }, + "prettyFull": { + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b +Author: Morton Haypenny <mhaypenny@example.com> +Commit: Morton Haypenny <mhaypenny@example.com> + + A sample commit to test header parsing + + The medium format shows the body, which + may wrap on to multiple lines. + + Another body line. +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + Committer: expectedIdentity, + Title: expectedTitle, + Body: expectedBody, + }, + }, + "prettyFuller": { + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b +Author: Morton Haypenny <mhaypenny@example.com> +AuthorDate: Sat Apr 11 15:21:23 2020 -0700 +Commit: Morton Haypenny <mhaypenny@example.com> +CommitDate: Sat Apr 11 15:21:23 2020 -0700 + + A sample commit to test header parsing + + The medium format shows the body, which + may wrap on to multiple lines. + + Another body line. +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Committer: expectedIdentity, + CommitterDate: expectedDate, + Title: expectedTitle, + Body: expectedBody, + }, + }, + "prettyAppendix": { + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b +Author: Morton Haypenny <mhaypenny@example.com> +AuthorDate: Sat Apr 11 15:21:23 2020 -0700 +Commit: Morton Haypenny <mhaypenny@example.com> +CommitDate: Sat Apr 11 15:21:23 2020 -0700 + + A sample commit to test header parsing + + The medium format shows the body, which + may wrap on to multiple lines. + + Another body line. + --- + CC: Joe Smith <joe.smith@company.com> +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Committer: expectedIdentity, + CommitterDate: expectedDate, + Title: expectedTitle, + Body: expectedBody + "\n---\n" + expectedBodyAppendix, + }, + }, + "mailbox": { + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 +From: Morton Haypenny <mhaypenny@example.com> +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] A sample commit to test header parsing + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. 
+`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: expectedTitle, + Body: expectedBody, + }, + }, + "mailboxPatchOnly": { + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 +From: Morton Haypenny <mhaypenny@example.com> +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] [BUG-123] A sample commit to test header parsing + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +`, + Options: []PatchHeaderOption{ + WithSubjectCleanMode(SubjectCleanPatchOnly), + }, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: "[BUG-123] " + expectedTitle, + Body: expectedBody, + }, + }, + "mailboxEmojiOneLine": { + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 +From: Morton Haypenny <mhaypenny@example.com> +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20Enabling=20auto-merging?= + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: expectedEmojiOneLineTitle, + Body: expectedBody, + }, + }, + "mailboxEmojiMultiLine": { + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 +From: Morton Haypenny <mhaypenny@example.com> +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] =?UTF-8?q?[IA64]=20Put=20ia64=20config=20files=20on=20the=20?= + =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig=20diet?= + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: expectedEmojiMultiLineTitle, + Body: expectedBody, + }, + }, + "mailboxRFC5322SpecialCharacters": { + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 +From: "dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com> +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] A sample commit to test header parsing + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: &PatchIdentity{ + Name: "dependabot[bot]", + Email: "12345+dependabot[bot]@users.noreply.github.com", + }, + AuthorDate: expectedDate, + Title: expectedTitle, + Body: expectedBody, + }, + }, + "mailboxAppendix": { + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 +From: Morton Haypenny <mhaypenny@example.com> +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] A sample commit to test header parsing + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +--- +CC: Joe Smith <joe.smith@company.com> +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: expectedTitle, + Body: expectedBody, + BodyAppendix: expectedBodyAppendix, + }, + }, + "mailboxMinimalNoName": { + Input: `From: <mhaypenny@example.com> +Subject: [PATCH] A sample commit to test header parsing + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. 
+`, + Header: PatchHeader{ + Author: &PatchIdentity{expectedIdentity.Email, expectedIdentity.Email}, + Title: expectedTitle, + Body: expectedBody, + }, + }, + "mailboxMinimal": { + Input: `From: Morton Haypenny <mhaypenny@example.com> +Subject: [PATCH] A sample commit to test header parsing + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +`, + Header: PatchHeader{ + Author: expectedIdentity, + Title: expectedTitle, + Body: expectedBody, + }, + }, + "unwrapTitle": { + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b +Author: Morton Haypenny <mhaypenny@example.com> +Date: Sat Apr 11 15:21:23 2020 -0700 + + A sample commit to test header parsing with a long + title that is wrapped. +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: expectedTitle + " with a long title that is wrapped.", + }, + }, + "normalizeBodySpace": { + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b +Author: Morton Haypenny <mhaypenny@example.com> +Date: Sat Apr 11 15:21:23 2020 -0700 + + A sample commit to test header parsing + + + The medium format shows the body, which + may wrap on to multiple lines. + + + Another body line. + + +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: expectedTitle, + Body: expectedBody, + }, + }, + "ignoreLeadingBlankLines": { + Input: ` + +` + " " + ` +commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b +Author: Morton Haypenny <mhaypenny@example.com> + + A sample commit to test header parsing +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + Title: expectedTitle, + }, + }, + "emptyHeader": { + Input: "", + Header: PatchHeader{}, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + h, err := ParsePatchHeader(test.Input, test.Options...) 
+ if test.Err != nil { + assertError(t, test.Err, err, "parsing patch header") + return + } + if err != nil { + t.Fatalf("unexpected error parsing patch header: %v", err) + } + if h == nil { + t.Fatalf("expected non-nil header, but got nil") + } + + exp := test.Header + act := *h + + if exp.SHA != act.SHA { + t.Errorf("incorrect parsed SHA: expected %q, actual %q", exp.SHA, act.SHA) + } + + assertPatchIdentity(t, "author", exp.Author, act.Author) + if !exp.AuthorDate.Equal(act.AuthorDate) { + t.Errorf("incorrect parsed author date: expected %v, but got %v", exp.AuthorDate, act.AuthorDate) + } + + assertPatchIdentity(t, "committer", exp.Committer, act.Committer) + if !exp.CommitterDate.Equal(act.CommitterDate) { + t.Errorf("incorrect parsed committer date: expected %v, but got %v", exp.CommitterDate, act.CommitterDate) + } + + if exp.Title != act.Title { + t.Errorf("incorrect parsed title:\n expected: %q\n actual: %q", exp.Title, act.Title) + } + if exp.Body != act.Body { + t.Errorf("incorrect parsed body:\n expected: %q\n actual: %q", exp.Body, act.Body) + } + if exp.BodyAppendix != act.BodyAppendix { + t.Errorf("incorrect parsed body appendix:\n expected: %q\n actual: %q", + exp.BodyAppendix, act.BodyAppendix) + } + }) + } +} + +func assertPatchIdentity(t *testing.T, kind string, exp, act *PatchIdentity) { + switch { + case exp == nil && act == nil: + case exp == nil && act != nil: + t.Errorf("incorrect parsed %s: expected nil, but got %+v", kind, act) + case exp != nil && act == nil: + t.Errorf("incorrect parsed %s: expected %+v, but got nil", kind, exp) + case exp.Name != act.Name || exp.Email != act.Email: + t.Errorf("incorrect parsed %s, expected %+v, bot got %+v", kind, exp, act) + } +} + +func TestCleanSubject(t *testing.T) { + expectedSubject := "A sample commit to test header parsing" + + tests := map[string]struct { + Input string + Mode SubjectCleanMode + Prefix string + Subject string + }{ + "CleanAll/noPrefix": { + Input: expectedSubject, + Mode: SubjectCleanAll, + Subject: expectedSubject, + }, + "CleanAll/patchPrefix": { + Input: "[PATCH] " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "[PATCH] ", + Subject: expectedSubject, + }, + "CleanAll/patchPrefixNoSpace": { + Input: "[PATCH]" + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "[PATCH]", + Subject: expectedSubject, + }, + "CleanAll/patchPrefixContent": { + Input: "[PATCH 3/7] " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "[PATCH 3/7] ", + Subject: expectedSubject, + }, + "CleanAll/spacePrefix": { + Input: " " + expectedSubject, + Mode: SubjectCleanAll, + Subject: expectedSubject, + }, + "CleanAll/replyLowerPrefix": { + Input: "re: " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "re: ", + Subject: expectedSubject, + }, + "CleanAll/replyMixedPrefix": { + Input: "Re: " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "Re: ", + Subject: expectedSubject, + }, + "CleanAll/replyCapsPrefix": { + Input: "RE: " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "RE: ", + Subject: expectedSubject, + }, + "CleanAll/replyDoublePrefix": { + Input: "Re: re: " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "Re: re: ", + Subject: expectedSubject, + }, + "CleanAll/noPrefixSubjectHasRe": { + Input: "Reimplement parsing", + Mode: SubjectCleanAll, + Subject: "Reimplement parsing", + }, + "CleanAll/patchPrefixSubjectHasRe": { + Input: "[PATCH 1/2] Reimplement parsing", + Mode: SubjectCleanAll, + Prefix: "[PATCH 1/2] ", + Subject: "Reimplement parsing", + }, + "CleanAll/unclosedPrefix": { + Input: 
"[Just to annoy people", + Mode: SubjectCleanAll, + Subject: "[Just to annoy people", + }, + "CleanAll/multiplePrefix": { + Input: " Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject + " ", + Mode: SubjectCleanAll, + Prefix: "Re:Re: [PATCH 1/2][DRAFT] ", + Subject: expectedSubject, + }, + "CleanPatchOnly/patchPrefix": { + Input: "[PATCH] " + expectedSubject, + Mode: SubjectCleanPatchOnly, + Prefix: "[PATCH] ", + Subject: expectedSubject, + }, + "CleanPatchOnly/mixedPrefix": { + Input: "[PATCH] [TICKET-123] " + expectedSubject, + Mode: SubjectCleanPatchOnly, + Prefix: "[PATCH] ", + Subject: "[TICKET-123] " + expectedSubject, + }, + "CleanPatchOnly/multiplePrefix": { + Input: "Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject, + Mode: SubjectCleanPatchOnly, + Prefix: "Re:Re: [PATCH 1/2]", + Subject: "[DRAFT] " + expectedSubject, + }, + "CleanWhitespace/leadingSpace": { + Input: " [PATCH] " + expectedSubject, + Mode: SubjectCleanWhitespace, + Subject: "[PATCH] " + expectedSubject, + }, + "CleanWhitespace/trailingSpace": { + Input: "[PATCH] " + expectedSubject + " ", + Mode: SubjectCleanWhitespace, + Subject: "[PATCH] " + expectedSubject, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + prefix, subject := cleanSubject(test.Input, test.Mode) + if prefix != test.Prefix { + t.Errorf("incorrect prefix: expected %q, actual %q", test.Prefix, prefix) + } + if subject != test.Subject { + t.Errorf("incorrect subject: expected %q, actual %q", test.Subject, subject) + } + }) + } +} diff --git a/pkg/gitdiff/patch_identity.go b/pkg/gitdiff/patch_identity.go new file mode 100644 index 0000000..018f80c --- /dev/null +++ b/pkg/gitdiff/patch_identity.go @@ -0,0 +1,166 @@ +package gitdiff + +import ( + "fmt" + "strings" +) + +// PatchIdentity identifies a person who authored or committed a patch. +type PatchIdentity struct { + Name string + Email string +} + +func (i PatchIdentity) String() string { + name := i.Name + if name == "" { + name = `""` + } + return fmt.Sprintf("%s <%s>", name, i.Email) +} + +// ParsePatchIdentity parses a patch identity string. A patch identity contains +// an email address and an optional name in [RFC 5322] format. This is either a +// plain email adddress or a name followed by an address in angle brackets: +// +// author@example.com +// Author Name <author@example.com> +// +// If the input is not one of these formats, ParsePatchIdentity applies a +// heuristic to separate the name and email portions. If both the name and +// email are missing or empty, ParsePatchIdentity returns an error. It +// otherwise does not validate the result. +// +// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322 +func ParsePatchIdentity(s string) (PatchIdentity, error) { + s = normalizeSpace(s) + s = unquotePairs(s) + + var name, email string + if at := strings.IndexByte(s, '@'); at >= 0 { + start, end := at, at + for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' { + start-- + } + for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' { + end++ + } + email = s[start+1 : end] + + // Adjust the boundaries so that we drop angle brackets, but keep + // spaces when removing the email to form the name. 
+ if start < 0 || s[start] != '<' { + start++ + } + if end >= len(s) || s[end] != '>' { + end-- + } + name = s[:start] + s[end+1:] + } else { + start, end := 0, 0 + for i := 0; i < len(s); i++ { + if s[i] == '<' && start == 0 { + start = i + 1 + } + if s[i] == '>' && start > 0 { + end = i + break + } + } + if start > 0 && end >= start { + email = strings.TrimSpace(s[start:end]) + name = s[:start-1] + } + } + + // After extracting the email, the name might contain extra whitespace + // again and may be surrounded by comment characters. The git source gives + // these examples of when this can happen: + // + // "Name <email@domain>" + // "email@domain (Name)" + // "Name <email@domain> (Comment)" + // + name = normalizeSpace(name) + if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") { + name = name[1 : len(name)-1] + } + name = strings.TrimSpace(name) + + // If the name is empty or contains email-like characters, use the email + // instead (assuming one exists) + if name == "" || strings.ContainsAny(name, "@<>") { + name = email + } + + if name == "" && email == "" { + return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s) + } + return PatchIdentity{Name: name, Email: email}, nil +} + +// unquotePairs process the RFC5322 tokens "quoted-string" and "comment" to +// remove any "quoted-pairs" (backslash-espaced characters). It also removes +// the quotes from any quoted strings, but leaves the comment delimiters. +func unquotePairs(s string) string { + quote := false + comments := 0 + escaped := false + + var out strings.Builder + for i := 0; i < len(s); i++ { + if escaped { + escaped = false + } else { + switch s[i] { + case '\\': + // quoted-pair is only allowed in quoted-string/comment + if quote || comments > 0 { + escaped = true + continue // drop '\' character + } + + case '"': + if comments == 0 { + quote = !quote + continue // drop '"' character + } + + case '(': + if !quote { + comments++ + } + case ')': + if comments > 0 { + comments-- + } + } + } + out.WriteByte(s[i]) + } + return out.String() +} + +// normalizeSpace trims leading and trailing whitespace from s and converts +// inner sequences of one or more whitespace characters to single spaces. 
+func normalizeSpace(s string) string { + var sb strings.Builder + for i := 0; i < len(s); i++ { + c := s[i] + if !isRFC5332Space(c) { + if sb.Len() > 0 && isRFC5332Space(s[i-1]) { + sb.WriteByte(' ') + } + sb.WriteByte(c) + } + } + return sb.String() +} + +func isRFC5332Space(c byte) bool { + switch c { + case '\t', '\n', '\r', ' ': + return true + } + return false +} diff --git a/pkg/gitdiff/patch_identity_test.go b/pkg/gitdiff/patch_identity_test.go new file mode 100644 index 0000000..f15fe38 --- /dev/null +++ b/pkg/gitdiff/patch_identity_test.go @@ -0,0 +1,127 @@ +package gitdiff + +import ( + "testing" +) + +func TestParsePatchIdentity(t *testing.T) { + tests := map[string]struct { + Input string + Output PatchIdentity + Err interface{} + }{ + "simple": { + Input: "Morton Haypenny <mhaypenny@example.com>", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + }, + "extraWhitespace": { + Input: "\t Morton Haypenny \r\n<mhaypenny@example.com> ", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + }, + "trailingCharacters": { + Input: "Morton Haypenny <mhaypenny@example.com> II", + Output: PatchIdentity{ + Name: "Morton Haypenny II", + Email: "mhaypenny@example.com", + }, + }, + "onlyEmail": { + Input: "mhaypenny@example.com", + Output: PatchIdentity{ + Name: "mhaypenny@example.com", + Email: "mhaypenny@example.com", + }, + }, + "onlyEmailInBrackets": { + Input: "<mhaypenny@example.com>", + Output: PatchIdentity{ + Name: "mhaypenny@example.com", + Email: "mhaypenny@example.com", + }, + }, + "rfc5322SpecialCharacters": { + Input: `"dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com>`, + Output: PatchIdentity{ + Name: "dependabot[bot]", + Email: "12345+dependabot[bot]@users.noreply.github.com", + }, + }, + "rfc5322QuotedPairs": { + Input: `"Morton \"Old-Timer\" Haypenny" <"mhaypenny\+[1900]"@example.com> (III \(PhD\))`, + Output: PatchIdentity{ + Name: `Morton "Old-Timer" Haypenny (III (PhD))`, + Email: "mhaypenny+[1900]@example.com", + }, + }, + "rfc5322QuotedPairsOutOfContext": { + Input: `Morton \\Backslash Haypenny <mhaypenny@example.com>`, + Output: PatchIdentity{ + Name: `Morton \\Backslash Haypenny`, + Email: "mhaypenny@example.com", + }, + }, + "emptyEmail": { + Input: "Morton Haypenny <>", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "", + }, + }, + "unclosedEmail": { + Input: "Morton Haypenny <mhaypenny@example.com", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + }, + "bogusEmail": { + Input: "Morton Haypenny <mhaypenny>", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny", + }, + }, + "bogusEmailWithWhitespace": { + Input: "Morton Haypenny < mhaypenny >", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny", + }, + }, + "missingEmail": { + Input: "Morton Haypenny", + Err: "invalid identity", + }, + "missingNameAndEmptyEmail": { + Input: "<>", + Err: "invalid identity", + }, + "empty": { + Input: "", + Err: "invalid identity", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + id, err := ParsePatchIdentity(test.Input) + if test.Err != nil { + assertError(t, test.Err, err, "parsing identity") + return + } + if err != nil { + t.Fatalf("unexpected error parsing identity: %v", err) + } + + if test.Output != id { + t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id) + } + }) + } +} diff --git a/pkg/gitdiff/testdata/apply/bin.go 
b/pkg/gitdiff/testdata/apply/bin.go new file mode 100644 index 0000000..e34f06b --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin.go @@ -0,0 +1,124 @@ +//go:build ignore + +// bin.go is a helper CLI to manipulate binary diff data for testing purposes. +// It can decode patches generated by git using the standard parsing functions +// or it can encode binary data back into the format expected by Git. It +// operates on stdin writes results (possibly binary) to stdout. + +package main + +import ( + "bytes" + "compress/zlib" + "encoding/binary" + "flag" + "io/ioutil" + "log" + "os" + "strings" + + "github.com/bluekeyes/go-gitdiff/gitdiff" +) + +var ( + b85Powers = []uint32{52200625, 614125, 7225, 85, 1} + b85Alpha = []byte( + "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "!#$%&()*+-;<=>?@^_`{|}~", + ) +) + +var mode string + +func base85Encode(data []byte) []byte { + chunks, remaining := len(data)/4, len(data)%4 + if remaining > 0 { + data = append(data, make([]byte, 4-remaining)...) + chunks++ + } + + var n int + out := make([]byte, 5*chunks) + + for i := 0; i < len(data); i += 4 { + v := binary.BigEndian.Uint32(data[i : i+4]) + for j := 0; j < 5; j++ { + p := v / b85Powers[j] + out[n+j] = b85Alpha[p] + v -= b85Powers[j] * p + } + n += 5 + } + + return out +} + +func compress(data []byte) ([]byte, error) { + var b bytes.Buffer + w := zlib.NewWriter(&b) + + if _, err := w.Write(data); err != nil { + return nil, err + } + if err := w.Close(); err != nil { + return nil, err + } + + return b.Bytes(), nil +} + +func wrap(data []byte) string { + var s strings.Builder + for i := 0; i < len(data); i += 52 { + c := 52 + if c > len(data)-i { + c = len(data) - i + } + b := (c / 5) * 4 + + if b <= 26 { + s.WriteByte(byte('A' + b - 1)) + } else { + s.WriteByte(byte('a' + b - 27)) + } + s.Write(data[i : i+c]) + s.WriteByte('\n') + } + return s.String() +} + +func init() { + flag.StringVar(&mode, "mode", "parse", "operation mode, one of 'parse' or 'encode'") +} + +func main() { + flag.Parse() + + switch mode { + case "parse": + files, _, err := gitdiff.Parse(os.Stdin) + if err != nil { + log.Fatalf("failed to parse file: %v", err) + } + if len(files) != 1 { + log.Fatalf("patch contains more than one file: %d", len(files)) + } + if files[0].BinaryFragment == nil { + log.Fatalf("patch file does not contain a binary fragment") + } + os.Stdout.Write(files[0].BinaryFragment.Data) + + case "encode": + data, err := ioutil.ReadAll(os.Stdin) + if err != nil { + log.Fatalf("failed to read input: %v", err) + } + data, err = compress(data) + if err != nil { + log.Fatalf("failed to compress data: %v", err) + } + os.Stdout.WriteString(wrap(base85Encode(data))) + + default: + log.Fatalf("unknown mode: %s", mode) + } +} diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_delta_error.src b/pkg/gitdiff/testdata/apply/bin_fragment_delta_error.src Binary files differnew file mode 100644 index 0000000..d4edf89 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_delta_error.src diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_delta_error_dst_size.patch b/pkg/gitdiff/testdata/apply/bin_fragment_delta_error_dst_size.patch new file mode 100644 index 0000000..6d5bb42 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_delta_error_dst_size.patch @@ -0,0 +1,5 @@ +diff --git a/gitdiff/testdata/apply/bin_fragment_delta_error.src b/gitdiff/testdata/apply/bin_fragment_delta_error.src +GIT binary patch +delta 18 +fc${itY+{<=z`_4AtEhVK$zKyatN;N30RR6$D+j^= + diff --git 
a/pkg/gitdiff/testdata/apply/bin_fragment_delta_error_incomplete_add.patch b/pkg/gitdiff/testdata/apply/bin_fragment_delta_error_incomplete_add.patch new file mode 100644 index 0000000..b8c1835 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_delta_error_incomplete_add.patch @@ -0,0 +1,5 @@ +diff --git a/gitdiff/testdata/apply/bin_fragment_delta_error.src b/gitdiff/testdata/apply/bin_fragment_delta_error.src +GIT binary patch +delta 11 +Xc${itY+{_?z`_4As|XMP0RR6K8UwQc + diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_delta_error_incomplete_copy.patch b/pkg/gitdiff/testdata/apply/bin_fragment_delta_error_incomplete_copy.patch new file mode 100644 index 0000000..8db8f84 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_delta_error_incomplete_copy.patch @@ -0,0 +1,5 @@ +diff --git a/gitdiff/testdata/apply/bin_fragment_delta_error.src b/gitdiff/testdata/apply/bin_fragment_delta_error.src +GIT binary patch +delta 17 +fc${itY+{_?z`_4AtEhVK$zKya00961|Nl5!2ZsOv + diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_delta_error_src_size.patch b/pkg/gitdiff/testdata/apply/bin_fragment_delta_error_src_size.patch new file mode 100644 index 0000000..29cb26b --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_delta_error_src_size.patch @@ -0,0 +1,5 @@ +diff --git a/gitdiff/testdata/apply/bin_fragment_delta_error.src b/gitdiff/testdata/apply/bin_fragment_delta_error.src +GIT binary patch +delta 18 +fc${itYGRz=z`_4AtEhVK$zKyatN;N30RR6$EeFB? + diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify.out b/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify.out Binary files differnew file mode 100644 index 0000000..f3386d1 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify.out diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify.patch b/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify.patch new file mode 100644 index 0000000..1801ef2 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify.patch @@ -0,0 +1,13 @@ +diff --git a/gitdiff/testdata/apply/bin_fragment_delta_modify.src b/gitdiff/testdata/apply/bin_fragment_delta_modify.src +GIT binary patch +delta 172 +zcmV;d08{^f2)qc8AP{I3VQ>J`s>wb0HU+h#6w8q?tUO~cHmDjZi2<8yZ9XmKhhMdo +zWu(4bg|8QwzZ|1e*rL4P#)`Fen<n~ik=E?$qG6?hzJ6$u{l5W#?uwHb0q6w)00000 +zlLZ3%0RfW%1N%UMJ{~Z~0@X${&1Kk#98tb3==a{J7A;`O`v&<T@514_mvMTz72b#n +atf$#NLoPbNe?RPFJVt1aCFGoQbiKD!OHgJ2 + +delta 112 +zcmV-$0FVE?2!IHXAP~DY<7&llQfwqYA%tL<sR@xVtUMD;+4ZG>XTQ5=J2y;^BfB}4 +zWkisH791|vOVl5e-@^VLX0s~Ky_UyN!3;CgPr>Edj0j+0gOSwSsFsr$0q6zUJph<q +SlLZ3%0XmZb1N#I__7UCuR5Dxu + diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify.src b/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify.src Binary files differnew file mode 100644 index 0000000..fb85478 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify.src diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify_large.out b/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify_large.out Binary files differnew file mode 100644 index 0000000..f0f7f14 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify_large.out diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify_large.patch b/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify_large.patch new file mode 100644 index 0000000..093936b --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify_large.patch @@ -0,0 +1,166 @@ +diff --git a/gitdiff/testdata/apply/bin_fragment_delta_modify_large.src 
b/gitdiff/testdata/apply/bin_fragment_delta_modify_large.src +GIT binary patch +delta 4145 +zcmV-15YF#_fCzwq2!NCU6n}UD@BJ(nwumRXTBWU&(r44U#UTR?Hg2j#E9tcD8>JCY +zqz<L)JrNXpEfw|b8Ts&UEkO5+T?jiM$x7@_P2&_*&`$=fcypBBj#$(pjJ9|+w!nF; +z*sgN!^;$QEf?3|e>)g`N&ViW9W6D7kX7GX{y{m=JvFi1-r`PEE?SE&^2StK}qz5hC +zom0ywdCWuNb#YXdd0%FMNHft!FTER$>uzu5gxdoGBy789raJBW7jAhN2TWFL{P%2l +z|AX{}Jz8U}Y*X|~=?4<;F4)94!-e?w)#D0h8n1_ORWNCOC&7=!U0MP3<v=_v9C5!i +z0Q+7#pUXRc?6%;cFn?F`n#;%_dGwni+^Ev;mF)3qj28^tfxPMqDV12HP;vJ_p9F#w +z{re$r!khX1Grt6PEg!>BI0~M)pZ-cf6aFkVFzO&JOkcv7FeEq|)DcBDAP&_&ZgBk* +zVp(I^5-bN3L{~g{bHnkWX%0Hj02~njkKX8Zz%Ih#=LBD%Pk%TkW4ize_HweE#@_-2 +zvpyE#e@^n#rRGx;O84LB3bMdrxdv%Gkc)ZQq%8pkxT9b*)}Z&t5bibZ0)8H8T33vN +zgTj)j_%wz13x+TZ6LgdupD^ke2!n7E-YZ%8n3OTzK5*T(BH>ltfU|QJ7VTAu<e}on +z$=JJjjciFFN`Fzsxf-MARHfr7D9c#x?{62mGRmgzrR~VD+Ec3xhX?ouWpC(VJF31{ +zSg`N(Bv@0v75=Jj5L%(L+)rQK7Yqo!ZI;@6S<+%;8n*To-C%h>r}ah?0sC%ZUY?<b +zv(0AQ|7cLp<eKKWxX)Y#(4*sOJ(f!elXc4>&xwkEUw@Y9LqO1~eF>>5cDlg_YxuF| +zZ58JVlu7IBfP1@TDmG;<{X>(&*Hy<j=OVk<Z{4Xw!q_uh3PF;mQ=%{@DP($Ym^O7u +z()CP&Lg$k9$86TRX~S*o)PMp4ltJ3>C-7zqRrY~#3W>hP8@a&jwYi_$k(j`D$Ta97 +z{(rL4B7Ysa#+5QC40p?w_V}r&G%{W$)8R1P`wet5K>`4D&EanFW{d2mOB<F_6i4{2 +zyvNjwqpz2xNywg~atQE~7i+4HXtp*BZ=iZbxqB9%ln@_KVIKuO(nLG^6V%DDaJYn* +z=!{Jo%`{rQ1*~_S3Xt4d{uL-EROqk|1qV~gXMd@K!*vHg@UsOgfcNC~T%s^yNa-mE +zWwvGu5f1Z&pe6o5bGG4F@r5(pTx6;)9Ly8(reL*<OVspK=%BY6Le~m;0dvr`0#=GZ +zlA<Vi?w~V5RN|C0iy|D|@2CGptYlBJMz31?1p!JP^z~%w&3`BBU&UCo@6HFkrK-3O +z^M6Vy;CO*>vm~hJviTXh@a)Qp^TpF9X}vEVN-{LA%5X<MH)9ZCktsL5)O9)!?Qy`S +zDd4*FDIj>p^*>%23c&{6HgPH4ahb@257yLNuno+(_9IJ!cgw97b*@K~X-9f8rdsd` +zOSj362)!mBsX%<QkLK_a4;5C`Hw&r*C4c$CJwCk3vNJG1z*ZUK_s-To!tlwRObW19 +zUka<y!0g)DN^Vs$_rw9OJP`AO*l3*zd7z+1UHYUg7uw9?U+{6;pts4(?Z!@3C6;>A +zQ*StV4WzVHG#+*IwPf7@BwNAUN{Tre0&7KHK8Gq6W2Jeqe%S(>+{1HM$cFn>J%6m& +z|8$kpBb=Dxh6$MwDCzl1g;Y`~f7F-wEny-#iI1X?l(D;Z$PLR3jay0@l4S5PR8fS< +zi+>Hek8mDV9ItDt3}cF;v-yN79ZUwT1^LyF1n$Cy%gt_C&r9r*A*)&$ZFl<eghAvH +zRkrj-L9@*P`M#b14DP@YWqKrGihs`_$9}?b&`hgRiub41qZ?7&Nh%}HpP3Tbttvv5 +zq=W1Nc?DQo<|}6{xbpW%2@<O6f0JU{G6JrwI_yfvDsdp$CDKVJe{UP|{F2$LMyq;S +z;6*a3u>HnVf=mg#Q%sK{dyD8$2z{ndd#G^c?_~0mb!M$7ZxU$373E=vV}GR5OrWDm +zBsRhxx$#Ql_U#b2h}uPg{qm53MhlGO!F^=m@8@Ie88^<EsYD1;{veTT&+b|bRnb#@ +z2Am8hM&Q?IXN2#P3~!urpHr8+JIMR`C~)B}()1`g{Nu|NSpJQdrZ#t5ZDCpLl)y5a +zI3es-`NqV4QL_t{&2)4pvwsR>`%?=q2+>^o91FPtaKgX{mUR_AEr9NwT^_^@8>UHx +z6Vql9ovz7$NpnPl5QlqsKP0|}EskXfTz-@ZpBlsI-~?B-4%w_GY9s9?ln=!A8wNMf +z=;eAIwp6&0nZ9Qq({C*2AXx<ZqTxd)R(xsn?F4+P$}#JS<u|x9n19vknU^?{8C$(2 +z{+;MK!HljtP!Bu31=DpsC-~V`aJ>KEGCjGa)2K^@ryP2!6dWiRy0M0vL*145f@BzR +z`IXs~3pGOq&;GIFDC<KN4u<vU;>=$JbDmK$C`A~nS$@*@hY$2e@kN0E;7jK@*TzW0 +z2FmwjqM0G-dfMDB3x5@pycb${TTJ7dzu+teFC9mUiSr(jrvQ~;sj`>uoen9{iXP5; +zbF9`-K065Zx&Tv2gK4wf8(U++M31oGlTiV$(^)qux9<b>7HP~_mSNPtqZA#7xOo;Q +zc-B?N(jYE1ix_L{M|cpCS4s5I3Tx_Hh1#!O-iBP5iS>iM{C~ZUez^pglqH5IAdOAD +zE;AjwjL)_IsQU6y4t4Uc9KX<!`vlPxjFRGAG1tyHS7KLyRQx=Xb|!%gliE%^%h3dy +zy_B7SK7#Pnxw+(BwLgXV=mq;Xy%bzqFkM7}Ja4VdI`gUr`jY+&xsnc80&0~P<68@c +z{u<TCS!o65e}CwEOz7FcRp}Klf4OpWu~<6nq0Bfg&arHyU8C4!6fwm-i*mmh?)6iz +zKf_(OO)Qy-luDqjA>l~!BYeA;B8*wK)|cg^tiD0aN2iWzR>YnUUS9?3jKhiAKpgQ0 +zCRNTKwkeO~{pU<lXv(vu7Dx<e`Gj7IBkT6lpsco*Mt@D}KlB*lL|<QAS}Q%n%XFvg +zH*NegM<bJ@$syCdbs<RdsmXzbAZE0`((<O&d(r5^{{vn>2|S8)UYp21g#5$lGV|mN +zdQNSkH%R6}>C_cemrZCjR|-)D<8Y^P74a}>FKs&95{8d9SiTvhB4{eTUGFbTa-IbX +z8Lw1xoPPp_ZdIbl#dCzP1`N@V?eb7SizWtmm5ujG(rKZHZw$>Ozzf4E=Yf*+eH}kn +zL{&n><${iESA&t~5pz;&vs%HlLYxXMjPlLH{?*-#T4#$(P;pke+8K*f)n*P<u~mu| +zeR&;)e}Xp;P-smwSxfyE7_JA!0QRkDN~OJm4}W0Lw($YeyX)3OOY&fT$MrlC=TkO7 
+zW{~(xl%jTs^n3F;k}Vc^eq7RCg8GuW-Aviu+r;$?G9+>h>SB9|8)|f;w=%OP6`nms +zdFVyjJf#&^=o+P_Y1Q{$$Aql8VqX+>=4)lx*&wB#{$tlb&SwC3E~G-pKOFXRQrRSk +zMt}Fa|4IQc;^N}stu@1BVjk>`8grvj#WVz9k#>7N?#JQZdag9deih!2aWVB}1klbo +zXKhuKy*h7m?``T}aM*}PexaGXqeZCm>ME7BQi64D>9lL0AoF2`Tzo^hNSMC9B=jT@ +zjA%viojl=gm6?1mPDg-)fy98Cb}Bdio_{dD+i;%nIJ6ZfQ^LPvl@~R#wk@K-##0ma +zj|D(^Fr8U(o(FRy$Z0FGyp@<r=6Jk;ZS~i6PLo%BfGyU%%&PK=Cd{mo$^aBm71sz9 +z!b|Dl3+8A-e+{qm)PHbh4jn6it1^fhu{8?r62V15rUxqn)Y2|@zH!vqg?{4<et$(K +zA?oQG3Vt}Nao4K3@C~cP)!cmK<q0(7iDfh<(I{qwIq$s=;ms)qU50x1eBN}%Cl9o- +zwRUox>Yam&`H8oVv>6n8UKEVec>#*NVp)YrdOPgay4SxQ!PjT#gf^PISz?!W;(=9W +zC3sT|H>d}ErTr;c8LY_P{IPZ;Wq&_a4mM`d<q|?_Y3|+P2w!A;3~GDKi<NZwt81V{ +z#<XjJk{Nh~dU&Bc4nc;%e$m9B1SEbX2GL`Mh%4|dmPXqAo_SnHQ~ydAo2pWNJNr#r +z&qMJ7Rm4cAa{Z?MBu-{<c!m#F`J8~VL}izx&JjpCwXp6YqQKH=rH`ccpMPM){}>Lu +zn^@7N(YC~q-qV~usbu?>{mD#~LNHhWdnNcaB&J>J@M)=+i^{*SwmS-XE-ywoYoZ3U +zdb4|M(!Ok$LY!K2m*Wjy>w_)zTm#k_g&J$V<PC_-!ZtfnOT~DwbQAMPmaiNnPYa}E +zC0qUqV#JtW?*@6Xi2qh=#(y1ht<mz1hk2_-BW3zWZQOn&BF-DJNiVJ^UX?IbdyF-0 +zmYg7{z)#}hLcrrTmcVBJ-7O#q)Ue|c4_m)wviwN|OXYK1i@&98_P^=u3fJGVWKtPj +zLs?WVkSk!O^@K;NKZL<c0zjFD)s9i7FY7DW#K^@KP^{b~G3k3kS$~nO@HF(x9_=3o +z>Cb%~Wdvc^;lfge`vp1dg{O^Aqhg$$l5i=+C+Ser`<Wg)9QRGDYvyN4sSVl`6z?{B +z2h<>KQQfEhZr}z4>;>#SGZZ*qGHd~H-az)BPnlSx{cgt);r6Mgt;@-+9-E2t@FXz( +zkh$)!g3>JMRK2Fj;(t$e?z7fJ;>m@#_j;z{x_|Hzf&NnB2s)A$gs4~;$GCPhUTy3P +z%V#3D9@E1QNOb5UL}Ny_3_nU3V+}u&a}K`l<S=I_q0bw!SVz^BZHUO(TE4MzZG&hv +z)+F*X8}QBDV_CEe<6f<02^%m7*+MEwvjSRh#odK&cU%lpy?^`{7NcR^sU%E*+(Vq| +zF~fBmxj2s}!*-mJ%K;2$0V^tq99=5EopA;?6BncK8H3Nu^jJ;;Kab!7;sYgKSZ@Gt +zKv8=%d_ID#m7X)W2w?-i6t|7q0>A2AVv-@33!d)C9C2b%Dl#4Wp`G3Y#VXS2vODOK +zH6Ik?Qx|Shj(@mOGi-V8X@6z3u=_{}wqW)r%+?8?joDa^!2`v;{CIT;J4sydrLxNW +zgUBvqK&gHOJyAP#hP2e3p;XKF@8~4;2})IVed0tk&>;1X$v5<9B1$g>xX)iE=u<cm +zo?`eC3Yy%Ey0VMh!y;fRqb!;mk=8A@RDT10v?wO&6Mu~_WsIoxKM+xi---tWuims! 
+zj=~aAegI;IcBVmAc9q7(=cq;}OnHg8)o<i1`)eG>55Vhuq!Be_Zh7H_L0ojmJ-L7F +zFE;pV4HH_WmO_~*TJ#EHu@A`M)iV-3k0!7^nIQ%08mJA-{<~wkIj0>_AP%Vz_4#<* +vVkRz)@E~<X?BYZ$D8XaYxPKYw=O=*!-kqz(*(SYUmC&YW)6KUO5CQD(E<g{{ + +delta 4145 +zcmV-15YF#_fCzwq2!NCU6n~B!vvqUuE-w#_b4*snf+s0CI;4#C=?y1M+-)DLw@)?B +zKRDATsFIhqobL+;`&`GcXOdshu1FiN*-EB(T2+t?#Ao(J-&3E)JS*>iIs*PG)1#O3 +z^y`3>TH+sKUrDep!F5A*^DLBShsp6!+cwX9)&)8gv+|D%3&Z;I1%KSpdiN0>xk9!c +zDy`)OH&-h44JMjA=aom~2^DT%XWWk@BGsF>T2qX9tzqChzyU07Q5O!7pHe8DLN|bp +zdlTM)yHOJqRKS{Ok~8#6aa|1~wdC!f6HE-YeYg|0WDJ@d5_mH_Xk7QkFCJ);Ovbo( +zM*o?Jc;mjWG9!T<mVad4wt<-<5ODj-OXdT_RfJdQ;iI=U{TFC^BFGP!3U#-yT9SEa +zS>2#lADbM8kdZGB*L;W@^4gw!CX?raCf7DLTZhN{Ky@jK1r2x?YmoKqE|=?ny{_>M +zPW`hAEW;McTgizc-%KZOZ(LC{iJ26YFd!W@TJ{Cm#XaWxI)6ed<E%fKe|ZJRo1pBb +z=tzd?y3cy;q1x!k>CQgN9H-J&#etfq2BL8u2mpTqV;-qg8O*hlGo~w*^QU&D8~wh* +zjRoXGhq}{c%8{%P7XC3-Gtv_4vPhC&G({}4=soxR<|OO8=U32O6B6X4cu}vawOLA8 +zUCsnRtC7@}bbt5OjoNEaRt9SW579M@O#ke{I?9P!pHGx8+mS!>XB(pRy3MCefz~7F +zw>%NX4brs~1S)zzfOF(+Ek>{e>P?F^u#}8;o=P;Q#CD^EyGpfE6zi<UUIXy1e8fo^ +zsdeU;LY<OP*5)-5?w2k(W2c*(X++`^o;W-0Yh}DpOn)yEHK;aai@&7d#o!_;cq*A7 +zJ575q9cPe?a0DRDuDa})nta5;k)5jvi}^1YR@iI=4GvRS=>sF&7Wp0@PM#uQ8l(zm +zP$=UTG!hf$iSFO~fxdOLf78q%Z-e$nFbWUzJ1GphTCI4>g(9IgBR5F8ark@QBWdEc +z#zwo4Yk#W3G2@1tNzg@lLP}&U1pA#O7MZO99tC&}9^@dg>AOG<CIRyH1czdV;c;~E +zhJVkbt|oi<j<h>&wsG)`L6)m8D^F-bi%B9Fk!}EJ;7-NTD*1)VJblm%?L@A$yiNR_ +zN*tPjf$n}UNI)oYV<>TQzBo6=Js6=KI%9xd0e}BZ7R)GU!se_DK2rXrAU6Li*}JNk +z#OhSK^H=?bSi&TwvW+tB(6HGW@aslSEso+M=;CSx(a|*2cpNSek=YCt0xna?)V|}{ +zTrlxT(K!Y~T&oi%3ZE#@!{L<{lsr)-9)*xnPKtF8N_ai2o#MZ0IBC!g<B>L$Qsp6Z +z8Gn6s&gnB+R1tMMtKG)p(&tf2mQR=p-i$vp0Miv<=k6$|Zzncr&mA(7R@^rLWDs=I +zYQH*agSALP4X>gKE|9hMvV&2kw^AvZ!faW}gmNmbaQYn9l7n@!u{?2sM8nv;Tmv{O +zBGR5{oW=M)QBwCHaH^{#iZy8#xtUkH+J6it`5)T}v?(oj_fcQINt+8O%OF`^vYP<? +zs1Kr&3Fao?KvF*pHOm>4)mg#jyW+dip{0)NO!-BTlR_?Vq;{U272sSWrCqIVm8cQL +zrQmZDOJ2`L8Ma$4xNjy5tYTIfMP$0Cem%%TLvRcw`DGc<Hd1&t>D@_|S%DDQ4u3UK +z8OEd2toTA8!T^Fm!2&_<VWjqtSu0kZiSKAYMA?^gk{;m)aUvwgHI$q@^(HdhX?Kv8 +zc8~@b%=lgG;`(fOf}`Zcy1k%{A{i0@HLU5mvE3;pqd{HWr8>{Jf5ZMEi=opVlV!Ik +z<##3|qPvm^@~emp!AcWqZnh=Kjei*4uG>XNb8+}}YQ^1W{{z9%$|}v5_lcs><zZ=N +z6s2OwqF>dBQ)!1^xT1u+<cUIm_aOVF7EsUkN+yP_2uC%zYR*XE(M@wHIHnb{*zn2t +zH5g`W(anqqYEp&L^>L5lB_@cL=1D~x<L_Pa3J2%c9!b1L{pP}aU0R2#%YTo~>9Wuz +zxNt@}3^A_ZoI>X}!Mx*>E&MhCXYogWFhGnpZyeU<Ef+d*FVUvINA*MDB92j7+{COY +z@MS1RrMTd9tIF_&mK~lp;?Jn-A_9=jfVIgysB}3f9v}Oj7KNFs#z^BPJk@BLDCQYK +z|9#~s7x(-alLl&Y!#9ydwSUa%!<a75-Hg<qsOZseWTOXiA`NJ51e#Dv+sPUs5o8kC +z6N;-`^U8M~`hSO1#FVjgl|t{Kn_#>!If!?eQb$yAbIIg><^*Non_1wm3-B0LADTnL +z@Hcy)r!Y%8x7clR<ZYNc-M@D3zaS5*Td^+RS3<#y2y@(L#7J`#1%Jr=sM+-j{QNaP +zI8^b+V<BDjMy*4|^;5}Z_32Qmtq&-Kw%{EZ{~*sLraA2U79G6Gr-+>E<adM@92nTA +zo6YzLl!zoy|MvD5oFM0@)On8)%ZOit`Uio7OJI$yC(5Z=U)StSZ-#fFhmac9=Jjb1 +zgE%nl>|bsgq9dRISAT07rTRBH&nUUbh`5p}Mb34r|9T->C}v<iG>-0X5Ec}{F9eQ~ +zhpvHGyil+IHRzd!=^rDs{FlTe*YP3?X>tO`A|pa&VSnR#Teu~TOBJYPw~MpSAuLU& +zyl44lEnDGx9xOCB98{3a*(-><Fn>ajsd_h%GmzCJno?Q&Pk&<<UaQ%9pMs#de9EPJ +zx&P^*=qnoP>XzvfiOofjV7KjkT8RO)tQL0)FS;%Xt5w2$x+^;B_k^yfQRtHC!ndJ& +z&$LQY(4EC(dem!kO3UB}z~JrL$`X<y!c1c5%jlB}>9Qg4CLqd7*@Hn2yxVThL!~BW +za6(zTA3VB62Y($=87Nd(yuJH&{z_$9;Eo)VO3Rv;-+gC34F02W_29+|Br6D$(fQ&u +z--#+MO#x4xiRrjERDA?xBfD@AaPlspDCD~m)QT=JS6a}X`|BZPaBeJ2pzNE=Uv-7K +zn1=v3M4?6-!r&5zU^ey=zQ|mJwNTM3ynPRaeEP2%Ab&NAz23L!<tj7>+3J78>DNz@ +zcRGVQ{T78^a%8e;7hmY!<1!>F%P3kh+(XpJvYZi2+_KJ{TpjXm2?SpLtC$kZ^u(dN +zk6GUPl;ciEb+#wFOVmH>IJ*$ACbE1kae%lKRxO7TlRgGD&%hdcWBJ_hl&X@eH`oP+ +zE<RrBK7SH4^XD?LWf6lHn2$j_p;A~7Zf%6lfm`>E(=8aL=VamW$>w$<-v#=-Gn(S+ +z3;|DiLStTzl)8WRlm|&b);zFN)QobrszD9rwkLNNY*<QlFS~r~$VJ*pC%cv#P1#!8 
+zf90BhxfTB1`>MK+{OAn&2FY03tEpf5Jpj54Gk>^{cy&yUeV3R*pRs^>fqgveU|_5X +zX+e%27qs2?7c(6BMHwnKprk+Z!)v4I=V4BAvdR#Lu7_d4gJ5#jsXEF%=!5*qv7*_k +zM%5WM$*Vou!Y<SK8gABjuPq(^-aTcob|FwpG8oF-cvhY)!xbJxsl9(LHA}$~+e}g^ +z?tkqDHa-)6V6AhB(WLtjSxh*sU80Puaq7Tk7-k#xJ?m#;4p*&yNI-c{{>7D@*jCa6 +zYHwni68{B(ythcY+j4tKR&BK~wpq+p_I^Ee5*>UjeSbn=Z@M{3)IDB1h`wTBv8Vf) +zGUabKI^vx;3Bt3)O=|zXZF^CRCWGFRoqrAlxT0y&$j5q6SnOOU5o0?&Ps&D6Ph}`# +zwY!d(eP-j3KN_5`jU!b3rM~A)_kI~RXoFy;RSw!9pX9GG#B*F)`BST~98N+GqC9J7 +zyK!;S%6g@oNzmD<anC%Z(U5n}^w3WG-g$dA(D!KdikqKoTHqzsy<DSoUy^Fe*?+z= +zBeLyHX7lnaCEr>1wzp`{WZzW@_X`T-&+vR;$j+YidW6`>)~^xl)>6cuUrT~*Lu`sa +ztO}<9;#H(Q==8o5Tl}2UX=uGs%2mo19NqD{5U5NNDyo1Rz+mR`!bH$50%+yM7He^f +zdBH00TyclwDiI=}Q>9_P*rcl2Xnz%4ZX#_#&l$m~RIyIW^MA=hnUPFr%4bUq`Q(Rz +zpGTKslcC%{fbRe!3oa<}4EKVhQ%zS`J2GeEVr+blZ0VAxrNnYnS|C25Uov}{GHoRv +zfFHfa&J<g>wlv(G^#}8A+}RLJN4n=M4UA|@AT(`^>Fu_AKhK^yT<NqVv46*)z6y$G +z9qA@LU+;U>$XPA9`v4eB_FD$ie0HD3Mi4;`s$hx2m9!(i8H!RzGg?C)?9Pb`H(UMj +z<8h|0U)ScABl{{?sOhBJJ+yFPUOt7$z&nj*C(+UXB1_UZbG@s_K&)51_vPKxQ4~mI +zVsBl7$^wYH(tS=4B*eHGsefF8JLWa`(eLnE!hn1l1gzVr56RAj0G_?aY8PsdnewAx +zZT0?fpiS{s0Aw+t2$UUQ8M-rW={ko_;sJpjJ1cye3c$(VR4{`2w#4pbFy+`(TlH;+ +zsRJ^EQ{%@!-i8GRBqYoy6jIyiS8YC#b<%d)^wC2C>^bvFCR##kmw%eyk5ClDOBLPk +z?ZY@ePP8_qq>U*{?_kesZmky4-_cLmu?|B2);KDCQ?spBIJE?6j8`9&{>UoVIRUPw +zrx_TuibGY$dcSSH{d>KOqk_s$i=X_v>|dz1MMzQ0CJX+$*Y(PaiAhNRl8^?%L6tN! +zoCVZZRG(i1T9lcmLVv7ZE)U68M1}eqD2QeQo3F+z`_kj^3BAQ~kifx^iJVoht|)uQ +zp@Zi4b(sUN|HikW5)IF|`VA(5R}Oig)4g3o@Edr6G~IUUj9NaSza}p$jk_pOfzO2r +z`Uq<(Z;%5A7DwJe@~J%KO>wC&&NuCjN8iAHC?0eTu|CfS+kf-2P)BX2mNxYJOt)<# +zud7R9Is%@q9H$%no<iF0Mg!vUaKIi_mjT=-&`OK1D?5##b<_J+ypuBhJ6(w#3HQm% +zv(vmcXqh?CaDq*qm>|irWi2&HJ00Qekek!<&=4kke7cKnH-x$$Mg_!j+R4)~S@4-7 +zr5;gyzbMDhd4E#B$j(G@-z05T!$YZ=j)t^KKgHs!H_s_|8VC%~m+HAY-YK*Nx44gH +zaUG54Wez*M!YaxNS#)BjN)7?2M!A3NOY99Oa}Rb;C#RPNionqlK3@N0PTt)<OoHh@ +zB}w)|mquN2{wE_&PLflsn+VmFcMpw|)iba8d7J0}`G17K*U(SrP0j#OvO@*N!OT-# +zzEpKkNa8QpZIp2k)(rxnM^3t+Fh8Z;of0T$R8iTkT|KuYhYLTpiVpIpFQn$^5dkWB +zH>Op#&Rj>ipw!qDZki{+-<T93w~SVrQN&dbS7P#JZ8VZeO?d+fmp|Y6KSr1kp+tQm +vo8tug5g^N;cu0IZnx8~7m2vIJqA6RF`a~W~SpmK&%Tz&0WAC>V5CQD(Yj^-6 + diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify_large.src b/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify_large.src Binary files differnew file mode 100644 index 0000000..90a9f6d --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_delta_modify_large.src diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_literal_create.out b/pkg/gitdiff/testdata/apply/bin_fragment_literal_create.out Binary files differnew file mode 100644 index 0000000..c62cba2 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_literal_create.out diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_literal_create.patch b/pkg/gitdiff/testdata/apply/bin_fragment_literal_create.patch new file mode 100644 index 0000000..b3b846a --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_literal_create.patch @@ -0,0 +1,8 @@ +diff --git a/gitdiff/testdata/apply/bin_fragment_literal_create.src b/gitdiff/testdata/apply/bin_fragment_literal_create.src +GIT binary patch +literal 32 +ocmZQzU`lR_IpTEv<rOIzfy0`|3>L*`JUiBtu5<US@-j_*0M$he#sB~S + +literal 0 +HcmV?d00001 + diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_literal_create.src b/pkg/gitdiff/testdata/apply/bin_fragment_literal_create.src new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_literal_create.src diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_literal_modify.out 
b/pkg/gitdiff/testdata/apply/bin_fragment_literal_modify.out Binary files differnew file mode 100644 index 0000000..0bf736e --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_literal_modify.out diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_literal_modify.patch b/pkg/gitdiff/testdata/apply/bin_fragment_literal_modify.patch new file mode 100644 index 0000000..c3da024 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_literal_modify.patch @@ -0,0 +1,8 @@ +diff --git a/gitdiff/testdata/apply/bin_fragment_literal_modify.src b/gitdiff/testdata/apply/bin_fragment_literal_modify.src +GIT binary patch +literal 32 +ocmZQzU`lR_IpTEv<rOIzfy0_p?@r&O@$6Vny3XCR%F8tM0pJ`CjQ{`u + +literal 32 +ocmZQzU`lR_IpTEv<rOIzfy0`|3>L*`JUiBtu5<US@-j_*0M$he#sB~S + diff --git a/pkg/gitdiff/testdata/apply/bin_fragment_literal_modify.src b/pkg/gitdiff/testdata/apply/bin_fragment_literal_modify.src Binary files differnew file mode 100644 index 0000000..c62cba2 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/bin_fragment_literal_modify.src diff --git a/pkg/gitdiff/testdata/apply/file_bin_modify.out b/pkg/gitdiff/testdata/apply/file_bin_modify.out Binary files differnew file mode 100644 index 0000000..f3386d1 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/file_bin_modify.out diff --git a/pkg/gitdiff/testdata/apply/file_bin_modify.patch b/pkg/gitdiff/testdata/apply/file_bin_modify.patch new file mode 100644 index 0000000..af38f36 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/file_bin_modify.patch @@ -0,0 +1,13 @@ +diff --git a/gitdiff/testdata/apply/file_bin_modify.src b/gitdiff/testdata/apply/file_bin_modify.src +GIT binary patch +delta 172 +zcmV;d08{^f2)qc8AP{I3VQ>J`s>wb0HU+h#6w8q?tUO~cHmDjZi2<8yZ9XmKhhMdo +zWu(4bg|8QwzZ|1e*rL4P#)`Fen<n~ik=E?$qG6?hzJ6$u{l5W#?uwHb0q6w)00000 +zlLZ3%0RfW%1N%UMJ{~Z~0@X${&1Kk#98tb3==a{J7A;`O`v&<T@514_mvMTz72b#n +atf$#NLoPbNe?RPFJVt1aCFGoQbiKD!OHgJ2 + +delta 112 +zcmV-$0FVE?2!IHXAP~DY<7&llQfwqYA%tL<sR@xVtUMD;+4ZG>XTQ5=J2y;^BfB}4 +zWkisH791|vOVl5e-@^VLX0s~Ky_UyN!3;CgPr>Edj0j+0gOSwSsFsr$0q6zUJph<q +SlLZ3%0XmZb1N#I__7UCuR5Dxu + diff --git a/pkg/gitdiff/testdata/apply/file_bin_modify.src b/pkg/gitdiff/testdata/apply/file_bin_modify.src Binary files differnew file mode 100644 index 0000000..fb85478 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/file_bin_modify.src diff --git a/pkg/gitdiff/testdata/apply/file_mode_change.out b/pkg/gitdiff/testdata/apply/file_mode_change.out new file mode 100644 index 0000000..ec3b7fd --- /dev/null +++ b/pkg/gitdiff/testdata/apply/file_mode_change.out @@ -0,0 +1,2 @@ +#!/bin/bash +echo "this file is executable" diff --git a/pkg/gitdiff/testdata/apply/file_mode_change.patch b/pkg/gitdiff/testdata/apply/file_mode_change.patch new file mode 100644 index 0000000..b1f6859 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/file_mode_change.patch @@ -0,0 +1,3 @@ +diff --git a/gitdiff/testdata/apply/file_mode_change.src b/gitdiff/testdata/apply/file_mode_change.src +old mode 100644 +new mode 100755 diff --git a/pkg/gitdiff/testdata/apply/file_mode_change.src b/pkg/gitdiff/testdata/apply/file_mode_change.src new file mode 100644 index 0000000..ec3b7fd --- /dev/null +++ b/pkg/gitdiff/testdata/apply/file_mode_change.src @@ -0,0 +1,2 @@ +#!/bin/bash +echo "this file is executable" diff --git a/pkg/gitdiff/testdata/apply/file_text.src b/pkg/gitdiff/testdata/apply/file_text.src new file mode 100644 index 0000000..3805ad4 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/file_text.src @@ -0,0 +1,200 @@ +this is line 1 +this is line 2 +this is line 3 +this is line 4 
+this is line 5 +this is line 6 +this is line 7 +this is line 8 +this is line 9 +this is line 10 +this is line 11 +this is line 12 +this is line 13 +this is line 14 +this is line 15 +this is line 16 +this is line 17 +this is line 18 +this is line 19 +this is line 20 +this is line 21 +this is line 22 +this is line 23 +this is line 24 +this is line 25 +this is line 26 +this is line 27 +this is line 28 +this is line 29 +this is line 30 +this is line 31 +this is line 32 +this is line 33 +this is line 34 +this is line 35 +this is line 36 +this is line 37 +this is line 38 +this is line 39 +this is line 40 +this is line 41 +this is line 42 +this is line 43 +this is line 44 +this is line 45 +this is line 46 +this is line 47 +this is line 48 +this is line 49 +this is line 50 +this is line 51 +this is line 52 +this is line 53 +this is line 54 +this is line 55 +this is line 56 +this is line 57 +this is line 58 +this is line 59 +this is line 60 +this is line 61 +this is line 62 +this is line 63 +this is line 64 +this is line 65 +this is line 66 +this is line 67 +this is line 68 +this is line 69 +this is line 70 +this is line 71 +this is line 72 +this is line 73 +this is line 74 +this is line 75 +this is line 76 +this is line 77 +this is line 78 +this is line 79 +this is line 80 +this is line 81 +this is line 82 +this is line 83 +this is line 84 +this is line 85 +this is line 86 +this is line 87 +this is line 88 +this is line 89 +this is line 90 +this is line 91 +this is line 92 +this is line 93 +this is line 94 +this is line 95 +this is line 96 +this is line 97 +this is line 98 +this is line 99 +this is line 100 +this is line 101 +this is line 102 +this is line 103 +this is line 104 +this is line 105 +this is line 106 +this is line 107 +this is line 108 +this is line 109 +this is line 110 +this is line 111 +this is line 112 +this is line 113 +this is line 114 +this is line 115 +this is line 116 +this is line 117 +this is line 118 +this is line 119 +this is line 120 +this is line 121 +this is line 122 +this is line 123 +this is line 124 +this is line 125 +this is line 126 +this is line 127 +this is line 128 +this is line 129 +this is line 130 +this is line 131 +this is line 132 +this is line 133 +this is line 134 +this is line 135 +this is line 136 +this is line 137 +this is line 138 +this is line 139 +this is line 140 +this is line 141 +this is line 142 +this is line 143 +this is line 144 +this is line 145 +this is line 146 +this is line 147 +this is line 148 +this is line 149 +this is line 150 +this is line 151 +this is line 152 +this is line 153 +this is line 154 +this is line 155 +this is line 156 +this is line 157 +this is line 158 +this is line 159 +this is line 160 +this is line 161 +this is line 162 +this is line 163 +this is line 164 +this is line 165 +this is line 166 +this is line 167 +this is line 168 +this is line 169 +this is line 170 +this is line 171 +this is line 172 +this is line 173 +this is line 174 +this is line 175 +this is line 176 +this is line 177 +this is line 178 +this is line 179 +this is line 180 +this is line 181 +this is line 182 +this is line 183 +this is line 184 +this is line 185 +this is line 186 +this is line 187 +this is line 188 +this is line 189 +this is line 190 +this is line 191 +this is line 192 +this is line 193 +this is line 194 +this is line 195 +this is line 196 +this is line 197 +this is line 198 +this is line 199 +this is line 200 diff --git a/pkg/gitdiff/testdata/apply/file_text_delete.out b/pkg/gitdiff/testdata/apply/file_text_delete.out new file mode 
100644 index 0000000..e69de29 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/file_text_delete.out diff --git a/pkg/gitdiff/testdata/apply/file_text_delete.patch b/pkg/gitdiff/testdata/apply/file_text_delete.patch new file mode 100644 index 0000000..9ac710b --- /dev/null +++ b/pkg/gitdiff/testdata/apply/file_text_delete.patch @@ -0,0 +1,206 @@ +diff --git a/gitdiff/testdata/apply/file_text.src.src b/gitdiff/testdata/apply/file_text.src +deleted file mode 100644 +index 3805ad4..0000000 +--- a/gitdiff/testdata/apply/file_text.src.src ++++ /dev/null +@@ -1,200 +0,0 @@ +-this is line 1 +-this is line 2 +-this is line 3 +-this is line 4 +-this is line 5 +-this is line 6 +-this is line 7 +-this is line 8 +-this is line 9 +-this is line 10 +-this is line 11 +-this is line 12 +-this is line 13 +-this is line 14 +-this is line 15 +-this is line 16 +-this is line 17 +-this is line 18 +-this is line 19 +-this is line 20 +-this is line 21 +-this is line 22 +-this is line 23 +-this is line 24 +-this is line 25 +-this is line 26 +-this is line 27 +-this is line 28 +-this is line 29 +-this is line 30 +-this is line 31 +-this is line 32 +-this is line 33 +-this is line 34 +-this is line 35 +-this is line 36 +-this is line 37 +-this is line 38 +-this is line 39 +-this is line 40 +-this is line 41 +-this is line 42 +-this is line 43 +-this is line 44 +-this is line 45 +-this is line 46 +-this is line 47 +-this is line 48 +-this is line 49 +-this is line 50 +-this is line 51 +-this is line 52 +-this is line 53 +-this is line 54 +-this is line 55 +-this is line 56 +-this is line 57 +-this is line 58 +-this is line 59 +-this is line 60 +-this is line 61 +-this is line 62 +-this is line 63 +-this is line 64 +-this is line 65 +-this is line 66 +-this is line 67 +-this is line 68 +-this is line 69 +-this is line 70 +-this is line 71 +-this is line 72 +-this is line 73 +-this is line 74 +-this is line 75 +-this is line 76 +-this is line 77 +-this is line 78 +-this is line 79 +-this is line 80 +-this is line 81 +-this is line 82 +-this is line 83 +-this is line 84 +-this is line 85 +-this is line 86 +-this is line 87 +-this is line 88 +-this is line 89 +-this is line 90 +-this is line 91 +-this is line 92 +-this is line 93 +-this is line 94 +-this is line 95 +-this is line 96 +-this is line 97 +-this is line 98 +-this is line 99 +-this is line 100 +-this is line 101 +-this is line 102 +-this is line 103 +-this is line 104 +-this is line 105 +-this is line 106 +-this is line 107 +-this is line 108 +-this is line 109 +-this is line 110 +-this is line 111 +-this is line 112 +-this is line 113 +-this is line 114 +-this is line 115 +-this is line 116 +-this is line 117 +-this is line 118 +-this is line 119 +-this is line 120 +-this is line 121 +-this is line 122 +-this is line 123 +-this is line 124 +-this is line 125 +-this is line 126 +-this is line 127 +-this is line 128 +-this is line 129 +-this is line 130 +-this is line 131 +-this is line 132 +-this is line 133 +-this is line 134 +-this is line 135 +-this is line 136 +-this is line 137 +-this is line 138 +-this is line 139 +-this is line 140 +-this is line 141 +-this is line 142 +-this is line 143 +-this is line 144 +-this is line 145 +-this is line 146 +-this is line 147 +-this is line 148 +-this is line 149 +-this is line 150 +-this is line 151 +-this is line 152 +-this is line 153 +-this is line 154 +-this is line 155 +-this is line 156 +-this is line 157 +-this is line 158 +-this is line 159 +-this is line 160 +-this is line 161 +-this is line 162 +-this is 
line 163 +-this is line 164 +-this is line 165 +-this is line 166 +-this is line 167 +-this is line 168 +-this is line 169 +-this is line 170 +-this is line 171 +-this is line 172 +-this is line 173 +-this is line 174 +-this is line 175 +-this is line 176 +-this is line 177 +-this is line 178 +-this is line 179 +-this is line 180 +-this is line 181 +-this is line 182 +-this is line 183 +-this is line 184 +-this is line 185 +-this is line 186 +-this is line 187 +-this is line 188 +-this is line 189 +-this is line 190 +-this is line 191 +-this is line 192 +-this is line 193 +-this is line 194 +-this is line 195 +-this is line 196 +-this is line 197 +-this is line 198 +-this is line 199 +-this is line 200 diff --git a/pkg/gitdiff/testdata/apply/file_text_error_partial_delete.patch b/pkg/gitdiff/testdata/apply/file_text_error_partial_delete.patch new file mode 100644 index 0000000..01d2a6f --- /dev/null +++ b/pkg/gitdiff/testdata/apply/file_text_error_partial_delete.patch @@ -0,0 +1,106 @@ +diff --git a/gitdiff/testdata/apply/file_text.src.src b/gitdiff/testdata/apply/file_text.src +deleted file mode 100644 +index 3805ad4..0000000 +--- a/gitdiff/testdata/apply/file_text.src.src ++++ /dev/null +@@ -1,100 +0,0 @@ +-this is line 1 +-this is line 2 +-this is line 3 +-this is line 4 +-this is line 5 +-this is line 6 +-this is line 7 +-this is line 8 +-this is line 9 +-this is line 10 +-this is line 11 +-this is line 12 +-this is line 13 +-this is line 14 +-this is line 15 +-this is line 16 +-this is line 17 +-this is line 18 +-this is line 19 +-this is line 20 +-this is line 21 +-this is line 22 +-this is line 23 +-this is line 24 +-this is line 25 +-this is line 26 +-this is line 27 +-this is line 28 +-this is line 29 +-this is line 30 +-this is line 31 +-this is line 32 +-this is line 33 +-this is line 34 +-this is line 35 +-this is line 36 +-this is line 37 +-this is line 38 +-this is line 39 +-this is line 40 +-this is line 41 +-this is line 42 +-this is line 43 +-this is line 44 +-this is line 45 +-this is line 46 +-this is line 47 +-this is line 48 +-this is line 49 +-this is line 50 +-this is line 51 +-this is line 52 +-this is line 53 +-this is line 54 +-this is line 55 +-this is line 56 +-this is line 57 +-this is line 58 +-this is line 59 +-this is line 60 +-this is line 61 +-this is line 62 +-this is line 63 +-this is line 64 +-this is line 65 +-this is line 66 +-this is line 67 +-this is line 68 +-this is line 69 +-this is line 70 +-this is line 71 +-this is line 72 +-this is line 73 +-this is line 74 +-this is line 75 +-this is line 76 +-this is line 77 +-this is line 78 +-this is line 79 +-this is line 80 +-this is line 81 +-this is line 82 +-this is line 83 +-this is line 84 +-this is line 85 +-this is line 86 +-this is line 87 +-this is line 88 +-this is line 89 +-this is line 90 +-this is line 91 +-this is line 92 +-this is line 93 +-this is line 94 +-this is line 95 +-this is line 96 +-this is line 97 +-this is line 98 +-this is line 99 +-this is line 100 diff --git a/pkg/gitdiff/testdata/apply/file_text_modify.out b/pkg/gitdiff/testdata/apply/file_text_modify.out new file mode 100644 index 0000000..dc20c07 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/file_text_modify.out @@ -0,0 +1,195 @@ +the first line is different +this is line 2 +this is line 3 +this is line 4 +this is line 5 +this is line 6 +this is line 7 +this is line 8 +this is line 9 +this is line 10 +this is line 11 +this is line 12 +this is line 13 +this is line 14 +this is line 15 +this is line 16 +this is line 17 
+this is line 18 +this is line 19 +this line offsets all the line numbers! +this is line 20 +this is line 21 +until here, now we're back on track! +this is line 24 +this is line 25 +this is line 26 +this is line 27 +this is line 28 +this is line 29 +this is line 30 +this is line 31 +this is line 32 +this is line 33 +this is line 34 +this is line 35 +this is line 36 +this is line 37 +this is line 38 +this is line 39 +this is line 40 +this is line 41 +this is line 42 +this is line 43 +this is line 44 +this is line 45 +this is line 46 +this is line 47 +this is line 48 +this is line 49 +this is line 50 +this is line 51 +this is line 52 +this is line 53 +this is line 54 +this is line 55 +once upon a time, a line + in a text + file + changed +this is line 60 +this is line 61 +this is line 62 +this is line 63 +this is line 64 +this is line 65 +this is line 66 +this is line 67 +this is line 68 +this is line 69 +this is line 70 +this is line 71 +this is line 72 +this is line 73 +this is line 74 +this is line 75 +this is line 76 +this is line 77 +this is line 78 +this is line 79 +this is line 80 +this is line 81 +this is line 82 +this is line 83 +this is line 84 +this is line 85 +this is line 86 +this is line 87 +this is line 88 +this is line 89 +this is line 90 +this is line 91 +this is line 92 +this is line 93 +this is line 94 +this is line 95 +this is line 96 +this is line 97 +this is line 98 +this is line 99 +this is line 100 +this is line 101 +this is line 102 +this is line 103 +this is line 104 +this is line 105 +this is line 106 +this is line 107 +this is line 108 +this is line 109 +this is line 110 +this is line 111 +this is line 112 +this is line 113 +this is line 114 +this is line 115 +this is line 116 +this is line 117 +this is line 118 +this is line 119 +this is line 120 +this is line 121 +this is line 122 +this is line 123 +this is line 124 +this is line 125 +this is line 126 +this is line 127 +this is line 128 +this is line 129 +this is line 130 +this is line 131 +this is line 132 +this line was bad and has been removed +this line was REDACTED and has been REDACTED +this is line 135 +this is line 136 +this is line 137 +this is line 138 +this is line 139 +this is line 140 +this is line 141 +this is line 142 +this is line 143 +this is line 144 +this is line 145 +this is line 146 +this is line 147 +this is line 148 +this is line 149 +this is line 150 +this is line 151 +this is line 152 +this is line 153 +this is line 154 +this is line 155 +this is line 156 +this is line 157 +this is line 158 +this is line 159 +this is line 160 +this is line 161 +this is line 162 +this is line 163 +the number on the remaining lines is 5 ahead of their actual position in the file +this is line 170 +this is line 171 +this is line 172 +this is line 173 +this is line 174 +this is line 175 +this is line 176 +this is line 177 +this is line 178 +this is line 179 +this is line 180 +this is line 181 +this is line 182 +this is line 183 +this is line 184 +this is line 185 +this is line 186 +this is line 187 +this is line 188 +this is line 189 +this is line 190 +this is line 191 +this is line 192 +this is line 193 +this is line 194 +this is line 195 +this is line 196 +this is line 197 +this is line 198 +this is line 199 +this is line 200 diff --git a/pkg/gitdiff/testdata/apply/file_text_modify.patch b/pkg/gitdiff/testdata/apply/file_text_modify.patch new file mode 100644 index 0000000..362d9a9 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/file_text_modify.patch @@ -0,0 +1,62 @@ +diff --git 
a/gitdiff/testdata/apply/file_text.src b/gitdiff/testdata/apply/file_text.src +--- a/gitdiff/testdata/apply/file_text.src ++++ b/gitdiff/testdata/apply/file_text.src +@@ -1,4 +1,4 @@ +-this is line 1 ++the first line is different + this is line 2 + this is line 3 + this is line 4 +@@ -17,10 +17,10 @@ this is line 16 + this is line 17 + this is line 18 + this is line 19 ++this line offsets all the line numbers! + this is line 20 + this is line 21 +-this is line 22 +-this is line 23 ++until here, now we're back on track! + this is line 24 + this is line 25 + this is line 26 +@@ -53,10 +53,10 @@ this is line 52 + this is line 53 + this is line 54 + this is line 55 +-this is line 56 +-this is line 57 +-this is line 58 +-this is line 59 ++once upon a time, a line ++ in a text ++ file ++ changed + this is line 60 + this is line 61 + this is line 62 +@@ -130,8 +130,8 @@ this is line 129 + this is line 130 + this is line 131 + this is line 132 +-this is line 133 +-this is line 134 ++this line was bad and has been removed ++this line was REDACTED and has been REDACTED + this is line 135 + this is line 136 + this is line 137 +@@ -161,12 +161,7 @@ this is line 160 + this is line 161 + this is line 162 + this is line 163 +-this is line 164 +-this is line 165 +-this is line 166 +-this is line 167 +-this is line 168 +-this is line 169 ++the number on the remaining lines is 5 ahead of their actual position in the file + this is line 170 + this is line 171 + this is line 172 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_add_end.out b/pkg/gitdiff/testdata/apply/text_fragment_add_end.out new file mode 100644 index 0000000..648fd44 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_add_end.out @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +new line a +new line b diff --git a/pkg/gitdiff/testdata/apply/text_fragment_add_end.patch b/pkg/gitdiff/testdata/apply/text_fragment_add_end.patch new file mode 100644 index 0000000..de708be --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_add_end.patch @@ -0,0 +1,9 @@ +diff --git a/gitdiff/testdata/apply/fragment_add_end.src b/gitdiff/testdata/apply/fragment_add_end.src +--- a/gitdiff/testdata/apply/fragment_add_end.src ++++ b/gitdiff/testdata/apply/fragment_add_end.src +@@ -1,3 +1,5 @@ + line 1 + line 2 + line 3 ++new line a ++new line b diff --git a/pkg/gitdiff/testdata/apply/text_fragment_add_end.src b/pkg/gitdiff/testdata/apply/text_fragment_add_end.src new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_add_end.src @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_add_end_noeol.out b/pkg/gitdiff/testdata/apply/text_fragment_add_end_noeol.out new file mode 100644 index 0000000..94c99a3 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_add_end_noeol.out @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line 5 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_add_end_noeol.patch b/pkg/gitdiff/testdata/apply/text_fragment_add_end_noeol.patch new file mode 100644 index 0000000..ec3cea4 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_add_end_noeol.patch @@ -0,0 +1,11 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_add_end_noeol.src b/gitdiff/testdata/apply/text_fragment_add_end_noeol.src +--- a/gitdiff/testdata/apply/text_fragment_add_end_noeol.src ++++ b/gitdiff/testdata/apply/text_fragment_add_end_noeol.src +@@ -1,3 +1,5 @@ + line 1 + line 2 +-line 3 +\ No newline at end of file ++line 3 ++line 4 ++line 5 diff --git 
a/pkg/gitdiff/testdata/apply/text_fragment_add_end_noeol.src b/pkg/gitdiff/testdata/apply/text_fragment_add_end_noeol.src new file mode 100644 index 0000000..8cf2f17 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_add_end_noeol.src @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3
\ No newline at end of file diff --git a/pkg/gitdiff/testdata/apply/text_fragment_add_middle.out b/pkg/gitdiff/testdata/apply/text_fragment_add_middle.out new file mode 100644 index 0000000..ded20d8 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_add_middle.out @@ -0,0 +1,5 @@ +line 1 +line 2 +new line a +new line b +line 3 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_add_middle.patch b/pkg/gitdiff/testdata/apply/text_fragment_add_middle.patch new file mode 100644 index 0000000..43aee3b --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_add_middle.patch @@ -0,0 +1,9 @@ +diff --git a/gitdiff/testdata/apply/fragment_add_middle.src b/gitdiff/testdata/apply/fragment_add_middle.src +--- a/gitdiff/testdata/apply/fragment_add_middle.src ++++ b/gitdiff/testdata/apply/fragment_add_middle.src +@@ -1,3 +1,5 @@ + line 1 + line 2 ++new line a ++new line b + line 3 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_add_middle.src b/pkg/gitdiff/testdata/apply/text_fragment_add_middle.src new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_add_middle.src @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_add_start.out b/pkg/gitdiff/testdata/apply/text_fragment_add_start.out new file mode 100644 index 0000000..b153f60 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_add_start.out @@ -0,0 +1,4 @@ +new line a +line 1 +line 2 +line 3 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_add_start.patch b/pkg/gitdiff/testdata/apply/text_fragment_add_start.patch new file mode 100644 index 0000000..5218764 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_add_start.patch @@ -0,0 +1,8 @@ +diff --git a/gitdiff/testdata/apply/fragment_add_start.src b/gitdiff/testdata/apply/fragment_add_start.src +--- a/gitdiff/testdata/apply/fragment_add_start.src ++++ b/gitdiff/testdata/apply/fragment_add_start.src +@@ -1,3 +1,4 @@ ++new line a + line 1 + line 2 + line 3 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_add_start.src b/pkg/gitdiff/testdata/apply/text_fragment_add_start.src new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_add_start.src @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_end.out b/pkg/gitdiff/testdata/apply/text_fragment_change_end.out new file mode 100644 index 0000000..e3cbece --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_end.out @@ -0,0 +1,10 @@ +line 1 +line 2 +line 3 +line 4 +line 5 +line 6 +line 7 +line 8 +line 9 +new line a diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_end.patch b/pkg/gitdiff/testdata/apply/text_fragment_change_end.patch new file mode 100644 index 0000000..5655880 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_end.patch @@ -0,0 +1,9 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_change_end.src b/gitdiff/testdata/apply/text_fragment_change_end.src +--- a/gitdiff/testdata/apply/text_fragment_change_end.src ++++ b/gitdiff/testdata/apply/text_fragment_change_end.src +@@ -7,4 +7,4 @@ line 6 + line 7 + line 8 + line 9 +-line 10 ++new line a diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_end.src b/pkg/gitdiff/testdata/apply/text_fragment_change_end.src new file mode 100644 index 0000000..fa2da6e --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_end.src @@ -0,0 +1,10 @@ +line 1 +line 2 +line 3 +line 4 +line 5 +line 6 +line 7 +line 
8 +line 9 +line 10 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_end_eol.out b/pkg/gitdiff/testdata/apply/text_fragment_change_end_eol.out new file mode 100644 index 0000000..8cf2f17 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_end_eol.out @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3
\ No newline at end of file diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_end_eol.patch b/pkg/gitdiff/testdata/apply/text_fragment_change_end_eol.patch new file mode 100644 index 0000000..f1c9477 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_end_eol.patch @@ -0,0 +1,10 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src +index a92d664..8cf2f17 100644 +--- a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src ++++ b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src +@@ -1,3 +1,3 @@ + line 1 + line 2 +-line 3 ++line 3 +\ No newline at end of file diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_end_eol.src b/pkg/gitdiff/testdata/apply/text_fragment_change_end_eol.src new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_end_eol.src @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_exact.out b/pkg/gitdiff/testdata/apply/text_fragment_change_exact.out new file mode 100644 index 0000000..4655a0a --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_exact.out @@ -0,0 +1,19 @@ +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +new line a +line +line +line diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_exact.patch b/pkg/gitdiff/testdata/apply/text_fragment_change_exact.patch new file mode 100644 index 0000000..395de4d --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_exact.patch @@ -0,0 +1,12 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_change_exact.src b/gitdiff/testdata/apply/text_fragment_change_exact.src +--- a/gitdiff/testdata/apply/text_fragment_change_exact.src ++++ b/gitdiff/testdata/apply/text_fragment_change_exact.src +@@ -13,7 +13,7 @@ line + line + line + line +-line ++new line a + line + line + line diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_exact.src b/pkg/gitdiff/testdata/apply/text_fragment_change_exact.src new file mode 100644 index 0000000..316a8f0 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_exact.src @@ -0,0 +1,30 @@ +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_middle.out b/pkg/gitdiff/testdata/apply/text_fragment_change_middle.out new file mode 100644 index 0000000..fd0a9ad --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_middle.out @@ -0,0 +1,9 @@ +line 1 +line 2 +line 3 +line 4 +line 5 +new line a +line 7 +line 8 +line 9 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_middle.patch b/pkg/gitdiff/testdata/apply/text_fragment_change_middle.patch new file mode 100644 index 0000000..139a0fe --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_middle.patch @@ -0,0 +1,12 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_change_middle.src b/gitdiff/testdata/apply/text_fragment_change_middle.src +--- a/gitdiff/testdata/apply/text_fragment_change_middle.src ++++ b/gitdiff/testdata/apply/text_fragment_change_middle.src +@@ -3,7 +3,7 @@ line 2 + line 3 + line 4 + line 5 +-line 6 ++new line a + line 7 + line 8 + line 9 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_middle.src b/pkg/gitdiff/testdata/apply/text_fragment_change_middle.src new file mode 
100644 index 0000000..fa2da6e --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_middle.src @@ -0,0 +1,10 @@ +line 1 +line 2 +line 3 +line 4 +line 5 +line 6 +line 7 +line 8 +line 9 +line 10 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_single_noeol.out b/pkg/gitdiff/testdata/apply/text_fragment_change_single_noeol.out new file mode 100644 index 0000000..ed59e08 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_single_noeol.out @@ -0,0 +1 @@ +new line a
\ No newline at end of file diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_single_noeol.patch b/pkg/gitdiff/testdata/apply/text_fragment_change_single_noeol.patch new file mode 100644 index 0000000..f945234 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_single_noeol.patch @@ -0,0 +1,8 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_change_single_noeol.src b/gitdiff/testdata/apply/text_fragment_change_single_noeol.src +--- a/gitdiff/testdata/apply/text_fragment_change_single_noeol.src ++++ b/gitdiff/testdata/apply/text_fragment_change_single_noeol.src +@@ -1 +1 @@ +-line 1 +\ No newline at end of file ++new line a +\ No newline at end of file diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_single_noeol.src b/pkg/gitdiff/testdata/apply/text_fragment_change_single_noeol.src new file mode 100644 index 0000000..dcf168c --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_single_noeol.src @@ -0,0 +1 @@ +line 1
\ No newline at end of file diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_start.out b/pkg/gitdiff/testdata/apply/text_fragment_change_start.out new file mode 100644 index 0000000..5156941 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_start.out @@ -0,0 +1,4 @@ +new line a +line 2 +line 3 +line 4 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_start.patch b/pkg/gitdiff/testdata/apply/text_fragment_change_start.patch new file mode 100644 index 0000000..d0a6653 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_start.patch @@ -0,0 +1,9 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_change_start.src b/gitdiff/testdata/apply/text_fragment_change_start.src +--- a/gitdiff/testdata/apply/text_fragment_change_start.src ++++ b/gitdiff/testdata/apply/text_fragment_change_start.src +@@ -1,4 +1,4 @@ +-line 1 ++new line a + line 2 + line 3 + line 4 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_change_start.src b/pkg/gitdiff/testdata/apply/text_fragment_change_start.src new file mode 100644 index 0000000..fa2da6e --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_change_start.src @@ -0,0 +1,10 @@ +line 1 +line 2 +line 3 +line 4 +line 5 +line 6 +line 7 +line 8 +line 9 +line 10 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_delete_all.out b/pkg/gitdiff/testdata/apply/text_fragment_delete_all.out new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_delete_all.out diff --git a/pkg/gitdiff/testdata/apply/text_fragment_delete_all.patch b/pkg/gitdiff/testdata/apply/text_fragment_delete_all.patch new file mode 100644 index 0000000..8a2fb9c --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_delete_all.patch @@ -0,0 +1,8 @@ +diff --git a/gitdiff/testdata/apply/fragment_delete_all.src b/gitdiff/testdata/apply/fragment_delete_all.src +--- a/gitdiff/testdata/apply/fragment_delete_all.src ++++ b/gitdiff/testdata/apply/fragment_delete_all.src +@@ -1,4 +0,0 @@ +-line a +-line b +-line c +-line d diff --git a/pkg/gitdiff/testdata/apply/text_fragment_delete_all.src b/pkg/gitdiff/testdata/apply/text_fragment_delete_all.src new file mode 100644 index 0000000..47d03ac --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_delete_all.src @@ -0,0 +1,4 @@ +line a +line b +line c +line d diff --git a/pkg/gitdiff/testdata/apply/text_fragment_error.src b/pkg/gitdiff/testdata/apply/text_fragment_error.src new file mode 100644 index 0000000..f8b6f0a --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_error.src @@ -0,0 +1,13 @@ +line 1 +line 2 +line 3 +line 4 +line 5 +line 6 +line 7 +line 8 +line 9 +line 10 +line 11 +line 12 +line 13 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_error_context_conflict.patch b/pkg/gitdiff/testdata/apply/text_fragment_error_context_conflict.patch new file mode 100644 index 0000000..a262796 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_error_context_conflict.patch @@ -0,0 +1,12 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_error.src b/gitdiff/testdata/apply/text_fragment_error.src +--- a/gitdiff/testdata/apply/text_fragment_error.src ++++ b/gitdiff/testdata/apply/text_fragment_error.src +@@ -4,7 +4,7 @@ line 3 + line 4 + line 5 + line conflict +-line 7 ++new line a + line 8 + line 9 + line 10 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_error_delete_conflict.patch b/pkg/gitdiff/testdata/apply/text_fragment_error_delete_conflict.patch new file mode 100644 index 0000000..17ea166 --- /dev/null 
+++ b/pkg/gitdiff/testdata/apply/text_fragment_error_delete_conflict.patch @@ -0,0 +1,12 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_error.src b/gitdiff/testdata/apply/text_fragment_error.src +--- a/gitdiff/testdata/apply/text_fragment_error.src ++++ b/gitdiff/testdata/apply/text_fragment_error.src +@@ -4,7 +4,7 @@ line 3 + line 4 + line 5 + line 6 +-line conflict ++new line a + line 8 + line 9 + line 10 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_error_new_file.patch b/pkg/gitdiff/testdata/apply/text_fragment_error_new_file.patch new file mode 100644 index 0000000..f4fbee6 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_error_new_file.patch @@ -0,0 +1,7 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_error.src b/gitdiff/testdata/apply/text_fragment_error.src +--- a/gitdiff/testdata/apply/text_fragment_error.src ++++ b/gitdiff/testdata/apply/text_fragment_error.src +@@ -0,0 +1,3 @@ ++line 1 ++line 2 ++line 3 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_error_short_src.patch b/pkg/gitdiff/testdata/apply/text_fragment_error_short_src.patch new file mode 100644 index 0000000..bfe7b96 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_error_short_src.patch @@ -0,0 +1,12 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_error.src b/gitdiff/testdata/apply/text_fragment_error.src +--- a/gitdiff/testdata/apply/text_fragment_error.src ++++ b/gitdiff/testdata/apply/text_fragment_error.src +@@ -9,7 +9,7 @@ line 8 + line 9 + line 10 + line 11 +-line 12 ++new line a + line 13 + line 14 + line 15 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_error_short_src_before.patch b/pkg/gitdiff/testdata/apply/text_fragment_error_short_src_before.patch new file mode 100644 index 0000000..0a96018 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_error_short_src_before.patch @@ -0,0 +1,12 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_error.src b/gitdiff/testdata/apply/text_fragment_error.src +--- a/gitdiff/testdata/apply/text_fragment_error.src ++++ b/gitdiff/testdata/apply/text_fragment_error.src +@@ -15,7 +15,7 @@ line 14 + line 15 + line 16 + line 17 +-line 18 ++new line a + line 19 + line 20 + line 21 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_new.out b/pkg/gitdiff/testdata/apply/text_fragment_new.out new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_new.out @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_new.patch b/pkg/gitdiff/testdata/apply/text_fragment_new.patch new file mode 100644 index 0000000..c87487b --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_new.patch @@ -0,0 +1,7 @@ +diff --git a/gitdiff/testdata/apply/fragment_new.src b/gitdiff/testdata/apply/fragment_new.src +--- a/gitdiff/testdata/apply/fragment_new.src ++++ b/gitdiff/testdata/apply/fragment_new.src +@@ -0,0 +1,3 @@ ++line 1 ++line 2 ++line 3 diff --git a/pkg/gitdiff/testdata/apply/text_fragment_new.src b/pkg/gitdiff/testdata/apply/text_fragment_new.src new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/pkg/gitdiff/testdata/apply/text_fragment_new.src diff --git a/pkg/gitdiff/testdata/new_binary_file.patch b/pkg/gitdiff/testdata/new_binary_file.patch new file mode 100644 index 0000000..e9ad45d --- /dev/null +++ b/pkg/gitdiff/testdata/new_binary_file.patch @@ -0,0 +1,16 @@ +commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe +Author: Morton Haypenny <mhaypenny@example.com> +Date: Tue Apr 2 22:55:40 2019 -0700 + + A binary 
file with the first 10 fibonacci numbers. + +diff --git a/dir/ten.bin b/dir/ten.bin +new file mode 100644 +index 0000000000000000000000000000000000000000..77b068ba48c356156944ea714740d0d5ca07bfec +GIT binary patch +literal 40 +gcmZQzU|?i`U?w2V48*KJ%mKu_Kr9NxN<eH500b)lkN^Mx + +literal 0 +HcmV?d00001 + diff --git a/pkg/gitdiff/testdata/no_files.patch b/pkg/gitdiff/testdata/no_files.patch new file mode 100644 index 0000000..9eea12d --- /dev/null +++ b/pkg/gitdiff/testdata/no_files.patch @@ -0,0 +1,8 @@ +commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe +Author: Morton Haypenny <mhaypenny@example.com> +Date: Tue Apr 2 22:55:40 2019 -0700 + + A file with multiple fragments. + + The content is arbitrary. + diff --git a/pkg/gitdiff/testdata/one_file.patch b/pkg/gitdiff/testdata/one_file.patch new file mode 100644 index 0000000..1aefec3 --- /dev/null +++ b/pkg/gitdiff/testdata/one_file.patch @@ -0,0 +1,28 @@ +commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe +Author: Morton Haypenny <mhaypenny@example.com> +Date: Tue Apr 2 22:55:40 2019 -0700 + + A file with multiple fragments. + + The content is arbitrary. + +diff --git a/dir/file1.txt b/dir/file1.txt +index ebe9fa54..fe103e1d 100644 +--- a/dir/file1.txt ++++ b/dir/file1.txt +@@ -3,6 +3,8 @@ fragment 1 + context line +-old line 1 +-old line 2 + context line ++new line 1 ++new line 2 ++new line 3 + context line +-old line 3 ++new line 4 ++new line 5 +@@ -31,2 +33,2 @@ fragment 2 + context line +-old line 4 ++new line 6 diff --git a/pkg/gitdiff/testdata/string/binary_modify.patch b/pkg/gitdiff/testdata/string/binary_modify.patch new file mode 100644 index 0000000..12ddad5 --- /dev/null +++ b/pkg/gitdiff/testdata/string/binary_modify.patch @@ -0,0 +1,9 @@ +diff --git a/file.bin b/file.bin +index a7f4d5d6975ec021016c02b6d58345ebf434f38c..bdc9a70f055892146612dcdb413f0e339faaa0df 100644 +GIT binary patch +delta 66 +QcmeZhVVvM$!$1K50C&Ox;s5{u + +delta 5 +McmZo+^qAlQ00i9urT_o{ + diff --git a/pkg/gitdiff/testdata/string/binary_modify_nodata.patch b/pkg/gitdiff/testdata/string/binary_modify_nodata.patch new file mode 100644 index 0000000..833a534 --- /dev/null +++ b/pkg/gitdiff/testdata/string/binary_modify_nodata.patch @@ -0,0 +1,3 @@ +diff --git a/file.bin b/file.bin +index a7f4d5d..bdc9a70 100644 +Binary files a/file.bin and b/file.bin differ diff --git a/pkg/gitdiff/testdata/string/binary_new.patch b/pkg/gitdiff/testdata/string/binary_new.patch new file mode 100644 index 0000000..c56f35e --- /dev/null +++ b/pkg/gitdiff/testdata/string/binary_new.patch @@ -0,0 +1,11 @@ +diff --git a/file.bin b/file.bin +new file mode 100644 +index 0000000000000000000000000000000000000000..a7f4d5d6975ec021016c02b6d58345ebf434f38c +GIT binary patch +literal 72 +zcmV-O0Jr~td-`u6JcK&{KDK=<a#;v1^LR5&K)zQ0=Goz82(?nJ6_nD`f#8O9p}}{P +eiXim+rDI+BDadMQmMsO5Sw@;DbrCA+PamP;Ng_@F + +literal 0 +HcmV?d00001 + diff --git a/pkg/gitdiff/testdata/string/copy.patch b/pkg/gitdiff/testdata/string/copy.patch new file mode 100644 index 0000000..f002f07 --- /dev/null +++ b/pkg/gitdiff/testdata/string/copy.patch @@ -0,0 +1,4 @@ +diff --git a/file.txt b/numbers.txt +similarity index 100% +copy from file.txt +copy to numbers.txt diff --git a/pkg/gitdiff/testdata/string/copy_modify.patch b/pkg/gitdiff/testdata/string/copy_modify.patch new file mode 100644 index 0000000..558a511 --- /dev/null +++ b/pkg/gitdiff/testdata/string/copy_modify.patch @@ -0,0 +1,21 @@ +diff --git a/file.txt b/numbers.txt +similarity index 57% +copy from file.txt +copy to numbers.txt +index c9e9e05..6c4a3e0 100644 +--- 
a/file.txt ++++ b/numbers.txt +@@ -1,6 +1,6 @@ + one + two +-three ++three three three + four + five + six +@@ -8,3 +8,5 @@ seven + eight + nine + ten ++eleven ++twelve diff --git a/pkg/gitdiff/testdata/string/delete.patch b/pkg/gitdiff/testdata/string/delete.patch new file mode 100644 index 0000000..f32dc25 --- /dev/null +++ b/pkg/gitdiff/testdata/string/delete.patch @@ -0,0 +1,16 @@ +diff --git a/file.txt b/file.txt +deleted file mode 100644 +index c9e9e05..0000000 +--- a/file.txt ++++ /dev/null +@@ -1,10 +0,0 @@ +-one +-two +-three +-four +-five +-six +-seven +-eight +-nine +-ten diff --git a/pkg/gitdiff/testdata/string/mode.patch b/pkg/gitdiff/testdata/string/mode.patch new file mode 100644 index 0000000..953ab25 --- /dev/null +++ b/pkg/gitdiff/testdata/string/mode.patch @@ -0,0 +1,3 @@ +diff --git a/file.txt b/file.txt +old mode 100644 +new mode 100755 diff --git a/pkg/gitdiff/testdata/string/mode_modify.patch b/pkg/gitdiff/testdata/string/mode_modify.patch new file mode 100644 index 0000000..f1554a7 --- /dev/null +++ b/pkg/gitdiff/testdata/string/mode_modify.patch @@ -0,0 +1,10 @@ +diff --git a/script.sh b/script.sh +old mode 100644 +new mode 100755 +index 7a870bd..68d501e +--- a/script.sh ++++ b/script.sh +@@ -1,2 +1,2 @@ + #!/bin/bash +-echo "Hello World" ++echo "Hello, World!" diff --git a/pkg/gitdiff/testdata/string/modify.patch b/pkg/gitdiff/testdata/string/modify.patch new file mode 100644 index 0000000..9d89753 --- /dev/null +++ b/pkg/gitdiff/testdata/string/modify.patch @@ -0,0 +1,16 @@ +diff --git a/file.txt b/file.txt +index c9e9e05..7d5fdc6 100644 +--- a/file.txt ++++ b/file.txt +@@ -3,8 +3,10 @@ two + three + four + five +-six ++six six six six six six + seven + eight + nine + ten ++eleven ++twelve diff --git a/pkg/gitdiff/testdata/string/new.patch b/pkg/gitdiff/testdata/string/new.patch new file mode 100644 index 0000000..941fe25 --- /dev/null +++ b/pkg/gitdiff/testdata/string/new.patch @@ -0,0 +1,16 @@ +diff --git a/file.txt b/file.txt +new file mode 100644 +index 0000000..c9e9e05 +--- /dev/null ++++ b/file.txt +@@ -0,0 +1,10 @@ ++one ++two ++three ++four ++five ++six ++seven ++eight ++nine ++ten diff --git a/pkg/gitdiff/testdata/string/new_empty.patch b/pkg/gitdiff/testdata/string/new_empty.patch new file mode 100644 index 0000000..5cc7cf7 --- /dev/null +++ b/pkg/gitdiff/testdata/string/new_empty.patch @@ -0,0 +1,3 @@ +diff --git a/file.txt b/file.txt +new file mode 100644 +index 0000000..e69de29 diff --git a/pkg/gitdiff/testdata/string/new_mode.patch b/pkg/gitdiff/testdata/string/new_mode.patch new file mode 100644 index 0000000..f9d7f1f --- /dev/null +++ b/pkg/gitdiff/testdata/string/new_mode.patch @@ -0,0 +1,16 @@ +diff --git a/file.sh b/file.sh +new file mode 100755 +index 0000000..c9e9e05 +--- /dev/null ++++ b/file.sh +@@ -0,0 +1,10 @@ ++one ++two ++three ++four ++five ++six ++seven ++eight ++nine ++ten diff --git a/pkg/gitdiff/testdata/string/rename.patch b/pkg/gitdiff/testdata/string/rename.patch new file mode 100644 index 0000000..3c0ca6f --- /dev/null +++ b/pkg/gitdiff/testdata/string/rename.patch @@ -0,0 +1,4 @@ +diff --git a/file.txt b/numbers.txt +similarity index 100% +rename from file.txt +rename to numbers.txt diff --git a/pkg/gitdiff/testdata/string/rename_modify.patch b/pkg/gitdiff/testdata/string/rename_modify.patch new file mode 100644 index 0000000..52a32af --- /dev/null +++ b/pkg/gitdiff/testdata/string/rename_modify.patch @@ -0,0 +1,18 @@ +diff --git a/file.txt b/numbers.txt +similarity index 77% +rename from file.txt +rename to numbers.txt +index 
c9e9e05..a6b31d6 100644 +--- a/file.txt ++++ b/numbers.txt +@@ -3,8 +3,9 @@ two + three + four + five +-six ++ six + seven + eight + nine + ten ++eleven diff --git a/pkg/gitdiff/testdata/two_files.patch b/pkg/gitdiff/testdata/two_files.patch new file mode 100644 index 0000000..dd14421 --- /dev/null +++ b/pkg/gitdiff/testdata/two_files.patch @@ -0,0 +1,48 @@ +commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe +Author: Morton Haypenny <mhaypenny@example.com> +Date: Tue Apr 2 22:55:40 2019 -0700 + + A file with multiple fragments. + + The content is arbitrary. + +diff --git a/dir/file1.txt b/dir/file1.txt +index ebe9fa54..fe103e1d 100644 +--- a/dir/file1.txt ++++ b/dir/file1.txt +@@ -3,6 +3,8 @@ fragment 1 + context line +-old line 1 +-old line 2 + context line ++new line 1 ++new line 2 ++new line 3 + context line +-old line 3 ++new line 4 ++new line 5 +@@ -31,2 +33,2 @@ fragment 2 + context line +-old line 4 ++new line 6 +diff --git a/dir/file2.txt b/dir/file2.txt +index 417ebc70..67514b7f 100644 +--- a/dir/file2.txt ++++ b/dir/file2.txt +@@ -3,6 +3,8 @@ fragment 1 + context line +-old line 1 +-old line 2 + context line ++new line 1 ++new line 2 ++new line 3 + context line +-old line 3 ++new line 4 ++new line 5 +@@ -31,2 +33,2 @@ fragment 2 + context line +-old line 4 ++new line 6 diff --git a/pkg/gitdiff/text.go b/pkg/gitdiff/text.go new file mode 100644 index 0000000..ee30792 --- /dev/null +++ b/pkg/gitdiff/text.go @@ -0,0 +1,192 @@ +package gitdiff + +import ( + "fmt" + "io" + "strconv" + "strings" +) + +// ParseTextFragments parses text fragments until the next file header or the +// end of the stream and attaches them to the given file. It returns the number +// of fragments that were added. +func (p *parser) ParseTextFragments(f *File) (n int, err error) { + for { + frag, err := p.ParseTextFragmentHeader() + if err != nil { + return n, err + } + if frag == nil { + return n, nil + } + + if f.IsNew && frag.OldLines > 0 { + return n, p.Errorf(-1, "new file depends on old contents") + } + if f.IsDelete && frag.NewLines > 0 { + return n, p.Errorf(-1, "deleted file still has contents") + } + + if err := p.ParseTextChunk(frag); err != nil { + return n, err + } + + f.TextFragments = append(f.TextFragments, frag) + n++ + } +} + +func (p *parser) ParseTextFragmentHeader() (*TextFragment, error) { + const ( + startMark = "@@ -" + endMark = " @@" + ) + + if !strings.HasPrefix(p.Line(0), startMark) { + return nil, nil + } + + parts := strings.SplitAfterN(p.Line(0), endMark, 2) + if len(parts) < 2 { + return nil, p.Errorf(0, "invalid fragment header") + } + + f := &TextFragment{} + f.Comment = strings.TrimSpace(parts[1]) + + header := parts[0][len(startMark) : len(parts[0])-len(endMark)] + ranges := strings.Split(header, " +") + if len(ranges) != 2 { + return nil, p.Errorf(0, "invalid fragment header") + } + + var err error + if f.OldPosition, f.OldLines, err = parseRange(ranges[0]); err != nil { + return nil, p.Errorf(0, "invalid fragment header: %v", err) + } + if f.NewPosition, f.NewLines, err = parseRange(ranges[1]); err != nil { + return nil, p.Errorf(0, "invalid fragment header: %v", err) + } + + if err := p.Next(); err != nil && err != io.EOF { + return nil, err + } + return f, nil +} + +func (p *parser) ParseTextChunk(frag *TextFragment) error { + if p.Line(0) == "" { + return p.Errorf(0, "no content following fragment header") + } + + oldLines, newLines := frag.OldLines, frag.NewLines + for oldLines > 0 || newLines > 0 { + line := p.Line(0) + op, data := line[0], line[1:] + + switch op { + 
case '\n': + data = "\n" + fallthrough // newer GNU diff versions create empty context lines + case ' ': + oldLines-- + newLines-- + if frag.LinesAdded == 0 && frag.LinesDeleted == 0 { + frag.LeadingContext++ + } else { + frag.TrailingContext++ + } + frag.Lines = append(frag.Lines, Line{OpContext, data}) + case '-': + oldLines-- + frag.LinesDeleted++ + frag.TrailingContext = 0 + frag.Lines = append(frag.Lines, Line{OpDelete, data}) + case '+': + newLines-- + frag.LinesAdded++ + frag.TrailingContext = 0 + frag.Lines = append(frag.Lines, Line{OpAdd, data}) + case '\\': + // this may appear in middle of fragment if it's for a deleted line + if isNoNewlineMarker(line) { + removeLastNewline(frag) + break + } + fallthrough + default: + // TODO(bkeyes): if this is because we hit the next header, it + // would be helpful to return the miscounts line error. We could + // either test for the common headers ("@@ -", "diff --git") or + // assume any invalid op ends the fragment; git returns the same + // generic error in all cases so either is compatible + return p.Errorf(0, "invalid line operation: %q", op) + } + + if err := p.Next(); err != nil { + if err == io.EOF { + break + } + return err + } + } + + if oldLines != 0 || newLines != 0 { + hdr := max(frag.OldLines-oldLines, frag.NewLines-newLines) + 1 + return p.Errorf(-hdr, "fragment header miscounts lines: %+d old, %+d new", -oldLines, -newLines) + } + if frag.LinesAdded == 0 && frag.LinesDeleted == 0 { + return p.Errorf(0, "fragment contains no changes") + } + + // check for a final "no newline" marker since it is not included in the + // counters used to stop the loop above + if isNoNewlineMarker(p.Line(0)) { + removeLastNewline(frag) + if err := p.Next(); err != nil && err != io.EOF { + return err + } + } + + return nil +} + +func isNoNewlineMarker(s string) bool { + // test for "\ No newline at end of file" by prefix because the text + // changes by locale (git claims all versions are at least 12 chars) + return len(s) >= 12 && s[:2] == "\\ " +} + +func removeLastNewline(frag *TextFragment) { + if len(frag.Lines) > 0 { + last := &frag.Lines[len(frag.Lines)-1] + last.Line = strings.TrimSuffix(last.Line, "\n") + } +} + +func parseRange(s string) (start int64, end int64, err error) { + parts := strings.SplitN(s, ",", 2) + + if start, err = strconv.ParseInt(parts[0], 10, 64); err != nil { + nerr := err.(*strconv.NumError) + return 0, 0, fmt.Errorf("bad start of range: %s: %v", parts[0], nerr.Err) + } + + if len(parts) > 1 { + if end, err = strconv.ParseInt(parts[1], 10, 64); err != nil { + nerr := err.(*strconv.NumError) + return 0, 0, fmt.Errorf("bad end of range: %s: %v", parts[1], nerr.Err) + } + } else { + end = 1 + } + + return +} + +func max(a, b int64) int64 { + if a > b { + return a + } + return b +} diff --git a/pkg/gitdiff/text_test.go b/pkg/gitdiff/text_test.go new file mode 100644 index 0000000..990b3bc --- /dev/null +++ b/pkg/gitdiff/text_test.go @@ -0,0 +1,488 @@ +package gitdiff + +import ( + "io" + "reflect" + "testing" +) + +func TestParseTextFragmentHeader(t *testing.T) { + tests := map[string]struct { + Input string + Output *TextFragment + Err bool + }{ + "shortest": { + Input: "@@ -1 +1 @@\n", + Output: &TextFragment{ + OldPosition: 1, + OldLines: 1, + NewPosition: 1, + NewLines: 1, + }, + }, + "standard": { + Input: "@@ -21,5 +28,9 @@\n", + Output: &TextFragment{ + OldPosition: 21, + OldLines: 5, + NewPosition: 28, + NewLines: 9, + }, + }, + "trailingComment": { + Input: "@@ -21,5 +28,9 @@ func test(n int) {\n", + Output: 
&TextFragment{ + Comment: "func test(n int) {", + OldPosition: 21, + OldLines: 5, + NewPosition: 28, + NewLines: 9, + }, + }, + "incomplete": { + Input: "@@ -12,3 +2\n", + Err: true, + }, + "badNumbers": { + Input: "@@ -1a,2b +3c,4d @@\n", + Err: true, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + p := newTestParser(test.Input, true) + + frag, err := p.ParseTextFragmentHeader() + if test.Err { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing header, but got %v", err) + } + return + } + if err != nil { + t.Fatalf("error parsing header: %v", err) + } + + if !reflect.DeepEqual(test.Output, frag) { + t.Errorf("incorrect fragment\nexpected: %+v\nactual: %+v", test.Output, frag) + } + }) + } +} + +func TestParseTextChunk(t *testing.T) { + tests := map[string]struct { + Input string + Fragment TextFragment + + Output *TextFragment + Err bool + }{ + "addWithContext": { + Input: ` context line ++new line 1 ++new line 2 + context line +`, + Fragment: TextFragment{ + OldLines: 2, + NewLines: 4, + }, + Output: &TextFragment{ + OldLines: 2, + NewLines: 4, + Lines: []Line{ + {OpContext, "context line\n"}, + {OpAdd, "new line 1\n"}, + {OpAdd, "new line 2\n"}, + {OpContext, "context line\n"}, + }, + LinesAdded: 2, + LeadingContext: 1, + TrailingContext: 1, + }, + }, + "deleteWithContext": { + Input: ` context line +-old line 1 +-old line 2 + context line +`, + Fragment: TextFragment{ + OldLines: 4, + NewLines: 2, + }, + Output: &TextFragment{ + OldLines: 4, + NewLines: 2, + Lines: []Line{ + {OpContext, "context line\n"}, + {OpDelete, "old line 1\n"}, + {OpDelete, "old line 2\n"}, + {OpContext, "context line\n"}, + }, + LinesDeleted: 2, + LeadingContext: 1, + TrailingContext: 1, + }, + }, + "replaceWithContext": { + Input: ` context line +-old line 1 ++new line 1 + context line +`, + Fragment: TextFragment{ + OldLines: 3, + NewLines: 3, + }, + Output: &TextFragment{ + OldLines: 3, + NewLines: 3, + Lines: []Line{ + {OpContext, "context line\n"}, + {OpDelete, "old line 1\n"}, + {OpAdd, "new line 1\n"}, + {OpContext, "context line\n"}, + }, + LinesDeleted: 1, + LinesAdded: 1, + LeadingContext: 1, + TrailingContext: 1, + }, + }, + "middleContext": { + Input: ` context line +-old line 1 + context line ++new line 1 + context line +`, + Fragment: TextFragment{ + OldLines: 4, + NewLines: 4, + }, + Output: &TextFragment{ + OldLines: 4, + NewLines: 4, + Lines: []Line{ + {OpContext, "context line\n"}, + {OpDelete, "old line 1\n"}, + {OpContext, "context line\n"}, + {OpAdd, "new line 1\n"}, + {OpContext, "context line\n"}, + }, + LinesDeleted: 1, + LinesAdded: 1, + LeadingContext: 1, + TrailingContext: 1, + }, + }, + "deleteFinalNewline": { + Input: ` context line +-old line 1 ++new line 1 +\ No newline at end of file +`, + Fragment: TextFragment{ + OldLines: 2, + NewLines: 2, + }, + Output: &TextFragment{ + OldLines: 2, + NewLines: 2, + Lines: []Line{ + {OpContext, "context line\n"}, + {OpDelete, "old line 1\n"}, + {OpAdd, "new line 1"}, + }, + LinesDeleted: 1, + LinesAdded: 1, + LeadingContext: 1, + }, + }, + "addFinalNewline": { + Input: ` context line +-old line 1 +\ No newline at end of file ++new line 1 +`, + Fragment: TextFragment{ + OldLines: 2, + NewLines: 2, + }, + Output: &TextFragment{ + OldLines: 2, + NewLines: 2, + Lines: []Line{ + {OpContext, "context line\n"}, + {OpDelete, "old line 1"}, + {OpAdd, "new line 1\n"}, + }, + LinesDeleted: 1, + LinesAdded: 1, + LeadingContext: 1, + }, + }, + "addAll": { + Input: `+new line 1 ++new line 2 ++new line 3 
+`, + Fragment: TextFragment{ + OldLines: 0, + NewLines: 3, + }, + Output: &TextFragment{ + OldLines: 0, + NewLines: 3, + Lines: []Line{ + {OpAdd, "new line 1\n"}, + {OpAdd, "new line 2\n"}, + {OpAdd, "new line 3\n"}, + }, + LinesAdded: 3, + }, + }, + "deleteAll": { + Input: `-old line 1 +-old line 2 +-old line 3 +`, + Fragment: TextFragment{ + OldLines: 3, + NewLines: 0, + }, + Output: &TextFragment{ + OldLines: 3, + NewLines: 0, + Lines: []Line{ + {OpDelete, "old line 1\n"}, + {OpDelete, "old line 2\n"}, + {OpDelete, "old line 3\n"}, + }, + LinesDeleted: 3, + }, + }, + "emptyContextLine": { + Input: ` context line + ++new line + context line +`, + Fragment: TextFragment{ + OldLines: 3, + NewLines: 4, + }, + Output: &TextFragment{ + OldLines: 3, + NewLines: 4, + Lines: []Line{ + {OpContext, "context line\n"}, + {OpContext, "\n"}, + {OpAdd, "new line\n"}, + {OpContext, "context line\n"}, + }, + LinesAdded: 1, + LeadingContext: 2, + TrailingContext: 1, + }, + }, + "emptyChunk": { + Input: "", + Err: true, + }, + "invalidOperation": { + Input: ` context line +?wat line + context line +`, + Fragment: TextFragment{ + OldLines: 3, + NewLines: 3, + }, + Err: true, + }, + "unbalancedHeader": { + Input: ` context line +-old line 1 ++new line 1 + context line +`, + Fragment: TextFragment{ + OldLines: 2, + NewLines: 5, + }, + Err: true, + }, + "onlyContext": { + Input: ` context line + context line +`, + Fragment: TextFragment{ + OldLines: 2, + NewLines: 2, + }, + Err: true, + }, + "unexpectedNoNewlineMarker": { + Input: `\ No newline at end of file`, + Fragment: TextFragment{ + OldLines: 1, + NewLines: 1, + }, + Err: true, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + p := newTestParser(test.Input, true) + + frag := test.Fragment + err := p.ParseTextChunk(&frag) + if test.Err { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing text chunk, but got %v", err) + } + return + } + if err != nil { + t.Fatalf("error parsing text chunk: %v", err) + } + + if !reflect.DeepEqual(test.Output, &frag) { + t.Errorf("incorrect fragment\nexpected: %+v\nactual: %+v", test.Output, &frag) + } + }) + } +} + +func TestParseTextFragments(t *testing.T) { + tests := map[string]struct { + Input string + File File + + Fragments []*TextFragment + Err bool + }{ + "multipleChanges": { + Input: `@@ -1,3 +1,2 @@ + context line +-old line 1 + context line +@@ -8,3 +7,3 @@ + context line +-old line 2 ++new line 1 + context line +@@ -15,3 +14,4 @@ + context line +-old line 3 ++new line 2 ++new line 3 + context line +`, + Fragments: []*TextFragment{ + { + OldPosition: 1, + OldLines: 3, + NewPosition: 1, + NewLines: 2, + Lines: []Line{ + {OpContext, "context line\n"}, + {OpDelete, "old line 1\n"}, + {OpContext, "context line\n"}, + }, + LinesDeleted: 1, + LeadingContext: 1, + TrailingContext: 1, + }, + { + OldPosition: 8, + OldLines: 3, + NewPosition: 7, + NewLines: 3, + Lines: []Line{ + {OpContext, "context line\n"}, + {OpDelete, "old line 2\n"}, + {OpAdd, "new line 1\n"}, + {OpContext, "context line\n"}, + }, + LinesDeleted: 1, + LinesAdded: 1, + LeadingContext: 1, + TrailingContext: 1, + }, + { + OldPosition: 15, + OldLines: 3, + NewPosition: 14, + NewLines: 4, + Lines: []Line{ + {OpContext, "context line\n"}, + {OpDelete, "old line 3\n"}, + {OpAdd, "new line 2\n"}, + {OpAdd, "new line 3\n"}, + {OpContext, "context line\n"}, + }, + LinesDeleted: 1, + LinesAdded: 2, + LeadingContext: 1, + TrailingContext: 1, + }, + }, + }, + "badNewFile": { + Input: `@@ -1 +1,2 @@ +-old line 1 
++new line 1 ++new line 2 +`, + File: File{ + IsNew: true, + }, + Err: true, + }, + "badDeletedFile": { + Input: `@@ -1,2 +1 @@ +-old line 1 + context line +`, + File: File{ + IsDelete: true, + }, + Err: true, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + p := newTestParser(test.Input, true) + + file := test.File + n, err := p.ParseTextFragments(&file) + if test.Err { + if err == nil || err == io.EOF { + t.Fatalf("expected error parsing text fragments, but got %v", err) + } + return + } + if err != nil { + t.Fatalf("error parsing text fragments: %v", err) + } + + if len(test.Fragments) != n { + t.Fatalf("incorrect number of added fragments: expected %d, actual %d", len(test.Fragments), n) + } + + for i, frag := range test.Fragments { + if !reflect.DeepEqual(frag, file.TextFragments[i]) { + t.Errorf("incorrect fragment at position %d\nexpected: %+v\nactual: %+v", i, frag, file.TextFragments[i]) + } + } + }) + } +} |
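
The testdata fixtures and the text.go parser above are exercised only through the package's own tests. For orientation, the sketch below shows how the package-level API could be driven against one of the testdata/apply fixtures. It is a minimal sketch, not part of the vendored sources: it assumes this vendored copy keeps upstream bluekeyes/go-gitdiff's exported Parse and Apply functions, and the import path and fixture name are illustrative placeholders, not paths defined by this repository.

    // Sketch only: drives the vendored gitdiff package against one of the
    // testdata/apply fixtures shown above. The import path is hypothetical.
    package main

    import (
        "bytes"
        "fmt"
        "log"
        "os"

        "example.com/project/pkg/gitdiff" // hypothetical module path for the vendored copy
    )

    func main() {
        // Open a patch, e.g. one of the testdata/apply/*.patch fixtures.
        patch, err := os.Open("pkg/gitdiff/testdata/apply/text_fragment_add_middle.patch")
        if err != nil {
            log.Fatal(err)
        }
        defer patch.Close()

        // Parse (assumed to match upstream: Parse(io.Reader) ([]*File, string, error))
        // returns one *gitdiff.File per changed file plus any preamble text.
        files, preamble, err := gitdiff.Parse(patch)
        if err != nil {
            log.Fatal(err)
        }
        fmt.Printf("preamble: %q, files: %d\n", preamble, len(files))

        // Apply the parsed fragments to the matching .src content.
        src, err := os.ReadFile("pkg/gitdiff/testdata/apply/text_fragment_add_middle.src")
        if err != nil {
            log.Fatal(err)
        }

        var out bytes.Buffer
        if err := gitdiff.Apply(&out, bytes.NewReader(src), files[0]); err != nil {
            // Apply failures are reported as *ApplyError values carrying
            // line and fragment positions; conflicts wrap a *Conflict.
            log.Fatal(err)
        }
        fmt.Print(out.String())
    }

If the vendored API matches upstream, any of the *.patch/*.src pairs above can be substituted, and the corresponding *.out fixture appears to hold the expected result of applying the patch to the .src content.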
