summary | refs | log | tree | commit | diff
path: root/internal/gitdiff/file_header.go
diff options
context:
space:
mode:
author mo khan <mo@mokhan.ca> 2026-01-30 19:31:54 -0700
committer mo khan <mo@mokhan.ca> 2026-01-30 19:31:54 -0700
commit c4260fbcae4a2112c80d448bb277abe235f724d0 (patch)
tree e7f7ecdc67c1cf028e4017e5ce7731ea9cf2c52d /internal/gitdiff/file_header.go
parent 95ae423958564f810e2f3f833498dc792c589dd7 (diff)
refactor: replace gitdiff with external package
Diffstat (limited to 'internal/gitdiff/file_header.go')
-rw-r--r-- internal/gitdiff/file_header.go 546
1 files changed, 0 insertions, 546 deletions
diff --git a/internal/gitdiff/file_header.go b/internal/gitdiff/file_header.go
deleted file mode 100644
index 7ae4bc9..0000000
--- a/internal/gitdiff/file_header.go
+++ /dev/null
@@ -1,546 +0,0 @@
-package gitdiff
-
-import (
- "fmt"
- "io"
- "os"
- "strconv"
- "strings"
- "time"
-)
-
-const (
- devNull = "/dev/null"
-)
-
-// ParseNextFileHeader finds and parses the next file header in the stream. If
-// a header is found, it returns a file and all input before the header. It
-// returns nil if no headers are found before the end of the input.
-func (p *parser) ParseNextFileHeader() (*File, string, error) {
- var preamble strings.Builder
- var file *File
- for {
- // check for disconnected fragment headers (corrupt patch)
- frag, err := p.ParseTextFragmentHeader()
- if err != nil {
- // not a valid header, nothing to worry about
- goto NextLine
- }
- if frag != nil {
- return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header())
- }
-
- // check for a git-generated patch
- file, err = p.ParseGitFileHeader()
- if err != nil {
- return nil, "", err
- }
- if file != nil {
- return file, preamble.String(), nil
- }
-
- // check for a "traditional" patch
- file, err = p.ParseTraditionalFileHeader()
- if err != nil {
- return nil, "", err
- }
- if file != nil {
- return file, preamble.String(), nil
- }
-
- NextLine:
- preamble.WriteString(p.Line(0))
- if err := p.Next(); err != nil {
- if err == io.EOF {
- break
- }
- return nil, "", err
- }
- }
- return nil, preamble.String(), nil
-}
-
-func (p *parser) ParseGitFileHeader() (*File, error) {
- const prefix = "diff --git "
-
- if !strings.HasPrefix(p.Line(0), prefix) {
- return nil, nil
- }
- header := p.Line(0)[len(prefix):]
-
- defaultName, err := parseGitHeaderName(header)
- if err != nil {
- return nil, p.Errorf(0, "git file header: %v", err)
- }
-
- f := &File{}
- for {
- end, err := parseGitHeaderData(f, p.Line(1), defaultName)
- if err != nil {
- return nil, p.Errorf(1, "git file header: %v", err)
- }
-
- if err := p.Next(); err != nil {
- if err == io.EOF {
- break
- }
- return nil, err
- }
-
- if end {
- break
- }
- }
-
- if f.OldName == "" && f.NewName == "" {
- if defaultName == "" {
- return nil, p.Errorf(0, "git file header: missing filename information")
- }
- f.OldName = defaultName
- f.NewName = defaultName
- }
-
- if (f.NewName == "" && !f.IsDelete) || (f.OldName == "" && !f.IsNew) {
- return nil, p.Errorf(0, "git file header: missing filename information")
- }
-
- return f, nil
-}
-
-func (p *parser) ParseTraditionalFileHeader() (*File, error) {
- const shortestValidFragHeader = "@@ -1 +1 @@\n"
- const (
- oldPrefix = "--- "
- newPrefix = "+++ "
- )
-
- oldLine, newLine := p.Line(0), p.Line(1)
-
- if !strings.HasPrefix(oldLine, oldPrefix) || !strings.HasPrefix(newLine, newPrefix) {
- return nil, nil
- }
- // heuristic: only a file header if followed by a (probable) fragment header
- if len(p.Line(2)) < len(shortestValidFragHeader) || !strings.HasPrefix(p.Line(2), "@@ -") {
- return nil, nil
- }
-
- // advance past the first two lines so parser is after the header
- // no EOF check needed because we know there are >=3 valid lines
- if err := p.Next(); err != nil {
- return nil, err
- }
- if err := p.Next(); err != nil {
- return nil, err
- }
-
- oldName, _, err := parseName(oldLine[len(oldPrefix):], '\t', 0)
- if err != nil {
- return nil, p.Errorf(0, "file header: %v", err)
- }
-
- newName, _, err := parseName(newLine[len(newPrefix):], '\t', 0)
- if err != nil {
- return nil, p.Errorf(1, "file header: %v", err)
- }
-
- f := &File{}
- switch {
- case oldName == devNull || hasEpochTimestamp(oldLine):
- f.IsNew = true
- f.NewName = newName
- case newName == devNull || hasEpochTimestamp(newLine):
- f.IsDelete = true
- f.OldName = oldName
- default:
- // if old name is a prefix of new name, use that instead
- // this avoids picking variants like "file.bak" or "file~"
- if strings.HasPrefix(newName, oldName) {
- f.OldName = oldName
- f.NewName = oldName
- } else {
- f.OldName = newName
- f.NewName = newName
- }
- }
-
- return f, nil
-}
-
-// parseGitHeaderName extracts a default file name from the Git file header
-// line. This is required for mode-only changes and creation/deletion of empty
-// files. Other types of patch include the file name(s) in the header data.
-// If the names in the header do not match because the patch is a rename,
-// return an empty default name.
-func parseGitHeaderName(header string) (string, error) {
- header = strings.TrimSuffix(header, "\n")
- if len(header) == 0 {
- return "", nil
- }
-
- var err error
- var first, second string
-
- // there are 4 cases to account for:
- //
- // 1) unquoted unquoted
- // 2) unquoted "quoted"
- // 3) "quoted" unquoted
- // 4) "quoted" "quoted"
- //
- quote := strings.IndexByte(header, '"')
- switch {
- case quote < 0:
- // case 1
- first = header
-
- case quote > 0:
- // case 2
- first = header[:quote-1]
- if !isSpace(header[quote-1]) {
- return "", fmt.Errorf("missing separator")
- }
-
- second, _, err = parseQuotedName(header[quote:])
- if err != nil {
- return "", err
- }
-
- case quote == 0:
- // case 3 or case 4
- var n int
- first, n, err = parseQuotedName(header)
- if err != nil {
- return "", err
- }
-
- // git accepts multiple spaces after a quoted name, but not after an
- // unquoted name, since the name might end with one or more spaces
- for n < len(header) && isSpace(header[n]) {
- n++
- }
- if n == len(header) {
- return "", nil
- }
-
- if header[n] == '"' {
- second, _, err = parseQuotedName(header[n:])
- if err != nil {
- return "", err
- }
- } else {
- second = header[n:]
- }
- }
-
- first = trimTreePrefix(first, 1)
- if second != "" {
- if first == trimTreePrefix(second, 1) {
- return first, nil
- }
- return "", nil
- }
-
- // at this point, both names are unquoted (case 1)
- // since names may contain spaces, we can't use a known separator
- // instead, look for a split that produces two equal names
-
- for i := 0; i < len(first)-1; i++ {
- if !isSpace(first[i]) {
- continue
- }
- second = trimTreePrefix(first[i+1:], 1)
- if name := first[:i]; name == second {
- return name, nil
- }
- }
- return "", nil
-}
-
-// parseGitHeaderData parses a single line of metadata from a Git file header.
-// It returns true when header parsing is complete; in that case, line was the
-// first line of non-header content.
-func parseGitHeaderData(f *File, line, defaultName string) (end bool, err error) {
- if len(line) > 0 && line[len(line)-1] == '\n' {
- line = line[:len(line)-1]
- }
-
- for _, hdr := range []struct {
- prefix string
- end bool
- parse func(*File, string, string) error
- }{
- {"@@ -", true, nil},
- {"--- ", false, parseGitHeaderOldName},
- {"+++ ", false, parseGitHeaderNewName},
- {"old mode ", false, parseGitHeaderOldMode},
- {"new mode ", false, parseGitHeaderNewMode},
- {"deleted file mode ", false, parseGitHeaderDeletedMode},
- {"new file mode ", false, parseGitHeaderCreatedMode},
- {"copy from ", false, parseGitHeaderCopyFrom},
- {"copy to ", false, parseGitHeaderCopyTo},
- {"rename old ", false, parseGitHeaderRenameFrom},
- {"rename new ", false, parseGitHeaderRenameTo},
- {"rename from ", false, parseGitHeaderRenameFrom},
- {"rename to ", false, parseGitHeaderRenameTo},
- {"similarity index ", false, parseGitHeaderScore},
- {"dissimilarity index ", false, parseGitHeaderScore},
- {"index ", false, parseGitHeaderIndex},
- } {
- if strings.HasPrefix(line, hdr.prefix) {
- if hdr.parse != nil {
- err = hdr.parse(f, line[len(hdr.prefix):], defaultName)
- }
- return hdr.end, err
- }
- }
-
- // unknown line indicates the end of the header
- // this usually happens if the diff is empty
- return true, nil
-}
-
-func parseGitHeaderOldName(f *File, line, defaultName string) error {
- name, _, err := parseName(line, '\t', 1)
- if err != nil {
- return err
- }
- if f.OldName == "" && !f.IsNew {
- f.OldName = name
- return nil
- }
- return verifyGitHeaderName(name, f.OldName, f.IsNew, "old")
-}
-
-func parseGitHeaderNewName(f *File, line, defaultName string) error {
- name, _, err := parseName(line, '\t', 1)
- if err != nil {
- return err
- }
- if f.NewName == "" && !f.IsDelete {
- f.NewName = name
- return nil
- }
- return verifyGitHeaderName(name, f.NewName, f.IsDelete, "new")
-}
-
-func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) {
- f.OldMode, err = parseMode(strings.TrimSpace(line))
- return
-}
-
-func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) {
- f.NewMode, err = parseMode(strings.TrimSpace(line))
- return
-}
-
-func parseGitHeaderDeletedMode(f *File, line, defaultName string) error {
- f.IsDelete = true
- f.OldName = defaultName
- return parseGitHeaderOldMode(f, line, defaultName)
-}
-
-func parseGitHeaderCreatedMode(f *File, line, defaultName string) error {
- f.IsNew = true
- f.NewName = defaultName
- return parseGitHeaderNewMode(f, line, defaultName)
-}
-
-func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) {
- f.IsCopy = true
- f.OldName, _, err = parseName(line, 0, 0)
- return
-}
-
-func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) {
- f.IsCopy = true
- f.NewName, _, err = parseName(line, 0, 0)
- return
-}
-
-func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) {
- f.IsRename = true
- f.OldName, _, err = parseName(line, 0, 0)
- return
-}
-
-func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) {
- f.IsRename = true
- f.NewName, _, err = parseName(line, 0, 0)
- return
-}
-
-func parseGitHeaderScore(f *File, line, defaultName string) error {
- score, err := strconv.ParseInt(strings.TrimSuffix(line, "%"), 10, 32)
- if err != nil {
- nerr := err.(*strconv.NumError)
- return fmt.Errorf("invalid score line: %v", nerr.Err)
- }
- if score <= 100 {
- f.Score = int(score)
- }
- return nil
-}
-
-func parseGitHeaderIndex(f *File, line, defaultName string) error {
- const sep = ".."
-
- // note that git stops parsing if the OIDs are too long to be valid
- // checking this requires knowing if the repository uses SHA1 or SHA256
- // hashes, which we don't know, so we just skip that check
-
- parts := strings.SplitN(line, " ", 2)
- oids := strings.SplitN(parts[0], sep, 2)
-
- if len(oids) < 2 {
- return fmt.Errorf("invalid index line: missing %q", sep)
- }
- f.OldOIDPrefix, f.NewOIDPrefix = oids[0], oids[1]
-
- if len(parts) > 1 {
- return parseGitHeaderOldMode(f, parts[1], defaultName)
- }
- return nil
-}
-
-func parseMode(s string) (os.FileMode, error) {
- mode, err := strconv.ParseInt(s, 8, 32)
- if err != nil {
- nerr := err.(*strconv.NumError)
- return os.FileMode(0), fmt.Errorf("invalid mode line: %v", nerr.Err)
- }
- return os.FileMode(mode), nil
-}
-
-// parseName extracts a file name from the start of a string and returns the
-// name and the index of the first character after the name. If the name is
-// unquoted and term is non-zero, parsing stops at the first occurrence of
-// term.
-//
-// If the name is exactly "/dev/null", no further processing occurs. Otherwise,
-// if dropPrefix is greater than zero, that number of prefix components
-// separated by forward slashes are dropped from the name and any duplicate
-// slashes are collapsed.
-func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) {
- if len(s) > 0 && s[0] == '"' {
- name, n, err = parseQuotedName(s)
- } else {
- name, n, err = parseUnquotedName(s, term)
- }
- if err != nil {
- return "", 0, err
- }
- if name == devNull {
- return name, n, nil
- }
- return cleanName(name, dropPrefix), n, nil
-}
-
-func parseQuotedName(s string) (name string, n int, err error) {
- for n = 1; n < len(s); n++ {
- if s[n] == '"' && s[n-1] != '\\' {
- n++
- break
- }
- }
- if n == 2 {
- return "", 0, fmt.Errorf("missing name")
- }
- if name, err = strconv.Unquote(s[:n]); err != nil {
- return "", 0, err
- }
- return name, n, err
-}
-
-func parseUnquotedName(s string, term byte) (name string, n int, err error) {
- for n = 0; n < len(s); n++ {
- if s[n] == '\n' {
- break
- }
- if term > 0 && s[n] == term {
- break
- }
- }
- if n == 0 {
- return "", 0, fmt.Errorf("missing name")
- }
- return s[:n], n, nil
-}
-
-// verifyGitHeaderName checks a parsed name against state set by previous lines
-func verifyGitHeaderName(parsed, existing string, isNull bool, side string) error {
- if existing != "" {
- if isNull {
- return fmt.Errorf("expected %s, but filename is set to %s", devNull, existing)
- }
- if existing != parsed {
- return fmt.Errorf("inconsistent %s filename", side)
- }
- }
- if isNull && parsed != devNull {
- return fmt.Errorf("expected %s", devNull)
- }
- return nil
-}
-
-// cleanName removes double slashes and drops prefix segments.
-func cleanName(name string, drop int) string {
- var b strings.Builder
- for i := 0; i < len(name); i++ {
- if name[i] == '/' {
- if i < len(name)-1 && name[i+1] == '/' {
- continue
- }
- if drop > 0 {
- drop--
- b.Reset()
- continue
- }
- }
- b.WriteByte(name[i])
- }
- return b.String()
-}
-
-// trimTreePrefix removes up to n leading directory components from name.
-func trimTreePrefix(name string, n int) string {
- i := 0
- for ; i < len(name) && n > 0; i++ {
- if name[i] == '/' {
- n--
- }
- }
- return name[i:]
-}
-
-// hasEpochTimestamp returns true if the string ends with a POSIX-formatted
-// timestamp for the UNIX epoch after a tab character. According to git, this
-// is used by GNU diff to mark creations and deletions.
-func hasEpochTimestamp(s string) bool {
- const posixTimeLayout = "2006-01-02 15:04:05.9 -0700"
-
- start := strings.IndexRune(s, '\t')
- if start < 0 {
- return false
- }
-
- ts := strings.TrimSuffix(s[start+1:], "\n")
-
- // a valid timestamp can have optional ':' in zone specifier
- // remove that if it exists so we have a single format
- if len(ts) >= 3 && ts[len(ts)-3] == ':' {
- ts = ts[:len(ts)-3] + ts[len(ts)-2:]
- }
-
- t, err := time.Parse(posixTimeLayout, ts)
- if err != nil {
- return false
- }
- if !t.Equal(time.Unix(0, 0)) {
- return false
- }
- return true
-}
-
-func isSpace(c byte) bool {
- return c == ' ' || c == '\t' || c == '\n'
-}