diff options
| author | mo khan <mo@mokhan.ca> | 2026-01-30 19:31:54 -0700 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2026-01-30 19:31:54 -0700 |
| commit | c4260fbcae4a2112c80d448bb277abe235f724d0 (patch) | |
| tree | e7f7ecdc67c1cf028e4017e5ce7731ea9cf2c52d /internal/gitdiff/file_header.go | |
| parent | 95ae423958564f810e2f3f833498dc792c589dd7 (diff) | |
refactor: replace gitdiff with external package
Diffstat (limited to 'internal/gitdiff/file_header.go')
| -rw-r--r-- | internal/gitdiff/file_header.go | 546 |
1 files changed, 0 insertions, 546 deletions
diff --git a/internal/gitdiff/file_header.go b/internal/gitdiff/file_header.go deleted file mode 100644 index 7ae4bc9..0000000 --- a/internal/gitdiff/file_header.go +++ /dev/null @@ -1,546 +0,0 @@ -package gitdiff - -import ( - "fmt" - "io" - "os" - "strconv" - "strings" - "time" -) - -const ( - devNull = "/dev/null" -) - -// ParseNextFileHeader finds and parses the next file header in the stream. If -// a header is found, it returns a file and all input before the header. It -// returns nil if no headers are found before the end of the input. -func (p *parser) ParseNextFileHeader() (*File, string, error) { - var preamble strings.Builder - var file *File - for { - // check for disconnected fragment headers (corrupt patch) - frag, err := p.ParseTextFragmentHeader() - if err != nil { - // not a valid header, nothing to worry about - goto NextLine - } - if frag != nil { - return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header()) - } - - // check for a git-generated patch - file, err = p.ParseGitFileHeader() - if err != nil { - return nil, "", err - } - if file != nil { - return file, preamble.String(), nil - } - - // check for a "traditional" patch - file, err = p.ParseTraditionalFileHeader() - if err != nil { - return nil, "", err - } - if file != nil { - return file, preamble.String(), nil - } - - NextLine: - preamble.WriteString(p.Line(0)) - if err := p.Next(); err != nil { - if err == io.EOF { - break - } - return nil, "", err - } - } - return nil, preamble.String(), nil -} - -func (p *parser) ParseGitFileHeader() (*File, error) { - const prefix = "diff --git " - - if !strings.HasPrefix(p.Line(0), prefix) { - return nil, nil - } - header := p.Line(0)[len(prefix):] - - defaultName, err := parseGitHeaderName(header) - if err != nil { - return nil, p.Errorf(0, "git file header: %v", err) - } - - f := &File{} - for { - end, err := parseGitHeaderData(f, p.Line(1), defaultName) - if err != nil { - return nil, p.Errorf(1, "git file header: %v", err) - } - - if err := p.Next(); err != nil { - if err == io.EOF { - break - } - return nil, err - } - - if end { - break - } - } - - if f.OldName == "" && f.NewName == "" { - if defaultName == "" { - return nil, p.Errorf(0, "git file header: missing filename information") - } - f.OldName = defaultName - f.NewName = defaultName - } - - if (f.NewName == "" && !f.IsDelete) || (f.OldName == "" && !f.IsNew) { - return nil, p.Errorf(0, "git file header: missing filename information") - } - - return f, nil -} - -func (p *parser) ParseTraditionalFileHeader() (*File, error) { - const shortestValidFragHeader = "@@ -1 +1 @@\n" - const ( - oldPrefix = "--- " - newPrefix = "+++ " - ) - - oldLine, newLine := p.Line(0), p.Line(1) - - if !strings.HasPrefix(oldLine, oldPrefix) || !strings.HasPrefix(newLine, newPrefix) { - return nil, nil - } - // heuristic: only a file header if followed by a (probable) fragment header - if len(p.Line(2)) < len(shortestValidFragHeader) || !strings.HasPrefix(p.Line(2), "@@ -") { - return nil, nil - } - - // advance past the first two lines so parser is after the header - // no EOF check needed because we know there are >=3 valid lines - if err := p.Next(); err != nil { - return nil, err - } - if err := p.Next(); err != nil { - return nil, err - } - - oldName, _, err := parseName(oldLine[len(oldPrefix):], '\t', 0) - if err != nil { - return nil, p.Errorf(0, "file header: %v", err) - } - - newName, _, err := parseName(newLine[len(newPrefix):], '\t', 0) - if err != nil { - return nil, p.Errorf(1, "file header: %v", err) - } - - f := &File{} - switch { - case oldName == devNull || hasEpochTimestamp(oldLine): - f.IsNew = true - f.NewName = newName - case newName == devNull || hasEpochTimestamp(newLine): - f.IsDelete = true - f.OldName = oldName - default: - // if old name is a prefix of new name, use that instead - // this avoids picking variants like "file.bak" or "file~" - if strings.HasPrefix(newName, oldName) { - f.OldName = oldName - f.NewName = oldName - } else { - f.OldName = newName - f.NewName = newName - } - } - - return f, nil -} - -// parseGitHeaderName extracts a default file name from the Git file header -// line. This is required for mode-only changes and creation/deletion of empty -// files. Other types of patch include the file name(s) in the header data. -// If the names in the header do not match because the patch is a rename, -// return an empty default name. -func parseGitHeaderName(header string) (string, error) { - header = strings.TrimSuffix(header, "\n") - if len(header) == 0 { - return "", nil - } - - var err error - var first, second string - - // there are 4 cases to account for: - // - // 1) unquoted unquoted - // 2) unquoted "quoted" - // 3) "quoted" unquoted - // 4) "quoted" "quoted" - // - quote := strings.IndexByte(header, '"') - switch { - case quote < 0: - // case 1 - first = header - - case quote > 0: - // case 2 - first = header[:quote-1] - if !isSpace(header[quote-1]) { - return "", fmt.Errorf("missing separator") - } - - second, _, err = parseQuotedName(header[quote:]) - if err != nil { - return "", err - } - - case quote == 0: - // case 3 or case 4 - var n int - first, n, err = parseQuotedName(header) - if err != nil { - return "", err - } - - // git accepts multiple spaces after a quoted name, but not after an - // unquoted name, since the name might end with one or more spaces - for n < len(header) && isSpace(header[n]) { - n++ - } - if n == len(header) { - return "", nil - } - - if header[n] == '"' { - second, _, err = parseQuotedName(header[n:]) - if err != nil { - return "", err - } - } else { - second = header[n:] - } - } - - first = trimTreePrefix(first, 1) - if second != "" { - if first == trimTreePrefix(second, 1) { - return first, nil - } - return "", nil - } - - // at this point, both names are unquoted (case 1) - // since names may contain spaces, we can't use a known separator - // instead, look for a split that produces two equal names - - for i := 0; i < len(first)-1; i++ { - if !isSpace(first[i]) { - continue - } - second = trimTreePrefix(first[i+1:], 1) - if name := first[:i]; name == second { - return name, nil - } - } - return "", nil -} - -// parseGitHeaderData parses a single line of metadata from a Git file header. -// It returns true when header parsing is complete; in that case, line was the -// first line of non-header content. -func parseGitHeaderData(f *File, line, defaultName string) (end bool, err error) { - if len(line) > 0 && line[len(line)-1] == '\n' { - line = line[:len(line)-1] - } - - for _, hdr := range []struct { - prefix string - end bool - parse func(*File, string, string) error - }{ - {"@@ -", true, nil}, - {"--- ", false, parseGitHeaderOldName}, - {"+++ ", false, parseGitHeaderNewName}, - {"old mode ", false, parseGitHeaderOldMode}, - {"new mode ", false, parseGitHeaderNewMode}, - {"deleted file mode ", false, parseGitHeaderDeletedMode}, - {"new file mode ", false, parseGitHeaderCreatedMode}, - {"copy from ", false, parseGitHeaderCopyFrom}, - {"copy to ", false, parseGitHeaderCopyTo}, - {"rename old ", false, parseGitHeaderRenameFrom}, - {"rename new ", false, parseGitHeaderRenameTo}, - {"rename from ", false, parseGitHeaderRenameFrom}, - {"rename to ", false, parseGitHeaderRenameTo}, - {"similarity index ", false, parseGitHeaderScore}, - {"dissimilarity index ", false, parseGitHeaderScore}, - {"index ", false, parseGitHeaderIndex}, - } { - if strings.HasPrefix(line, hdr.prefix) { - if hdr.parse != nil { - err = hdr.parse(f, line[len(hdr.prefix):], defaultName) - } - return hdr.end, err - } - } - - // unknown line indicates the end of the header - // this usually happens if the diff is empty - return true, nil -} - -func parseGitHeaderOldName(f *File, line, defaultName string) error { - name, _, err := parseName(line, '\t', 1) - if err != nil { - return err - } - if f.OldName == "" && !f.IsNew { - f.OldName = name - return nil - } - return verifyGitHeaderName(name, f.OldName, f.IsNew, "old") -} - -func parseGitHeaderNewName(f *File, line, defaultName string) error { - name, _, err := parseName(line, '\t', 1) - if err != nil { - return err - } - if f.NewName == "" && !f.IsDelete { - f.NewName = name - return nil - } - return verifyGitHeaderName(name, f.NewName, f.IsDelete, "new") -} - -func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) { - f.OldMode, err = parseMode(strings.TrimSpace(line)) - return -} - -func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) { - f.NewMode, err = parseMode(strings.TrimSpace(line)) - return -} - -func parseGitHeaderDeletedMode(f *File, line, defaultName string) error { - f.IsDelete = true - f.OldName = defaultName - return parseGitHeaderOldMode(f, line, defaultName) -} - -func parseGitHeaderCreatedMode(f *File, line, defaultName string) error { - f.IsNew = true - f.NewName = defaultName - return parseGitHeaderNewMode(f, line, defaultName) -} - -func parseGitHeaderCopyFrom(f *File, line, defaultName string) (err error) { - f.IsCopy = true - f.OldName, _, err = parseName(line, 0, 0) - return -} - -func parseGitHeaderCopyTo(f *File, line, defaultName string) (err error) { - f.IsCopy = true - f.NewName, _, err = parseName(line, 0, 0) - return -} - -func parseGitHeaderRenameFrom(f *File, line, defaultName string) (err error) { - f.IsRename = true - f.OldName, _, err = parseName(line, 0, 0) - return -} - -func parseGitHeaderRenameTo(f *File, line, defaultName string) (err error) { - f.IsRename = true - f.NewName, _, err = parseName(line, 0, 0) - return -} - -func parseGitHeaderScore(f *File, line, defaultName string) error { - score, err := strconv.ParseInt(strings.TrimSuffix(line, "%"), 10, 32) - if err != nil { - nerr := err.(*strconv.NumError) - return fmt.Errorf("invalid score line: %v", nerr.Err) - } - if score <= 100 { - f.Score = int(score) - } - return nil -} - -func parseGitHeaderIndex(f *File, line, defaultName string) error { - const sep = ".." - - // note that git stops parsing if the OIDs are too long to be valid - // checking this requires knowing if the repository uses SHA1 or SHA256 - // hashes, which we don't know, so we just skip that check - - parts := strings.SplitN(line, " ", 2) - oids := strings.SplitN(parts[0], sep, 2) - - if len(oids) < 2 { - return fmt.Errorf("invalid index line: missing %q", sep) - } - f.OldOIDPrefix, f.NewOIDPrefix = oids[0], oids[1] - - if len(parts) > 1 { - return parseGitHeaderOldMode(f, parts[1], defaultName) - } - return nil -} - -func parseMode(s string) (os.FileMode, error) { - mode, err := strconv.ParseInt(s, 8, 32) - if err != nil { - nerr := err.(*strconv.NumError) - return os.FileMode(0), fmt.Errorf("invalid mode line: %v", nerr.Err) - } - return os.FileMode(mode), nil -} - -// parseName extracts a file name from the start of a string and returns the -// name and the index of the first character after the name. If the name is -// unquoted and term is non-zero, parsing stops at the first occurrence of -// term. -// -// If the name is exactly "/dev/null", no further processing occurs. Otherwise, -// if dropPrefix is greater than zero, that number of prefix components -// separated by forward slashes are dropped from the name and any duplicate -// slashes are collapsed. -func parseName(s string, term byte, dropPrefix int) (name string, n int, err error) { - if len(s) > 0 && s[0] == '"' { - name, n, err = parseQuotedName(s) - } else { - name, n, err = parseUnquotedName(s, term) - } - if err != nil { - return "", 0, err - } - if name == devNull { - return name, n, nil - } - return cleanName(name, dropPrefix), n, nil -} - -func parseQuotedName(s string) (name string, n int, err error) { - for n = 1; n < len(s); n++ { - if s[n] == '"' && s[n-1] != '\\' { - n++ - break - } - } - if n == 2 { - return "", 0, fmt.Errorf("missing name") - } - if name, err = strconv.Unquote(s[:n]); err != nil { - return "", 0, err - } - return name, n, err -} - -func parseUnquotedName(s string, term byte) (name string, n int, err error) { - for n = 0; n < len(s); n++ { - if s[n] == '\n' { - break - } - if term > 0 && s[n] == term { - break - } - } - if n == 0 { - return "", 0, fmt.Errorf("missing name") - } - return s[:n], n, nil -} - -// verifyGitHeaderName checks a parsed name against state set by previous lines -func verifyGitHeaderName(parsed, existing string, isNull bool, side string) error { - if existing != "" { - if isNull { - return fmt.Errorf("expected %s, but filename is set to %s", devNull, existing) - } - if existing != parsed { - return fmt.Errorf("inconsistent %s filename", side) - } - } - if isNull && parsed != devNull { - return fmt.Errorf("expected %s", devNull) - } - return nil -} - -// cleanName removes double slashes and drops prefix segments. -func cleanName(name string, drop int) string { - var b strings.Builder - for i := 0; i < len(name); i++ { - if name[i] == '/' { - if i < len(name)-1 && name[i+1] == '/' { - continue - } - if drop > 0 { - drop-- - b.Reset() - continue - } - } - b.WriteByte(name[i]) - } - return b.String() -} - -// trimTreePrefix removes up to n leading directory components from name. -func trimTreePrefix(name string, n int) string { - i := 0 - for ; i < len(name) && n > 0; i++ { - if name[i] == '/' { - n-- - } - } - return name[i:] -} - -// hasEpochTimestamp returns true if the string ends with a POSIX-formatted -// timestamp for the UNIX epoch after a tab character. According to git, this -// is used by GNU diff to mark creations and deletions. -func hasEpochTimestamp(s string) bool { - const posixTimeLayout = "2006-01-02 15:04:05.9 -0700" - - start := strings.IndexRune(s, '\t') - if start < 0 { - return false - } - - ts := strings.TrimSuffix(s[start+1:], "\n") - - // a valid timestamp can have optional ':' in zone specifier - // remove that if it exists so we have a single format - if len(ts) >= 3 && ts[len(ts)-3] == ':' { - ts = ts[:len(ts)-3] + ts[len(ts)-2:] - } - - t, err := time.Parse(posixTimeLayout, ts) - if err != nil { - return false - } - if !t.Equal(time.Unix(0, 0)) { - return false - } - return true -} - -func isSpace(c byte) bool { - return c == ' ' || c == '\t' || c == '\n' -} |
