summary | refs | log | tree | commit | diff
path: root/internal/gitdiff/patch_header.go
diff options
context:
space:
mode:
author: mo khan <mo@mokhan.ca> 2026-01-30 19:31:54 -0700
committer: mo khan <mo@mokhan.ca> 2026-01-30 19:31:54 -0700
commit: c4260fbcae4a2112c80d448bb277abe235f724d0 (patch)
tree: e7f7ecdc67c1cf028e4017e5ce7731ea9cf2c52d /internal/gitdiff/patch_header.go
parent: 95ae423958564f810e2f3f833498dc792c589dd7 (diff)
refactor: replace gitdiff with external package
Diffstat (limited to 'internal/gitdiff/patch_header.go')
-rw-r--r-- internal/gitdiff/patch_header.go 470
1 files changed, 0 insertions, 470 deletions
diff --git a/internal/gitdiff/patch_header.go b/internal/gitdiff/patch_header.go
deleted file mode 100644
index f047059..0000000
--- a/internal/gitdiff/patch_header.go
+++ /dev/null
@@ -1,470 +0,0 @@
-package gitdiff
-
-import (
- "bufio"
- "errors"
- "fmt"
- "io"
- "io/ioutil"
- "mime/quotedprintable"
- "net/mail"
- "strconv"
- "strings"
- "time"
- "unicode"
-)
-
// Prefixes of the first preamble line. ParsePatchHeader uses them to decide
// which header parser to run.
const (
	// prettyHeaderPrefix starts the first line of a pretty-formatted header;
	// the text following it is read as the commit SHA.
	prettyHeaderPrefix = "commit "

	// mailHeaderPrefix starts the first line of a mail-formatted header. That
	// line is handled separately from the mail message that follows it.
	mailHeaderPrefix = "From "

	// mailMinimumHeaderPrefix starts a mail-formatted header whose first line
	// is already part of the mail content (a "From:" header field).
	mailMinimumHeaderPrefix = "From:"
)
-
-// PatchHeader is a parsed version of the preamble content that appears before
-// the first diff in a patch. It includes metadata about the patch, such as the
-// author and a subject.
-type PatchHeader struct {
- // The SHA of the commit the patch was generated from. Empty if the SHA is
- // not included in the header.
- SHA string
-
- // The author details of the patch. If these details are not included in
- // the header, Author is nil and AuthorDate is the zero time.
- Author *PatchIdentity
- AuthorDate time.Time
-
- // The committer details of the patch. If these details are not included in
- // the header, Committer is nil and CommitterDate is the zero time.
- Committer *PatchIdentity
- CommitterDate time.Time
-
- // The title and body of the commit message describing the changes in the
- // patch. Empty if no message is included in the header.
- Title string
- Body string
-
- // If the preamble looks like an email, ParsePatchHeader will
- // remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the
- // Title and place them here.
- SubjectPrefix string
-
- // If the preamble looks like an email, and it contains a `---`
- // line, that line will be removed and everything after it will be
- // placed in BodyAppendix.
- BodyAppendix string
-}
-
-// Message returns the commit message for the header. The message consists of
-// the title and the body separated by an empty line.
-func (h *PatchHeader) Message() string {
- var msg strings.Builder
- if h != nil {
- msg.WriteString(h.Title)
- if h.Body != "" {
- msg.WriteString("\n\n")
- msg.WriteString(h.Body)
- }
- }
- return msg.String()
-}
-
// ParsePatchDate parses a patch date string. It returns the parsed time or an
// error if s has an unknown format. ParsePatchDate supports the iso, rfc,
// short, raw, unix, and default formats (with local variants) used by the
// --date flag in Git.
//
// An empty string is not an error: it yields the zero time and a nil error.
// Formats that carry no zone offset are interpreted in time.Local.
func ParsePatchDate(s string) (time.Time, error) {
	const (
		isoFormat          = "2006-01-02 15:04:05 -0700"
		isoStrictFormat    = "2006-01-02T15:04:05-07:00"
		rfc2822Format      = "Mon, 2 Jan 2006 15:04:05 -0700"
		shortFormat        = "2006-01-02"
		defaultFormat      = "Mon Jan 2 15:04:05 2006 -0700"
		defaultLocalFormat = "Mon Jan 2 15:04:05 2006"
	)

	if s == "" {
		return time.Time{}, nil
	}

	// The loop variable is deliberately not called "fmt": that name would
	// shadow the fmt package for the duration of the loop.
	layouts := []string{
		isoFormat,
		isoStrictFormat,
		rfc2822Format,
		shortFormat,
		defaultFormat,
		defaultLocalFormat,
	}
	for _, layout := range layouts {
		if t, err := time.ParseInLocation(layout, s, time.Local); err == nil {
			return t, nil
		}
	}

	// unix format: a bare count of seconds since the epoch
	if unix, err := strconv.ParseInt(s, 10, 64); err == nil {
		return time.Unix(unix, 0), nil
	}

	// raw format: seconds since the epoch, a space, and a zone offset
	if space := strings.IndexByte(s, ' '); space > 0 {
		unix, uerr := strconv.ParseInt(s[:space], 10, 64)
		zone, zerr := time.Parse("-0700", s[space+1:])
		if uerr == nil && zerr == nil {
			return time.Unix(unix, 0).In(zone.Location()), nil
		}
	}

	return time.Time{}, fmt.Errorf("unknown date format: %s", s)
}
-
// SubjectCleanMode selects how much ParsePatchHeader strips from the subject
// line of a mail-formatted patch.
type SubjectCleanMode int

const (
	// SubjectCleanWhitespace strips only leading and trailing whitespace.
	SubjectCleanWhitespace SubjectCleanMode = iota

	// SubjectCleanAll strips leading and trailing whitespace, leading "Re:",
	// "re:", and ":" strings, and leading strings enclosed by '[' and ']'.
	// This mirrors the default behavior of git (see `git mailinfo`) and is
	// the default for this package.
	SubjectCleanAll

	// SubjectCleanPatchOnly behaves like SubjectCleanAll, except that a
	// leading string enclosed by '[' and ']' is stripped only if it contains
	// "PATCH".
	SubjectCleanPatchOnly
)

// patchHeaderOptions collects the settings that PatchHeaderOption values
// adjust.
type patchHeaderOptions struct {
	subjectCleanMode SubjectCleanMode
}

// A PatchHeaderOption customizes how ParsePatchHeader behaves.
type PatchHeaderOption func(*patchHeaderOptions)

// WithSubjectCleanMode returns an option that makes header parsing use the
// SubjectCleanMode m. Without this option, parsing uses SubjectCleanAll.
func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption {
	return func(o *patchHeaderOptions) { o.subjectCleanMode = m }
}
-
-// ParsePatchHeader parses the preamble string returned by [Parse] into a
-// PatchHeader. Due to the variety of header formats, some fields of the parsed
-// PatchHeader may be unset after parsing.
-//
-// Supported formats are the short, medium, full, fuller, and email pretty
-// formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox
-// format used by `git format-patch`.
-//
-// When parsing mail-formatted headers, ParsePatchHeader tries to remove
-// email-specific content from the title and body:
-//
-// - Based on the SubjectCleanMode, remove prefixes like reply markers and
-// "[PATCH]" strings from the subject, saving any removed content in the
-// SubjectPrefix field. Parsing always discards leading and trailing
-// whitespace from the subject line. The default mode is SubjectCleanAll.
-//
-// - If the body contains a "---" line (3 hyphens), remove that line and any
-// content after it from the body and save it in the BodyAppendix field.
-//
-// ParsePatchHeader tries to process content it does not understand wthout
-// returning errors, but will return errors if well-identified content like
-// dates or identies uses unknown or invalid formats.
-func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) {
- opts := patchHeaderOptions{
- subjectCleanMode: SubjectCleanAll, // match git defaults
- }
- for _, optFn := range options {
- optFn(&opts)
- }
-
- header = strings.TrimSpace(header)
- if header == "" {
- return &PatchHeader{}, nil
- }
-
- var firstLine, rest string
- if idx := strings.IndexByte(header, '\n'); idx >= 0 {
- firstLine = header[:idx]
- rest = header[idx+1:]
- } else {
- firstLine = header
- rest = ""
- }
-
- switch {
- case strings.HasPrefix(firstLine, mailHeaderPrefix):
- return parseHeaderMail(firstLine, strings.NewReader(rest), opts)
-
- case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix):
- // With a minimum header, the first line is part of the actual mail
- // content and needs to be parsed as part of the "rest"
- return parseHeaderMail("", strings.NewReader(header), opts)
-
- case strings.HasPrefix(firstLine, prettyHeaderPrefix):
- return parseHeaderPretty(firstLine, strings.NewReader(rest))
- }
-
- return nil, errors.New("unrecognized patch header format")
-}
-
-func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) {
- const (
- authorPrefix = "Author:"
- commitPrefix = "Commit:"
- datePrefix = "Date:"
- authorDatePrefix = "AuthorDate:"
- commitDatePrefix = "CommitDate:"
- )
-
- h := &PatchHeader{}
-
- prettyLine = strings.TrimPrefix(prettyLine, prettyHeaderPrefix)
- if i := strings.IndexByte(prettyLine, ' '); i > 0 {
- h.SHA = prettyLine[:i]
- } else {
- h.SHA = prettyLine
- }
-
- s := bufio.NewScanner(r)
- for s.Scan() {
- line := s.Text()
-
- // empty line marks end of fields, remaining lines are title/message
- if strings.TrimSpace(line) == "" {
- break
- }
-
- switch {
- case strings.HasPrefix(line, authorPrefix):
- u, err := ParsePatchIdentity(line[len(authorPrefix):])
- if err != nil {
- return nil, err
- }
- h.Author = &u
-
- case strings.HasPrefix(line, commitPrefix):
- u, err := ParsePatchIdentity(line[len(commitPrefix):])
- if err != nil {
- return nil, err
- }
- h.Committer = &u
-
- case strings.HasPrefix(line, datePrefix):
- d, err := ParsePatchDate(strings.TrimSpace(line[len(datePrefix):]))
- if err != nil {
- return nil, err
- }
- h.AuthorDate = d
-
- case strings.HasPrefix(line, authorDatePrefix):
- d, err := ParsePatchDate(strings.TrimSpace(line[len(authorDatePrefix):]))
- if err != nil {
- return nil, err
- }
- h.AuthorDate = d
-
- case strings.HasPrefix(line, commitDatePrefix):
- d, err := ParsePatchDate(strings.TrimSpace(line[len(commitDatePrefix):]))
- if err != nil {
- return nil, err
- }
- h.CommitterDate = d
- }
- }
- if s.Err() != nil {
- return nil, s.Err()
- }
-
- title, indent := scanMessageTitle(s)
- if s.Err() != nil {
- return nil, s.Err()
- }
- h.Title = title
-
- if title != "" {
- // Don't check for an appendix, pretty headers do not contain them
- body, _ := scanMessageBody(s, indent, false)
- if s.Err() != nil {
- return nil, s.Err()
- }
- h.Body = body
- }
-
- return h, nil
-}
-
// scanMessageTitle reads lines from s until a blank (whitespace-only) line or
// the end of input. The lines read are trimmed and joined with single spaces
// to form the title. indent is the leading whitespace of the first title
// line, or empty if that line is not indented. The terminating blank line is
// consumed.
func scanMessageTitle(s *bufio.Scanner) (title string, indent string) {
	var parts []string
	for s.Scan() {
		raw := s.Text()
		text := strings.TrimSpace(raw)
		if text == "" {
			break
		}

		// Only the first title line determines the indent.
		if len(parts) == 0 {
			lead := len(raw) - len(strings.TrimLeftFunc(raw, unicode.IsSpace))
			if lead > 0 {
				indent = raw[:lead]
			}
		}
		parts = append(parts, text)
	}
	return strings.Join(parts, " "), indent
}
-
// scanMessageBody reads the remaining lines of s and returns the message body
// and, optionally, an appendix.
//
// Trailing whitespace is removed from every line, then indent is removed from
// the front of the line if present. Blank lines are not copied directly: any
// run of blank lines between two content lines becomes a single empty line,
// and blank lines before the first or after the last content line of a
// section are dropped.
//
// When separateAppendix is true, the first line that equals "---" is dropped
// and every following line goes to the appendix instead of the body. When it
// is false, the appendix is always empty.
//
// Scanner errors are not reported here; callers check s.Err() afterwards.
func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) {
	var body, appendix strings.Builder
	cur := &body // section currently being written

	// pendingBlank records that blank lines were seen since the last
	// content line was written.
	pendingBlank := false
	for s.Scan() {
		line := strings.TrimRightFunc(s.Text(), unicode.IsSpace)
		line = strings.TrimPrefix(line, indent)

		if line == "" {
			pendingBlank = true
			continue
		}

		// If requested, parse out "appendix" information (often added
		// by `git format-patch` and removed by `git am`).
		if separateAppendix && cur == &body && line == "---" {
			cur = &appendix
			continue
		}

		if cur.Len() > 0 {
			cur.WriteByte('\n')
			if pendingBlank {
				cur.WriteByte('\n')
			}
		}
		pendingBlank = false

		cur.WriteString(line)
	}
	return body.String(), appendix.String()
}
-
-func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*PatchHeader, error) {
- msg, err := mail.ReadMessage(r)
- if err != nil {
- return nil, err
- }
-
- h := &PatchHeader{}
-
- if strings.HasPrefix(mailLine, mailHeaderPrefix) {
- mailLine = strings.TrimPrefix(mailLine, mailHeaderPrefix)
- if i := strings.IndexByte(mailLine, ' '); i > 0 {
- h.SHA = mailLine[:i]
- }
- }
-
- from := msg.Header.Get("From")
- if from != "" {
- u, err := ParsePatchIdentity(from)
- if err != nil {
- return nil, err
- }
- h.Author = &u
- }
-
- date := msg.Header.Get("Date")
- if date != "" {
- d, err := ParsePatchDate(date)
- if err != nil {
- return nil, err
- }
- h.AuthorDate = d
- }
-
- subject := msg.Header.Get("Subject")
- h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode)
-
- s := bufio.NewScanner(msg.Body)
- h.Body, h.BodyAppendix = scanMessageBody(s, "", true)
- if s.Err() != nil {
- return nil, s.Err()
- }
-
- return h, nil
-}
-
-func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) {
- switch mode {
- case SubjectCleanAll, SubjectCleanPatchOnly:
- case SubjectCleanWhitespace:
- return "", strings.TrimSpace(decodeSubject(s))
- default:
- panic(fmt.Sprintf("unknown clean mode: %d", mode))
- }
-
- // Based on the algorithm from Git in mailinfo.c:cleanup_subject()
- // If compatibility with `git am` drifts, go there to see if there are any updates.
-
- at := 0
- for at < len(s) {
- switch s[at] {
- case 'r', 'R':
- // Detect re:, Re:, rE: and RE:
- if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' {
- at += 3
- continue
- }
-
- case ' ', '\t', ':':
- // Delete whitespace and duplicate ':' characters
- at++
- continue
-
- case '[':
- if i := strings.IndexByte(s[at:], ']'); i > 0 {
- if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") {
- at += i + 1
- continue
- }
- }
- }
-
- // Nothing was removed, end processing
- break
- }
-
- prefix = strings.TrimLeftFunc(s[:at], unicode.IsSpace)
- subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace)
- return
-}
-
// decodeSubject decodes a subject line. Only quoted-printable UTF-8 is
// handled, recognized by the exact prefix "=?UTF-8?q?"; any other input is
// returned unchanged. `git format-patch` produces this encoding when the
// commit title has a non-ASCII character (i.e. an emoji).
// See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject
//
// If the quoted-printable payload cannot be decoded, the original string is
// returned.
func decodeSubject(encoded string) string {
	const (
		wordStart = "=?UTF-8?q?"
		wordEnd   = "?="
	)

	if !strings.HasPrefix(encoded, wordStart) {
		// not UTF-8 encoded
		return encoded
	}

	// A long subject may be split by `git format-patch` across several
	// lines, which after parsing looks like:
	// <UTF8-prefix><first-line> <UTF8-prefix><second-line>
	// Prepending a space lets the first marker be removed by the same
	// " <UTF8-prefix>" pattern as the later ones.
	stripped := strings.ReplaceAll(" "+encoded, " "+wordStart, "")
	stripped = strings.ReplaceAll(stripped, wordEnd, "")

	raw, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(stripped)))
	if err != nil {
		// decoding failed, fall back to the subject as given
		return encoded
	}
	return string(raw)
}