summaryrefslogtreecommitdiff
path: root/internal/gitdiff/patch_identity.go
diff options
context:
space:
mode:
authormo khan <mo@mokhan.ca>2026-01-30 18:16:31 -0700
committermo khan <mo@mokhan.ca>2026-01-30 18:16:31 -0700
commitfeee7d43ef63ae607c6fd4cca88a356a93553ebe (patch)
tree2969055a894dc4e72d8d79a9ac74cc30d78aff64 /internal/gitdiff/patch_identity.go
parente0db8f82e96acadf6968e0cf9c805a7b22d835db (diff)
refactor: move packages to internal/
Diffstat (limited to 'internal/gitdiff/patch_identity.go')
-rw-r--r--internal/gitdiff/patch_identity.go166
1 files changed, 166 insertions, 0 deletions
diff --git a/internal/gitdiff/patch_identity.go b/internal/gitdiff/patch_identity.go
new file mode 100644
index 0000000..018f80c
--- /dev/null
+++ b/internal/gitdiff/patch_identity.go
@@ -0,0 +1,166 @@
+package gitdiff
+
+import (
+ "fmt"
+ "strings"
+)
+
+// PatchIdentity identifies a person who authored or committed a patch.
+type PatchIdentity struct {
+ Name string
+ Email string
+}
+
+func (i PatchIdentity) String() string {
+ name := i.Name
+ if name == "" {
+ name = `""`
+ }
+ return fmt.Sprintf("%s <%s>", name, i.Email)
+}
+
+// ParsePatchIdentity parses a patch identity string. A patch identity contains
+// an email address and an optional name in [RFC 5322] format. This is either a
+// plain email adddress or a name followed by an address in angle brackets:
+//
+// author@example.com
+// Author Name <author@example.com>
+//
+// If the input is not one of these formats, ParsePatchIdentity applies a
+// heuristic to separate the name and email portions. If both the name and
+// email are missing or empty, ParsePatchIdentity returns an error. It
+// otherwise does not validate the result.
+//
+// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322
+func ParsePatchIdentity(s string) (PatchIdentity, error) {
+ s = normalizeSpace(s)
+ s = unquotePairs(s)
+
+ var name, email string
+ if at := strings.IndexByte(s, '@'); at >= 0 {
+ start, end := at, at
+ for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' {
+ start--
+ }
+ for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' {
+ end++
+ }
+ email = s[start+1 : end]
+
+ // Adjust the boundaries so that we drop angle brackets, but keep
+ // spaces when removing the email to form the name.
+ if start < 0 || s[start] != '<' {
+ start++
+ }
+ if end >= len(s) || s[end] != '>' {
+ end--
+ }
+ name = s[:start] + s[end+1:]
+ } else {
+ start, end := 0, 0
+ for i := 0; i < len(s); i++ {
+ if s[i] == '<' && start == 0 {
+ start = i + 1
+ }
+ if s[i] == '>' && start > 0 {
+ end = i
+ break
+ }
+ }
+ if start > 0 && end >= start {
+ email = strings.TrimSpace(s[start:end])
+ name = s[:start-1]
+ }
+ }
+
+ // After extracting the email, the name might contain extra whitespace
+ // again and may be surrounded by comment characters. The git source gives
+ // these examples of when this can happen:
+ //
+ // "Name <email@domain>"
+ // "email@domain (Name)"
+ // "Name <email@domain> (Comment)"
+ //
+ name = normalizeSpace(name)
+ if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") {
+ name = name[1 : len(name)-1]
+ }
+ name = strings.TrimSpace(name)
+
+ // If the name is empty or contains email-like characters, use the email
+ // instead (assuming one exists)
+ if name == "" || strings.ContainsAny(name, "@<>") {
+ name = email
+ }
+
+ if name == "" && email == "" {
+ return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s)
+ }
+ return PatchIdentity{Name: name, Email: email}, nil
+}
+
+// unquotePairs process the RFC5322 tokens "quoted-string" and "comment" to
+// remove any "quoted-pairs" (backslash-espaced characters). It also removes
+// the quotes from any quoted strings, but leaves the comment delimiters.
+func unquotePairs(s string) string {
+ quote := false
+ comments := 0
+ escaped := false
+
+ var out strings.Builder
+ for i := 0; i < len(s); i++ {
+ if escaped {
+ escaped = false
+ } else {
+ switch s[i] {
+ case '\\':
+ // quoted-pair is only allowed in quoted-string/comment
+ if quote || comments > 0 {
+ escaped = true
+ continue // drop '\' character
+ }
+
+ case '"':
+ if comments == 0 {
+ quote = !quote
+ continue // drop '"' character
+ }
+
+ case '(':
+ if !quote {
+ comments++
+ }
+ case ')':
+ if comments > 0 {
+ comments--
+ }
+ }
+ }
+ out.WriteByte(s[i])
+ }
+ return out.String()
+}
+
+// normalizeSpace trims leading and trailing whitespace from s and converts
+// inner sequences of one or more whitespace characters to single spaces.
+func normalizeSpace(s string) string {
+ var sb strings.Builder
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if !isRFC5332Space(c) {
+ if sb.Len() > 0 && isRFC5332Space(s[i-1]) {
+ sb.WriteByte(' ')
+ }
+ sb.WriteByte(c)
+ }
+ }
+ return sb.String()
+}
+
+func isRFC5332Space(c byte) bool {
+ switch c {
+ case '\t', '\n', '\r', ' ':
+ return true
+ }
+ return false
+}