// pkg/gitdiff/patch_identity.go

package gitdiff

import (
	"fmt"
	"strings"
)

// PatchIdentity identifies a person who authored or committed a patch.
type PatchIdentity struct {
	// Name is the person's name as it should be displayed.
	Name string
	// Email is the person's email address, without angle brackets.
	Email string
}

// String formats the identity as "Name <Email>". When Name is empty, a pair
// of double quotes is written in its place so the name position is never
// blank.
func (i PatchIdentity) String() string {
	displayName := `""`
	if i.Name != "" {
		displayName = i.Name
	}
	return fmt.Sprintf("%s <%s>", displayName, i.Email)
}

// ParsePatchIdentity parses a patch identity string. A patch identity contains
// an email address and an optional name in [RFC 5322] format. This is either a
// plain email address or a name followed by an address in angle brackets:
//
//	author@example.com
//	Author Name <author@example.com>
//
// If the input is not one of these formats, ParsePatchIdentity applies a
// heuristic to separate the name and email portions. If both the name and
// email are missing or empty, ParsePatchIdentity returns an error. It
// otherwise does not validate the result.
//
// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322
func ParsePatchIdentity(s string) (PatchIdentity, error) {
	s = unquotePairs(normalizeSpace(s))

	var name, email string
	if at := strings.IndexByte(s, '@'); at >= 0 {
		// The email is the run of bytes around the first '@', bounded on
		// each side by whitespace, an angle bracket, or the end of the
		// string. left and right stop on the delimiters themselves (or one
		// step past the ends of the string).
		left := at
		for left >= 0 && s[left] != '<' && !isRFC5332Space(s[left]) {
			left--
		}
		right := at
		for right < len(s) && s[right] != '>' && !isRFC5332Space(s[right]) {
			right++
		}
		email = s[left+1 : right]

		// The name is everything that is not the email. Angle brackets that
		// delimit the email are dropped; delimiting spaces are kept so the
		// text on both sides does not run together.
		before := s[:left+1]
		if left >= 0 && s[left] == '<' {
			before = s[:left]
		}
		after := s[right:]
		if right < len(s) && s[right] == '>' {
			after = s[right+1:]
		}
		name = before + after
	} else if open := strings.IndexByte(s, '<'); open >= 0 {
		// Without an '@', only accept an email enclosed by the first '<' and
		// the first '>' that follows it. The name is the text before the
		// opening bracket; anything after the closing bracket is ignored.
		if n := strings.IndexByte(s[open+1:], '>'); n >= 0 {
			email = strings.TrimSpace(s[open+1 : open+1+n])
			name = s[:open]
		}
	}

	// After extracting the email, the name might contain extra whitespace
	// again and may be surrounded by comment characters. The git source gives
	// these examples of when this can happen:
	//
	//   "Name <email@domain>"
	//   "email@domain (Name)"
	//   "Name <email@domain> (Comment)"
	//
	name = normalizeSpace(name)
	if n := len(name); n >= 2 && name[0] == '(' && name[n-1] == ')' {
		name = name[1 : n-1]
	}
	name = strings.TrimSpace(name)

	// A name that is empty or still looks like part of an address is
	// replaced by the email (which may itself be empty).
	if name == "" || strings.ContainsAny(name, "@<>") {
		name = email
	}

	if name == "" && email == "" {
		return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s)
	}
	return PatchIdentity{Name: name, Email: email}, nil
}

// unquotePairs processes the RFC 5322 tokens "quoted-string" and "comment" to
// remove any "quoted-pairs" (backslash-escaped characters). It also removes
// the quotes from any quoted strings, but leaves the comment delimiters.
//
// A backslash outside of a quoted string or comment is copied through
// unchanged, as is a double quote that appears inside a comment.
func unquotePairs(s string) string {
	var (
		result   strings.Builder
		inQuote  bool // inside a quoted-string
		depth    int  // nesting level of open comments
		takeNext bool // previous byte was the backslash of a quoted-pair
	)

	for _, c := range []byte(s) {
		switch {
		case takeNext:
			// The escaped byte is always copied literally and never
			// affects the quote or comment state.
			takeNext = false

		case c == '\\':
			// quoted-pair is only allowed in quoted-string/comment
			if inQuote || depth > 0 {
				takeNext = true
				continue // drop '\' character
			}

		case c == '"':
			if depth == 0 {
				inQuote = !inQuote
				continue // drop '"' character
			}

		case c == '(':
			if !inQuote {
				depth++
			}

		case c == ')':
			if depth > 0 {
				depth--
			}
		}
		result.WriteByte(c)
	}
	return result.String()
}

// normalizeSpace trims leading and trailing whitespace from s and converts
// inner sequences of one or more whitespace characters to single spaces.
// Whitespace is whatever isRFC5332Space reports as a space.
func normalizeSpace(s string) string {
	var out strings.Builder
	gap := false // the previous byte was whitespace
	for _, c := range []byte(s) {
		if isRFC5332Space(c) {
			gap = true
			continue
		}
		// Emit a separator only between two non-space runs: a gap before
		// any output is leading whitespace, and a trailing gap is never
		// followed by a byte that would flush it.
		if gap && out.Len() > 0 {
			out.WriteByte(' ')
		}
		gap = false
		out.WriteByte(c)
	}
	return out.String()
}

// isRFC5332Space reports whether c is a tab, line feed, carriage return, or
// space byte.
func isRFC5332Space(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}