1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
|
package git
import (
"bufio"
"context"
"fmt"
"log"
"strconv"
"strings"
)
// ObjectType is the type name of a Git object, as it appears as the first
// field of the object header.
type ObjectType string
// Object type names used by this package.
const (
ObjectTypeCommit ObjectType = "commit"
ObjectTypeTree ObjectType = "tree"
ObjectTypeBlob ObjectType = "blob"
ObjectTypeTag ObjectType = "tag"
)
// GitObject is a Git object split into its header fields and the content that
// follows the header's NUL terminator.
type GitObject struct {
// Type is the object type name taken from the header.
Type ObjectType
// Size is the size value declared in the header.
Size int64
// Data is the content that follows the header.
Data []byte
// SHA is the object ID supplied by the caller when the object was parsed.
SHA string
}
// CommitObject holds the fields that ParseCommitObject extracts from the
// content of a commit object.
type CommitObject struct {
// Tree is the value of the "tree" header line.
Tree string
// Parents holds the values of the "parent" header lines, in order of appearance.
Parents []string
// Author is the remainder of the "author" header line, left unparsed.
Author string
// Committer is the remainder of the "committer" header line, left unparsed.
Committer string
// Message is the text that follows the first blank line.
Message string
}
// TreeEntry is a single entry of a tree object as decoded by ParseTreeObject.
type TreeEntry struct {
// Mode is the text before the first space of the entry.
Mode string
// Name is the text between that space and the NUL terminator.
Name string
// SHA is the 20 raw ID bytes that follow the NUL, rendered as lowercase hex.
SHA string
}
// TreeObject is the decoded content of a tree object.
type TreeObject struct {
// Entries lists the tree's entries in the order they appear in the raw data.
Entries []TreeEntry
}
// StorageBackend is the source from which ObjectReader fetches raw Git objects.
type StorageBackend interface {
// GetObject returns the stored bytes of the object objectID in repo.
// ObjectReader hands the result to ParseGitObject, which expects the
// layout "type size\0content".
GetObject(ctx context.Context, repo, objectID string) ([]byte, error)
}
// ObjectReader reads and parses Git objects from a StorageBackend.
type ObjectReader struct {
// storage is the backend every object is fetched from.
storage StorageBackend
}
// NewObjectReader returns an ObjectReader that fetches its objects from storage.
func NewObjectReader(storage StorageBackend) *ObjectReader {
	reader := new(ObjectReader)
	reader.storage = storage
	return reader
}
// ReadObject fetches the object sha of repo from the storage backend and
// parses its header.
//
// Each step is logged. A storage failure is returned wrapped with the object
// ID; a parse failure is returned unchanged.
func (or *ObjectReader) ReadObject(ctx context.Context, repo, sha string) (*GitObject, error) {
	log.Printf("Reading object %s from repo %s", sha, repo)

	raw, fetchErr := or.storage.GetObject(ctx, repo, sha)
	if fetchErr != nil {
		log.Printf("Failed to read object %s from S3: %v", sha, fetchErr)
		return nil, fmt.Errorf("failed to read object %s: %w", sha, fetchErr)
	}
	log.Printf("Successfully read object %s, size: %d bytes", sha, len(raw))

	parsed, parseErr := ParseGitObject(raw, sha)
	if parseErr != nil {
		log.Printf("Failed to parse object %s: %v", sha, parseErr)
		return nil, parseErr
	}
	log.Printf("Successfully parsed object %s, type: %s", sha, parsed.Type)

	return parsed, nil
}
// ParseGitObject splits a raw Git object of the form "type size\0content"
// into its header fields and content.
//
// data is the complete object including the header; no decompression is
// performed here. sha is copied into the result as-is and is not verified
// against the content. The returned Data aliases data rather than copying it.
//
// An error is returned when the NUL terminator is missing, the header does not
// consist of exactly two fields, the size is not a decimal integer, or the
// declared size differs from the number of content bytes actually present
// (which indicates a truncated or corrupt object).
func ParseGitObject(data []byte, sha string) (*GitObject, error) {
	// Git objects are stored as: "type size\0content"
	nullIndex := -1
	for i, b := range data {
		if b == 0 {
			nullIndex = i
			break
		}
	}
	if nullIndex == -1 {
		return nil, fmt.Errorf("invalid git object: no null terminator")
	}

	header := string(data[:nullIndex])
	content := data[nullIndex+1:]

	parts := strings.Fields(header)
	if len(parts) != 2 {
		return nil, fmt.Errorf("invalid git object header: %s", header)
	}

	size, err := strconv.ParseInt(parts[1], 10, 64)
	if err != nil {
		return nil, fmt.Errorf("invalid object size: %s", parts[1])
	}
	// The header is the only integrity hint available at this layer: a
	// mismatch means the payload was truncated or corrupted in storage, and
	// accepting it would propagate a broken object to clients.
	if size != int64(len(content)) {
		return nil, fmt.Errorf("invalid git object: header declares %d bytes but content has %d", size, len(content))
	}

	return &GitObject{
		Type: ObjectType(parts[0]),
		Size: size,
		Data: content,
		SHA:  sha,
	}, nil
}
// ReadCommit reads the object sha of repo and parses it as a commit.
// It fails if the object cannot be read or is not of type commit.
func (or *ObjectReader) ReadCommit(ctx context.Context, repo, sha string) (*CommitObject, error) {
	obj, err := or.ReadObject(ctx, repo, sha)
	switch {
	case err != nil:
		return nil, err
	case obj.Type != ObjectTypeCommit:
		return nil, fmt.Errorf("expected commit object, got %s", obj.Type)
	}
	return ParseCommitObject(obj.Data)
}
// ParseCommitObject parses the content of a commit object (the bytes after
// the "commit <size>\0" header).
//
// Header lines are read up to the first blank line. The "tree", "parent",
// "author" and "committer" lines are recorded; every other header line is
// ignored. The remaining lines form the message, joined with "\n"; the line
// terminator of the last line is not kept.
//
// An error is returned only if scanning the data fails.
func ParseCommitObject(data []byte) (*CommitObject, error) {
	commit := &CommitObject{}

	scanner := bufio.NewScanner(strings.NewReader(string(data)))
	// The default Scanner rejects lines longer than 64 KiB, which would
	// silently cut the commit short. Allow a single line to span the whole
	// payload; a nil buffer lets the Scanner allocate and grow its own.
	scanner.Buffer(nil, len(data)+1)

	// Parse header lines
	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			break // End of headers, commit message follows
		}

		switch {
		case strings.HasPrefix(line, "tree "):
			commit.Tree = strings.TrimPrefix(line, "tree ")
		case strings.HasPrefix(line, "parent "):
			commit.Parents = append(commit.Parents, strings.TrimPrefix(line, "parent "))
		case strings.HasPrefix(line, "author "):
			commit.Author = strings.TrimPrefix(line, "author ")
		case strings.HasPrefix(line, "committer "):
			commit.Committer = strings.TrimPrefix(line, "committer ")
		}
	}

	// Rest is commit message
	var messageLines []string
	for scanner.Scan() {
		messageLines = append(messageLines, scanner.Text())
	}

	// A failed scan stops both loops early, so one check here covers them.
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("parsing commit object: %w", err)
	}

	commit.Message = strings.Join(messageLines, "\n")
	return commit, nil
}
// ReadTree reads the object sha of repo and decodes it as a tree.
// It fails if the object cannot be read or is not of type tree.
func (or *ObjectReader) ReadTree(ctx context.Context, repo, sha string) (*TreeObject, error) {
	obj, err := or.ReadObject(ctx, repo, sha)
	if err != nil {
		return nil, err
	}
	if obj.Type == ObjectTypeTree {
		return ParseTreeObject(obj.Data)
	}
	return nil, fmt.Errorf("expected tree object, got %s", obj.Type)
}
// ParseTreeObject decodes the content of a tree object (the bytes after the
// "tree <size>\0" header).
//
// The content is a sequence of entries, each laid out as
// "<mode> <name>\0<20 raw ID bytes>". The ID is returned as lowercase hex.
// Empty input yields a tree with no entries.
//
// An error is returned when the data ends in the middle of an entry. Such
// data is corrupt, and returning the entries decoded so far would make an
// incomplete tree look valid.
func ParseTreeObject(data []byte) (*TreeObject, error) {
	// Length in bytes of the raw object ID stored in each entry.
	const rawIDLen = 20

	tree := &TreeObject{}
	pos := 0

	for pos < len(data) {
		entryStart := pos

		// Find space after mode
		spaceIndex := pos
		for spaceIndex < len(data) && data[spaceIndex] != ' ' {
			spaceIndex++
		}
		if spaceIndex >= len(data) {
			return nil, fmt.Errorf("invalid tree object: entry at offset %d has no space after mode", entryStart)
		}
		mode := string(data[pos:spaceIndex])
		pos = spaceIndex + 1

		// Find null terminator after name
		nullIndex := pos
		for nullIndex < len(data) && data[nullIndex] != 0 {
			nullIndex++
		}
		if nullIndex >= len(data) {
			return nil, fmt.Errorf("invalid tree object: entry at offset %d has no null terminator after name", entryStart)
		}
		name := string(data[pos:nullIndex])
		pos = nullIndex + 1

		// Read the raw object ID
		if pos+rawIDLen > len(data) {
			return nil, fmt.Errorf("invalid tree object: entry %q at offset %d has a truncated object id", name, entryStart)
		}
		sha := fmt.Sprintf("%x", data[pos:pos+rawIDLen])
		pos += rawIDLen

		tree.Entries = append(tree.Entries, TreeEntry{
			Mode: mode,
			Name: name,
			SHA:  sha,
		})
	}

	return tree, nil
}
// GetObjectsToSend returns the IDs of every object reachable from the given
// wants in repo, each at most once.
//
// haves is currently only logged: objects reachable from it are not excluded
// from the result yet (see the TODO below). The first failure aborts the walk
// and is returned wrapped with the want that triggered it.
func (or *ObjectReader) GetObjectsToSend(ctx context.Context, repo string, wants []string, haves []string) ([]string, error) {
	// TODO: Optimize by excluding objects reachable from 'haves'
	log.Printf("Getting objects to send for %d wants, %d haves", len(wants), len(haves))

	var (
		result []string
		// seen is shared across all wants so common history is walked once.
		seen = make(map[string]bool)
	)

	for i := range wants {
		want := wants[i]
		log.Printf("Processing want: %s", want)

		reachable, err := or.getReachableObjects(ctx, repo, want, seen)
		if err != nil {
			log.Printf("Failed to get objects for %s: %v", want, err)
			return nil, fmt.Errorf("failed to get objects for %s: %w", want, err)
		}

		log.Printf("Found %d objects for want %s", len(reachable), want)
		result = append(result, reachable...)
	}

	log.Printf("Total objects to send: %d", len(result))
	return result, nil
}
// getReachableObjects returns sha followed by the IDs of every object
// reachable from it that is not already in visited, walking depth-first.
//
// visited is shared with the caller and updated in place; an ID already
// present yields a nil result. Traversal rules per object type:
//   - commit: its tree, then each parent in header order;
//   - tree: every entry except gitlinks (submodules);
//   - tag: the object named by its "object" header;
//   - anything else (blobs): nothing further.
//
// The first read or parse failure aborts the walk and is returned.
func (or *ObjectReader) getReachableObjects(ctx context.Context, repo, sha string, visited map[string]bool) ([]string, error) {
	// Mode of a gitlink tree entry. Its ID names a commit in the submodule's
	// repository, not in this one, so trying to read it here would fail.
	const gitlinkMode = "160000"

	if visited[sha] {
		return nil, nil
	}
	visited[sha] = true

	obj, err := or.ReadObject(ctx, repo, sha)
	if err != nil {
		return nil, err
	}

	objects := []string{sha}

	switch obj.Type {
	case ObjectTypeCommit:
		commit, err := ParseCommitObject(obj.Data)
		if err != nil {
			return nil, err
		}
		// Fail with a clear message instead of looking up an empty ID.
		if commit.Tree == "" {
			return nil, fmt.Errorf("invalid commit object %s: missing tree header", sha)
		}

		// Add tree objects
		treeObjects, err := or.getReachableObjects(ctx, repo, commit.Tree, visited)
		if err != nil {
			return nil, err
		}
		objects = append(objects, treeObjects...)

		// Add parent commits
		for _, parent := range commit.Parents {
			parentObjects, err := or.getReachableObjects(ctx, repo, parent, visited)
			if err != nil {
				return nil, err
			}
			objects = append(objects, parentObjects...)
		}

	case ObjectTypeTree:
		tree, err := ParseTreeObject(obj.Data)
		if err != nil {
			return nil, err
		}

		// Add all tree entries stored in this repository
		for _, entry := range tree.Entries {
			if entry.Mode == gitlinkMode {
				continue
			}
			entryObjects, err := or.getReachableObjects(ctx, repo, entry.SHA, visited)
			if err != nil {
				return nil, err
			}
			objects = append(objects, entryObjects...)
		}

	case ObjectTypeTag:
		// An annotated tag starts with "object <id>"; the tagged object must
		// be sent along with the tag or the receiver gets a dangling tag.
		firstLine := string(obj.Data)
		if i := strings.IndexByte(firstLine, '\n'); i >= 0 {
			firstLine = firstLine[:i]
		}
		if !strings.HasPrefix(firstLine, "object ") {
			return nil, fmt.Errorf("invalid tag object %s: missing object header", sha)
		}

		targetObjects, err := or.getReachableObjects(ctx, repo, strings.TrimPrefix(firstLine, "object "), visited)
		if err != nil {
			return nil, err
		}
		objects = append(objects, targetObjects...)
	}

	return objects, nil
}
|