summaryrefslogtreecommitdiff
path: root/internal/git/objects.go
blob: faa1fe5bee9b63ad780f8555ce8940c21a811207 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
package git

import (
	"bufio"
	"context"
	"fmt"
	"log"
	"strconv"
	"strings"
)

// ObjectType is the type name of a Git object. ParseGitObject fills it from
// the first field of the object header without checking it against the
// constants below, so other values are possible.
type ObjectType string

// Object type names referenced by this package.
const (
	ObjectTypeCommit ObjectType = "commit"
	ObjectTypeTree   ObjectType = "tree"
	ObjectTypeBlob   ObjectType = "blob"
	ObjectTypeTag    ObjectType = "tag"
)

// GitObject is a Git object split into its header fields and payload, as
// produced by ParseGitObject.
type GitObject struct {
	// Type is the first field of the object header.
	Type ObjectType
	// Size is the size declared in the object header.
	Size int64
	// Data holds the bytes that follow the header's NUL terminator.
	Data []byte
	// SHA is the identifier supplied by the caller; it is not computed from Data.
	SHA  string
}

// CommitObject holds the fields ParseCommitObject extracts from a commit
// payload. Header values are stored as raw text with only the header keyword
// removed.
type CommitObject struct {
	// Tree is the value of the "tree" header.
	Tree      string
	// Parents are the values of the "parent" headers, in order of appearance.
	Parents   []string
	// Author is the text following "author "; it is not split further.
	Author    string
	// Committer is the text following "committer "; it is not split further.
	Committer string
	// Message is everything after the first blank line.
	Message   string
}

// TreeEntry is one entry of a tree object as decoded by ParseTreeObject.
type TreeEntry struct {
	// Mode is the text before the first space of the entry, kept as a string.
	Mode string
	// Name is the text between that space and the NUL terminator.
	Name string
	// SHA is the entry's 20-byte object ID rendered as lowercase hex.
	SHA  string
}

// TreeObject is a decoded tree: its entries in the order they appear in the
// object payload.
type TreeObject struct {
	Entries []TreeEntry
}

// StorageBackend is the interface ObjectReader uses to fetch Git objects.
type StorageBackend interface {
	// GetObject returns the stored bytes of the object objectID in repo.
	// ObjectReader.ReadObject hands the result directly to ParseGitObject, so
	// the bytes are expected to be in "type size\0content" form.
	// NOTE(review): presumably implementations return the object already
	// decompressed — confirm against the concrete backends.
	GetObject(ctx context.Context, repo, objectID string) ([]byte, error)
}

// ObjectReader reads and parses Git objects obtained from a StorageBackend.
// Create one with NewObjectReader.
type ObjectReader struct {
	// storage is the backend every read goes through.
	storage StorageBackend
}

// NewObjectReader returns an ObjectReader that fetches objects through
// storage.
func NewObjectReader(storage StorageBackend) *ObjectReader {
	reader := new(ObjectReader)
	reader.storage = storage
	return reader
}

// ReadObject fetches the object sha of repo from the storage backend and
// parses its header.
//
// A backend failure is returned wrapped with the object ID; a parse failure
// is returned as-is. Every step, successful or not, is written to the
// standard logger.
func (or *ObjectReader) ReadObject(ctx context.Context, repo, sha string) (*GitObject, error) {
	log.Printf("Reading object %s from repo %s", sha, repo)

	raw, fetchErr := or.storage.GetObject(ctx, repo, sha)
	if fetchErr != nil {
		log.Printf("Failed to read object %s from S3: %v", sha, fetchErr)
		return nil, fmt.Errorf("failed to read object %s: %w", sha, fetchErr)
	}
	log.Printf("Successfully read object %s, size: %d bytes", sha, len(raw))

	parsed, parseErr := ParseGitObject(raw, sha)
	if parseErr != nil {
		log.Printf("Failed to parse object %s: %v", sha, parseErr)
		return nil, parseErr
	}
	log.Printf("Successfully parsed object %s, type: %s", sha, parsed.Type)

	return parsed, nil
}

// ParseGitObject splits a raw Git object of the form "type size\0content"
// into its header fields and payload.
//
// data is the complete object. sha is stored on the result unchanged; it is
// not verified against the content. The returned GitObject's Data is a
// sub-slice of data, not a copy.
//
// An error is returned when:
//   - data contains no NUL byte terminating the header,
//   - the header is not exactly two whitespace-separated fields,
//   - the size field is not a decimal integer, or
//   - the declared size differs from the number of payload bytes, which
//     means the object is truncated or corrupted.
//
// The type field is not validated against the known ObjectType constants.
func ParseGitObject(data []byte, sha string) (*GitObject, error) {
	// Locate the NUL byte that separates the header from the payload.
	nullIndex := -1
	for i, b := range data {
		if b == 0 {
			nullIndex = i
			break
		}
	}
	if nullIndex == -1 {
		return nil, fmt.Errorf("invalid git object: no null terminator")
	}

	header := string(data[:nullIndex])
	content := data[nullIndex+1:]

	parts := strings.Fields(header)
	if len(parts) != 2 {
		return nil, fmt.Errorf("invalid git object header: %s", header)
	}

	size, err := strconv.ParseInt(parts[1], 10, 64)
	if err != nil {
		return nil, fmt.Errorf("invalid object size: %s: %w", parts[1], err)
	}

	// The header must describe exactly the payload that follows it. This also
	// rejects negative sizes, which ParseInt would otherwise accept.
	if size != int64(len(content)) {
		return nil, fmt.Errorf("invalid git object: header declares %d bytes, content has %d", size, len(content))
	}

	return &GitObject{
		Type: ObjectType(parts[0]),
		Size: size,
		Data: content,
		SHA:  sha,
	}, nil
}

// ReadCommit reads the object sha of repo and parses it as a commit.
// It returns an error if the object cannot be read or is not of type
// "commit".
func (or *ObjectReader) ReadCommit(ctx context.Context, repo, sha string) (*CommitObject, error) {
	object, err := or.ReadObject(ctx, repo, sha)
	switch {
	case err != nil:
		return nil, err
	case object.Type != ObjectTypeCommit:
		return nil, fmt.Errorf("expected commit object, got %s", object.Type)
	}
	return ParseCommitObject(object.Data)
}

// ParseCommitObject decodes the payload of a commit object (the bytes after
// the "commit <size>\0" header).
//
// Header lines are read up to the first blank line. The "tree", "parent",
// "author" and "committer" headers are recorded; any other header line is
// ignored. Everything after the blank line becomes Message. Lines are split
// by bufio.Scanner, so the terminator of the final line is not part of
// Message.
//
// Missing headers are not treated as an error; the corresponding fields are
// left empty. An error is returned only if scanning the input fails.
func ParseCommitObject(data []byte) (*CommitObject, error) {
	commit := &CommitObject{}

	scanner := bufio.NewScanner(strings.NewReader(string(data)))
	// The default Scanner limit is 64 KiB per line. A longer line would stop
	// the scan early and silently truncate the commit, so allow a single line
	// to be as large as the whole input.
	scanner.Buffer(nil, len(data)+1)

	// Parse header lines.
	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			break // End of headers, commit message follows.
		}

		switch {
		case strings.HasPrefix(line, "tree "):
			commit.Tree = strings.TrimPrefix(line, "tree ")
		case strings.HasPrefix(line, "parent "):
			commit.Parents = append(commit.Parents, strings.TrimPrefix(line, "parent "))
		case strings.HasPrefix(line, "author "):
			commit.Author = strings.TrimPrefix(line, "author ")
		case strings.HasPrefix(line, "committer "):
			commit.Committer = strings.TrimPrefix(line, "committer ")
		}
	}

	// The rest is the commit message.
	var messageLines []string
	for scanner.Scan() {
		messageLines = append(messageLines, scanner.Text())
	}

	// Scan returns false on error as well as at end of input; distinguish the
	// two so a failed scan is never reported as a complete commit.
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("parsing commit object: %w", err)
	}

	commit.Message = strings.Join(messageLines, "\n")
	return commit, nil
}

// ReadTree reads the object sha of repo and parses it as a tree.
// It returns an error if the object cannot be read or is not of type "tree".
func (or *ObjectReader) ReadTree(ctx context.Context, repo, sha string) (*TreeObject, error) {
	object, err := or.ReadObject(ctx, repo, sha)
	switch {
	case err != nil:
		return nil, err
	case object.Type != ObjectTypeTree:
		return nil, fmt.Errorf("expected tree object, got %s", object.Type)
	}
	return ParseTreeObject(object.Data)
}

// ParseTreeObject decodes the payload of a tree object (the bytes after the
// "tree <size>\0" header).
//
// The payload is a sequence of entries, each laid out as
//
//	<mode> SP <name> NUL <20 raw bytes of object ID>
//
// Entries are returned in payload order with the object ID hex-encoded.
// Empty input yields a tree with no entries.
//
// An error is returned if an entry is incomplete: no space after the mode,
// no NUL after the name, or fewer than 20 bytes left for the object ID.
// Returning a partial tree in those cases would hide corruption and make
// callers silently skip objects.
func ParseTreeObject(data []byte) (*TreeObject, error) {
	// Length in bytes of the raw object ID stored after each entry name.
	const rawSHALen = 20

	tree := &TreeObject{}
	pos := 0

	for pos < len(data) {
		entryStart := pos

		// The mode runs up to the first space.
		spaceIndex := pos
		for spaceIndex < len(data) && data[spaceIndex] != ' ' {
			spaceIndex++
		}
		if spaceIndex >= len(data) {
			return nil, fmt.Errorf("invalid tree object: entry at offset %d has no space after mode", entryStart)
		}
		mode := string(data[pos:spaceIndex])
		pos = spaceIndex + 1

		// The name runs up to the next NUL byte.
		nullIndex := pos
		for nullIndex < len(data) && data[nullIndex] != 0 {
			nullIndex++
		}
		if nullIndex >= len(data) {
			return nil, fmt.Errorf("invalid tree object: entry at offset %d has no null terminator after name", entryStart)
		}
		name := string(data[pos:nullIndex])
		pos = nullIndex + 1

		// The raw object ID follows immediately.
		if pos+rawSHALen > len(data) {
			return nil, fmt.Errorf("invalid tree object: entry %q at offset %d has a truncated object id", name, entryStart)
		}
		sha := fmt.Sprintf("%x", data[pos:pos+rawSHALen])
		pos += rawSHALen

		tree.Entries = append(tree.Entries, TreeEntry{
			Mode: mode,
			Name: name,
			SHA:  sha,
		})
	}

	return tree, nil
}

// GetObjectsToSend walks the object graph from each ID in wants and returns
// the IDs of every object reached, each listed once.
//
// haves is currently only logged: objects the client already has are not
// excluded from the result.
// TODO: Optimize by excluding objects reachable from 'haves'
//
// The first failure aborts the walk and is returned wrapped with the want
// that was being processed. Progress is written to the standard logger.
func (or *ObjectReader) GetObjectsToSend(ctx context.Context, repo string, wants []string, haves []string) ([]string, error) {
	log.Printf("Getting objects to send for %d wants, %d haves", len(wants), len(haves))

	// One visited set for all wants, so an object reachable from several of
	// them is reported only once.
	seen := map[string]bool{}
	var result []string

	for i := range wants {
		tip := wants[i]
		log.Printf("Processing want: %s", tip)

		reachable, err := or.getReachableObjects(ctx, repo, tip, seen)
		if err != nil {
			log.Printf("Failed to get objects for %s: %v", tip, err)
			return nil, fmt.Errorf("failed to get objects for %s: %w", tip, err)
		}

		log.Printf("Found %d objects for want %s", len(reachable), tip)
		result = append(result, reachable...)
	}

	log.Printf("Total objects to send: %d", len(result))
	return result, nil
}

// getReachableObjects returns sha followed by every not-yet-visited object
// reachable from it, in depth-first order.
//
// visited is shared with the caller and updated in place. An ID already in
// it contributes nothing, which deduplicates the result and stops the walk
// at parts of the graph that were already covered. An ID is marked visited
// before it is read, so it stays marked even if reading it fails.
//
// Edges followed:
//   - commit: its tree, then each parent in header order
//   - tree:   every entry except gitlinks (mode 160000)
//   - tag:    the tagged object
//   - blob and unknown types: none
//
// The first read or parse error aborts the walk and is returned.
func (or *ObjectReader) getReachableObjects(ctx context.Context, repo, sha string, visited map[string]bool) ([]string, error) {
	// Mode Git gives a tree entry that is a submodule link. Its ID names a
	// commit in the submodule's repository, not an object of this one, so it
	// must not be followed.
	const gitlinkMode = "160000"

	if visited[sha] {
		return nil, nil
	}
	visited[sha] = true

	obj, err := or.ReadObject(ctx, repo, sha)
	if err != nil {
		return nil, err
	}

	objects := []string{sha}

	// children lists the objects this one refers to, in the order they are
	// walked.
	var children []string

	switch obj.Type {
	case ObjectTypeCommit:
		commit, err := ParseCommitObject(obj.Data)
		if err != nil {
			return nil, err
		}
		// Without this check the walk would try to read the empty object ID
		// and report a confusing storage error.
		if commit.Tree == "" {
			return nil, fmt.Errorf("commit %s has no tree header", sha)
		}
		children = append(children, commit.Tree)
		children = append(children, commit.Parents...)

	case ObjectTypeTree:
		tree, err := ParseTreeObject(obj.Data)
		if err != nil {
			return nil, err
		}
		for _, entry := range tree.Entries {
			if entry.Mode == gitlinkMode {
				continue
			}
			children = append(children, entry.SHA)
		}

	case ObjectTypeTag:
		// The first line of a tag payload is "object <id>", naming what the
		// tag points at. That object must be sent along with the tag.
		firstLine := string(obj.Data)
		if end := strings.IndexByte(firstLine, '\n'); end >= 0 {
			firstLine = firstLine[:end]
		}
		if !strings.HasPrefix(firstLine, "object ") {
			return nil, fmt.Errorf("tag %s has no object header", sha)
		}
		children = append(children, strings.TrimPrefix(firstLine, "object "))
	}

	for _, child := range children {
		reachable, err := or.getReachableObjects(ctx, repo, child, visited)
		if err != nil {
			return nil, err
		}
		objects = append(objects, reachable...)
	}

	return objects, nil
}