package git import ( "bufio" "context" "fmt" "log" "strconv" "strings" ) type ObjectType string const ( ObjectTypeCommit ObjectType = "commit" ObjectTypeTree ObjectType = "tree" ObjectTypeBlob ObjectType = "blob" ObjectTypeTag ObjectType = "tag" ) type GitObject struct { Type ObjectType Size int64 Data []byte SHA string } type CommitObject struct { Tree string Parents []string Author string Committer string Message string } type TreeEntry struct { Mode string Name string SHA string } type TreeObject struct { Entries []TreeEntry } // StorageBackend interface for accessing Git objects type StorageBackend interface { GetObject(ctx context.Context, repo, objectID string) ([]byte, error) } type ObjectReader struct { storage StorageBackend } func NewObjectReader(storage StorageBackend) *ObjectReader { return &ObjectReader{storage: storage} } func (or *ObjectReader) ReadObject(ctx context.Context, repo, sha string) (*GitObject, error) { log.Printf("Reading object %s from repo %s", sha, repo) data, err := or.storage.GetObject(ctx, repo, sha) if err != nil { log.Printf("Failed to read object %s from S3: %v", sha, err) return nil, fmt.Errorf("failed to read object %s: %w", sha, err) } log.Printf("Successfully read object %s, size: %d bytes", sha, len(data)) obj, err := ParseGitObject(data, sha) if err != nil { log.Printf("Failed to parse object %s: %v", sha, err) return nil, err } log.Printf("Successfully parsed object %s, type: %s", sha, obj.Type) return obj, nil } func ParseGitObject(data []byte, sha string) (*GitObject, error) { // Git objects are stored as: "type size\0content" nullIndex := -1 for i, b := range data { if b == 0 { nullIndex = i break } } if nullIndex == -1 { return nil, fmt.Errorf("invalid git object: no null terminator") } header := string(data[:nullIndex]) content := data[nullIndex+1:] parts := strings.Fields(header) if len(parts) != 2 { return nil, fmt.Errorf("invalid git object header: %s", header) } objType := ObjectType(parts[0]) size, err := strconv.ParseInt(parts[1], 10, 64) if err != nil { return nil, fmt.Errorf("invalid object size: %s", parts[1]) } return &GitObject{ Type: objType, Size: size, Data: content, SHA: sha, }, nil } func (or *ObjectReader) ReadCommit(ctx context.Context, repo, sha string) (*CommitObject, error) { obj, err := or.ReadObject(ctx, repo, sha) if err != nil { return nil, err } if obj.Type != ObjectTypeCommit { return nil, fmt.Errorf("expected commit object, got %s", obj.Type) } return ParseCommitObject(obj.Data) } func ParseCommitObject(data []byte) (*CommitObject, error) { commit := &CommitObject{} scanner := bufio.NewScanner(strings.NewReader(string(data))) // Parse header lines for scanner.Scan() { line := scanner.Text() if line == "" { break // End of headers, commit message follows } if strings.HasPrefix(line, "tree ") { commit.Tree = strings.TrimPrefix(line, "tree ") } else if strings.HasPrefix(line, "parent ") { commit.Parents = append(commit.Parents, strings.TrimPrefix(line, "parent ")) } else if strings.HasPrefix(line, "author ") { commit.Author = strings.TrimPrefix(line, "author ") } else if strings.HasPrefix(line, "committer ") { commit.Committer = strings.TrimPrefix(line, "committer ") } } // Rest is commit message var messageLines []string for scanner.Scan() { messageLines = append(messageLines, scanner.Text()) } commit.Message = strings.Join(messageLines, "\n") return commit, nil } func (or *ObjectReader) ReadTree(ctx context.Context, repo, sha string) (*TreeObject, error) { obj, err := or.ReadObject(ctx, repo, sha) if err != nil { return nil, err } if obj.Type != ObjectTypeTree { return nil, fmt.Errorf("expected tree object, got %s", obj.Type) } return ParseTreeObject(obj.Data) } func ParseTreeObject(data []byte) (*TreeObject, error) { tree := &TreeObject{} pos := 0 for pos < len(data) { // Find space after mode spaceIndex := pos for spaceIndex < len(data) && data[spaceIndex] != ' ' { spaceIndex++ } if spaceIndex >= len(data) { break } mode := string(data[pos:spaceIndex]) pos = spaceIndex + 1 // Find null terminator after name nullIndex := pos for nullIndex < len(data) && data[nullIndex] != 0 { nullIndex++ } if nullIndex >= len(data) { break } name := string(data[pos:nullIndex]) pos = nullIndex + 1 // Read 20-byte SHA if pos+20 > len(data) { break } sha := fmt.Sprintf("%x", data[pos:pos+20]) pos += 20 tree.Entries = append(tree.Entries, TreeEntry{ Mode: mode, Name: name, SHA: sha, }) } return tree, nil } // GetObjectsToSend traverses the commit graph and returns all objects needed func (or *ObjectReader) GetObjectsToSend(ctx context.Context, repo string, wants []string, haves []string) ([]string, error) { // Simple implementation: for each wanted commit, get all reachable objects // TODO: Optimize by excluding objects reachable from 'haves' log.Printf("Getting objects to send for %d wants, %d haves", len(wants), len(haves)) var allObjects []string visited := make(map[string]bool) for _, want := range wants { log.Printf("Processing want: %s", want) objects, err := or.getReachableObjects(ctx, repo, want, visited) if err != nil { log.Printf("Failed to get objects for %s: %v", want, err) return nil, fmt.Errorf("failed to get objects for %s: %w", want, err) } log.Printf("Found %d objects for want %s", len(objects), want) allObjects = append(allObjects, objects...) } log.Printf("Total objects to send: %d", len(allObjects)) return allObjects, nil } func (or *ObjectReader) getReachableObjects(ctx context.Context, repo, sha string, visited map[string]bool) ([]string, error) { if visited[sha] { return nil, nil } visited[sha] = true obj, err := or.ReadObject(ctx, repo, sha) if err != nil { return nil, err } var objects []string objects = append(objects, sha) switch obj.Type { case ObjectTypeCommit: commit, err := ParseCommitObject(obj.Data) if err != nil { return nil, err } // Add tree objects treeObjects, err := or.getReachableObjects(ctx, repo, commit.Tree, visited) if err != nil { return nil, err } objects = append(objects, treeObjects...) // Add parent commits for _, parent := range commit.Parents { parentObjects, err := or.getReachableObjects(ctx, repo, parent, visited) if err != nil { return nil, err } objects = append(objects, parentObjects...) } case ObjectTypeTree: tree, err := ParseTreeObject(obj.Data) if err != nil { return nil, err } // Add all tree entries for _, entry := range tree.Entries { entryObjects, err := or.getReachableObjects(ctx, repo, entry.SHA, visited) if err != nil { return nil, err } objects = append(objects, entryObjects...) } } return objects, nil }