diff options
| author | mo khan <mo@mokhan.ca> | 2026-01-30 18:16:31 -0700 |
|---|---|---|
| committer | mo khan <mo@mokhan.ca> | 2026-01-30 18:16:31 -0700 |
| commit | feee7d43ef63ae607c6fd4cca88a356a93553ebe (patch) | |
| tree | 2969055a894dc4e72d8d79a9ac74cc30d78aff64 /internal/git | |
| parent | e0db8f82e96acadf6968e0cf9c805a7b22d835db (diff) | |
refactor: move packages to internal/
Diffstat (limited to 'internal/git')
| -rw-r--r-- | internal/git/git.go | 365 | ||||
| -rw-r--r-- | internal/git/types.go | 76 | ||||
| -rw-r--r-- | internal/git/utils.go | 91 |
3 files changed, 532 insertions, 0 deletions
// Declarations of package git (internal/git): thin wrappers around the git
// command line plus the value types they return.
//
// The patch adds three files to the package; their declarations follow in
// patch order: git.go, types.go, utils.go. The import set is unchanged:
// bufio, fmt, io, os/exec, path/filepath, regexp, strconv, strings, time.

// ---------------------------------------------------------------------------
// internal/git/git.go
// ---------------------------------------------------------------------------

// tagFields is the for-each-ref format used by Tags. The fields are joined
// with "%00", which git expands to a NUL byte in its output.
var tagFields = []string{
	"%(refname:short)",    // tag name
	"%(creatordate:unix)", // creation date
	"%(objectname)",       // commit hash for lightweight tags
	"%(*objectname)",      // peeled object => commit hash
}

// commitFieldSep separates the fields of one commit record in `git log`
// output (ASCII unit separator).
const commitFieldSep = "\x1F"

// commitFields is the pretty-format used by Commits, one placeholder per field.
var commitFields = []string{
	"%H",  // commit hash
	"%h",  // abbreviated commit hash
	"%s",  // subject
	"%b",  // body
	"%an", // author name
	"%ae", // author email
	"%ad", // author date
	"%cn", // committer name
	"%ce", // committer email
	"%cd", // committer date
	"%P",  // parent hashes
	"%D",  // ref names without the "(", ")" wrapping.
}

// runGit runs `git args...` in repoDir (the current directory when repoDir is
// empty) and returns its standard output. When git exits unsuccessfully the
// returned error wraps the *exec.ExitError and carries git's stderr text.
func runGit(repoDir string, args ...string) ([]byte, error) {
	cmd := exec.Command("git", args...)
	if repoDir != "" {
		cmd.Dir = repoDir
	}
	out, err := cmd.Output()
	if err != nil {
		return nil, withStderr(err)
	}
	return out, nil
}

// withStderr appends the stderr captured by (*exec.Cmd).Output to err when
// there is any, so that callers see git's own message and not only
// "exit status N". The original error stays reachable through errors.As.
func withStderr(err error) error {
	if ee, ok := err.(*exec.ExitError); ok {
		if msg := strings.TrimSpace(string(ee.Stderr)); msg != "" {
			return fmt.Errorf("%w: %s", err, msg)
		}
	}
	return err
}

// parseUnixTime converts a decimal count of seconds since the Unix epoch into
// a time.Time. It parses as 64-bit so the result does not depend on the
// platform's int size.
func parseUnixTime(s string) (time.Time, error) {
	seconds, err := strconv.ParseInt(s, 10, 64)
	if err != nil {
		return time.Time{}, err
	}
	return time.Unix(seconds, 0), nil
}

// Branches lists the local branches (refs/heads/) of the repository at
// repoDir. When filter is non-nil, only branches whose short name matches it
// are returned; defaultBranch is always kept regardless of the filter.
func Branches(repoDir string, filter *regexp.Regexp, defaultBranch string) ([]Ref, error) {
	out, err := runGit(repoDir, "for-each-ref", "--format=%(refname:short)", "refs/heads/")
	if err != nil {
		return nil, fmt.Errorf("failed to list branches: %w", err)
	}

	names := strings.Split(string(out), "\n")
	branches := make([]Ref, 0, len(names))
	for _, name := range names {
		if name == "" {
			continue
		}
		if filter != nil && name != defaultBranch && !filter.MatchString(name) {
			continue
		}
		branches = append(branches, NewRef(name))
	}
	return branches, nil
}

// Tags lists the tags (refs/tags) of the repository at repoDir, newest first
// as ordered by git's creatordate. For annotated tags CommitHash is the
// peeled object; for lightweight tags it is the tagged object itself.
func Tags(repoDir string) ([]Tag, error) {
	out, err := runGit(repoDir,
		"for-each-ref",
		"--sort=-creatordate",
		"--format="+strings.Join(tagFields, "%00"),
		"refs/tags",
	)
	if err != nil {
		return nil, fmt.Errorf("failed to list tags: %w", err)
	}

	lines := strings.Split(strings.TrimSpace(string(out)), "\n")
	tags := make([]Tag, 0, len(lines))
	for _, line := range lines {
		tag, ok, err := parseTagLine(line)
		if err != nil {
			return nil, err
		}
		if ok {
			tags = append(tags, tag)
		}
	}
	return tags, nil
}

// parseTagLine decodes one NUL-separated for-each-ref record produced with
// tagFields. It reports ok=false (and no error) for empty lines and for lines
// that do not have exactly len(tagFields) fields.
func parseTagLine(line string) (Tag, bool, error) {
	if line == "" {
		return Tag{}, false, nil
	}
	parts := strings.Split(line, "\x00")
	if len(parts) != len(tagFields) {
		return Tag{}, false, nil
	}
	name, timestamp, objectName, commitHash := parts[0], parts[1], parts[2], parts[3]

	created, err := parseUnixTime(timestamp)
	if err != nil {
		return Tag{}, false, fmt.Errorf("failed to parse tag creation date: %w", err)
	}
	if commitHash == "" {
		commitHash = objectName // tag is lightweight
	}
	return Tag{Name: name, Date: created, CommitHash: commitHash}, true, nil
}

// Files lists every blob reachable from the tree at ref (HEAD when ref is
// empty) in the repository at repoDir, recursing into subtrees. Non-blob
// entries such as submodules are skipped.
func Files(ref Ref, repoDir string) ([]Blob, error) {
	if ref.IsEmpty() {
		ref = NewRef("HEAD")
	}

	// -r: recurse into subtrees
	// -l: include blob size
	// -z: NUL-terminate records and emit paths verbatim; without it git
	//     C-quotes paths containing non-ASCII or special characters.
	cmd := exec.Command("git", "ls-tree", "--full-tree", "-r", "-l", "-z", ref.String())
	if repoDir != "" {
		cmd.Dir = repoDir
	}

	// Collect stderr into a buffer instead of a pipe: Wait closes pipes, so
	// reading a StderrPipe after Wait yields nothing.
	var stderr strings.Builder
	cmd.Stderr = &stderr

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, fmt.Errorf("failed to get stdout pipe: %w", err)
	}
	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("failed to start git ls-tree: %w", err)
	}

	files, parseErr := parseLsTree(stdout, ref)
	if parseErr != nil {
		// Drain what is left so git is never blocked on a full pipe; the
		// result is discarded because parseErr is what gets reported.
		_, _ = io.Copy(io.Discard, stdout)
	}

	// Always reap the child, including on parse failure.
	if waitErr := cmd.Wait(); waitErr != nil {
		if msg := strings.TrimSpace(stderr.String()); msg != "" {
			return nil, fmt.Errorf("git ls-tree %q failed: %w: %s", ref.String(), waitErr, msg)
		}
		return nil, fmt.Errorf("git ls-tree %q failed: %w", ref.String(), waitErr)
	}
	if parseErr != nil {
		return nil, parseErr
	}
	return files, nil
}

// parseLsTree reads NUL-terminated `git ls-tree -r -l -z` records from r and
// returns the blobs among them, each tagged with ref.
func parseLsTree(r io.Reader, ref Ref) ([]Blob, error) {
	scanner := bufio.NewScanner(r)
	// Allow long paths by increasing the scanner buffer limit.
	scanner.Buffer(make([]byte, 0, 64*1024), 2*1024*1024)
	scanner.Split(scanNULTerminated)

	files := make([]Blob, 0, 256)
	for scanner.Scan() {
		blob, ok, err := parseLsTreeEntry(scanner.Text(), ref)
		if err != nil {
			return nil, err
		}
		if ok {
			files = append(files, blob)
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("failed to read ls-tree output: %w", err)
	}
	return files, nil
}

// scanNULTerminated is a bufio.SplitFunc that yields NUL-terminated records.
// A final record without a trailing NUL is yielded at EOF.
func scanNULTerminated(data []byte, atEOF bool) (int, []byte, error) {
	for i, b := range data {
		if b == 0x00 {
			return i + 1, data[:i], nil
		}
	}
	if atEOF && len(data) > 0 {
		return len(data), data, nil
	}
	return 0, nil, nil // request more data
}

// parseLsTreeEntry decodes one ls-tree record of the form
//
//	<mode> <type> <object> <size>\t<path>
//
// for example "100644 blob e69de29... 12\tREADME.md". It reports ok=false
// (and no error) for empty records and for entries whose type is not "blob".
func parseLsTreeEntry(entry string, ref Ref) (Blob, bool, error) {
	if entry == "" {
		return Blob{}, false, nil
	}

	// Split header and path on the tab so spaces in file names survive.
	tab := strings.IndexByte(entry, '\t')
	if tab == -1 {
		return Blob{}, false, fmt.Errorf("expected tab delimiter in ls-tree output: %s", entry)
	}
	header, path := entry[:tab], entry[tab+1:]

	// header fields: mode, type, object, size
	fields := strings.Fields(header)
	if len(fields) < 4 {
		return Blob{}, false, fmt.Errorf("unexpected ls-tree output format: %s", entry)
	}
	modeNumber, typ, sizeStr := fields[0], fields[1], fields[3]

	// Only files (blobs) are of interest. This check comes before the size
	// is parsed because non-blobs may report "-" as their size.
	if typ != "blob" {
		return Blob{}, false, nil
	}

	size, err := strconv.ParseInt(sizeStr, 10, 64)
	if err != nil {
		return Blob{}, false, fmt.Errorf("invalid blob size for %q: %w", path, err)
	}
	mode, err := ParseFileMode(modeNumber)
	if err != nil {
		return Blob{}, false, fmt.Errorf("invalid file mode for %q: %w", path, err)
	}

	return Blob{
		Ref:      ref,
		Mode:     mode,
		Path:     path,
		FileName: filepath.Base(path),
		Size:     size,
	}, true, nil
}

// BlobContent returns the content of path at ref (HEAD when ref is empty) in
// the repository at repoDir, together with IsBinary's verdict on that content.
func BlobContent(ref Ref, path string, repoDir string) ([]byte, bool, error) {
	if ref.IsEmpty() {
		ref = NewRef("HEAD")
	}
	// `git show ref:path` prints the blob stored at that path in that ref.
	out, err := runGit(repoDir, "show", ref.String()+":"+path)
	if err != nil {
		return nil, false, fmt.Errorf("git show failed: %w", err)
	}
	return out, IsBinary(out), nil
}

// Commits returns the history reachable from ref (HEAD when ref is empty) in
// the repository at repoDir, in the order `git log` prints it. The Branch and
// Href fields of the returned commits are left at their zero values.
func Commits(ref Ref, repoDir string) ([]Commit, error) {
	if ref.IsEmpty() {
		ref = NewRef("HEAD")
	}

	out, err := runGit(repoDir,
		"log",
		"--date=unix",
		"--pretty=format:"+strings.Join(commitFields, commitFieldSep),
		"-z", // Separate the commits with NULs instead of newlines
		ref.String(),
	)
	if err != nil {
		return nil, fmt.Errorf("git log %q failed: %w", ref.String(), err)
	}
	return parseCommits(string(out))
}

// parseCommits decodes the NUL-separated records produced by Commits' git log
// invocation. Empty records are skipped; a malformed record fails the parse.
func parseCommits(out string) ([]Commit, error) {
	records := strings.Split(out, "\x00")
	commits := make([]Commit, 0, len(records))
	for _, record := range records {
		if record == "" {
			continue
		}
		commit, err := parseCommitRecord(record)
		if err != nil {
			return nil, err
		}
		commits = append(commits, commit)
	}
	return commits, nil
}

// parseCommitRecord decodes a single commit record whose fields follow
// commitFields and are separated by commitFieldSep.
func parseCommitRecord(record string) (Commit, error) {
	parts := strings.Split(record, commitFieldSep)
	if len(parts) != len(commitFields) {
		return Commit{}, fmt.Errorf("unexpected commit format: %s", record)
	}

	authorDate, err := parseUnixTime(parts[6])
	if err != nil {
		return Commit{}, fmt.Errorf("failed to parse commit date: %w", err)
	}
	committerDate, err := parseUnixTime(parts[9])
	if err != nil {
		return Commit{}, fmt.Errorf("failed to parse committer date: %w", err)
	}

	return Commit{
		Hash:           parts[0],
		ShortHash:      parts[1],
		Subject:        parts[2],
		Body:           parts[3],
		Author:         parts[4],
		Email:          parts[5],
		Date:           authorDate,
		CommitterName:  parts[7],
		CommitterEmail: parts[8],
		CommitterDate:  committerDate,
		Parents:        strings.Fields(parts[10]),
		RefNames:       parseRefNames(parts[11]),
	}, nil
}

// parseRefNames splits the `%D` decoration string of a commit (for example
// "HEAD -> main, tag: v1.2.3, origin/main") into classified ref names. It
// returns nil when the string is blank.
func parseRefNames(refNames string) []RefName {
	refNames = strings.TrimSpace(refNames)
	if refNames == "" {
		return nil
	}

	tokens := strings.Split(refNames, ", ")
	out := make([]RefName, 0, len(tokens))
	for _, token := range tokens {
		token = strings.TrimSpace(token)
		if token == "" {
			continue
		}
		out = append(out, classifyRefName(token))
	}
	return out
}

// classifyRefName maps one non-empty `%D` token to a RefName.
//
// Any remaining token that contains "/" is reported as a remote branch; the
// short names printed by %D do not distinguish "origin/main" from a local
// branch named "feature/x".
func classifyRefName(token string) RefName {
	const (
		tagPrefix  = "tag: "
		headPrefix = "HEAD -> "
		arrow      = " -> "
	)

	switch {
	case strings.HasPrefix(token, tagPrefix):
		// tag: v1.2.3
		return RefName{
			Kind: RefKindTag,
			Name: strings.TrimSpace(strings.TrimPrefix(token, tagPrefix)),
		}
	case token == "HEAD":
		// Detached HEAD: git prints a bare "HEAD" with no target.
		return RefName{Kind: RefKindHEAD, Name: "HEAD"}
	case strings.HasPrefix(token, headPrefix):
		// HEAD -> main
		return RefName{
			Kind:   RefKindHEAD,
			Name:   "HEAD",
			Target: strings.TrimSpace(strings.TrimPrefix(token, headPrefix)),
		}
	}

	// origin/HEAD -> origin/main
	if i := strings.Index(token, arrow); i >= 0 && strings.HasSuffix(token[:i], "/HEAD") {
		return RefName{
			Kind:   RefKindRemoteHEAD,
			Name:   strings.TrimSpace(token[:i]),
			Target: strings.TrimSpace(token[i+len(arrow):]),
		}
	}

	// Remote branch like origin/main
	if strings.Contains(token, "/") {
		return RefName{Kind: RefKindRemote, Name: token}
	}

	// Local branch
	return RefName{Kind: RefKindBranch, Name: token}
}

// CommitDiff returns the unified diff introduced by the commit hash in the
// repository at repoDir, without the commit header.
func CommitDiff(hash, repoDir string) (string, error) {
	// An empty pretty format suppresses the commit header; --patch keeps the diff.
	out, err := runGit(repoDir, "show", "--pretty=format:", "--patch", hash)
	if err != nil {
		return "", fmt.Errorf("git show %q failed: %w", hash, err)
	}
	return string(out), nil
}

// ---------------------------------------------------------------------------
// internal/git/types.go
// ---------------------------------------------------------------------------

// Ref is a git ref name paired with a file-system-safe form of that name.
// Construct it with NewRef; the zero value is the empty ref.
type Ref struct {
	ref     string
	dirName string
}

// NewRef builds a Ref for the given ref name and precomputes its DirName
// using RefToFileName.
func NewRef(ref string) Ref {
	return Ref{
		ref:     ref,
		dirName: RefToFileName(ref),
	}
}

// IsEmpty reports whether the ref name is the empty string.
func (r Ref) IsEmpty() bool {
	return r.ref == ""
}

// String returns the ref name exactly as it was given to NewRef.
func (r Ref) String() string {
	return r.ref
}

// DirName returns the ref name sanitized by RefToFileName.
func (r Ref) DirName() string {
	return r.dirName
}

// Blob describes one file found by Files.
type Blob struct {
	Ref      Ref    // ref the tree was listed at
	Mode     string // permission letters as produced by ParseFileMode, e.g. "rw-r--r--"
	Path     string // path from the repository root
	FileName string // last element of Path
	Size     int64  // blob size as reported by `git ls-tree -l`
}

// Commit describes one commit returned by Commits.
type Commit struct {
	Hash           string
	ShortHash      string
	Subject        string
	Body           string
	Author         string
	Email          string
	Date           time.Time // author date
	CommitterName  string
	CommitterEmail string
	CommitterDate  time.Time
	Parents        []string  // parent commit hashes
	Branch         Ref       // not populated by Commits
	RefNames       []RefName // decorations parsed from `git log %D`
	Href           string    // not populated by Commits
}

// RefKind classifies a decoration printed by `git log %D`.
type RefKind string

// The kinds of ref name that parseRefNames can produce.
const (
	RefKindHEAD       RefKind = "HEAD"
	RefKindRemoteHEAD RefKind = "RemoteHEAD"
	RefKindBranch     RefKind = "Branch"
	RefKindRemote     RefKind = "Remote"
	RefKindTag        RefKind = "Tag"
)

// RefName is one decoration attached to a commit.
type RefName struct {
	Kind   RefKind
	Name   string // Name is the primary name of the ref as shown by `git log %D` token (left side for pointers)
	Target string // Target is set for symbolic refs like "HEAD -> main" or "origin/HEAD -> origin/main"
}

// Tag describes one tag returned by Tags.
type Tag struct {
	Name       string
	Date       time.Time // git's creatordate for the tag
	CommitHash string    // peeled object for annotated tags, tagged object otherwise
}

// ---------------------------------------------------------------------------
// internal/git/utils.go
// ---------------------------------------------------------------------------

// permTriplets maps an octal permission digit to its rwx letters.
var permTriplets = [8]string{
	"---", "--x", "-w-", "-wx",
	"r--", "r-x", "rw-", "rwx",
}

// ParseFileMode converts a git-style file mode (e.g. "100644")
// into a human-readable string like "rw-r--r--".
//
// Only the last three characters are interpreted; they must be octal digits.
// An error is returned when modeStr is shorter than three characters or when
// any of those three characters is not in the range '0'..'7'.
func ParseFileMode(modeStr string) (string, error) {
	// Git modes are typically 6 digits. The last 3 represent permissions.
	// e.g. 100644 → 644
	if len(modeStr) < 3 {
		return "", fmt.Errorf("invalid mode: %s", modeStr)
	}

	permStr := modeStr[len(modeStr)-3:]
	// Reject anything that is not an octal digit up front: a decimal parse
	// would accept 8, 9 and a leading sign and then yield a truncated result.
	for i := 0; i < len(permStr); i++ {
		if permStr[i] < '0' || permStr[i] > '7' {
			return "", fmt.Errorf("invalid mode: %s", modeStr)
		}
	}

	permVal, err := strconv.Atoi(permStr)
	if err != nil {
		return "", err
	}
	return numericPermToLetters(permVal), nil
}

// numericPermToLetters renders a permission value written as three decimal
// positions (e.g. 644) as rwx letters for user, group and others. A position
// that is not a valid octal digit contributes nothing to the result.
func numericPermToLetters(perm int) string {
	positions := [3]int{
		perm / 100,       // user
		(perm / 10) % 10, // group
		perm % 10,        // others
	}

	var b strings.Builder
	for _, digit := range positions {
		if digit >= 0 && digit < len(permTriplets) {
			b.WriteString(permTriplets[digit])
		}
	}
	return b.String()
}

// IsBinary performs a heuristic check to determine if data is binary.
// Rules:
//   - Empty input is not binary.
//   - Any NUL byte within the sample => binary.
//   - Consider only a sample (up to 8 KiB). If >30% of bytes are control characters
//     outside the common whitespace/newline range, treat as binary.
func IsBinary(b []byte) bool {
	n := len(b)
	if n == 0 {
		return false
	}
	if n > 8192 {
		n = 8192
	}

	bad := 0
	for _, c := range b[:n] {
		switch {
		case c == 0x00:
			return true
		case c == 9 || c == 10 || c == 13:
			// Allow common whitespace and control: tab(9), LF(10), CR(13)
		case c < 32 || c == 127:
			// Count other control chars and DEL as non-text
			bad++
		}
	}
	// If more than 30% of sampled bytes are non-text, consider binary
	return bad*100 > n*30
}

// RefToFileName turns a ref name into a string safe to use as a file or
// directory name: ASCII letters are lower-cased, digits, '-' and '.' are kept,
// and every other character is replaced by a single '-'. Distinct refs can
// therefore map to the same name (e.g. "a/b" and "a-b").
func RefToFileName(ref string) string {
	var result strings.Builder
	for _, c := range ref {
		switch {
		case (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-' || c == '.':
			result.WriteByte(byte(c))
		case c >= 'A' && c <= 'Z':
			result.WriteByte(byte(c - 'A' + 'a'))
		default:
			result.WriteByte('-')
		}
	}
	return result.String()
}
