summaryrefslogtreecommitdiff
path: root/lib/gitem.rb
blob: bc79f67994ccf5b9e0d22dded6e636ff4a05f497 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# frozen_string_literal: true

require 'fileutils'
require 'json'
require 'set'
require 'time'
require 'rugged'

require_relative "gitem/version"

module Gitem
  class Error < StandardError; end

  class GitToJson
    def initialize(repo_path, output_dir = nil)
      @repo = Rugged::Repository.new(repo_path)
      @output_dir = output_dir || File.join(@repo.path, 'srv')
      @processed_trees = Set.new
      @processed_blobs = Set.new
    end

    def export!
      setup_directories
      export_branches
      export_tags
      export_commits
      export_repo_info
      puts "\n✓ Export complete! Files written to #{@output_dir}"
      puts "  Serve with: cd #{@output_dir} && ruby -run -e httpd . 8000"
    end

    private

    def setup_directories
      %w[commits trees blobs refs/heads refs/tags].each do |dir|
        FileUtils.mkdir_p(File.join(@output_dir, dir))
      end
    end

    def export_repo_info
      default_branch = default_branch_name
      branch = @repo.branches[default_branch]
      readme_content = nil
      readme_name = nil

      if branch&.target
        tree = branch.target.tree
        %w[README.md README.markdown readme.md README.txt README].each do |name|
          entry = tree.each.find { |e| e[:name].downcase == name.downcase }
          if entry && entry[:type] == :blob
            blob = @repo.lookup(entry[:oid])
            readme_content = blob.content.encode('UTF-8', invalid: :replace, undef: :replace) unless blob.binary?
            readme_name = entry[:name]
            break
          end
        end
      end

      info = {
        name: File.basename(@repo.workdir || @repo.path.chomp('/.git/').chomp('.git')),
        default_branch: default_branch,
        branches_count: @repo.branches.count { |b| b.name && !b.name.include?('/') },
        tags_count: @repo.tags.count,
        readme: readme_content,
        readme_name: readme_name,
        generated_at: Time.now.iso8601
      }
      write_json('repo.json', info)
    end

    def default_branch_name
      %w[main master].find { |name| @repo.branches[name] } || @repo.branches.first&.name || 'main'
    end

    def export_branches
      branches = @repo.branches.map do |branch|
        next if branch.name.nil? || branch.name.include?('/')
        target = branch.target rescue nil
        next unless target
        {
          name: branch.name,
          sha: target.oid,
          is_head: branch.head?,
          committed_at: target.committer[:time].iso8601,
          author: target.author[:name],
          message: target.message.lines.first&.strip || ''
        }
      end.compact.sort_by { |b| b[:is_head] ? 0 : 1 }

      write_json('branches.json', branches)
      branches.each { |b| write_json("refs/heads/#{b[:name]}.json", b) }
    end

    def export_tags
      tags = @repo.tags.map do |tag|
        target = tag.target rescue nil
        next unless target
        commit = target.is_a?(Rugged::Tag::Annotation) ? target.target : target
        {
          name: tag.name,
          sha: commit.oid,
          annotated: target.is_a?(Rugged::Tag::Annotation),
          message: target.is_a?(Rugged::Tag::Annotation) ? target.message : nil,
          committed_at: commit.committer[:time].iso8601
        }
      end.compact.sort_by { |t| t[:committed_at] }.reverse

      write_json('tags.json', tags)
      tags.each { |t| write_json("refs/tags/#{t[:name]}.json", t) }
    end

    def export_commits
      commits_list = []
      walker = Rugged::Walker.new(@repo)

      @repo.branches.each do |branch|
        next if branch.target.nil?
        walker.push(branch.target.oid) rescue nil
      end

      walker.sorting(Rugged::SORT_TOPO | Rugged::SORT_DATE)

      walker.each_with_index do |commit, idx|
        print "\rProcessing commit #{idx + 1}..." if (idx + 1) % 10 == 0

        commit_data = extract_commit(commit)
        commits_list << commit_data.slice(:sha, :short_sha, :message_headline, :author, :committed_at)

        write_json("commits/#{commit.oid}.json", commit_data)
        export_tree(commit.tree, '')
      end

      commits_list.sort_by! { |c| c[:committed_at] }.reverse!
      write_json('commits.json', commits_list)
      puts "\rProcessed #{commits_list.size} commits"
    end

    def extract_commit(commit)
      parents = commit.parents.map { |p| { sha: p.oid, short_sha: p.oid[0..6] } }
      diff_stats = commit.parents.empty? ? initial_diff_stats(commit) : parent_diff_stats(commit)

      {
        sha: commit.oid,
        short_sha: commit.oid[0..6],
        message: commit.message,
        message_headline: commit.message.lines.first&.strip || '',
        author: { name: commit.author[:name], email: commit.author[:email], date: commit.author[:time].iso8601 },
        committer: { name: commit.committer[:name], email: commit.committer[:email], date: commit.committer[:time].iso8601 },
        committed_at: commit.committer[:time].iso8601,
        parents: parents,
        tree_sha: commit.tree.oid,
        stats: diff_stats[:stats],
        files: diff_stats[:files]
      }
    end

    def initial_diff_stats(commit)
      files = []
      collect_tree_files(commit.tree, '', files)
      { stats: { additions: files.sum { |f| f[:additions] }, deletions: 0, changed: files.size }, files: files }
    end

    def collect_tree_files(tree, path, files)
      tree.each do |entry|
        full_path = path.empty? ? entry[:name] : "#{path}/#{entry[:name]}"
        if entry[:type] == :blob
          blob = @repo.lookup(entry[:oid])
          lines = blob.binary? ? 0 : blob.content.lines.count
          files << { path: full_path, additions: lines, deletions: 0, status: 'added' }
        elsif entry[:type] == :tree
          collect_tree_files(@repo.lookup(entry[:oid]), full_path, files)
        end
      end
    end

    def parent_diff_stats(commit)
      diff = commit.parents.first.diff(commit)
      files = []
      additions = deletions = 0

      diff.each_patch do |patch|
        file_adds = file_dels = 0
        patch.each_hunk { |h| h.each_line { |l| l.addition? ? file_adds += 1 : (file_dels += 1 if l.deletion?) } }
        additions += file_adds
        deletions += file_dels

        status = case patch.delta.status
        when :added then 'added'
        when :deleted then 'deleted'
        when :renamed then 'renamed'
        else 'modified'
        end

        files << { path: patch.delta.new_file[:path], additions: file_adds, deletions: file_dels, status: status }
      end

      { stats: { additions: additions, deletions: deletions, changed: files.size }, files: files }
    end

    def export_tree(tree, path)
      return if @processed_trees.include?(tree.oid)
      @processed_trees.add(tree.oid)

      entries = tree.map do |entry|
        entry_data = {
          name: entry[:name],
          path: path.empty? ? entry[:name] : "#{path}/#{entry[:name]}",
          type: entry[:type].to_s,
          sha: entry[:oid],
          mode: entry[:filemode].to_s(8)
        }

        if entry[:type] == :tree
          export_tree(@repo.lookup(entry[:oid]), entry_data[:path])
        elsif entry[:type] == :blob
          export_blob(entry[:oid], entry_data[:path])
        end

        entry_data
      end

      write_json("trees/#{tree.oid}.json", { sha: tree.oid, path: path, entries: entries })
    end

    def export_blob(oid, path)
      return if @processed_blobs.include?(oid)
      @processed_blobs.add(oid)

      blob = @repo.lookup(oid)
      data = {
        sha: oid,
        path: path,
        size: blob.size,
        binary: blob.binary?,
        content: blob.binary? ? nil : safe_content(blob.content),
        truncated: !blob.binary? && blob.size > 100_000
      }

      write_json("blobs/#{oid}.json", data)
    end

    def safe_content(content)
      return content[0..100_000] + "\n... [truncated]" if content.size > 100_000
      content.encode('UTF-8', invalid: :replace, undef: :replace, replace: '�')
    end

    def write_json(path, data)
      File.write(File.join(@output_dir, path), JSON.pretty_generate(data))
    end
  end
end