summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: mo khan <mo.khan@gmail.com> 2020-05-15 12:13:34 -0600
committer: mo khan <mo.khan@gmail.com> 2020-05-15 12:13:34 -0600
commit: 81f8960379e2eb05f63275ca3d2caa9986d426cf (patch)
tree: db0f6bed740e0ea4fed68ea2f598fdd13f9e3dc2 /lib
parent: 1d7e7d741cd6e7487bffc23d810a9616d5c3e599 (diff)
Load entries in index on demand
Diffstat (limited to 'lib')
-rw-r--r-- lib/spandx/core/cache.rb 5
-rw-r--r-- lib/spandx/core/data_file.rb 8
-rw-r--r-- lib/spandx/core/git.rb 2
-rw-r--r-- lib/spandx/core/index_file.rb 51
-rw-r--r-- lib/spandx/core/relation.rb 4
5 files changed, 30 insertions, 40 deletions
diff --git a/lib/spandx/core/cache.rb b/lib/spandx/core/cache.rb
index fa7896b..528e182 100644
--- a/lib/spandx/core/cache.rb
+++ b/lib/spandx/core/cache.rb
@@ -34,6 +34,11 @@ module Spandx
datafile_for(name).insert(name, version, licenses)
end
+ def insert!(*args)
+ insert(*args)
+ rebuild_index
+ end
+
def datafile_for(name)
datafiles.fetch(key_for(name))
end
diff --git a/lib/spandx/core/data_file.rb b/lib/spandx/core/data_file.rb
index d5d1e8b..14d7883 100644
--- a/lib/spandx/core/data_file.rb
+++ b/lib/spandx/core/data_file.rb
@@ -3,6 +3,8 @@
module Spandx
module Core
class DataFile
+ include Enumerable
+
attr_reader :absolute_path
def initialize(absolute_path)
@@ -13,9 +15,9 @@ module Spandx
def each
return unless exist?
- index.scan do |table|
- table.each do |row|
- yield row
+ open_file(mode: 'rb') do |io|
+ while (line = io.gets)
+ yield CsvParser.parse(line)
end
end
end
diff --git a/lib/spandx/core/git.rb b/lib/spandx/core/git.rb
index a4872ea..51949a0 100644
--- a/lib/spandx/core/git.rb
+++ b/lib/spandx/core/git.rb
@@ -33,7 +33,7 @@ module Spandx
end
def clone!
- system('git', 'clone', '--quiet', url, root)
+ system('git', 'clone', '--quiet', '--depth=1', '--single-branch', '--branch', 'master', url, root)
end
def pull!
diff --git a/lib/spandx/core/index_file.rb b/lib/spandx/core/index_file.rb
index 169a4f4..31b63c7 100644
--- a/lib/spandx/core/index_file.rb
+++ b/lib/spandx/core/index_file.rb
@@ -11,12 +11,13 @@ module Spandx
def initialize(data_file)
@data_file = data_file
@path = Pathname.new("#{data_file.absolute_path}.idx")
- @entries = {}
+ @entries = size.positive? ? Array.new(size) : []
end
def each
- data.each do |position|
- yield position
+ total = path.size / UINT_32_SIZE
+ total.times do |n|
+ yield position_for(n)
end
end
@@ -43,21 +44,19 @@ module Spandx
end
def size
- path.exist? ? path.size / UINT_32_SIZE : (data&.size || 0)
+ path.exist? ? path.size / UINT_32_SIZE : 0
end
def position_for(row_number)
- data.fetch(row_number)
+ return if row_number > size
- # @entries.fetch(row_number) do |key|
- # offset = row_number * 2
- # @entries[key] = IO.read(path, 2, offset, mode: 'rb').unpack1('v')
+ entry = entries[row_number]
+ return entry if entry
- # #@entries[key] = File.open(path, mode: 'rb') do |io|
- # #io.seek(row_number * 2)
- # #io.read(2).unpack1('v')
- # #end
- # end
+ bytes = IO.binread(path, UINT_32_SIZE, offset_for(row_number))
+ entry = bytes.unpack1(UINT_32_DIRECTIVE)
+ entries[row_number] = entry
+ entry
end
def scan
@@ -75,8 +74,10 @@ module Spandx
private
- def data
- @data ||= load
+ attr_reader :entries
+
+ def offset_for(row_number)
+ row_number * UINT_32_SIZE
end
def sort(data_file)
@@ -93,26 +94,6 @@ module Spandx
end
end
- def load
- return build_index_from_data_file unless path.exist?
-
- [].tap do |items|
- each_index do |position|
- items << position
- end
- end
- end
-
- def build_index_from_data_file
- data_file.open_file { |io| lines_in(io) }
- end
-
- def each_index
- File.open(path, mode: 'rb') do |io|
- yield io.read(UINT_32_SIZE).unpack1(UINT_32_DIRECTIVE) until io.eof?
- end
- end
-
def lines_in(io)
lines = [0]
io.seek(0)
diff --git a/lib/spandx/core/relation.rb b/lib/spandx/core/relation.rb
index 05ebd5b..164cdec 100644
--- a/lib/spandx/core/relation.rb
+++ b/lib/spandx/core/relation.rb
@@ -21,7 +21,9 @@ module Spandx
end
def row(number)
- offset = index.position_for(number)
+ offset = number.zero? ? 0 : index.position_for(number)
+ return unless offset
+
io.seek(offset)
parse_row(io.gets)
end