summary refs log tree commit diff
diff options
context:
space:
mode:
author mo khan <mo.khan@gmail.com> 2020-05-10 21:18:26 -0600
committer mo khan <mo.khan@gmail.com> 2020-05-10 21:21:05 -0600
commit 727e48b57c8c1ca14240fdb425b4d3654a7b00e7 (patch)
tree 524c7ad853100b92897c048a46425547a4b0510a
parent c19558661b919522950a8f3f8006fd647f18de22 (diff)
Optimize processing of index file
-rw-r--r-- lib/spandx/core/data_file.rb 10
-rw-r--r-- lib/spandx/core/index_file.rb 48
-rw-r--r-- spec/integration/core/cache_spec.rb 20
3 files changed, 41 insertions, 37 deletions
diff --git a/lib/spandx/core/data_file.rb b/lib/spandx/core/data_file.rb
index e958153..9ca8e95 100644
--- a/lib/spandx/core/data_file.rb
+++ b/lib/spandx/core/data_file.rb
@@ -14,13 +14,15 @@ module Spandx
return unless exist?
open_file do |io|
- loop { yield parse_row(io) }
+ while (line = io.gets)
+ yield ::CsvParser.parse_line(line)
+ end
end
end
def search(name:, version:)
open_file do |io|
- search_for("#{name}-#{version}", io, index)
+ search_for("#{name}-#{version}", io, index.data)
end
end
@@ -36,7 +38,7 @@ module Spandx
absolute_path.exist?
end
- def open_file(mode: 'r')
+ def open_file(mode: 'rb')
absolute_path.open(mode) { |io| yield io }
rescue EOFError => error
Spandx.logger.error(error)
@@ -56,7 +58,7 @@ module Spandx
return if lines.empty?
mid = lines.size == 1 ? 0 : lines.size / 2
- io.seek(lines[mid].to_i)
+ io.seek(lines[mid])
comparison = matches?(term, parse_row(io)) do |row|
return row
end
diff --git a/lib/spandx/core/index_file.rb b/lib/spandx/core/index_file.rb
index f8833ff..87158d9 100644
--- a/lib/spandx/core/index_file.rb
+++ b/lib/spandx/core/index_file.rb
@@ -10,26 +10,8 @@ module Spandx
@path = Pathname.new("#{data_file.absolute_path}.lines")
end
- def each
- data.each do |position|
- yield position
- end
- end
-
- def size
- data.size
- end
-
- def empty?
- data.empty?
- end
-
- def [](index)
- data[index]
- end
-
- def slice(min, max)
- data.slice(min, max)
+ def data
+ @data ||= load
end
def update!
@@ -46,23 +28,25 @@ module Spandx
end
def rebuild_index!
- data_file.open_file do |io|
- lines = lines_in(io)
- path.write(lines.map(&:to_s).join(','))
- @data = lines
+ data_file.open_file do |data_io|
+ Zlib::GzipWriter.open(path) do |index_io|
+ lines_in(data_io).each do |line|
+ index_io.write([line].pack('v'))
+ end
+ end
end
end
- def data
- @data ||= load
- end
-
def load
- if path.exist?
- FastestCSV.parse_line(path.read).map(&:to_i)
- else
- data_file.open_file { |io| lines_in(io) }
+ return data_file.open_file { |io| lines_in(io) } unless path.exist?
+
+ [].tap do |items|
+ Zlib::GzipReader.open(path) do |io|
+ items << io.read(2).unpack1('v') until io.eof?
+ end
end
+ rescue Zlib::GzipFile::Error
+ data_file.open_file { |io| lines_in(io) }
end
def lines_in(io)
diff --git a/spec/integration/core/cache_spec.rb b/spec/integration/core/cache_spec.rb
index 3eb9de3..a982fde 100644
--- a/spec/integration/core/cache_spec.rb
+++ b/spec/integration/core/cache_spec.rb
@@ -111,7 +111,7 @@ RSpec.describe Spandx::Core::Cache do
it 'builds an index that contains the seek position for the start of each line' do
data_file = subject.datafile_for('spandx')
data_file.open_file do |io|
- data_file.index.each do |position|
+ data_file.index.data.each do |position|
unless position.zero?
io.seek(position - 1)
expect(io.readchar).to eql("\n")
@@ -161,6 +161,24 @@ RSpec.describe Spandx::Core::Cache do
it 'yields each item quickly' do
expect { subject.take(100_000).count }.to perform_under(0.1).sample(10)
end
+
+ xit 'profiles each option' do
+ datafile = Spandx::Core::DataFile.new('~/.local/share/spandx/rubygems-cache/.index/02/rubygems')
+ Benchmark.ips do |x|
+ x.report('fastest-csv') { FastestCSV.foreach(datafile.absolute_path) { |y| } }
+ x.report('manual-gets') do
+ datafile.open_file(mode: 'rb') do |io|
+ while (x = io.gets)
+ ::CsvParser.parse_line(x)
+ end
+ end
+ end
+ x.report('manual-eof') { datafile.open_file { |io| FastestCSV.parse_line(io.readline) until io.eof? } }
+ x.report('manual-exception') { datafile.open_file { |io| loop { FastestCSV.parse_line(io.readline) } } }
+
+ x.compare!
+ end
+ end
end
end
end