diff options
| author | mo khan <mo.khan@gmail.com> | 2020-05-10 21:18:26 -0600 |
|---|---|---|
| committer | mo khan <mo.khan@gmail.com> | 2020-05-10 21:21:05 -0600 |
| commit | 727e48b57c8c1ca14240fdb425b4d3654a7b00e7 (patch) | |
| tree | 524c7ad853100b92897c048a46425547a4b0510a | |
| parent | c19558661b919522950a8f3f8006fd647f18de22 (diff) | |
Optimize processing of index file
| -rw-r--r-- | lib/spandx/core/data_file.rb | 10 | ||||
| -rw-r--r-- | lib/spandx/core/index_file.rb | 48 | ||||
| -rw-r--r-- | spec/integration/core/cache_spec.rb | 20 |
3 files changed, 41 insertions, 37 deletions
diff --git a/lib/spandx/core/data_file.rb b/lib/spandx/core/data_file.rb index e958153..9ca8e95 100644 --- a/lib/spandx/core/data_file.rb +++ b/lib/spandx/core/data_file.rb @@ -14,13 +14,15 @@ module Spandx return unless exist? open_file do |io| - loop { yield parse_row(io) } + while (line = io.gets) + yield ::CsvParser.parse_line(line) + end end end def search(name:, version:) open_file do |io| - search_for("#{name}-#{version}", io, index) + search_for("#{name}-#{version}", io, index.data) end end @@ -36,7 +38,7 @@ module Spandx absolute_path.exist? end - def open_file(mode: 'r') + def open_file(mode: 'rb') absolute_path.open(mode) { |io| yield io } rescue EOFError => error Spandx.logger.error(error) @@ -56,7 +58,7 @@ module Spandx return if lines.empty? mid = lines.size == 1 ? 0 : lines.size / 2 - io.seek(lines[mid].to_i) + io.seek(lines[mid]) comparison = matches?(term, parse_row(io)) do |row| return row end diff --git a/lib/spandx/core/index_file.rb b/lib/spandx/core/index_file.rb index f8833ff..87158d9 100644 --- a/lib/spandx/core/index_file.rb +++ b/lib/spandx/core/index_file.rb @@ -10,26 +10,8 @@ module Spandx @path = Pathname.new("#{data_file.absolute_path}.lines") end - def each - data.each do |position| - yield position - end - end - - def size - data.size - end - - def empty? - data.empty? - end - - def [](index) - data[index] - end - - def slice(min, max) - data.slice(min, max) + def data + @data ||= load end def update! @@ -46,23 +28,25 @@ module Spandx end def rebuild_index! - data_file.open_file do |io| - lines = lines_in(io) - path.write(lines.map(&:to_s).join(',')) - @data = lines + data_file.open_file do |data_io| + Zlib::GzipWriter.open(path) do |index_io| + lines_in(data_io).each do |line| + index_io.write([line].pack('v')) + end + end end end - def data - @data ||= load - end - def load - if path.exist? - FastestCSV.parse_line(path.read).map(&:to_i) - else - data_file.open_file { |io| lines_in(io) } + return data_file.open_file { |io| lines_in(io) } unless path.exist? + + [].tap do |items| + Zlib::GzipReader.open(path) do |io| + items << io.read(2).unpack1('v') until io.eof? + end end + rescue Zlib::GzipFile::Error + data_file.open_file { |io| lines_in(io) } end def lines_in(io) diff --git a/spec/integration/core/cache_spec.rb b/spec/integration/core/cache_spec.rb index 3eb9de3..a982fde 100644 --- a/spec/integration/core/cache_spec.rb +++ b/spec/integration/core/cache_spec.rb @@ -111,7 +111,7 @@ RSpec.describe Spandx::Core::Cache do it 'builds an index that contains the seek position for the start of each line' do data_file = subject.datafile_for('spandx') data_file.open_file do |io| - data_file.index.each do |position| + data_file.index.data.each do |position| unless position.zero? io.seek(position - 1) expect(io.readchar).to eql("\n") @@ -161,6 +161,24 @@ RSpec.describe Spandx::Core::Cache do it 'yields each item quickly' do expect { subject.take(100_000).count }.to perform_under(0.1).sample(10) end + + xit 'profiles each option' do + datafile = Spandx::Core::DataFile.new('~/.local/share/spandx/rubygems-cache/.index/02/rubygems') + Benchmark.ips do |x| + x.report('fastest-csv') { FastestCSV.foreach(datafile.absolute_path) { |y| } } + x.report('manual-gets') do + datafile.open_file(mode: 'rb') do |io| + while (x = io.gets) + ::CsvParser.parse_line(x) + end + end + end + x.report('manual-eof') { datafile.open_file { |io| FastestCSV.parse_line(io.readline) until io.eof? } } + x.report('manual-exception') { datafile.open_file { |io| loop { FastestCSV.parse_line(io.readline) } } } + + x.compare! + end + end end end end |
