diff options
| -rw-r--r-- | Gemfile.lock | 4 | ||||
| -rwxr-xr-x | bin/csv-benchmark | 28 | ||||
| -rw-r--r-- | lib/spandx.rb | 1 | ||||
| -rw-r--r-- | lib/spandx/core/datafile.rb | 11 | ||||
| -rw-r--r-- | spandx.gemspec | 2 | ||||
| -rw-r--r-- | spec/integration/core/cache_spec.rb | 30 | ||||
| -rw-r--r-- | spec/spec_helper.rb | 1 | ||||
| -rw-r--r-- | spec/unit/ruby/parsers/gemfile_lock_spec.rb | 2 |
8 files changed, 65 insertions, 14 deletions
diff --git a/Gemfile.lock b/Gemfile.lock index 4f83d52..f382551 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -4,6 +4,7 @@ PATH spandx (0.12.3) addressable (~> 2.7) bundler (>= 1.16, < 3.0.0) + fastest-csv (~> 0.0) net-hippie (~> 0.3) nokogiri (~> 1.10) parslet (~> 2.0) @@ -16,6 +17,7 @@ GEM addressable (2.7.0) public_suffix (>= 2.0.2, < 5.0) ast (2.4.0) + benchmark-ips (2.8.2) benchmark-malloc (0.1.0) benchmark-perf (0.5.0) benchmark-trend (0.3.0) @@ -29,6 +31,7 @@ GEM dotenv (2.7.5) faraday (1.0.0) multipart-post (>= 1.2, < 3) + fastest-csv (0.0.4) hashdiff (1.0.1) jaro_winkler (1.5.4) licensed (2.8.0) @@ -114,6 +117,7 @@ PLATFORMS ruby DEPENDENCIES + benchmark-ips (~> 2.8) bundler-audit (~> 0.6) byebug (~> 11.1) licensed (~> 2.8) diff --git a/bin/csv-benchmark b/bin/csv-benchmark new file mode 100755 index 0000000..f7573f2 --- /dev/null +++ b/bin/csv-benchmark @@ -0,0 +1,28 @@ +#!/usr/bin/env ruby + +require 'bundler/inline' + +gemfile do + source 'https://rubygems.org' + + gem 'benchmark-ips', '~> 2.8' + gem 'fastcsv', '~> 0.0' + gem 'fastest-csv' +end + +require 'benchmark/ips' +require 'csv' +require 'fastcsv' +require 'fastest-csv' + +csv = "\"spandx\",\"0.0.0\",\"MIT\"" + +Benchmark.ips do |x| + x.report("CSV.parse") { CSV.parse(csv)[0] } + x.report("csv.split") { csv.split(',', 3) } + x.report("csv.split-with-slice") { csv.chomp.split(',', 3).slice(1...-1) } + x.report("csv.regex") { csv.scan(/"(\S+)","*(\d+.\d+.\d+)","(\S+)"/)[0] } + x.report("csv.fastcsv") { FastCSV.raw_parse(csv) { |x| x } } + x.report("csv.fastestcsv") { FastestCSV.parse_line(csv) } + x.compare! +end diff --git a/lib/spandx.rb b/lib/spandx.rb index 6930794..9031c49 100644 --- a/lib/spandx.rb +++ b/lib/spandx.rb @@ -3,6 +3,7 @@ require 'addressable/uri' require 'bundler' require 'csv' +require 'fastest-csv' require 'forwardable' require 'json' require 'logger' diff --git a/lib/spandx/core/datafile.rb b/lib/spandx/core/datafile.rb index f0a0a34..1f08f92 100644 --- a/lib/spandx/core/datafile.rb +++ b/lib/spandx/core/datafile.rb @@ -15,7 +15,7 @@ module Spandx return unless exist? open_file do |io| - yield parse_row(io) + loop { yield parse_row(io) } end end @@ -23,9 +23,6 @@ module Spandx open_file do |io| search_for("#{name}-#{version}", io, index) end - rescue Errno::ENOENT => error - Spandx.logger.error(error) - nil end def insert(name, version, licenses) @@ -68,6 +65,10 @@ module Spandx absolute_path.open(mode) { |io| yield io } rescue EOFError => error Spandx.logger.error(error) + nil + rescue Errno::ENOENT => error + Spandx.logger.error(error) + nil end def search_for(term, io, lines) @@ -87,7 +88,7 @@ module Spandx end def parse_row(io) - CSV.parse(io.readline)[0] + FastestCSV.parse_line(io.readline) end def partition(comparison, mid, lines) diff --git a/spandx.gemspec b/spandx.gemspec index 8b344ea..68dc013 100644 --- a/spandx.gemspec +++ b/spandx.gemspec @@ -32,12 +32,14 @@ Gem::Specification.new do |spec| spec.add_dependency 'addressable', '~> 2.7' spec.add_dependency 'bundler', '>= 1.16', '< 3.0.0' + spec.add_dependency 'fastest-csv', '~> 0.0' spec.add_dependency 'net-hippie', '~> 0.3' spec.add_dependency 'nokogiri', '~> 1.10' spec.add_dependency 'parslet', '~> 2.0' spec.add_dependency 'thor' spec.add_dependency 'zeitwerk', '~> 2.3' + spec.add_development_dependency 'benchmark-ips', '~> 2.8' spec.add_development_dependency 'bundler-audit', '~> 0.6' spec.add_development_dependency 'byebug', '~> 11.1' spec.add_development_dependency 'licensed', '~> 2.8' diff --git a/spec/integration/core/cache_spec.rb b/spec/integration/core/cache_spec.rb index 06cdfa3..3f947b2 100644 --- a/spec/integration/core/cache_spec.rb +++ b/spec/integration/core/cache_spec.rb @@ -132,20 +132,20 @@ RSpec.describe Spandx::Core::Cache do end describe '#each' do - subject { described_class.new('rubygems', root: root_dir) } + context 'when a single item is present in the cache' do + subject { described_class.new('rubygems', root: root_dir) } - let(:root_dir) { Dir.mktmpdir } + let(:root_dir) { Dir.mktmpdir } - after do - FileUtils.remove_entry(root_dir) - end - - context 'when a single item is present in the cache' do before do subject.insert('spandx', '0.0.0', ['MIT']) end - it 'yields each item in the index' do + after do + FileUtils.remove_entry(root_dir) + end + + it 'yields each item in the cache' do collect = [] subject.each do |item| @@ -155,5 +155,19 @@ RSpec.describe Spandx::Core::Cache do expect(collect).to match_array([['spandx', '0.0.0', 'MIT']]) end end + + context 'when multiple items are in multiple datafiles' do + subject { described_class.new('rubygems', root: root_dir) } + + let(:root_dir) { "#{Spandx.git[:rubygems].root}/.index" } + + it 'yields each item in the cache' do + expect(subject.count).to be > 800_000 + end + + it 'yields each item quickly' do + expect { subject.take(100_000).count }.to perform_under(0.1).sample(10) + end + end end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 82f8c4c..63af672 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -3,6 +3,7 @@ require 'bundler/setup' require 'spandx' +require 'benchmark/ips' require 'parslet/convenience' require 'parslet/rig/rspec' require 'rspec-benchmark' diff --git a/spec/unit/ruby/parsers/gemfile_lock_spec.rb b/spec/unit/ruby/parsers/gemfile_lock_spec.rb index caa8341..cf75463 100644 --- a/spec/unit/ruby/parsers/gemfile_lock_spec.rb +++ b/spec/unit/ruby/parsers/gemfile_lock_spec.rb @@ -24,7 +24,7 @@ RSpec.describe Spandx::Ruby::Parsers::GemfileLock do specify { expect(spandx.name).to eql('spandx') } specify { expect(spandx.version).to eql(Spandx::VERSION) } - specify { expect(spandx.meta[:dependencies].map(&:name)).to match_array(%w[addressable bundler net-hippie nokogiri parslet thor zeitwerk]) } + specify { expect(spandx.meta[:dependencies].map(&:name)).to match_array(%w[addressable bundler fastest-csv net-hippie nokogiri parslet thor zeitwerk]) } specify { expect(spandx.meta[:platform]).to eql('ruby') } specify { expect(spandx.meta[:source]).to be_a_kind_of(Bundler::Source) } end |
