summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Gemfile.lock 4
-rwxr-xr-x bin/csv-benchmark 28
-rw-r--r-- lib/spandx.rb 1
-rw-r--r-- lib/spandx/core/datafile.rb 11
-rw-r--r-- spandx.gemspec 2
-rw-r--r-- spec/integration/core/cache_spec.rb 30
-rw-r--r-- spec/spec_helper.rb 1
-rw-r--r-- spec/unit/ruby/parsers/gemfile_lock_spec.rb 2
8 files changed, 65 insertions, 14 deletions
diff --git a/Gemfile.lock b/Gemfile.lock
index 4f83d52..f382551 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -4,6 +4,7 @@ PATH
spandx (0.12.3)
addressable (~> 2.7)
bundler (>= 1.16, < 3.0.0)
+ fastest-csv (~> 0.0)
net-hippie (~> 0.3)
nokogiri (~> 1.10)
parslet (~> 2.0)
@@ -16,6 +17,7 @@ GEM
addressable (2.7.0)
public_suffix (>= 2.0.2, < 5.0)
ast (2.4.0)
+ benchmark-ips (2.8.2)
benchmark-malloc (0.1.0)
benchmark-perf (0.5.0)
benchmark-trend (0.3.0)
@@ -29,6 +31,7 @@ GEM
dotenv (2.7.5)
faraday (1.0.0)
multipart-post (>= 1.2, < 3)
+ fastest-csv (0.0.4)
hashdiff (1.0.1)
jaro_winkler (1.5.4)
licensed (2.8.0)
@@ -114,6 +117,7 @@ PLATFORMS
ruby
DEPENDENCIES
+ benchmark-ips (~> 2.8)
bundler-audit (~> 0.6)
byebug (~> 11.1)
licensed (~> 2.8)
diff --git a/bin/csv-benchmark b/bin/csv-benchmark
new file mode 100755
index 0000000..f7573f2
--- /dev/null
+++ b/bin/csv-benchmark
@@ -0,0 +1,28 @@
+#!/usr/bin/env ruby
+# Benchmarks several ways of parsing one quoted CSV row (name, version, license).
+
+require 'bundler/inline'
+
+gemfile do
+  source 'https://rubygems.org'
+  gem 'benchmark-ips', '~> 2.8'
+  gem 'fastcsv', '~> 0.0'
+  gem 'fastest-csv'
+end
+
+require 'benchmark/ips'
+require 'csv'
+require 'fastcsv'
+require 'fastest-csv'
+
+csv = '"spandx","0.0.0","MIT"'
+
+Benchmark.ips do |x|
+  x.report('CSV.parse') { CSV.parse(csv)[0] }
+  x.report('csv.split') { csv.split(',', 3) } # baseline: fields keep their quotes
+  x.report('csv.split-with-slice') { csv.chomp.split(',', 3).map { |field| field[1...-1] } }
+  x.report('csv.regex') { csv.scan(/"(\S+)","*(\d+\.\d+\.\d+)","(\S+)"/)[0] }
+  x.report('csv.fastcsv') { FastCSV.raw_parse(csv) { |row| row } }
+  x.report('csv.fastestcsv') { FastestCSV.parse_line(csv) }
+  x.compare!
+end
diff --git a/lib/spandx.rb b/lib/spandx.rb
index 6930794..9031c49 100644
--- a/lib/spandx.rb
+++ b/lib/spandx.rb
@@ -3,6 +3,7 @@
require 'addressable/uri'
require 'bundler'
require 'csv'
+require 'fastest-csv'
require 'forwardable'
require 'json'
require 'logger'
diff --git a/lib/spandx/core/datafile.rb b/lib/spandx/core/datafile.rb
index f0a0a34..1f08f92 100644
--- a/lib/spandx/core/datafile.rb
+++ b/lib/spandx/core/datafile.rb
@@ -15,7 +15,7 @@ module Spandx
return unless exist?
open_file do |io|
- yield parse_row(io)
+ loop { yield parse_row(io) }
end
end
@@ -23,9 +23,6 @@ module Spandx
open_file do |io|
search_for("#{name}-#{version}", io, index)
end
- rescue Errno::ENOENT => error
- Spandx.logger.error(error)
- nil
end
def insert(name, version, licenses)
@@ -68,6 +65,10 @@ module Spandx
absolute_path.open(mode) { |io| yield io }
rescue EOFError => error
Spandx.logger.error(error)
+ nil
+ rescue Errno::ENOENT => error
+ Spandx.logger.error(error)
+ nil
end
def search_for(term, io, lines)
@@ -87,7 +88,7 @@ module Spandx
end
def parse_row(io)
- CSV.parse(io.readline)[0]
+ FastestCSV.parse_line(io.readline)
end
def partition(comparison, mid, lines)
diff --git a/spandx.gemspec b/spandx.gemspec
index 8b344ea..68dc013 100644
--- a/spandx.gemspec
+++ b/spandx.gemspec
@@ -32,12 +32,14 @@ Gem::Specification.new do |spec|
spec.add_dependency 'addressable', '~> 2.7'
spec.add_dependency 'bundler', '>= 1.16', '< 3.0.0'
+ spec.add_dependency 'fastest-csv', '~> 0.0'
spec.add_dependency 'net-hippie', '~> 0.3'
spec.add_dependency 'nokogiri', '~> 1.10'
spec.add_dependency 'parslet', '~> 2.0'
spec.add_dependency 'thor'
spec.add_dependency 'zeitwerk', '~> 2.3'
+ spec.add_development_dependency 'benchmark-ips', '~> 2.8'
spec.add_development_dependency 'bundler-audit', '~> 0.6'
spec.add_development_dependency 'byebug', '~> 11.1'
spec.add_development_dependency 'licensed', '~> 2.8'
diff --git a/spec/integration/core/cache_spec.rb b/spec/integration/core/cache_spec.rb
index 06cdfa3..3f947b2 100644
--- a/spec/integration/core/cache_spec.rb
+++ b/spec/integration/core/cache_spec.rb
@@ -132,20 +132,20 @@ RSpec.describe Spandx::Core::Cache do
end
describe '#each' do
- subject { described_class.new('rubygems', root: root_dir) }
+ context 'when a single item is present in the cache' do
+ subject { described_class.new('rubygems', root: root_dir) }
- let(:root_dir) { Dir.mktmpdir }
+ let(:root_dir) { Dir.mktmpdir }
- after do
- FileUtils.remove_entry(root_dir)
- end
-
- context 'when a single item is present in the cache' do
before do
subject.insert('spandx', '0.0.0', ['MIT'])
end
- it 'yields each item in the index' do
+ after do
+ FileUtils.remove_entry(root_dir)
+ end
+
+ it 'yields each item in the cache' do
collect = []
subject.each do |item|
@@ -155,5 +155,19 @@ RSpec.describe Spandx::Core::Cache do
expect(collect).to match_array([['spandx', '0.0.0', 'MIT']])
end
end
+
+ context 'when multiple items are in multiple datafiles' do
+ subject { described_class.new('rubygems', root: root_dir) }
+
+ let(:root_dir) { "#{Spandx.git[:rubygems].root}/.index" }
+
+ it 'yields each item in the cache' do
+ expect(subject.count).to be > 800_000
+ end
+
+ it 'yields each item quickly' do
+ expect { subject.take(100_000).count }.to perform_under(0.1).sample(10)
+ end
+ end
end
end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index 82f8c4c..63af672 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -3,6 +3,7 @@
require 'bundler/setup'
require 'spandx'
+require 'benchmark/ips'
require 'parslet/convenience'
require 'parslet/rig/rspec'
require 'rspec-benchmark'
diff --git a/spec/unit/ruby/parsers/gemfile_lock_spec.rb b/spec/unit/ruby/parsers/gemfile_lock_spec.rb
index caa8341..cf75463 100644
--- a/spec/unit/ruby/parsers/gemfile_lock_spec.rb
+++ b/spec/unit/ruby/parsers/gemfile_lock_spec.rb
@@ -24,7 +24,7 @@ RSpec.describe Spandx::Ruby::Parsers::GemfileLock do
specify { expect(spandx.name).to eql('spandx') }
specify { expect(spandx.version).to eql(Spandx::VERSION) }
- specify { expect(spandx.meta[:dependencies].map(&:name)).to match_array(%w[addressable bundler net-hippie nokogiri parslet thor zeitwerk]) }
+ specify { expect(spandx.meta[:dependencies].map(&:name)).to match_array(%w[addressable bundler fastest-csv net-hippie nokogiri parslet thor zeitwerk]) }
specify { expect(spandx.meta[:platform]).to eql('ruby') }
specify { expect(spandx.meta[:source]).to be_a_kind_of(Bundler::Source) }
end