diff options
| -rw-r--r-- | .github/licensed/bundler/fastest-csv.dep.yml | 33 | ||||
| -rw-r--r-- | Gemfile.lock | 2 | ||||
| -rw-r--r-- | Rakefile | 8 | ||||
| -rw-r--r-- | ext/spandx/extconf.rb | 2 | ||||
| -rw-r--r-- | ext/spandx/spandx.c | 45 | ||||
| -rw-r--r-- | lib/spandx.rb | 2 | ||||
| -rw-r--r-- | lib/spandx/core/data_file.rb | 8 | ||||
| -rw-r--r-- | lib/spandx/core/line_io.rb | 2 | ||||
| -rw-r--r-- | spandx.gemspec | 2 | ||||
| -rw-r--r-- | spec/integration/core/cache_spec.rb | 11 | ||||
| -rw-r--r-- | spec/unit/core/csv_parser_spec.rb | 19 | ||||
| -rw-r--r-- | spec/unit/ruby/parsers/gemfile_lock_spec.rb | 2 |
12 files changed, 88 insertions, 48 deletions
diff --git a/.github/licensed/bundler/fastest-csv.dep.yml b/.github/licensed/bundler/fastest-csv.dep.yml deleted file mode 100644 index 9f19b1a..0000000 --- a/.github/licensed/bundler/fastest-csv.dep.yml +++ /dev/null @@ -1,33 +0,0 @@ ---- -name: fastest-csv -version: 0.0.4 -type: bundler -summary: Fastest standard CSV parser for MRI Ruby and JRuby -homepage: https://github.com/brightcode/fastest-csv -license: mit -licenses: -- sources: LICENSE - text: |- - Copyright (c) 2012 Maarten Oelering - - MIT License - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -notices: [] diff --git a/Gemfile.lock b/Gemfile.lock index 8881bbf..64d5e43 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -4,7 +4,6 @@ PATH spandx (0.12.3) addressable (~> 2.7) bundler (>= 1.16, < 3.0.0) - fastest-csv (~> 0.0) net-hippie (~> 0.3) nokogiri (~> 1.10) parslet (~> 2.0) @@ -122,6 +121,7 @@ DEPENDENCIES benchmark-ips (~> 2.8) bundler-audit (~> 0.6) byebug (~> 11.1) + fastest-csv (~> 0.0) licensed (~> 2.8) rake (~> 13.0) rake-compiler (~> 1.1) @@ -10,10 +10,10 @@ RSpec::Core::RakeTask.new(:spec) RuboCop::RakeTask.new(:rubocop) Bundler::Audit::Task.new -task :build => :compile +task build: :compile -Rake::ExtensionTask.new("spandx") do |ext| - ext.lib_dir = "lib/spandx" +Rake::ExtensionTask.new('spandx') do |ext| + ext.lib_dir = 'lib/spandx' end task :licensed do @@ -22,4 +22,4 @@ task :licensed do end task lint: [:rubocop, 'bundle:audit', :licensed] -task default: [:clobber, :compile, :spec] +task default: %i[clobber compile spec] diff --git a/ext/spandx/extconf.rb b/ext/spandx/extconf.rb index 20ce23f..1a58f4f 100644 --- a/ext/spandx/extconf.rb +++ b/ext/spandx/extconf.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'mkmf' create_makefile('spandx/spandx') diff --git a/ext/spandx/spandx.c b/ext/spandx/spandx.c index 15eb61d..9004bab 100644 --- a/ext/spandx/spandx.c +++ b/ext/spandx/spandx.c @@ -1,8 +1,53 @@ #include "spandx.h" VALUE rb_mSpandx; +VALUE rb_mCore; +VALUE rb_mCsvParser; + +static VALUE parse(VALUE self, VALUE line) +{ + VALUE items = rb_ary_new2(3); + const char *line_ptr = RSTRING_PTR(line); + int length = (int) RSTRING_LEN(line); + char current_value[length]; + int current_value_length = 0; + char current_charactor; + int state = 1; + int items_in_array = 0; + + for (int i = 1; i < length; i++) { + current_charactor = line_ptr[i]; + + switch(current_charactor) { + case '"': + if (state == 0) { + state = 1; + } else { + rb_ary_push(items, rb_str_new(current_value, current_value_length)); + items_in_array++; + if (items_in_array == 3) return items; + + current_value_length = 0; + state = 0; + } + break; + case ',': + if (state == 1) + current_value[current_value_length++] = current_charactor; + break; + default: + current_value[current_value_length++] = current_charactor; + break; + } + } + + return items; +} void Init_spandx(void) { rb_mSpandx = rb_define_module("Spandx"); + rb_mCore = rb_define_module_under(rb_mSpandx, "Core"); + rb_mCsvParser = rb_define_module_under(rb_mCore, "CsvParser"); + rb_define_module_function(rb_mCsvParser, "parse", parse, 1); } diff --git a/lib/spandx.rb b/lib/spandx.rb index 91a7495..ff51288 100644 --- a/lib/spandx.rb +++ b/lib/spandx.rb @@ -3,7 +3,6 @@ require 'addressable/uri' require 'bundler' require 'csv' -require 'fastest-csv' require 'forwardable' require 'json' require 'logger' @@ -52,5 +51,4 @@ module Spandx end end -loader.inflector.inflect('line_io' => 'LineIO') loader.eager_load diff --git a/lib/spandx/core/data_file.rb b/lib/spandx/core/data_file.rb index 9ca8e95..eda2e86 100644 --- a/lib/spandx/core/data_file.rb +++ b/lib/spandx/core/data_file.rb @@ -15,7 +15,7 @@ module Spandx open_file do |io| while (line = io.gets) - yield ::CsvParser.parse_line(line) + yield parse_row(line) end end end @@ -59,7 +59,7 @@ module Spandx mid = lines.size == 1 ? 0 : lines.size / 2 io.seek(lines[mid]) - comparison = matches?(term, parse_row(io)) do |row| + comparison = matches?(term, parse_row(io.readline)) do |row| return row end @@ -70,8 +70,8 @@ module Spandx (term <=> "#{row[0]}-#{row[1]}").tap { |x| yield row if x.zero? } end - def parse_row(io) - FastestCSV.parse_line(io.readline) + def parse_row(line) + CsvParser.parse(line) end def partition(comparison, mid, lines) diff --git a/lib/spandx/core/line_io.rb b/lib/spandx/core/line_io.rb index a609145..e8c42e5 100644 --- a/lib/spandx/core/line_io.rb +++ b/lib/spandx/core/line_io.rb @@ -2,7 +2,7 @@ module Spandx module Core - class LineIO + class LineIo def initialize(absolute_path) file_descriptor = IO.sysopen(absolute_path) @io = IO.new(file_descriptor) diff --git a/spandx.gemspec b/spandx.gemspec index 606cf20..50d21cd 100644 --- a/spandx.gemspec +++ b/spandx.gemspec @@ -33,7 +33,6 @@ Gem::Specification.new do |spec| spec.add_dependency 'addressable', '~> 2.7' spec.add_dependency 'bundler', '>= 1.16', '< 3.0.0' - spec.add_dependency 'fastest-csv', '~> 0.0' spec.add_dependency 'net-hippie', '~> 0.3' spec.add_dependency 'nokogiri', '~> 1.10' spec.add_dependency 'parslet', '~> 2.0' @@ -43,6 +42,7 @@ Gem::Specification.new do |spec| spec.add_development_dependency 'benchmark-ips', '~> 2.8' spec.add_development_dependency 'bundler-audit', '~> 0.6' spec.add_development_dependency 'byebug', '~> 11.1' + spec.add_development_dependency 'fastest-csv', '~> 0.0' spec.add_development_dependency 'licensed', '~> 2.8' spec.add_development_dependency 'rake', '~> 13.0' spec.add_development_dependency 'rake-compiler', '~> 1.1' diff --git a/spec/integration/core/cache_spec.rb b/spec/integration/core/cache_spec.rb index a982fde..f57c766 100644 --- a/spec/integration/core/cache_spec.rb +++ b/spec/integration/core/cache_spec.rb @@ -163,7 +163,9 @@ RSpec.describe Spandx::Core::Cache do end xit 'profiles each option' do - datafile = Spandx::Core::DataFile.new('~/.local/share/spandx/rubygems-cache/.index/02/rubygems') + require 'fastest-csv' + + datafile = Spandx::Core::DataFile.new("#{Dir.home}/.local/share/spandx/rubygems-cache/.index/d9/rubygems") Benchmark.ips do |x| x.report('fastest-csv') { FastestCSV.foreach(datafile.absolute_path) { |y| } } x.report('manual-gets') do @@ -173,6 +175,13 @@ RSpec.describe Spandx::Core::Cache do end end end + x.report('manual-gets-c-ext') do + datafile.open_file(mode: 'rb') do |io| + while (x = io.gets) + ::Spandx::Core::CsvParser.parse(x) + end + end + end x.report('manual-eof') { datafile.open_file { |io| FastestCSV.parse_line(io.readline) until io.eof? } } x.report('manual-exception') { datafile.open_file { |io| loop { FastestCSV.parse_line(io.readline) } } } diff --git a/spec/unit/core/csv_parser_spec.rb b/spec/unit/core/csv_parser_spec.rb new file mode 100644 index 0000000..6453d9d --- /dev/null +++ b/spec/unit/core/csv_parser_spec.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +RSpec.describe Spandx::Core::CsvParser do + describe '.parse' do + let(:subject) { described_class.parse(line) } + + context 'when parsing a single line of csv' do + let(:line) { '"spandx","0.0.0","MIT"' + "\n" } + + specify { expect(subject).to eql(['spandx', '0.0.0', 'MIT']) } + end + + context 'when parsing a line of csv that contains a `,` in the value' do + let(:line) { '"spa,ndx","0.0.0","MIT"' + "\n" } + + specify { expect(subject).to eql(['spa,ndx', '0.0.0', 'MIT']) } + end + end +end diff --git a/spec/unit/ruby/parsers/gemfile_lock_spec.rb b/spec/unit/ruby/parsers/gemfile_lock_spec.rb index cf75463..caa8341 100644 --- a/spec/unit/ruby/parsers/gemfile_lock_spec.rb +++ b/spec/unit/ruby/parsers/gemfile_lock_spec.rb @@ -24,7 +24,7 @@ RSpec.describe Spandx::Ruby::Parsers::GemfileLock do specify { expect(spandx.name).to eql('spandx') } specify { expect(spandx.version).to eql(Spandx::VERSION) } - specify { expect(spandx.meta[:dependencies].map(&:name)).to match_array(%w[addressable bundler fastest-csv net-hippie nokogiri parslet thor zeitwerk]) } + specify { expect(spandx.meta[:dependencies].map(&:name)).to match_array(%w[addressable bundler net-hippie nokogiri parslet thor zeitwerk]) } specify { expect(spandx.meta[:platform]).to eql('ruby') } specify { expect(spandx.meta[:source]).to be_a_kind_of(Bundler::Source) } end |
