summary refs log tree commit diff
diff options
context:
space:
mode:
author mo khan <mo.khan@gmail.com> 2020-05-03 09:31:08 -0600
committer mo khan <mo.khan@gmail.com> 2020-05-03 09:31:08 -0600
commit 71ad69ec80e7c327f219f6cd2d4e8093c241f409 (patch)
tree d4d20f24828499f8794cfd62a58a40da597ebd13
parent aad1f8c405cea3f54734246ecfa9e07ad6e42a39 (diff)
Remove fuzzy match
-rw-r--r-- .github/licensed/bundler/fuzzy_match.dep.yml 32
-rw-r--r-- Gemfile.lock 5
-rw-r--r-- lib/spandx.rb 1
-rw-r--r-- lib/spandx/core/content.rb 4
-rw-r--r-- lib/spandx/core/guess.rb 9
-rw-r--r-- spandx.gemspec 1
-rw-r--r-- spec/unit/core/guess_spec.rb 4
-rw-r--r-- spec/unit/ruby/parsers/gemfile_lock_spec.rb 2
8 files changed, 14 insertions, 44 deletions
diff --git a/.github/licensed/bundler/fuzzy_match.dep.yml b/.github/licensed/bundler/fuzzy_match.dep.yml
deleted file mode 100644
index 7351191..0000000
--- a/.github/licensed/bundler/fuzzy_match.dep.yml
+++ /dev/null
@@ -1,32 +0,0 @@
----
-name: fuzzy_match
-version: 2.1.0
-type: bundler
-summary: Find a needle in a haystack using string similarity and (optionally) regexp
- rules. Replaces loose_tight_dictionary.
-homepage: https://github.com/seamusabshere/fuzzy_match
-license: mit
-licenses:
-- sources: LICENSE
- text: |
- Copyright 2011 Brighter Planet, Inc.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-notices: []
diff --git a/Gemfile.lock b/Gemfile.lock
index c392ca2..781d384 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -4,7 +4,6 @@ PATH
spandx (0.12.3)
addressable (~> 2.7)
bundler (>= 1.16, < 3.0.0)
- fuzzy_match (~> 2.1)
net-hippie (~> 0.3)
nokogiri (~> 1.10)
thor
@@ -29,7 +28,6 @@ GEM
dotenv (2.7.5)
faraday (1.0.0)
multipart-post (>= 1.2, < 3)
- fuzzy_match (2.1.0)
hashdiff (1.0.1)
jaro_winkler (1.5.4)
licensed (2.8.0)
@@ -101,7 +99,6 @@ GEM
sawyer (0.8.2)
addressable (>= 2.3.5)
faraday (> 0.8, < 2.0)
- text (1.3.1)
thor (0.20.3)
tomlrb (1.2.9)
unicode-display_width (1.7.0)
@@ -118,7 +115,6 @@ PLATFORMS
DEPENDENCIES
bundler-audit (~> 0.6)
byebug (~> 11.1)
- jaro_winkler (~> 1.5)
licensed (~> 2.8)
parallel_tests (~> 2.32)
rake (~> 13.0)
@@ -127,7 +123,6 @@ DEPENDENCIES
rubocop (~> 0.52)
rubocop-rspec (~> 1.22)
spandx!
- text (~> 1.3)
vcr (~> 5.0)
webmock (~> 3.7)
diff --git a/lib/spandx.rb b/lib/spandx.rb
index adafb68..dbd09e6 100644
--- a/lib/spandx.rb
+++ b/lib/spandx.rb
@@ -4,7 +4,6 @@ require 'addressable/uri'
require 'bundler'
require 'csv'
require 'forwardable'
-require 'fuzzy_match'
require 'json'
require 'logger'
require 'net/hippie'
diff --git a/lib/spandx/core/content.rb b/lib/spandx/core/content.rb
index 4cd73c0..aec45b5 100644
--- a/lib/spandx/core/content.rb
+++ b/lib/spandx/core/content.rb
@@ -13,8 +13,8 @@ module Spandx
@tokens ||= tokenize(canonicalize(raw)).to_set
end
- def similar?(other)
- similarity_score(other) > 89.0
+ def similar?(other, threshold: 89.0)
+ similarity_score(other) > threshold
end
def similarity_score(other)
diff --git a/lib/spandx/core/guess.rb b/lib/spandx/core/guess.rb
index f59ab7a..8b8e9c1 100644
--- a/lib/spandx/core/guess.rb
+++ b/lib/spandx/core/guess.rb
@@ -7,7 +7,6 @@ module Spandx
def initialize(catalogue)
@catalogue = catalogue
- @name_search = FuzzyMatch.new(catalogue, read: :name)
end
def license_for(raw)
@@ -43,7 +42,13 @@ module Spandx
def match_name(content)
return if content.tokens.size < 2 || content.tokens.size > 10
- @name_search.find(content.raw)
+ threshold = 85.0
+ catalogue.find do |license|
+ next if license.deprecated_license_id?
+
+ other_name = ::Spandx::Core::Content.new(license.name)
+ content.similar?(other_name, threshold: threshold)
+ end
end
def match_body(content)
diff --git a/spandx.gemspec b/spandx.gemspec
index 9ed5c50..0436415 100644
--- a/spandx.gemspec
+++ b/spandx.gemspec
@@ -32,7 +32,6 @@ Gem::Specification.new do |spec|
spec.add_dependency 'addressable', '~> 2.7'
spec.add_dependency 'bundler', '>= 1.16', '< 3.0.0'
- spec.add_dependency 'fuzzy_match', '~> 2.1'
spec.add_dependency 'net-hippie', '~> 0.3'
spec.add_dependency 'nokogiri', '~> 1.10'
spec.add_dependency 'thor'
diff --git a/spec/unit/core/guess_spec.rb b/spec/unit/core/guess_spec.rb
index 4d5364d..96dc99b 100644
--- a/spec/unit/core/guess_spec.rb
+++ b/spec/unit/core/guess_spec.rb
@@ -48,5 +48,9 @@ RSpec.describe Spandx::Core::Guess do
specify { expect(subject.license_for(content)&.id).to eql('MIT') }
end
+
+ specify do
+ expect(subject.license_for('Common Public License Version 1.0')&.id).to eql('CPL-1.0')
+ end
end
end
diff --git a/spec/unit/ruby/parsers/gemfile_lock_spec.rb b/spec/unit/ruby/parsers/gemfile_lock_spec.rb
index 0ba564d..50b6da1 100644
--- a/spec/unit/ruby/parsers/gemfile_lock_spec.rb
+++ b/spec/unit/ruby/parsers/gemfile_lock_spec.rb
@@ -24,7 +24,7 @@ RSpec.describe Spandx::Ruby::Parsers::GemfileLock do
specify { expect(spandx.name).to eql('spandx') }
specify { expect(spandx.version).to eql(Spandx::VERSION) }
- specify { expect(spandx.meta[:dependencies].map(&:name)).to match_array(%w[addressable bundler fuzzy_match net-hippie nokogiri thor zeitwerk]) }
+ specify { expect(spandx.meta[:dependencies].map(&:name)).to match_array(%w[addressable bundler net-hippie nokogiri thor zeitwerk]) }
specify { expect(spandx.meta[:platform]).to eql('ruby') }
specify { expect(spandx.meta[:source]).to be_a_kind_of(Bundler::Source) }
end