# frozen_string_literal: true

module License
  module Management
    # Resolves a detected license to a normalized item: a Hash with the string
    # keys 'id', 'name' and 'url'.
    #
    # Two data files back the lookup: a YAML file whose 'ids' mapping translates
    # license short names / URLs into license ids, and a JSON catalogue whose
    # 'licenses' entries are indexed by their 'licenseId'. When neither source
    # knows the license, an item is generated from the license's own name and url.
    #
    # NOTE(review): `blank?`, `present?` and `logger` are not defined in this
    # class; presumably they are provided by Verifiable and Loggable -- confirm.
    class Repository
      include Loggable
      include Verifiable

      # Hosts whose URLs are treated as carrying a license id in their last
      # path segment (see #known_sources).
      KNOWN_SOURCES = [
        'licenses.nuget.org',
        'opensource.org',
        'www.opensource.org'
      ].freeze

      # @param compatibility_path path of the YAML file containing the 'ids' mapping
      # @param spdx_path path of the JSON file containing the 'licenses' catalogue
      def initialize(
        compatibility_path: License::Management.root.join('normalized-licenses.yml'),
        spdx_path: License::Management.root.join('spdx-licenses.json')
      )
        # File.read (not IO.read) so a path beginning with "|" can never be
        # interpreted as a command to spawn.
        @compatibility_data = YAML.safe_load(File.read(compatibility_path))
        @spdx_data = load_spdx_data_from(spdx_path)
      end

      # Builds the normalized item for +license+.
      #
      # Resolution order: the id found through #id_for, then the license's
      # short name used directly as a catalogue id, then a generated fallback.
      #
      # @param license an object responding to #short_name, #url and #name
      # @return [Hash] a Hash with the keys 'id', 'name' and 'url'
      def item_for(license)
        spdx_data_for(id_for(license)) ||
          spdx_data_for(license.short_name) ||
          generate_item_for(license)
      end

      private

      attr_reader :spdx_data, :compatibility_data

      # Looks +id+ up in the catalogue.
      #
      # @return [Hash, nil] the item for +id+, or nil when +id+ is blank or unknown
      def spdx_data_for(id)
        return if blank?(id)

        data = spdx_data[id]
        return unless data

        {
          'id' => data['licenseId'],
          'name' => data['name'],
          # Array() tolerates an entry without 'seeAlso'; the url is then nil
          # instead of raising NoMethodError.
          'url' => Array(data['seeAlso']).last
        }
      end

      # Determines a license id for +license+: first through the 'ids' mapping
      # (by short name, then by url), then by treating the short name and the
      # url as links to a known source.
      #
      # @return [String, nil]
      def id_for(license)
        ids = compatibility_data['ids']
        ids[license.short_name] ||
          ids[license.url] ||
          known_sources(license.short_name) ||
          known_sources(license.url)
      end

      # When `license_finder` is unable to determine the license it uses the full
      # content of the file as the name of the license. This method shrinks that
      # name down to the first line of the file that is not blank.
      #
      # @return [String] the first non-blank line, or '' when there is none
      def take_first_line_from(content)
        return '' if blank?(content)

        # Leading line breaks produce an empty first element, and content made
        # only of line breaks produces no elements at all; skip blank lines and
        # fall back to '' so callers always receive a String.
        content.split(/[\r\n]+/).find { |line| !line.strip.empty? }.to_s
      end

      # Fallback item built from the license's own name and url, used when no
      # catalogue entry could be found.
      #
      # @return [Hash] a Hash with the keys 'id', 'name' and 'url'
      def generate_item_for(license)
        name = take_first_line_from(license.name)
        {
          'id' => name.downcase,
          'name' => name,
          'url' => normalized_url(license.url)
        }
      end

      # Reads the JSON catalogue at +path+ and indexes its 'licenses' entries
      # by their 'licenseId'.
      #
      # @return [Hash] licenseId => catalogue entry
      def load_spdx_data_from(path)
        # File.read for the same reason as in #initialize.
        catalogue = JSON.parse(File.read(path))

        catalogue['licenses'].each_with_object({}) do |entry, index|
          index[entry['licenseId']] = entry
        end
      end

      # Extracts the last path segment from +url+ when it is a complete
      # http(s) URL whose host is listed in KNOWN_SOURCES.
      #
      # @return [String, nil] the segment; nil when +url+ is blank, is not an
      #   http(s) URL, points at another host, or has no path segments. Any
      #   StandardError raised along the way is logged and nil is returned.
      def known_sources(url)
        return if blank?(url)
        return unless /\A#{::URI::DEFAULT_PARSER.make_regexp(%w[http https])}\z/.match?(url)

        uri = URI.parse(url)
        return unless KNOWN_SOURCES.include?(uri.host.downcase)

        uri.path.split('/').last
      rescue StandardError => e
        logger.error(e)
        nil
      end

      # Returns +url+, '' when it is not present, or the general Wikipedia
      # BSD licenses page when it points at the 4-clause section of that page.
      #
      # @return [String]
      def normalized_url(url)
        return '' unless present?(url)
        return 'http://en.wikipedia.org/wiki/BSD_licenses' if url.include?('BSD_licenses#4-clause_license')

        url
      end
    end
  end
end