summaryrefslogtreecommitdiff
path: root/lib/license/management/repository.rb
blob: 9490af2599a5e027997aa500d43e0839bbdbcde6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# frozen_string_literal: true

module License
  module Management
    # Resolves a detected license into a normalized item hash with the string
    # keys `'id'`, `'name'` and `'url'`.
    #
    # Two data sets are loaded once, at construction time:
    # * a YAML "compatibility" file whose `ids` table maps license short names
    #   and URLs to ids, and
    # * an SPDX JSON file whose `licenses` entries are indexed by `licenseId`.
    #
    # NOTE(review): `log_info`, `blank?` and `present?` are not defined in this
    # file; presumably they come from `Loggable` / `Verifiable` - confirm.
    class Repository
      include Loggable
      include Verifiable

      # Hosts for which the last path segment of a URL is used as a license id
      # (see `known_sources`).
      KNOWN_SOURCES = [
        'licenses.nuget.org',
        'opensource.org',
        'www.opensource.org',
      ].freeze

      # compatibility_path - path of the YAML file that holds the `ids` table.
      # spdx_path          - path of the SPDX JSON file with a `licenses` list.
      #
      # `File.read` is used rather than `IO.read`: `IO.read` treats a path that
      # starts with `|` as a command to execute, `File.read` never does.
      def initialize(
        compatibility_path: License::Management.root.join('normalized-licenses.yml'),
        spdx_path: License::Management.root.join('spdx-licenses.json')
      )
        @compatibility_data = YAML.safe_load(File.read(compatibility_path))
        @spdx_data = load_spdx_data_from(spdx_path)
      end

      # Returns the item hash for `license`. Candidates are tried in order:
      # 1. the SPDX entry for the id resolved by `id_for`,
      # 2. the SPDX entry whose id equals the license's short name,
      # 3. an item generated from the license's own name and url.
      #
      # `short_name` is invoked through `send`, which bypasses method
      # visibility - presumably it is not public on the license object; confirm.
      def item_for(license)
        spdx_data_for(id_for(license)) ||
          spdx_data_for(license.send(:short_name)) ||
          generate_item_for(license)
      end

      private

      attr_reader :spdx_data, :compatibility_data

      # Builds the item hash from the SPDX entry stored under `id`.
      # Returns nil when `id` is blank or no entry exists for it.
      def spdx_data_for(id)
        return if blank?(id)

        data = spdx_data[id]
        return unless data

        {
          'id' => data['licenseId'],
          'name' => data['name'],
          # The last `seeAlso` link is used as the url. `Array()` keeps an
          # entry without a `seeAlso` list from raising; its url is then nil.
          'url' => Array(data['seeAlso']).last
        }
      end

      # Resolves an id for `license`. The `ids` table of the compatibility data
      # is consulted first (by short name, then by url); after that the short
      # name and the url are each tried as a URL on one of the KNOWN_SOURCES.
      # Returns nil when nothing matches.
      def id_for(license)
        ids = compatibility_data['ids']
        short_name = license.send(:short_name)

        ids[short_name] ||
          ids[license.url] ||
          known_sources(short_name) ||
          known_sources(license.url)
      end

      # When `license_finder` is unable to determine the license it will use the full
      # content of the file as the name of the license. This method shrinks that name
      # down to the first line of the file that is not empty or whitespace-only.
      #
      # Always returns a String: '' is returned when `content` is blank or has
      # no such line, so callers can safely call String methods on the result.
      def take_first_line_from(content)
        return '' if blank?(content)

        # Leading line breaks would otherwise produce an empty first element,
        # and content made only of line breaks would produce no element at all.
        content.split(/[\r\n]+/).find { |line| !line.strip.empty? } || ''
      end

      # Fallback for licenses that could not be matched to SPDX data: logs the
      # unknown license and builds an item from the first line of its name.
      # The id is that line downcased; the url is '' when the license has none.
      def generate_item_for(license)
        log_info("Detected unknown license `#{license.send(:short_name)}`. Contribute to https://gitlab.com/gitlab-org/security-products/license-management#contributing.")
        name = take_first_line_from(license.name)
        url = license.url
        {
          'id' => name.downcase,
          'name' => name,
          'url' => present?(url) ? url : ''
        }
      end

      # Reads the SPDX JSON file at `path` and returns a Hash that maps each
      # entry's `licenseId` to the full entry.
      def load_spdx_data_from(path)
        licenses = JSON.parse(File.read(path))['licenses']

        licenses.each_with_object({}) do |license, index|
          index[license['licenseId']] = license
        end
      end

      # Returns the last path segment of `url` when `url` is an http(s) URL
      # whose host is one of KNOWN_SOURCES; returns nil otherwise.
      #
      # Any StandardError raised along the way is logged and turned into nil.
      def known_sources(url)
        return if blank?(url)
        return unless url =~ /\A#{::URI::DEFAULT_PARSER.make_regexp(%w[http https])}\z/

        uri = URI.parse(url)
        # `host` can be nil when the URI has no authority part; `to_s` turns
        # that into a plain non-match instead of an exception that gets logged.
        return unless KNOWN_SOURCES.include?(uri.host.to_s.downcase)

        uri.path.split('/').last
      rescue StandardError => error
        log_info(error)
        nil
      end
    end
  end
end