summaryrefslogtreecommitdiff
path: root/import/rubygems.to_lorry
blob: cd83e33b29815801c5481568709a059b8d2c0aac (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/python
#
# Create a Baserock .lorry file for a given RubyGem
#
# Copyright (C) 2014  Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


import requests
import requests_cache
import yaml

import logging
import json
import os
import sys
import urlparse

from importer_base import ImportException, ImportExtension


class GenerateLorryException(ImportException):
    """Raised when a .lorry file cannot be generated for a RubyGem."""


class RubyGemsWebServiceClient(object):
    """Small client for the rubygems.org JSON web service."""

    def __init__(self):
        # Save hammering the rubygems.org API: 'requests' API calls are
        # transparently cached in an SQLite database, instead.
        requests_cache.install_cache('rubygems_api_cache')

    def _request(self, url):
        """Fetch 'url' and return its body decoded from JSON.

        Raises GenerateLorryException if the response is not OK.

        """
        r = requests.get(url)
        if not r.ok:
            raise GenerateLorryException(
                'Request to %s failed: %s' % (r.url, r.reason))
        return json.loads(r.text)

    def get_gem_info(self, gem_name):
        """Return the metadata that rubygems.org holds for 'gem_name'.

        Raises GenerateLorryException if the request fails, or if the
        returned metadata names a different Gem from the one requested.

        """
        info = self._request(
            'http://rubygems.org/api/v1/gems/%s.json' % gem_name)

        if info['name'] != gem_name:
            # Sanity check. Both values must be passed to '%' as one tuple:
            # formatting with only the first would raise TypeError before
            # the intended exception could be constructed.
            raise GenerateLorryException(
                'Received info for Gem "%s", requested "%s"' %
                (info['name'], gem_name))

        return info


class RubyGemLorryGenerator(ImportExtension):
    """Import extension that writes a .lorry file for a single RubyGem.

    The upstream source repository is looked up first in the local
    'rubygems.yaml' metadata and then in the Gem's rubygems.org metadata.

    """

    def __init__(self):
        super(RubyGemLorryGenerator, self).__init__()

        # safe_load() only constructs plain data types, and (unlike a bare
        # yaml.load() call) does not need an explicit Loader argument, which
        # newer PyYAML releases insist on.
        with open('rubygems.yaml', 'r') as f:
            local_data = yaml.safe_load(f)

        # Mapping of Gem name -> source repository URI.
        self.known_source_uris = local_data['known-source-uris']

        logging.debug(
            "Loaded %i known source URIs from local metadata.",
            len(self.known_source_uris))

    def process_args(self, args):
        """Generate a lorry for the Gem named in 'args' and print it.

        'args' must contain exactly one item, the name of the Gem;
        otherwise ImportException is raised. The result is written to
        stdout.

        """
        if len(args) != 1:
            raise ImportException(
                'Please call me with the name of a RubyGem as an argument.\n')

        gem_name = args[0]

        lorry = self.generate_lorry_for_gem(gem_name)
        self.write_lorry(sys.stdout, lorry)

    def find_upstream_repo_for_gem(self, gem_name, gem_info):
        """Return the URL of the source repository for 'gem_name'.

        'gem_info' is the Gem's metadata dict, as returned by
        RubyGemsWebServiceClient.get_gem_info(). Candidates are tried in
        this order:

          1. the entry for the Gem in known-source-uris, if there is one;
          2. the 'source_code_uri' field of the metadata, minus any
             trailing '/tree';
          3. the 'homepage_uri' field, if it points at github.com.

        Raises GenerateLorryException if none of these gives a result.

        """
        # A missing key is treated like a null value, so that we fall
        # through to the other leads instead of dying with a KeyError.
        source_code_uri = gem_info.get('source_code_uri')

        if gem_name in self.known_source_uris:
            logging.debug('Found %s in known-source-uris', gem_name)
            known_uri = self.known_source_uris[gem_name]
            # Only warn when upstream actually provides a URI to compare
            # against; an empty field is not a meaningful mismatch.
            if source_code_uri and known_uri != source_code_uri:
                sys.stderr.write(
                    '%s: Hardcoded source URI %s doesn\'t match spec URI %s\n' %
                    (gem_name, known_uri, source_code_uri))
            return known_uri

        if source_code_uri:
            logging.debug('Got source_code_uri %s', source_code_uri)
            if source_code_uri.endswith('/tree'):
                source_code_uri = source_code_uri[:-len('/tree')]

            return source_code_uri

        homepage_uri = gem_info.get('homepage_uri')
        if homepage_uri:
            logging.debug('Got homepage_uri %s', homepage_uri)
            # The homepage is only trusted as a repo URL when it is hosted
            # on GitHub.
            netloc = urlparse.urlsplit(homepage_uri)[1]
            if netloc == 'github.com':
                return homepage_uri

        # Further possible leads on locating source code.
        # http://ruby-toolbox.com/projects/$gemname -> sometimes contains an
        #   upstream link, even if the gem info does not.
        # https://github.com/search?q=$gemname -> often the first result is
        #   the correct one, but you can never know.

        raise GenerateLorryException(
            "Did not manage to find the upstream source URL for Gem '%s'. "
            "Please manually create a .lorry file, or add the Gem to "
            "known-source-uris in rubygems.yaml." % gem_name)

    def project_name_from_repo(self, repo_url):
        """Return a project name derived from the last part of 'repo_url'.

        A trailing '/tree/master', then a trailing '/', then a trailing
        '.git' are each removed (in that order) before the final path
        component is taken.

        """
        if repo_url.endswith('/tree/master'):
            repo_url = repo_url[:-len('/tree/master')]
        if repo_url.endswith('/'):
            repo_url = repo_url[:-1]
        if repo_url.endswith('.git'):
            repo_url = repo_url[:-len('.git')]
        return os.path.basename(repo_url)

    def generate_lorry_for_gem(self, gem_name):
        """Return the contents of a .lorry file for 'gem_name', as a dict.

        The Gem's metadata is fetched from rubygems.org in order to locate
        its source repository. Raises GenerateLorryException if the
        metadata cannot be fetched or no source repository can be found.

        """
        rubygems_client = RubyGemsWebServiceClient()

        gem_info = rubygems_client.get_gem_info(gem_name)

        gem_source_url = self.find_upstream_repo_for_gem(gem_name, gem_info)
        logging.info('Got URL <%s> for %s', gem_source_url, gem_name)

        project_name = self.project_name_from_repo(gem_source_url)

        # One repo may produce multiple Gems. It's up to the caller to merge
        # multiple .lorry files that get generated for the same repo.

        return {
            project_name: {
                'type': 'git',
                'url': gem_source_url,
                'x-products-rubygems': [gem_name]
            }
        }

    def write_lorry(self, stream, lorry):
        """Write 'lorry' to 'stream' as indented JSON, newline-terminated."""
        json.dump(lorry, stream, indent=4)
        # Needed so the morphlib.extensions code will pick up the last line.
        stream.write('\n')


if __name__ == '__main__':
    # Script entry point: build the extension, then hand control to run().
    extension = RubyGemLorryGenerator()
    extension.run()