summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam Thursfield <sam.thursfield@codethink.co.uk>2014-12-03 17:29:53 +0000
committerSam Thursfield <sam.thursfield@codethink.co.uk>2014-12-03 17:59:12 +0000
commitd9f45a9e485ab7fefb86fc2a6658ff80b61c1533 (patch)
tree42028ea78189cbc0f40778714f881a7946b95f1f
parent2759a248785152916d9f89ec94face5e5db9a83c (diff)
downloadimport-d9f45a9e485ab7fefb86fc2a6658ff80b61c1533.tar.gz
rubygems: Improve heuristic for when homepage_uri points to Github
This was motivated by <https://github.com/mislav/will_paginate>, which links to <https://github.com/mislav/will_paginate/wiki> as its homepage.
-rwxr-xr-xbaserockimport/exts/rubygems.to_lorry38
1 files changed, 35 insertions, 3 deletions
diff --git a/baserockimport/exts/rubygems.to_lorry b/baserockimport/exts/rubygems.to_lorry
index 6807b21..d5f1efa 100755
--- a/baserockimport/exts/rubygems.to_lorry
+++ b/baserockimport/exts/rubygems.to_lorry
@@ -107,9 +107,9 @@ class RubyGemLorryGenerator(ImportExtension):
homepage_uri = gem_info['homepage_uri']
if homepage_uri is not None and len(homepage_uri) > 0:
logging.debug('Got homepage_uri %s', source_code_uri)
- netloc = urlparse.urlsplit(homepage_uri)[1]
- if netloc == 'github.com':
- return homepage_uri
+ uri = self.detect_source_code_uri_from_homepage(homepage_uri)
+ if uri is not None:
+ return uri
# Further possible leads on locating source code.
# http://ruby-toolbox.com/projects/$gemname -> sometimes contains an
@@ -121,6 +121,38 @@ class RubyGemLorryGenerator(ImportExtension):
"Gem metadata for '%s' does not point to its source code "
"repository." % gem_name)
+ def detect_source_code_uri_from_homepage(self, homepage_uri):
+ '''Try to detect source code location based on homepage_uri.
+
+ It seems common for RubyGem projects to be hosted on Github, to link
+ to a URL inside their Github project as their homepage, and to not
+ set source_code_uri. This heuristic saves the user from manually
+ writing .lorry files for such projects.
+
+ '''
+
+ uri_parts = urlparse.urlsplit(homepage_uri)
+ scheme, netloc = uri_parts[0:2]
+
+ if netloc == 'github.com':
+ path = uri_parts[2]
+ path_parts = path.lstrip('/').split('/')
+
+ if len(path_parts) < 2:
+ logging.debug(
+ '%s points to Github but not a specific repo.',
+ homepage_uri)
+ return None
+
+ # Strip off any trailing components, stuff like '/wiki'.
+ path = '/'.join(path_parts[0:2])
+ uri = '%s://%s/%s' % (scheme, netloc, path)
+
+ logging.debug('Assuming %s is the source code URI.', uri)
+ return uri
+ else:
+ return None
+
def project_name_from_repo(self, repo_url):
if repo_url.endswith('/tree/master'):
repo_url = repo_url[:-len('/tree/master')]