summaryrefslogtreecommitdiff
path: root/extract_license
diff options
context:
space:
mode:
authordwheeler <dwheeler@d762cc98-fd17-0410-9a0d-d09172385bc5>2006-07-07 13:36:27 +0000
committerdwheeler <dwheeler@d762cc98-fd17-0410-9a0d-d09172385bc5>2006-07-07 13:36:27 +0000
commit05095851346f52c8e918176e8e2abdf0b21de5ec (patch)
tree8de964f5eea4c7d80faf34d5d744e215a053ba8f /extract_license
downloadsloccount-05095851346f52c8e918176e8e2abdf0b21de5ec.tar.gz
Initial import (sloccount 2.26)HEADmaster
git-svn-id: svn://svn.code.sf.net/p/sloccount/code/trunk@1 d762cc98-fd17-0410-9a0d-d09172385bc5
Diffstat (limited to 'extract_license')
-rwxr-xr-xextract_license178
1 files changed, 178 insertions, 0 deletions
diff --git a/extract_license b/extract_license
new file mode 100755
index 0000000..bde556e
--- /dev/null
+++ b/extract_license
@@ -0,0 +1,178 @@
+#!/usr/bin/perl
+# extract_license
+# Determine the license of a program, given 2 parameters:
+# (1) the directory containing the program's source code.
+# (2) the RPM spec file (which may be /dev/null)
+
+# This "regularizes" license names. For example,
+# BSD-style, BSDish, and BSD-like all become "BSD-like".
+# License names "sentence capitalization", e.g., "Freely distributable".
+# It also fixes a lot of errors in Red Hat spec files.
+
+#
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$program_dir = shift;
+$rpm_spec = shift;
+
+$license = $copyright = "";
+
+%all_licenses = ();
+
+
+sub read_license_file() {
+ my $filename = shift(@_);
+ my $license = "";
+ if ((-s $filename) && open(LICENSE_FILE, "<$filename")) {
+ # TODO: detect even more licenses automatically.
+ # It'd hard to detect BSD/MIT licenses,
+ # because these licenses make changes in the MIDDLE of their text.
+ # Thus, it's hard to avoid falsely detecting "almost" licenses.
+ # For example, ipf has license text that looks like a BSD/MIT license,
+ # but it's not even open source.
+ # However, we CAN detect many other kinds, so let's at least do that.
+ for ($i=1; $i < 9; $i++) {
+ $line = <LICENSE_FILE>;
+ if ($line =~ m/GNU GENERAL PUBLIC LICENSE/i) {$license = "GPL";}
+ elsif ($line =~ m/GNU LIBRARY GENERAL PUBLIC LICENSE/i) {$license = "LGPL";}
+ elsif ($line =~ m/GNU LESSER GENERAL PUBLIC LICENSE/i) {$license = "LGPL";}
+ elsif ($line =~ m/Mozilla PUBLIC LICENSE/i) {$license = "MPL";}
+ elsif ($line =~ m/Netscape PUBLIC LICENSE/i) {$license = "NPL";}
+ elsif ($line =~ m/IBM PUBLIC LICENSE/i) {$license = "IBM Public License";}
+ elsif ($line =~ m/\bApache Software License\b/i) {$license = "Apache";}
+ elsif ($line =~ m/\bThe "Artistic License"/i) {$license = "Artistic";}
+ }
+ close(LICENSE_FILE);
+ }
+ return $license;
+}
+
+sub add_license() {
+ # Add to license list "all_licenses" the license in the given file, if one.
+ my $filename = shift(@_);
+ my $license = &read_license_file($filename);
+ if ($license) { $all_licenses{$license} = 1; }
+}
+
+open(RPM_SPEC, "<$rpm_spec");
+
+while (<RPM_SPEC>) {
+ if (/^Copyright:(.*)/i) {$copyright=$1;}
+ if (/^License:(.*)/i) {$license=$1;}
+}
+close(RPM_SPEC);
+
+if (! $license) {
+ $license = $copyright;
+}
+
+# print "GOT: $license\n";
+
+if ( $license ) {
+ $_ = $license;
+
+ # Remove extraneous material in the middile of the license text.
+ s/ \(see: [^)]*\)//; # Delete parenthetical see: references.
+ s/, ?no warranties//; # "No warranties" not important for our purposes.
+ s/See COPYRIGHT file//i;
+ s/\b,?URW holds copyright\b//i;
+
+ # Clean up front and back.
+ s/^\s*//;
+ s/[ \t\.]*$//; # Delete trailing periods and blanks.
+
+ $_ = ucfirst($_); # Uppercase first character. Remove this line if need to.
+
+ if (/^GPL2?$/i || /^GNU$/ || /^GNU ?GPL *(Version 2)?$/i) {$_ = "GPL"};
+ if (/^Apache ?Group License$/i) {$_ = "Apache"};
+ if (/^Apacheish$/i || /^Apache-style$/i) {$_ = "Apache-like"};
+ if (/^Artistic$/i) {$_ = "Artistic"};
+ if (/^BSD$/i) {$_ = "BSD"};
+ if (/^BSDish$/i || /^BSD-style$/i || /^BSD-like$/) {$_ = "BSD-like"};
+ if (/^Distributable$/i) {$_ = "Distributable"};
+ if (/^Distributable ?\(BSD-like\)$/i) {$_ = "BSD-like"};
+ if (/^Freely ?Distributable$/i) {$_ = "Freely distributable"};
+ if (/^Free,no warranties.?$/i) {$_ = "Free"};
+ if (/^freeware. See COPYRIGHT file.?$/i) {$_ = "Free"};
+ if (/^freeware.?$/i) {$_ = "Free"};
+ if (/^GPLand Artistic$/i) {$_ = "GPL and Artistic"};
+ if (/^GPL ?or BSD$/i) {$_ = "GPL or BSD"};
+ if (/^GPL\/XFree86$/i) {$_ = "GPL/MIT"};
+ if (/^distributable- most of it GPL$/i) {$_ = "Distributable - mostly GPL"};
+ if (/^IBM ?Public License Version 1.0 -/i) {$_ = "IBM Public License"};
+ if (/^IBM ?Public License$/i) {$_ = "IBM Public License"};
+ if (/^MIT, ?freely distributable/i) {$_ = "MIT"};
+ if (/^MIT\/X ?Consortium$/i) {$_ = "MIT"};
+ if (/^Non[- ]commercial[- ]use[- ]only$/i) {$_ = "Non-commercial use only"};
+ if (/^Proprietary$/i) {$_ = "Proprietary"};
+ if (/^Public ?domain$/i) {$_ = "Public domain"};
+ if (/^Universityof Washington's Free-Fork License$/i)
+ {$_ = "U of Washington's Free-Fork License"};
+ if (/^W3CCopyright \(BSD like\)$/i) {$_ = "BSD-like"};
+ if (/^X ?Consortium[ -]?like$/i) {$_ = "MIT-like"};
+ if (/^XFree86$/i) {$_ = "MIT"};
+ if (/^W3C Copyright \(BSD[- ]like\)$/i) {$_ = "BSD-like"};
+
+ # Eliminate license if it isn't really a license.
+ if (/^2000Red Hat, Inc.?$/i) {$_ = ""};
+ if (/^OMRON ?Corporation, OMRON Software Co., Ltd.?$/i) {$_ = ""};
+ if (/^Copyright\s?.?\s?[1-9][0-9][0-9][0-9]/i) {$_ = ""}; # Not a license.
+ if (/^\(C\)\s?[1-9][0-9][0-9][0-9]/i) {$_ = ""}; # Not a license.
+ if (/^[1-9][0-9][0-9][0-9]\s/i) {$_ = ""}; # A date, not a license.
+
+ $license = $_;
+}
+
+if ($license) {
+ print $license;
+} else {
+ # The spec file didn't tell us anything. Let's look for files that tell us.
+ &add_license("${program_dir}/LICENSE");
+ &add_license("${program_dir}/COPYING");
+ &add_license("${program_dir}/COPYING.LIB");
+ &add_license("${program_dir}/Artistic");
+ &add_license("${program_dir}/COPYING-2.0");
+ &add_license("${program_dir}/COPYING.WTFPL");
+ &add_license("${program_dir}/COPYING.GPL");
+ &add_license("${program_dir}/COPYING.NEWLIB");
+ &add_license("${program_dir}/COPYING.kdb");
+ if (-s "${program_dir}/COPYING.BSD") { # Assume there's a BSD license.
+ $all_licenses{"BSD"} = 1;
+ }
+ if (-s "${program_dir}/COPYING.MIT") { # Assume there's an MIT license.
+ $all_licenses{"MIT"} = 1;
+ }
+
+ if (%all_licenses) {
+ $license = "";
+ foreach $license_fragment (sort(keys(%all_licenses))) {
+ $license .= "${license_fragment}, "
+ }
+ $license =~ s/, $//;
+ print $license;
+ }
+
+}
+print "\n";
+