summaryrefslogtreecommitdiff
path: root/get_sloc
diff options
context:
space:
mode:
Diffstat (limited to 'get_sloc')
-rwxr-xr-xget_sloc544
1 files changed, 544 insertions, 0 deletions
diff --git a/get_sloc b/get_sloc
new file mode 100755
index 0000000..f590a8e
--- /dev/null
+++ b/get_sloc
@@ -0,0 +1,544 @@
+#!/usr/bin/perl -w
+
+# get_sloc
+# Take a list of dirs, and get the SLOC or filecount data from them.
+# NOTE: The intended input data ignores zero-length files & ignores dups,
+# so if that's true for the input data, it'll be true for the output data!
+
+# This code works but is NOT cleaned up-- it basically grew like
+# topsy. Many of the variable names are misleading, as my needs for
+# output changed.
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+
+
+
+# Default values for the effort estimation model; the model is
+# effort = ($effort_factor * KiloSLOC) ** $effort_exponent.
+# The following numbers are for basic COCOMO:
+
+$effort_factor = 2.40;
+$effort_exponent = 1.05;
+$effort_estimation_message = "Basic COCOMO model,";
+
+$schedule_factor = 2.5;
+$schedule_exponent = 0.38;
+$schedule_estimation_message = "Basic COCOMO model,";
+
+# Average Salary / year.
+# Source: ComputerWorld, Sep. 4, 2000 Salary Survey,
+# average (U.S.) programmer/analyst salary.
+
+$person_cost = 56286.;
+
+# Overhead; the person cost is multiplied by this value to determine
+# true annual costs.
+
+$overhead = 2.4;
+
+@license_list = ( "GPL", "LGPL", "MIT", "BSD", "distributable",
+ "public domain", "MPL");
+
+%license_of = (); # input is name of program, output is license.
+
+$no_license_total = 0;
+
+%non_language_list = (
+ "dup" => 1,
+ "not" => 1,
+ "unknown" => 1,
+ "auto" => 1,
+ "zero" => 1,
+);
+
+%ignore_language_list = (
+ "makefile" => 1,
+ "sql" => 1,
+ "html" => 1,
+);
+
+# Default input values
+$dirs_in_stdin = 0; # 0: dirs to analyze as arguments, 1: in stdin
+
+# Default Output Values:
+
+$computing_sloc = 1; # 0= showing filecounts, 1= showing SLOC.
+$narrow = 1;
+$sort_by = "total"; # If empty, sort by name; else "total" or lang name.
+$show_effort = 0; # Show effort for each component?
+$break_line = 1; # Break up long lines into multiple lines?
+$show_non_lang = 0; # Show non-language counts?
+$one_program = 0; # Are all files part of a single program?
+$show_header = 1; # Show header?
+$show_footer = 1; # Show footer?
+
+
+# Global variables:
+
+@dirs = (); # Directories to examine
+
+%examined_directories = (); # Keys = Names of directories examined this run.
+
+# Subroutines.
+
+sub commify {
+# TODO: Needs to be internationalized.
+ my $text = reverse $_[0];
+ $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
+ return scalar reverse $text;
+}
+
+sub numformat {
+# Format number nicely with commas.
+ my $num = shift;
+ my $digits = shift;
+ return commify(sprintf("%0.${digits}f", $num));
+}
+
+sub effort_person_months {
+ # Given the SLOC, reply an estimate of the number of person-months
+ # needed to develop it traditionally.
+ my $total_sloc = shift;
+ return ( ($effort_factor*(($total_sloc/1000.0)**$effort_exponent)));
+}
+
+sub estimate_schedule {
+ # Given the person-months, reply an estimate of the number of months
+ # needed to develop it traditionally.
+ my $person_months = shift;
+ return ($schedule_factor*($person_months**$schedule_exponent));
+}
+
+sub get_lang_total {
+ my $lang = shift;
+ if (defined($lang_total{$lang})) {return $lang_total{$lang}}
+ else {return 0;}
+}
+
+# MAIN PROGRAM
+
+
+# Process options (if any):
+
+if ($#ARGV < 0) {
+ print STDERR "Error! You must list at least one directory to process, or --stdin.\n";
+ exit(1);
+}
+
+while ((scalar (@ARGV) > 0) && ($ARGV[0] =~ m/^-/)) {
+ $arg = shift;
+ if ($arg eq "--") {last;}
+ elsif ($arg eq "--filecount") {$computing_sloc = 0;}
+ elsif ($arg eq "--filecounts") {$computing_sloc = 0;}
+ elsif ($arg eq "--sloc") {$computing_sloc = 1;}
+ elsif ($arg eq "--narrow") {$narrow = 1;}
+ elsif ($arg eq "--wide") {$narrow = 0;}
+ elsif ($arg eq "--break") {$break_line = 1;}
+ elsif ($arg eq "--nobreak") {$break_line = 0;}
+ elsif ($arg eq "--sort") {$sort_by = shift;} # Must be "total" or a lang.
+ elsif ($arg eq "--nosort") {$sort_by = "";}
+ elsif ($arg eq "--showother") {$show_non_lang = 1;}
+ elsif ($arg eq "--noshowother") {$show_non_lang = 0;}
+ elsif ($arg eq "--oneprogram") {$one_program = 1;}
+ elsif ($arg eq "--noheader") {$show_header = 0;}
+ elsif ($arg eq "--nofooter") {$show_footer = 0;}
+ elsif ($arg eq "--addlang") { $lang = shift;
+ if (!defined($ignore_language_list{$lang})) {
+ die "Sorry, but $lang isn't ignored"; };
+ delete $ignore_language_list{$lang}; }
+ elsif ($arg eq "--addlangall") { %ignore_language_list = (); }
+ elsif ($arg eq "--effort") {$effort_factor = (shift)*1.0;
+ $effort_exponent = (shift)*1.0;
+ $effort_estimation_message = "effort model"}
+ elsif ($arg eq "--schedule") {$schedule_factor = (shift)*1.0;
+ $schedule_exponent = (shift)*1.0;
+ $schedule_estimation_message = "schedule model"}
+ elsif ($arg eq "--personcost") {$person_cost = (shift)*1.0;}
+ elsif ($arg eq "--overhead") {$overhead = (shift)*1.0;}
+ elsif ($arg eq "--stdin") {$dirs_in_stdin = 1;}
+ else {die "Unknown option: $arg\n";}
+}
+
+
+# Determine the languages to show:
+
+
+if ($computing_sloc) { $show_non_lang = 0; }
+
+if (!$show_non_lang) {
+ # Add the non_language_list to the ignored languages.
+ foreach $langname (keys(%non_language_list))
+ {$ignore_language_list{$langname} = 1;}
+}
+
+
+%lang_total = ();
+%license_total = ();
+
+@data_lines = ();
+
+$sloc = 0;
+$total_sloc = 0;
+$total_lang_sloc = 0;
+$grand_total_sloc = 0;
+$grand_total_lang_sloc = 0;
+$effort = 0.0;
+$grand_total_effort = 0.0;
+$grand_schedule = 0.0;
+
+if (!$narrow) {
+ # Ouch! To accurately determine the column positions and names,
+ # without "pre-knowing" them, we need to look through the data.
+ # So, we'll do it twice. This isn't efficient - if needed,
+ # speed it up by rewriting this to do it in-memory.
+ while (defined($_ = <DATAFILE>)) {
+ ($lang, $sloc) = split;
+ next if ( (!defined($lang)) || (!defined($sloc)) );
+ next if ($ignore_language_list{$lang});
+ $lang_total{$lang} = 0;
+ }
+}
+
+
+# Print the header.
+if ($show_header) {
+if ($narrow) {
+ if ($computing_sloc) { print "SLOC\t"; }
+ else { print "#Files\t"; }
+ if ($show_effort) {print "P.Y.\t";}
+ print "Directory\t";
+ if ($computing_sloc) { print "SLOC-by-Language (Sorted)"; }
+ else { print "#Files-by-Language (Sorted)"; }
+ print "\n";
+} else {
+ if ($computing_sloc) { print "SLOC\t"; }
+ else { print "#Files\t"; }
+ if ($show_effort) {print "P.M.\t";}
+ printf "%-22s\t", "Dir";
+ foreach $lang (keys(%lang_total)) {
+ print "$lang\t";
+ $lang_total{$lang} = 0;
+ };
+ print "\n";
+}
+}
+
+if ($dirs_in_stdin == 1) {
+ while (defined($dir = <STDIN>)) {
+ chomp ($dir);
+ push (@dirs, $dir);
+ }
+}
+
+while ($dir = shift) {
+ push (@dirs, $dir);
+}
+
+
+foreach $dir (@dirs) {
+ if (! -d "$dir") {
+ # print "Skipping non-directory $dir\n";
+ next;
+ }
+
+ # Skip previously-examined directories.
+ if ($examined_directories{$dir}) {
+ # print "Skipping already-examined directory $dir\n";
+ next;
+ }
+ $examined_directories{$dir} = 1;
+
+ if (! -r "${dir}/filelist") {
+ # print "Skipping directory $dir; it doesn't contain a file 'filelist'\n";
+ next;
+ }
+
+
+ $simplename = $dir;
+ $simplename =~ s!^.*\/!!;
+ $total_sloc = 0;
+ $total_lang_sloc = 0;
+ $preceding_entry = 0;
+
+ $line = "";
+ %lang_data = ();
+
+ if ($computing_sloc) {
+ $filename = "${dir}/all-physical.sloc";
+ } else {
+ $filename = "${dir}/all.filecount";
+ }
+ if (open(DATAFILE, "<$filename")) {
+ while (defined($_ = <DATAFILE>)) {
+ ($lang, $sloc) = split;
+ next if ( (!defined($lang)) || (!defined($sloc)) );
+ next if ($ignore_language_list{$lang});
+ if ($narrow) { if ($sloc) {$lang_data{$lang} = $sloc;}}
+ else { $line .= "${sloc}\t"; }
+ if ($lang eq $sort_by) {$interesting_lang_sloc = $sloc;}
+ $total_sloc += $sloc;
+ $total_lang_sloc += $sloc unless ($non_language_list{$lang});
+ $lang_total{$lang} += $sloc;
+ }
+ close(DATAFILE);
+ } else {
+ print STDERR "Error openinig $filename\n";
+ }
+ if ($narrow) {
+ # For narrow view, sort the language entries.
+ foreach $entry (sort {$lang_data{$b} <=> $lang_data{$a}} keys %lang_data){
+ if ($preceding_entry) {$line .= ",";}
+ $preceding_entry = 1;
+ $line .= "${entry}=${lang_data{$entry}}";
+ }
+ if (!$preceding_entry) {$line .= "(none)";}
+ }
+
+ $grand_total_sloc += $total_sloc;
+ $grand_total_lang_sloc += $total_lang_sloc;
+
+ $effort = effort_person_months($total_sloc);
+ $grand_total_effort += $effort;
+
+ $schedule = estimate_schedule($effort);
+ if ($schedule > $grand_schedule) {
+ $grand_schedule = $schedule; # The longest leg wins.
+ }
+
+ $displayed_effort = "";
+ if ($show_effort) { $displayed_effort = sprintf "%.2f\t", $effort; }
+ if ($narrow) {
+ $displayed_name = "$simplename";
+ } else {
+ $displayed_name = sprintf "%-22s\t", $simplename;
+ }
+
+ # Add to the corresponding license, if the license is known.
+ $license = "";
+ if (open(LICENSE_FILE, "<${dir}/PROGRAM_LICENSE")) {
+ $license = <LICENSE_FILE>;
+ chomp($license);
+ close(LICENSE_FILE);
+ if ($license) {
+ $license_of{$simplename} = $license; # Hash currently unused.
+ if (! defined($license_total{$license})) {
+ $license_total{$license} = 0;
+ }
+ $license_total{$license} = $license_total{$license} + $total_sloc;
+ }
+ } else {
+ $no_license_total += $total_sloc;
+ }
+
+ if ($narrow) {
+ $line = sprintf "%-7d %s%-15s %-s\n", $total_sloc, $displayed_effort,
+ $simplename, $line;
+ if ($break_line && (length($line) > 77)) { # Break up long line.
+ $line =~ s/(.{71})([^,]*),(.*)/$1$2,\n $3/;
+ }
+ if ($license) {
+ $line .= " [$license]\n";
+ }
+ } else {
+ $line = "${total_sloc}\t${displayed_effort}${displayed_name}${line}\n";
+ }
+ if ($sort_by) {
+ if ($sort_by eq "total") {$line = "$total_sloc\t$line";}
+ else {$line = "$interesting_lang_sloc\t$line";}
+ $data_lines[$#data_lines+1] = $line; # Add to data lines.
+ } else {
+ print $line; # No sort - print immediately for speed.
+ }
+
+}
+
+if ($sort_by) {
+ # Print sorted version. This is a little inefficient, but for
+ # only a few hundred or thousand values it doesn't matter.
+ @sorted_data_lines = sort { ($b =~ /^(\d+)/)[0] <=> ($a =~ /^(\d+)/)[0] }
+ @data_lines;
+ foreach $line (@sorted_data_lines) {
+ $short_line = $line;
+ $short_line =~ s/^[^\t]*\t//; # Remove sort field.
+ print $short_line;
+ }
+}
+
+
+if (! $show_footer) {exit(0);}
+if ($grand_total_sloc == 0) {
+ print "SLOC total is zero, no further analysis performed.\n";
+ exit(1);
+}
+
+# Print the footer.
+if ($narrow) {
+ print "\n";
+ print "\n";
+ print "Totals grouped by language (dominant language first):\n";
+ # If you don't want the list sorted by size of language, just do:
+ # foreach $lang (@language_list) {
+ foreach $lang (sort {&get_lang_total($b) <=> &get_lang_total($a) } keys(%lang_total) ) {
+ $percent = get_lang_total($lang) * 100.0 / $grand_total_sloc;
+ if ($percent > 0.0) {
+ printf "%-9s %9d (%.2f%%)\n", $lang . ":", $lang_total{$lang}, $percent;
+ }
+ };
+
+ if ($show_non_lang) {
+ # The previous list showed "non-languages", so now we'll show only the
+ # data for data associated with a normal language:
+ print "\n";
+ print "\n";
+ foreach $lang (sort {&get_lang_total($b) <=> &get_lang_total($a) } keys(%lang_total)) {
+ next if (defined($non_language_list{$lang}));
+ $percent = $lang_total{$lang} * 100.0 / $grand_total_lang_sloc;
+ if ($percent > 0.0) {
+ printf "%-9s %9d (%.2f%%)\n", $lang . ":", $lang_total{$lang}, $percent;
+ }
+ };
+ }
+
+} else { # Not narrow.
+
+ print "$grand_total_sloc\t";
+ if ($show_effort) {printf "%.2f\t", $grand_total_effort;}
+
+ printf "%-22s", "Totals";
+ foreach $lang (keys(%lang_total)) {
+ print "\t$lang_total{$lang}";
+ };
+
+ print "\t";
+ if ($show_effort) {printf "\t";}
+ printf "%-22s\t", "Percentages";
+ foreach $lang (keys(%lang_total)) {
+ $percent = $lang_total{$lang} * 100.0 / $grand_total_sloc;
+ printf "\t%0.2f", $percent;
+ };
+ print "\n";
+
+ print "\t";
+ if ($show_effort) {printf "\t";}
+ printf "%-22s\t", "Code Percentages";
+ foreach $lang (keys(%lang_total)) {
+ next if (defined($non_language_list{$lang}));
+ $percent = $lang_total{$lang} * 100.0 / $grand_total_lang_sloc;
+ printf "\t%0.2f", $percent;
+ };
+ print "\n";
+}
+
+print "\n";
+print "\n";
+
+
+if (%license_total) {
+ # We have license info on something, so if there's anything
+ # unallocated, add that to the list.
+ if ($no_license_total) {
+ $license_total{"Not listed"} = $no_license_total;
+ }
+ print "Licenses:\n";
+ foreach $license (sort {$license_total{$b} <=> $license_total{$a} } keys(%license_total)) {
+ $percent = $license_total{$license} * 100.0 / $grand_total_sloc;
+ if ($percent > 0.0) {
+ printf "%9d (%.2f%%) %s\n", $license_total{$license}, $percent, $license;
+ }
+ };
+ print "\n";
+ print "\n";
+
+ print "Percentage of Licenses containing selected key phrases:\n";
+ %license_phrase = ();
+ foreach $license (keys(%license_total)) {
+ foreach $phrase (@license_list) {
+ if ($license =~ m/\b$phrase\b/i) {
+ if (!defined($license_phrase{$phrase})) {$license_phrase{$phrase} = 0;}
+ $license_phrase{$phrase} = $license_phrase{$phrase} +
+ $license_total{$license};
+ }
+ }
+ }
+
+ foreach $phrase (sort {$license_phrase{$b} <=> $license_phrase{$a} } keys(%license_phrase)) {
+ $percent = $license_phrase{$phrase} * 100.0 / $grand_total_sloc;
+ if ($percent > 0.0) {
+ printf "%9d (%.2f%%) %s\n", $license_phrase{$phrase}, $percent, $phrase;
+ }
+ };
+
+}
+
+
+print "\n";
+print "\n";
+
+if ($computing_sloc) {
+ if ($one_program) {
+ # If it's one program, override the grand total of effort
+ # and the schedule calculations by using the total SLOC.
+
+ $grand_total_effort = effort_person_months($grand_total_sloc);
+ $grand_schedule = estimate_schedule($grand_total_effort);
+ }
+ printf "Total Physical Source Lines of Code (SLOC) = %s\n",
+ commify($grand_total_sloc);
+
+ printf "Development Effort Estimate, Person-Years (Person-Months) = %s (%s)\n",
+ numformat($grand_total_effort/12.0, 2),
+ numformat($grand_total_effort, 2);
+ print " ($effort_estimation_message " .
+ "Person-Months = $effort_factor * (KSLOC**$effort_exponent))\n";
+
+ printf "Schedule Estimate, Years (Months) = %s (%s)\n",
+ numformat($grand_schedule/12.0, 2),
+ numformat($grand_schedule, 2);
+ print " ($schedule_estimation_message " .
+ "Months = $schedule_factor * (person-months**$schedule_exponent))\n";
+
+ # Don't show this if there are multiple programs, because the computation
+ # is essentially meaningless: after the "smaller" projects have completed,
+ # the longest one would keep going:
+ if ($one_program && ($grand_schedule > 0.0)) {
+ printf "Estimated Average Number of Developers (Effort/Schedule) = %s\n",
+ numformat($grand_total_effort / $grand_schedule, 2);
+ }
+
+
+ $value = ($grand_total_effort / 12.0) * $person_cost * $overhead;
+ printf "Total Estimated Cost to Develop = \$ %s\n",
+ numformat($value, 0);
+ printf " (average salary = \$%s/year, overhead = %0.2f).\n",
+ commify($person_cost), $overhead;
+
+} else {
+print "Total Number of Files = $grand_total_sloc\n";
+print "Total Number of Source Code Files = $grand_total_lang_sloc\n";
+}
+print "SLOCCount, Copyright (C) 2001-2004 David A. Wheeler\n";
+print "SLOCCount is Open Source Software/Free Software, licensed under the GNU GPL.\n";
+print "SLOCCount comes with ABSOLUTELY NO WARRANTY, and you are welcome to\n";
+print "redistribute it under certain conditions as specified by the GNU GPL license;\n";
+print "see the documentation for details.\n";
+print "Please credit this data as \"generated using David A. Wheeler's 'SLOCCount'.\"\n";
+