summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid A. Wheeler <dwheeler@dwheeler.com>2013-09-02 18:07:15 -0400
committerDavid A. Wheeler <dwheeler@dwheeler.com>2013-09-02 18:07:15 -0400
commitb01dd02bf67b722fb76b00f8750b1e2ee26db7e1 (patch)
treecbbbebef56704c7eb237b771fa737b5a35cb74c4
parent5c545e92fb2c41ca6c8d5e8ef3cfb8d5babb9107 (diff)
downloadsloccount-git-b01dd02bf67b722fb76b00f8750b1e2ee26db7e1.tar.gz
Put verbose messages to STDERR [from Dirk Jagmann]
- Send verbose messages to STDERR, not stdout. - https://sourceforge.net/p/sloccount/patches/5/
-rwxr-xr-xbreak_filelist4
-rwxr-xr-xbreak_filelist.orig356
-rwxr-xr-xget_sloc13
-rwxr-xr-xmake_filelists2
-rwxr-xr-xsloccount8
5 files changed, 305 insertions, 78 deletions
diff --git a/break_filelist b/break_filelist
index ed70475..b6acf45 100755
--- a/break_filelist
+++ b/break_filelist
@@ -724,7 +724,7 @@ sub compute_digest {
# There doesn't seem to be a way in perl to disable an error message
# display if the command is missing, which is annoying. However, the
# program is more robust if we check for the command each time we run.
- print "Finding a working MD5 command....\n";
+ print STDERR "Finding a working MD5 command....\n";
foreach $m ("md5sum", "md5", "openssl") {
$result = compute_digest_given_method($filename, $m);
if (defined($result)) {
@@ -735,7 +735,7 @@ sub compute_digest {
if (!defined($digest_method)) {
die "Failure - could not find a working md5 program using $filename.";
}
- print "Found a working MD5 command.\n";
+ print STDERR "Found a working MD5 command.\n";
}
return $result;
}
diff --git a/break_filelist.orig b/break_filelist.orig
index b34c702..ed70475 100755
--- a/break_filelist.orig
+++ b/break_filelist.orig
@@ -8,9 +8,27 @@
# uses heuristics to determine this.
# The list of .h files is also contained in h_list.dat.
-# (C) Copyright 2000-2001 David A. Wheeler
-# Part of "SLOCCount", and released under the GPL version 2;
-# see the documentation for details.
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+
# If adding a new language: add the logic to open the file,
# close the file, and detect & write to the file listing that language.
@@ -58,7 +76,7 @@ $noisy = 0; # Set to 1 if you want noisy reports.
"bdf" => 1,
"sgml" => 1,
"mf" => 1,
- "txt" => 1,
+ "txt" => 1, "text" => 1,
"man" => 1,
"xbm" => 1,
"Tag" => 1,
@@ -70,7 +88,7 @@ $noisy = 0; # Set to 1 if you want noisy reports.
"dic" => 1,
"pfb" => 1,
"fig" => 1,
- "afm" => 1,
+ "afm" => 1, # font metrics
"jpg" => 1,
"bmp" => 1,
"htm" => 1,
@@ -85,6 +103,18 @@ $noisy = 0; # Set to 1 if you want noisy reports.
"o" => 1, # Object code is generated from source code.
"a" => 1, # Static object code.
"so" => 1, # Dynamically-loaded object code.
+ "Y" => 1, # file compressed with "Yabba"
+ "Z" => 1, # file compressed with "compress"
+ "ad" => 1, # X application default resource file.
+ "arc" => 1, # arc(1) archive
+ "arj" => 1, # arj(1) archive
+ "au" => 1, # Audio sound file
+ "wav" => 1,
+ "bak" => 1, # Backup files - we only want to count the "real" files.
+ "bz2" => 1, # bzip2(1) compressed file
+ "mp3" => 1, # MP3 audio file
+ "tgz" => 1, # tarball
+ "zip" => 1, # zip archive
);
# The following filenames are NOT code:
@@ -114,6 +144,10 @@ $noisy = 0; # Set to 1 if you want noisy reports.
# A filename ending in the following extensions usually maps to the
# given language:
+# TODO: See suffixes(7)
+# .al Perl autoload file
+# .am automake input
+
%file_extensions = (
"c" => "ansic",
"ec" => "ansic", # Informix C.
@@ -122,21 +156,36 @@ $noisy = 0; # Set to 1 if you want noisy reports.
"C" => "cpp", "cpp" => "cpp", "cxx" => "cpp", "cc" => "cpp",
"pcc" => "cpp", # Input to Oracle C++ preproc.
"m" => "objc",
- "h" => "h", "H" => "h", "hpp" => "h",
+ # C# (C-sharp) is named 'cs', not 'c#', because
+ # the '#' is a comment character and I'm trying to
+ # avoid bug-prone conventions.
+ # C# doesn't support header files.
+ "cs" => "cs",
+ # Header files are allocated to the "h" language, and then
+ # copied to the correct location later so that C/C++/Objective-C
+ # can be separated.
+ "h" => "h", "H" => "h", "hpp" => "h", "hh" => "h",
"ada" => "ada", "adb" => "ada", "ads" => "ada",
"pad" => "ada", # Oracle Ada preprocessor.
- "f" => "fortran",
- "p" => "pascal",
+ "f" => "fortran", "F" => "fortran", # This catches "wokka.F" as Fortran.
+ # Warning: "Freeze" format also uses .f. Haven't heard of problems,
+ # freeze is extremely rare and even more rare in source code directories.
+ "f77" => "fortran", "F77" => "fortran",
+ "f90" => "f90", "F90" => "f90",
+ "cob" => "cobol", "cbl" => "cobol",
+ "COB" => "cobol", "CBL" => "cobol", # Yes, people do create wokka.CBL files
+ "p" => "pascal", "pas" => "pascal", "pp" => "pascal", "dpr" => "pascal",
"py" => "python",
"s" => "asm", "S" => "asm", "asm" => "asm",
"sh" => "sh", "bash" => "sh",
"csh" => "csh", "tcsh" => "csh",
"java" => "java",
- "lisp" => "lisp", "el" => "lisp", "scm" => "lisp", "lsp" => "lisp",
+ "lisp" => "lisp", "el" => "lisp", "scm" => "lisp", "sc" => "lisp",
+ "lsp" => "lisp", "cl" => "lisp",
"jl" => "lisp",
"tcl" => "tcl", "tk" => "tcl", "itk" => "tcl",
"exp" => "exp",
- "pl" => "perl", "pm" => "perl", "perl" => "perl",
+ "pl" => "perl", "pm" => "perl", "perl" => "perl", "ph" => "perl",
"awk" => "awk",
"sed" => "sed",
"y" => "yacc",
@@ -146,7 +195,17 @@ $noisy = 0; # Set to 1 if you want noisy reports.
"php" => "php", "php3" => "php", "php4" => "php", "php5" => "php",
"php6" => "php",
"inc" => "inc", # inc MAY be PHP - we'll handle it specially.
- # ???: .pco is Oracle Cobol, need to add with a Cobol counter.
+ "m3" => "modula3", "i3" => "modula3",
+ "mg" => "modula3", "ig" => "modula3",
+ "ml" => "ml", "mli" => "ml",
+ "mly" => "ml", # ocamlyacc. In fact this is half-yacc half-ML, especially
+ # comments in yacc part are C-like, not ML like.
+ "mll" => "ml", # ocamllex, no such problems as in ocamlyacc
+ "rb" => "ruby",
+ "hs" => "haskell", "lhs" => "haskell",
+ # ???: .pco is Oracle Cobol
+ "jsp" => "jsp", # Java server pages
+ "js" => "javascript",
);
@@ -184,7 +243,7 @@ sub reopen {
seek CODE_FILE, 0, 0; # Rewind.
} else { # We're opening a new file.
if ($opened_file_name) {close(CODE_FILE)}
- open(CODE_FILE, "<$filename") || die "Can't open $filename";
+ open(CODE_FILE, "<$filename\0") || die "Can't open $filename";
$opened_file_name = $filename;
}
}
@@ -197,7 +256,7 @@ sub looks_like_cpp {
my $confidence = 0;
chomp($filename);
open( SUSPECT, "<$filename");
- while (<SUSPECT>) {
+ while (defined($_ = <SUSPECT>)) {
if (m/^\s*class\b.*\{/) { # "}"
close(SUSPECT);
return 2;
@@ -385,18 +444,33 @@ sub really_is_pascal {
my $filename = shift;
chomp($filename);
-# The heuristic is as follows: it's Pascal _IF_ it has all of the following:
-# 1. "^..program NAME(...);" or "..unit NAME".
-# 2. "procedure", "function", "^..interface", or "^..implementation"
+# The heuristic is as follows: it's Pascal _IF_ it has all of the following
+# (ignoring {...} and (*...*) comments):
+# 1. "^..program NAME" or "^..unit NAME",
+# 2. "procedure", "function", "^..interface", or "^..implementation",
# 3. a "begin", and
-# 4. it ends with "end." (ignoring {...} comments).
-# The last requirement in particular filters out non-Pascal.
-
+# 4. it ends with "end.",
+#
+# Or it has all of the following:
+# 1. "^..module NAME" and
+# 2. it ends with "end.".
+#
+# Or it has all of the following:
+# 1. "^..program NAME",
+# 2. a "begin", and
+# 3. it ends with "end.".
+#
+# The "end." requirements in particular filter out non-Pascal.
+#
+# Note (jgb): this does not detect Pascal main files in fpc, like
+# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in
+# it
my $is_pascal = 0; # Value to determine.
my $has_program = 0;
my $has_unit = 0;
+ my $has_module = 0;
my $has_procedure_or_function = 0;
my $found_begin = 0;
my $found_terminating_end = 0;
@@ -407,26 +481,35 @@ sub really_is_pascal {
open(PASCAL_FILE, "<$filename") ||
die "Can't open $filename to determine if it's pascal.\n";
while(<PASCAL_FILE>) {
+ s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective.
+ s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective.
if (m/\bprogram\s+[A-Za-z]/i) {$has_program=1;}
if (m/\bunit\s+[A-Za-z]/i) {$has_unit=1;}
+ if (m/\bmodule\s+[A-Za-z]/i) {$has_module=1;}
if (m/\bprocedure\b/i) { $has_procedure_or_function = 1; }
if (m/\bfunction\b/i) { $has_procedure_or_function = 1; }
if (m/^\s*interface\s+/i) { $has_procedure_or_function = 1; }
if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; }
if (m/\bbegin\b/i) { $has_begin = 1; }
- s/\{.*?\}//g; # Ignore comments on this line; imperfect, but effective.
- # This heuristic fails if there are multi-line comments after
- # "end."; I haven't seen that in real Pascal programs:
+ # Originally I said:
+ # "This heuristic fails if there are multi-line comments after
+ # "end."; I haven't seen that in real Pascal programs:"
+ # But jgb found there are a good quantity of them in Debian, specially in
+ # fpc (at the end of a lot of files there is a multiline comment
+ # with the changelog for the file).
+ # Therefore, assume Pascal if "end." appears anywhere in the file.
if (m/end\.\s*$/i) {$found_terminating_end = 1;}
- elsif (m/\S/) {$found_terminating_end = 0;}
+# elsif (m/\S/) {$found_terminating_end = 0;}
}
close(PASCAL_FILE);
# Okay, we've examined the entire file looking for clues;
# let's use those clues to determine if it's really Pascal:
- if ( ($has_unit || $has_program) && $has_procedure_or_function &&
- $has_begin && $found_terminating_end)
+ if ( ( ($has_unit || $has_program) && $has_procedure_or_function &&
+ $has_begin && $found_terminating_end ) ||
+ ( $has_module && $found_terminating_end ) ||
+ ( $has_program && $has_begin && $found_terminating_end ) )
{$is_pascal = 1;}
$pascal_files{$filename} = $is_pascal; # Store result in cache.
@@ -434,6 +517,51 @@ sub really_is_pascal {
return $is_pascal;
}
+sub really_is_incpascal {
+# Given filename, returns TRUE (1) if its contents really are Pascal,
+# else 0.  Meant for .inc files (mainly seen in fpc).  The result is also
+# stored in the %pascal_files cache.  Dies if the file cannot be opened.
+
+  my $filename = shift;
+  chomp($filename);
+
+# The heuristic is as follows: it is Pascal if any of the following:
+# 1. really_is_pascal returns true
+# 2. Any usual reserved word is found (program, unit, procedure...)
+# 3. A line ends with "end." after a "begin" has been seen.
+
+  # If the general routine for Pascal files works, we have it
+  if (&really_is_pascal ($filename)) {
+    $pascal_files{$filename} = 1;
+    return 1;
+  }
+
+  my $is_pascal = 0; # Value to determine.
+  my $found_begin = 0;
+
+  # Three-argument open with a lexical handle: odd characters in the file
+  # name cannot be misread as an open mode.
+  open(my $pascal_fh, "<", $filename) ||
+    die "Can't open $filename to determine if it's pascal.\n";
+  while (<$pascal_fh>) {
+    s/\{.*?\}//g;  # Ignore {...} comments on this line; imperfect, but effective.
+    s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
+    if (m/\b(program|unit|module)\s+[A-Za-z]/i) {$is_pascal = 1;}
+    if (m/\b(procedure|function)\b/i) {$is_pascal = 1;}
+    if (m/^\s*(interface|implementation)\s+/i) {$is_pascal = 1;}
+    if (m/\bconstant\s+/i) {$is_pascal = 1;}
+    if (m/\bbegin\b/i) { $found_begin = 1; }
+    # Test (don't assign) the flag, so that "end." only counts once a
+    # "begin" has been seen on this or an earlier line.
+    if ((m/end\.\s*$/i) && $found_begin) {$is_pascal = 1;}
+    last if ($is_pascal);  # One positive clue is enough; stop reading.
+  }
+
+  close($pascal_fh);
+  $pascal_files{$filename} = $is_pascal; # Store result in cache.
+  return $is_pascal;
+}
+
# Cache which files are php or not.
# Key is the full file pathname; value is 1 if it is (else 0).
%php_files = ();
@@ -490,7 +618,7 @@ sub examine_dir {
my $saw_cpp_in_dir = 0;
my $saw_objc_in_dir = 0;
opendir(DIR, $dirname) || die "can't opendir $dirname";
- while ($_ = readdir(DIR)) {
+ while (defined($_ = readdir(DIR))) {
chomp;
next if (!$_);
if (m/\.(cpp|C|cxx|cc)$/ && -f "$dirname/$_") {$saw_cpp_in_dir = 1;}
@@ -515,8 +643,8 @@ sub examine_dir {
sub was_generated_automatically() {
# Determine if the file was generated automatically.
- # Use a simple heuristic: check if first few lines have the
- # phrase "generated automatically", or "automatically generated",
+ # Use a simple heuristic: check if first few lines have phrases like
+ # "generated automatically", "automatically generated", "Generated by",
# or "do not edit" as the first
# words in the line (after possible comment markers and spaces).
my $filename = shift;
@@ -526,9 +654,11 @@ sub was_generated_automatically() {
chomp($filename);
reopen($filename);
$i = 15; # Look at first 15 lines.
- while (<CODE_FILE>) {
+ while (defined($_ = <CODE_FILE>)) {
if (m/^[\s#\/\*;\-\%]*generated automatically/i ||
m/^[\s#\/\*;\-\%]*automatically generated/i ||
+ m/^[\s#\/\*;\-\%]*generated by /i || # libtool uses this.
+ m/^[\s#\/\*;\-\%]*a lexical scanner generated by flex/i ||
m/^[\s#\/\*;\-\%]*this is a generated file/i || # TeTex uses this.
m/^[\s#\/\*;\-\%]*generated with the.*utility/i || # TeTex uses this.
m/^[\s#\/\*;\-\%]*do not edit/i) {
@@ -548,23 +678,83 @@ sub was_generated_automatically() {
$cached_digest = "";
$cached_digest_filename = "";
+$digest_method = undef;
+
+sub compute_digest_given_method {
+  # Run the MD5 program selected by $method ("md5sum", "md5" or "openssl")
+  # on $filename and return the digest taken from its first output line.
+  # Returns undef if the program can't be run or prints nothing; dies on
+  # any other $method.
+  my $filename = shift;
+  my $method = shift;
+  my $result;
+  my @command;
+
+  if ($method eq "md5sum") {
+    @command = ("md5sum", $filename);
+  } elsif ($method eq "md5") {
+    @command = ("md5", $filename);
+  } elsif ($method eq "openssl") {
+    @command = ("openssl", "dgst", "-md5", $filename);
+  } else {
+    die "Unknown method"; # "Can't happen"
+  }
+  # List-form pipe open (no shell); a lexical handle replaces the global FH.
+  open(my $fh, "-|", @command) or return undef;
+  $result = <$fh>;
+  close $fh;
+  return undef if ! defined($result);
+  chomp($result);
+  if ($method eq "md5sum") {
+    $result =~ s/^\s*\\?//; # GNU md5sum starts the line with "\" if it escaped the name.
+    $result =~ s/[^a-fA-F0-9].*//; # Strip away end.
+  } else {
+    $result =~ s/^.* //; # Strip away beginning; the digest comes last.
+  }
+  return $result;
+}
+
+sub compute_digest {
+  # Return the MD5 digest of $filename.  The first call probes "md5sum",
+  # "md5" and "openssl" in turn and records the first that yields a result
+  # in $digest_method; later calls reuse it.  Dies if none of them works.
+  my $filename = shift;
+  my $result;
+  if (!defined($digest_method)) {
+    # There doesn't seem to be a way in perl to disable an error message
+    # display if the command is missing, which is annoying. However, the
+    # program is more robust if we check for the command each time we run.
+    print "Finding a working MD5 command....\n";
+    foreach $m ("md5sum", "md5", "openssl") {
+      $result = compute_digest_given_method($filename, $m);
+      next if (!defined($result));
+      $digest_method = $m;
+      last;
+    }
+    defined($digest_method) ||
+      die "Failure - could not find a working md5 program using $filename.";
+    print "Found a working MD5 command.\n";
+  } else {
+    $result = compute_digest_given_method($filename, $digest_method);
+  }
+  return $result;
+}
+
sub get_digest {
my $filename = shift;
+ my $result;
# First, check the cache -- did we just compute this?
if ($filename eq $cached_digest_filename) {
return $cached_digest; # We did, so here's what it was.
}
- my $results = `md5sum "$filename"`;
- chomp($results);
- $results =~ s/^\s*//; # Not needed for GNU Textutils.
- $results =~ s/[^a-fA-F0-9].*//; # Strip away end.
- $cached_digest = $results; # Store in cache.
+ $result = compute_digest($filename);
+ # Store in most-recently-used cache.
+ $cached_digest = $result;
$cached_digest_filename = $filename;
- return $results;
+ return $result;
}
-
sub already_added {
# returns the first file's name with the same contents,
# else returns the empty string.
@@ -678,43 +868,53 @@ sub file_type_from_contents() {
# what I'd missed.
$command = "";
- if ($firstline =~ m@^#!\s*/(usr/)?bin/env\s+([a-zA-Z0-9\._]+)(\s|\Z)@) {
+
+ # Strip out any calls to sudo
+ if ($firstline =~ m@^#!\s*/(usr/)?bin/sudo\s+(/.*)@) {
+ $firstline = "#!" . $2;
+ }
+
+ if ($firstline =~ m@^#!\s*/(usr/)?bin/env\s+([a-zA-Z0-9\._]+)(\s|\Z)@i) {
$command = $2;
- } elsif ($firstline =~ m@^#!\s*([a-zA-Z0-9\/\.]+\/)?([a-zA-Z0-9\._]+)(\s|\Z)@) {
+ } elsif ($firstline =~ m@^#!\s*([a-zA-Z0-9\/\.]+\/)?([a-zA-Z0-9\._]+)(\s|\Z)@) {
$command = $2;
}
- if ( ($command =~ m/^(bash|ksh|zsh|pdksh|sh)[0-9\.]*(\.exe)?$/) ||
+ if ( ($command =~ m/^(bash|ksh|zsh|pdksh|sh)[0-9\.]*(\.exe)?$/i) ||
($firstline =~
m~^#!\s*\@_?(SCRIPT_)?(PATH_)?(BA|K)?SH(ELL)?(\d+)?\@?(\s|\Z)~)) {
# Note: wish(1) uses a funny trick; see wish(1) for more info.
# The following code detects this unusual wish convention.
- if ($firstline =~ m@exec wish(\s|\Z)@) {
+ if ($firstline =~ m@exec wish(\s|\Z)@i) {
return "tcl"; # return the type for wish.
}
# Otherwise, it's shell.
return "sh";
}
- if ( ($command =~ m/^(t?csh\d*)[0-9\.]*(\.exe)?$/) ||
+ if ( ($command =~ m/^(t?csh\d*)[0-9\.]*(\.exe)?$/i) ||
($firstline =~ m@^#!\s*xCSH_PATHx(\s|\Z)@)) {
return "csh";
}
- if ( ($command =~ m/^(mini)?perl[0-9\.]*(\.exe)?$/) ||
+ if ( ($command =~ m/^(mini)?perl[0-9\.]*(\.exe)?$/i) ||
+ ($command =~ m/^speedycgi[0-9\.]*(\.exe)?$/i) ||
($firstline =~ m~^#!\s*\@_?(PATH_)?PERL\d*(PROG)?\@(\s|\Z)~) ||
($firstline =~ m~^#!\s*xPERL_PATHx(\s|\Z)~)) {
return "perl";
}
- if ($command =~ m/^python[0-9\.]*(\.exe)?$/) {
+ if ($command =~ m/^python[0-9\.]*(\.exe)?$/i) {
return "python";
}
- if ($command =~ m/^(tcl|tclsh|bltwish|wish|wishx|WISH)[0-9\.]*(\.exe)?$/) {
+ if ($command =~ m/^ruby[0-9\.]*(\.exe)?$/i) {
+ return "ruby";
+ }
+ if ($command =~ m/^(tcl|tclsh|bltwish|wish|wishx|WISH)[0-9\.]*(\.exe)?$/i) {
return "tcl";
}
- if ($command =~ m/^expectk?[0-9\.]*(\.exe)?$/) { return "exp"; }
- if ($command =~ m/^[ng]?awk[0-9\.]*(\.exe)?$/) { return "awk"; }
- if ($command =~ m/^sed$/) { return "sed"; }
- if ($command =~ m/^guile[0-9\.]*$/) { return "lisp"; }
- if ($firstline =~ m@^#!.*make\b@) { # We'll claim that #! make is a makefile.
+ if ($command =~ m/^expectk?[0-9\.]*(\.exe)?$/i) { return "exp"; }
+ if ($command =~ m/^[ng]?awk[0-9\.]*(\.exe)?$/i) { return "awk"; }
+ if ($command =~ m/^sed$/i) { return "sed"; }
+ if ($command =~ m/^guile[0-9\.]*$/i) { return "lisp"; }
+ if ($firstline =~ m@^#!.*make\b@i) { # We'll claim that #! make is a makefile.
return "makefile";
}
if ($firstline =~ m@^#!\s*\.(\s|\Z)@) { # Lonely period.
@@ -747,7 +947,7 @@ sub file_type_from_contents() {
sub get_file_type {
my $file_to_examine = shift;
# Return the given file's type.
- # It looks at the contents, then the filename, then file extension.
+ # Consider the file's contents, filename, and file extension.
$warning_from_first_line = "";
@@ -816,7 +1016,8 @@ sub get_file_type {
# Use filename to determine if it's a makefile:
if (($file_to_examine =~ m/\bmakefile$/i) ||
($file_to_examine =~ m/\bmakefile\.txt$/i) ||
- ($file_to_examine =~ m/\bmakefile\.pc$/i)) {
+ ($file_to_examine =~ m/\bmakefile\.pc$/i) ||
+ ($file_to_examine =~ m/\bdebian\/rules$/i)) { # "debian/rules" too.
return "makefile";
}
@@ -824,6 +1025,19 @@ sub get_file_type {
if ($file_to_examine =~ m/\.([^.\/]+)$/) {
$type = $1;
+ # More ugly problems: some source filenames only use
+ # UPPERCASE, and they can be mixed with regular files.
+ # Since normally filenames are lowercase or mixed case,
+ # presume that an all-uppercase filename means we have to assume
+ # that the extension must be lowercased. This particularly affects
+ # .C, which usually means C++ but in this case would mean plain C.
+ my $uppercase_filename = 0;
+ if (($file_to_examine =~ m/[A-Z]/) &&
+ (! ($file_to_examine =~ m/[a-z]/))) {
+ $uppercase_filename = 1;
+ $type = lc($type); # Use lowercase version of type.
+ }
+
# Is this type known to NOT be a program?
if ($not_code_extensions{$type}) {
return "not";
@@ -834,21 +1048,23 @@ sub get_file_type {
# pine4.21/pine/makefile.hpp and pine4.21/pico/makefile.hpp
# Note that pine also includes pine4.21/pine/osdep/diskquot.hpp.
# Kaffe uses .hpp for C++ header files.
- if (($type eq "hpp") && ($file_to_examine =~ m/makefile\.hpp$/))
+ if (($type eq "hpp") && ($file_to_examine =~ m/makefile\.hpp$/i))
{return "makefile";}
# If it's a C file but there's a ".pc" or ".pgc" file, then presume that
# it was automatically generated:
if ($type eq "c") {
$pc_name = $file_to_examine;
- $pc_name =~ s/\.c$/\.pc/;
+ if ($uppercase_filename) { $pc_name =~ s/\.C$/\.PC/; }
+ else { $pc_name =~ s/\.c$/\.pc/; }
if (-s "$pc_name" ) {
print "Note: Auto-generated C file (from .pc file) $file_to_examine\n"
if $noisy;
return "auto";
}
$pc_name = $file_to_examine;
- $pc_name =~ s/\.c$/\.pgc/;
+ if ($uppercase_filename) { $pc_name =~ s/\.C$/\.PGC/; }
+ else { $pc_name =~ s/\.c$/\.pgc/; }
if (-s "$pc_name" ) {
print "Note: Auto-generated C file (from .pgc file) $file_to_examine\n"
if $noisy;
@@ -875,10 +1091,17 @@ sub get_file_type {
(($type eq "tk") && (!&really_is_expect($file_to_examine))) ||
(($type eq "objc") && (!&really_is_objc($file_to_examine))) ||
(($type eq "lex") && (!&really_is_lex($file_to_examine))) ||
- (($type eq "pascal") && (!&really_is_pascal($file_to_examine))) ||
- (($type eq "inc") && (!&really_is_php($file_to_examine))))
- {$type = "unknown";}
- if ($type eq "inc") { $type = "php"; }; # Hey, the .inc is PHP!
+ (($type eq "pascal") && (!&really_is_pascal($file_to_examine)))) {
+ $type = "unknown";
+ } elsif ($type eq "inc") {
+ if (&really_is_php($file_to_examine)) {
+ $type = "php"; # Hey, the .inc is PHP!
+ } elsif (&really_is_incpascal($file_to_examine)) {
+ $type = "pascal";
+ } else {
+ $type = "unknown";
+ }
+ };
return $type;
}
@@ -914,17 +1137,17 @@ sub convert_h_files {
if ($saw_ansic && (!$saw_cpp) && (!$saw_objc)) {
# Only C, let's assume .h files are too
- while (<H_LIST>) { chomp; force_record_file_type($_, "c"); };
+ while (defined($_ = <H_LIST>)) { chomp; force_record_file_type($_, "c"); };
} elsif ($saw_cpp && (!$saw_ansic) && (!$saw_objc)) { # Only C++
- while (<H_LIST>) { chomp; force_record_file_type($_, "cpp"); };
+ while (defined($_ = <H_LIST>)) { chomp; force_record_file_type($_, "cpp"); };
} elsif ($saw_objc && (!$saw_ansic) && (!$saw_cpp)) { # Only Obj-C
- while (<H_LIST>) { chomp; force_record_file_type($_, "objc"); };
+ while (defined($_ = <H_LIST>)) { chomp; force_record_file_type($_, "objc"); };
} else {
# Ugh, we have a mixture. Let's try to determine what we have, using
# various heuristics (looking for a matching name in the directory,
# reading the file contents, the contents in the directory, etc.)
# When all else fails, assume C.
- while (<H_LIST>) {
+ while (defined($_=<H_LIST>)) {
chomp;
next if (!$_);
# print "DEBUG: H file $_\n";
@@ -1002,7 +1225,7 @@ if ($#ARGV < 0) {
if ($duplistfile) {
if (-e $duplistfile) {
open(DUPLIST, "<$duplistfile") || die "Can't open $duplistfile";
- while (<DUPLIST>) {
+ while (defined($_ = <DUPLIST>)) {
chomp;
($digest, $filename) = split(/ /, $_, 2);
if (defined($digest) && defined($filename)) {
@@ -1051,10 +1274,15 @@ while ( $dir = shift ) {
$dup_count = 0;
- while (<FILELIST>) {
+ while (defined($_ = <FILELIST>)) {
chomp;
$file = $_;
next if (!defined($file) || ($file eq ""));
+ if ($file =~ m/\n/) {
+ print STDERR "WARNING! File name contains embedded newline; it'll be IGNORED.\n";
+ print STDERR "Filename is: $file\n";
+ next;
+ }
$file_type = &get_file_type($file);
if ($file_type) {
&record_file_type($file, $file_type);
diff --git a/get_sloc b/get_sloc
index f590a8e..9fdd3e4 100755
--- a/get_sloc
+++ b/get_sloc
@@ -535,10 +535,9 @@ if ($computing_sloc) {
print "Total Number of Files = $grand_total_sloc\n";
print "Total Number of Source Code Files = $grand_total_lang_sloc\n";
}
-print "SLOCCount, Copyright (C) 2001-2004 David A. Wheeler\n";
-print "SLOCCount is Open Source Software/Free Software, licensed under the GNU GPL.\n";
-print "SLOCCount comes with ABSOLUTELY NO WARRANTY, and you are welcome to\n";
-print "redistribute it under certain conditions as specified by the GNU GPL license;\n";
-print "see the documentation for details.\n";
-print "Please credit this data as \"generated using David A. Wheeler's 'SLOCCount'.\"\n";
-
+print STDERR "SLOCCount, Copyright (C) 2001-2004 David A. Wheeler\n";
+print STDERR "SLOCCount is Open Source Software/Free Software, licensed under the GNU GPL.\n";
+print STDERR "SLOCCount comes with ABSOLUTELY NO WARRANTY, and you are welcome to\n";
+print STDERR "redistribute it under certain conditions as specified by the GNU GPL license;\n";
+print STDERR "see the documentation for details.\n";
+print STDERR "Please credit this data as \"generated using David A. Wheeler's 'SLOCCount'.\"\n";
diff --git a/make_filelists b/make_filelists
index 5440d50..1aae2df 100755
--- a/make_filelists
+++ b/make_filelists
@@ -130,7 +130,7 @@ do
mkdir "$childname"
fi
- echo "Creating filelist for $childname"
+ echo "Creating filelist for $childname" 1>&2
find "$dir" $follow -type f -print > "${childname}/filelist"
# If it exists, copy the PROGRAM_LICENSE.
diff --git a/sloccount b/sloccount
index 9491227..99dc028 100755
--- a/sloccount
+++ b/sloccount
@@ -225,15 +225,15 @@ in
esac
cd $datadir
- if echo "Categorizing files." &&
+ if echo "Categorizing files." 1>&2 &&
break_filelist --duplistfile sloc_hashes $duplicate_control $autogen * &&
- echo "Computing results." &&
+ echo "Computing results." 1>&2 &&
compute_all *
then
display_results=y
fi
- echo
- echo
+ echo 1>&2
+ echo 1>&2
;;
esac