From 05095851346f52c8e918176e8e2abdf0b21de5ec Mon Sep 17 00:00:00 2001 From: dwheeler Date: Fri, 7 Jul 2006 13:36:27 +0000 Subject: Initial import (sloccount 2.26) git-svn-id: svn://svn.code.sf.net/p/sloccount/code/trunk@1 d762cc98-fd17-0410-9a0d-d09172385bc5 --- COPYING | 340 ++++++ ChangeLog | 1018 ++++++++++++++++++ PROGRAM_LICENSE | 1 + README | 51 + SOURCES | 29 + TODO | 161 +++ TODO.orig | 153 +++ ada_count | 27 + append_license | 62 ++ append_specname | 57 + asm_count | 166 +++ awk_count | 27 + break_filelist | 1308 +++++++++++++++++++++++ break_filelist.orig | 1084 +++++++++++++++++++ c_count.c | 225 ++++ c_lines_environment.dat | 98 ++ c_outfile.dat | 1 + cobol_count | 82 ++ compute_all | 87 ++ compute_c_usc | 77 ++ compute_java_usc | 59 ++ compute_sloc_lang | 66 ++ count_extensions | 56 + count_unknown_ext | 32 + csh_count | 27 + dirmatch | 37 + driver.c | 110 ++ driver.h | 50 + exp_count | 27 + extract-count | 83 ++ extract_license | 178 ++++ f90_count | 81 ++ fortran_count | 83 ++ generic_count | 77 ++ get_sloc | 544 ++++++++++ get_sloc_details | 103 ++ haskell_count | 122 +++ java_lines_environment.dat | 98 ++ jsp_count.c | 1787 ++++++++++++++++++++++++++++++++ jsp_count.l | 90 ++ lex_count | 70 ++ lexcount1.c | 58 ++ linux_unique | 64 ++ lisp_count | 27 + make_filelists | 193 ++++ makefile | 246 +++++ makefile.orig | 222 ++++ makefile_count | 27 + ml_count.c | 209 ++++ modula3_count | 65 ++ objc_count | 89 ++ pascal_count.c | 1714 ++++++++++++++++++++++++++++++ pascal_count.l | 81 ++ perl_count | 147 +++ php_count.c | 335 ++++++ print_sum | 40 + print_sum_subset | 41 + python_count | 120 +++ redo_licenses | 42 + rpm_unpacker | 71 ++ ruby_count | 27 + sed_count | 27 + sh_count | 27 + show_filecount | 58 ++ simplecount | 84 ++ sloccount | 258 +++++ sloccount.1 | 235 +++++ sloccount.1.gz | Bin 0 -> 3377 bytes sloccount.html | 2464 ++++++++++++++++++++++++++++++++++++++++++++ sloccount.html.orig | 2440 +++++++++++++++++++++++++++++++++++++++++++ sloccount.spec | 56 + sql_count | 76 ++ stripccomments.c | 50 + stub | 22 + stubsh | 23 + table.html | 569 ++++++++++ tcl_count | 27 + testcode/conditions.CBL | 31 + testcode/hello.f | 10 + testcode/hello.f90 | 7 + testcode/hello.pas | 9 + testcode/hello1.pas | 12 + testcode/messages.rb | 152 +++ testcode/temp.c | 5 + testcode/test.hs | 19 + testcode/test1.inc | 23 + testcode/test1.lhs | 15 + testcode/test1.php | 27 + testcode/test2.lhs | 44 + testcode/wokka.cbl | 4 + testcode/wokka.cs | 8 + usc_subset.tar | 1 + 92 files changed, 19435 insertions(+) create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 PROGRAM_LICENSE create mode 100644 README create mode 100644 SOURCES create mode 100644 TODO create mode 100644 TODO.orig create mode 100755 ada_count create mode 100755 append_license create mode 100755 append_specname create mode 100755 asm_count create mode 100755 awk_count create mode 100755 break_filelist create mode 100755 break_filelist.orig create mode 100644 c_count.c create mode 100644 c_lines_environment.dat create mode 100644 c_outfile.dat create mode 100755 cobol_count create mode 100755 compute_all create mode 100755 compute_c_usc create mode 100755 compute_java_usc create mode 100755 compute_sloc_lang create mode 100755 count_extensions create mode 100755 count_unknown_ext create mode 100755 csh_count create mode 100755 dirmatch create mode 100644 driver.c create mode 100644 driver.h create mode 100755 exp_count create mode 100755 extract-count create mode 100755 extract_license create mode 100755 
f90_count create mode 100755 fortran_count create mode 100755 generic_count create mode 100755 get_sloc create mode 100755 get_sloc_details create mode 100755 haskell_count create mode 100644 java_lines_environment.dat create mode 100644 jsp_count.c create mode 100644 jsp_count.l create mode 100755 lex_count create mode 100644 lexcount1.c create mode 100755 linux_unique create mode 100755 lisp_count create mode 100755 make_filelists create mode 100644 makefile create mode 100644 makefile.orig create mode 100755 makefile_count create mode 100644 ml_count.c create mode 100644 modula3_count create mode 100755 objc_count create mode 100644 pascal_count.c create mode 100644 pascal_count.l create mode 100755 perl_count create mode 100644 php_count.c create mode 100755 print_sum create mode 100755 print_sum_subset create mode 100755 python_count create mode 100755 redo_licenses create mode 100755 rpm_unpacker create mode 100755 ruby_count create mode 100755 sed_count create mode 100755 sh_count create mode 100755 show_filecount create mode 100755 simplecount create mode 100755 sloccount create mode 100644 sloccount.1 create mode 100644 sloccount.1.gz create mode 100644 sloccount.html create mode 100644 sloccount.html.orig create mode 100644 sloccount.spec create mode 100755 sql_count create mode 100644 stripccomments.c create mode 100644 stub create mode 100644 stubsh create mode 100644 table.html create mode 100755 tcl_count create mode 100644 testcode/conditions.CBL create mode 100644 testcode/hello.f create mode 100644 testcode/hello.f90 create mode 100644 testcode/hello.pas create mode 100644 testcode/hello1.pas create mode 100644 testcode/messages.rb create mode 100644 testcode/temp.c create mode 100644 testcode/test.hs create mode 100644 testcode/test1.inc create mode 100644 testcode/test1.lhs create mode 100644 testcode/test1.php create mode 100644 testcode/test2.lhs create mode 100644 testcode/wokka.cbl create mode 100644 testcode/wokka.cs create mode 100644 usc_subset.tar diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..eeb586b --- /dev/null +++ b/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. 
+ + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) 
+ +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..6db1fdf --- /dev/null +++ b/ChangeLog @@ -0,0 +1,1018 @@ +2004-08-01 David A. 
Wheeler + * Released version 2.26. + * Modified driver.h to clearly state the GPL license. + This doesn't change anything, but it makes the + Savannah people happy. + +2004-07-31 David A. Wheeler + * Released version 2.25. Changes are: + * Per request from Savannah, added the more detailed licensing + text to every source file. + * Modified the assembly language counting code, based on useful + feedback and a test case from Purnendu Ghosh, so that + the heuristics work better at guessing the right comment character + and they perform well. + In particular, the comment character '*' is far better supported. + * Added support for Delphi project files (.dpr files, which are + essentially in Pascal syntax), thanks to Christian Iversen. + * Some versions of Perl are apparantly causing trouble, but + I have not yet found the solution for them (other than using + a different version of Perl). The troublesome line of code + in break_filelist, which currently says: + open(FH, "-|", "md5sum", $filename) or return undef; + This could be changed to: + open(FH, "-|", "md5sum $filename") or return undef; + But I dare not fix it that way, because that would create + a security problem. Imagine downloading someone + else's source code (who you don't know), using sloccount, and + that other person has created in their source tree a file + named like this: "; rm -fr /*" or its variations. + I'd rather have the program fail in specific circumstances + (users will know when it won't work!) than to insert a known + dangerous security vulnerability. I can't reproduce this problem; + it's my hope that those who CAN will help me find a good + solution. For the moment, I'm documenting the problem here and + in the TODO list, so that people will realize WHY it hasn't + just been "fixed" with the "obvious solution". + The answer: I care about security. + +2004-05-10 David A. Wheeler + * Released version 2.24 - a few minor bugfixes and improvements. + Automatically tries to use several different MD5 programs, until + it finds one that works - this is more flexible, and as a result, + it now works out-of-the-box on Apple Mac OS X. + SLOCCount now accepts "." as the directory to analyze, + it correctly identifies wrapper scripts left by libtool as + automatically generated code, and correctly identifies debian/rules + files as makefiles. Also, installation documentation has improved. + My thanks to Jesus M. Gonzalez-Barahona for telling me about the + Debian bug reports and testing of candidate versions. + My thanks to Koryn Grant, who told me what needed to be done + to get SLOCCount running on Mac OS X (and for testing my change). + This version resolves Debian Bug reports #173699, + #159609, and #200348. + +2004-04-27 David A. Wheeler + * Automatically try several different MD5 programs, looking for + a working one. Originally this program REQUIRED md5sum. + This new version tried md5sum, then md5, then openssl. + The good news - the program should now 'just work' on + Apple Mac OS X. The bad news - if md5sum doesn't exist, + sloccount still has a good chance of working, but will display + odd error messages while it searches for a working MD5 program. + There doesn't seem to be an easy way in perl to suppress such + messages while still permitting "trouble reading data file" + messages. However, doing the test at run-time is much more + robust, and this way it at least has a chance of working on + systems it didn't work on at all before. + * Removed the "debian" subdirectory. 
There was no need for it; + it's best for the Debian package maintainers to control that + information on their own. + +2004-04-25 David A. Wheeler + * Allow "." and ".." as specifications for directories even + when they have no subdirectories. + This resolves Debian bug report log #200348 + ("Sloccount . fails"). + * Correctly identify wrapper scripts left by libtool as + automatically generated code. + When linking against a libtool library, libtool leaves a wrapper + script in the source tree (so that the binary can be executed + in-place, without installing it), which includes this: + (line) # foo - temporary wrapper script for .libs/foo + (line) # Generated by ltmain.sh - GNU libtool 1.4.3 + (1.922.2.111 2002/10/23 02:54:36) + I fixed this by saying that any comment beginning with + "Generated by" in the first few lines must be auto-generated + code. This should correctly catch other auto-generated code too. + There is a risk that code NOT automatically generated will be + incorrectly labelled, but that's unlikely. + This resolves Debian Bug report logs #173699, + "sloccount should ignore libtool-generated wrapper scripts". + * Now identifies "debian/rules" files as a makefile. + This resolves Debian Bug report logs - #159609, + "sloccount Does not consider debian/rules to be a makefile". + * Minor fix to sloccount makefile, so that man page installs + correctly in some situations that didn't before. + My thanks to Jesus M. Gonzalez-Barahona. + +2003-11-01 David A. Wheeler + * Version 2.23 - a few minor bugfixes and improvements. + +2003-11-01 David A. Wheeler + * Fixed incorrect UTF-8 warnings. Perl 5.8.0 creates warnings + when the LANG value includes ".UTF-8" but the text files read + aren't UTF-8. This causes problems on Red Hat Linux 9 and others, + which set LANG to include ".UTF-8" by default. + This version quietly removes ".UTF-8" from the LANG value for + purposes of sloccount, to eliminate the problem. + +2003-11-01 David A. Wheeler + * Fixed bad link to "options" in sloccount.html; my thanks to + Barak Zalstein. + * Fixed a bug in perl_count that prevents it from + properly skipping POD. + +2003-10-30 Julian Squires + * Added simple literate Haskell support. + * Added test cases for literate Haskell support. + * Updated Common LISP and Modula 3 extensions. + +2003-03-08 David A. Wheeler + * Version 2.22 - improved OCAML support, thanks to Michal Moskal. + Other minor improvements. + +2003-02-15 Jay A. St. Pierre + * Fixed uninstalling documents to always remove DOC_DIR. + +2003-02-15 Michal Moskal + * Significantly improved OCAML support - complete rewrite of + ML handling. + +2003-01-28 David A. Wheeler + * Version 2.21 - improved Fortran support (inc. Fortran 90); + my thanks to Erik Schnetter for implementing this! + +2002-12-17 Erik Schnetter + * Added support for Fortran 90. Extensions are ".f90" and ".F90". + * Changed handling of Fortran 77 to include HPF and Open MP + statements, and to accept uppercase ".F77" as extension. + +2002-12-04 David A. Wheeler + * Version 2.20 - minor portability and documentation improvements. + * Documentation improvements - more discussion on Intermediate COCOMO. + +2002-12-04 Linh Luong + * Modified SLOCCount so that it would run on Solaris 2.7 + (once Perl is installed and the PATH is set correctly to include + the directory where SLOCCount is installed). + This required modifying file sloccount to eliminate the + test ("[") option "-e", replacing it with the "-r" option + ("test -e" is apparently not supported by Solaris 2.7). 
+ Since "-r" should be available on any implementation of "test", + this is a nice portable change. + +2002-11-16 David A. Wheeler + * Version 2.19, documentation improvement. + * Documented the "Improved COCOMO" model from Boehm, + so that users who want more accurate estimates can do at + least a little bit straight from the documentation. + For more, as always, see Boehm's book. + If anyone wants to implement logical SLOC counting, please be + my guest! Then, COCOMO II could be implemented too. + * Modified this ChangeLog to document more fully the SGI MIPS problem. + +2002-11-16 David A. Wheeler + * Version 2.18, minor bugfix release. + * Updated the "wc -l" check; it would cause problems for users + who had never used sloccount before (because datadir had not + been created yet). Also, the "wc -l" check itself would not + reliably identify SGI systems that had horribly buggy "wc" + programs; it's believed this is a better check. + Thanks to Randal P. Andress for helping with this. + * Fixed this ChangeLog. It was Randal P. Andress who identified + the "wc -l" bug, not Bob Brown. Sorry for the misattribution, + and thanks for the bugfixing help! + * Changed rpm building command to work with rpm version 4 + (as shipped with Red Hat Linux 8.0). As of Red Hat Linux 8, + the "rpm" command only loads files, while there is now a + separate "rpmbuild" command for creating rpm files. + Those rebuilding with Red Hat Linux 7.X or less (rpm < version 4) + will need to edit the makefile slightly, as documented + in the makefile, to modify the variable RPMBUILD. + * "make rpm" now automatically uninstalls sloccount first if it can, + to eliminate unnecessary errors when building new versions of + sloccount RPMs. This only affects people modifying and + redistributing the code of sloccount (mainly, me). + +2002-11-16 Randal P. Andress + * Fixed get_sloc so that it + also accepts --filecounts as well as --filecount. + +2002-11-05 David A. Wheeler + * Released version 2.17, which adds support for Java Server Pages + (.jsp), eliminates some warnings in newer Perl implementations, + and has a few minor fixes/improvments. + +2002-11-18 Randal P. Andress + * Randal provided the following additional information about this + really nasty problem on SGI MIPS machines. It causes gcc + to not work properly, and thus "wc" won't work properly either. + SLOCCount now detects that there's a problem and will refuse to + run if things are screwed up this badly. For those unfortunate + few who have to deal with this case, here's additional information + from Randal Andress: + + When gcc is installed on SGI MIPS from source, sgi-mips-sgi-irix6.x, + an option specification in the 'specs' file is set + incorrectly for n32. The offending line is: + %{!mno-long64:-D__LONG_MAX__=9223372036854775807LL} + Which (unless option '-mno-long64' is specified), means that + LONG_MAX is 64 bits. The trouble is two fold: + 1. This should not be the default, since for n32, + normally, long is only 32 bits. and + 2. The option did not carry into the + compiler past the pre-processor - so it did not work. + The simplest fix for gcc (it seems that it can be done locally by + editing the specs file) is to have the following line to + replace the offending line in the specs file: + %{long64:-D__LONG_MAX__=9223372036854775807LL} + This makes the default 32 and only sets it to 64 if you specify + '-long64' which *does* work all the way through the compiler. 
+ + I had the binary for gcc 3 on the sgi freeware site installed here and + looked at its specs file and found no problem (they have the '-long64' + option). So it seems that when they build gcc for their freeware + distribution, they fix it. + + The problem comes when someone downloads and builds gcc for themselves + on sgi. Then the installation is faulty and any n32 code that they + build is subject to this flaw if the source makes use of LONG_MAX + or any of the values derived from it. + + The real problem turned out to be quite general for sgi n32 gcc. The + 'specs' file and mips.h are not consistent resulting in 'LONG_MAX' + being given an incorrect value. + + The following 'c' program shows inconsistent values for macros for + mips-irix n32: + __LONG_MAX__ (LONG_MAX) and + _MIPS_SZLONG + This seems to stem from an improper default option in the specs file + forcing -D__LONG_MAX__=0x7fffffffffffffff + to be passed to each compile. + + Here is the test case, compile command, and output: + + # include <stdio.h> + #define LONG_MAX_32_BITS 2147483647 + #include <limits.h> + int main () { + #if LONG_MAX <= LONG_MAX_32_BITS + printf ("LONG_MAX <= LONG_MAX_32_BITS = 0x%lx\n",LONG_MAX); + #else + printf ("LONG_MAX > LONG_MAX_32_BITS = 0x%llx\n",LONG_MAX); + #endif + + printf ("_MIPS_SZLONG = 0x%x\n",_MIPS_SZLONG); + printf ("__LONG_MAX__ = 0x%llx (size:%d)\n",__LONG_MAX__, + sizeof + (__LONG_MAX__)); + + #if LONG_MAX <= LONG_MAX_32_BITS + printf ("LONG_MAX = 0x%lx (size:%d) + \n",LONG_MAX,sizeof(LONG_MAX)); + #else + printf ("LONG_MAX = 0x%llx (size:%d) + \n",LONG_MAX,sizeof(LONG_MAX)); + #endif + + printf ("LONG_MAX_32_BITS = 0x%x (size:%d) + \n",LONG_MAX_32_BITS,sizeof(LONG_MAX_32_BITS)); + return 0; + } + ============ end test case source. + + >gcc -n32 -v -o test_limits -O0 -v -g test_limits.c + + defines include:....-D__LONG_MAX__=9223372036854775807LL.... + + =========== test output: + >test_limits + LONG_MAX > LONG_MAX_32_BITS = 0x7fffffffffffffff + _MIPS_SZLONG = 0x20 + __LONG_MAX__ = 0x7fffffffffffffff (size:8) + LONG_MAX = 0x7fffffffffffffff (size:8) + LONG_MAX_32_BITS = 0x7fffffff (size:4) + + + ======== end test case output + + By changing the specs entry: + %{!mno-long64:-D__LONG_MAX__=9223372036854775807LL} + to + %{long64:-D__LONG_MAX__=9223372036854775807LL} + as is discussed in one of the internet reports I sent earlier, the + output, + after recompiling and running is: + + LONG_MAX <= LONG_MAX_32_BITS = 0x7fffffff + _MIPS_SZLONG = 0x20 + __LONG_MAX__ = 0x7fffffff (size:4) + LONG_MAX = 0x7fffffff (size:4) + LONG_MAX_32_BITS = 0x7fffffff (size:4) + + Although I have not studied it well enough to know exactly why, the + problem has to do with the size of (long int) and the attempt of the + 'memchr' code to determine whether or not it can use 64 bit words + rather than 32 bit words in chunking through the string looking + for the specified character, "\n"(0x0a) in the case of 'wc'. + +2002-11-03 David A. Wheeler + * Fixed makefile install/uninstall scripts to properly handle + documentation. + * Added simple check at beginning of sloccount's execution + to make sure "wc -l" actually works. + Randal P. Andress has found that on certain SGI machines, "wc -l" + produces the wrong answers. He reports, + "You may already know this, but just in case you do not, there is an + apparent bug in textutils-1.19 function 'wc' (at least as built on + SGI-n32) which is caused by an apparent bug in memchr (*s, c, n). 
+ The bug is only evident when counting 'lines only' or + 'lines and characters' (i.e., when NOT counting words). + The result is that the filecount is short... + I replaced the memchr with very simple code and it corrected the + problem. I then installed textutils-2.1 which does not seem have + the problem." + I thought about adding this information just to the documentation, + but no one would notice it. By adding a check to the code, + most people will neither know nor care about the problem, and + the few people it DOES affect will know about the problem + right away (instead of reporting wrong answers). + Yes, a failing "wc -l" is a pretty horrific bug, but rather + than ignore the problem, it's better to detect and address it. + * Modified documentation everywhere so that it consistently + documents "--filecount" as the correct option for filecounts, + not "--filecounts". That way, the documentation is consistent. + * However, in an effort to "do the right thing", the program sloccount + will accept "--filecounts" as an alternative way to specify + --filecount. + +2002-11-02 Bob Brown + * Contributed code changes to count Java Server Page (.jsp) files. + The code does not pull comments out of embedded + javascript. We don't consider that a serious limitation at all, + since no one should be sending embedded javascript comments + to client browsers anyhow. They're extremely rare. + David A. Wheeler notes that you could + argue that if you _DO_ include such comments, they're + not really functioning as comments (since they DO have an + affect on the result - they're more like print statements in an + older language instead of a traditional language's comments). + +2002-11-02 David A. Wheeler + * Eliminated more Perl warnings by adding more + defined() wrappers to while() loops in Perl code + (based on Randal's suggestion). The problem is that Perl + handles the last line of a file oddly if it doesn't end with + a newline indicator, and it consists solely of "0". + +2002-11-02 Randal P Andress + * Eliminated some Perl warnings by adding + defined() wrappers to while() loops in Perl code. + +2002-8-24 David A. Wheeler + * Released version 2.16, fixed limitations of old Pascal counter. + +2002-8-24 David A. Wheeler + * Re-implemented Pascal counter (in flex). This fixes some problems + the old counter had - it handles nested comments with different + formats, and strings as well. + * Removed the BUGS information that described the Pascal counter + weaknesses.. since now they're gone! + * Added an additional detector of automatically generated files - + it's an auto-generated file if it starts with + "A lexical scanner generated by flex", since flex adds this. + Generally, this isn't a problem, since we already detect + the filename and matching .c files, but it seems worth doing. + +2002-8-22 David A. Wheeler + * Released version 2.15, a bugfix + small feature improvement. + My sincere thanks to Jesus M. Gonzalez-Barahona, who provided + patches with lots of useful improvements. + +2002-8-22 Jesus M. Gonzalez-Barahona + * Added support for Standard ML (as language "ml"). + * A patch suggested to the Debian BTS; .hh is also a C++ extension. + * Some ".inc" files are actually Pascal, not PHP; + now ".inc" files are examined binned to either Pascal or PHP + depending on their content. + * Improved detection of Pascal files (particularly for Debian + package fpc-1.0.4). 
+ * php_count was not closing open files before opening a new one, + and therefore sloccount could fail to count PHP code given + a VERY LONG list of PHP files in one package. + * break_filelist had problems with files including and other + weird characters at the end of the filename. Now fixed. + +2002-7-24 David A. Wheeler + * Released version 2.14. Improved Pascal detection, improved + Pascal counting, added a reference to CCCC. + +2002-7-24 David A. Wheeler + * Modified Pascal counting; the older (*..*) commenting structure + is now supported. Note that the Pascal counter is still imperfect; + it doesn't handle the prioritization between these two commenting + systems, and can be fooled by strings that include a + comment start indicator. Rewrites welcome, however, for most + people the current code is sufficient. This really needs to be + rewritten in flex; languages with strings and multiline comment + structures aren't handled correctly with naive Perl code. + * Documented the weaknesses in the Pascal counter as BUGS. + +2002-7-24 Ian West IWest, at, aethersystems, dot com + * Improved heuristic for detecting Pascal programs in break_filelist. + Sloccount will now categorize files as Pascal if they have + the file type ".pas" as well as ".p", though it still checks + the contents to make sure it's really pascal. + The heuristic was modified so that it's also considered Pascal + if it contains "module" and "end.", + or "program", "begin", and "end." in addition to the existing cases. + + (Ian West used sloccount to analyze a system containing + about 1.2 million lines of code in almost 10,000 files; + ninety percent of it is Ada, and the bulk of the remainder + is split between Pascal and SQL. The following is Ian's + more detailed explanation for the change): + + VAX Pascal uses "module" instead of "program" for files that + have no program block and therefore no "begin". + There is also no requirement for a Pascal file to have + procedures or functions, which is the case for files that are + equivalents of C headers. So I modified the function to + allow files to be accepted that only contain either: + "module" and "end."; or "program", "begin", and "end.". + I considered adding checks for "const", "type", and "var" but + decided they were not necessary. I have added the extra cases + without changing the existing logic so as not to upset + any cases for "unit". It is possible to optimize the logic + somewhat, but I felt clarity was better than efficiency. + + I found that some of my Pascal files were getting through + only because the word "unit" appeared in certain comments. + So I moved the line for filtering out comments above the lines + that look for the keywords. + + Pascal in general allows comments in the form (*...*) as well + as {...}, so I added a line to remove these. + + After making these changes, all my files were correctly + categorized. I also verified that the sample Pascal files + from p2c still had the same counts. + + Thank you for developing SLOCCount. It is a very useful tool. + +2002-7-15 David A. Wheeler + * Added a reference to CCCC; http://cccc.sourceforge.net/ + +2002-5-31 David A. Wheeler + * Released version 2.13. + * Code cleanups. Turned on gcc warnings ("-Wall" option) and + cleaned up all code that set off a warning. + This should make the code more portable as well as cleaner. + Made a minor speed optimization on an error branch. + +2002-3-30 David A. Wheeler + * Released version 2.12. 
+ * Added a "testcode" directory with some sample source code + files for testing. It's small now, but growth is expected. + Contributions for this test directory (especially for + edge/oddball cases) are welcome. + +2002-3-25 David A. Wheeler + * Changed first-line recognizers so that the first line (#!) will + matched ignoring case. For most Unix/Linux systems uppercase + script statements won't work, but Windows users. + * Now recognize SpeedyCGI, a persistent CGI interface for Perl. + SpeedyCGI has most of the speed advantages of FastCGI, but + has the security advantages of CGI and has the CGI interface + (from the application writer's point of view). + SpeedyCGI perl scripts have #!/usr/bin/speedy lines instead of + #!/usr/bin/perl. More information about SpeedyCGI + can be found at http://daemoninc.com/speedycgi/ + Thanks to Priyadi Iman Nurcahyo for noticing this. + +2002-3-15 David A. Wheeler + * Added filter to remove calls to sudo, so + "#!/usr/bin/sudo /usr/bin/python" etc as the first line + are correctly identified. + +2002-3-7 David A. Wheeler + * Added cross-references to LOCC and CodeCount. They don't + do what I want.. which is why I wrote my own! .. but others + may find them useful. + +2002-2-28 David A. Wheeler + * Released version 2.11. + * Added support for C#. Any ".cs" file is presumed + to be a C# file. The C SLOC counter is used to count SLOC. + Note that C# doesn't have a "header" type (Java doesn't either), + so disambiguating headers isn't needed. + * Added support for regular Haskell source files (.hs). + Their syntax is sufficiently similar that just the regular + C SLOC counter works. + Note that literate Haskell files (.lhs) are _not_ supported, + so be sure to process .lhs files into .hs files before counting. + There are two different .lhs conventions; for more info, see: + http://www.haskell.org/onlinereport/literate.html + * Tweaked COBOL counter slightly. Added support in fixed (default) + format for "*" and "/" as comment markers in column 1. + * Modified list of file extensions known not to be source code, + based on suffixes(7). This speeds things very slightly, but the + main goal is to make the "unknown" list smaller. + That way, it's much easier to see if many source code files + were incorectly ignored. In particular, compressed formats + (e.g., ".tgz") and multimedia formats (".wav") were added. + * Modified documentation to make things clear: If you want source + in a compressed file to be counted (e.g. .zip, .tar, .tgz), + you need to uncompress the file first!! + * Modified documentation to clarify that literate programming + files must be expanded first. + * Now recognize ".ph" as Perl (it's "Perl header" code). + Please let me know if this creates many false positives + (i.e., if there are programs using ".ph" in other ways). + * File count_unknown_ext modified slightly so that it now examines + ~/.slocdata. Modified documentation so that its use is + recommended and explained. It's been there for a while, but + with poor documentation I bet few understand its value. + * Modified output to clearly say that it's Open Source Software / + Free Software, licensed under the GPL. It was already stated + that way in the documentation and code, but clearly stating this + on every run makes it even harder to miss. + +2002-2-27 David A. Wheeler + * Released version 2.10. + * COBOL support added! Now ".cbl" and ".cob" are recognized + as COBOL extensions, as well as their uppercase ".CBL" and ".COB". 
+ The COBOL counter works as follows: + it detects if a "freeform" command has been given. Unless a + freeform command's given, a comment has "*" or "/" in column 7, + and a SLOC is a non-comment line with + at least one non-whitespace in column 8 or later (including + columns 72 or greater; it's arguable if a line that's empty + before column 72 is really a line or a comment, but I've decided + to count such odd things as lines). + If we've gone free-format, a comment is a line that has optional + whitespace and then "*".. otherwise, a line with nonwhitespace + is a SLOC. + Is this good enough? I think so, but I'm not a major COBOL user. + Feedback from real COBOL users would be welcome. + A source for COBOL test programs is: + http://www.csis.ul.ie/cobol/examples/default.htm + Information on COBOL syntax gathered from various locations, inc.: + http://cs.hofstra.edu/~vmaffea1/cobol.html + http://support.merant.com/websupport/docs/microfocus/books/ + nx31books/lrintr.htm + * Modified handling of uppercase filename extensions so they'll + be recognized as well as the more typicaly lowercase extensions. + If a file has one or more uppercase letters - and NO + lowercase letters - it's assumed that it may be a refugee from + an old OS that supported only uppercase filenames. + In that circumstance, if the filename extension doesn't match the + set of known extensions, it's made into lowercase and recompared + against the set of extensions for source code files. + This heuristic should improve recognition of source + file types for "old" programs using upper-case-only characters. + I do have concern that this may be "too greedy" an algorithm, i.e., + it might claim that some files that aren't really source code + are now source code. I don't think it will be a problem, though; + many people create filename + extensions that only differ by case in most circumstances; the + ".c" vs. ".C" thing is an exception, and since Windows folds + case it's not a very portable practice. This is a pretty + conservative heuristic; I found Cobol programs with lowercase + filenames and uppercase extensions ("x.CBL"), which wouldn't + be matched by this heuristic. For Cobol and Fortran I put in + special ".F", ".CBL", and ".COB" patterns to catch them. + With those two actions, the program should manage to + correctly identify more source files without incorrectly + matching non-source files. + * ".f77" is now also accepted as a Fortran77 extension. + Thanks to http://www.webopedia.com/quick_ref/fileextensionsfull.html + which has lots of extension information. + * Fixed a bug in handling top-level directories where there were NO + source files at all; in certain cases this would create + spurious error messages. (Fix in compute_all). + +2002-1-7 David A. Wheeler + * Released version 2.09. + +2002-1-9 David A. Wheeler + * Added support for the Ruby programming language, thanks to + patches from Josef Spillner. + * Documentation change: added more discussion about COCOMO, + in particular why its cost estimates appeared so large. + Some programmers think of just the coding part, and only what + they'd get paid directly.. but that's less than 10% of the + costs. + +2002-1-7 David A. Wheeler + * Minor documentation fix - the example for --effort in + sloccount.html wasn't quite right (the base documentation + for --effort was right, it was just the example that was wrong). + My thanks to Kevin the Blue for pointing this out. + +2002-1-3 David A. Wheeler + * Released version 2.08. + +2002-1-3 David A. 
Wheeler + * Based on suggestions by Greg Sjaardema : + * Modified c_count.c, function count_file to close the stream + after the file is analyzed. Otherwise, this can cause problems + with too many open files on some systems, particularly on + operating systems with small limits (e.g., Solaris). + * Added '.F' as a Fortran extension. + +2002-1-2 David A. Wheeler + * Released version 2.07. + +2002-1-2 Vaclav Slavik + * Modified the RPM .spec file in the following ways: + * By default the RPM package now installs into /usr (so binaries + go into /usr/bin). Note that those who use the makefile directly + ("make install"), including tarball users, + will still default to /usr/local instead. + You can still make the RPM install to /usr/local by using + the prefix option, e.g.: + rpm -Uvh --prefix=/usr/local sloccount*.rpm + * Made it use %{_prefix} variable, i.e. changing it to install + in /usr/local or /usr is a matter of changing one line + * Use wildcards in %files section, so that you don't have to modify + the specfile when you add new executable + * Mods to make it possible to build the RPM as non-root (i.e. + BuildRoot support, %defattr in %files, PREFIX passed to make install) + +2002-1-2 Jesus M. Gonzalez Barahona + * Added support for Modula-3 (.m3, .i3). + * ".sc" files are counted as Lisp. + * Modified sloccount to handle EVEN LARGER systems (i.e., + so sloccount will scale even more). + In a few cases, parameters were passed on the command line + and large systems could be so large that the command line was + too long. E.G., Debian GNU/Linux. This caused a large number + of changes to different files to remove these scaleability + limitations. + * All *_count programs now accept "-f filename" and "-f -" options, + where 'filename' is a file with a list of filenames to count. + Internally the "-f" option with a filename is always used, so + that an arbitrarily long list of files can be measured and so + that "ps" will show more status information. + * compute_sloc_lang modified accordingly. + * get_sloc now has a "--stdin" option. + * Some small fixes here and there. + * This closes Debian bug #126503. + +2001-12-28 David A. Wheeler + * Released sloccount 2.06. + +2001-12-27 David A. Wheeler + * Fixed a minor bug in break_filelist, which caused + (in extremely unusual circumstances) a problem when + disambiguating C from C++ files in complicated situations + where this difference was hard to tell. The symptom: When + analyzing some packages (for instance, afterstep-1.6.10 as + packaged in Debian 2.2) you would get the following error: + Use of uninitialized value in pattern match (m//) at + /usr/bin/break_filelist line 962. + This could only happen after many other disambiguating rules + failed to determine if a file was C or C++ code, so the problem + was quite rare. + My thanks to Jesus M. Gonzalez-Barahona (in + Mostoles, Spain) for the patch that fixes this problem. + * Modified man page, explaining the problems of filenames with + newlines, and also noting the problems with directories + beginning with "-" (they might be confused as options). + * Minor improvements to Changelog text, so that the + changes over time were documented more clearly. + * Note that CEPIS "Upgrade" includes a paper that depends + on sloccount. This is "Counting Potatoes: the Size of Debian 2.2" + which counts the size of Debian 2.2 (instead of Red Hat Linux, + which is what I counted). The original release is at: + . 
+ I understand that they'll make some tweaks and + release a revision of the paper on the Debian website. + It's interesting; Debian 2.2 (released in 2000, and + which did NOT have KDE), has 56 million physical SLOC and + would have cost $1.8 billion USD to develop traditionally. + That's more than Red Hat; see . + Top languages: C (71.12%), C++ (9.79%), LISP, Shell, Perl, + Fotran, Tcl, Objective-C, Assembler, Ada, and Python in that + order. My thanks to the authors! + +2001-10-25 David A. Wheeler + * Released sloccount 2.05. + * Added support for detecting and counting PHP code. + This was slightly tricky, because PHP's syntax has a few "gotchas" + like "here document" strings, closing working even in C++ or sh + style comments, and so on. + Note - HTML files (.html, .htm, etc) are not examined for PHP code. + You really shouldn't put a lot of PHP code in HTML documents, because + it's a maintenance problem later anyway. + The tool assigns every file a single type.. which is a problem, + because HTML files could have multiple simultaneous embedded types + (PHP, javascript, and HTML text). If the tool was modified to + assign multiple languages to a single file, I'm not sure how + to handle the file counts (counts of files for each language). + For the moment, I just assign HTML to "html". + * Modified output so that it adds a header before the language list. + +2001-10-23 David A. Wheeler + * Released sloccount 2.01 - a minor modification to support + Cygwin users. + * Modified compute_all to make it more portable (== became =); + in particular this should help users using Cygwin. + * Modified documentation to note that, if you install Cygwin, + you HAVE to use Unix newlines (not DOS newlines) for the Cygwin + install. Thanks to Mark Ericson for the bug report & for helping + me track that down. + * Minor cleanups to the ChangeLog. + +2001-08-26 David A. Wheeler + * Released sloccount 2.0 - it's getting a new version number because + its internal data format changed. You'll have to re-analyze + your system for the new sloccount to work. + * Improved the heuristics to identify files (esp. .h files) + as C, C++, or objective-C. The code now recognizes + ".H" (as well as ".h") as header files. + The code realizes that ".cpp" files that begin with .\" + or ,\" aren't really C++ files - XFree86 stores many + man pages with these extensions (ugh). + * Added the ability to "--append" analyses. + This means that you can analyze some projects, and then + repeatedly add new projects. sloccount even stores and + recovers md5 checksums, so it even detects duplicates + across the projects (the "first" project gets the duplicate). + * Added the ability to mark a data directory so that it's not + erased (just create a file named "sloc_noerase" in the + data directory). From then on, sloccount won't erase it until + you remove the file. + * Many changes made aren't user-visible. + Completely re-organized break_filelist, which was getting + incredibly baroque. I've improved the sloccount code + so that adding new languages is much simpler; before, it + required a number of changes in different places, which was bad. + * SLOCCount now creates far fewer files, which is important for + analyzing big systems (I was starting to run out of inodes when + analyzing entire GNU/Linux distributions). + Previous versions created stub files in every child directory + for every possible language, even those that weren't used; + since most projects only use a few languages, this was costly in + terms of inodes. 
Also, the totals for each language for a given + child directory are now in a single file (all-physical.sloc) + instead of being in separate files; this not only reduces inode + counts, but it also greatly simplifies later processing & eliminated + a bug (now, to process all physical SLOC counts in a given child + directory, just process that one file). + +2001-06-22 David A. Wheeler + * Per Prabhu Ramachandran's suggestion, recognize ".H" files as + ".h"/".hpp" files (note the upper case). + +2001-06-20 David A. Wheeler + * Released version 1.9. This eliminates installation errors + with "sql_count" and "makefile_count", + detects PostgreSQL embedded C (in addition to Oracle and Informix), + improves detection of Pascal code, and includes support for + analyzing licenses (if a directory has the file PROGRAM_LICENSE, + the file's contents are assumed to have the license name for that + top-level program). It eliminates a portability problem, so + hopefully it'll be easier to run it on Unix-like systems. + It _still_ requires the "md5sum" program to run. + +2001-06-14 David A. Wheeler + * Changed the logic in make_filelists. + This version doesn't require a "-L" option to test which GNU + programs supported but which others (e.g., Solaris) didn't. + It still doesn't normally follow symlinks. + Not following subordinate symlinks is important for + handling oddities such as pine's build directory + /usr/src/redhat/BUILD/pine4.33/ldap in Red Hat 7.1, which + includes symlinks to directories not actually inside the + package at all (/usr/include and /usr/lib). + * Added display of licenses in the summary form, if license + information is available. + * Added undocumented programs rpm_unpacker and extract_license. + These are not installed at this time, they're just provided as + a useful starting point if someone wants them. + +2001-06-12 David A. Wheeler + * Added support for license counting. If the top directory + of a program has a file named "PROGRAM_LICENSE", it's copied to + the .slocdata entry, and it's reported as part of a licensing total. + Note that the file LICENSE is ignored, that's often more complex. + +2001-06-08 David A. Wheeler + * Fixed RPM spec file - it accidentally didn't install + makefile_count and sql_count. This would produce spurious + errors and inhibited the option of counting makefiles and SQL. + Also fixed the makefile to include sql_count in the executable list. + +2001-05-16 David A. Wheeler + * Added support for auto-detecting ".pgc" files, which are + embedded PostgreSQL - they are assumed to be C files (they COULD + be C++ instead; while this will affect categorization it + won't affect final SLOC counts). Also, if there's a ".c" with + a corresponding ".pgc" file, the ".c" file is assumed to be + auto-generated. + * Thus, SLOCCount now supports embedded database commands for + Oracle, Informix, and PostgreSQL. MySQL doesn't use an + "embedded" approach, but uses a library approach that SLOCCount + could already handle. + * Fixed documentation: HTML reserved characters misused, + sql_count undocumented. + + +2001-05-14 David A. Wheeler + * Added modifications from Gordon Hart to improve detection + of Pascal source code files. + Pascal files which only have a "unit" in them (not a full program), + or have "interface" or "implementation", + are now detected as Pascal programs. + The original Pascal specification didn't support units, but + there are Pascal programs which use them. 
This should result in + more accurate counts of Pascal software that uses units. + He also reminded me that Pascal is case-insensitive, spurring a + modification in the detection routines (for those who insist on + uppercase keywords.. a truly UGLY format, but we need to + support it to correctly identify such source code as Pascal). + * Modified the documentation to note that I prefer unified diffs. + I also added a reference to the TODO file, and from here on + I'll post the TODO file separately on my web site. + +2001-05-02 David A. Wheeler + * Released version 1.8. Added several features to support + measuring programs with embedded database commands. + This includes suporting many Oracle & Informix embedded file types + (.pc, .pcc, .pad, .ec, .ecp). It also optionally counts + SQL files (.sql) and makefiles (makefile, Makefile, etc.), + though by default they are NOT included in lines-of-code counts. + See the (new) TODO file for limitations on makefile identification. + +2001-04-30 David A. Wheeler + * Per suggestion from Gary Myer, added optional "--addlang" option + to add languages not NORMALLY counted. Currently it only + supports "makefile" and "sql". The scheme for detecting + automatically generated makefiles could use improvement. + Normally, makefiles and sql won't be counted in the final reports, + but the front-end will make the calculations and if requested their + values will be provided. + * Added an "SQL" counter and a "makefile" counter. + * Per suggestions from Gary Myer, added detection for files where + database commands (Oracle and Informix) are embedded in the code: + .pc -> Oracle Preprocessed C code + .pcc -> Oracle preprocessed C++ Code + .pad -> Oracle preprocessed Ada Code + .ec -> Informix preprocessed C code + .ecp -> Informix preprocessed C code which calls the C preprocessor + before calling the Informix preprocessor. + Handling ".pc" has heuristics, since many use ".pc" to mean + "stuff about PCs". Certain filenames not counted as C files (e.g., + "makefile.pc" and "README.pc") if they end in ".pc". + Note that if you stick C++ code into .pc files, it's counted as C. + + These embedded files are normal source files of the respective + language, with database commands stuck into them, e.g., + EXEC SQL select FIELD into :variable from TABLE; + which performs a select statement and puts the result into the + variable. The database preprocessor simply reads this file, + and converts all "EXEC SQL" statements into the appropriate calls + and outputs a normal program. + + Currently the "automatically generated" detectors don't detect + this case. For the moment, just make sure the generated files + aren't around while running SLOCCount. + + Currently the following are not handled (future release?): + .pco -> Oracle preprocessed Cobol Code + .pfo -> Oracle preprocessed Fortran Code + I don't have a Cobol counter. The Fortran counter only works + for f77, and I doubt .pfo is limited to that. + + + +2001-04-27 David A. Wheeler + * Per suggestions from Gary Myer, + added ".a" and ".so" to the "not" list, since these are + libraries not source, and added the filename "Root" to the + "not" file list ("Root" has special meaning to CVS). + * Added a note about needing "md5sum" (Gary Myer) + * Added a TODO file. If something's on the TODO list that you'd + like, please write the code and send it in. + * Noted that running on Cygwin is MUCH slower than when running + on Linux. Truth in advertizing is only fair. + +2001-04-26 David A. 
Wheeler + * Release version 1.6: the big change is support for running on + Windows. Windows users must install Cygwin first. + * Modified makefile so that SLOCCount can run on Windows systems + if "Cygwin" is installed. The basic modifications to do this + were developed by John Clezy -- Thanks!!! I spent time merging + his makefile and mine so that a single makefile could be used on + both Windows and Unix. + * Documented how to install and run SLOCCount on Windows using cygwin. + * Changed default prefix to /usr/local; you can set PREFIX to + change this, e.g., "make PREFIX=/usr". + * When counting a single project, sloccount now also reports + "Estimated average number of developers", which is simply + the person-months divided by months. As with all estimates, take + it with an ocean of salt. This isn't reported for multiproject + queries; properly doing this would require "packing" to compensate + for the fact that small projects complete before large ones if + started simultaneously. + * Improved man page (fixed a typo, etc.). + +2001-01-10 David A. Wheeler + * Released version 1.4. This is an "ease of use" release, + greatly simplifying the installation and use of SLOCCount. + The new front-end tool "sloccount" does all the work in one step - + now just type "sloccount DIRECTORY" and it's all counted. + An RPM makes installation trivial for RPM-based systems. + A man page is now available. There are now rules for + "make install" and "make uninstall" too. + Other improvements include a schedule estimator and options + to control the effort and schedule estimators. + +2001-01-07 David A. Wheeler + * Added an estimator of schedule as well as effort. + * Added various options to control the effort and + cost estimation: "--effort", "--personcost", "--overhead", + and "--schedule". + Now people can (through options) control the assumptions made + in the effort and cost estimations from the command line. + The output now shows the effort estimation model used. + * Changed the output slightly to pretty it up and note that + it's development EFFORT not TIME that is shown. + * Added a note at bottom asking for credit. I don't ask for any + money, but I'd like some credit if you refer to the data the + tool generates; a gentle reminder in the output seemed like the + easiest way to ask for this credit. + * Created an RPM package; now RPM-based systems can EASILY + install it. It's a relocatable package, so hopefully + "alien" can easily translate it to other formats + (such as Debian's .deb format). + * Created a "man" page for sloccount. + +2001-01-06 David A. Wheeler + * Added front-end tool "sloccount", GREATLY improving ease-of-use. + The tool "sloccount" invokes all the other SLOCCount tools + in the right order, performing a count of a typical project + or set of projects. From now on, this is expected to be the + "usual" interface, though the pieces will still be documented + to help those with more unusual needs. + From now on, "SLOCCount" is the entire package, and + "sloccount" is this front-end tool. + * Added "--datadir" option to make_filelists (to support + "sloccount"). + * get_sloc: No longer displays languages with 0 counts. + * Documentation: documented "sloccount"; this caused major changes, + since "sloccount" is now the recommended interface for all but + those with complicated requirements. + * compute_filecount: minor optimization/simplication + +2001-01-05 David A. Wheeler + * Released vesion 1.2. + * Changed the name of many programs, as part of a general clean-up. 
+ I changed "compute_all" to "compute_sloc", and eliminated + most of the other "compute_*" files (replacing it with + "compute_sloc_lang"). I also changed "get_data" to "get_sloc". + This is part of a general clean-up, so that + if someone wants to package this program for installation they + don't have a thousand tiny programs polluting the namespace. + Adding "sloc" to the names makes namespace collisions less likely. + I also worked to make the program simpler. + * Made a number of documentation fixes - my thanks to Clyde Roby + for giving me feedback. + * Changed all "*_count" programs to consistently print at the end + "Total:" on a line by itself, followed on the next line by + the total lines of code all by itself. This makes the new program + get_sloc_detail simpler to implement, and also enables + get_sloc_detail to perform some error detection. + * Changed name of compressed file to ".tar.gz" and modified docs + appropriately. The problem is a bug in Netscape 4.7 clients + running on Windows; it appears that ".tgz" files don't get fully + downloaded from my hosting webserver because no type information + is provided. Originally, I tried to change the website to fix this + by creating ".htaccess" files, but that didn't work with either: + AddEncoding x-gzip gz tgz + AddType application/x-tar .tgz + or: + AddEncoding application/octet-stream tgz + So, we'll switch to .tar.gz, which works. + My thanks to Christopher Lott for this feedback. + * Removed a few garbage files. + * Added information to documentation on how to handle HUGE sets + of data directory children, i.e., where you can't even use "*" + to list the data directory children. I don't have a directory + of that kind of scale, so I can't test it directly, + but I can at least discuss how to do it; it SHOULD work. + * Changed makefile so that "ChangeLog" is now visible on the web. + + +2001-01-04 David A. Wheeler + * Minor fixes to documentation. + * Added "--crossdups" option to break_filelist. + * Documented count_unknown_ext. + * Created new tool, "get_sloc_detail", and documented it. + Now you can get a complete report of all the SLOC data in one big + file (e.g., for exporting to another tool for analysis). + +2001-01-03 David A. Wheeler + * First public release, version "1.0", of "SLOCCount". + Main website: http://www.dwheeler.com/sloccount + diff --git a/PROGRAM_LICENSE b/PROGRAM_LICENSE new file mode 100644 index 0000000..505faa1 --- /dev/null +++ b/PROGRAM_LICENSE @@ -0,0 +1 @@ +GPL diff --git a/README b/README new file mode 100644 index 0000000..6b47bea --- /dev/null +++ b/README @@ -0,0 +1,51 @@ +SLOCCount README +================= + +This directory contains "SLOCCount", a set of the programs for counting +source lines of code (SLOC) in large software systems. +It was developed by David A. Wheeler (dwheeler@dwheeler.com), +originally to count SLOC in a Linux (GNU/Linux) system, but it can be +used for counting other software systems. + +Copyright (C) 2001-2004 David A. Wheeler. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. + +For more information, including installation instructions and license +information, see the HTML file "sloccount.html". +The file COPYING includes the license, the standard GNU GPL version 2 license. +Even better, see the website http://www.dwheeler.com/sloccount. + +Some of the files contained in this directory aren't normally used - +instead, they're scrap special-purpose files I used when I created these +programs. Feel free to use them, but if you're packaging this program, +be selective - don't install in /usr/bin every executable here! +Just install the ones documented, plus the executables they depend on. + +Packagers: I've developed an RPM spec file and RPM, so RPM systems can +just load-and-go. If you use a different package format, you may still +find the spec file helpful for identifying what to load, and the "alien" +program can apparantly translate the RPM file to Debian '.deb' format +without any problem. + +Debian already includes a SLOCCount package. +SLOCCount 2.08 is available in Debian 3.0. +More info on the status of SLOCCount in Debian should see +http://packages.debian.org/cgi-bin/search_packages.pl?keywords=sloccount&searchon=names&subword=1&version=all&release=all + + diff --git a/SOURCES b/SOURCES new file mode 100644 index 0000000..33e8392 --- /dev/null +++ b/SOURCES @@ -0,0 +1,29 @@ + +"CodeCount" toolset for counting SLOC. + http://sunset.usc.edu/research/CODECOUNT/ +Covers C/C++/Ada/Java and a few others, but +NOT Python, TCL, Perl, or LISP. +I used this to count C, C++, and Java code. +These tools are under a GPL-like license, but it's NOT the GPL. +See their website for more info. + +A huge list of tools is at (mostly big $$): + http://www.concentricmc.com/toolsreport/5-3-2tools1.html +which extracts from this 1995 paper from STSC: + http://www.stsc.hill.af.mil/CrossTalk/1995/apr/Metrics.asp + +Another list: + http://www.qucis.queensu.ca/Software-Engineering/toolcat.html#label181 + +USC tools (inc. CodeCount) and info on COCOMO II is at: + http://sunset.usc.edu/available_tools/availabletools_main.html + + +Software Metrics: An Analysis of the Evolution of COCOMO and Function Points +Roger E. Masse +University of Maryland +July 8, 1997 +http://www.python.org/~rmasse/papers/software-metrics/ +(good overview paper) + + diff --git a/TODO b/TODO new file mode 100644 index 0000000..efb2a8a --- /dev/null +++ b/TODO @@ -0,0 +1,161 @@ +TODO List: + + +As with all open source projects... if you want something strongly +enough, then please (1) code it and submit it, or (2) pay me to add it. +You have the source, you have the power - use it. Or has been said for years: + + Use the Source, Luke. + +I _do_ listen to user requests, but I cannot do everything myself. +I've released this program under the GPL _specifically_ so that others +will help debug and extend it. + + + +Obviously, a general "TODO" is adding support for other computer languages; +here are languages I'd like to add support for specifically: ++ Eiffel. ++ Sather (much like Eiffel). ++ CORBA IDL. ++ Forth. Comments can start with "\" (backslash) and continue to end-of-line, + or be surrounded by parens. In both cases, they must be on word + bounds-- .( is not a comment! Variable names often begin with "\"! 
+ For example: + : 2dup ( n1 n2 -- n1 n2 n1 n2 ) \ Duplicate two numbers. + \ Pronounced: two-dupe. + over over ; + Strings begin with " (doublequote) or p" (p doublequote, for + packed strings), and these must be separate words + (e.g., followed by a whitespace). They end with a matching ". + Also, the ." word begins a string that ends in " (this word immediately + prints it the given string). + Note that "copy is a perfectly legitimate Forth word, and does NOT + start a string. + Forth sources can be stored as blocks, or as more conventional text. + Any way to detect them? + See http://www.forth.org/dpans/dpans.html for syntax definition. + See also http://www.taygeta.com/forth_style.html + and http://www.forth.org/fig.html ++ Create a "javascript" category. ".js" extention, "js" type. + (see below for a discussion of the issues with embedded scripts) ++ .pco -> Oracle preprocessed Cobol Code ++ .pfo -> Oracle preprocessed Fortran Code ++ PL/1. ++ BASIC, including Visual Basic, Future Basic, GW-Basic, QBASIC, etc. ++ Improve Ocamlyacc, comments in yacc part are C-like, but I'm not sure + about comment nesting. + + For more language examples, see the ACM "Hello World" project, which tries + to collect "Hello World" in every computer language. It's at: + http://www2.latech.edu/~acm/HelloWorld.shtml + + + +Here are other TODOs: + + +* A big one is to add support for logical SLOC, at least for C/C++. + Then add support for COCOMO II. Even partial support would be great + (e.g., not all languages)... other languages could be displayed as + "UNK" (unknown) and be considered 0. + Add options to allow display of only one, + or of both. See Park's paper, COCOMO II, and Humphrey's 1995 book. + +* In general, modify the program so that it ports more easily. Currently, + it assumes a Unix-like system (esp. in the shell programs), and it requires + md5sum as a separate executable. + There are probably some other nonportable constructs, in particular + for non-Unix systems (e.g., symlink handling and file/dirnames). + +* Rewrite Bourne shell code to either Perl or Python (prob. Python), and + make the call to md5sum optional. That way, the program + could run on Windows without Cygwin. + +* Improve the heuristics for detecting language type. + They're actually pretty good already. + +* Clean up the program. This was originally written as a one-off program + that wouldn't be used again (or distributed!), and it shows. + + The heuristics used to detect language type should + be made more modular, so it could be reused in other programs, and + so you don't HAVE to write out a list of filenames first if you + don't want to. + +* Consider rewriting everything not in C into Python. Perl is + a write-only language, and it's absurdly hard to read Perl code later. + I find Python code much cleaner. And shell isn't as portable. + + One reason I didn't rewrite it in Python is that I had concerns about + Python's licensing issues; Python versions 1.6 and up have questionable + compatibility with the GPL. Thankfully, the Free Software Foundation (FSF) + and the Python developers have worked together, and the Python + developers have fixed the license for version 2.0.1 and up. + Joy!! I'm VERY happy about this! + +* Improve the speed, primarily to support analysis of massive amounts + of data. There's a generic routine in Perl; switching that + to C would probably help. Perhaps rewriting many of the counters + using flex would speed things up, simplify maintenance, and make + supporting logical SLOC easier. 
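+
+  A minimal sketch of the md5sum replacement mentioned in the item above
+  about rewriting the shell code, and in the "Perl variation support"
+  item at the end of this list: the Digest::MD5 module (bundled with
+  recent Perls, otherwise on CPAN) can checksum a file without invoking
+  an external md5sum or a shell, which avoids both the portability
+  problem and the quoting worry.  The sub and variable names below are
+  hypothetical, not existing SLOCCount code:
+
+    use Digest::MD5;
+
+    sub md5_of_file {
+      my ($filename) = @_;
+      open(my $fh, '<', $filename) or return undef;  # no shell involved
+      binmode($fh);                                  # checksum raw bytes
+      my $hex = Digest::MD5->new->addfile($fh)->hexdigest;
+      close($fh);
+      return $hex;
+    }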
+ +* Handle scripts embedded in data. + Perhaps create a category, "only the code embedded in HTML" + (e.g., Javascript scripts, PHP statements, etc.). + This is currently complicated - the whole program assumes that a file + can be assigned a specific type, and HTML (etc.) might have multiple + languages embedded in it. + +* Are any CGI files (.cgi) unhandled? Are files unidentified? + +* Improve makefile identification and counting. + Currently the program does not identify as makefiles "Imakefile" + (generated by xmkmf and processed by imake, used by MIT X server) + nor automake/autoconf files (Makefile.am/Makefile.in). + Need to handle ".rules" too. + + I didn't just add these files to the "makefile" list, because + I have concerns about processing them correctly using the + makefile counter. Since most people won't count makefiles anyway, + this isn't an issue for most. I welcome patches to change this, + _IF_ you ensure that the resulting counts are correct. + + The current version is sufficient for handling programs who have + ordinary makefiles that are to be included in the SLOC count when + they enable the option to count makefiles. + + Currently the makefiles count "all non-blank lines"; conceivably + someone might want to count only the actual directives, not the + conditions under which they fire. + +* Improve the flexibility in symlink handling; see "make_filelists". + It should be rewritten. Some systems don't allow + "test"ing for symlinks, which was a portability problem - that problem + at least has been removed. + +* I've added a few utilities that I use for counting whole Linux systems + to the tar file, but they're not installed by the RPM and they're not + documented. + +* More testing! COBOL in particular is undertested. + +* Modify the code, esp. sloccount, to handle systems so large that + the data directory list can't be expanded using "*". + This would involve using "xargs" in sloccount, maybe getting rid + of the separate filelist creation, and having break_filelist + call compute_all directly (break_filelist needs to run all the time, + or its reloading of hashes during initialization would become the + bottleneck). Some of this work has already been done. + +* Perl variation support. + The code says: + open(FH, "-|", "md5sum", $filename) or return undef; + but this doesn't work on some Perls. + This could be changed to: + open(FH, "-|", "md5sum $filename") or return undef; + But I dare not fix it that way; + imagine a file named "; rm -fr /*" and variations. + + + diff --git a/TODO.orig b/TODO.orig new file mode 100644 index 0000000..86c0c20 --- /dev/null +++ b/TODO.orig @@ -0,0 +1,153 @@ +TODO List: + + +As with all open source projects... if you want something strongly +enough, then please (1) code it and submit it, or (2) pay me to add it. +You have the source, you have the power - use it. Or has been said for years: + + Use the Source, Luke. + +I _do_ listen to user requests, but I cannot do everything myself. +I've released this program under the GPL _specifically_ so that others +will help debug and extend it. + + + +Obviously, a general "TODO" is adding support for other computer languages; +here are languages I'd like to add support for specifically: ++ Eiffel. ++ Sather (much like Eiffel). ++ CORBA IDL. ++ Forth. Comments can start with "\" (backslash) and continue to end-of-line, + or be surrounded by parens. In both cases, they must be on word + bounds-- .( is not a comment! Variable names often begin with "\"! 
+ For example: + : 2dup ( n1 n2 -- n1 n2 n1 n2 ) \ Duplicate two numbers. + \ Pronounced: two-dupe. + over over ; + Strings begin with " (doublequote) or p" (p doublequote, for + packed strings), and these must be separate words + (e.g., followed by a whitespace). They end with a matching ". + Also, the ." word begins a string that ends in " (this word immediately + prints it the given string). + Note that "copy is a perfectly legitimate Forth word, and does NOT + start a string. + Forth sources can be stored as blocks, or as more conventional text. + Any way to detect them? + See http://www.forth.org/dpans/dpans.html for syntax definition. + See also http://www.taygeta.com/forth_style.html + and http://www.forth.org/fig.html ++ Create a "javascript" category. ".js" extention, "js" type. + (see below for a discussion of the issues with embedded scripts) ++ .pco -> Oracle preprocessed Cobol Code ++ .pfo -> Oracle preprocessed Fortran Code ++ Fortran beyond Fortran 77 (.f90). ++ PL/1. ++ BASIC, including Visual Basic, Future Basic, GW-Basic, QBASIC, etc. ++ Improve ML/CAML. It uses Pascal-style comments (*..*), + double-quoted C-like strings "\n...", and .ml or .mli file extensions + (.mli is an interface file for CAML). + + For more language examples, see the ACM "Hello World" project, which tries + to collect "Hello World" in every computer language. It's at: + http://www2.latech.edu/~acm/HelloWorld.shtml + + + +Here are other TODOs: + + +* A big one is to add support for logical SLOC, at least for C/C++. + Then add support for COCOMO II. Even partial support would be great + (e.g., not all languages)... other languages could be displayed as + "UNK" (unknown) and be considered 0. + Add options to allow display of only one, + or of both. See Park's paper, COCOMO II, and Humphrey's 1995 book. + +* In general, modify the program so that it ports more easily. Currently, + it assumes a Unix-like system (esp. in the shell programs), and it requires + md5sum as a separate executable. + There are probably some other nonportable constructs, in particular + for non-Unix systems (e.g., symlink handling and file/dirnames). + +* Rewrite Bourne shell code to either Perl or Python (prob. Python), and + make the call to md5sum optional. That way, the program + could run on Windows without Cygwin. + +* Improve the heuristics for detecting language type. + They're actually pretty good already. + +* Clean up the program. This was originally written as a one-off program + that wouldn't be used again (or distributed!), and it shows. + + The heuristics used to detect language type should + be made more modular, so it could be reused in other programs, and + so you don't HAVE to write out a list of filenames first if you + don't want to. + +* Consider rewriting everything not in C into Python. Perl is + a write-only language, and it's absurdly hard to read Perl code later. + I find Python code much cleaner. And shell isn't as portable. + + One reason I didn't rewrite it in Python is that I had concerns about + Python's licensing issues; Python versions 1.6 and up have questionable + compatibility with the GPL. Thankfully, the Free Software Foundation (FSF) + and the Python developers have worked together, and the Python + developers have fixed the license for version 2.0.1 and up. + Joy!! I'm VERY happy about this! + +* Improve the speed, primarily to support analysis of massive amounts + of data. There's a generic routine in Perl; switching that + to C would probably help. 
Perhaps rewriting many of the counters + using flex would speed things up, simplify maintenance, and make + supporting logical SLOC easier. + +* Handle scripts embedded in data. + Perhaps create a category, "only the code embedded in HTML" + (e.g., Javascript scripts, PHP statements, etc.). + This is currently complicated - the whole program assumes that a file + can be assigned a specific type, and HTML (etc.) might have multiple + languages embedded in it. + +* Are any CGI files (.cgi) unhandled? Are files unidentified? + +* Improve makefile identification and counting. + Currently the program does not identify as makefiles "Imakefile" + (generated by xmkmf and processed by imake, used by MIT X server) + nor automake/autoconf files (Makefile.am/Makefile.in). + Need to handle ".rules" too. + + I didn't just add these files to the "makefile" list, because + I have concerns about processing them correctly using the + makefile counter. Since most people won't count makefiles anyway, + this isn't an issue for most. I welcome patches to change this, + _IF_ you ensure that the resulting counts are correct. + + The current version is sufficient for handling programs who have + ordinary makefiles that are to be included in the SLOC count when + they enable the option to count makefiles. + + Currently the makefiles count "all non-blank lines"; conceivably + someone might want to count only the actual directives, not the + conditions under which they fire. + +* Improve the flexibility in symlink handling; see "make_filelists". + It should be rewritten. Some systems don't allow + "test"ing for symlinks, which was a portability problem - that problem + at least has been removed. + +* I've added a few utilities that I use for counting whole Linux systems + to the tar file, but they're not installed by the RPM and they're not + documented. + +* More testing! COBOL in particular is undertested. + +* Modify the code, esp. sloccount, to handle systems so large that + the data directory list can't be expanded using "*". + This would involve using "xargs" in sloccount, maybe getting rid + of the separate filelist creation, and having break_filelist + call compute_all directly (break_filelist needs to run all the time, + or its reloading of hashes during initialization would become the + bottleneck). Some of this work has already been done. + + diff --git a/ada_count b/ada_count new file mode 100755 index 0000000..3204f56 --- /dev/null +++ b/ada_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. 
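+#
+# Ada comments run from "--" to the end of the line; this wrapper simply
+# passes that comment marker to generic_count, i.e. the usual physical-SLOC
+# rule: count the lines that are neither blank nor comment-only.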
+# +# + +generic_count '--' "$@" + diff --git a/append_license b/append_license new file mode 100755 index 0000000..4cea6d5 --- /dev/null +++ b/append_license @@ -0,0 +1,62 @@ +#!/usr/bin/perl -w + +# Given a 3-column list "sloc build-directory-name spec-filename", +# output a 4-column list which appends the license. +# You'll need to fix this up afterwards. +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# +$specdir = "/usr/src/redhat/SPECS"; +$garbage = ""; + +while (<>) { + ($sloc, $buildname, $specname, $garbage) = split; + chomp($specname); + print "$sloc $buildname $specname "; + + if (! (-f "$specdir/$specname")) { + die "ERROR. Cound not find spec file $specname\n"; + } + + + # Get "Copyright:" or "License:" + $license = ""; + $summary = ""; + open(SPECFILE, "<$specdir/$specname") || die "Can't open $specname\n"; + while () { + # print; + if (m/^Summary\:(.*)/i) { $summary = $1; } + if (m/^License\:(.*)/i) { $license = $1; } + if ((! $license) && (m/^Copyright\:(.*)/i)) { $license = $1; } + } + close(SPECFILE); + + if ($license) {print "$license";} + else {print "?";} + + # print "\t"; + # print $summary; + + print "\n"; + +} diff --git a/append_specname b/append_specname new file mode 100755 index 0000000..9b8e97c --- /dev/null +++ b/append_specname @@ -0,0 +1,57 @@ +#!/usr/bin/perl -w + +# Given a 2-column list "sloc build-directory-name", +# output a 3-column list which appends the name of the spec file. +# You'll need to fix this up afterwards. +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. 
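+#
+# For illustration only (the number and package name are made up): an
+# input line such as
+#   104290 gnome-libs-1.0.55
+# is echoed back with a third column,
+#   104290 gnome-libs-1.0.55 gnome-libs
+# when $specdir contains gnome-libs.spec; the versioned name, the
+# unversioned name, and a "really short" name are tried in that order,
+# and "?" is printed if no spec file matches.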
+# +# +$specdir = "/usr/src/redhat/SPECS"; +$garbage = ""; + +while (<>) { + ($sloc, $buildname, $garbage) = split; + chomp($buildname); + print "$sloc $buildname "; + + $unversioned = $buildname; + $unversioned =~ s/-[^\-]*$//; + + $reallyshort = $buildname; + $reallyshort =~ s/[0-9\.\-_]*$//; + + + if (-f "$specdir/${buildname}.spec") {print "$buildname";} + elsif (-f "$specdir/${buildname}.spec.alpha") {print "${buildname}.alpha";} + elsif (-f "$specdir/${buildname}.spec.sparc") {print "${buildname}.sparc";} + elsif (-f "$specdir/${unversioned}.spec") {print "$unversioned";} + elsif (-f "$specdir/${unversioned}.spec.alpha") {print "${unversioned}.alpha";} + elsif (-f "$specdir/${unversioned}.spec.sparc") {print "${unversioned}.sparc";} + elsif (-f "$specdir/${reallyshort}.spec") {print "$reallyshort";} + elsif (-f "$specdir/${reallyshort}.spec.alpha") {print "${reallyshort}.alpha";} + elsif (-f "$specdir/${reallyshort}.spec.sparc") {print "${reallyshort}.sparc";} + else {print "?";} + + print "\n"; + +} diff --git a/asm_count b/asm_count new file mode 100755 index 0000000..d7ad0b1 --- /dev/null +++ b/asm_count @@ -0,0 +1,166 @@ +#!/usr/bin/perl -w +# asm_count - count physical lines of code in Assembly programs. +# Usage: asm_count [-f file] [list_of_files] +# file: file with a list of files to count (if "-", read list from stdin) +# list_of_files: list of files to count +# -f file or list_of_files can be used, or both +# This is a trivial/naive program. + +# For each file, it looks at the contents to heuristically determine +# if C comments are permitted and what the "comment" character is. +# If /* and */ are in the file, then C comments are permitted. +# The punctuation mark that starts the most lines must be the comment +# character (but ignoring "/" if C comments are allowed, and +# ignoring '#' if cpp commands appear to be used) + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. + + + +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? 
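+# Illustrative invocations (the file names are made up):
+#   asm_count crt0.s boot.S               # files listed on the command line
+#   asm_count -f asm_files.txt            # read the list of files from a file
+#   find . -name '*.s' | asm_count -f -   # read the list of files from stdin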
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) { + # Yes, we have -f + if ($ARGV[1] eq "-") { + # The list of files is in STDIN + while () { + chomp ($_); + &count_file ($_); + } + } else { + # The list of files is in the file $ARGV[1] + open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n"; + while () { + chomp ($_); + &count_file ($_); + } + close FILEWITHLIST; + } + shift @ARGV; shift @ARGV; +} +# Process all (remaining) arguments as file names +while ($file = shift @ARGV) { + &count_file ($file); +} + +print "Total:\n"; +print "$total_sloc\n"; + +sub count_file { + my ($file) = @_; + # First, use heuristics to determine the comment char and if it uses C comments + $found_c_start = 0; + $found_c_end = 0; + $cpp_suspicious = 0; + $cpp_likely = 0; + $cpp_used = 0; + %count = (); + if ($file eq "") { + *CURRENTFILE = *STDIN + } else { + open(CURRENTFILE, "<$file"); + } + while () { + if (m!\/\*!) { $found_c_start++;} + if (m!\*\/!) { $found_c_end++;} + if ( (m!^#\s*define\s!) || (m!^#\s*else!)) {$cpp_suspicious++;} + if ( (m!^#\s*ifdef\s!) || (m!^#\s*endif!) || (m!#\s*include!)) {$cpp_likely++;} + if (m/^\s*([;!\/#\@\|\*])/) { $count{$1}++; } # Found a likely comment char. + } + # Done examing file, let's figure out the parameters. + if ($found_c_start && $found_c_end) { + $ccomments = 1; + $count{'/'} = 0; + # $count{'*'} = 0; # Do this to ignore '*' if C comments are used. + } else { + $ccomments = 0; + } + if (($cpp_suspicious > 2) || ($cpp_likely >= 1)) { + $cpp_used = 1; + $count{'#'} = 0; + } else { + $cpp_used = 0; + } + $likeliest = ';'; + $likeliest_count = 0; + foreach $i (keys(%count)) { + # print "DEBUG: key=$i count=$count{$i}\n"; + if ($count{$i} > $likeliest_count) { + $likeliest = $i; + $likeliest_count = $count{$i}; + } + } + # print "DEBUG: likeliest = $likeliest\n"; + $commentchar=$likeliest; + close(CURRENTFILE); + + # Now count SLOC. + $sloc = 0; + $isincomment = 0; + open(CURRENTFILE, "<$file"); + while () { + # We handle C comments first, so that if an EOL-comment + # occurs inside a C comment, it's ignored. + if ($ccomments) { + # Handle C /* */ comments; this will get fooled if they're in strings, + # but that would be rare in assembly. + while ( (m!\/\*!) || (m!\*\/!)) { # While unprocessed C comment. + if ($isincomment) { + s!.*?\*\/.*!!; + $isincomment = 0; + } else { # Not in C comment, but have end comment marker. + if (! m/\/\*/) { # Whups, there's no starting marker! + print STDERR "Warning: file $file line $. has unmatched comment end\n"; + # Get us back to a plausible state: + s/.*//; # Destroy everything + $isincomment = 0; + } else { + if (! s!\/\*.*?\*\/!!) { # Try to delete whole comment. + # We couldn't delete whole comment. Delete what's there. + s!\/\*.*!!; + $isincomment = 1; + } + } + } + } + } # End of handling C comments. + # This requires $[ be unchanged. + $locate_comment = index($_, $commentchar); + if ($locate_comment >= 0) { # We found a comment character, delete comment + $_ = substr($_, 0, $locate_comment); + # print "DEBUG New text: @",$_,"@\n"; + } + # old: s/${commentchar}.*//; # Delete leading comments. + + # FOR DEBUG: print "Finally isincomment=$isincomment line=$_\n"; + if ((! 
$isincomment) && (m/\S/)) {$sloc++;} + } + + # End-of-file processing + print "$sloc (commentchar=$commentchar C-comments=$ccomments) $file\n"; + $total_sloc += $sloc; + $sloc = 0; + if ($isincomment) { + print STDERR "Missing comment close in file $file\n"; + } +} diff --git a/awk_count b/awk_count new file mode 100755 index 0000000..f892692 --- /dev/null +++ b/awk_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +generic_count '#' $@ + diff --git a/break_filelist b/break_filelist new file mode 100755 index 0000000..7df41ab --- /dev/null +++ b/break_filelist @@ -0,0 +1,1308 @@ +#!/usr/bin/perl -w + +# break_filelist +# Take a list of dirs which contain a "filelist"; +# creates files in each directory identifying which are C, C++, Perl, etc. +# For example, "ansic.dat" lists all ANSI C files contained in filelist. +# Note: ".h" files are ambiguous (they could be C or C++); the program +# uses heuristics to determine this. +# The list of .h files is also contained in h_list.dat. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. + + +# If adding a new language: add the logic to open the file, +# close the file, and detect & write to the file listing that language. + +# Debatable decisions: +# Doesn't count .dsl files (stylesheets, which are partially LISP). +# Doesn't count .sql files (SQL queries & commands) + +# Note - I don't try to distinguish between TCL and [incr TCL] (itcl), +# an OO extended version of TCL. For our purposes, it's all TCL. + + +use FileHandle; + + +# Set default configuration: + +$duplicates_okay = 0; # Set to 1 if you want to count file duplicates. +$crossdups_okay = 0; # Set to 1 if duplicates okay in different filelists. +$autogen_okay = 0; # Set to 1 if you want to count autogen'ed files. 
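+# (Duplicate files are recognized by md5 checksum; when duplicates are not
+# being counted, only the "first" filelist containing a given file gets
+# credit for it.  See the ChangeLog entry for release 2.0.)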
+$noisy = 0; # Set to 1 if you want noisy reports. +%lang_list_files = (); + +# The following extensions are NOT code: +%not_code_extensions = ( + "html" => 1, + "in" => 1, # Debatable. + "xpm" => 1, + "po" => 1, + "am" => 1, # Debatable. + "1" => 1, # Man pages (documentation): + "2" => 1, + "3" => 1, + "4" => 1, + "5" => 1, + "6" => 1, + "7" => 1, + "8" => 1, + "9" => 1, + "n" => 1, + "gif" => 1, + "tfm" => 1, + "png" => 1, + "m4" => 1, # Debatable. + "bdf" => 1, + "sgml" => 1, + "mf" => 1, + "txt" => 1, "text" => 1, + "man" => 1, + "xbm" => 1, + "Tag" => 1, + "sgm" => 1, + "vf" => 1, + "tex" => 1, + "elc" => 1, + "gz" => 1, + "dic" => 1, + "pfb" => 1, + "fig" => 1, + "afm" => 1, # font metrics + "jpg" => 1, + "bmp" => 1, + "htm" => 1, + "kdelnk" => 1, + "desktop" => 1, + "pbm" => 1, + "pdf" => 1, + "ps" => 1, # Postscript is _USUALLY_ generated automatically. + "eps" => 1, + "doc" => 1, + "man" => 1, + "o" => 1, # Object code is generated from source code. + "a" => 1, # Static object code. + "so" => 1, # Dynamically-loaded object code. + "Y" => 1, # file compressed with "Yabba" + "Z" => 1, # file compressed with "compress" + "ad" => 1, # X application default resource file. + "arc" => 1, # arc(1) archive + "arj" => 1, # arj(1) archive + "au" => 1, # Audio sound filearj(1) archive + "wav" => 1, + "bak" => 1, # Backup files - we only want to count the "real" files. + "bz2" => 1, # bzip2(1) compressed file + "mp3" => 1, # zip archive + "tgz" => 1, # tarball + "zip" => 1, # zip archive +); + +# The following filenames are NOT code: +%not_code_filenames = ( + "README" => 1, + "Readme" => 1, + "readme" => 1, + "README.tk" => 1, # used in kdemultimedia, it's confusing. + "Changelog" => 1, + "ChangeLog" => 1, + "Repository" => 1, + "CHANGES" => 1, + "Changes" => 1, + ".cvsignore" => 1, + "Root" => 1, # CVS. + "BUGS" => 1, + "TODO" => 1, + "COPYING" => 1, + "MAINTAINERS" => 1, + "Entries" => 1, + # Skip "iconfig.h" files; they're used in Imakefiles + # (used in xlockmore): + "iconfig.h" => 1, +); + + +# A filename ending in the following extensions usually maps to the +# given language: + +# TODO: See suffixes(7) +# .al Perl autoload file +# .am automake input + +%file_extensions = ( + "c" => "ansic", + "ec" => "ansic", # Informix C. + "ecp" => "ansic", # Informix C. + "pgc" => "ansic", # Postgres embedded C/C++ (guess C) + "C" => "cpp", "cpp" => "cpp", "cxx" => "cpp", "cc" => "cpp", + "pcc" => "cpp", # Input to Oracle C++ preproc. + "m" => "objc", + # C# (C-sharp) is named 'cs', not 'c#', because + # the '#' is a comment character and I'm trying to + # avoid bug-prone conventions. + # C# doesn't support header files. + "cs" => "cs", + # Header files are allocated to the "h" language, and then + # copied to the correct location later so that C/C++/Objective-C + # can be separated. + "h" => "h", "H" => "h", "hpp" => "h", "hh" => "h", + "ada" => "ada", "adb" => "ada", "ads" => "ada", + "pad" => "ada", # Oracle Ada preprocessor. + "f" => "fortran", "F" => "fortran", # This catches "wokka.F" as Fortran. + # Warning: "Freeze" format also uses .f. Haven't heard of problems, + # freeze is extremely rare and even more rare in source code directories. 
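+  # (The plain "fortran" language is handled by the Fortran 77 counter,
+  # which, per the ChangeLog, only handles f77; Fortran 90 sources
+  # (.f90/.F90, next line) are tracked separately as "f90".)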
+ "f77" => "fortran", "F77" => "fortran", + "f90" => "f90", "F90" => "f90", + "cob" => "cobol", "cbl" => "cobol", + "COB" => "cobol", "CBL" => "cobol", # Yes, people do create wokka.CBL files + "p" => "pascal", "pas" => "pascal", "pp" => "pascal", "dpr" => "pascal", + "py" => "python", + "s" => "asm", "S" => "asm", "asm" => "asm", + "sh" => "sh", "bash" => "sh", + "csh" => "csh", "tcsh" => "csh", + "java" => "java", + "lisp" => "lisp", "el" => "lisp", "scm" => "lisp", "sc" => "lisp", + "lsp" => "lisp", "cl" => "lisp", + "jl" => "lisp", + "tcl" => "tcl", "tk" => "tcl", "itk" => "tcl", + "exp" => "exp", + "pl" => "perl", "pm" => "perl", "perl" => "perl", "ph" => "perl", + "awk" => "awk", + "sed" => "sed", + "y" => "yacc", + "l" => "lex", + "makefile" => "makefile", + "sql" => "sql", + "php" => "php", "php3" => "php", "php4" => "php", "php5" => "php", + "php6" => "php", + "inc" => "inc", # inc MAY be PHP - we'll handle it specially. + "m3" => "modula3", "i3" => "modula3", + "mg" => "modula3", "ig" => "modula3", + "ml" => "ml", "mli" => "ml", + "mly" => "ml", # ocamlyacc. In fact this is half-yacc half-ML, especially + # comments in yacc part are C-like, not ML like. + "mll" => "ml", # ocamllex, no such problems as in ocamlyacc + "rb" => "ruby", + "hs" => "haskell", "lhs" => "haskell", + # ???: .pco is Oracle Cobol + "jsp" => "jsp", # Java server pages +); + + +# GLOBAL VARIABLES + +$dup_count = 0; + +$warning_from_first_line = ""; + +%examined_directories = (); # Keys = Names of directories examined this run. + +$duplistfile = ""; + +########### + + +# Handle re-opening individual CODE_FILEs. +# CODE_FILE is public + +# Private value: +$opened_file_name = ""; + +sub reopen { + # Open file if it isn't already, else rewind. + # If filename is "", close any open file. + my $filename = shift; + chomp($filename); + # print("DEBUG: reopen($filename)\n"); + if ($filename eq "") { + if ($opened_file_name) {close(CODE_FILE);} + $opened_file_name = ""; + return; + } + if ($filename eq $opened_file_name) { + seek CODE_FILE, 0, 0; # Rewind. + } else { # We're opening a new file. + if ($opened_file_name) {close(CODE_FILE)} + open(CODE_FILE, "<$filename\0") || die "Can't open $filename"; + $opened_file_name = $filename; + } +} + +########### + +sub looks_like_cpp { + # returns a confidence level - does the file looks like it's C++? + my $filename = shift; + my $confidence = 0; + chomp($filename); + open( SUSPECT, "<$filename"); + while (defined($_ = )) { + if (m/^\s*class\b.*\{/) { # "}" + close(SUSPECT); + return 2; + } + if (m/^\s*class\b/) { + $confidence = 1; + } + } + close(SUSPECT); + return $confidence; +} + + +# Cache which files are objective-C or not. +# Key is the full file pathname; value is 1 if objective-C (else 0). +%objective_c_files = (); + +sub really_is_objc { +# Given filename, returns TRUE if its contents really are objective-C. + my $filename = shift; + chomp($filename); + + my $is_objc = 0; # Value to determine. + my $brace_lines = 0; # Lines that begin/end with curly braces. + my $plus_minus = 0; # Lines that begin with + or -. + my $word_main = 0; # Did we find "main("? + my $special = 0; # Did we find a special Objective-C pattern? 
+ + # Return cached result, if available: + if ($objective_c_files{$filename}) { return $objective_c_files{$filename};} + + open(OBJC_FILE, "<$filename") || + die "Can't open $filename to determine if it's objective C.\n"; + while() { + + if (m/^\s*[{}]/ || m/[{}];?\s*$/) { $brace_lines++;} + if (m/^\s*[+-]/) {$plus_minus++;} + if (m/\bmain\s*\(/) {$word_main++;} # "main" followed by "("? + # Handle /usr/src/redhat/BUILD/egcs-1.1.2/gcc/objc/linking.m: + if (m/^\s*\[object name\];\s*$/i) {$special=1;} + } + close(OBJC_FILE); + + if (($brace_lines > 1) && (($plus_minus > 1) || $word_main || $special)) + {$is_objc = 1;} + + $objective_c_files{$filename} = $is_objc; # Store result in cache. + + return $is_objc; +} + + +# Cache which files are lex or not. +# Key is the full file pathname; value is 1 if lex (else 0). +%lex_files = (); + +sub really_is_lex { +# Given filename, returns TRUE if its contents really is lex. +# lex file must have "%%", "%{", and "%}". +# In theory, a lex file doesn't need "%{" and "%}", but in practice +# they all have them, and requiring them avoid mislabeling a +# non-lexfile as a lex file. + + my $filename = shift; + chomp($filename); + + my $is_lex = 0; # Value to determine. + my $percent_percent = 0; + my $percent_opencurly = 0; + my $percent_closecurly = 0; + + # Return cached result, if available: + if ($lex_files{$filename}) { return $lex_files{$filename};} + + open(LEX_FILE, "<$filename") || + die "Can't open $filename to determine if it's lex.\n"; + while() { + $percent_percent++ if (m/^\s*\%\%/); + $percent_opencurly++ if (m/^\s*\%\{/); + $percent_closecurly++ if (m/^\s*\%\}/); + } + close(LEX_FILE); + + if ($percent_percent && $percent_opencurly && $percent_closecurly) + {$is_lex = 1;} + + $lex_files{$filename} = $is_lex; # Store result in cache. + + return $is_lex; +} + + +# Cache which files are expect or not. +# Key is the full file pathname; value is 1 if it is (else 0). +%expect_files = (); + +sub really_is_expect { +# Given filename, returns TRUE if its contents really are Expect. +# Many "exp" files (such as in Apache and Mesa) are just "export" data, +# summarizing something else # (e.g., its interface). +# Sometimes (like in RPM) it's just misc. data. +# Thus, we need to look at the file to determine +# if it's really an "expect" file. + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it's Expect _IF_ it: +# 1. has "load_lib" command and either "#" comments or {}. +# 2. {, }, and one of: proc, if, [...], expect + + my $is_expect = 0; # Value to determine. + + my $begin_brace = 0; # Lines that begin with curly braces. + my $end_brace = 0; # Lines that begin with curly braces. + my $load_lib = 0; # Lines with the Load_lib command. 
+ my $found_proc = 0; + my $found_if = 0; + my $found_brackets = 0; + my $found_expect = 0; + my $found_pound = 0; + + # Return cached result, if available: + if ($expect_files{$filename}) { return expect_files{$filename};} + + open(EXPECT_FILE, "<$filename") || + die "Can't open $filename to determine if it's expect.\n"; + while() { + + if (m/#/) {$found_pound++; s/#.*//;} + if (m/^\s*\{/) { $begin_brace++;} + if (m/\{\s*$/) { $begin_brace++;} + if (m/^\s*\}/) { $end_brace++;} + if (m/\};?\s*$/) { $end_brace++;} + if (m/^\s*load_lib\s+\S/) { $load_lib++;} + if (m/^\s*proc\s/) { $found_proc++;} + if (m/^\s*if\s/) { $found_if++;} + if (m/\[.*\]/) { $found_brackets++;} + if (m/^\s*expect\s/) { $found_expect++;} + } + close(EXPECT_FILE); + + if ($load_lib && ($found_pound || ($begin_brace && $end_brace))) + {$is_expect = 1;} + if ( $begin_brace && $end_brace && + ($found_proc || $found_if || $found_brackets || $found_expect)) + {$is_expect = 1;} + + $expect_files{$filename} = $is_expect; # Store result in cache. + + return $is_expect; +} + + +# Cached values. +%pascal_files = (); + +sub really_is_pascal { +# Given filename, returns TRUE if its contents really are Pascal. + +# This isn't as obvious as it seems. +# Many ".p" files are Perl files +# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p), +# others are C extractions +# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p +# and some files in linuxconf). +# However, test files in "p2c" really are Pascal, for example. + +# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p +# is actually C code. The heuristics determine that they're not Pascal, +# but because it ends in ".p" it's not counted as C code either. +# I believe this is actually correct behavior, because frankly it +# looks like it's automatically generated (it's a bitmap expressed as code). +# Rather than guess otherwise, we don't include it in a list of +# source files. Let's face it, someone who creates C files ending in ".p" +# and expects them to be counted by default as C files in SLOCCount needs +# their head examined. I suggest examining their head +# with a sucker rod (see syslogd(8) for more on sucker rods). + +# This heuristic counts as Pascal such files such as: +# /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p +# Which is hand-generated. We don't count woven documents now anyway, +# so this is justifiable. + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it's Pascal _IF_ it has all of the following +# (ignoring {...} and (*...*) comments): +# 1. "^..program NAME" or "^..unit NAME", +# 2. "procedure", "function", "^..interface", or "^..implementation", +# 3. a "begin", and +# 4. it ends with "end.", +# +# Or it has all of the following: +# 1. "^..module NAME" and +# 2. it ends with "end.". +# +# Or it has all of the following: +# 1. "^..program NAME", +# 2. a "begin", and +# 3. it ends with "end.". +# +# The "end." requirements in particular filter out non-Pascal. +# +# Note (jgb): this does not detect Pascal main files in fpc, like +# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in +# it + + my $is_pascal = 0; # Value to determine. 
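+  # NOTE: the scan below sets $has_begin, but only $found_begin is declared
+  # below (and never read in this sub); since $has_begin is a package global
+  # it is also never reset between calls, so a "begin" seen in one file can
+  # carry over to the next file examined.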
+
+  my $has_program = 0;
+  my $has_unit = 0;
+  my $has_module = 0;
+  my $has_procedure_or_function = 0;
+  my $has_begin = 0;
+  my $found_terminating_end = 0;
+
+  # Return cached result, if available:
+  if ($pascal_files{$filename}) { return $pascal_files{$filename};}
+
+  open(PASCAL_FILE, "<$filename") ||
+     die "Can't open $filename to determine if it's pascal.\n";
+  while(<PASCAL_FILE>) {
+    s/\{.*?\}//g;      # Ignore {...} comments on this line; imperfect, but effective.
+    s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
+    if (m/\bprogram\s+[A-Za-z]/i)  {$has_program=1;}
+    if (m/\bunit\s+[A-Za-z]/i)     {$has_unit=1;}
+    if (m/\bmodule\s+[A-Za-z]/i)   {$has_module=1;}
+    if (m/\bprocedure\b/i) { $has_procedure_or_function = 1; }
+    if (m/\bfunction\b/i)  { $has_procedure_or_function = 1; }
+    if (m/^\s*interface\s+/i)      { $has_procedure_or_function = 1; }
+    if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; }
+    if (m/\bbegin\b/i) { $has_begin = 1; }
+    # Originally I said:
+    # "This heuristic fails if there are multi-line comments after
+    # "end."; I haven't seen that in real Pascal programs:"
+    # But jgb found there are a good quantity of them in Debian, especially in
+    # fpc (at the end of a lot of files there is a multiline comment
+    # with the changelog for the file).
+    # Therefore, assume Pascal if "end." appears anywhere in the file.
+    if (m/end\.\s*$/i) {$found_terminating_end = 1;}
+#   elsif (m/\S/) {$found_terminating_end = 0;}
+  }
+  close(PASCAL_FILE);
+
+  # Okay, we've examined the entire file looking for clues;
+  # let's use those clues to determine if it's really Pascal:
+
+  if ( ( ($has_unit || $has_program) && $has_procedure_or_function &&
+         $has_begin && $found_terminating_end ) ||
+       ( $has_module && $found_terminating_end ) ||
+       ( $has_program && $has_begin && $found_terminating_end ) )
+    {$is_pascal = 1;}
+
+  $pascal_files{$filename} = $is_pascal;  # Store result in cache.
+
+  return $is_pascal;
+}
+
+sub really_is_incpascal {
+# Given filename, returns TRUE if its contents really are Pascal.
+# For .inc files (mainly seen in fpc)
+
+  my $filename = shift;
+  chomp($filename);
+
+# The heuristic is as follows: it is Pascal if any of the following:
+# 1. really_is_pascal returns true
+# 2. Any usual reserved word is found (program, unit, const, begin...)
+
+  # If the general routine for Pascal files works, we have it
+  if (&really_is_pascal ($filename)) {
+    $pascal_files{$filename} = 1;
+    return 1;
+  }
+
+  my $is_pascal = 0;      # Value to determine.
+  my $found_begin = 0;
+
+  open(PASCAL_FILE, "<$filename") ||
+     die "Can't open $filename to determine if it's pascal.\n";
+  while(<PASCAL_FILE>) {
+    s/\{.*?\}//g;      # Ignore {...} comments on this line; imperfect, but effective.
+    s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
+    if (m/\bprogram\s+[A-Za-z]/i)  {$is_pascal=1;}
+    if (m/\bunit\s+[A-Za-z]/i)     {$is_pascal=1;}
+    if (m/\bmodule\s+[A-Za-z]/i)   {$is_pascal=1;}
+    if (m/\bprocedure\b/i) {$is_pascal = 1; }
+    if (m/\bfunction\b/i)  {$is_pascal = 1; }
+    if (m/^\s*interface\s+/i)      {$is_pascal = 1; }
+    if (m/^\s*implementation\s+/i) {$is_pascal = 1; }
+    if (m/\bconstant\s+/i) {$is_pascal=1;}
+    if (m/\bbegin\b/i) { $found_begin = 1; }
+    if ((m/end\.\s*$/i) && ($found_begin == 1)) {$is_pascal = 1;}
+    if ($is_pascal) {
+      last;
+    }
+  }
+
+  close(PASCAL_FILE);
+  $pascal_files{$filename} = $is_pascal;  # Store result in cache.
+  return $is_pascal;
+}
+
+# Cache which files are php or not.
+# Key is the full file pathname; value is 1 if it is (else 0).
+%php_files = (); + +sub really_is_php { +# Given filename, returns TRUE if its contents really is php. + + my $filename = shift; + chomp($filename); + + my $is_php = 0; # Value to determine. + # Need to find a matching pair of surrounds, with ending after beginning: + my $normal_surround = 0; # + my $script_surround = 0; # ; bit 0 = ", stream)) { + mode = NONE; + } else if (match_consume("\"", stream)) { + sawchar = 1; + mode = INSTRING; + } else if (match_consume("\'", stream)) { + sawchar = 1; + mode = INSINGLESTRING; + } else if (match_consume("/*", stream)) { + mode = INCOMMENT; + comment_type = ANSIC_STYLE; + } else if (match_consume("//", stream)) { + mode = INCOMMENT; + comment_type = CPP_STYLE; + } else if (match_consume("#", stream)) { + mode = INCOMMENT; + comment_type = SH_STYLE; + } else if (match_consume("<<<", stream)) { + mode = HEREDOC; + while (isspace(current_char(stream)) && !is_input_eof) {consume_char(stream);} + heredoc_end = rest_of_line(stream); + strstrip(heredoc_end); + } else { + if (!isspace(current_char(stream))) sawchar = 1; + consume_char(stream); + } + } else if (mode == INSTRING) { + /* We only count string lines with non-whitespace -- this is to + gracefully handle syntactically invalid programs. + You could argue that multiline strings with whitespace are + still executable and should be counted. */ + if (!isspace(current_char(stream))) sawchar = 1; + if (match_consume("\"", stream)) {mode = NORMAL;} + else if (match_consume("\\\"", stream) || match_consume("\\\\", stream) || + match_consume("\\\'", stream)) {} + else consume_char(stream); + } else if (mode == INSINGLESTRING) { + /* We only count string lines with non-whitespace; see above. */ + if (!isspace(current_char(stream))) sawchar = 1; + if (current_char(stream) == '\'') {} + if (match_consume("'", stream)) {mode = NORMAL; } + else if (match_consume("\\\\", stream) || match_consume("\\\'", stream)) { } + else { consume_char(stream); } + } else if (mode == INCOMMENT) { + if ((comment_type == ANSIC_STYLE) && match_consume("*/", stream)) { + mode = NORMAL; } + /* Note: in PHP, must accept ending markers, even in a comment: */ + else if ((expected_end==NORMAL_END) && match_consume("?>", stream)) + { mode = NONE; } + else if ((expected_end==ASP_END) && match_consume("%>", stream)) { mode = NONE; } + else if ((expected_end==SCRIPT_END) && match_consume("", stream)) + { mode = NONE; } + else if ( ((comment_type == CPP_STYLE) || (comment_type == SH_STYLE)) && + match_consume("\n", stream)) { mode = NORMAL; } + else consume_char(stream); + } else if (mode == HEREDOC) { + if (!isspace(current_char(stream))) sawchar = 1; + if (beginning_of_line && match_consume(heredoc_end, stream)) { + mode=NORMAL; + } else { + consume_char(stream); + } + } else { + fprintf(stderr, "Warning! Unknown mode in PHP file %s, mode=%d\n", + filename, mode); + consume_char(stream); + } + } + if (mode != NONE) { + fprintf(stderr, "Warning! Unclosed PHP file %s, mode=%d\n", filename, mode); + } + + return sloc; +} + + +void count_file(char *filename) { + long sloc; + FILE *stream; + + stream = fopen(filename, "r"); + line_number = 0; + init_input(stream); + sloc = sloc_count(filename, stream); + fclose (stream); + total_sloc += sloc; + printf("%ld %s\n", sloc, filename); +} + +char *read_a_line(FILE *file) { + /* Read a line in, and return a malloc'ed buffer with the line contents. + Any newline at the end is stripped. + If there's nothing left to read, returns NULL. 
*/ + + /* We'll create a monstrously long buffer to make life easy for us: */ + char buffer[10000]; + char *returnval; + char *newlinepos; + + returnval = fgets(buffer, sizeof(buffer), file); + if (returnval) { + newlinepos = buffer + strlen(buffer) - 1; + if (*newlinepos == '\n') {*newlinepos = '\0';}; + return strdup(buffer); + } else { + return NULL; + } +} + + +int main(int argc, char *argv[]) { + long sloc; + int i; + FILE *file_list; + char *s; + + total_sloc = 0; + line_number = 0; + + if (argc <= 1) { + init_input(stdin); + sloc = sloc_count("-", stdin); + printf("%ld %s\n", sloc, "-"); + total_sloc += sloc; + } else if ((argc == 3) && (!strcmp(argv[1], "-f"))) { + if (!strcmp (argv[2], "-")) { + file_list = stdin; + } else { + file_list = fopen(argv[2], "r"); + } + if (file_list) { + while ((s = read_a_line(file_list))) { + count_file(s); + free(s); + } + } + } else { + for (i=1; i < argc; i++) { count_file(argv[i]); } + } + printf("Total:\n"); + printf("%ld\n", total_sloc); + exit(0); +} + diff --git a/print_sum b/print_sum new file mode 100755 index 0000000..f0ef453 --- /dev/null +++ b/print_sum @@ -0,0 +1,40 @@ +#!/usr/bin/perl +# print_sum - read from stdin and print the sum. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$total = 0.0; + +while (<>) { + if (m/^\s*([\d.]+)/) { + $maybe_number = $1; + $maybe_number =~ s/\.*$//; # chop trailing ".". + if ($maybe_number =~ m/\d/) { + $total += $maybe_number; + } + } +} + +print "$total\n"; + diff --git a/print_sum_subset b/print_sum_subset new file mode 100755 index 0000000..2db2496 --- /dev/null +++ b/print_sum_subset @@ -0,0 +1,41 @@ +#!/bin/sh + +# print the sum of SLOC for a subset of a package. +# The subset is specified using a regular expression. + +# To use, "cd" into the package data directory (with the "_outfile.dat" files), +# then specify as the first parameter the pattern defining the subset. + +# E.G.: +# cd ../data/linux +# print_sum_subset 'BUILD\/linux\/drivers\/' +# +# will show the SLOC total for the "drivers" directory & subdirs +# of the "linux" data subdirectory. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+echo -n "$1 "
+grep -h "$1" *_outfile.dat | grep '^[1-9][0-9]* [^ ]' | print_sum
+
diff --git a/python_count b/python_count
new file mode 100755
index 0000000..ab8c99e
--- /dev/null
+++ b/python_count
@@ -0,0 +1,120 @@
+#!/usr/bin/perl -w
+# python_count - count physical lines of code in Python programs.
+# Usage: python_count [-f file] [list_of_files]
+# file: file with a list of files to count (if "-", read list from stdin)
+# list_of_files: list of files to count
+# -f file or list_of_files can be used, or both
+# This is a trivial/naive program.
+
+# Comments begin with "#".
+# Python supports multi-line strings using """, which matches another """.
+# When not inside a multi-line string, a line whose first non-whitespace
+# is """ almost always indicates a programming comment;
+# this is also true for lines beginning with '"'.
+# This means that a string which is part of an expression but which begins
+# a new line won't be counted; this problem is rare in practice.
+# This code DOES count _data_ inside a triple-quote (that's not a comment).
+# Note that this isn't true for single-quote, which is used in case
+# statements (etc.) but not in this context.
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+#
+#
+
+$total_sloc = 0;
+
+# Do we have "-f" (read list of files from second argument)?
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+  # Yes, we have -f
+  if ($ARGV[1] eq "-") {
+    # The list of files is in STDIN
+    while (<STDIN>) {
+      chomp ($_);
+      &count_file ($_);
+    }
+  } else {
+    # The list of files is in the file $ARGV[1]
+    open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+    while (<FILEWITHLIST>) {
+      chomp ($_);
+      &count_file ($_);
+    }
+    close FILEWITHLIST;
+  }
+  shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+  &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
+sub count_file {
+  my ($file) = @_;
+  my $sloc = 0;
+  my $isintriple = 0;   # A triple-quote is in effect.
+  my $isincomment = 0;  # We are in a multiline (triple-quoted) comment.
+
+  open (FILE, $file);
+  while (<FILE>) {
+    if (! $isintriple) {    # Normal case:
+      s/""".*"""//;  # Delete triple-quotes that begin & end on the line.
+      s/^\s*"([^"]|(\\"))+"//;  # Delete lonely strings starting on BOL.
+      s/#.*//;       # Delete "#" comments.
+ if (m/"""/) { # Does a multiline triple-quote begin here? + $isintriple = 1; + if (m/^\s*"""/) {$isincomment = 1;} # It's a comment if at BOL. + } + } else { # we ARE in a triple. + if (m/"""/) { + if ($isincomment) { + s/.*?"""//; # Delete string text if it's a comment (not if data) + } else { + s/.*?"""/x/; # Leave something there to count. + } + # But wait! Another triple might start on this line! + # (see Python-1.5.2/Tools/freeze/makefreeze.py for an example) + if (m/"""/) { + # It did! No change in state! + } else { + $isintriple = 0; + $isincomment = 0; + } + } + } + # TO DEBUG: + # print "cmmnt=${isincomment} trp=${isintriple}: $_\n"; + if ( (!$isincomment) && m/\S/) {$sloc++;}; + } + print "$sloc $file\n"; + $total_sloc += $sloc; + $sloc = 0; + if ($isintriple) { + print STDERR "No closing triple-doublequote-marks in file $file\n"; + } + # Reset rest of state: + $isintriple = 0; + $isincomment = 0; + close (FILE); # Reset $. (line count) each time. +} diff --git a/redo_licenses b/redo_licenses new file mode 100755 index 0000000..8580b38 --- /dev/null +++ b/redo_licenses @@ -0,0 +1,42 @@ +#!/bin/sh + +# redo_licenses -- recompute licenses. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +SPECS=/usr/src/redhat/SPECS +BUILD=/usr/src/redhat/BUILD + +cd $BUILD +for builddir in * +do + specfile=`cat ${builddir}/ORIGINAL_SPEC_FILE | head -1` + specfile=${SPECS}/$specfile + echo "builddir=${builddir}, specfile=${specfile}" + /root/extract_license "$builddir" "$specfile" > ${builddir}/PROGRAM_LICENSE + license=`cat ${builddir}/PROGRAM_LICENSE | head -1` + echo " $license" + +done + diff --git a/rpm_unpacker b/rpm_unpacker new file mode 100755 index 0000000..1312066 --- /dev/null +++ b/rpm_unpacker @@ -0,0 +1,71 @@ +#!/bin/sh + +# unpacker -- unpacks RPMs into the BUILD directory. +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +SPECS=/usr/src/redhat/SPECS +BUILD=/usr/src/redhat/BUILD + +BUILD_LIST=/root/build_list +OLD_BUILD_LIST=${BUILD_LIST}.old + +echo "lost+found" > $OLD_BUILD_LIST + +cd $SPECS +for specfile in *.spec +do + cd $SPECS + # The "yes" is to give "yes" to "do you want to run this patch" requests - + # in particular this is needed to unpack samba.2.0.7 in Red Hat 7.1. + if yes | rpm -bp $specfile + then + # Things were fine, do nothing. + echo "UNPACKER: Successfully performed rpm -bp $specfile" + else + echo "UNPACKER WARNING - ERROR in rpm -bp $specfile" + fi + + # Find the new BUILD entries, and create cross-references to the old. + cd $BUILD + ls | sort > $BUILD_LIST + CHANGES=`comm -13 $OLD_BUILD_LIST $BUILD_LIST` + anychange="0" + for newbuild in $CHANGES + do + anychange=1 + echo $specfile > ${newbuild}/ORIGINAL_SPEC_FILE + echo "UNPACKER: added build $newbuild from $specfile" + extract_license "$newbuild" "${SPECS}/$specfile" > ${newbuild}/PROGRAM_LICENSE + # For disk space, erase all HTML files. + # If disk space is available, REMOVE THIS LINE: + # find "$newbuild" -type f -name "*.html" -exec rm {} \; + done + if [ $anychange == 0 ] + then + echo "UNPACKER: did not add a build directory for spec file $specfile" + fi + mv $BUILD_LIST $OLD_BUILD_LIST + +done + diff --git a/ruby_count b/ruby_count new file mode 100755 index 0000000..f892692 --- /dev/null +++ b/ruby_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +generic_count '#' $@ + diff --git a/sed_count b/sed_count new file mode 100755 index 0000000..f892692 --- /dev/null +++ b/sed_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +generic_count '#' $@ + diff --git a/sh_count b/sh_count new file mode 100755 index 0000000..f892692 --- /dev/null +++ b/sh_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +generic_count '#' $@ + diff --git a/show_filecount b/show_filecount new file mode 100755 index 0000000..95f9707 --- /dev/null +++ b/show_filecount @@ -0,0 +1,58 @@ +#!/bin/sh +# given a list of data subdirs, show how many files of each type +# are in each subdir. +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. 
+# +# + +echo "Dir C C++ Python Asm sh csh Java Lisp Tcl Expect Perl ML Modula3 awk sed make not auto unknown" + +for dir in $@ +do + if [ -d "$dir" ] + then + echo $dir | tr -d '\n '; echo -n " " + wc -l < $dir/ansic_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/cpp_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/python_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/asm_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/sh_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/csh_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/java_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/lisp_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/tcl_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/exp_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/perl_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/ml_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/modula3_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/awk_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/sed_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/makefile_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/not_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/auto_list.dat | tr -d '\n '; echo -n " " + wc -l < $dir/unknown_list.dat | tr -d '\n '; echo -n " " + echo + fi +done + + diff --git a/simplecount b/simplecount new file mode 100755 index 0000000..4c9b125 --- /dev/null +++ b/simplecount @@ -0,0 +1,84 @@ +#!/usr/bin/perl -w + +# simplecount +# Usage: simple_count commentstart [-f file] [list_of_files] +# commentstart: string that begins a comment (continuing til end-of-line) +# file: file with a list of files to count (if "-", read list from stdin) +# list_of_files: list of files to count +# -f file or list_of_files can be used, or both + +# prints the number of nonblank lines after stripping comments +# (comments begin with comment-char and continue to end-of-line +# This is naive, and can be fooled by comment chars in strings, but +# that's not a significant problem. + +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$commentchar = shift; +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? 
+if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) {
+  # Yes, we have -f
+  if ($ARGV[1] eq "-") {
+    # The list of files is in STDIN
+    while (<STDIN>) {
+      chomp ($_);
+      &count_file ($_);
+    }
+  } else {
+    # The list of files is in the file $ARGV[1]
+    open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $ARGV[1]\n";
+    while (<FILEWITHLIST>) {
+      chomp ($_);
+      &count_file ($_);
+    }
+    close FILEWITHLIST;
+  }
+  shift @ARGV; shift @ARGV;
+}
+# Process all (remaining) arguments as file names
+while ($file = shift @ARGV) {
+  &count_file ($file);
+}
+
+print "Total:\n";
+print "$total_sloc\n";
+
+sub count_file {
+  my ($file) = @_;
+  my $sloc = 0;
+
+  open (FILE, $file);
+  while (<FILE>) {
+    s/${commentchar}.*//;   # Strip away any comments.
+    if (m/\S/) {
+      $sloc++
+    };  # Nonwhitespace in the line, count it!
+  }
+  print "$sloc $file\n";
+  $total_sloc += $sloc;
+  close (FILE);
+}
diff --git a/sloccount b/sloccount
new file mode 100755
index 0000000..9491227
--- /dev/null
+++ b/sloccount
@@ -0,0 +1,258 @@
+#!/bin/sh
+
+# This is the front-end program "sloccount", part of the
+# SLOCCount tool suite by David A. Wheeler.
+# Given a list of directories, compute the SLOC count,
+# automatically creating the directory $HOME/.slocdata.
+
+# This is part of SLOCCount, a toolsuite that counts
+# source lines of code (SLOC).
+# Copyright (C) 2001-2004 David A. Wheeler.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# To contact David A. Wheeler, see his website at:
+# http://www.dwheeler.com.
+
+# See the SLOCCount documentation if you need
+# more details about the license.
+
+version=2.26
+
+if [ "$#" -eq 0 ]
+then
+  echo "Error: You must provide a directory or directories of source code."
+  exit 1
+fi
+
+startingdir=`pwd`
+
+
+# "datadir" is some suitable safe place for the data; here's the default:
+datadir=${HOME}/.slocdata
+
+details="n"
+cached="n"
+append="n"   # If "append", then don't delete datadir, just add to it.
+oneprogram="--oneprogram"
+display_results="n"
+duplicate_control=""
+autogen=""
+filecount=""
+wide=""
+effort_model=""
+personcost=""
+overhead=""
+follow=""
+addlang=""
+notopfollow=""
+showother=""
+
+# Perl 5.8.0 handles the "LANG" environment variable oddly;
+# if it includes ".UTF-8" (which it does in Red Hat Linux 9 and others)
+# then it will bitterly complain about ordinary text.
+# So, we'll need to filter ".UTF-8" out of LANG.
+if [ x"$LANG" != x ]
+then
+  LANG=`echo "$LANG" | sed -e 's/\.UTF-8//'`
+  export LANG
+  # echo "New LANG variable: $LANG"
+fi
+
+while [ "$#" -gt 0 ]
+do
+ case "$1"
+ in
+  --version) echo "$version"
+             exit 0;;
+  --cached) cached="y"
+            shift;;
+  --append) append="y"
+            shift;;
+  --follow) follow="--follow"
+            shift;;
+  --notopfollow) notopfollow="--notopfollow"
+            shift;;
+  --datadir) shift
+            if [ !
-d "$1" ] + then + echo "Error: $1 is not a directory" + exit 1 + fi + cd "$1" + datadir=`pwd` + cd $startingdir + shift;; + --duplicates) duplicate_control="$1" + shift;; + --crossdups) duplicate_control="$1" + shift;; + --autogen) autogen="$1" + shift;; + --multiproject) oneprogram="" + shift;; + --filecount) filecount="$1" + shift;; + --filecounts) filecount="$1" + shift;; + --wide) wide="$1" + shift;; + --details) details="y" + shift;; + --addlang) addlang="$addlang $1 $2" + shift; shift;; + --addlangall) addlang="--addlangall" + shift;; + --showother) showother="--showother" + shift;; + --effort) effort_model="$1 $2 $3" + shift; shift; shift;; + --schedule) schedule_model="$1 $2 $3" + shift; shift; shift;; + --personcost) personcost="$1 $2" + shift; shift;; + --overhead) overhead="$1 $2" + shift; shift;; + --) break;; + --*) echo "Error: no such option $1" + exit 1;; + *) break;; + esac +done + +# --duplicates) duplicate_control="$1" +# --autogen) autogen="$1" +# $follow + +case "$cached" +in + y) + if [ -n "$duplicate_control" -o -n "$autogen" -o -n "$follow" ] + then + echo "Warning: You cannot control what files are selected in a cache." + echo "The option '--cached' disables --duplicates, --crossdups," + echo "--autogen, and --follow. Remove the --cached option if you" + echo "are changing what you wish to include in your calculations." + echo + fi + if [ -d "$datadir" ] + then + display_results="y" + else + echo "Sorry, data directory $datadir does not exist." + exit 1 + fi;; + n) # Not cached -- need to compute the results. + + if [ "$append" = "n" ]; then + if [ -r "${datadir}/sloc_noerase" ]; then + echo "ERROR! This data directory is marked as do-not-erase." + echo "Remove the file ${datadir}/sloc_noerase to erase it." + exit 1 + fi + if [ "$#" -eq 0 ]; then + echo "ERROR! No directories for initial analysis supplied." + exit 1 + fi + rm -fr "$datadir" + mkdir "$datadir" + fi + + # Now that "datadir" exists, first test to make sure wc -l works. + wctestfile=${datadir}/.wctestfile + echo "" > $wctestfile + echo "line two" >> $wctestfile + echo "line three" >> $wctestfile + echo "line four" >> $wctestfile + testcount=`wc -l < ${wctestfile} | sed -e 's/ //g'` + if [ "$testcount" -ne 4 ] + then + echo "FAILURE! Your wc program's -l option produces wrong results." + echo "Update your wc (probably in a textutils package) to a correct version." + exit 1 + fi + + + # Split things up if we're given only one directory on the argument line + # and that directory has more than one subdirectory: + split_things_up="n" + if [ "$#" -eq 1 ] + then + count=0 + for x in $1/* + do + if [ -d "$x" ] + then + count=`expr $count + 1` + if [ $count -gt 1 ] + then + split_things_up="y" + break + fi + fi + done + fi + # If we're appending, don't split things up. + if [ "$append" = "y" ] + then + split_things_up="n" + fi + + case $split_things_up + in + y) make_filelists $follow $notopfollow --datadir "$datadir" --skip src "$1"/* || + exit 1 + if [ -d "$1"/src ] + then + make_filelists $notopfollow --datadir "$datadir" --prefix "src_" "$1"/src/* || + exit 1 + fi + ;; + *) make_filelists $follow $notopfollow --datadir "$datadir" "$@" || exit 1 + ;; + esac + + cd $datadir + if echo "Categorizing files." && + break_filelist --duplistfile sloc_hashes $duplicate_control $autogen * && + echo "Computing results." && + compute_all * + then + display_results=y + fi + echo + echo + ;; +esac + +# If we're appending, don't display results. 
+if [ "$append" = "y" ] +then + display_results="n" + echo "To display results, use the --cached option." +fi + + +case $display_results +in + y) + cd $datadir + case $details + in + y) get_sloc_details * ;; + *) get_sloc $addlang $showother $filecount $oneprogram $effort_model $schedule_model $personcost $overhead * ;; + esac;; +esac + diff --git a/sloccount.1 b/sloccount.1 new file mode 100644 index 0000000..8a5820c --- /dev/null +++ b/sloccount.1 @@ -0,0 +1,235 @@ +'\" +.\" (C) Copyright 2001-2004 David A. Wheeler (dwheeler at dwheeler.com) +.\" +.\" This program is free software; you can redistribute it and/or modify +.\" it under the terms of the GNU General Public License as published by +.\" the Free Software Foundation; either version 2 of the License, or +.\" (at your option) any later version. +.\" +.\" This program is distributed in the hope that it will be useful, +.\" but WITHOUT ANY WARRANTY; without even the implied warranty of +.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +.\" GNU General Public License for more details. +.\" +.\" You should have received a copy of the GNU General Public License +.\" along with this program; if not, write to the Free Software +.\" Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +.\" +.\" David A. Wheeler's website is http://www.dwheeler.com +.\" +.\" Created Mon Jan 08 23:00:00 2001, David A. Wheeler (dwheeler at dwheeler.com) +.\" +.TH SLOCCOUNT 1 "31 July 2004" "SLOCCount" "SLOCCount" +.SH NAME +sloccount \- count source lines of code (SLOC) +.SH SYNOPSIS +.B sloccount +.RB [ --version ] +.RB [ --cached ] +.RB [ --append ] +[ \fB--datadir\fR \fIdirectory\fR ] +.RB [ --follow ] +.RB [ --duplicates ] +.RB [ --crossdups ] +.RB [ --autogen ] +.RB [ --multiproject ] +.RB [ --filecount ] +.RB [ --wide ] +.RB [ --details ] +[ \fB--effort\fR \fIF\fR \fIE\fR ] +[ \fB--schedule\fR \fIF\fR \fIE\fR ] +[ \fB--personcost\fR \fIcost\fR ] +[ \fB--overhead\fR \fIoverhead\fR ] +[ \fB--addlang\fR \fIlanguage\fR ] +[ \fB--addlangall\fR ] +.RB [ -- ] +.I directories +.SH DESCRIPTION +.PP +sloccount counts the physical source lines of code (SLOC) +contained in descendants of the specified set of directories. +It automatically determines which files are source code files, +and it automatically determines the computer language used in each file. +By default it summarizes the SLOC results and presents various estimates +(such as effort and cost to develop), +but its output can be controlled by various options. +.PP +If you give sloccount a list of two or more directories, the counts +will be broken up according to that list. +There is one important limitation: +the basenames of the directories given as parameters must be different, +because those names are used to group summary information. +Thus, you can't run "sloccount /usr/src/hello /usr/local/src/hello". +This limitation only applies to parameters of sloccount - subdirectories +descended from the top directories can have the same basename. +.PP +If you give sloccount only a single directory, sloccount tries +to automatically find a reasonable breakdown for purposes of reporting +(so it'll produce a useful report). +In this case, if the directory has at least +two subdirectories, then those subdirectories will be used as the +breakdown. +If the single directory contains files as well as directories +(or if you give sloccount some files as parameters), those files will +be assigned to the directory "top_dir" so you can tell them apart +from other contents. 
+Finally, if there's a subdirectory named "src", then that subdirectory is +again broken down, with all the further subdirectories prefixed with "src_". +Thus, if directory "X" has a subdirectory "src", which contains subdirectory +"modules", sloccount will report a separate count for "src_modules". +.PP +sloccount normally considers all descendants of these directories, +though unless told otherwise it ignores symbolic links. +.PP +sloccount is the usual front-end of the package of tools named "SLOCCount". +Note that the name of the entire package has capital letters, while +the name of this front-end program does not. +.PP +sloccount will normally report estimates of schedule time, effort, and +cost, and for single projects it also estimates +the average number of active developers. +These are merely estimates, not etched in stone; you can modify the +parameters used to improve the estimates. + + +.SH OPTIONS +.TP 12 +.BI --version +Report the version number of SLOCCount and immediately exit. +This option can't be usefully combined with any other option. + +.TP 12 +.BI --cached +Do not recalculate; instead, use cached results from a previous execution. +Without the --cached or --append option, +sloccount automatically removes the data directory +and recreates it. + +.TP 12 +.BI --append +Do not remove previous calculations from the data directory; +instead, add the analysis to the current contents of the data directory. + +.TP +.BI --datadir " directory" +Store or use cached data in the given data directory; default value +is "~/.slocdata". + +.TP +.BI --follow +Follow symbolic links. + +.TP +.BI --duplicates +Count all duplicates. +Normally, if files have equal content (as determined using +MD5 hash values), only one is counted. + +.TP +.BI --crossdups +Count duplicates if they occur in different portions of the breakdown. +Thus, if the top directory contains many different projects, and you +want the duplicates in different projects to count in each project, +choose this option. + +.TP +.BI --autogen +Count source code files that appear to be automatically generated. +Normally these are excluded. + +.TP +.BI --multiproject +The different directories represent different projects; +otherwise, it's assumed that all of the source code belongs +to a single project. +This doesn't change the total number of files or SLOC values, but +it does affect the effort and schedule estimates. +Given this option, +effort is computed separately for each project (and then summed), +and the schedule is the estimated schedule of the largest project. + +.TP +.BI --filecount +Display counts of files instead of SLOC. + +.TP +.BI --wide +Display in the "wide" (tab-separated) format. + +.TP +.BI --details +Display details, that is, results for every source code file. + +.TP +.BI --effort " F E" +Change the factor and exponent for the effort model. +Effort (in person-months) is computed as F*(SLOC/1000)^E. + +.TP +.BI --schedule " F E" +Change the factor and exponent for the schedule model. +Schedule (in months) is computed as F*(effort)^E. + +.TP +.BI --personcost " cost" +Change the average annual salary to +.IR cost . + +.TP +.BI --overhead " overhead" +Change the overhead value to +.IR overhead . +Estimated cost is computed as effort * personcost * overhead. + +.TP +.BI --addlang " language" +Add a language not considered by default to be a ``language'' to be +reported. +Currently the only legal values for language are "makefile", "sql", +and "html". 
+These files +are not normally included in the SLOC counts, although their SLOCs are +internally calculated and they are shown in the file counts. +If you want to include more than one such language, do it by +passing --addlang more than once, e.g., --addlang makefile --addlang sql. + +.TP +.BI --addlangall +Add all languages not normally included in final reports. + +.SH "NOTES" +As with many other programs using Unix-like options, +directories whose names begin with a +dash (``-'') can be misinterpreted as options. +If the directories to be analyzed might begin with a dash, use the +double-dash (``-\ -'') to indicate the end of the option list +before listing the directories. + +.SH "BUGS" +Filenames with embedded newlines (in the directories or their +descendants) won't be handled correctly; they will be interpreted +as separate filenames where the newlines are inserted. +An attacker could prevent sloccount from working by +creating filenames of the form /normal/directory ... NEWLINE/dev/zero. +Such filenames are exceedingly rare in source code because they're a pain +to work with using other tools, too. +Future versions of sloccount may internally use NUL-separated filenames +(like GNU find's -print0 command) to fix this. + +There are many more languages not yet handled by SLOCCount. + +SLOCCount only reports physical source lines of code. +It would be +very useful if it could also report logical lines of code, and perhaps +other common metrics such as McCabe's complexity measures +and complexity density (complexity/SLOC for each function or procedure). + + +.SH "SEE ALSO" +See the SLOCCount website at http://www.dwheeler.com/sloccount. +Note that more detailed documentation is available both on the website +and with the SLOCCount package. + +.SH AUTHOR +David A. Wheeler (dwheeler@dwheeler.com). + diff --git a/sloccount.1.gz b/sloccount.1.gz new file mode 100644 index 0000000..33d29e9 Binary files /dev/null and b/sloccount.1.gz differ diff --git a/sloccount.html b/sloccount.html new file mode 100644 index 0000000..233ae9a --- /dev/null +++ b/sloccount.html @@ -0,0 +1,2464 @@ + + + +SLOCCount User's Guide + + +
+SLOCCount User's Guide +
+by David A. Wheeler (dwheeler, at, dwheeler.com) +
+August 1, 2004 +
+Version 2.26 +
+

+

Introduction

+

+SLOCCount (pronounced "sloc-count") is a suite of programs for counting +physical source lines of code (SLOC) in potentially large software systems. +Thus, SLOCCount is a "software metrics tool" or "software measurement tool". +SLOCCount was developed by David A. Wheeler, +originally to count SLOC in a GNU/Linux distribution, but it can be +used for counting the SLOC of arbitrary software systems. +

+SLOCCount is known to work on Linux systems, and has been tested +on Red Hat Linux versions 6.2, 7, and 7.1. +SLOCCount should run on many other Unix-like systems (if Perl is installed), +in particular, I would expect a *BSD system to work well. +Windows users can run sloccount by first installing +Cygwin. +SLOCCount is much slower on Windows/Cygwin, and it's not as easy to install +or use on Windows, but it works. +Of course, feel free to upgrade to an open source Unix-like system +(such as Linux or *BSD) instead :-). +

+SLOCCount can count physical SLOC for a wide number of languages.
+Listed alphabetically, they are
+Ada, Assembly (for many machines and assemblers),
+awk (including gawk and nawk),
+Bourne shell (and relatives such as bash, ksh, zsh, and pdksh),
+C, C++, C# (also called C-sharp or cs), C shell (including tcsh),
+COBOL, Expect, Fortran (including Fortran 90), Haskell,
+Java, lex (including flex),
+LISP (including Scheme),
+makefiles (though they aren't usually shown in final reports),
+Modula3, Objective-C, Pascal, Perl, PHP, Python, Ruby, sed,
+SQL (normally not shown),
+TCL, and Yacc.
+It can gracefully handle awkward situations in many languages,
+for example, it can determine the
+syntax used in different assembly language files and adjust appropriately,
+it knows about Python's use of string constants as comments, and it
+can handle various Perl oddities (e.g., perlpods, here documents,
+and Perl's __END__ marker).
+It even has a "generic" SLOC counter that you may be able to use to count the
+SLOC of other languages (depending on the language's syntax).
+
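+For example, several of the counters shipped in this package (such as
+sh_count, sed_count, and ruby_count) are just tiny wrappers that hand a
+comment character to the generic counter. A sketch of a wrapper for some
+other language whose comments run from "#" to end-of-line could look like
+this (the name "mylang_count" is hypothetical, not part of the package):
+
+  #!/bin/sh
+  # mylang_count (hypothetical) - count SLOC by stripping "#" comments
+  # and blank lines, just as the shipped sh_count wrapper does.
+  generic_count '#' $@
+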

+SLOCCount can also take a large list of files and automatically categorize
+them using a number of different heuristics.
+The heuristics automatically determine if a file
+is a source code file or not, and if so, which language it's written in.
+For example,
+it knows that ".pc" is usually a C source file for an Oracle preprocessor,
+but it can detect many circumstances where it's actually a file about
+a "PC" (personal computer).
+For another example, it knows that ".m" is the standard extension for
+Objective-C, but it will check the file contents to
+see if it really is Objective-C.
+It will even examine file headers to attempt to accurately determine
+the file's true type.
+As a result, you can analyze large systems completely automatically.
+

+Finally, SLOCCount has some report-generating tools +to collect the data generated, +and then present it in several different formats and sorted different ways. +The report-generating tool can also generate simple tab-separated files +so data can be passed on to other analysis tools (such as spreadsheets +and database systems). +

+SLOCCount will try to quickly estimate development time and effort given only +the lines of code it computes, using the original Basic COCOMO model. +This estimate can be improved if you can give more information about the project. +See the +discussion below about COCOMO, including intermediate COCOMO, +if you want to improve the estimates by giving additional information about +the project. +
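+For example (an illustration only, not output from the tool, and assuming
+the standard Basic COCOMO "organic" factors of roughly 2.4/1.05 for effort
+and 2.5/0.38 for schedule; use --effort and --schedule to supply your own
+factors), a program of 10,000 physical SLOC would be estimated at about
+2.4 * (10000/1000)^1.05 = 26.9 person-months of effort and about
+2.5 * (26.9)^0.38 = 8.7 months of schedule, which implies roughly 3
+developers active on average.
+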

+SLOCCount is open source software/free software (OSS/FS), +released under the GNU General Public License (GPL), version 2; +see the license below. +The master web site for SLOCCount is +http://www.dwheeler.com/sloccount. +You can learn a lot about SLOCCount by reading the paper that caused its +creation, available at +http://www.dwheeler.com/sloc. +Feel free to see my master web site at +http://www.dwheeler.com, which has +other material such as the +Secure Programming +for Linux and Unix HOWTO, +my list of +OSS/FS references, and my paper +Why OSS/FS? Look at +the Numbers! +Please send improvements by email +to dwheeler, at, dwheeler.com (DO NOT SEND SPAM - please remove the +commas, remove the spaces, and change the word "at" into the at symbol). +

+The following sections first give a "quick start"
+(discussing how to use SLOCCount once it's installed),
+discuss basic SLOCCount concepts,
+how to install it, how to set your PATH,
+how to install source code on RPM-based systems if you wish, and
+give more information on how to use the "sloccount" front-end.
+This is followed by material for advanced users:
+how to use SLOCCount tools individually (for when you want more control
+than the "sloccount" tool gives you), designer's notes,
+the definition of SLOC, and miscellaneous notes.
+The last sections state the license used (GPL) and give
+hints on how to submit changes to SLOCCount (if you decide to make changes
+to the program).
+
+

+

Quick Start

+

+Once you've installed SLOCCount (discussed below), +you can measure an arbitrary program by typing everything +after the dollar sign into a terminal session: +

+  $  sloccount topmost-source-code-directory
+
+

+The directory listed and all its descendants will be examined. +You'll see output while it calculates, +culminating with physical SLOC totals and +estimates of development time, schedule, and cost. +If the directory contains a set of directories, each of which is +a different project developed independently, +use the "--multiproject" option so the effort estimations +can correctly take this into account. +
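+For instance, if the top directory simply collects several independently
+developed projects, you might type something like:
+
+  $  sloccount --multiproject top-directory-of-projects
+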

+You can redisplay the data different ways by using the "--cached" +option, which skips the calculation stage and re-prints previously +computed information. +You can use other options to control what's displayed: +"--filecount" shows counts of files instead of SLOC, and +"--details" shows the detailed information about every source code file. +So, to display all the details of every file once you've previously +calculated the results, just type: +

+  sloccount --cached --details
+
+

+You'll notice that the default output ends with a request. +If you use this data (e.g., in a report), please +credit that data as being "generated using 'SLOCCount' by David A. Wheeler." +I make no money from this program, so at least please give me some credit. +

+SLOCCount tries to ignore all automatically generated files, but its
+heuristics to detect this are necessarily imperfect (after all, even humans
+sometimes have trouble determining if a file was automatically generated).
+If possible, try to clean out automatically generated files from
+the source directories --
+in many situations "make clean" does this.
+

+There's more to SLOCCount than this, but first we'll need to +explain some basic concepts, then we'll discuss other options +and advanced uses of SLOCCount. + +

+

Basic Concepts

+

+SLOCCount counts physical SLOC, also called "non-blank, non-comment lines". +More formally, physical SLOC is defined as follows: +``a physical source line of code (SLOC) is a line ending +in a newline or end-of-file marker, +and which contains at least one non-whitespace non-comment character.'' +Comment delimiters (characters other than newlines starting and ending +a comment) are considered comment characters. +Data lines only including whitespace +(e.g., lines with only tabs and spaces in multiline strings) are not included. +
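+For example (an illustration, not output from the tool), the following
+shell fragment contains exactly two physical SLOC; the comment-only line
+and the blank line are not counted:
+
+  # this comment-only line is not counted
+
+  echo "hello"      # counted: one physical SLOC
+  echo "goodbye"    # counted: another physical SLOC
+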

+In SLOCCount, there are 3 different directories: +

    +
  1. The "source code directory", a directory containing the source code + being measured + (possibly in recursive subdirectories). The directories immediately + contained in the source code directory will normally be counted separately, + so it helps if your system is designed so that this top set of directories + roughly represents the system's major components. + If it doesn't, there are various tricks you can use to group source + code into components, but it's more work. + You don't need write access to the source code directory, but + you do need read access to all files, and read and search (execute) access + to all subdirectories. +
  2. The "bin directory", the directory containing the SLOCCount executables. + By default, installing the program creates a subdirectory + named "sloccount-VERSION" which is the bin directory. + The bin directory must be part of your PATH. +
  3. The "data directory", which stores the analysis results. + When measuring programs using "sloccount", by default + this is the directory ".slocdata" inside your home directory. + When you use the advanced SLOCCount tools directly, + in many cases this must be your "current" directory. + Inside the data directory are "data directory children" - these are + subdirectories that contain a file named "filelist", and each child + is used to represent a different project or a different + major component of a project. +
+

+SLOCCount can handle many different programming languages, and separate +them by type (so you can compare the use of each). +Here is the set of languages, sorted alphabetically; +common filename extensions are in +parentheses, with SLOCCount's ``standard name'' for the language +listed in brackets: +

    +
  1. Ada (.ada, .ads, .adb, .pad) [ada] +
  2. Assembly for many machines and assemblers (.s, .S, .asm) [asm] +
  3. awk (.awk) [awk] +
  4. Bourne shell and relatives such as bash, ksh, zsh, and pdksh (.sh) [sh] +
  5. C (.c, .pc, .ec, .ecp) [ansic] +
  6. C++ (.C, .cpp, .cxx, .cc, .pcc) [cpp] +
  7. C# (.cs) [cs] +
  8. C shell including tcsh (.csh) [csh] +
  9. COBOL (.cob, .cbl, .COB, .CBL) [cobol] +
  10. Expect (.exp) [exp] +
  11. Fortran 77 (.f, .f77, .F, .F77) [fortran] +
  12. Fortran 90 (.f90, .F90) [f90] +
  13. Haskell (.hs, .lhs) [haskell]; deals with both types of literate files. +
  14. Java (.java) [java] +
  15. lex (.l) [lex] +
  16. LISP including Scheme (.cl, .el, .scm, .lsp, .jl) [lisp] +
  17. makefiles (makefile) [makefile] +
  18. ML (.ml, .ml3) [ml] +
  19. Modula3 (.m3, .mg, .i3, .ig) [modula3] +
  20. Objective-C (.m) [objc] +
  21. Pascal (.p, .pas) [pascal] +
  22. Perl (.pl, .pm, .perl) [perl] +
  23. PHP (.php, .php[3456], .inc) [php] +
  24. Python (.py) [python] +
  25. Ruby (.rb) [ruby] +
  26. sed (.sed) [sed] +
  27. sql (.sql) [sql] +
  28. TCL (.tcl, .tk, .itk) [tcl] +
  29. Yacc (.y) [yacc] +
+ +

+

Installing SLOCCount

+

+Obviously, before using SLOCCount you'll need to install it. +SLOCCount depends on other programs, in particular perl, bash, +a C compiler (gcc will do), and md5sum +(you can get a useful md5sum program in the ``textutils'' package +on many Unix-like systems), so you'll need to get them installed +if they aren't already. +

+If your system uses RPM version 4 or greater to install software +(e.g., Red Hat Linux 7 or later), just download the SLOCCount RPM +and install it using a normal installation command; from the text line +you can use: +

+  rpm -Uvh sloccount*.rpm
+
+

+Everyone else will need to install from a tar file, and Windows users will +have to install Cygwin before installing sloccount. +

+If you're using Windows, you'll need to first install +Cygwin. +By installing Cygwin, you'll install an environment and a set of +open source Unix-like tools. +Cygwin essentially creates a Unix-like environment in which sloccount can run. +You may be able to run parts of sloccount without Cygwin, in particular, +the perl programs should run in the Windows port of Perl, but you're +on your own - many of the sloccount components expect a Unix-like environment. +If you want to install Cygwin, go to the +Cygwin main page +and install it. +If you're using Cygwin, install it to use Unix newlines, not +DOS newlines - DOS newlines will cause odd errors in SLOCCount +(and probably other programs, too). +I have only tested a "full" Cygwin installation, so I suggest installing +everything. +If you're short on disk space, at least install +binutils, bash, fileutils, findutils, +gcc, grep, gzip, make, man, perl, readline, +sed, sh-utils, tar, textutils, unzip, and zlib; +you should probably install vim as well, +and there may be other dependencies as well. +By default Cygwin will create a directory C:\cygwin\home\NAME, +and will set up the ability to run Unix programs +(which will think that the same directory is called /home/NAME). +Now double-click on the Cygwin icon, or select from the Start menu +the selection Programs / Cygnus Solutions / Cygwin Bash shell; +you'll see a terminal screen with a Unix-like interface. +Now follow the instructions (next) for tar file users. +

+If you're installing from the tar file, download the file +(into your home directory is fine). +Unpacking the file will create a subdirectory, so if you want the +unpacked subdirectory to go somewhere special, "cd" to where you +want it to go. +Most likely, your home directory is just fine. +Now gunzip and untar SLOCCount (the * replaces the version #) by typing +this at a terminal session: +

+  gunzip -c sloccount*.tar.gz | tar xvf -
+
+Replace "sloccount*.tar.gz" shown above +with the full path of the downloaded file, wherever that is. +You've now created the "bin directory", which is simply the +"sloccount-VERSION" subdirectory created by the tar command +(where VERSION is the version number). +

+Now you need to compile the few compiled programs in the "bin directory" so +SLOCCount will be ready to go. +First, cd into the newly-created bin directory, by typing: +

+  cd sloccount*
+
+

+You may then need to override some installation settings.
+You can do this by editing the supplied makefile, or alternatively,
+by providing options to "make" whenever you run make.
+The supplied makefile assumes your C compiler is named "gcc", which
+is true for most Linux systems, *BSD systems, and Windows systems using Cygwin.
+If this isn't true, you'll need to set
+the "CC" variable to the correct value (e.g., "cc").
+You can also modify where the files are stored; this variable is
+called PREFIX and its default is /usr/local
+(older versions of sloccount defaulted to /usr).
+
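+For example (a sketch only; the variables actually honored are the ones
+defined in the supplied makefile), a system whose C compiler is invoked as
+"cc", and which should keep the files under /opt/sloccount, might use:
+
+  make CC=cc PREFIX=/opt/sloccount
+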

+If you're using Windows and Cygwin, you +must override one of the installation +settings, EXE_SUFFIX, for installation to work correctly. +One way to set this value is to edit the "makefile" file so that +the line beginning with "EXE_SUFFIX" reads as follows: +

+  EXE_SUFFIX=.exe
+
+If you're using Cygwin and you choose to modify the "makefile", you
+can use any text editor on the Cygwin side, or you can use a
+Windows text editor if it can read and write Unix-formatted text files.
+Cygwin users are free to use vim, for example.
+If you're installing into your home directory and using the default locations,
+Windows text editors will see the makefile as file
+C:\cygwin\home\NAME\sloccount-VERSION\makefile.
+Note that the Windows "Notepad" application doesn't work well, because it's not
+able to handle Unix text files correctly.
+Since this can be quite a pain, Cygwin users may instead decide to skip
+editing the makefile and simply override its values with make options
+during installation.
+

+Finally, compile the few compiled programs in it by typing "make": +

+  make
+
+If you didn't edit the makefile in the previous step, you
+need to provide options to each make invocation to set the correct values.
+This is done by simply saying (after "make") the name of the variable,
+an equal sign, and its correct value.
+Thus, to compile the program on a Windows system using Cygwin, you can
+skip modifying the makefile file by typing this instead of just "make":
+
+  make EXE_SUFFIX=.exe
+
+

+If you want, you can install sloccount for system-wide use without +using the RPM version. +Windows users using Cygwin should probably do this, particularly +if they chose a "local" installation. +To do this, first log in as root (Cygwin users don't need to do this +for local installation). +Edit the makefile to match your system's conventions, if necessary, +and then type "make install": +

+  make install
+
+If you need to set some make options, remember to do that here too. +If you use "make install", you can uninstall it later using +"make uninstall". +Installing sloccount for system-wide use is optional; +SLOCCount works without a system-wide installation. +However, if you don't install sloccount system-wide, you'll need to +set up your PATH variable; see the section on +setting your path. +

+A note for Cygwin users (and some others): some systems, including Cygwin, +don't set up the environment quite right and thus can't display the manual +pages as installed. +The problem is that they forget to search /usr/local/share/man for +manual pages. +If you want to read the installed manual pages, type this +into a Bourne-like shell: +

+  MANPATH=/usr/local/share/man:/usr/share/man:/usr/man
+  export MANPATH
+
+Or, if you use a C shell: +
+  setenv MANPATH "/usr/local/share/man:/usr/share/man:/usr/man"
+
+From then on, you'll be able to view the reference manual pages +by typing "man sloccount" (or by using whatever manual page display system +you prefer). +

+ +

+

Installing The Source Code To Measure

+

+Obviously, you must install the software source code you're counting, +so somehow you must create the "source directory" +with the source code to measure. +You must also make sure that permissions are set so the software can +read these directories and files. +

+For example, if you're trying to count the SLOC for an RPM-based Linux system, +install the software source code by doing the following as root +(which will place all source code into the source directory +/usr/src/redhat/BUILD): +

    +
  1. Install all source rpm's: +
    +    mount /mnt/cdrom
    +    cd /mnt/cdrom/SRPMS
    +    rpm -ivh *.src.rpm
    +
    +
  2. Remove RPM spec files you don't want to count: +
    +    cd ../SPECS
    +    (look at the contents of the spec files and remove the ones you don't want to count)
    +
    +
  3. Build/prep all spec files: +
    +    rpm -bp *.spec
    +
    +
  4. Set permissions so the source files can be read by all: +
    +    chmod -R a+rX /usr/src/redhat/BUILD
    +
    +
+

+Here's an example of how to download source code from an +anonymous CVS server. +Let's say you want to examine the source code in GNOME's "gnome-core" +directory, as stored at the CVS server "anoncvs.gnome.org". +Here's how you'd do that: +

    +
  1. Set up site and login parameters: +
    +  export CVSROOT=':pserver:anonymous@anoncvs.gnome.org:/cvs/gnome'
    +
    +
  2. Log in: +
    +  cvs login
    +
    +
  3. Check out the software (copy it to your local directory), using +mild compression to save on bandwidth: +
    +  cvs -z3 checkout gnome-core
    +
    +
+

+Of course, if you have a non-anonymous account, you'd set CVSROOT +to reflect this. For example, to log in using the "pserver" +protocol as ACCOUNT_NAME, do: +

+  export CVSROOT=':pserver:ACCOUNT_NAME@cvs.gnome.org:/cvs/gnome'
+
+

+You may need root privileges to install the source code and to give +another user permission to read it, but please avoid running the +sloccount program as root. +Although I know of no specific reason this would be a problem, +running any program as root turns off helpful safeguards. +

+Although SLOCCount tries to detect (and ignore) many cases where +programs are automatically generated, these heuristics are necessarily +imperfect. +So, please don't run any programs that generate other programs - just +do enough to get the source code prepared for counting. +In general you shouldn't run "make" on the source code, and if you have, +consider running "make clean" or "make really_clean" on the source code first. +It often doesn't make any difference, but identifying those circumstances +is difficult. +

+SLOCCount will not automatically uncompress files that are +compressed/archive files (such as .zip, .tar, or .tgz files). +Often such files are just "left over" old versions or files +that you're already counting. +If you want to count the contents of compressed files, uncompress them first. +

+SLOCCount also doesn't delve into files using "literate programming" +techniques, in part because there are too many incompatible formats +that implement it. +Thus, run the tools to extract the code from the literate programming files +before running SLOCCount. Currently, the only exception to this rule is +Haskell. + + +

Setting your PATH

+Before you can run SLOCCount, you'll need to make sure +the SLOCCount "bin directory" is in your PATH. +If you've installed SLOCCount in a system-wide location +such as /usr/bin, then you needn't do more; the RPMs and "make install" +commands essentially do this. +

+Otherwise, in Bourne-shell variants, type: +

+    PATH="$PATH:the directory with SLOCCount's executable files"
+    export PATH
+
+Csh users should instead type: +
+    setenv PATH "$PATH:the directory with SLOCCount's executable files"
+
+ +

Using SLOCCount: The Basics

+ +Normal use of SLOCCount is very simple. +In a terminal window just type "sloccount", followed by a +list of the source code directories to count. +If you give it only a single directory, SLOCCount tries to be +a little clever and break the source code into +subdirectories for purposes of reporting: +
    +
  1. If the directory has at least
+two subdirectories, then those subdirectories will be used as the
+breakdown (see the example below).
+
  2. If the single directory contains files as well as directories +(or if you give sloccount some files as parameters), those files will +be assigned to the directory "top_dir" so you can tell them apart +from other directories. +
  3. If there's a subdirectory named "src", then that subdirectory is again +broken down, with all the further subdirectories prefixed with "src_". +So if directory "X" has a subdirectory "src", which contains subdirectory +"modules", the program will report a separate count from "src_modules". +
+In the terminology discussed above, each of these directories would become +"data directory children." +

+You can also give "sloccount" a list of directories, in which case the +report will be broken down by these directories +(make sure that the basenames of these directories differ). +SLOCCount normally considers all descendants of these directories, +though unless told otherwise it ignores symbolic links. +
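+For example, to measure two separately maintained source trees at once
+(the directory names here are placeholders for wherever your code actually
+lives), you might type:

+  sloccount /usr/src/myproject1 /usr/src/myproject2
+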

+This is all easier to explain by example. +Let's say that we want to measure Apache 1.3.12 as installed using an RPM. +Once it's installed, we just type: +

+ sloccount /usr/src/redhat/BUILD/apache_1.3.12
+
+The output we'll see shows status reports while it analyzes things, +and then it prints out: + +
+SLOC	Directory	SLOC-by-Language (Sorted)
+24728   src_modules     ansic=24728
+19067   src_main        ansic=19067
+8011    src_lib         ansic=8011
+5501    src_os          ansic=5340,sh=106,cpp=55
+3886    src_support     ansic=2046,perl=1712,sh=128
+3823    src_top_dir     sh=3812,ansic=11
+3788    src_include     ansic=3788
+3469    src_regex       ansic=3407,sh=62
+2783    src_ap          ansic=2783
+1378    src_helpers     sh=1345,perl=23,ansic=10
+1304    top_dir         sh=1304
+104     htdocs          perl=104
+31      cgi-bin         sh=24,perl=7
+0       icons           (none)
+0       conf            (none)
+0       logs            (none)
+
+
+ansic:       69191 (88.85%)
+sh:           6781 (8.71%)
+perl:         1846 (2.37%)
+cpp:            55 (0.07%)
+
+
+Total Physical Source Lines of Code (SLOC)                   = 77873
+Estimated Development Effort in Person-Years (Person-Months) = 19.36 (232.36)
+ (Basic COCOMO model, Person-Months = 2.4 * (KSLOC**1.05))
+Estimated Schedule in Years (Months)                         = 1.65 (19.82)
+ (Basic COCOMO model, Months = 2.5 * (person-months**0.38))
+Estimated Average Number of Developers  (Effort/Schedule)    = 11.72
+Total Estimated Cost to Develop                              = $ 2615760
+ (average salary = $56286/year, overhead = 2.4).
+
+Please credit this data as "generated using 'SLOCCount' by David A. Wheeler."
+
+

+Interpreting this should be straightforward.
+The Apache directory has several subdirectories, including "htdocs", "cgi-bin",
+and "src".
+The "src" directory has many subdirectories in it
+("modules", "main", and so on).
+Code directly
+contained in the main directory /usr/src/redhat/BUILD/apache_1.3.12
+is labelled "top_dir", while
+code directly contained in the src subdirectory is labelled "src_top_dir".
+Code in the "src/modules" directory is labelled "src_modules" here.
+The output shows each major directory broken
+out, sorted from largest to smallest.
+Thus, the "src/modules" directory had the most code of the directories,
+24728 physical SLOC, all of it in C.
+The "src/helpers" directory had a mix of shell, perl, and C; note that
+when multiple languages are shown, the list of languages in that child
+is also sorted from largest to smallest.
+

+Below the per-component set is a list of all languages used, +with their total SLOC shown, sorted from most to least. +After this is the total physical SLOC (77,873 physical SLOC in this case). +

+Next is an estimation of the effort and schedule (calendar time) +it would take to develop this code. +For effort, the units shown are person-years (with person-months +shown in parentheses); for schedule, total years are shown first +(with months in parentheses). +When invoked through "sloccount", the default assumption is that all code is +part of a single program; the "--multiproject" option changes this +to assume that all top-level components are independently developed +programs. +When "--multiproject" is invoked, each project's efforts are estimated +separately (and then summed), and the schedule estimate presented +is the largest estimated schedule of any single component. +

+By default the "Basic COCOMO" model is used for estimating +effort and schedule; this model +includes design, code, test, and documentation time (both +user/admin documentation and development documentation). +See below for more information on COCOMO +as it's used in this program. +

+Next are several numbers that attempt to estimate what it would have cost
+to develop this program.
+This is simply the amount of effort, multiplied by the average annual
+salary and by the "overhead multiplier".
+The default annual salary is
+$56,286 per year; this value comes from
+ComputerWorld's September 4, 2000 Salary Survey,
+which reported the average U.S. programmer/analyst salary in the year 2000.
+You might consider using other numbers
+(ComputerWorld's September 3, 2001 Salary Survey found
+average U.S. programmer/analysts making $55,100, senior
+systems programmers averaging $68,900, and senior systems analysts averaging
+$72,300).
+

+Overhead is much harder to estimate; I did not find a definitive source +for information on overheads. +After informal discussions with several cost analysts, +I determined that an overhead of 2.4 +would be representative of the overhead sustained by +a typical software development company. +As discussed in the next section, you can change these numbers too. + +

+You may be surprised by the high cost estimates, but remember,
+these include design, coding, testing, documentation (both for users
+and for programmers), and a wrap rate for corporate overhead
+(to cover facilities, equipment, accounting, and so on).
+Many programmers forget these other costs and are shocked by the high figures.
+If you only want to know the cost of the coding itself, you'll need to
+isolate just that portion of the estimate (see the COCOMO discussion below).
+

+Note that if any top-level directory has a file named PROGRAM_LICENSE, +that file is assumed to contain the name of the license +(e.g., "GPL", "LGPL", "MIT", "BSD", "MPL", and so on). +If there is at least one such file, sloccount will also report statistics +on licenses. + +

+Note: sloccount internally uses MD5 hashes to detect duplicate files, +and thus needs some program that can compute MD5 hashes. +Normally it will use "md5sum" (available, for example, as a GNU utility). +If that doesn't work, it will try to use "md5" and "openssl", and you may +see error messages in this format: +

+ Can't exec "md5sum": No such file or directory at
+     /usr/local/bin/break_filelist line 678, <CODE_FILE> line 15.
+ Can't exec "md5": No such file or directory at
+     /usr/local/bin/break_filelist line 678, <CODE_FILE> line 15.
+
+You can safely ignore these error messages; these simply show that +SLOCCount is probing for a working program to compute MD5 hashes. +For example, Mac OS X users normally don't have md5sum installed, but +do have md5 installed, so they will probably see the first error +message (because md5sum isn't available), followed by a note that a +working MD5 program was found. + + +

Options

+The program "sloccount" has a large number of options +so you can control what is selected for counting and how the +results are displayed. +

+There are several options that control which files are selected +for counting: +

+ --duplicates   Count all duplicate files as normal files
+ --crossdups    Count duplicate files if they're in different data directory
+                children.
+ --autogen      Count automatically generated files
+ --follow       Follow symbolic links (normally they're ignored)
+ --addlang      Add languages to be counted that normally aren't shown.
+ --append       Add more files to the data directory
+
+Normally, files which have exactly the same content are counted only once
+(data directory children are processed alphabetically, so the child that comes
+first in the alphabet will be considered the owner of the master copy).
+If you want them all counted, use "--duplicates".
+Sometimes when you use sloccount, each directory represents a different
+project, in which case you might want to specify "--crossdups".
+The program tries to reject files that are automatically generated
+(e.g., a C file generated by bison); you can disable this rejection with "--autogen".
+You can use "--addlang" to show makefiles and SQL files, which aren't
+usually counted.
+
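+For example, when measuring a directory full of separately developed projects
+while also counting makefiles, you might type something like the following
+(the path is a placeholder, and "makefile" is assumed to be the language
+name that "--addlang" expects):

+  sloccount --crossdups --addlang makefile /usr/src/myprojects
+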

+Possibly the most important option is "--cached". +Normally, when sloccount runs, it computes a lot of information and +stores this data in a "data directory" (by default, "~/.slocdata"). +The "--cached" option tells sloccount to use data previously computed, +greatly speeding up use once you've done the computation once. +The "--cached" option can't be used along with the options used to +select what files should be counted. +You can also select a different data directory by using the +"--datadir" option. +
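+For instance, you might compute the data once into a data directory of your
+own choosing and then quickly redisplay it from the cache (the paths are
+placeholders, and this sketch assumes "--cached" can be given without
+repeating the directory list):

+  sloccount --datadir ~/myslocdata /usr/src/myproject
+  sloccount --datadir ~/myslocdata --cached --filecount
+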

+There are many options for controlling the output: +

+ --filecount     Show counts of files instead of SLOC.
+ --details       Present details: present one line per source code file.
+ --wide          Show "wide" format.  Ignored if "--details" selected
+ --multiproject  Assume each directory is for a different project
+                 (this modifies the effort estimation calculations)
+ --effort F E    Change the effort estimation model, so that it uses
+                 F as the factor and E as the exponent.
+ --schedule F E  Change the schedule estimation model, so that it uses
+                 F as the factor and E as the exponent.
+ --personcost P  Change the average annual salary to P.
+ --overhead O    Change the overhead multiplier to O.
+ --              End of options
+
+

+Basically, the first time you use sloccount, if you're measuring +a set of projects (not a single project) you might consider +using "--crossdups" instead of the defaults. +Then, you can redisplay data quickly by using "--cached", +combining it with options such as "--filecount". +If you want to send the data to another tool, use "--details". +

+If you're measuring a set of projects, you probably ought to pass +the option "--multiproject". +When "--multiproject" is used, efforts are computed for each component +separately and summed, and the time estimate used is the maximum +single estimated time. +
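+For example, a plausible invocation for a tree whose top-level directories
+are independently developed projects (the path is a placeholder) would be:

+  sloccount --multiproject --crossdups /usr/src/myprojects
+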

+The "--details" option dumps the available data in 4 columns, +tab-separated, where each line +represents a source code file in the data directory children identified. +The first column is the SLOC, the second column is the language type, +the third column is the name of the data directory child +(as it was given to get_sloc_details), +and the last column is the absolute pathname of the source code file. +You can then pipe this output to "sort" or some other tool for further +analysis (such as a spreadsheet or RDBMS). +

+You can change the parameters used to estimate effort using "--effort". +For example, if you believe that in the environment being used +you can produce 2 KSLOC/month scaling linearly, then +that means that the factor for effort you should use is 1/2 = 0.5 month/KSLOC, +and the exponent for effort is 1 (linear). +Thus, you can use "--effort 0.5 1". +

+You can also set the annual salary and overheads used to compute +estimated development cost. +While "$" is shown, there's no reason you have to use dollars; +the unit of development cost is the same unit as the unit used for +"--personcost". + +
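+For example, to redo the estimates with a different salary and overhead
+(these particular numbers are arbitrary, and the path is a placeholder):

+  sloccount --personcost 60000 --overhead 2.0 /usr/src/myproject
+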

More about COCOMO

+ +

+By default SLOCCount uses a very simple estimating model for effort and schedule: +the basic COCOMO model in the "organic" mode (modes are more fully discussed below). +This model estimates effort and schedule, including design, code, test, +and documentation time (both user/admin documentation and development documentation). +Basic COCOMO is a nice simple model, and it's used as the default because +it doesn't require any information about the code other than the SLOC count +already computed. +

+However, basic COCOMO's accuracy is limited for the same reason -
+basic COCOMO doesn't take a number of important factors into account.
+Take these estimates as just that - estimates - they're not grand truths.
+If you have the necessary information, you can improve the model's accuracy
+by taking these factors into account.
+At the least, you can quickly check that the right "mode" is being used.
+You can also use the "Intermediate COCOMO" and "Detailed COCOMO" models,
+which take more factors into account and are likely to produce more
+accurate estimates as a result.
+You pass this additional information to sloccount using its
+"--effort" and "--schedule" options (as discussed in
+options).
+

+To use the COCOMO model, you first need to determine your application's
+mode, which can be "organic", "semidetached", or "embedded".
+Most software is "organic" (which is why it's the default).
+Here are simple definitions of these modes:
+

    +
  • Organic: Relatively small software teams develop software in a highly +familiar, in-house environment.  It has a generally stable development +environment, minimal need for innovative algorithms, and requirements can +be relaxed to avoid extensive rework.
  • +
  • Semidetached: This is an intermediate +step between organic and embedded. This is generally characterized by reduced +flexibility in the requirements.
  • +
  • Embedded: The project must operate +within tight (hard-to-meet) constraints, and requirements +and interface specifications are often non-negotiable. +The software will be embedded in a complex environment that the +software must deal with as-is.
  • +
+By default, SLOCCount uses the basic COCOMO model in the organic mode. +For the basic COCOMO model, here are the critical factors for --effort and --schedule:
+
    +
  • Organic: effort factor = 2.4, exponent = 1.05; schedule factor = 2.5, exponent = 0.38
  • +
  • Semidetached: effort factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35
  • +
  • Embedded: effort factor = 3.6, exponent = 1.20; schedule factor = 2.5, exponent = 0.32
  • +
+Thus, if you want to use SLOCCount but the project is actually semidetached, +you can use the options "--effort 3.0 1.12 --schedule 2.5 0.35" +to get a more accurate estimate. +
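+Putting that together, a complete invocation for a semidetached project
+might look like this (the directory name is a placeholder):

+  sloccount --effort 3.0 1.12 --schedule 2.5 0.35 /usr/src/myproject
+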
+For more accurate estimates, you can use the intermediate COCOMO models. +For intermediate COCOMO, use the following figures:
+
    +
  • Organic: effort base factor = 2.3, exponent = 1.05; schedule factor = 2.5, exponent = 0.38
  • +
  • Semidetached: effort base factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35
  • +
  • Embedded: effort base factor = 2.8, exponent = 1.20; schedule factor = 2.5, exponent = 0.32
  • +
+The intermediate COCOMO values for schedule are exactly the same as in the basic
+COCOMO model; the starting effort values are not quite the same, as noted
+in Boehm's book. However, in the intermediate COCOMO model, you don't
+normally use the effort factors as-is; instead, you apply various corrective factors
+(called cost drivers). To use these corrections, you consider
+all the cost drivers, determine which rating best describes each of them,
+and multiply their corrective values by the effort base factor.
+The result is the final effort factor.
+Here are the cost drivers (from Boehm's book, tables 8-2 and 8-3):
+
Cost Drivers + Ratings +
ID + Driver Name + Very Low + Low + Nominal + High + Very High + Extra High +
RELY + Required software reliability + 0.75 (effect is slight inconvenience) + 0.88 (easily recovered losses) + 1.00 (recoverable losses) + 1.15 (high financial loss) + 1.40 (risk to human life) +   +
DATA + Database size +   + 0.94 (database bytes/SLOC < 10) + 1.00 (D/S between 10 and 100) + 1.08 (D/S between 100 and 1000) + 1.16 (D/S > 1000) +   +
CPLX + Product complexity + 0.70 (mostly straightline code, simple arrays, simple expressions) + 0.85 + 1.00 + 1.15 + 1.30 + 1.65 (microcode, multiple resource scheduling, device timing dependent coding) +
TIME + Execution time constraint +   +   + 1.00 (<50% use of available execution time) + 1.11 (70% use) + 1.30 (85% use) + 1.66 (95% use) +
STOR + Main storage constraint +   +   + 1.00 (<50% use of available storage) + 1.06 (70% use) + 1.21 (85% use) + 1.56 (95% use) +
VIRT + Virtual machine (HW and OS) volatility +   + 0.87 (major change every 12 months, minor every month) + 1.00 (major change every 6 months, minor every 2 weeks) + 1.15 (major change every 2 months, minor changes every week) + 1.30 (major changes every 2 weeks, minor changes every 2 days) +   +
TURN + Computer turnaround time +   + 0.87 (interactive) + 1.00 (average turnaround < 4 hours) + 1.07 + 1.15 +   +
ACAP + Analyst capability + 1.46 (15th percentile) + 1.19 (35th percentile) + 1.00 (55th percentile) + 0.86 (75th percentile) + 0.71 (90th percentile) +   +
AEXP + Applications experience + 1.29 (<= 4 months experience) + 1.13 (1 year) + 1.00 (3 years) + 0.91 (6 years) + 0.82 (12 years) +   +
PCAP + Programmer capability + 1.42 (15th percentile) + 1.17 (35th percentile) + 1.00 (55th percentile) + 0.86 (75th percentile) + 0.70 (90th percentile) +   +
VEXP + Virtual machine experience + 1.21 (<= 1 month experience) + 1.10 (4 months) + 1.00 (1 year) + 0.90 (3 years) +   +   +
LEXP + Programming language experience + 1.14 (<= 1 month experience) + 1.07 (4 months) + 1.00 (1 year) + 0.95 (3 years) +   +   +
MODP + Use of "modern" programming practices (e.g. structured programming) + 1.24 (No use) + 1.10 + 1.00 (some use) + 0.91 + 0.82 (routine use) +   +
TOOL + Use of software tools + 1.24 + 1.10 + 1.00 (basic tools) + 0.91 (test tools) + 0.83 (requirements, design, management, documentation tools) +   +
SCED + Required development schedule + 1.23 (75% of nominal) + 1.08 (85% of nominal) + 1.00 (nominal) + 1.04 (130% of nominal) + 1.10 (160% of nominal) +   +
+
+
+
+So, once all of the factors have been multiplied together, you can
+then use the "--effort" flag to set more accurate factors and exponents.
+Note that some factors will probably not be "nominal" simply because
+times have changed since COCOMO was originally developed; some practices
+that were once rare (and desirable) are now commonplace.
+For example,
+for many software projects today, virtual machine volatility tends to
+be low, and the
+use of "modern" programming practices (structured programming,
+object-oriented programming, abstract data types, etc.) tends to be high.
+The cost drivers let COCOMO account for these differences.
+

+For example, imagine that you're examining a fairly simple application that
+meets the "organic" requirements. For intermediate COCOMO, organic projects
+have an effort base factor of 2.3 and an exponent of 1.05, as noted above.
+We then examine all the cost drivers to determine a corrected base factor.
+For this example, imagine
+that we determine the values of these cost drivers are as follows:
+
Cost Drivers + Ratings +
ID + Driver Name + Rating + Multiplier +
RELY + Required software reliability + Low - easily recovered losses + 0.88 +
DATA + Database size + Low + 0.94 +
CPLX + Product complexity + Nominal + 1.00 +
TIME + Execution time constraint + Nominal + 1.00 +
STOR + Main storage constraint + Nominal + 1.00 +
VIRT + Virtual machine (HW and OS) volatility + Low (major change every 12 months, minor every month) + 0.87 +
TURN + Computer turnaround time + Nominal + 1.00 +
ACAP + Analyst capability + Nominal (55th percentile) + 1.00 +
AEXP + Applications experience + Nominal (3 years) + 1.00 +
PCAP + Programmer capability + Nominal (55th percentile) + 1.00 +
VEXP + Virtual machine experience + High (3 years) + 0.90 +
LEXP + Programming language experience + High (3 years) + 0.95 +
MODP + Use of "modern" programming practices (e.g. structured programming) + High (Routine use) + 0.82 +
TOOL + Use of software tools + Nominal (basic tools) + 1.00 +
SCED + Required development schedule + Nominal + 1.00 +
+

+So, starting with the base factor (2.3 in this case) and multiplying
+it by each of the driver values in this example, we compute:
+

2.3*0.88*0.94*1*1*1*0.87*1.00*1*1*1*0.90*0.95*0.82*1*1
+For this
+example, the final factor for the effort calculation is 1.1605. You would then
+invoke sloccount with "--effort 1.1605 1.05" to pass in the corrected factor
+and exponent for the effort estimation.
+You don't need to use "--schedule" to set the factors when you're using
+the organic model, because in SLOCCount
+the default values are the values for the organic model
+(though you can set the scheduling parameters explicitly
+anyway with "--schedule 2.5 0.38").
+You do need to use the --schedule option for
+embedded and semidetached projects, because those modes have different
+schedule parameters. The final command would be:
+
+sloccount --effort 1.1605 1.05 --schedule 2.5 0.38 my_project
+

+The detailed COCOMO model requires breaking information down further. +

+For more information about the original COCOMO model, including the detailed +COCOMO model, see the book +Software Engineering Economics by Barry Boehm. +

+You may be surprised by the high cost estimates, but remember,
+these include design, coding, testing (including
+integration testing), documentation (both for users
+and for programmers), and a wrap rate for corporate overhead
+(to cover facilities, equipment, accounting, and so on).
+Many programmers forget these other costs and are shocked by the
+resulting figures.
+

+If you want to know a subset of this cost, you'll need to isolate +just those figures that you're trying to measure. +For example, let's say you want to find the money a programmer would receive +to do just the coding of the units of the program +(ignoring wrap rate, design, testing, integration, and so on). +According to Boehm's book (page 65, table 5-2), +the percentage varies by product size. +For effort, code and unit test takes 42% for small (2 KSLOC), 40% for +intermediate (8 KSLOC), 38% for medium (32 KSLOC), and 36% for large +(128 KSLOC). +Sadly, Boehm doesn't separate coding from unit test; perhaps +50% of the time is spent in unit test in traditional proprietary +development (including fixing bugs found from unit test). +If you want to know the income to the programmer (instead of cost to +the company), you'll also want to remove the wrap rate. +Thus, a programmer's income to only write the code for a +small program (circa 2 KSLOC) would be 8.75% (42% x 50% x (1/2.4)) +of the default figure computed by SLOCCount. +

+In other words, less than one-tenth of the cost as computed by SLOCCount +is what actually would be made by a programmer for a small program for +just the coding task. +Note that a proprietary commercial company that bid using +this lower figure would rapidly go out of business, since this figure +ignores the many other costs they have to incur to actually develop +working products. +Programs don't arrive out of thin air; someone needs to determine what +the requirements are, how to design it, and perform at least +some testing of it. +

+There's another later estimation model for effort and schedule +called "COCOMO II", but COCOMO II requires logical SLOC instead +of physical SLOC. +SLOCCount doesn't currently measure logical SLOC, so +SLOCCount doesn't currently use COCOMO II. +Contributions of code to compute logical SLOC and then optionally +use COCOMO II will be gratefully accepted. + +

Counting Specific Files

+

+If you want to count a specific subset, you can use the "--details" +option to list individual files, pipe this into "grep" to select the +files you're interested in, and pipe the result to +my tool "print_sum" (which reads lines beginning with numbers, and +returns the total of those numbers). +If you've already done the analysis, an example would be: +

+  sloccount --cached --details | grep "/some/subdirectory/" | print_sum
+
+

+If you just want to count specific files, and you know what language +they're in, you +can just invoke the basic SLOC counters directly. +By convention the simple counters are named "LANGUAGE_count", +and they take on the command line a list of the +source files to count. +Here are some examples: +

+  c_count *.c *.cpp *.h  # Count C and C++ in current directory.
+  asm_count *.S          # Count assembly.
+
+All the counter (*_count) programs accept a "-f FILENAME" option, where FILENAME
+is a file containing the names of all the source files to count
+(one file per text line). If FILENAME is "-", the
+list of file names is taken from the standard input.
+The "c_count" program handles both C and C++ (but not Objective-C;
+for that use objc_count).
+The available counters are
+ada_count, asm_count, awk_count, c_count, csh_count, exp_count,
+fortran_count, f90_count, java_count, lex_count, lisp_count, ml_count,
+modula3_count, objc_count, pascal_count, perl_count, python_count,
+sed_count, sh_count, sql_count, and tcl_count.
+
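+For example, you could let "find" build the list of files and hand it to a
+counter, either through a temporary file or on standard input (a sketch;
+adjust the name patterns and paths to your own code):

+  find . -name '*.py' -print > pythonfiles.txt
+  python_count -f pythonfiles.txt
+  find . -name '*.pl' -print | perl_count -f -
+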

+There is also "generic_count", which takes as its first parameter +the ``comment string'', followed by a list of files. +The comment string begins a comment that ends at the end of the line. +Sometimes, if you have source for a language not listed, generic_count +will be sufficient. +

+The basic SLOC counters will send output to standard out, one line per file
+(showing the SLOC count and filename).
+The assembly counter shows some additional information about each file.
+The basic SLOC counters always complete their output with a line
+saying "Total:", followed by a line with the
+total SLOC count.
+

Countering Problems and Handling Errors

+ +If you're analyzing unfamiliar code, there's always the possibility +that it uses languages not processed by SLOCCount. +To counter this, after running SLOCCount, run the following program: +
+ count_unknown_ext
+
+This will look at the resulting data (in its default data directory +location, ~/.slocdata) and report a sorted list of the file extensions +for uncategorized ("unknown") files. +The list will show every file extension and how many files had that +extension, and is sorted by most common first. +It's not a problem if an "unknown" type isn't a source code file, but +if there are a significant number of source files in this category, +you'll need to change SLOCCount to get an accurate result. + +

+One error report that you may see is: +

+  c_count ERROR - terminated in string in (filename)
+
+The cause of this is that c_count (the counter for C-like languages)
+keeps track of whether or not it's in a string, and when the counter
+reaches the end of the file, it still thinks it's in a string.
+

+Note that c_count really does have to keep track of whether or
+not it's in a string.
+For example, this is three lines of code, not two, because the
+``comment'' is actually in string data:
+

+ a = "hello
+ /* this is not a comment */
+ bye";
+
+

+Usually this error means you have code that won't compile
+given certain #define settings. For example, XFree86 has a line of code that's
+actually wrong (it has a string that's not terminated), but people
+don't notice because the #define to enable it is not usually set.
+Legitimate code can trigger this message, but such code is
+horrendously formatted and is begging for problems.
+

+In either case, the best way to handle the situation +is to modify the source code (slightly) so that the code's intent is clear +(by making sure that double-quotes balance). +If it's your own code, you definitely should fix this anyway. +You need to look at the double-quote (") characters. One approach is to +just grep for double-quote, and look at every line for text that isn't +terminated, e.g., printf("hello %s, myname); + +
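+For example, a quick way to hunt for the offending line (the filename here is
+a placeholder, and the awk line is only a rough heuristic that prints lines
+containing an odd number of double-quote characters) is something like:

+  grep -n '"' suspect.c | less
+  awk -F'"' 'NF > 1 && NF % 2 == 0 { print FNR ": " $0 }' suspect.c
+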

+SLOCCount reports warnings when it finds an unusually
+large number of duplicate files.
+A large number of duplicates may suggest that you're counting
+two different versions of the same program as though they were
+independently developed.
+You may want to cd into the data directory (usually ~/.slocdata), cd into
+the child directories corresponding to each component, and then look
+at their dup_list.dat files, which list the filenames that appeared
+to be duplicated (and what they duplicate with).
+
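+For example, to see what was treated as a duplicate of what, assuming the
+default data directory and a component named "mycomponent" (a placeholder):

+  cd ~/.slocdata/mycomponent
+  less dup_list.dat
+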

Adding Support for New Languages

+SLOCCount handles many languages, but if it doesn't support one you need,
+you'll need to give the language a standard (lowercase ASCII) name,
+then modify SLOCCount to (1) detect and (2) count code in that language.
+
    +
  1. +To detect a new language, you'll need to modify the program break_filelist. +If the filename extension is reliable, you can modify the array +%file_extensions, which maps various filename extensions into languages. +If your needs are more complex, you'll need to modify the code +(typically in functions get_file_type or file_type_from_contents) +so that the correct file type is determined. +For example, if a file with a given filename extension is only +sometimes that type, you'll need to write code to examine the +file contents. +
  2. +You'll need to create a SLOC counter for that language type. +It must have the name XYZ_count, where XYZ is the standard name for the +language. +

    +For some languages, you may be able to use the ``generic_count'' program +to implement your counter - generic_count takes as its first argument +the pattern which +identifies comment begins (which continue until the end of the line); +the other arguments are the files to count. +Thus, the LISP counter looks like this: +

    + #!/bin/sh
    + generic_count ';' $@
    +
    +The generic_count program won't work correctly if there are multiline comments
    +(e.g., C) or multiline string constants.
    +If your language's string constant and comment syntax is identical to
    +C/C++'s (using // or /* .. */), then you can use the c_count program - in this case,
    +modify compute_sloc_lang so that the c_count program is used.
    +

    +Otherwise, you'll have to devise your own counting program. +The program must generate files with the same format, e.g., +for every filename passed as an argument, it needs to return separate lines, +where each line presents the SLOC +for that file, a space, and the filename. +(Note: the assembly language counter produces a slightly different format.) +After that, print "Total:" on its own line, and the actual SLOC total +on the following (last) line. +
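    +To make the expected output format concrete, here is a minimal sketch of a
    +counter for a hypothetical language (called "xyz" here) whose comments run
    +from "!" to the end of the line; it assumes there are no multiline comments
    +or multiline strings (in which case generic_count, shown above, is usually
    +the simpler choice anyway):

    + #!/bin/sh
    + # xyz_count - illustrative SLOC counter for a hypothetical language
    + # whose comments start with "!" and run to the end of the line.
    + total=0
    + for f in "$@"
    + do
    +   # Strip comments, then count the lines that still contain something.
    +   count=`sed -e 's/!.*//' "$f" | grep -c '[^[:space:]]'`
    +   total=`expr $total + $count`
    +   echo "$count $f"
    + done
    + echo "Total:"
    + echo "$total"
    +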

+ +

Advanced SLOCCount Use

+For most people, the previous information is enough. +However, if you're measuring a large set of programs, or have unusual needs, +those steps may not give you enough control. +In that case, you may need to create your own "data directory" +by hand and separately run the SLOCCount tools. +Basically, "sloccount" (note the lower case) is the name for +a high-level tool which invokes many other tools; this entire +suite is named SLOCCount (note the mixed case). +The next section will describe how to invoke the various tools "manually" +so you can gain explicit control over the measuring process when +the defaults are not to your liking, along with various suggestions +for how to handle truly huge sets of data. +

+Here's how to manually create a "data directory" to hold +intermediate results, and how to invoke each tool in sequence +(with discussion of options): +

    +
  1. Set your PATH to include the SLOCCount "bin directory", as discussed above. +
  2. Make an empty "data directory" +(where all intermediate results will be stored); +you can pick any name and location you like for this directory. +Here, I'll use the name "data": +
    +    mkdir ~/data
    +
    +
  3. Change your current directory to this "data directory": +
    +    cd ~/data
    +
    +The rest of these instructions assume that your current directory +is the data directory. +You can set up many different data directories if you wish, to analyze +different source programs or analyze the programs in different ways; +just "cd" to the one you want to work with. +
  4. (Optional) Some of the later steps will produce
+a lot of output while they're running.
+If you want to capture this information into a file, use the standard
+"script" command to do so.
+For example, "script run1" will save the output of everything you do into
+file "run1" (until you type control-D to stop saving the information).
+Don't forget that you're creating such a file, or it will become VERY large,
+and in particular don't type any passwords into such a session.
+You can store the script in the data directory, or create a subdirectory
+for such results - any data directory subdirectory that doesn't have the
+special file "filelist" is not a "data directory child" and is thus
+ignored by the later SLOCCount analysis routines.
+
  5. Now initialize the "data directory". + In particular, initialization will create the "data directory children", + a set of subdirectories equivalent to the source code directory's + top directories. Each of these data directory children (subdirectories) + will contain a file named "filelist", which + lists all filenames in the corresponding source code directory. + These data directory children + will also eventually contain intermediate results + of analysis, which you can check for validity + (also, having a cache of these values speeds later analysis steps). +

    + You use the "make_filelists" command to initialize a data directory. + For example, if your source code is in /usr/src/redhat/BUILD, run: +

    +   make_filelists /usr/src/redhat/BUILD/*
    +
    +

    + Internally, make_filelists uses "find" to create the list of files, and + by default it ignores all symbolic links. However, you may need to + follow symbolic links; if you do, give make_filelists the + "--follow" option (which will use find's "-follow" option). + Here are make_filelists' options: +

    + --follow         Follow symbolic links
    + --datadir D      Use this data directory
    + --skip S         Skip basenames named S
    + --prefix P       When creating children, prepend P to their name.
    + --               No more options
    +
    +

    + Although you don't normally need to do so, if you want certain files to + not be counted at all in your analysis, you can remove + data directory children or edit the "filelist" files to do so. + There's no need to remove files which aren't source code files normally; + this is handled automatically by the next step. +

    + If you don't have a single source code directory where the subdirectories + represent the major components you want to count separately, you can + still use the tool but it's more work. + One solution is to create a "shadow" directory with the structure + you wish the program had, using symbolic links (you must use "--follow" + for this to work). + You can also just invoke make_filelists multiple times, with parameters + listing the various top-level directories you wish to include. + Note that the basenames of the directories must be unique. +

    + If there are so many directories (e.g., a massive number of projects) + that the command line is too long, + you can run make_filelists multiple times in the same + directory with different arguments to create them. + You may find "find" and/or "xargs" helpful in doing this automatically. + For example, here's how to do the same thing using "find": +

    + find /usr/src/redhat/BUILD -maxdepth 1 -mindepth 1 -type d \
    +        -exec make_filelists {} \;
    +
    +
  6. Categorize each file. +This means that we must determine which +files contain source code (eliminating auto-generated and duplicate files), +and of those files which language each file contains. +The result will be a set of files in each subdirectory of the data directory, +where each file represents a category (e.g., a language). +
    +   break_filelist *
    +
    + At this point you might want to examine the data directory subdirectories + to ensure that "break_filelist" has correctly determined the types of + the various files. + In particular, the "unknown" category may have source files in a language + SLOCCount doesn't know about. + If the heuristics got some categorization wrong, you can modify the + break_filelist program and re-run break_filelist. +

    + By default break_filelist removes duplicates, doesn't count + automatically generated files as normal source code files, and + only gives some feedback. You can change these defaults with the + following options: +

    + --duplicates   Count all duplicate files as normal files
    + --crossdups    Count duplicate files if they're in different data directory
    +                children (i.e., in different "filelists")
    + --autogen      Count automatically generated files
    + --verbose      Present more verbose status information while processing.
    +
    +

    + Duplicate control in particular is an issue; you probably don't want + duplicates counted, so that's the default. + Duplicate files are detected by determining if their MD5 checksums + are identical; the "first" duplicate encountered is the only one kept. + Normally, since shells sort directory names, this means that the + file in the alphabetically first child directory is the one counted. + You can change this around by listing directories in the sort order you + wish followed by "*"; if the same data directory child + is requested for analysis more + than once in a given execution, it's skipped after the first time. + So, if you want any duplicate files with child directory "glibc" to + count as part of "glibc", then you should provide the data directory children + list as "glibc *". +

    + Beware of choosing something other than "*" as the parameter here, + unless you use the "--duplicates" or "--crossdups" options. + The "*" represents the list of data directory children to examine. + Since break_filelist skips duplicate files identified + in a particular run, if you run break_filelist + on only certain children, some duplicate files won't be detected. + If you're allowing duplicates (via "--duplicates" or + "--crossdups"), then this isn't a problem. + Or, you can use the ``--duplistfile'' option to store and retrieve + hashes of files, so that additional files can be handled. +

    + If there are so many directories that the command line is too long, + you can run break_filelist multiple times and give it + a subset of the directories each time. + You'll need to use one of the duplicate control options to do this. + I would suggest using "--crossdups", which + means that duplicates inside a child will only be counted once, + eliminating at least some of the problems of duplicates. + Here's the equivalent of "break_filelist *" when there are a large + number of subdirectories: +

    + find . -maxdepth 1 -mindepth 1 -type d -exec break_filelist --crossdups {} \;
    +
    + Indeed, for all of the later commands where "*" is listed as the parameter + in these instructions + (for the list of data directory children), just run the above "find" + command and replace "break_filelist --crossdups" with the command shown. +
  7. (Optional) +If you're not very familiar with the program you're analyzing, you +might not be sure that "break_filelist" has correctly identified +all of the files. +In particular, the system might be using an unexpected +programming language or extension not handled by SLOCCount. +If this is your circumstance, you can just run the command: +
    + count_unknown_ext
    +
    +(note that this command is unusual - it doesn't take any arguments, +since it's hard to imagine a case where you wouldn't want every +directory examined). +Unlike the other commands discussed, this one specifically looks at +${HOME}/.slocdata. +This command presents a list of extensions which are unknown to break_filelist, +with the most common ones listed first. +The output format is a name, followed by the number of instances; +the name begins with a "." if it's an extension, or, if there's no +extension, it begins with "/" followed by the base name of the file. +break_filelist already knows about common extensions such as ".gif" and ".png", +as well as common filenames like "README". +You can also view the contents of each of the data directory children's +files to see if break_filelist has correctly categorized the files. +
  8. Now compute SLOC and filecounts for each language; you can compute for all + languages at once by calling: +
    +   compute_all *
    +
    +If you only want to compute SLOC for a specific language, +you can invoke compute_sloc_lang, which takes as its first parameter +the SLOCCount name of the language ("ansic" for C, "cpp" for C++, +"ada" for Ada, "asm" for assembly), followed by the list +of data directory children. +Note that these names are a change from version 1.0, which +called the master program "compute_all", +and had "compute_*" programs for each language. +

    +Notice the "*"; you can replace the "*" with just the list of +data directory children (subdirectories) to compute, if you wish. +Indeed, you'll notice that nearly all of the following commands take a +list of data directory children as arguments; when you want all of them, use +"*" (as shown in these instructions), otherwise, list the ones you want. +

    +When you run compute_all or compute_sloc_lang, each data directory
    +child (subdirectory)
    +is consulted in turn for a list of the relevant files, and the
    +SLOC results are placed in that data directory child.
    +In each child,
    +the file "LANGUAGE-outfile.dat" lists the information from the
    +basic SLOC counters.
    +That is, the outfile lists the SLOC and filename
    +(the assembly outfile has additional information), and ends with
    +a line saying "Total:" followed by a line showing the total SLOC of
    +that language in that data directory child.
    +The file "all-physical.sloc" has the final total SLOC for every language
    +in that child directory (i.e., it's the last line of the outfile).
    +

  9. (Optional) If you want, you can also use USC's CodeCount. +I've had trouble with these programs, so I don't do this normally. +However, you're welcome to try - they support logical SLOC measures +as well as physical ones (though not for most of the languages +supported by SLOCCount). +Sadly, they don't seem to compile in gcc without a lot of help, they +used fixed-width buffers that make me nervous, and I found a +number of bugs (e.g., it couldn't handle "/* text1 *//* text2 */" in +C code, a format that's legal and used often in the Linux kernel). +If you want to do this, +modify the files compute_c_usc and compute_java_usc so they point to the +right directories, and type: +
    + compute_c_usc *
    +
    +
  10. Now you can analyze the results. The main tool for
+presenting SLOCCount results is "get_sloc", e.g.:
+
    +  get_sloc * | less
    +
    +The get_sloc program takes many options, including: +
    + --filecount    Display number of files instead of SLOC (SLOC is the default)
    + --wide         Use "wide" format instead (tab-separated columns)
    + --nobreak      Don't insert breaks in long lines
    + --sort  X      Sort by "X", where "X" is the name of a language
    +                ("ansic", "cpp", "fortran", etc.), or "total".
    +                By default, get_sloc sorts by "total".
    + --nosort       Don't sort - just present results in order of directory
    +                listing given.
    + --showother    Show non-language totals (e.g., # duplicate files).
    + --oneprogram   When computing effort, assume that all files are part of
    +                a single program.  By default, each subdirectory specified
    +                is assumed to be a separate, independently-developed program.
    + --noheader     Don't show the header
    + --nofooter     Don't show the footer (the per-language values and totals)
    +
    +

    +Note that unlike the "sloccount" tool, get_sloc requires the current +directory to be the data directory. +

    +If you're displaying SLOC, get_sloc will also estimate the time it +would take to develop the software using COCOMO (using its "basic" model). +By default, this figure assumes that each of the major subdirectories was +developed independently of the others; +you can use "--oneprogram" to make the assumption that all files are +part of the same program. +The COCOMO model makes many other assumptions; see the paper at +http://www.dwheeler.com/sloc +for more information. +

    +If you need to do more analysis, you might want to use the "--wide" +option and send the data to another tool such as a spreadsheet +(e.g., gnumeric) or RDBMS (e.g., PostgreSQL). +Using the "--wide" option creates tab-separated data, which is easier to +import. +You may also want to use the "--noheader" and/or "--nofooter" options to +simplify porting the data to another tool. +

    +Note that in version 1.0, "get_sloc" was called "get_data". +

    +If you have so many data directory children that you can't use "*" +on the command line, get_sloc won't be as helpful. +Feel free to patch get_sloc to add this capability (as another option), +or use get_sloc_detail (discussed next) to feed the data into another tool. +

  11. (Optional) If you just can't get the information you need from get_sloc, +then you can get the raw results of everything and process the data +yourself. +I have a little tool to do this, called get_sloc_details. +You invoke it in a similar manner: +
    +get_sloc_details *
    +
    +
+ +

+

Designer's Notes

+

+Here are some ``designer's notes'' on how SLOCCount works, +including what it can handle. +

+The program break_filelist +has categories for each programming language it knows about, +plus the special categories ``not'' (not a source code file), +``auto'' (an automatically-generated file and thus not to be counted), +``zero'' (a zero-length file), +``dup'' (a duplicate of another file as determined by an md5 checksum), +and +``unknown'' (a file which doesn't seem to be a source code file +nor any of these other categories). +It's a good idea to examine +the ``unknown'' items later, checking the common extensions +to ensure you have not missed any common types of code. +

+The program break_filelist uses lots of heuristics to correctly
+categorize files.
+Here are a few notes about its heuristics:
+

    +
  1. +break_filelist first checks for well-known extensions (such as .gif) that +cannot be program files, and for a number of common generated filenames. +
  2. +It then peeks at the first few lines for "#!" followed by a legal script +name. +Sometimes it looks further, for example, many Python programs +invoke "env" and then use it to invoke python. +
  3. +If that doesn't work, it uses the extension to try to determine the category. +For a number of languages, the extension is not reliable, so for those +languages it examines the file contents and uses a set of heuristics +to determine if the file actually belongs to that category. +
  4. +Detecting automatically generated files is not easy, and it's +quite conceivable that it won't detect some automatically generated files. +The first 15 lines are examined, to determine if any of them +include at the beginning of the line (after spaces and +possible comment markers) one of the following phrases (ignoring +upper and lower case distinctions): +``generated automatically'', +``automatically generated'', +``this is a generated file'', +``generated with the (something) utility'', +or ``do not edit''. +
  5. A number of filename conventions are used, too. +For example, +any ``configure'' file is presumed to be automatically generated if +there's a ``configure.in'' file in the same directory. +
  6. +To eliminate duplicates, +the program keeps md5 checksums of each program file. +Any given md5 checksum is only counted once. +Build directories are processed alphabetically, so +if the same file content is in both directories ``a'' and ``b'', +it will be counted only once as being part of ``a'' unless you make +other arrangements. +Thus, some data directory children with names later in the alphabet may appear +smaller than would make sense at first glance. +It is very difficult to eliminate ``almost identical'' files +(e.g., an older and newer version of the same code, included in two +separate packages), because +it is difficult to determine when two ``similar'' files are essentially +the same file. +Changes such as the use of pretty-printers and massive renaming of variables +could make small changes seem large, while the small files +might easily appear to be the ``same''. +Thus, files with different contents are simply considered different. +
  7. +If all else fails, the file is placed in the ``unknown'' category for +later analysis. +
+
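+To make item 4 concrete, here is a rough Perl sketch of that check (this is an illustration only, not the actual break_filelist code; the function name and the exact phrase patterns are simplified assumptions):
+
+  #!/usr/bin/perl
+  # Sketch: does a file look automatically generated?  (illustrative only)
+  sub looks_generated {
+      my ($filename) = @_;
+      open(my $fh, '<', $filename) or return 0;
+      my $lineno = 0;
+      while (my $line = <$fh>) {
+          last if ++$lineno > 15;      # only the first 15 lines are examined
+          # Strip leading whitespace and common comment markers first.
+          $line =~ s/^[\s#\*\/;'"-]*//;
+          return 1 if $line =~ /^(generated\s+automatically
+                                 |automatically\s+generated
+                                 |this\s+is\s+a\s+generated\s+file
+                                 |generated\s+with\s+the\s+\S+\s+utility
+                                 |do\s+not\s+edit)/ix;
+      }
+      return 0;
+  }
+  my $verdict = looks_generated($ARGV[0]) ? "generated" : "hand-written";
+  print "$verdict\n";
+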

+One complicating factor is that I wished to separate C, C++, and +Objective-C code, but a header file ending with +``.h'' or ``.hpp'' could be any of these languages. +In theory, ``.hpp'' is only C++, but I found that in practice this isn't true. +I developed a number of heuristics to determine, for each file, +what language a given header belonged to. +For example, if a given directory has exactly one of these languages +(ignoring header files), +the header is assumed to belong to that category as well. +Similarly, if there is a body file (e.g., ".c") that has the same name +as the header file, then presumably the header file is of the same language. +Finally, a header file with the keyword ``class'' is almost certainly not a +C header file, but a C++ header file; otherwise it's assumed to +be a C file. +
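+Roughly, that header disambiguation can be sketched in Perl like this (an illustrative sketch, not the actual break_filelist code; the %lang_count table and the fixed ".h" name are simplifying assumptions):
+
+  # Sketch: guess whether a header is C, C++, or Objective-C (illustrative).
+  # $lang_count->{$dir}{$lang} is assumed to hold how many non-header files
+  # of each language were already found in that directory.
+  sub header_language {
+      my ($dir, $base, $lang_count) = @_;
+      my @langs = grep { $lang_count->{$dir}{$_} } qw(ansic cpp objc);
+      return $langs[0] if @langs == 1;        # directory uses exactly one language
+      foreach my $ext ('.c', '.cpp', '.cxx', '.cc', '.C', '.m') {
+          next unless -e "$dir/$base$ext";    # a body file with the same name wins
+          return 'ansic' if $ext eq '.c';
+          return 'objc'  if $ext eq '.m';
+          return 'cpp';
+      }
+      if (open(my $fh, '<', "$dir/$base.h")) {
+          local $/; my $text = <$fh>;            # slurp the header
+          return 'cpp' if $text =~ /\bclass\b/;  # "class" => almost certainly C++
+      }
+      return 'ansic';                         # otherwise assume C
+  }
+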

+None of the SLOC counters fully parse the source code; they just examine +the code using simple text processing patterns to count the SLOC. +In practice, by handling a number of special cases this seems to be fine. +Here are some notes on some of the language counters; +the language name is followed by common extensions in parentheses +and the SLOCCount name of the language in brackets: +

    +
  1. Ada (.ada, .ads, .adb) [ada]: Comments begin with "--". +
  2. Assembly (.s, .S, .asm) [asm]: +Assembly languages vary greatly in the comment character they use, +so my counter had to handle this variance. +The assembly language counter (asm_count) +first examines the file to determine if +C-style ``/*'' comments and C preprocessor commands +(e.g., ``#include'') are used. +If both ``/*'' and ``*/'' are in the file, it's assumed that +C-style comments are being used +(since it is unlikely that both would be used +as something else, say as string data, in the same assembly language file). +Determining if a file used the C preprocessor was trickier, since +many assembly files do use ``#'' as a comment character and some +preprocessor directives are ordinary words that might be included +in a human comment. +The heuristic used is as follows: if #ifdef, #endif, or #include are used, the +C preprocessor is used; or if at least three lines have either #define or #else, +then the C preprocessor is used. +No doubt other heuristics are possible, but this at least seems to produce +reasonable results. +The program then determines what the comment character is by identifying +which punctuation mark (from a set of possible marks) +is the most common non-space initial character on a line +(ignoring ``/'' and ``#'' if C comments or preprocessor commands, +respectively, are used). +Once the comment character has been determined, and it is known whether +C-style comments are allowed, the lines of code in the file are counted. +(A sketch of this heuristic appears just after this list.) +
  3. awk (.awk) [awk]: Comments begin with "#". +
  4. C (.c) [ansic]: Both traditional C comments (/* .. */) and C++ +(//) style comments are supported. +Although the older ANSI and ISO C standards didn't support // style +comments, in practice many C programs have used them for some time, and +the C99 standard includes them. +The C counter understands multi-line strings, so +comment characters (/* .. */ and //) are treated as data inside strings. +Conversely, the counter knows that double-quote characters inside a +comment do not begin a C/C++ string. +
  5. C++ (.C, .cpp, .cxx, .cc) [cpp]: The same counter is used for +both C and C++. +Note that break_filelist does try to separate C from C++ for purposes +of accounting between them. +
  6. C# (.cs) [cs]: The same counter is used as for C and C++. +Note that there are no "header" filetypes in C#. +
  7. C shell (.csh) [csh]: Comments begin with "#". +
  8. COBOL (.cob, .cbl) [cobol]: SLOCCount +detects if a "freeform" command has been given; until such a command is +given, fixed format is assumed. +In fixed format, comments have a "*" or "/" in column 7 or column 1; +any line that's not a comment, and has a nonwhitespace character after column 7 +(the indicator area) is counted as a source line of code. +In a freeform style, any line beginning with optional whitespace and +then "*" or "/" is considered a comment; any noncomment line +with a nonwhitespace character is counted as SLOC. +
  9. Expect (.exp) [exp]: Comments begin with "#". +
  10. Fortran 77 (.f, .f77, .F, .F77) [fortran]: Comment-only lines are lines +where column 1 character = C, c, *, or !, or +where ! is preceded only by white space. +
  11. Fortran 90 (.f90, .F90) [f90]: Comment-only lines are lines +where ! is preceded only by white space. +
  12. Haskell (.hs) [haskell]: +This counter handles block comments {- .. -} and single line comments (--); +pragmas {-# .. #-} are counted as SLOC. +This is a simplistic counter, +and can be fooled by certain unlikely combinations of block comments +and other syntax (line-ending comments or strings). +In particular, "Hello {-" will be incorrectly interpreted as a +comment block begin, and "{- -- -}" will be incorrectly interpreted as a +comment block begin without an end. Literate files are detected by +their extension, and the style (TeX or plain text) is determined by +searching for a \begin{code} or ">" at the beginning of lines. +See the Haskell 98 + report section on literate Haskell for more information. +
  13. Java (.java) [java]: Java is counted using the same counter as C and C++. +
  14. lex (.l) [lex]: Uses traditional C /* .. */ comments. +Note that this does not use the same counter as C/C++ internally, since +it's quite legal in lex to have "//" (where it is NOT a comment). +
  15. LISP (.cl, .el, .scm, .lsp, .jl) [lisp]: Comments begin with ";". +
  16. ML (.ml, .mli, .mll, .mly) [ml]: Comments nest and are enclosed in (* .. *). +
  17. Modula3 (.m3, .mg, .i3, .ig) [modula3]: Comments are enclosed in (* .. *). +
  18. Objective-C (.m) [objc]: Comments are old C-style /* .. */ comments. +
  19. Pascal (.p, .pas) [pascal]: Comments are enclosed in curly braces {} +or (*..*). This counter has known weaknesses; see the BUGS section of +the manual page for more information. +
  20. Perl (.pl, .pm, .perl) [perl]: +Comments begin with "#". +Perl permits in-line ``perlpod'' documents, ``here'' documents, and an +__END__ marker that complicate code-counting. +Perlpod documents are essentially comments, but a ``here'' document +may include text that generates perlpod documents (in which case that +perlpod text is data and should be counted). +The __END__ marker indicates the end of the file from Perl's +viewpoint, even if there's more text afterwards. +
  21. PHP (.php, .php[3456], .inc) [php]: +Code is counted as PHP code if it has a .php file extension; +it's also counted if it has an .inc extension and looks like PHP code. +SLOCCount does not count PHP code embedded in HTML files normally, +though its lower-level routines can do so if you want to +(use php_count to do this). +Any of the various ways to begin PHP code can be used +(<? .. ?>, +<?php .. ?>, +<script language="php"> .. </script>, +or even <% .. %>). +Any of the PHP comment formats (C, C++, and shell) can be used, and +any string constant formats ("here document", double quote, and single +quote) can be used as well. +
  22. Python (.py) [python]: +Comments begin with "#". +Python has a convention that, at the beginning of a definition +(e.g., of a function, method, or class), an unassigned string can be +placed to describe what's being defined. Since this is essentially +a comment (though it doesn't syntactically look like one), the counter +avoids counting such strings, which may have multiple lines. +To handle this, +strings that start at the beginning of a line are not counted. +Python also has the ``triple quote'' operator, permitting multiline +strings; these need to be handled specially. +Triple quote strings are normally considered data, regardless of +content, unless they are used as a comment about a definition. +
  23. Ruby (.rb) [ruby]: Comments begin with "#". +
  24. sed (.sed) [sed]: Comments begin with "#". +Note that these are "sed-only" files; many uses of sed are embedded in +shell scripts (and are categorized as shell scripts in those cases). +
  25. shell (.sh) [sh]: Comments begin with "#". +Note that I classify ksh, bash, and the original Bourne shell sh together, +because they have very similar syntaxes. +For example, in all of these shells, +setting a variable is expressed as "varname=value", +while C shells use "set varname=value". +
  26. TCL (.tcl, .tk, .itk) [tcl]: Comments begin with "#". +
  27. Yacc (.y) [yacc]: Yacc is counted using the same counter as C and C++. +
+
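+As a concrete illustration of the assembly-language heuristic described in item 2 above, here is a rough Perl sketch (not the actual asm_count code; the set of candidate comment characters is an assumption):
+
+  # Sketch: guess an assembly file's comment conventions (illustrative only).
+  sub asm_conventions {
+      my ($filename) = @_;
+      open(my $fh, '<', $filename) or die "cannot open $filename: $!";
+      my @lines = <$fh>;
+      my $text = join('', @lines);
+      # C-style comments are assumed if both "/*" and "*/" appear.
+      my $c_comments = ($text =~ m{/\*} && $text =~ m{\*/}) ? 1 : 0;
+      # The C preprocessor is assumed if #ifdef/#endif/#include appear,
+      # or if at least three lines use #define or #else.
+      my $defines = grep { /^\s*#\s*(define|else)\b/ } @lines;
+      my $cpp = ($text =~ /^\s*#\s*(ifdef|endif|include)\b/m || $defines >= 3) ? 1 : 0;
+      # The comment character is the most common initial punctuation mark,
+      # ignoring "/" and "#" when they already have a meaning above.
+      my %count;
+      foreach my $line (@lines) {
+          next unless $line =~ /^\s*([;!|*\@#\/])/;
+          my $ch = $1;
+          next if ($ch eq '/' && $c_comments) || ($ch eq '#' && $cpp);
+          $count{$ch}++;
+      }
+      my ($comment_char) = sort { $count{$b} <=> $count{$a} } keys %count;
+      $comment_char = ';' unless defined $comment_char;   # arbitrary fallback
+      return ($comment_char, $c_comments, $cpp);
+  }
+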

+Much of the code is written in Perl, since it's primarily a text processing +problem and Perl is good at that. +Many short scripts are Bourne shell scripts (it's good at +short scripts for calling other programs), and the +basic C/C++ SLOC counter is written in C for speed. +

+I originally named it "SLOC-Count", but I found that some web search +engines (notably Google) treated that as two words. +By naming it "SLOCCount", it's easier to find by those who know +the name of the program. +

+SLOCCount only counts physical SLOC, not logical SLOC. +Logical SLOC counting requires much more code to implement, +and I needed to cover a large number of programming languages. + + +

+

Definition of SLOC

+

+This tool measures ``physical SLOC.'' +Physical SLOC is defined as follows: +``a physical source line of code (SLOC) is a line ending +in a newline or end-of-file marker, +and which contains at least one non-whitespace non-comment character.'' +Comment delimiters (characters other than newlines starting and ending +a comment) are considered comment characters. +Data lines only including whitespace +(e.g., lines with only tabs and spaces in multiline strings) are not included. +

+To make this concrete, here's an example of a simple C program +(it strips ANSI C comments out). +On the left side is the running SLOC total, where "-" indicates a line +that is not considered a physical "source line of code": +

+ 1    #include <stdio.h>
+ -    
+ -    /* peek at the next character in stdin, but don't get it */
+ 2    int peek() {
+ 3     int c = getchar();
+ 4     ungetc(c, stdin);
+ 5     return c;
+ 6    }
+ -    
+ 7    main() {
+ 8     int c;
+ 9     int incomment = 0;  /* 1 = we are inside a comment */
+ -    
+10     while ( (c = getchar()) != EOF) {
+11        if (!incomment) {
+12          if ((c == '/') && (peek() == '*')) {incomment=1;}
+13        } else {
+14          if ((c == '*') && (peek() == '/')) {
+15               c= getchar(); c=getchar(); incomment=0;
+16          }
+17        }
+18        if ((c != EOF) && !incomment) {putchar(c);}
+19     }
+20    }
+
+

+Robert E. Park et al.'s +Software Size Measurement: +A Framework for Counting Source Statements +(Technical Report CMU/SEI-92-TR-20) +presents a set of issues to be decided when trying to count code. +The paper's abstract states: +

+This report presents guidelines for defining, recording, and reporting +two frequently used measures of software size -- physical source lines +and logical source statements. +We propose a general framework for constructing size +definitions and use it to derive operational methods for +reducing misunderstandings in measurement results. +
+

+Using Park's framework, here is how physical lines of code are counted: +

    +
  1. Statement Type: I used a physical line-of-code as my basis. +I included executable statements, declarations +(e.g., data structure definitions), and compiler directives +(e.g., preprocessor commands such as #define). +I excluded all comments and blank lines. +
  2. How Produced: +I included all programmed code, including any files that had been modified. +I excluded code generated with source code generators, converted with +automatic translators, and those copied or reused without change. +If a file was in the source package, I included it; if the file had +been removed from a source package (including via a patch), I did +not include it. +
  3. Origin: You select the files (and thus their origin). +
  4. Usage: You select the files (and thus their usage), e.g., +you decide if you're going to +include additional applications able to run on the system but not +included with the system. +
  5. Delivery: You'll decide what code to include, but of course, +if you don't have the code you can't count it. +
  6. Functionality: This tool will include both operative and inoperative code +if they're mixed together. +An example of intentionally ``inoperative'' code is +code turned off by #ifdef commands; since it could be +turned on for special purposes, it made sense to count it. +An example of unintentionally ``inoperative'' code is dead or unused code. +
  7. Replications: +Normally, duplicate files are ignored, unless you use +the "--duplicates" or "--crossdups" option. +The tool will count +``physical replicates of master statements stored in +the master code''. +This is simply code cut and pasted from one place to another to reuse code; +it's hard to tell where this happens, and since it has to be maintained +separately, it's fair to include this in the measure. +I excluded copies inserted, instantiated, or expanded when compiling +or linking, and I excluded postproduction replicates +(e.g., reparameterized systems). +
  8. Development Status: You'll decide what code +should be included (and thus the development status of the code that +you'll accept). +
  9. Languages: You can see the language list above. +
  10. Clarifications: I included all statement types. +This included nulls, continues, no-ops, lone semicolons, +statements that instantiate generics, +lone curly braces ({ and }), and labels by themselves. +
+

+Thus, SLOCCount generally follows Park's ``basic definition'', +but with the following exceptions depending on how you use it: +

    +
  1. How Produced: +By default, this tool excludes duplicate files and +code generated with source code generators. +After all, the COCOMO model states that the +only code that should be counted is code +``produced by project personnel'', whereas these kinds of files are +instead the output of ``preprocessors and compilers.'' +If code is always maintained as the input to a code generator, and then +the code generator is re-run, it's only the code generator input's size that +validly measures the size of what is maintained. +Note that while I attempted to exclude generated code, this exclusion +is based on heuristics which may have missed some cases. +If you want to count duplicates, use the +"--duplicates" and/or "--crossdups" options. +If you want to count automatically generated files, pass +the "--autogen" option. +
  2. Origin: +You can choose what source code you'll measure. +Normally physical SLOC doesn't include an unmodified +``vendor-supplied language support library'' nor a +``vendor-supplied system or utility''. +However, if this is what you are measuring, then you need to include it. +If you include such code, your set will be different +than the usual ``basic definition.'' +
  3. Functionality: I included counts of unintentionally inoperative code +(e.g., dead or unused code). +It is very difficult to automatically detect such code +in general for many languages. +For example, a program not directly invoked by anything else nor +installed by the installer is much more likely to be a test program, +which you may want to include in the count (you often would include it +if you're estimating effort). +Clearly, discerning human ``intent'' is hard to automate. +
+

+Otherwise, this counter follows Park's +``basic definition'' of a physical line of code, even down to Park's +language-specific definitions where Park defined them for a language. + + +

+

Miscellaneous Notes

+

+There are other undocumented analysis tools in the original tar file. +Most of them are specialized scripts for my circumstances, but feel +free to use them as you wish. +

+If you're packaging this program, don't just copy every executable +into the system "bin" directory - many of the files are those +specialized scripts. +Just put in the bin directory every executable documented here, plus the +files they depend on (there aren't that many). +See the RPM specification file to see what's actually installed. +

+You have to take any measure of SLOC (including this one) with a +large grain of salt. +Physical SLOC is sensitive to the format of source code. +There's a correlation between SLOC and development effort, and some +correlation between SLOC and functionality, +but there's absolutely no correlation between SLOC +and either "quality" or "value". +

+A problem of physical SLOC is that it's sensitive to formatting, +and that's a legitimate (and known) problem with the measure. +However, to be fair, logical SLOC is influenced by coding style too. +For example, the following two phrases are semantically identical, +but will have different logical SLOC values: +

+   int i, j;  /* 1 logical SLOC */
+
+   int i;     /* 2 logical SLOC, but it does the same thing */
+   int j;
+
+

+If you discover other information that can be divided up by +data directory children (e.g., the license used), it's probably best +to add that to each subdirectory (e.g., as a "license" file in the +subdirectory). +Then you can modify tools like get_sloc +to add them to their display. +

+I developed SLOCCount for my own use, not originally as +a community tool, so it's certainly not beautiful code. +However, I think it's serviceable - I hope you find it useful. +Please send me patches for any improvements you make! +

+You can't use this tool as-is with some estimation models, such as COCOMO II, +because this tool doesn't compute logical SLOC. +I certainly would accept code contributions to add the ability to +measure logical SLOC (or related measures such as +Cyclomatic Complexity and Cyclomatic density); +selecting them could be a compile-time option. +However, measuring logical SLOC takes more development effort, so I +haven't done so; see USC's "CodeCount" for a set of code that +measures logical SLOC for some languages +(though I've had trouble with CodeCount - in particular, its C counter +doesn't correctly handle large programs like the Linux kernel). + + +

+

SLOCCount License

+

+Here is the SLOCCount License; the file COPYING contains the standard +GPL version 2 license: +

+=====================================================================
+SLOCCount
+Copyright (C) 2000-2001 David A. Wheeler (dwheeler, at, dwheeler.com)
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+=====================================================================
+
+

+While it's not formally required by the license, please give credit +to me and this software in any report that uses results generated by it. +

+This document was written by David A. Wheeler (dwheeler, at, dwheeler.com), +and is +(C) Copyright 2001 David A. Wheeler. +This document is covered by the license (GPL) listed above. +

+The license does give you the right to +use SLOCCount to analyze proprietary programs. + +

+

Related Tools

+

+One available toolset is +CodeCount. +I tried using this toolset, but I eventually gave up. +It had too many problems handling the code I was trying to analyze, and it +does a poor job of automatically categorizing code. +It also has no support for many of today's languages (such as Python, +Perl, Ruby, PHP, and so on). +However, it does a lot of analysis and measurements that SLOCCount +doesn't do, so it all depends on your needs. +Its license appeared to be open source, but it's quite unusual and +I'm not enough of a lawyer to be able to confirm that. +

+Another tool that's available is LOCC. +It's available under the GPL. +It can count Java code, and there's experimental support for C++. +LOCC is really intended for more deeply analyzing each Java file; +what's particularly interesting about it is that it can measure +"diffs" (how much has changed). +See + +A comparative review of LOCC and CodeCount. +

+ +CCCC is a tool which analyzes C++ and Java files +and generates a report on various metrics of the code. +Metrics supported include lines of code, McCabe's complexity, +and metrics proposed by Chidamber & Kemerer and Henry & Kafura. +(You can see +Tim Littlefair's comments). +CCCC is in the public domain. +It reports on metrics that sloccount doesn't, but sloccount can handle +far more computer languages. + +

+

Submitting Changes

+

+The GPL license doesn't require you to submit changes you make back to +its maintainer (currently me), +but it's highly recommended and wise to do so. +Because others will send changes to me, a version you make on your +own will slowly become obsolete and incompatible. +Rather than allowing this to happen, it's better to send changes in to me +so that the latest version of SLOCCount also has the +features you're looking for. +If you're submitting support for new languages, be sure that your +change correctly ignores files that aren't in that new language +(some filename extensions have multiple meanings). +You might want to look at the TODO file first. +

+When you send changes to me, send them as "diff" results so that I can +use the "patch" program to install them. +If you can, please send ``unified diffs'' -- GNU's diff can create these +using the "-u" option. + + diff --git a/sloccount.html.orig b/sloccount.html.orig new file mode 100644 index 0000000..dd0ad54 --- /dev/null +++ b/sloccount.html.orig @@ -0,0 +1,2440 @@ + + + +SLOCCount User's Guide + + +

+SLOCCount User's Guide +
+by David A. Wheeler (dwheeler, at, dwheeler.com) +
+December 2, 2002 +
+Version 2.20 +
+

+

Introduction

+

+SLOCCount (pronounced "sloc-count") is a suite of programs for counting +physical source lines of code (SLOC) in potentially large software systems. +Thus, SLOCCount is a "software metrics tool" or "software measurement tool". +SLOCCount was developed by David A. Wheeler, +originally to count SLOC in a GNU/Linux distribution, but it can be +used for counting the SLOC of arbitrary software systems. +

+SLOCCount is known to work on Linux systems, and has been tested +on Red Hat Linux versions 6.2, 7, and 7.1. +SLOCCount should run on many other Unix-like systems (if Perl is installed), +in particular, I would expect a *BSD system to work well. +Windows users can run sloccount by first installing +Cygwin. +SLOCCount is much slower on Windows/Cygwin, and it's not as easy to install +or use on Windows, but it works. +Of course, feel free to upgrade to an open source Unix-like system +(such as Linux or *BSD) instead :-). +

+SLOCCount can count physical SLOC for a wide variety of languages. +Listed alphabetically, they are +Ada, Assembly (for many machines and assemblers), +awk (including gawk and nawk), +Bourne shell (and relatives such as bash, ksh, zsh, and pdksh), +C, C++, C# (also called C-sharp or cs), C shell (including tcsh), +COBOL, Expect, Fortran, Haskell, +Java, lex (including flex), +LISP (including Scheme), +makefiles (though they aren't usually shown in final reports), +Modula3, Objective-C, Pascal, Perl, PHP, Python, Ruby, sed, +SQL (normally not shown), +TCL, and Yacc. +It can gracefully handle awkward situations in many languages; +for example, it can determine the +syntax used in different assembly language files and adjust appropriately, +it knows about Python's use of string constants as comments, and it +can handle various Perl oddities (e.g., perlpods, here documents, +and Perl's _ _END_ _ marker). +It even has a "generic" SLOC counter that you may be able to use to count the +SLOC of other languages (depending on the language's syntax). +

+SLOCCount can also take a large list of files and automatically categorize +them using a number of different heuristics. +The heuristics automatically determine if a file +is a source code file or not, and if so, which language it's written in. +For example, +it knows that ".pc" is usually a C source file for an Oracle preprocessor, +but it can detect many circumstances where it's actually a file about +a "PC" (personal computer). +For another example, it knows that ".m" is the standard extension for +Objective-C, but it will check the file contents to +see if it really is Objective-C. +It will even examine file headers to attempt to accurately determine +the file's true type. +As a result, you can analyze large systems completely automatically. +

+Finally, SLOCCount has some report-generating tools +to collect the data generated, +and then present it in several different formats and sorted different ways. +The report-generating tool can also generate simple tab-separated files +so data can be passed on to other analysis tools (such as spreadsheets +and database systems). +

+SLOCCount will try to quickly estimate development time and effort given only +the lines of code it computes, using the original Basic COCOMO model. +This estimate can be improved if you give more information about the project; +see the +discussion below about COCOMO, including intermediate COCOMO. +

+SLOCCount is open source software/free software (OSS/FS), +released under the GNU General Public License (GPL), version 2; +see the license below. +The master web site for SLOCCount is +http://www.dwheeler.com/sloccount. +You can learn a lot about SLOCCount by reading the paper that caused its +creation, available at +http://www.dwheeler.com/sloc. +Feel free to see my master web site at +http://www.dwheeler.com, which has +other material such as the +Secure Programming +for Linux and Unix HOWTO, +my list of +OSS/FS references, and my paper +Why OSS/FS? Look at +the Numbers! +Please send improvements by email +to dwheeler, at, dwheeler.com (DO NOT SEND SPAM - please remove the +commas, remove the spaces, and change the word "at" into the at symbol). +

+The following sections first give a "quick start" +(discussing how to use SLOCCount once it's installed), +then discuss basic SLOCCount concepts, +how to install it, how to set your PATH, +how to install source code on RPM-based systems if you wish, and +how to use the "sloccount" front-end in more detail. +This is followed by material for advanced users: +how to use SLOCCount tools individually (for when you want more control +than the "sloccount" tool gives you), designer's notes, +the definition of SLOC, and miscellaneous notes. +The last sections state the license used (GPL) and give +hints on how to submit changes to SLOCCount (if you decide to make changes +to the program). + + +

+

Quick Start

+

+Once you've installed SLOCCount (discussed below), +you can measure an arbitrary program by typing everything +after the dollar sign into a terminal session: +

+  $  sloccount topmost-source-code-directory
+
+

+The directory listed and all its descendants will be examined. +You'll see output while it calculates, +culminating with physical SLOC totals and +estimates of development time, schedule, and cost. +If the directory contains a set of directories, each of which is +a different project developed independently, +use the "--multiproject" option so the effort estimations +can correctly take this into account. +

+You can redisplay the data different ways by using the "--cached" +option, which skips the calculation stage and re-prints previously +computed information. +You can use other options to control what's displayed: +"--filecount" shows counts of files instead of SLOC, and +"--details" shows the detailed information about every source code file. +So, to display all the details of every file once you've previously +calculated the results, just type: +

+  sloccount --cached --details
+
+

+You'll notice that the default output ends with a request. +If you use this data (e.g., in a report), please +credit that data as being "generated using 'SLOCCount' by David A. Wheeler." +I make no money from this program, so at least please give me some credit. +

+SLOCCount tries to ignore all automatically generated files, but its +heuristics to detect this are necessarily imperfect (after all, even humans +sometimes have trouble determining if a file was automatically generated). +If possible, try to clean out automatically generated files from +the source directories -- +in many situations "make clean" does this. +

+There's more to SLOCCount than this, but first we'll need to +explain some basic concepts, then we'll discuss other options +and advanced uses of SLOCCount. + +

+

Basic Concepts

+

+SLOCCount counts physical SLOC, also called "non-blank, non-comment lines". +More formally, physical SLOC is defined as follows: +``a physical source line of code (SLOC) is a line ending +in a newline or end-of-file marker, +and which contains at least one non-whitespace non-comment character.'' +Comment delimiters (characters other than newlines starting and ending +a comment) are considered comment characters. +Data lines only including whitespace +(e.g., lines with only tabs and spaces in multiline strings) are not included. +

+In SLOCCount, there are 3 different directories: +

    +
  1. The "source code directory", a directory containing the source code + being measured + (possibly in recursive subdirectories). The directories immediately + contained in the source code directory will normally be counted separately, + so it helps if your system is designed so that this top set of directories + roughly represents the system's major components. + If it doesn't, there are various tricks you can use to group source + code into components, but it's more work. + You don't need write access to the source code directory, but + you do need read access to all files, and read and search (execute) access + to all subdirectories. +
  2. The "bin directory", the directory containing the SLOCCount executables. + By default, installing the program creates a subdirectory + named "sloccount-VERSION" which is the bin directory. + The bin directory must be part of your PATH. +
  3. The "data directory", which stores the analysis results. + When measuring programs using "sloccount", by default + this is the directory ".slocdata" inside your home directory. + When you use the advanced SLOCCount tools directly, + in many cases this must be your "current" directory. + Inside the data directory are "data directory children" - these are + subdirectories that contain a file named "filelist", and each child + is used to represent a different project or a different + major component of a project. +
+

+SLOCCount can handle many different programming languages, and separate +them by type (so you can compare the use of each). +Here is the set of languages, sorted alphabetically; +common filename extensions are in +parentheses, with SLOCCount's ``standard name'' for the language +listed in brackets: +

    +
  1. Ada (.ada, .ads, .adb, .pad) [ada] +
  2. Assembly for many machines and assemblers (.s, .S, .asm) [asm] +
  3. awk (.awk) [awk] +
  4. Bourne shell and relatives such as bash, ksh, zsh, and pdksh (.sh) [sh] +
  5. C (.c, .pc, .ec, .ecp) [ansic] +
  6. C++ (.C, .cpp, .cxx, .cc, .pcc) [cpp] +
  7. C# (.cs) [cs] +
  8. C shell including tcsh (.csh) [csh] +
  9. COBOL (.cob, .cbl, .COB, .CBL) [cobol] +
  10. Expect (.exp) [exp] +
  11. Fortran (.f, .f77, .F) [fortran] +
  12. Haskell (.hs) [haskell]; please preprocess .lhs files. +
  13. Java (.java) [java] +
  14. lex (.l) [lex] +
  15. LISP including Scheme (.el, .scm, .lsp, .jl) [lisp] +
  16. makefiles (makefile) [makefile] +
  17. ML (.ml, .ml3) [ml] +
  18. Modula3 (.m3, .i3) [modula3] +
  19. Objective-C (.m) [objc] +
  20. Pascal (.p, .pas) [pascal] +
  21. Perl (.pl, .pm, .perl) [perl] +
  22. PHP (.php, .php[3456], .inc) [php] +
  23. Python (.py) [python] +
  24. Ruby (.rb) [ruby] +
  25. sed (.sed) [sed] +
  26. sql (.sql) [sql] +
  27. TCL (.tcl, .tk, .itk) [tcl] +
  28. Yacc (.y) [yacc] +
+ +

+

Installing SLOCCount

+

+Obviously, before using SLOCCount you'll need to install it. +SLOCCount depends on other programs, in particular perl, bash, +a C compiler (gcc will do), and md5sum +(you can get a useful md5sum program in the ``textutils'' package +on many Unix-like systems), so you'll need to get them installed +if they aren't already. +

+If your system uses RPM version 4 or greater to install software +(e.g., Red Hat Linux 7 or later), just download the SLOCCount RPM +and install it using a normal installation command; from the text line +you can use: +

+  rpm -Uvh sloccount*.rpm
+
+

+Everyone else will need to install from a tar file, and Windows users will +have to install Cygwin before installing sloccount. +

+If you're using Windows, you'll need to first install +Cygwin. +By installing Cygwin, you'll install an environment and a set of +open source Unix-like tools. +Cygwin essentially creates a Unix-like environment in which sloccount can run. +You may be able to run parts of sloccount without Cygwin, in particular, +the perl programs should run in the Windows port of Perl, but you're +on your own - many of the sloccount components expect a Unix-like environment. +If you want to install Cygwin, go to the +Cygwin main page +and install it. +If you're using Cygwin, install it to use Unix newlines, not +DOS newlines - DOS newlines will cause odd errors in SLOCCount +(and probably other programs, too). +I have only tested a "full" Cygwin installation, so I suggest installing +everything. +If you're short on disk space, at least install +binutils, bash, fileutils, findutils, +gcc, grep, gzip, make, man, perl, readline, +sed, sh-utils, tar, textutils, unzip, and zlib; +you should probably install vim as well, +and there may be other dependencies as well. +By default Cygwin will create a directory C:\cygwin\home\NAME, +and will set up the ability to run Unix programs +(which will think that the same directory is called /home/NAME). +Now double-click on the Cygwin icon, or select from the Start menu +the selection Programs / Cygnus Solutions / Cygwin Bash shell; +you'll see a terminal screen with a Unix-like interface. +Now follow the instructions (next) for tar file users. +

+If you're installing from the tar file, download the file +(into your home directory is fine). +Unpacking the file will create a subdirectory, so if you want the +unpacked subdirectory to go somewhere special, "cd" to where you +want it to go. +Most likely, your home directory is just fine. +Now gunzip and untar SLOCCount (the * replaces the version #) by typing +this at a terminal session: +

+  gunzip -c sloccount*.tar.gz | tar xvf -
+
+Replace "sloccount*.tar.gz" shown above +with the full path of the downloaded file, wherever that is. +You've now created the "bin directory", which is simply the +"sloccount-VERSION" subdirectory created by the tar command +(where VERSION is the version number). +

+Now you need to compile the few compiled programs in the "bin directory" so +SLOCCount will be ready to go. +First, cd into the newly-created bin directory, by typing: +

+  cd sloccount*
+
+

+You may then need to override some installation settings. +You can do this by editing the supplied makefile, or alternatively, +by providing options to "make" whenever you run make. +The supplied makefile assumes your C compiler is named "gcc", which +is true for most Linux systems, *BSD systems, and Windows systems using Cygwin. +If this isn't true, you'll need to set +the "CC" variable to the correct value (e.g., "cc"). +You can also modify where the files are stored; this variable is +called PREFIX and its default is /usr/local +(older versions of sloccount defaulted to /usr). +

+If you're using Windows and Cygwin, you +must override one of the installation +settings, EXE_SUFFIX, for installation to work correctly. +One way to set this value is to edit the "makefile" file so that +the line beginning with "EXE_SUFFIX" reads as follows: +

+  EXE_SUFFIX=.exe
+
+If you're using Cygwin and you choose to modify the "makefile", you +can use any text editor on the Cygwin side, or you can use a +Windows text editor if it can read and write Unix-formatted text files. +Cygwin users are free to use vim, for example. +If you're installing into your home directory and using the default locations, +Windows text editors will see the makefile as file +C:\cygwin\home\NAME\sloccount-VERSION\makefile. +Note that the Windows "Notepad" application doesn't work well, because it's not +able to handle Unix text files correctly. +Since this can be quite a pain, Cygwin users may instead decide to override +the makefile values during installation. +

+Finally, compile the few compiled programs in it by typing "make": +

+  make
+
+If you didn't edit the makefile in the previous step, you +need to provide options to your make invocations to set the correct values. +This is done by simply saying (after "make") the name of the variable, +an equal sign, and its correct value. +Thus, to compile the program on a Windows system using Cygwin, you can +skip modifying the makefile file by typing this instead of just "make": +
+  make EXE_SUFFIX=.exe
+
+

+If you want, you can install sloccount for system-wide use without +using the RPM version. +Windows users using Cygwin should probably do this, particularly +if they chose a "local" installation. +To do this, first log in as root (Cygwin users don't need to do this +for local installation). +Edit the makefile to match your system's conventions, if necessary, +and then type "make install": +

+  make install
+
+If you need to set some make options, remember to do that here too. +If you use "make install", you can uninstall it later using +"make uninstall". +Installing sloccount for system-wide use is optional; +SLOCCount works without a system-wide installation. +However, if you don't install sloccount system-wide, you'll need to +set up your PATH variable; see the section on +setting your path. +

+A note for Cygwin users (and some others): some systems, including Cygwin, +don't set up the environment quite right and thus can't display the manual +pages as installed. +The problem is that they forget to search /usr/local/share/man for +manual pages. +If you want to read the installed manual pages, type this +into a Bourne-like shell: +

+  MANPATH=/usr/local/share/man:/usr/share/man:/usr/man
+  export MANPATH
+
+Or, if you use a C shell: +
+  setenv MANPATH "/usr/local/share/man:/usr/share/man:/usr/man"
+
+From then on, you'll be able to view the reference manual pages +by typing "man sloccount" (or by using whatever manual page display system +you prefer). +

+ +

+

Installing The Source Code To Measure

+

+Obviously, you must install the software source code you're counting, +so somehow you must create the "source directory" +with the source code to measure. +You must also make sure that permissions are set so the software can +read these directories and files. +

+For example, if you're trying to count the SLOC for an RPM-based Linux system, +install the software source code by doing the following as root +(which will place all source code into the source directory +/usr/src/redhat/BUILD): +

    +
  1. Install all source rpm's: +
    +    mount /mnt/cdrom
    +    cd /mnt/cdrom/SRPMS
    +    rpm -ivh *.src.rpm
    +
    +
  2. Remove RPM spec files you don't want to count: +
    +    cd ../SPECS
    +    (look in contents of spec files, removing what you don't want)
    +
    +
  3. build/prep all spec files: +
    +    rpm -bp *.spec
    +
    +
  4. Set permissions so the source files can be read by all: +
    +    chmod -R a+rX /usr/src/redhat/BUILD
    +
    +
+

+Here's an example of how to download source code from an +anonymous CVS server. +Let's say you want to examine the source code in GNOME's "gnome-core" +directory, as stored at the CVS server "anoncvs.gnome.org". +Here's how you'd do that: +

    +
  1. Set up site and login parameters: +
    +  export CVSROOT=':pserver:anonymous@anoncvs.gnome.org:/cvs/gnome'
    +
    +
  2. Log in: +
    +  cvs login
    +
    +
  3. Check out the software (copy it to your local directory), using +mild compression to save on bandwidth: +
    +  cvs -z3 checkout gnome-core
    +
    +
+

+Of course, if you have a non-anonymous account, you'd set CVSROOT +to reflect this. For example, to log in using the "pserver" +protocol as ACCOUNT_NAME, do: +

+  export CVSROOT=':pserver:ACCOUNT_NAME@cvs.gnome.org:/cvs/gnome'
+
+

+You may need root privileges to install the source code and to give +another user permission to read it, but please avoid running the +sloccount program as root. +Although I know of no specific reason this would be a problem, +running any program as root turns off helpful safeguards. +

+Although SLOCCount tries to detect (and ignore) many cases where +programs are automatically generated, these heuristics are necessarily +imperfect. +So, please don't run any programs that generate other programs - just +do enough to get the source code prepared for counting. +In general you shouldn't run "make" on the source code, and if you have, +consider running "make clean" or "make really_clean" on the source code first. +It often doesn't make any difference, but identifying those circumstances +is difficult. +

+SLOCCount will not automatically uncompress files that are +compressed/archive files (such as .zip, .tar, or .tgz files). +Often such files are just "left over" old versions or files +that you're already counting. +If you want to count the contents of compressed files, uncompress them first. +

+SLOCCount also doesn't delve into files using "literate programming" +techniques, in part because there are too many incompatible formats +that implement it. +Thus, run the tools to extract the code from the literate programming files +before running SLOCCount. +For example, if you have many literate Haskell files (.lhs), please +extract them. + + +

Setting your PATH

+Before you can run SLOCCount, you'll need to make sure +the SLOCCount "bin directory" is in your PATH. +If you've installed SLOCCount in a system-wide location +such as /usr/bin, then you needn't do more; the RPMs and "make install" +commands essentially do this. +

+Otherwise, in Bourne-shell variants, type: +

+    PATH="$PATH:the directory with SLOCCount's executable files"
+    export PATH
+
+Csh users should instead type: +
+    setenv PATH "$PATH:the directory with SLOCCount's executable files"
+
+ +

Using SLOCCount: The Basics

+ +Normal use of SLOCCount is very simple. +In a terminal window just type "sloccount", followed by a +list of the source code directories to count. +If you give it only a single directory, SLOCCount tries to be +a little clever and break the source code into +subdirectories for purposes of reporting: +
    +
  1. if the directory has at least +two subdirectories, then those subdirectories will be used as the +breakdown (see the example below). +
  2. If the single directory contains files as well as directories +(or if you give sloccount some files as parameters), those files will +be assigned to the directory "top_dir" so you can tell them apart +from other directories. +
  3. If there's a subdirectory named "src", then that subdirectory is again +broken down, with all the further subdirectories prefixed with "src_". +So if directory "X" has a subdirectory "src", which contains subdirectory +"modules", the program will report a separate count from "src_modules". +
+In the terminology discussed above, each of these directories would become +"data directory children." +

+You can also give "sloccount" a list of directories, in which case the +report will be broken down by these directories +(make sure that the basenames of these directories differ). +SLOCCount normally considers all descendants of these directories, +though unless told otherwise it ignores symbolic links. +

+This is all easier to explain by example. +Let's say that we want to measure Apache 1.3.12 as installed using an RPM. +Once it's installed, we just type: +

+ sloccount /usr/src/redhat/BUILD/apache_1.3.12
+
+The output we'll see shows status reports while it analyzes things, +and then it prints out: + +
+SLOC	Directory	SLOC-by-Language (Sorted)
+24728   src_modules     ansic=24728
+19067   src_main        ansic=19067
+8011    src_lib         ansic=8011
+5501    src_os          ansic=5340,sh=106,cpp=55
+3886    src_support     ansic=2046,perl=1712,sh=128
+3823    src_top_dir     sh=3812,ansic=11
+3788    src_include     ansic=3788
+3469    src_regex       ansic=3407,sh=62
+2783    src_ap          ansic=2783
+1378    src_helpers     sh=1345,perl=23,ansic=10
+1304    top_dir         sh=1304
+104     htdocs          perl=104
+31      cgi-bin         sh=24,perl=7
+0       icons           (none)
+0       conf            (none)
+0       logs            (none)
+
+
+ansic:       69191 (88.85%)
+sh:           6781 (8.71%)
+perl:         1846 (2.37%)
+cpp:            55 (0.07%)
+
+
+Total Physical Source Lines of Code (SLOC)                   = 77873
+Estimated Development Effort in Person-Years (Person-Months) = 19.36 (232.36)
+ (Basic COCOMO model, Person-Months = 2.4 * (KSLOC**1.05))
+Estimated Schedule in Years (Months)                         = 1.65 (19.82)
+ (Basic COCOMO model, Months = 2.5 * (person-months**0.38))
+Estimated Average Number of Developers  (Effort/Schedule)    = 11.72
+Total Estimated Cost to Develop                              = $ 2615760
+ (average salary = $56286/year, overhead = 2.4).
+
+Please credit this data as "generated using 'SLOCCount' by David A. Wheeler."
+
+

+Interpreting this should be straightforward. +The Apache directory has several subdirectories, including "htdocs", "cgi-bin", +and "src". +The "src" directory has many subdirectories in it +("modules", "main", and so on). +Code files directly +contained in the main directory /usr/src/redhat/BUILD/apache_1.3.12 +are labelled "top_dir", while +code directly contained in the src subdirectory is labelled "src_top_dir". +Code in the "src/modules" directory is labelled "src_modules" here. +The output shows each major directory broken +out, sorted from largest to smallest. +Thus, the "src/modules" directory had the most code of the directories, +24728 physical SLOC, all of it in C. +The "src/helpers" directory had a mix of shell, perl, and C; note that +when multiple languages are shown, the list of languages in that child +is also sorted from largest to smallest. +

+Below the per-component set is a list of all languages used, +with their total SLOC shown, sorted from most to least. +After this is the total physical SLOC (77,873 physical SLOC in this case). +

+Next is an estimation of the effort and schedule (calendar time) +it would take to develop this code. +For effort, the units shown are person-years (with person-months +shown in parentheses); for schedule, total years are shown first +(with months in parentheses). +When invoked through "sloccount", the default assumption is that all code is +part of a single program; the "--multiproject" option changes this +to assume that all top-level components are independently developed +programs. +When "--multiproject" is invoked, each project's efforts are estimated +separately (and then summed), and the schedule estimate presented +is the largest estimated schedule of any single component. +

+By default the "Basic COCOMO" model is used for estimating +effort and schedule; this model +includes design, code, test, and documentation time (both +user/admin documentation and development documentation). +See below for more information on COCOMO +as it's used in this program. +

+Next are several numbers that attempt to estimate what it would have cost +to develop this program. +This is simply the amount of effort, multiplied by the average annual +salary and by the "overhead multiplier". +The default annual salary is +$56,286 per year; this value came from +ComputerWorld's September 4, 2000 Salary Survey, which reported the +average U.S. programmer/analyst salary in the year 2000. +You might consider using other numbers +(ComputerWorld's September 3, 2001 Salary Survey found +the average U.S. programmer/analyst making $55,100, senior +systems programmers averaging $68,900, and senior systems analysts averaging +$72,300). + +

+Overhead is much harder to estimate; I did not find a definitive source +for information on overheads. +After informal discussions with several cost analysts, +I determined that an overhead of 2.4 +would be representative of the overhead sustained by +a typical software development company. +As discussed in the next section, you can change these numbers too. + +

+You may be surprised by the high cost estimates, but remember, +these include design, coding, testing, documentation (both for users +and for programmers), and a wrap rate for corporate overhead +(to cover facilities, equipment, accounting, and so on). +Many programmers forget these other costs and are shocked by the high figures. +If you only wanted to know the costs of the coding, you'd need to get +those figures. + + +

+Note that if any top-level directory has a file named PROGRAM_LICENSE, +that file is assumed to contain the name of the license +(e.g., "GPL", "LGPL", "MIT", "BSD", "MPL", and so on). +If there is at least one such file, sloccount will also report statistics +on licenses. + + +
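+For example, you could record known licenses with a tiny script such as this one (an illustrative sketch; the directory names and licenses below are made up):
+
+  #!/usr/bin/perl
+  # Write PROGRAM_LICENSE files so sloccount can report license statistics.
+  # The mapping below is purely illustrative.
+  my %license = ('project-a' => 'GPL', 'project-b' => 'MIT');
+  foreach my $dir (keys %license) {
+      open(my $fh, '>', "$dir/PROGRAM_LICENSE") or die "cannot write in $dir: $!";
+      print $fh "$license{$dir}\n";
+      close($fh);
+  }
+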

Options

+The program "sloccount" has a large number of options +so you can control what is selected for counting and how the +results are displayed. +

+There are several options that control which files are selected +for counting: +

+ --duplicates   Count all duplicate files as normal files
+ --crossdups    Count duplicate files if they're in different data directory
+                children.
+ --autogen      Count automatically generated files
+ --follow       Follow symbolic links (normally they're ignored)
+ --addlang      Add languages to be counted that normally aren't shown.
+ --append       Add more files to the data directory
+
+Normally, files which have exactly the same content are counted only once +(data directory children are counted alphabetically, so the child +"first" in the alphabet will be considered the owner of the master copy). +If you want them all counted, use "--duplicates". +Sometimes when you use sloccount, each directory represents a different +project, in which case you might want to specify "--crossdups". +The program tries to reject files that are automatically generated +(e.g., a C file generated by bison), but you can disable this as well. +You can use "--addlang" to show makefiles and SQL files, which aren't +usually counted. +

+Possibly the most important option is "--cached". +Normally, when sloccount runs, it computes a lot of information and +stores this data in a "data directory" (by default, "~/.slocdata"). +The "--cached" option tells sloccount to use data previously computed, +greatly speeding up use once you've done the computation once. +The "--cached" option can't be used along with the options used to +select what files should be counted. +You can also select a different data directory by using the +"--datadir" option. +

+There are many options for controlling the output: +

+ --filecount     Show counts of files instead of SLOC.
+ --details       Present details: present one line per source code file.
+ --wide          Show "wide" format.  Ignored if "--details" selected
+ --multiproject  Assume each directory is for a different project
+                 (this modifies the effort estimation calculations)
+ --effort F E    Change the effort estimation model, so that it uses
+                 F as the factor and E as the exponent.
+ --schedule F E  Change the schedule estimation model, so that it uses
+                 F as the factor and E as the exponent.
+ --personcost P  Change the average annual salary to P.
+ --overhead O    Change the annual overhead to O.
+ --              End of options
+
+

+Basically, the first time you use sloccount, if you're measuring +a set of projects (not a single project) you might consider +using "--crossdups" instead of the defaults. +Then, you can redisplay data quickly by using "--cached", +combining it with options such as "--filecount". +If you want to send the data to another tool, use "--details". +

+If you're measuring a set of projects, you probably ought to pass +the option "--multiproject". +When "--multiproject" is used, efforts are computed for each component +separately and summed, and the time estimate used is the maximum +single estimated time. +

+The "--details" option dumps the available data in 4 columns, +tab-separated, where each line +represents a source code file in the data directory children identified. +The first column is the SLOC, the second column is the language type, +the third column is the name of the data directory child +(as it was given to get_sloc_details), +and the last column is the absolute pathname of the source code file. +You can then pipe this output to "sort" or some other tool for further +analysis (such as a spreadsheet or RDBMS). +

+You can change the parameters used to estimate effort using "--effort". +For example, if you believe that in the environment being used +you can produce 2 KSLOC/month scaling linearly, then +that means that the factor for effort you should use is 1/2 = 0.5 month/KSLOC, +and the exponent for effort is 1 (linear). +Thus, you can use "--effort 0.5 1". +

+You can also set the annual salary and overheads used to compute +estimated development cost. +While "$" is shown, there's no reason you have to use dollars; +the unit of development cost is the same unit as the unit used for +"--personcost". + +

More about COCOMO

+ +

+By default SLOCCount uses a very simple estimating model for effort and schedule: +the basic COCOMO model in the "organic" mode (modes are more fully discussed below). +This model estimates effort and schedule, including design, code, test, +and documentation time (both user/admin documentation and development documentation). +Basic COCOMO is a nice simple model, and it's used as the default because +it doesn't require any information about the code other than the SLOC count +already computed. +

+However, basic COCOMO's accuracy is limited for the same reason - +basic COCOMO doesn't take a number of important factors into account. +You can at least quickly determine +if the right "mode" is being used to improve accuracy. You can also +use the "Intermediate COCOMO" and "Detailed COCOMO" models, which take more +factors into account and are likely to produce more accurate estimates as +a result. Take these estimates as just that - estimates - they're not grand truths. +If you have the necessary information, +you can improve the model's accuracy by taking these factors into account, and +pass this additional information to sloccount using its +"--effort" and "--schedule" options (as discussed in +options). +

+To use the COCOMO model, you first need to determine your application's +mode, which can be "organic", "semidetached", or "embedded". +Most software is "organic" (which is why it's the default). +Here are simple definitions of these modes: +

    +
  • Organic: Relatively small software teams develop software in a highly +familiar, in-house environment.  It has a generally stable development +environment, minimal need for innovative algorithms, and requirements can +be relaxed to avoid extensive rework.
  • Semidetached: This is an intermediate +step between organic and embedded. This is generally characterized by reduced +flexibility in the requirements.
  • Embedded: The project must operate +within tight (hard-to-meet) constraints, and requirements +and interface specifications are often non-negotiable. +The software will be embedded in a complex environment that the +software must deal with as-is.
+By default, SLOCCount uses the basic COCOMO model in the organic mode. +For the basic COCOMO model, here are the critical factors for --effort and --schedule:
+
    +
  • Organic: effort factor = 2.4, exponent = 1.05; schedule factor = 2.5, exponent = 0.38
  • Semidetached: effort factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35
  • Embedded: effort factor = 3.6, exponent = 1.20; schedule factor = 2.5, exponent = 0.32
+Thus, if you want to use SLOCCount but the project is actually semidetached, +you can use the options "--effort 3.0 1.12 --schedule 2.5 0.35" +to get a more accurate estimate. +
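+As a rough worked example (assuming the usual basic COCOMO forms, where effort in person-months = factor * (KSLOC ^ exponent) and schedule in months = factor * (effort ^ exponent)), a 10 KSLOC semidetached project would be estimated at about 3.0 * 10^1.12 = 39.5 person-months and about 2.5 * 39.5^0.35 = 9.0 months of schedule. The full command (the directory name "my_project" is just a placeholder) would be:
+
+  sloccount --effort 3.0 1.12 --schedule 2.5 0.35 my_project
+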
+For more accurate estimates, you can use the intermediate COCOMO models. +For intermediate COCOMO, use the following figures:
+
    +
  • Organic: effort base factor = 2.3, exponent = 1.05; schedule factor = 2.5, exponent = 0.38
  • Semidetached: effort base factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35
  • Embedded: effort base factor = 2.8, exponent = 1.20; schedule factor = 2.5, exponent = 0.32
+The intermediate COCOMO values for schedule are exactly the same as the basic +COCOMO model; the starting effort values are not quite the same, as noted +in Boehm's book. However, in the intermediate COCOMO model, you don't +normally use the effort factors as-is; instead, you apply various corrective factors +(called cost drivers). To use these corrections, you consider +all the cost drivers, determine which rating best describes each of them, +and multiply their corrective values by the effort base factor. +The result is the final effort factor. +Here are the cost drivers (from Boehm's book, tables 8-2 and 8-3): +
Cost Drivers + Ratings +
ID + Driver Name + Very Low + Low + Nominal + High + Very High + Extra High +
RELY + Required software reliability + 0.75 (effect is slight inconvenience) + 0.88 (easily recovered losses) + 1.00 (recoverable losses) + 1.15 (high financial loss) + 1.40 (risk to human life) +   +
DATA + Database size +   + 0.94 (database bytes/SLOC < 10) + 1.00 (D/S between 10 and 100) + 1.08 (D/S between 100 and 1000) + 1.16 (D/S > 1000) +   +
CPLX + Product complexity + 0.70 (mostly straightline code, simple arrays, simple expressions) + 0.85 + 1.00 + 1.15 + 1.30 + 1.65 (microcode, multiple resource scheduling, device timing dependent coding) +
TIME + Execution time constraint +   +   + 1.00 (<50% use of available execution time) + 1.11 (70% use) + 1.30 (85% use) + 1.66 (95% use) +
STOR + Main storage constraint +   +   + 1.00 (<50% use of available storage) + 1.06 (70% use) + 1.21 (85% use) + 1.56 (95% use) +
VIRT + Virtual machine (HW and OS) volatility +   + 0.87 (major change every 12 months, minor every month) + 1.00 (major change every 6 months, minor every 2 weeks) + 1.15 (major change every 2 months, minor changes every week) + 1.30 (major changes every 2 weeks, minor changes every 2 days) +   +
TURN + Computer turnaround time +   + 0.87 (interactive) + 1.00 (average turnaround < 4 hours) + 1.07 + 1.15 +   +
ACAP + Analyst capability + 1.46 (15th percentile) + 1.19 (35th percentile) + 1.00 (55th percentile) + 0.86 (75th percentile) + 0.71 (90th percentile) +   +
AEXP + Applications experience + 1.29 (<= 4 months experience) + 1.13 (1 year) + 1.00 (3 years) + 0.91 (6 years) + 0.82 (12 years) +   +
PCAP + Programmer capability + 1.42 (15th percentile) + 1.17 (35th percentile) + 1.00 (55th percentile) + 0.86 (75th percentile) + 0.70 (90th percentile) +   +
VEXP + Virtual machine experience + 1.21 (<= 1 month experience) + 1.10 (4 months) + 1.00 (1 year) + 0.90 (3 years) +   +   +
LEXP + Programming language experience + 1.14 (<= 1 month experience) + 1.07 (4 months) + 1.00 (1 year) + 0.95 (3 years) +   +   +
MODP + Use of "modern" programming practices (e.g. structured programming) + 1.24 (No use) + 1.10 + 1.00 (some use) + 0.91 + 0.82 (routine use) +   +
TOOL + Use of software tools + 1.24 + 1.10 + 1.00 (basic tools) + 0.91 (test tools) + 0.83 (requirements, design, management, documentation tools) +   +
SCED + Required development schedule + 1.23 (75% of nominal) + 1.08 (85% of nominal) + 1.00 (nominal) + 1.04 (130% of nominal) + 1.10 (160% of nominal) +   +
+
+
+
+So, once all of the factors have been multiplied together, you can +then use the "--effort" flag to set more accurate factors and exponents. +Note that some factors will probably not be "nominal" simply because +times have changed since COCOMO was originally developed, so a few ratings that were once unusual are quite common today. +For example, +for many software projects of today, virtual machine volatility tends to +be low, and the +use of "modern" programming practices (structured programming, +object-oriented programming, abstract data types, etc.) tends to be high. +The cost drivers let you account for these differences. +

+For example, imagine that you're examining a fairly simple application that +meets the "organic" requirements. Organic projects have a base factor +of 2.3 and an exponent of 1.05, as noted above. +We then examine all the cost drivers to determine a corrected base factor. +For this example, imagine +that we determine the values of these cost drivers are as follows:
+
ID + Driver Name + Rating + Multiplier
RELY + Required software reliability + Low - easily recovered losses + 0.88
DATA + Database size + Low + 0.94
CPLX + Product complexity + Nominal + 1.00
TIME + Execution time constraint + Nominal + 1.00
STOR + Main storage constraint + Nominal + 1.00
VIRT + Virtual machine (HW and OS) volatility + Low (major change every 12 months, minor every month) + 0.87
TURN + Computer turnaround time + Nominal + 1.00
ACAP + Analyst capability + Nominal (55th percentile) + 1.00
AEXP + Applications experience + Nominal (3 years) + 1.00
PCAP + Programmer capability + Nominal (55th percentile) + 1.00
VEXP + Virtual machine experience + High (3 years) + 0.90
LEXP + Programming language experience + High (3 years) + 0.95
MODP + Use of "modern" programming practices (e.g. structured programming) + High (Routine use) + 0.82
TOOL + Use of software tools + Nominal (basic tools) + 1.00
SCED + Required development schedule + Nominal + 1.00
+

+So, starting with the base factor (2.3 in this case) and then multiplying it by the driver values determined in this example, we compute:
+

2.3*0.88*0.94*1*1*1*0.87*1.00*1*1*1*0.90*0.95*0.82*1*1
+For this +example, the final factor for the effort calculation is 1.1605. You would then +invoke sloccount with "--effort 1.1605 1.05" to pass in the corrected factor +and exponent for the effort estimation. +You don't need to use "--schedule" to set the factors when you're using the organic model, because in SLOCCount +the default values are the values for the organic model. +You can set the scheduling parameters explicitly anyway +by adding "--schedule 2.5 0.38". +You do need to use the --schedule option for +embedded and semidetached projects, because those modes have different +schedule parameters. The final command would be:
+
+sloccount --effort 1.1605 1.05 --schedule 2.5 0.38 my_project
+

+The detailed COCOMO model requires breaking information down further. +

+For more information about the original COCOMO model, including the detailed +COCOMO model, see the book +Software Engineering Economics by Barry Boehm. +

+You may be surprised by the high cost estimates, but remember, +these include design, coding, testing (including +integration and testing), documentation (both for users +and for programmers), and a wrap rate for corporate overhead +(to cover facilities, equipment, accounting, and so on). +Many programmers forget these other costs and are shocked by the high cost +estimates. +

+If you want to know a subset of this cost, you'll need to isolate +just those figures that you're trying to measure. +For example, let's say you want to find the money a programmer would receive +to do just the coding of the units of the program +(ignoring wrap rate, design, testing, integration, and so on). +According to Boehm's book (page 65, table 5-2), +the percentage varies by product size. +For effort, code and unit test takes 42% for small (2 KSLOC), 40% for +intermediate (8 KSLOC), 38% for medium (32 KSLOC), and 36% for large +(128 KSLOC). +Sadly, Boehm doesn't separate coding from unit test; perhaps +50% of the time is spent in unit test in traditional proprietary +development (including fixing bugs found from unit test). +If you want to know the income to the programmer (instead of cost to +the company), you'll also want to remove the wrap rate. +Thus, a programmer's income to only write the code for a +small program (circa 2 KSLOC) would be 8.75% (42% x 50% x (1/2.4)) +of the default figure computed by SLOCCount. +
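+As a purely illustrative calculation (the total is made up): if SLOCCount reported an estimated development cost of $1,000,000 for such a small program, the coding-only income to the programmer under these assumptions would be roughly $1,000,000 x 42% x 50% x (1/2.4), or about $87,500 - that is, 8.75% of the reported figure. +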

+In other words, for a small program, what a programmer would actually be paid for +just the coding task is less than one-tenth of the cost computed by SLOCCount. +Note that a proprietary commercial company that bid using +this lower figure would rapidly go out of business, since this figure +ignores the many other costs they have to incur to actually develop +working products. +Programs don't arrive out of thin air; someone needs to determine what +the requirements are, how to design it, and perform at least +some testing of it. +

+There's another later estimation model for effort and schedule +called "COCOMO II", but COCOMO II requires logical SLOC instead +of physical SLOC. +SLOCCount doesn't currently measure logical SLOC, so +SLOCCount doesn't currently use COCOMO II. +Contributions of code to compute logical SLOC and then optionally +use COCOMO II will be gratefully accepted. + +

Counting Specific Files

+

+If you want to count a specific subset, you can use the "--details" +option to list individual files, pipe this into "grep" to select the +files you're interested in, and pipe the result to +my tool "print_sum" (which reads lines beginning with numbers, and +returns the total of those numbers). +If you've already done the analysis, an example would be: +

+  sloccount --cached --details | grep "/some/subdirectory/" | print_sum
+
+

+If you just want to count specific files, and you know what language +they're in, you +can just invoke the basic SLOC counters directly. +By convention the simple counters are named "LANGUAGE_count", +and they take on the command line a list of the +source files to count. +Here are some examples: +

+  c_count *.c *.cpp *.h  # Count C and C++ in current directory.
+  asm_count *.S          # Count assembly.
+
+All the counter programs (*_count) accept a "-f FILENAME" option, where FILENAME +is a file containing the names of all the source files to count +(one file per text line). If FILENAME is "-", the +list of file names is taken from the standard input. +The "c_count" program handles both C and C++ (but not Objective-C; +for that, use objc_count). +The available counters are +ada_count, +asm_count, +awk_count, +c_count, +csh_count, +exp_count, +fortran_count, +java_count, +lex_count, +lisp_count, +ml_count, +modula3_count, +objc_count, +pascal_count, +perl_count, +python_count, +sed_count, +sh_count, +sql_count, and +tcl_count. +
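+For example, assuming you have Python source to count (the file names below are placeholders), either of these forms works:
+
+  python_count -f python_files.txt            # list of files read from a file
+  find . -name '*.py' | python_count -f -     # list of files read from stdin
+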

+There is also "generic_count", which takes as its first parameter +the ``comment string'', followed by a list of files. +The comment string begins a comment that ends at the end of the line. +Sometimes, if you have source for a language not listed, generic_count +will be sufficient. +
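+For example, R is not in the list above, but R comments run from "#" to the end of the line and R has no block comments, so a rough count (this is not an officially supported language) could be obtained with:
+
+  generic_count '#' *.R
+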

+The basic SLOC counters send their output to standard output, one line per file +(showing the SLOC count and filename). +The assembly counter shows some additional information about each file. +The basic SLOC counters always complete their output with a line +saying "Total:", followed by a line with the +total SLOC count. + +

Countering Problems and Handling Errors

+ +If you're analyzing unfamiliar code, there's always the possibility +that it uses languages not processed by SLOCCount. +To counter this, after running SLOCCount, run the following program: +
+ count_unknown_ext
+
+This will look at the resulting data (in its default data directory +location, ~/.slocdata) and report a sorted list of the file extensions +for uncategorized ("unknown") files. +The list will show every file extension and how many files had that +extension, and is sorted by most common first. +It's not a problem if an "unknown" type isn't a source code file, but +if there are a significant number of source files in this category, +you'll need to change SLOCCount to get an accurate result. + +
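+For example, the output might look like this (the extensions and counts shown here are made up):
+
+  .tpl  37
+  .xyz  12
+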

+One error report that you may see is: +

+  c_count ERROR - terminated in string in (filename)
+
+ +The cause of this is that c_count (the counter for C-like languages) +keeps track of whether or not it's in a string, and when the counter +reached the end of the file, it still thought it was in a string. + +

+Note that c_count really does have to keep track of whether or +not it's in a string. +For example, this is three lines of code, not two, because the +``comment'' is actually in string data: + +

+ a = "hello
+ /* this is not a comment */
+ bye";
+
+

+Usually this error means you have code that won't compile +given certain #define settings. For example, XFree86 has a line of code that's +actually wrong (it has a string that's not terminated), but people +don't notice because the #define needed to enable it is not usually set. +Legitimate code can trigger this message, but code that triggers +this message is horrendously formatted and is begging for problems. + +

+In either case, the best way to handle the situation +is to modify the source code (slightly) so that the code's intent is clear +(by making sure that double-quotes balance). +If it's your own code, you definitely should fix this anyway. +You need to look at the double-quote (") characters. One approach is to +just grep for double-quote, and look at every line for text that isn't +terminated, e.g., printf("hello %s, myname); + +
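+For example, one simple way to review every double-quote by hand (the filename is just a placeholder) is:
+
+  grep -n '"' suspect_file.c | less
+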

+SLOCCount reports warnings when an unusually +large number of duplicate files are detected. +A large number of duplicates may suggest that you're counting +two different versions of the same program as though they were +independently developed. +You may want to cd into the data directory (usually ~/.slocdata), cd into +the child directories corresponding to each component, and then look +at their dup_list.dat files, which list the filenames that appeared +to be duplicated (and what they duplicate with). + + +

Adding Support for New Languages

+SLOCCount handles many languages, but if it doesn't support one you need, +you'll need to give the language a standard (lowercase ASCII) name, +then modify SLOCCount to (1) detect and (2) count code in that language. + +
    +
  1. +To detect a new language, you'll need to modify the program break_filelist. +If the filename extension is reliable, you can modify the array +%file_extensions, which maps various filename extensions into languages. +If your needs are more complex, you'll need to modify the code +(typically in functions get_file_type or file_type_from_contents) +so that the correct file type is determined. +For example, if a file with a given filename extension is only +sometimes that type, you'll need to write code to examine the +file contents. +
  2. +You'll need to create a SLOC counter for that language type. +It must have the name XYZ_count, where XYZ is the standard name for the +language. +

    +For some languages, you may be able to use the ``generic_count'' program +to implement your counter - generic_count takes as its first argument +the string that begins a comment (comments continue until the end of the line); +the other arguments are the files to count. +Thus, the LISP counter looks like this: +

    + #!/bin/sh
    + generic_count ';' $@
    +
    +The generic_count program won't work correctly if there are multiline comments +(e.g., C) or multiline string constants. +If your language's syntax is identical to C/C++'s in terms of +string constant definitions and commenting syntax +(using // or /* .. */), then you can use the c_count program - in this case, +modify compute_sloc_lang so that the c_count program is used. +

    +Otherwise, you'll have to devise your own counting program. +The program must generate output in the same format as the other counters: +for every filename passed as an argument, print a separate line giving the SLOC +for that file, a space, and the filename. +(Note: the assembly language counter produces a slightly different format.) +After that, print "Total:" on its own line, followed by the actual SLOC total +on the following (last) line. A minimal sketch of such a counter is shown below. +
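    +Here is a minimal sketch (not part of SLOCCount itself) of a counter for a hypothetical language whose comments run from "%" to the end of the line; the script name and comment character are made-up examples: +
    + #!/bin/sh
    + # hypothetical_count - strip "%" comments, then count the lines that
    + # still contain a non-blank character, using the output format above.
    + total=0
    + for file in "$@"
    + do
    +   sloc=`sed -e 's/%.*//' "$file" | grep -c '[^[:space:]]'`
    +   echo "$sloc $file"
    +   total=`expr $total + $sloc`
    + done
    + echo "Total:"
    + echo "$total"
    +
    +A real counter also has to handle whatever multiline comment or string syntax the language has, as discussed above. +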

+ +

Advanced SLOCCount Use

+For most people, the previous information is enough. +However, if you're measuring a large set of programs, or have unusual needs, +those steps may not give you enough control. +In that case, you may need to create your own "data directory" +by hand and separately run the SLOCCount tools. +Basically, "sloccount" (note the lower case) is the name for +a high-level tool which invokes many other tools; this entire +suite is named SLOCCount (note the mixed case). +The next section will describe how to invoke the various tools "manually" +so you can gain explicit control over the measuring process when +the defaults are not to your liking, along with various suggestions +for how to handle truly huge sets of data. +

+Here's how to manually create a "data directory" to hold +intermediate results, and how to invoke each tool in sequence +(with discussion of options): +

    +
  1. Set your PATH to include the SLOCCount "bin directory", as discussed above. +
  2. Make an empty "data directory" +(where all intermediate results will be stored); +you can pick any name and location you like for this directory. +Here, I'll use the name "data": +
    +    mkdir ~/data
    +
    +
  3. Change your current directory to this "data directory": +
    +    cd ~/data
    +
    +The rest of these instructions assume that your current directory +is the data directory. +You can set up many different data directories if you wish, to analyze +different source programs or analyze the programs in different ways; +just "cd" to the one you want to work with. +
  4. (Optional) Some of the later steps will produce +a lot of output while they're running. +If you want to capture this information into a file, use the standard +"script" command to do so. +For example, "script run1" will save the output of everything you do into +file "run1" (until you type control-D to stop saving the information). +Don't forget that you're creating such a file, or it will become VERY large, +and in particular don't type any passwords into such a session. +You can store the script in the data directory, or create a subdirectory +for such results - any data directory subdirectory that doesn't have the +special file "filelist" is not a "data directory child" and is thus +ignored by the later SLOCCount analysis routines. +
  5. Now initialize the "data directory". + In particular, initialization will create the "data directory children", + a set of subdirectories equivalent to the source code directory's + top directories. Each of these data directory children (subdirectories) + will contain a file named "filelist", which + lists all filenames in the corresponding source code directory. + These data directory children + will also eventually contain intermediate results + of analysis, which you can check for validity + (also, having a cache of these values speeds later analysis steps). +

    + You use the "make_filelists" command to initialize a data directory. + For example, if your source code is in /usr/src/redhat/BUILD, run: +

    +   make_filelists /usr/src/redhat/BUILD/*
    +
    +

    + Internally, make_filelists uses "find" to create the list of files, and + by default it ignores all symbolic links. However, you may need to + follow symbolic links; if you do, give make_filelists the + "--follow" option (which will use find's "-follow" option). + Here are make_filelists' options: +

    + --follow         Follow symbolic links
    + --datadir D      Use this data directory
    + --skip S         Skip basenames named S
    + --prefix P       When creating children, prepend P to their name.
    + --               No more options
    +
    +

    + Although you don't normally need to do so, if you want certain files to + not be counted at all in your analysis, you can remove + data directory children or edit the "filelist" files to do so. + There's no need to remove files which aren't source code files normally; + this is handled automatically by the next step. +

    + If you don't have a single source code directory where the subdirectories + represent the major components you want to count separately, you can + still use the tool but it's more work. + One solution is to create a "shadow" directory with the structure + you wish the program had, using symbolic links (you must use "--follow" + for this to work). + You can also just invoke make_filelists multiple times, with parameters + listing the various top-level directories you wish to include. + Note that the basenames of the directories must be unique. +

    + If there are so many directories (e.g., a massive number of projects) + that the command line is too long, + you can run make_filelists multiple times in the same + directory with different arguments to create them. + You may find "find" and/or "xargs" helpful in doing this automatically. + For example, here's how to do the same thing using "find": +

    + find /usr/src/redhat/BUILD -maxdepth 1 -mindepth 1 -type d \
    +        -exec make_filelists {} \;
    +
    +
  6. Categorize each file. +This means that we must determine which +files contain source code (eliminating auto-generated and duplicate files), +and of those files which language each file contains. +The result will be a set of files in each subdirectory of the data directory, +where each file represents a category (e.g., a language). +
    +   break_filelist *
    +
    + At this point you might want to examine the data directory subdirectories + to ensure that "break_filelist" has correctly determined the types of + the various files. + In particular, the "unknown" category may have source files in a language + SLOCCount doesn't know about. + If the heuristics got some categorization wrong, you can modify the + break_filelist program and re-run break_filelist. +

    + By default break_filelist removes duplicates, doesn't count + automatically generated files as normal source code files, and + only gives some feedback. You can change these defaults with the + following options: +

    + --duplicates   Count all duplicate files as normal files
    + --crossdups    Count duplicate files if they're in different data directory
    +                children (i.e., in different "filelists")
    + --autogen      Count automatically generated files
    + --verbose      Present more verbose status information while processing.
    +
    +

    + Duplicate control in particular is an issue; you probably don't want + duplicates counted, so that's the default. + Duplicate files are detected by determining if their MD5 checksums + are identical; the "first" duplicate encountered is the only one kept. + Normally, since shells sort directory names, this means that the + file in the alphabetically first child directory is the one counted. + You can change this around by listing directories in the sort order you + wish followed by "*"; if the same data directory child + is requested for analysis more + than once in a given execution, it's skipped after the first time. + So, if you want any duplicate files with child directory "glibc" to + count as part of "glibc", then you should provide the data directory children + list as "glibc *". +

    + Beware of choosing something other than "*" as the parameter here, + unless you use the "--duplicates" or "--crossdups" options. + The "*" represents the list of data directory children to examine. + Since break_filelist skips duplicate files identified + in a particular run, if you run break_filelist + on only certain children, some duplicate files won't be detected. + If you're allowing duplicates (via "--duplicates" or + "--crossdups"), then this isn't a problem. + Or, you can use the ``--duplistfile'' option to store and retrieve + hashes of files, so that additional files can be handled. +

    + If there are so many directories that the command line is too long, + you can run break_filelist multiple times and give it + a subset of the directories each time. + You'll need to use one of the duplicate control options to do this. + I would suggest using "--crossdups", which + means that duplicates inside a child will only be counted once, + eliminating at least some of the problems of duplicates. + Here's the equivalent of "break_filelist *" when there are a large + number of subdirectories: +

    + find . -maxdepth 1 -mindepth 1 -type d -exec break_filelist --crossdups {} \;
    +
    + Indeed, for all of the later commands where "*" is listed as the parameter + in these instructions + (for the list of data directory children), just run the above "find" + command and replace "break_filelist --crossdups" with the command shown. +
  7. (Optional) +If you're not very familiar with the program you're analyzing, you +might not be sure that "break_filelist" has correctly identified +all of the files. +In particular, the system might be using an unexpected +programming language or extension not handled by SLOCCount. +If this is your circumstance, you can just run the command: +
    + count_unknown_ext
    +
    +(note that this command is unusual - it doesn't take any arguments, +since it's hard to imagine a case where you wouldn't want every +directory examined). +Unlike the other commands discussed, this one specifically looks at +${HOME}/.slocdata. +This command presents a list of extensions which are unknown to break_filelist, +with the most common ones listed first. +The output format is a name, followed by the number of instances; +the name begins with a "." if it's an extension, or, if there's no +extension, it begins with "/" followed by the base name of the file. +break_filelist already knows about common extensions such as ".gif" and ".png", +as well as common filenames like "README". +You can also view the contents of each of the data directory children's +files to see if break_filelist has correctly categorized the files. +
  8. Now compute SLOC and filecounts for each language; you can compute for all + languages at once by calling: +
    +   compute_all *
    +
    +If you only want to compute SLOC for a specific language, +you can invoke compute_sloc_lang, which takes as its first parameter +the SLOCCount name of the language ("ansic" for C, "cpp" for C++, +"ada" for Ada, "asm" for assembly), followed by the list +of data directory children. +Note that these names are a change from version 1.0, which +called the master program "compute_all", +and had "compute_*" programs for each language. +

    +Notice the "*"; you can replace the "*" with just the list of +data directory children (subdirectories) to compute, if you wish. +Indeed, you'll notice that nearly all of the following commands take a +list of data directory children as arguments; when you want all of them, use +"*" (as shown in these instructions), otherwise, list the ones you want. +

    +When you run compute_all or compute_sloc_lang, each data directory +child (subdirectory) +is consulted in turn for a list of the relevant files, and the +SLOC results are placed in that data directory child. +In each child, +the file "LANGUAGE-outfile.dat" lists the information from the +basic SLOC counters. +That is, the outfile lists the SLOC and filename +(the assembly outfile has additional information), and ends with +a line saying "Total:" followed by a line showing the total SLOC of +that language in that data directory child. +The file "all-physical.sloc" has the final total SLOC for every language +in that child directory (i.e., it's the last line of the outfile). +

  9. (Optional) If you want, you can also use USC's CodeCount. +I've had trouble with these programs, so I don't do this normally. +However, you're welcome to try - they support logical SLOC measures +as well as physical ones (though not for most of the languages +supported by SLOCCount). +Sadly, they don't seem to compile in gcc without a lot of help, they +use fixed-width buffers that make me nervous, and I found a +number of bugs (e.g., it couldn't handle "/* text1 *//* text2 */" in +C code, a format that's legal and used often in the Linux kernel). +If you want to do this, +modify the files compute_c_usc and compute_java_usc so they point to the +right directories, and type: +
    + compute_c_usc *
    +
    +
  10. Now you can analyze the results. The main tool for +presenting SLOCCount results is "get_sloc", e.g.: +
    +  get_sloc * | less
    +
    +The get_sloc program takes many options, including: +
    + --filecount    Display number of files instead of SLOC (SLOC is the default)
    + --wide         Use "wide" format instead (tab-separated columns)
    + --nobreak      Don't insert breaks in long lines
    + --sort  X      Sort by "X", where "X" is the name of a language
    +                ("ansic", "cpp", "fortran", etc.), or "total".
    +                By default, get_sloc sorts by "total".
    + --nosort       Don't sort - just present results in order of directory
    +                listing given.
    + --showother    Show non-language totals (e.g., # duplicate files).
    + --oneprogram   When computing effort, assume that all files are part of
    +                a single program.  By default, each subdirectory specified
    +                is assumed to be a separate, independently-developed program.
    + --noheader     Don't show the header
    + --nofooter     Don't show the footer (the per-language values and totals)
    +
    +

    +Note that unlike the "sloccount" tool, get_sloc requires the current +directory to be the data directory. +

    +If you're displaying SLOC, get_sloc will also estimate the time it +would take to develop the software using COCOMO (using its "basic" model). +By default, this figure assumes that each of the major subdirectories was +developed independently of the others; +you can use "--oneprogram" to make the assumption that all files are +part of the same program. +The COCOMO model makes many other assumptions; see the paper at +http://www.dwheeler.com/sloc +for more information. +

    +If you need to do more analysis, you might want to use the "--wide" +option and send the data to another tool such as a spreadsheet +(e.g., gnumeric) or RDBMS (e.g., PostgreSQL). +Using the "--wide" option creates tab-separated data, which is easier to +import. +You may also want to use the "--noheader" and/or "--nofooter" options to +simplify porting the data to another tool; see the example command just after this list. +

    +Note that in version 1.0, "get_sloc" was called "get_data". +

    +If you have so many data directory children that you can't use "*" +on the command line, get_sloc won't be as helpful. +Feel free to patch get_sloc to add this capability (as another option), +or use get_sloc_detail (discussed next) to feed the data into another tool. +

  11. (Optional) If you just can't get the information you need from get_sloc, +then you can get the raw results of everything and process the data +yourself. +I have a little tool to do this, called get_sloc_details. +You invoke it in a similar manner: +
    +get_sloc_details *
    +
    +
+ +
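+As promised above, here is an example of exporting the get_sloc data in a form another tool can easily import (the output file name is just a placeholder):
+
+  get_sloc --wide --noheader --nofooter * > sloc_data.txt
+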

+

Designer's Notes

+

+Here are some ``designer's notes'' on how SLOCCount works, +including what it can handle. +

+The program break_filelist +has categories for each programming language it knows about, +plus the special categories ``not'' (not a source code file), +``auto'' (an automatically-generated file and thus not to be counted), +``zero'' (a zero-length file), +``dup'' (a duplicate of another file as determined by an md5 checksum), +and +``unknown'' (a file which doesn't seem to be a source code file +nor any of these other categories). +It's a good idea to examine +the ``unknown'' items later, checking the common extensions +to ensure you have not missed any common types of code. +

+The program break_filelist uses lots of heuristics to correctly +categorize files. +Here are a few notes about its heuristics: +

    +
  1. +break_filelist first checks for well-known extensions (such as .gif) that +cannot be program files, and for a number of common generated filenames. +
  2. +It then peeks at the first few lines for "#!" followed by a legal script +name. +Sometimes it looks further, for example, many Python programs +invoke "env" and then use it to invoke python. +
  3. +If that doesn't work, it uses the extension to try to determine the category. +For a number of languages, the extension is not reliable, so for those +languages it examines the file contents and uses a set of heuristics +to determine if the file actually belongs to that category. +
  4. +Detecting automatically generated files is not easy, and it's +quite conceivable that it won't detect some automatically generated files. +The first 15 lines are examined, to determine if any of them +include at the beginning of the line (after spaces and +possible comment markers) one of the following phrases (ignoring +upper and lower case distinctions): +``generated automatically'', +``automatically generated'', +``this is a generated file'', +``generated with the (something) utility'', +or ``do not edit''. +
  5. A number of filename conventions are used, too. +For example, +any ``configure'' file is presumed to be automatically generated if +there's a ``configure.in'' file in the same directory. +
  6. +To eliminate duplicates, +the program keeps md5 checksums of each program file. +Any given md5 checksum is only counted once. +Build directories are processed alphabetically, so +if the same file content is in both directories ``a'' and ``b'', +it will be counted only once as being part of ``a'' unless you make +other arrangements. +Thus, some data directory children with names later in the alphabet may appear +smaller than would make sense at first glance. +It is very difficult to eliminate ``almost identical'' files +(e.g., an older and newer version of the same code, included in two +separate packages), because +it is difficult to determine when two ``similar'' files are essentially +the same file. +Changes such as the use of pretty-printers and massive renaming of variables +could make small changes seem large, while the small files +might easily appear to be the ``same''. +Thus, files with different contents are simply considered different. +
  7. +If all else fails, the file is placed in the ``unknown'' category for +later analysis. +
+

+One complicating factor is that I wished to separate C, C++, and +Objective-C code, but a header file ending with +``.h'' or ``.hpp'' could be any of these languages. +In theory, ``.hpp'' is only C++, but I found that in practice this isn't true. +I developed a number of heuristics to determine, for each header file, +what language it belonged to. +For example, if a given directory has exactly one of these languages +(ignoring header files), +the header is assumed to belong to that category as well. +Similarly, if there is a body file (e.g., ".c") that has the same name +as the header file, then presumably the header file is of the same language. +Finally, a header file with the keyword ``class'' is almost certainly not a +C header file, but a C++ header file; otherwise it's assumed to +be a C file. +

+None of the SLOC counters fully parse the source code; they just examine +the code using simple text processing patterns to count the SLOC. +In practice, by handling a number of special cases this seems to be fine. +Here are some notes on some of the language counters; +the language name is followed by common extensions in parentheses +and the SLOCCount name of the language in brackets: +

    +
  1. Ada (.ada, .ads, .adb) [ada]: Comments begin with "--". +
  2. Assembly (.s, .S, .asm) [asm]: +Assembly languages vary greatly in the comment character they use, +so my counter had to handle this variance. +The assembly language counter (asm_count) +first examines the file to determine if +C-style ``/*'' comments and C preprocessor commands +(e.g., ``#include'') are used. +If both ``/*'' and ``*/'' are in the file, it's assumed that +C-style comments are being used +(since it is unlikely that both would be used +as something else, say as string data, in the same assembly language file). +Determining if a file used the C preprocessor was trickier, since +many assembly files do use ``#'' as a comment character and some +preprocessor directives are ordinary words that might be included +in a human comment. +The heuristic used is as follows: if #ifdef, #endif, or #include are used, the +C preprocessor is used; or if at least three lines have either #define or #else, +then the C preprocessor is used. +No doubt other heuristics are possible, but this at least seems to produce +reasonable results. +The program then determines what the comment character is by identifying +which punctuation mark (from a set of possible marks) +is the most common non-space initial character on a line +(ignoring ``/'' and ``#'' if C comments or preprocessor commands, +respectively, are used). +Once the comment character has been determined, and it's been determined +if C-style comments are allowed, the lines of code +are counted in the file. +
  3. awk (.awk) [awk]: Comments begin with "#". +
  4. C (.c) [ansic]: Both traditional C comments (/* .. */) and C++ +(//) comments are supported. +Technically, C doesn't support "//", but in practice many C programs use them. +The C counter understands multi-line strings, so +comment characters (/* .. */ and //) are treated as data inside strings. +Conversely, the counter knows that a double-quote character inside a +comment does not begin a C/C++ string. +
  5. C++ (.C, .cpp, .cxx, .cc) [cpp]: The same counter is used for +both C and C++. +Note that break_filelist does try to separate C from C++ for purposes +of accounting between them. +
  6. C# (.cs): The same counter is used as for C and C++. +Note that there are no "header" filetypes in C#. +
  7. C shell (.csh) [csh]: Comments begin with "#". +
  8. COBOL (.cob, .cbl) [cobol]: SLOCCount +detects if a "freeform" command has been given; until such a command is +given, fixed format is assumed. +In fixed format, comments have a "*" or "/" in column 7 or column 1; +any line that's not a comment and has a nonwhitespace character after column 7 +(the indicator area) is counted as a source line of code. +In freeform style, any line beginning with optional whitespace and +then "*" or "/" is considered a comment; any noncomment line +with a nonwhitespace character is counted as SLOC. +
  9. Expect (.exp) [exp]: Comments begin with "#". +
  10. Fortran (.f) [fortran]: Comment-only lines are lines +where column 1 character = C, c, *, or !. +Note that this is really only a Fortran-77 SLOC counter. +
  11. Haskell (.hs) [haskell]: +This counter handles block comments {- .. -} and single line comments (--); +pragmas {-# .. -} are counted as SLOC. +This is a simplistic counter, +and can be fooled by certain unlikely combinations of block comments +and other syntax (line-ending comments or strings). +In particular, "Hello {-" will be incorrectly interpreted as a +comment block begin, and "{- -- -}" will be incorrectly interpreted as a +comment block begin without an end. +Note that .lhs (literate Haskell) is not supported; please +preprocess .lhs files into .hs files before counting. +See the +Haskell 98 +report section on literate Haskell for more information. +
  12. Java (.java) [java]: Java is counted using the same counter as C and C++. +
  13. lex (.l) [lex]: Uses traditional C /* .. */ comments. +Note that this does not use the same counter as C/C++ internally, since +it's quite legal in lex to have "//" (where it is NOT a comment). +
  14. LISP (.el, .scm, .lsp, .jl) [lisp]: Comments begin with ";". +
  15. ML (.ml, .mli) [ml]: Comments are enclosed in (* .. *). +
  16. Modula3 (.m3, .i3) [modula3]: Comments are enclosed in (* .. *). +
  17. Objective-C (.m) [objc]: Comments are old C-style /* .. */ comments. +
  18. Pascal (.p, .pas) [pascal]: Comments are enclosed in curly braces {} +or (*..*). This counter has known weaknesses; see the BUGS section of +the manual page for more information. +
  19. Perl (.pl, .pm, .perl) [perl]: +Comments begin with "#". +Perl permits in-line ``perlpod'' documents, ``here'' documents, and an +__END__ marker that complicate code-counting. +Perlpod documents are essentially comments, but a ``here'' document +may include text to generate them (in which case the perlpod document +is data and should be counted). +The __END__ marker indicates the end of the file from Perl's +viewpoint, even if there's more text afterwards. +
  20. PHP (.php, .php[3456], .inc) [php]: +Code is counted as PHP code if it has a .php file extension; +it's also counted if it has an .inc extension and looks like PHP code. +SLOCCount does not count PHP code embedded in HTML files normally, +though its lower-level routines can do so if you want to +(use php_count to do this). +Any of the various ways to begin PHP code can be used +(<? .. ?>, +<?php .. ?>, +<script language="php"> .. </script>, +or even <% .. %>). +Any of the PHP comment formats (C, C++, and shell) can be used, and +any string constant formats ("here document", double quote, and single +quote) can be used as well. +
  21. Python (.py) [python]: +Comments begin with "#". +Python has a convention that, at the beginning of a definition +(e.g., of a function, method, or class), an unassigned string can be +placed to describe what's being defined. Since this is essentially +a comment (though it doesn't syntactically look like one), the counter +avoids counting such strings, which may have multiple lines. +To handle this, +strings that start at the beginning of a line are not counted. +Python also has the ``triple quote'' operator, permitting multiline +strings; these need to be handled specially. +Triple-quoted strings are normally considered data, regardless of +content, unless they are used as a comment about a definition. +
  22. Ruby (.rb) [ruby]: Comments begin with "#". +
  23. sed (.sed) [sed]: Comments begin with "#". +Note that these are "sed-only" files; many uses of sed are embedded in +shell scripts (and are categorized as shell scripts in those cases). +
  24. shell (.sh) [sh]: Comments begin with "#". +Note that I classify ksh, bash, and the original Bourne shell sh together, +because they have very similar syntaxes. +For example, in all of these shells, +setting a variable is expressed as "varname=value", +while C shells use "set varname=value". +
  25. TCL (.tcl, .tk, .itk) [tcl]: Comments begin with "#". +
  26. Yacc (.y) [yacc]: Yacc is counted using the same counter as C and C++. +
+

+Much of the code is written in Perl, since it's primarily a text processing +problem and Perl is good at that. +Many short scripts are Bourne shell scripts (it's good at +short scripts for calling other programs), and the +basic C/C++ SLOC counter is written in C for speed. +

+I originally named it "SLOC-Count", but I found that some web search +engines (notably Google) treated that as two words. +By naming it "SLOCCount", it's easier to find by those who know +the name of the program. +

+SLOCCount only counts physical SLOC, not logical SLOC. +Logical SLOC counting requires much more code to implement, +and I needed to cover a large number of programming languages. + + +

+

Definition of SLOC

+

+This tool measures ``physical SLOC.'' +Physical SLOC is defined as follows: +``a physical source line of code (SLOC) is a line ending +in a newline or end-of-file marker, +and which contains at least one non-whitespace non-comment character.'' +Comment delimiters (characters other than newlines starting and ending +a comment) are considered comment characters. +Data lines only including whitespace +(e.g., lines with only tabs and spaces in multiline strings) are not included. +

+To make this concrete, here's an example of a simple C program +(it strips ANSI C comments out). +On the left side is the running SLOC total, where "-" indicates a line +that is not considered a physical "source line of code": +

+ 1    #include <stdio.h>
+ -    
+ -    /* peek at the next character in stdin, but don't get it */
+ 2    int peek() {
+ 3     int c = getchar();
+ 4     ungetc(c, stdin);
+ 5     return c;
+ 6    }
+ -    
+ 7    main() {
+ 8     int c;
+ 9     int incomment = 0;  /* 1 = we are inside a comment */
+ -    
+10     while ( (c = getchar()) != EOF) {
+11        if (!incomment) {
+12          if ((c == '/') && (peek() == '*')) {incomment=1;}
+13        } else {
+14          if ((c == '*') && (peek() == '/')) {
+15               c= getchar(); c=getchar(); incomment=0;
+16          }
+17        }
+18        if ((c != EOF) && !incomment) {putchar(c);}
+19     }
+20    }
+
+

+Robert E. Park et al.'s +Software Size Measurement: +A Framework for Counting Source Statements +(Technical Report CMU/SEI-92-TR-20) +presents a set of issues to be decided when trying to count code. +The paper's abstract states: +

+This report presents guidelines for defining, recording, and reporting +two frequently used measures of software size -- physical source lines +and logical source statements. +We propose a general framework for constructing size +definitions and use it to derive operational methods for +reducing misunderstandings in measurement results. +
+

+Using Park's framework, here is how physical lines of code are counted: +

    +
  1. Statement Type: I used a physical line-of-code as my basis. +I included executable statements, declarations +(e.g., data structure definitions), and compiler directives +(e.g., preprocessor commands such as #define). +I excluded all comments and blank lines. +
  2. How Produced: +I included all programmed code, including any files that had been modified. +I excluded code generated with source code generators, converted with +automatic translators, and those copied or reused without change. +If a file was in the source package, I included it; if the file had +been removed from a source package (including via a patch), I did +not include it. +
  3. Origin: You select the files (and thus their origin). +
  4. Usage: You select the files (and thus their usage), e.g., +you decide if you're going to +include additional applications able to run on the system but not +included with the system. +
  5. Delivery: You'll decide what code to include, but of course, +if you don't have the code you can't count it. +
  6. Functionality: This tool will include both operative and inoperative code +if they're mixed together. +An example of intentionally ``inoperative'' code is +code turned off by #ifdef commands; since it could be +turned on for special purposes, it made sense to count it. +An example of unintentionally ``inoperative'' code is dead or unused code. +
  7. Replications: +Normally, duplicate files are ignored, unless you use +the "--duplicates" or "--crossdups" option. +The tool will count +``physical replicates of master statements stored in +the master code''. +This is simply code cut and pasted from one place to another to reuse code; +it's hard to tell where this happens, and since it has to be maintained +separately, it's fair to include this in the measure. +I excluded copies inserted, instantiated, or expanded when compiling +or linking, and I excluded postproduction replicates +(e.g., reparameterized systems). +
  8. Development Status: You'll decide what code +should be included (and thus the development status of the code that +you'll accept). +
  9. Languages: You can see the language list above. +
  10. Clarifications: I included all statement types. +This included nulls, continues, no-ops, lone semicolons, +statements that instantiate generics, +lone curly braces ({ and }), and labels by themselves. +
+

+Thus, SLOCCount generally follows Park's ``basic definition'', +but with the following exceptions depending on how you use it: +

    +
  1. How Produced: +By default, this tool excludes duplicate files and +code generated with source code generators. +After all, the COCOMO model states that the +only code that should be counted is code +``produced by project personnel'', whereas these kinds of files are +instead the output of ``preprocessors and compilers.'' +If code is always maintained as the input to a code generator, and then +the code generator is re-run, it's only the code generator input's size that +validly measures the size of what is maintained. +Note that while I attempted to exclude generated code, this exclusion +is based on heuristics which may have missed some cases. +If you want to count duplicate files, use the +"--duplicates" and/or "--crossdups" options; +if you want to count automatically generated files, pass +the "--autogen" option. +
  2. Origin: +You can choose what source code you'll measure. +Normally physical SLOC doesn't include an unmodified +``vendor-supplied language support library'' nor a +``vendor-supplied system or utility''. +However, if this is what you are measuring, then you need to include it. +If you include such code, your set will be different +than the usual ``basic definition.'' +
  3. Functionality: I included counts of unintentionally inoperative code +(e.g., dead or unused code). +It is very difficult to automatically detect such code +in general for many languages. +For example, a program not directly invoked by anything else nor +installed by the installer is much more likely to be a test program, +which you may want to include in the count (you often would include it +if you're estimating effort). +Clearly, discerning human ``intent'' is hard to automate. +
+

+Otherwise, this counter follows Park's +``basic definition'' of a physical line of code, even down to Park's +language-specific definitions where Park defined them for a language. + + +

+

Miscellaneous Notes

+

+There are other undocumented analysis tools in the original tar file. +Most of them are specialized scripts for my circumstances, but feel +free to use them as you wish. +

+If you're packaging this program, don't just copy every executable +into the system "bin" directory - many of the files are those +specialized scripts. +Just put in the bin directory every executable documented here, plus the +files they depend on (there aren't that many). +See the RPM specification file to see what's actually installed. +

+You have to take any measure of SLOC (including this one) with a +large grain of salt. +Physical SLOC is sensitive to the format of source code. +There's a correlation between SLOC and development effort, and some +correlation between SLOC and functionality, +but there's absolutely no correlation between SLOC +and either "quality" or "value". +

+A problem of physical SLOC is that it's sensitive to formatting, +and that's a legitimate (and known) problem with the measure. +However, to be fair, logical SLOC is influenced by coding style too. +For example, the following two phrases are semantically identical, +but will have different logical SLOC values: +

+   int i, j;  /* 1 logical SLOC */
+
+   int i;     /* 2 logical SLOC, but it does the same thing */
+   int j;
+
+

+If you discover other information that can be divided up by +data directory children (e.g., the license used), it's probably best +to add that to each subdirectory (e.g., as a "license" file in the +subdirectory). +Then you can modify tools like get_sloc +to add them to their display. +

+I developed SLOCCount for my own use, not originally as +a community tool, so it's certainly not beautiful code. +However, I think it's serviceable - I hope you find it useful. +Please send me patches for any improvements you make! +

+You can't use this tool as-is with some estimation models, such as COCOMO II, +because this tool doesn't compute logical SLOC. +I certainly would accept code contributions to add the ability to +measure logical SLOC (or related measures such as +Cyclomatic Complexity and Cyclomatic density); +selecting them could be a compile-time option. +However, measuring logical SLOC takes more development effort, so I +haven't done so; see USC's "CodeCount" for a set of code that +measures logical SLOC for some languages +(though I've had trouble with CodeCount - in particular, its C counter +doesn't correctly handle large programs like the Linux kernel). + + +

+

SLOCCount License

+

+Here is the SLOCCount License; the file COPYING contains the standard +GPL version 2 license: +

+=====================================================================
+SLOCCount
+Copyright (C) 2000-2001 David A. Wheeler (dwheeler, at, dwheeler.com)
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+=====================================================================
+
+

+While it's not formally required by the license, please give credit +to me and this software in any report that uses results generated by it. +

+This document was written by David A. Wheeler (dwheeler, at, dwheeler.com), +and is +(C) Copyright 2001 David A. Wheeler. +This document is covered by the license (GPL) listed above. +

+The license does give you the right to +use SLOCCount to analyze proprietary programs. + +

+

Related Tools

+

+One available toolset is +CodeCount. +I tried using this toolset, but I eventually gave up. +It had too many problems handling the code I was trying to analyze, and it +does a poor job automatically categorizing code. +It also has no support for many of today's languages (such as Python, +Perl, Ruby, PHP, and so on). +However, it does a lot of analysis and measurements that SLOCCount +doesn't do, so it all depends on your need. +Its license appeared to be open source, but it's quite unusual and +I'm not enough of a lawyer to be able to confirm that. +

+Another tool that's available is LOCC. +It's available under the GPL. +It can count Java code, and there's experimental support for C++. +LOCC is really intended for more deeply analyzing each Java file; +what's particularly interesting about it is that it can measure +"diffs" (how much has changed). +See + +A comparative review of LOCC and CodeCount. +

+ +CCCC is a tool which analyzes C++ and Java files +and generates a report on various metrics of the code. +Metrics supported include lines of code, McCabe's complexity, +and metrics proposed by Chidamber & Kemerer and Henry & Kafura. +(You can see +Tim Littlefair's comments). +CCCC is in the public domain. +It reports on metrics that sloccount doesn't, but sloccount can handle +far more computer languages. + +

+

Submitting Changes

+

+The GPL license doesn't require you to submit changes you make back to +its maintainer (currently me), +but it's highly recommended and wise to do so. +Because others will send changes to me, a version you make on your +own will slowly become obsolete and incompatible. +Rather than allowing this to happen, it's better to send changes in to me +so that the latest version of SLOCCount also has the +features you're looking for. +If you're submitting support for new languages, be sure that your +change correctly ignores files that aren't in that new language +(some filename extensions have multiple meanings). +You might want to look at the TODO file first. +

+When you send changes to me, send them as "diff" results so that I can +use the "patch" program to install them. +If you can, please send ``unified diffs'' -- GNU's diff can create these +using the "-u" option. + + diff --git a/sloccount.spec b/sloccount.spec new file mode 100644 index 0000000..62dd7b4 --- /dev/null +++ b/sloccount.spec @@ -0,0 +1,56 @@ +# +# RPM spec file for "sloccount". +# +%define PKG_VERSION 2.26 + +Name: sloccount +Summary: Measures source lines of code (SLOC) in programs +Version: %{PKG_VERSION} +Release: 1 +Copyright: GPL +Group: Development/Tools +Source: http://www.dwheeler.com/sloccount/sloccount-%{PKG_VERSION}.tar.gz +URL: http://www.dwheeler.com/sloccount +Vendor: David A. Wheeler +Packager: David A. Wheeler +Prefix: /usr +BuildRoot: /var/tmp/%name-buildroot + +%description +SLOCCount (pronounced "sloc-count") is a suite of programs for counting +physical source lines of code (SLOC) in potentially large software systems +(thus, SLOCCount is a "software metrics tool" or "software measurement tool"). +SLOCCount can count physical SLOC for a wide number of languages; +listed alphabetically, they are: Ada, Assembly, awk, Bourne shell, C, C++, +C shell, COBOL, Expect, Fortran, Java, lex/flex, LISP (including Scheme), +Modula-3, Objective-C, Pascal, Perl, PHP, Python, sed, TCL, and Yacc. +SLOCCount can automatically determine if a file +is a source code file or not, and if so, which language it's written in. +As a result, you can analyze large systems completely automatically; +it's been used to examine entire GNU/Linux distributions, for example. +SLOCCount also includes some report-generating tools +to collect the data generated and present it in several different formats. +Normally you can just run "sloccount DIRECTORY" and all the source code +in the directory and its descendants will be counted. + +%prep +%setup + +%build +make + +%install +rm -rf ${RPM_BUILD_ROOT} +mkdir -p ${RPM_BUILD_ROOT}%{_bindir} +mkdir -p ${RPM_BUILD_ROOT}%{_mandir}/man1 +make install_programs PREFIX=${RPM_BUILD_ROOT}%{_prefix} +make install_man PREFIX=${RPM_BUILD_ROOT}%{_prefix} + +%clean +rm -rf ${RPM_BUILD_ROOT} + +%files +%defattr(-, root, root) +%doc sloccount.html README ChangeLog COPYING TODO +%{_bindir}/* +%{_mandir}/*/* diff --git a/sql_count b/sql_count new file mode 100755 index 0000000..8240fd9 --- /dev/null +++ b/sql_count @@ -0,0 +1,76 @@ +#!/usr/bin/perl +# sql_count - count physical lines of code in SQL. + +# SQL is really screwed up in its commenting system. +# In ANSI, "--" means start of comment, but this causes many problems +# with automatically generated SQL queries. For example, given: +# UPDATE tbl_name SET credit=credit-!payment! +# If !payment! is automatically substituted for a negative number, +# a comment is unexpectedly generated. + +# So, this program accepts "-- " (dash-dash-space) as a comment character. +# It also supports "#" and /* .. */, which are supported by MySQL. + +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +$total_sloc = 0; + +# Do we have "-f" (read list of files from second argument)? +if (($#ARGV >= 1) && ($ARGV[0] eq "-f")) { + # Yes, we have -f + if ($ARGV[1] eq "-") { + # The list of files is in STDIN + while () { + chomp ($_); + &count_file ($_); + } + } else { + # The list of files is in the file $ARGV[1] + open (FILEWITHLIST, $ARGV[1]) || die "Error: Could not open $filewithlist\n"; + while () { + chomp ($_); + &count_file ($_); + } + close FILEWITHLIST; + } + shift @ARGV; shift @ARGV; +} +# Process all (remaining) arguments as file names +while ($file = shift @ARGV) { + &count_file ($file); +} + +print "Total:\n"; +print "$total_sloc\n"; + +sub count_file { + my ($file) = @_; + my $sloc = 0; + + $result = `sed -e "s/#.*//" -e "s/-- .*//" < "$file" | c_count`; + $result =~ m/^\s*([0-9]+)/; + $sloc = $1; + print "$sloc $file\n"; + $total_sloc += $sloc; +} diff --git a/stripccomments.c b/stripccomments.c new file mode 100644 index 0000000..187659c --- /dev/null +++ b/stripccomments.c @@ -0,0 +1,50 @@ +/* +stripcomments - a simple program to remove C comments. + +This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). +Copyright (C) 2001-2004 David A. Wheeler. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. + + +*/ + +#include + +int peek() { + int c = getchar(); + ungetc(c, stdin); + return c; +} + +main() { + int c; + int incomment = 0; + + while ( (c = getchar()) != EOF) { + if (!incomment) { + if ((c == '/') && (peek() == '*')) {incomment=1;} + } else { + if ((c == '*') && (peek() == '/')) { + c= getchar(); c=getchar(); incomment=0; + } + } + if ((c != EOF) && !incomment) {putchar(c);} + } +} + diff --git a/stub b/stub new file mode 100644 index 0000000..40cfabc --- /dev/null +++ b/stub @@ -0,0 +1,22 @@ + +This is part of SLOCCount, a toolsuite that counts source lines of code (SLOC). +Copyright (C) 2001-2004 David A. Wheeler. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +To contact David A. Wheeler, see his website at: + http://www.dwheeler.com. + + diff --git a/stubsh b/stubsh new file mode 100644 index 0000000..74456aa --- /dev/null +++ b/stubsh @@ -0,0 +1,23 @@ +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# diff --git a/table.html b/table.html new file mode 100644 index 0000000..81474a3 --- /dev/null +++ b/table.html @@ -0,0 +1,569 @@ +table
+By default SLOCCount uses a very simple estimating model for effort and schedule: +the basic COCOMO model in the "organic" mode (see below for more about this). + This model estimates effort and schedule, including design, code, test, +and documentation time (both user/admin documentation and development documentation). +Basic COCOMO is a nice simple model, and it's used as the default because +it doesn't require any information about the code other than the SLOC count +already computed.
+
+However, basic COCOMO's accuracy is limited in part for the same reason - +basic COCOMO doesn't take a number of important factors into account.
+If you have the necessary information, you can improve the model's accuracy +by taking these factors into account.  You can at least quickly determine +if the right "mode" is being used to improve accuracy.  You can also +use the "Intermediate COCOMO" and "Detailed COCOMO" models that take more +factors into account, and are likely to produce more accurate estimates as +a result. Take these estimates as just that - estimates - they're not grand +truths.
+
+To use the COCOMO model, you first need to determine your application's +mode, which can be "organic", "semidetached", or "embedded".  Most software +is "organic" (which is why it's the default).  Here are simple definitions +of these modes:
+

    +
  • Organic: Relatively small software teams develop software in a highly +familiar, in-house environment.  It has a generally stable development +environment, minimal need for innovative algorithms, and requirements can +be relaxed to avoid extensive rework.
  • Semidetached: This is an intermediate +step between organic and embedded. This is generally characterized by reduced +flexibility in the requirements.
  • Embedded: The project must operate +within tight (hard-to-meet) constraints, and requirements and interface specifications +are often non-negotiable.  The software will be embedded in a complex environment that the software must deal with as-is.
+By default, SLOCCount uses the basic COCOMO model in the organic mode.  For +the basic COCOMO model, here are the critical factors for --effort and --schedule:
+
    +
  • Organic: effort factor = 2.4, exponent = 1.05; schedule factor = 2.5, exponent = 0.38
  • Semidetached: effort factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35
  • Embedded: effort factor = 3.6, exponent = 1.20; schedule factor = 2.5, exponent = 0.32
+Thus, if you want to use sloccount but the project is actually semidetached, +you can use "--effort 3.0 1.12 --schedule 2.5 0.35" as options to sloccount +to get a more accurate estimate.
+
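To make the arithmetic concrete, here is a minimal sketch (not part of SLOCCount itself; the 10,000-SLOC figure is just an assumed example) of the standard basic COCOMO equations that these factor/exponent pairs plug into:

#!/usr/bin/perl
# Illustrative sketch of the basic COCOMO equations that the --effort and
# --schedule factor/exponent pairs parameterize.  The SLOC value is an
# assumed example, not the output of a real run.
my $sloc = 10000;                                  # physical SLOC
my ($effort_factor, $effort_exp) = (2.4, 1.05);    # organic defaults
my ($sched_factor,  $sched_exp)  = (2.5, 0.38);    # organic defaults
my $ksloc    = $sloc / 1000.0;
my $effort   = $effort_factor * ($ksloc ** $effort_exp);  # person-months
my $schedule = $sched_factor  * ($effort ** $sched_exp);  # months
printf "Estimated effort:   %.2f person-months\n", $effort;
printf "Estimated schedule: %.2f months\n", $schedule;

With the organic defaults shown, this prints roughly 26.9 person-months and 8.7 months; using the semidetached or embedded values above changes only the two factor/exponent pairs.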
+For more accurate estimates, you can use the intermediate COCOMO models. + For intermediate COCOMO, use the following figures:
+
  • Organic: effort base factor = 2.3, exponent = 1.05; schedule factor = 2.5, exponent = 0.38
  • Semidetached: effort base factor = 3.0, exponent = 1.12; schedule factor = 2.5, exponent = 0.35
  • Embedded: effort base factor = 2.8, exponent = 1.20; schedule factor = 2.5, exponent = 0.32
+The intermediate COCOMO values for schedule are exactly the same as in the basic +COCOMO model; the starting effort values are not quite the same, as discussed +in Boehm's book.  However, in the intermediate COCOMO model, you don't +normally use the effort factors as-is; instead, you apply various corrective factors +(called cost drivers).  To apply these corrections, you consider +all the cost drivers, multiply their values together, and multiply that product by the effort base factor to determine +the final effort factor.  Here are the cost drivers (from Boehm's book, +tables 8-2 and 8-3):
+
Each cost driver is listed with the multiplier for each defined rating
(Very Low, Low, Nominal, High, Very High, Extra High); ratings not listed
are not defined for that driver:

RELY (Required software reliability):
  Very Low 0.75 (effect is slight inconvenience); Low 0.88 (easily recovered
  losses); Nominal 1.00 (recoverable losses); High 1.15 (high financial loss);
  Very High 1.40 (risk to human life)
DATA (Database size):
  Low 0.94 (database bytes/SLOC < 10); Nominal 1.00 (D/S between 10 and 100);
  High 1.08 (D/S between 100 and 1000); Very High 1.16 (D/S > 1000)
CPLX (Product complexity):
  Very Low 0.70 (mostly straightline code, simple arrays, simple expressions);
  Low 0.85; Nominal 1.00; High 1.15; Very High 1.30; Extra High 1.65
  (microcode, multiple resource scheduling, device timing dependent coding)
TIME (Execution time constraint):
  Nominal 1.00 (<50% use of available execution time); High 1.11 (70% use);
  Very High 1.30 (85% use); Extra High 1.66 (95% use)
STOR (Main storage constraint):
  Nominal 1.00 (<50% use of available storage); High 1.06 (70% use);
  Very High 1.21 (85% use); Extra High 1.56 (95% use)
VIRT (Virtual machine (HW and OS) volatility):
  Low 0.87 (major change every 12 months, minor every month);
  Nominal 1.00 (major change every 6 months, minor every 2 weeks);
  High 1.15 (major change every 2 months, minor changes every week);
  Very High 1.30 (major changes every 2 weeks, minor changes every 2 days)
TURN (Computer turnaround time):
  Low 0.87 (interactive); Nominal 1.00 (average turnaround < 4 hours);
  High 1.07; Very High 1.15
ACAP (Analyst capability):
  Very Low 1.46 (15th percentile); Low 1.19 (35th percentile);
  Nominal 1.00 (55th percentile); High 0.86 (75th percentile);
  Very High 0.71 (90th percentile)
AEXP (Applications experience):
  Very Low 1.29 (<= 4 months experience); Low 1.13 (1 year);
  Nominal 1.00 (3 years); High 0.91 (6 years); Very High 0.82 (12 years)
PCAP (Programmer capability):
  Very Low 1.42 (15th percentile); Low 1.17 (35th percentile);
  Nominal 1.00 (55th percentile); High 0.86 (75th percentile);
  Very High 0.70 (90th percentile)
VEXP (Virtual machine experience):
  Very Low 1.21 (<= 1 month experience); Low 1.10 (4 months);
  Nominal 1.00 (1 year); High 0.90 (3 years)
LEXP (Programming language experience):
  Very Low 1.14 (<= 1 month experience); Low 1.07 (4 months);
  Nominal 1.00 (1 year); High 0.95 (3 years)
MODP (Use of "modern" programming practices, e.g. structured programming):
  Very Low 1.24 (no use); Low 1.10; Nominal 1.00 (some use); High 0.91;
  Very High 0.82 (routine use)
TOOL (Use of software tools):
  Very Low 1.24; Low 1.10; Nominal 1.00 (basic tools); High 0.91 (test tools);
  Very High 0.83 (requirements, design, management, documentation tools)
SCED (Required development schedule):
  Very Low 1.23 (75% of nominal); Low 1.08 (85% of nominal); Nominal 1.00;
  High 1.04 (130% of nominal); Very High 1.10 (160% of nominal)
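All of these multipliers combine in one simple step: multiply the ratings you selected together, then multiply that product by the effort base factor. Here is a minimal illustrative sketch of that step (not part of SLOCCount; the three driver values are placeholders picked from the table above, and any driver you omit is effectively nominal, 1.00):

#!/usr/bin/perl
# Combine selected intermediate-COCOMO cost-driver multipliers into a
# corrected effort factor.  The ratings below are placeholder examples.
my %rating = (
    RELY => 0.88,    # Low: easily recovered losses
    CPLX => 1.15,    # High product complexity
    ACAP => 0.86,    # High: 75th-percentile analysts
);                   # omitted drivers count as nominal (1.00)
my $base_factor = 2.3;                  # intermediate COCOMO, organic mode
my $correction  = 1.0;
$correction *= $_ for values %rating;
printf "Corrected effort factor: %.3f\n", $base_factor * $correction;

The resulting value is what you would pass as the first argument to "--effort", along with the organic exponent of 1.05.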
+So, once all of the factors have been multiplied together,  you can +then use the "--effort" flag to set more accurate factors and exponents.
+
+For example, imagine that you're examining a fairly simple application that +meets the "organic" requirements.  Organic projects have an effort base factor +of 2.3 and an exponent of 1.05, as noted above.  We then examine all the +cost drivers to determine a corrected base factor.  For this example, imagine +that we determine the values of these cost drivers to be as follows:
+
ID    Driver Name                               Rating -> Multiplier

RELY  Required software reliability             Low - easily recovered losses -> 0.88
DATA  Database size                             Low -> 0.94
CPLX  Product complexity                        Nominal -> 1.00
TIME  Execution time constraint                 Nominal -> 1.00
STOR  Main storage constraint                   Nominal -> 1.00
VIRT  Virtual machine (HW and OS) volatility    Low (major change every 12 months, minor every month) -> 0.87
TURN  Computer turnaround time                  Low (interactive) -> 0.87
ACAP  Analyst capability                        Nominal (55th percentile) -> 1.00
AEXP  Applications experience                   Nominal (3 years) -> 1.00
PCAP  Programmer capability                     Nominal (55th percentile) -> 1.00
VEXP  Virtual machine experience                High (3 years) -> 0.90
LEXP  Programming language experience           High (3 years) -> 0.95
MODP  Use of "modern" programming practices     High (routine use) -> 0.82
TOOL  Use of software tools                     Nominal (basic tools) -> 1.00
SCED  Required development schedule             Nominal -> 1.00
+By multiplying these driver values together in this example, we compute:
+
0.88*0.94*1*1*1*0.87*0.87*1*1*1*0.90*0.95*0.82*1*1
+The correction from these is 0.438964094, which you multiply by the base +factor (2.3 in this case) to determine a final effort factor.  For this +example, the final factor for the effort calculation is 1.01. You would then +invoke sloccount with "--effort 1.01 1.05" to pass in the corrected factor +and exponent.  You don't need to use "--schedule" to set the factors +(they default to the values for the organic model), but you can set them manually +anyway by setting "--schedule 2.5 0.38".  You do need to use the --schedule option for embedded and semidetached projects. The final command would be:
+
+sloccount --effort 1.01 1.05 --schedule 2.5 0.38 my_project
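If you want to double-check the arithmetic in this example, here is a throwaway sketch (not part of SLOCCount) that recomputes the correction and the corrected effort factor:

#!/usr/bin/perl
# Recompute the example's cost-driver correction and corrected effort factor.
my @drivers = (0.88, 0.94, 1.00, 1.00, 1.00, 0.87, 0.87, 1.00,
               1.00, 1.00, 0.90, 0.95, 0.82, 1.00, 1.00);
my $correction = 1.0;
$correction *= $_ for @drivers;
my $effort_factor = 2.3 * $correction;
printf "Correction:              %.9f\n", $correction;      # about 0.438964094
printf "Corrected effort factor: %.2f\n", $effort_factor;   # about 1.01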
+
+
+
+
+
+
+ \ No newline at end of file diff --git a/tcl_count b/tcl_count new file mode 100755 index 0000000..f892692 --- /dev/null +++ b/tcl_count @@ -0,0 +1,27 @@ +#!/bin/sh +# +# This is part of SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# Copyright (C) 2001-2004 David A. Wheeler. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +generic_count '#' $@ + diff --git a/testcode/conditions.CBL b/testcode/conditions.CBL new file mode 100644 index 0000000..8e12724 --- /dev/null +++ b/testcode/conditions.CBL @@ -0,0 +1,31 @@ + $ SET SOURCEFORMAT"FREE" +IDENTIFICATION DIVISION. +PROGRAM-ID. Conditions. +AUTHOR. Michael Coughlan. +* An example program demonstrating the use of +* condition names (level 88's). +* The EVALUATE and PERFORM verbs are also used. + +DATA DIVISION. +WORKING-STORAGE SECTION. +01 Char PIC X. + 88 Vowel VALUE "a", "e", "i", "o", "u". + 88 Consonant VALUE "b", "c", "d", "f", "g", "h" + "j" THRU "n", "p" THRU "t", "v" THRU "z". + 88 Digit VALUE "0" THRU "9". + 88 ValidCharacter VALUE "a" THRU "z", "0" THRU "9". + +PROCEDURE DIVISION. +Begin. + DISPLAY "Enter lower case character or digit. No data ends.". + ACCEPT Char. + PERFORM UNTIL NOT ValidCharacter + EVALUATE TRUE + WHEN Vowel DISPLAY "The letter " Char " is a vowel." + WHEN Consonant DISPLAY "The letter " Char " is a consonant." + WHEN Digit DISPLAY Char " is a digit." + WHEN OTHER DISPLAY "problems found" + END-EVALUATE + END-PERFORM + STOP RUN. + diff --git a/testcode/hello.f b/testcode/hello.f new file mode 100644 index 0000000..f66fe77 --- /dev/null +++ b/testcode/hello.f @@ -0,0 +1,10 @@ +c Hello World +* Hello World +! Hello World + program hello + implicit none + print '("Hello, World!")' + end + ! a fancy comment +!hpf$ not a comment +!omp$ not a comment either diff --git a/testcode/hello.f90 b/testcode/hello.f90 new file mode 100644 index 0000000..6b26a2e --- /dev/null +++ b/testcode/hello.f90 @@ -0,0 +1,7 @@ +! Hello World +program hello + implicit none + print '("Hello, World!")' +end program hello +!hpf$ not a comment +!omp$ not a comment either diff --git a/testcode/hello.pas b/testcode/hello.pas new file mode 100644 index 0000000..40c6005 --- /dev/null +++ b/testcode/hello.pas @@ -0,0 +1,9 @@ +{ Hello World in Pascal, for testing SLOCCount. + This is multi-line, testing curly braces. } +(* This is another multi-line comment. + Here's another line. *) +program Hello; +begin (* Main *) + writeln ('Hello, world.') +end. (* Main *) + diff --git a/testcode/hello1.pas b/testcode/hello1.pas new file mode 100644 index 0000000..c53c0d2 --- /dev/null +++ b/testcode/hello1.pas @@ -0,0 +1,12 @@ +{ Hello World in Pascal, for testing SLOCCount. + This is multi-line, testing curly braces. } +(* This is another multi-line comment. + Here's another line. 
*) +(* This is { another } test. **) +program Hello; +begin (* Main *) + writeln ('Hello, world.'); + writeln ('It''s a test!'); + writeln ('Show that newlines are detected') +end. (* Main *) + diff --git a/testcode/messages.rb b/testcode/messages.rb new file mode 100644 index 0000000..1521ae6 --- /dev/null +++ b/testcode/messages.rb @@ -0,0 +1,152 @@ +#!/usr/local/bin/ruby +# messages.rb - this is a test for the Ruby SLOC counter. +# You should get 110 SLOC for this file. + +# Guru module: private messages among players +# Copyright (C) 2001, 2002 Josef Spillner, dr_maux@user.sourceforge.net +# This is used as a test case in SLOCCount, a toolsuite that counts +# source lines of code (SLOC). +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To contact David A. Wheeler, see his website at: +# http://www.dwheeler.com. +# +# + +# Commands: +# guru do i have any messages +# guru tell grubby nice to meet myself :) +# guru alert grubby + +databasedir = ENV['HOME'] + "/.ggz/grubby" + +#################################################################################### + +class GuruMessages + def initialize + @msg = Array.new + @alerts = Array.new + end + def add(fromplayer, player, message) + @entry = Array.new + newmessage = (fromplayer + " said: " + message.join(" ")).split(" ") + @entry << player << newmessage + @msg.push(@entry) + print "OK, I make sure he gets the message." + $stdout.flush + sleep 1 + end + def tell(player) + len = @msg.length + a = 0 + for i in 0..len + unless @msg[len-i] == nil + print @msg[len-i][1][0..@msg[len-i][1].length - 1].join(" ") + "\n" if player == @msg[len-i][0] + if player == @msg[len-i][0] + @msg.delete_at(len-i) + a = 1 + end + end + end + if a == 0 + print "Sorry " + player + ", I guess you're not important enough to get any messages." + end + $stdout.flush + sleep 1 + end + def alert(fromplayer, player) + @entry = Array.new << fromplayer << player + @alerts.push(@entry) + print "OK, I alert " + player + " when I see him." 
+ $stdout.flush + sleep 1 + end + def trigger(player) + len = @alerts.length + a = 0 + for i in 0..len + unless @alerts[len-i] == nil + if player == @alerts[len-i][0] + print player + ": ALERT from " + @alerts[len-i][1] + "\n" + @alerts.delete_at(len-i) + a = 1 + end + end + end + if a == 1 + $stdout.flush + sleep 1 + return 1 + end + return 0 + end +end + +input = $stdin.gets.chomp.split(/\ /) + +mode = 0 +if (input[1] == "do") && (input[2] == "i") && (input[3] == "have") && + (input[4] == "any") && (input[5] == "messages") + mode = 1 + player = ARGV[0] +end +if (input[1] == "tell") + mode = 2 + fromplayer = ARGV[0] + player = input[2] + message = input[3..input.length] +end +if(input[1] == "alert") + mode = 3 + fromplayer = ARGV[0] + player = input[2] +end + +m = nil +begin + File.open(databasedir + "/messages") do |f| + m = Marshal.load(f) + end +rescue + m = GuruMessages.new +end + +if mode == 0 + ret = m.trigger ARGV[0] + if ret == 0 + exit + end +end +if mode == 1 + if player != nil + m.tell player + else + print "If you mind telling me who you are?" + $stdout.flush + sleep 1 + end +end +if mode == 2 + m.add fromplayer, player, message +end +if mode == 3 + m.alert fromplayer, player +end + +File.open(databasedir + "/messages", "w+") do |f| + Marshal.dump(m, f) +end + diff --git a/testcode/temp.c b/testcode/temp.c new file mode 100644 index 0000000..d540f08 --- /dev/null +++ b/testcode/temp.c @@ -0,0 +1,5 @@ + + +main() { + int i; +} diff --git a/testcode/test.hs b/testcode/test.hs new file mode 100644 index 0000000..de874df --- /dev/null +++ b/testcode/test.hs @@ -0,0 +1,19 @@ + +-- This literate program prompts the user for a number +-- and prints the factorial of that number: + +{- This is a comment. -} +{- This is a comment, + too -} + +{-# this is a pragma, COUNT IT -} + + main :: IO () + main = do putStr "Enter a number: " + l <- readLine + putStr "n!= " + print (fact (read l)) + fact :: Integer -> Integer + fact 0 = 1 + fact n = n * fact (n-1) + diff --git a/testcode/test1.inc b/testcode/test1.inc new file mode 100644 index 0000000..a56d14e --- /dev/null +++ b/testcode/test1.inc @@ -0,0 +1,23 @@ + diff --git a/testcode/test1.lhs b/testcode/test1.lhs new file mode 100644 index 0000000..3c19a70 --- /dev/null +++ b/testcode/test1.lhs @@ -0,0 +1,15 @@ +\documentstyle{article} + +\begin{document} + +\section{Introduction} + +This is a trivial program that prints the first 20 +factorials. It should have 2 lines of code. + +\begin{code} +main :: IO () +main = print [ (n, product [1..n]) | n <- [1..20]] +\end{code} + +\end{document} diff --git a/testcode/test1.php b/testcode/test1.php new file mode 100644 index 0000000..9fd2510 --- /dev/null +++ b/testcode/test1.php @@ -0,0 +1,27 @@ + diff --git a/testcode/test2.lhs b/testcode/test2.lhs new file mode 100644 index 0000000..6e39905 --- /dev/null +++ b/testcode/test2.lhs @@ -0,0 +1,44 @@ + +This is an extract of a larger literate Haskell file for testing +SLOCCount. It should have 21 lines of code. + +This dumps the tree in dot format, which is very handy for visualizing +the trees. 
+ +> dotTree name t = "digraph " ++ filter dotChars name ++ " { " ++ (dotTree' t 0) ++ " }" + +> dotTree' Empty _ = "" +> dotTree' t i | is_leaf t = "n"++(show i)++" [label=\""++(show $ x_span t)++ +> "\",shape=box]; " +> | otherwise = "n"++(show i)++" [label=\""++(show $ x_span t)++"\"]; " ++ +> "n"++(show i)++" -> n"++(show (2*i+1))++"; "++ +> "n"++(show i)++" -> n"++(show (2*i+2))++"; "++ +> dotTree' (left t) (2*i+1) ++ +> dotTree' (right t) (2*i+2) +> where is_leaf Node { left = Empty, right = Empty } = True +> is_leaf _ = False +> {- this is a comment + +foo bar baz + +> that +> spans literate blocks -} + +> dotChars '.' = False +> dotChars '/' = False +> dotChars _ = True + +These functions fill in the monotonically increasing index values for +the lines in the finite map. They also do appropriate things to combine +the world values. + +> idxList [] n = [] +> idxList (x:xs) n = (x {idx=n}):(idxList xs (n+1)) + +> idxFM' fm (x,k) = addToFM (delFromFM fm k) k (y {idx=toInteger x}) +> where y = case lookupFM fm k of +> Just foo -> foo +> Nothing -> error $ "No such key: " ++ show k + +> idxFM fm = foldl idxFM' fm (zip [1..sizeFM fm] $ keysFM fm) + diff --git a/testcode/wokka.cbl b/testcode/wokka.cbl new file mode 100644 index 0000000..d7ccd0c --- /dev/null +++ b/testcode/wokka.cbl @@ -0,0 +1,4 @@ + * Comment. + IDENTIFICATION DIVISION. + PROGRAM-ID. Conditions. + diff --git a/testcode/wokka.cs b/testcode/wokka.cs new file mode 100644 index 0000000..fa95425 --- /dev/null +++ b/testcode/wokka.cs @@ -0,0 +1,8 @@ + +/* comment: This has 5 physical lines of code. */ + +class Test { + static void Main() { + System.Console.WriteLine("Hello, World (in C#)"); + } +} diff --git a/usc_subset.tar b/usc_subset.tar new file mode 100644 index 0000000..67e2d5a --- /dev/null +++ b/usc_subset.tar @@ -0,0 +1 @@ +java_lines.c -- cgit v1.2.1